acgc.stats.bivariate

Bivariate statistics

Statistical measures of relationships between two populations

  1#!/usr/bin/env python3
  2# -*- coding: utf-8 -*-
  3""" Bivariate statistics
  4
  5Statistical measures of relationships between two populations
  6"""
  7
  8import numpy as np
  9from scipy import stats
 10from .bivariate_lines import sma
 11# import xarray as xr
 12
 13__all__ = [
 14    "BivariateStatistics",
 15    "nmb",
 16    "nmae",
 17    "nmbf",
 18    "nmaef"
 19]
 20
 21def nmb( x0, x1 ):
 22    '''Compute Normalized Mean Bias (NMB)
 23
 24    NMB = ( mean(x1) - mean(x0) ) / mean(x0)
 25
 26    Parameters
 27    ----------
 28    x0 : array_like
 29        reference values
 30    x1 : array_like
 31        experiment values
 32    '''
 33
 34    assert (len(x0) == len(x1)), \
 35        "Parameters x0 and x1 must have the same length"
 36
 37    # Mean values
 38    x0_mean = np.mean(x0)
 39    x1_mean = np.mean(x1)
 40
 41    # Metric value
 42    return x1_mean / x0_mean - 1
 43
 44def nmae( x0, x1 ):
 45    '''Compute Normalized Mean Absolute Error (NMAE)
 46
 47    NMAE = mean(abs(x1 - x0)) / abs(mean(x0))
 48
 49    Parameters
 50    ---------
 51    x0 : array_like
 52        reference values
 53    x1 : array_like
 54        experiment values
 55    '''
 56
 57     # Mean values
 58    x0_mean = np.mean(x0)
 59
 60    # Mean absolute difference
 61    abs_diff = np.mean( np.abs(x1 - x0) )
 62
 63    # Metric value
 64    return abs_diff / np.abs( x0_mean )
 65
 66
 67def nmbf( x0, x1 ):
 68    '''Compute Normalized Mean Bias Factor (NMBF)
 69
 70    Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125
 71
 72    Parameters
 73    ----------
 74    x0 : array_like
 75        reference values
 76    x1 : array_like
 77        experiment values
 78    '''
 79
 80    # Ensure that arguments have the same length
 81    assert (len(x0) == len(x1)), \
 82        "Parameters x0 and x1 must have the same length"
 83
 84    # Mean values
 85    x0_mean = np.mean(x0)
 86    x1_mean = np.mean(x1)
 87
 88    # Metric value
 89    if x1_mean >= x0_mean:
 90        result = x1_mean / x0_mean - 1
 91    else:
 92        result= 1 - x0_mean / x1_mean
 93    # Equivalent (faster?) implementation
 94    #S = (mMean - oMean) / np.abs(mMean - oMean)
 95    #result = S * ( np.exp( np.abs( mMean / oMean )) - 1 )
 96
 97    return result
 98
 99def nmaef( x0, x1 ):
100    '''Compute Normalized Mean Absolute Error Factor (NMAEF)
101
102    Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125
103    
104    Parameters
105    ----------
106    x0 : array_like
107        reference values
108    x1 : array_like
109        experiment values
110    '''
111
112    # Ensure that arguments have the same length
113    assert (len(x0) == len(x1)), \
114        "Parameters x0 and x1 must have the same length"
115
116    # Mean values
117    x0_mean = np.mean(x0)
118    x1_mean = np.mean(x1)
119
120    # Mean absolute difference
121    abs_diff = np.mean( np.abs(x1 - x0))
122
123    # Metric value
124    if x1_mean >= x0_mean:
125        result = abs_diff / x0_mean 
126    else:
127        result = abs_diff / x1_mean
128    # Equivalent (faster?) implementation
129    #S = (exp_mean - ref_mean) / np.abs(exp_mean - ref_mean)
130    #result = abs_diff / ( oMean**((1+S)/2) * mMean**((1-S)/2) )
131
132    return result
133
def _texify_name(name):
    '''Return a LaTeX formatted string for some variables

    Only 'R2' and 'r2' have a LaTeX form; any other name is returned unchanged.

    Parameters
    ----------
    name : str
        variable name

    Returns
    -------
    pretty_name : str
        LaTeX math string for known names, otherwise `name` itself
    '''
    # Names that have a LaTeX representation
    latex_names = {'R2': '$R^2$',
                   'r2': '$r^2$'}
    return latex_names.get(name, name)
152
class BivariateStatistics:
    '''A suite of common statistics to quantify bivariate relationships

    Class method 'summary' provides a formatted summary of these statistics

    Attributes
    ----------
    xmean, ymean : float
        mean of x and y variables
    xmedian, ymedian : float
        median of x and y variables
    xstd, ystd : float
        standard deviation of x and y variables
    mean_difference, md : float
        ymean - xmean
    mean_absolute_difference, mad : float
        mean( |y-x| )
    relative_mean_difference, rmd : float
        md / xmean
    relative_mean_absolute_difference, rmad : float
        mad / xmean
    standardized_mean_difference, smd : float
        md / xstd
    standardized_mean_absolute_difference, smad : float
        mad / xstd
    mean_relative_difference, mrd : float
        mean(y/x) - 1
    median_difference, medd : float
        median(y-x)
    median_absolute_difference, medad : float
        median(|y-x|)
    relative_median_difference, rmedd : float
        median(y-x) / xmedian
    relative_median_absolute_difference, rmedad : float
        median(|y-x|) / xmedian
    median_relative_difference, medianrd, medrd : float
        median(y/x)-1
    normalized_mean_bias_factor, nmbf : float
        see `nmbf`
    normalized_mean_absolute_error_factor, nmaef : float
        see `nmaef`
    root_mean_square_difference, rmsd : float
        $\\sqrt{ \\langle (y - x)^2 \\rangle }$
    covariance : float
        cov(x,y)
    correlation_pearson, correlation, pearsonr, R, r : float
        Pearson linear correlation coefficient
    correlation_spearman, spearmanr : float
        Spearman, non-parametric rank correlation coefficient
    R2, r2 : float
        Linear coefficient of determination, $R^2$
    '''

    def __init__(self,x,y,w=None,dropna=False,data=None):
        '''Compute suite of bivariate statistics during initialization

        Statistic values are saved in attributes.
        CAUTION: Weights w are ignored except in SMA fit

        Parameters
        ----------
        x : ndarray or str
            independent variable values
        y : ndarray or str
            dependent variable values, same size as x
        w : ndarray or str, optional
            weights for points (x,y), same size as x and y
        dropna : bool, optional (default=False)
            drops NaN values from x, y, and w
        data : dict-like, optional
            if x, y, or w are str, then they should be keys in data

        Raises
        ------
        ValueError
            if x, y, or w is a string and data is None,
            or if x, y, and w do not all have the same length
        '''

        # Get values from data if needed
        if data is None and any(isinstance(arg,str) for arg in (x,y,w)):
            raise ValueError( 'Data argument must be used if x, y, or w is a string')
        if isinstance(x,str):
            x = data[x]
        if isinstance(y,str):
            y = data[y]
        if isinstance(w,str):
            w = data[w]

        # Ensure that x, y, and w have same length
        if len(x) != len(y):
            raise ValueError( 'Arguments x and y must have the same length' )
        if w is None:
            w = np.ones_like(x)
        if len(w) != len(x):
            raise ValueError( 'Argument w (if present) must have the same length as x' )

        # Drop NaN values
        if dropna:
            # The product is NaN wherever any one of x, y, w is NaN
            isna = np.isnan(x*y*w)
            x = x[~isna]
            y = y[~isna]
            w = w[~isna]

        diff = y - x
        absdiff = np.abs( diff )
        # Ignore divide by zero and 0/0 while dividing;
        # the context manager restores the previous settings even on error
        with np.errstate(divide='ignore',invalid='ignore'):
            ratio = y/x

        # Means, medians, and standard deviations
        self.xmean = np.mean(x)
        self.ymean = np.mean(y)
        self.xmedian = np.median(x)
        self.ymedian = np.median(y)
        self.xstd   = np.std(x)
        self.ystd   = np.std(y)

        # Save values for use later (line fitting)
        self._x = x
        self._y = y
        self._w = w

        # Mean and mean absolute differences
        self.mean_difference            = self.md   = self.ymean - self.xmean
        self.mean_absolute_difference   = self.mad  = np.mean( absdiff )

        # Relative and standardized differences
        self.relative_mean_difference           = self.rmd  = self.mean_difference / self.xmean
        self.relative_mean_absolute_difference  = self.rmad = self.mean_absolute_difference / self.xmean
        self.standardized_mean_difference       = self.smd  = self.mean_difference / self.xstd
        self.standardized_mean_absolute_difference  = self.smad = self.mean_absolute_difference / self.xstd

        # Mean and median relative differences
        self.mean_relative_difference   = self.mrd  = np.mean( ratio - 1 )
        self.median_relative_difference = self.medianrd = self.medrd = np.median( ratio - 1 )

        # Median and median absolute differences
        self.median_difference          = self.medd  = np.median( diff )
        self.median_absolute_difference = self.medad = np.median( absdiff )

        # Relative median differences
        self.relative_median_difference          = self.rmedd  = self.median_difference / self.xmedian
        self.relative_median_absolute_difference = self.rmedad = self.median_absolute_difference / self.xmedian

        # Normalized mean bias factor and absolute error factor
        self.normalized_mean_bias_factor            = self.nmbf  = nmbf(x,y)
        self.normalized_mean_absolute_error_factor  = self.nmaef = nmaef(x,y)

        # RMS difference
        self.root_mean_square_difference    = self.rmsd     = np.sqrt( np.mean( np.power( diff, 2) ) )

        # Covariance, correlation
        self.covariance = np.cov(x,y)[0][1]
        self.correlation = self.correlation_pearson = self.R = self.r = self.pearsonr = \
            np.corrcoef(x,y)[0][1]
        # First element of the result is the correlation coefficient;
        # indexing avoids depending on the name of the result attribute
        self.correlation_spearman = self.spearmanr = stats.spearmanr(x,y)[0]
        self.R2 = self.r2 = self.R**2

    def __getitem__(self,key):
        '''Accesses attribute values via object['key']'''
        return getattr(self,key)

    def fitline(self,method='sma',intercept=True,**kwargs):
        '''Compute bivariate line fit

        Parameters
        ----------
        method : str
            line fitting method (case insensitive):
            sma (default), ols, sen (also theil, theilsen), siegel.
            wls and York are recognized but not implemented yet
        intercept : bool
            defines whether non-zero intercept should be fitted.
            Used by the sma and ols methods only; the sen and siegel
            methods always fit an intercept
        **kwargs
            passed to `acgc.stats.sma` (e.g. robust=True)

        Returns
        -------
        result : dict
            dictionary with keys:
            - slope (float)
                slope of fitted line
            - intercept (float)
                intercept of fitted line
            - fittedvalues (array (N,))
                values on fit line
            - residuals (array (N,))
                residual from fit line

        Raises
        ------
        NotImplementedError
            if method is wls or York
        ValueError
            if method is not recognized
        '''

        method_key = method.lower()

        if method_key=='sma':
            fit = sma(  self._x,
                        self._y,
                        self._w,
                        intercept=intercept,
                        **kwargs)
            slope = fit['slope']
            yintercept = fit['intercept']

        elif method_key=='ols':
            if intercept:
                # Design matrix with columns [x, 1] fits slope and intercept
                design = np.vstack([self._x,np.ones(len(self._x))]).T
                coef = np.linalg.lstsq( design, self._y, rcond=None )[0]
                slope = coef[0]
                yintercept = coef[1]
            else:
                # Only the slope is fitted; the line passes through the origin
                design = np.vstack([self._x]).T
                coef = np.linalg.lstsq( design, self._y, rcond=None )[0]
                slope = coef[0]
                yintercept = 0.0

        elif method_key in ['theil','sen','theilsen']:
            # scipy expects the dependent variable first
            sen = stats.theilslopes( self._y,
                                     self._x )
            slope = sen.slope
            yintercept = sen.intercept

        elif method_key=='siegel':
            # scipy expects the dependent variable first
            siegel = stats.siegelslopes( self._y,
                                         self._x )
            slope = siegel.slope
            yintercept = siegel.intercept

        elif method_key=='wls':
            raise NotImplementedError('WLS regression not implemented yet')

        elif method_key=='york':
            raise NotImplementedError('York regression not implemented yet')

        else:
            raise ValueError('Undefined method '+method)

        fitted = slope * self._x + yintercept
        line = dict( slope          = slope,
                     intercept      = yintercept,
                     fittedvalues   = fitted,
                     residuals      = self._y - fitted )

        return line

    def slope(self,method='sma',intercept=True,**kwargs):
        '''Compute slope of bivariate line fit

        Parameters
        ----------
        method : str
            line fitting method, see `fitline` (default is sma)
        intercept : bool
            defines whether non-zero intercept should be fitted
        **kwargs
            passed to `fitline`

        Returns
        -------
        slope : float
            slope of fitted line
        '''
        return self.fitline(method,intercept,**kwargs)['slope']

    def intercept(self,method='sma',intercept=True,**kwargs):
        '''Compute intercept of bivariate line fit

        Parameters
        ----------
        method : str
            line fitting method, see `fitline` (default is sma)
        intercept : bool
            defines whether non-zero intercept should be fitted
        **kwargs
            passed to `fitline`

        Returns
        -------
        intercept : float
            value of y intercept
        '''
        return self.fitline(method,intercept,**kwargs)['intercept']

    def _expand_variables(self,variables):
        '''Expand special strings into a list of variables

        Parameters
        ----------
        variables : list or str, default='common'
            Special strings ("all","common") will be expanded to a list of variables
            list arguments will not be modified

        Returns
        -------
        list
            variable names

        Raises
        ------
        ValueError
            if variables is not a list, None, "all", or "common"
        '''
        if variables is None:
            variables='common'
        if variables=='all':
            variables=['MD','MAD','RMD','RMAD','MRD','SMD','SMAD',
                       'MedD','MedAD','RMedD','RMedAD','MedRD',
                       'NMBF','NMAEF','RMSD',
                       'R','R2','spearmanr','slope','intercept']
        elif variables=='common':
            variables=['MD','MAD','RMD','RMAD','MRD','R2','slope']
        if not isinstance(variables,list):
            raise ValueError(
                'variables must be a list, None, or one of these strings: "all","common"')

        return variables

    def summary_dict(self, variables=None, fitline_kw=None ):
        '''Summarize bivariate statistics into a dict

        Parameters
        ----------
        variables : list or str, default='common'
            names of attribute variables to include in summary
            names are case insensitive
            The following strings are also accepted in place of a list
                "all" (displays all variables)
                "common" (displays all measures of mean difference)
        fitline_kw : dict, default=None
            keywords passed to self.fitline()
            default (None) uses method='sma' and intercept=True

        Returns
        -------
        summary : dict
            names and values of variables
        '''

        # List of variables
        variables = self._expand_variables(variables)

        if fitline_kw is None:
            fitline_kw = {'method':'sma',
                          'intercept':True}

        # Construct the dict
        summary = {}
        for v in variables:
            attr_name = v.lower()
            if attr_name in ('slope','intercept'):
                # These variables are object methods, so they must be called
                value = getattr(self,attr_name)(**fitline_kw)
            else:
                # Retrieve values; attributes have lower-case aliases
                value = getattr(self,attr_name)

            summary[v] = value

        return summary

    def summary(self, variables=None, fitline_kw=None,
                floatformat='{:.4f}', stringlength=None ):
        '''Summarize bivariate statistics

        Parameters
        ----------
        variables : list or str, default='common'
            names of attribute variables to include in summary
            names are case insensitive
            The following strings are also accepted in place of a list
                "all" (displays all variables)
                "common" (displays all measures of mean difference)
        fitline_kw : dict, default=None
            keywords passed to `fitline`
        floatformat : str, default='{:.4f}'
            format specifier for floating point values
        stringlength : int, default=None
            length of the variables on output
            default (None) is to use the length of the longest variable name

        Returns
        -------
        summary : str
            names and values of variables, one "name = value" pair per line
        '''
        # List of variables
        variables = self._expand_variables(variables)

        if stringlength is None:
            # default=0 keeps an empty variable list from raising
            stringlength = max((len(v) for v in variables), default=0)
        lineformat = '{:'+str(stringlength)+'s}'+' = '+floatformat+'\n'

        # Get a dict containing the needed variables
        summarydict = self.summary_dict( variables, fitline_kw )

        return ''.join( lineformat.format(k,v) for k,v in summarydict.items() )

    def summary_fig_inset(self, ax, variables=None, fitline_kw=None,
                          floatformat='{:.3f}',
                          loc=None, loc_units='axes',
                          **kwargs):
        '''Display bivariate statistics as a table inset on a plot axis

        Parameters
        ----------
        ax : matplotlib.Figure.Axis
            axis where the table will be displayed
        variables : list or str, default='common'
            names of attribute variables to include in summary
            names are case insensitive
            The following strings are also accepted in place of a list
                "all" (displays all variables)
                "common" (displays all measures of mean difference)
        fitline_kw : dict, default=None
            keywords passed to `fitline`
        floatformat : str, default='{:.3f}'
            format specifier for floating point values
        loc : tuple (x0,y0), default=(0.8, 0.05)
            location on the axis where the table will be drawn
            can be in data units or axes units [0-1]
        loc_units : {'axes' (default), 'data'}
            specifies whether loc has 'data' units or 'axes' units [0-1]
        **kwargs
            passed to `ax.text` for both text columns

        Returns
        -------
        text1, text2 : matplotlib text object
            Artist for the two text boxes

        Raises
        ------
        ValueError
            if loc_units is not 'data', 'axes', or 'axis'
        '''
        # List of variables
        variables = self._expand_variables(variables)

        # Default location in lower right corner
        if loc is None:
            loc = (0.8,0.05)

        # Coordinates for loc
        units = loc_units.lower()
        if units=='data':
            coord=ax.transData
        elif units in ['axes','axis']:
            coord=ax.transAxes
        else:
            raise ValueError('Display units should be "Data" or "Axes"')

        # Get a dict containing the needed variables
        summarydict = self.summary_dict( variables, fitline_kw )

        # Column of label text
        label_text = '\n'.join([_texify_name(key) for key in summarydict])
        # Column of value text
        value_text = '\n'.join([floatformat.format(value) for value in summarydict.values()])

        # Check if horizontal alignment keyword is used;
        # the long keyword takes priority over the short one
        ha = kwargs.get('horizontalalignment', kwargs.get('ha',''))

        # For right alignment, align on values first
        # Otherwise, align on labels
        if ha=='right':
            first_text = value_text
            second_text = label_text
            sign = -1
        else:
            first_text = label_text
            second_text = value_text
            sign = +1

        # Add first column of text
        t1 = ax.text(loc[0],loc[1],
                     first_text,
                     transform=coord,
                     **kwargs
                     )

        # Get width of first text column, in the same units as loc
        bbox = t1.get_window_extent().transformed(coord.inverted())
        width = bbox.x1-bbox.x0

        # Add second column of text, offset by the width of the first column
        t2 = ax.text(loc[0]+width*sign,loc[1],
                     second_text,
                     transform=coord,
                     **kwargs
                     )

        return [t1,t2]
class BivariateStatistics:
154class BivariateStatistics:
155    '''A suite of common statistics to quantify bivariate relationships
156
157    Class method 'summary' provides a formatted summary of these statistics
158    
159    Attributes
160    ----------
161    xmean, ymean : float
162        mean of x and y variables
163    xmedian, ymedian :float
164        median of x and y variables
165    xstd, ystd : float
166        standard deviation of x and y variables
167    mean_difference, md : float
168        ymean - xmean
169    mean_absolute_difference, mad : float
170        mean( |y-x| )
171    relative_mean_difference, rmd : float
172        md / xmean
173    relative_mean_absolute_difference, rmad :float
174        mad / xmean
175    standardized_mean_difference, smd : float
176        md / xstd
177    standardized_mean_absolute_difference, smad : float
178        mad /xstd
179    mean_relative_difference, mrd : float
180        mean(y/x) - 1
181    median_difference, medd : float
182        median(y-x)
183    median_absolute_difference, medad : float
184        median(|y-x|)
185    relative_median_difference, rmedd : float
186        median(y-x) / xmedian
187    relative_median_absolute_difference, rmedad : float
188        median(|y-x|) / xmedian
189    median_relative_difference, medianrd, medrd : float
190        median(y/x)-1
191    normalized_mean_bias_factor, nmbf : float
192        see `nmbf` 
193    normalized_mean_absolute_error_factor, nmaef : float
194        see `nmaef`
195    root_mean_square_difference, rmsd : float
196        $\\sqrt{ \\langle (y - x)^2 \\rangle }$
197    covariance : float
198        cov(x,y)
199    correlation_pearson, correlation, pearsonr, R, r : float
200        Pearson linear correlation coefficient 
201    correlation_spearman, spearmanr : float
202        Spearman, non-parametric rank correlation coefficient
203    R2, r2 : float
204        Linear coefficient of determination, $R^2$
205    '''
206
207    def __init__(self,x,y,w=None,dropna=False,data=None):
208        '''Compute suite of bivariate statistics during initialization
209        
210        Statistic values are saved in attributes.
211        CAUTION: Weights w are ignored except in SMA fit
212
213        Parameters
214        ----------
215        x : ndarray or str
216            independent variable values
217        y : ndarray or str
218            dependent variable values, same size as x
219        w : ndarray or str, optional
220            weights for points (x,y), same size as x and y
221        dropna : bool, optional (default=False)
222            drops NaN values from x, y, and w
223        data : dict-like, optional
224            if x, y, or w are str, then they should be keys in data
225        '''
226
227        # Get values from data if needed
228        if data is None and (isinstance(x,str) or isinstance(y,str) or isinstance(w,str)):
229            raise ValueError( 'Data argument must be used if x, y, or w is a string')
230        if isinstance(x,str):
231            x = data[x]
232        if isinstance(y,str):
233            y = data[y]
234        if isinstance(w,str):
235            w = data[w]
236
237        #Ensure that x and y have same length
238        if len(x) != len(y):
239            raise ValueError( 'Arguments x and y must have the same length' )
240        if w is None:
241            w = np.ones_like(x)
242        if len(w) != len(x):
243            raise ValueError( 'Argument w (if present) must have the same length as x' )
244
245        # Drop NaN values
246        if dropna:
247            isna = np.isnan(x*y*w)
248            x = x[~isna]
249            y = y[~isna]
250            w = w[~isna]
251
252        diff = y - x
253        absdiff = np.abs( y - x )
254        # Ignore divide by zero and 0/0 while dividing
255        old_settings = np.seterr(divide='ignore',invalid='ignore')
256        ratio = y/x
257        np.seterr(**old_settings)
258
259        # Means, medians, and standard deviations
260        self.xmean = np.mean(x)
261        self.ymean = np.mean(y)
262        self.xmedian = np.median(x)
263        self.ymedian = np.median(y)
264        self.xstd   = np.std(x)
265        self.ystd   = np.std(y)
266
267        # Save values for use later
268        self._x = x
269        self._y = y
270        self._w = w
271
272        # Mean and mean absolute differences
273        self.mean_difference            = self.md   = self.ymean - self.xmean
274        self.mean_absolute_difference   = self.mad  = np.mean( absdiff )
275
276        # Relative and standardized differences
277        self.relative_mean_difference           = self.rmd  = self.mean_difference / self.xmean
278        self.relative_mean_absolute_difference  = self.rmad = self.mean_absolute_difference / self.xmean
279        self.standardized_mean_difference       = self.smd  = self.mean_difference / self.xstd
280        self.standardized_mean_absolute_difference  = self.smad = self.mean_absolute_difference / self.xstd
281
282        # Mean and median relative differences
283        self.mean_relative_difference   = self.mrd  = np.mean( ratio - 1 )
284        self.median_relative_difference = self.medianrd = self.medrd = np.median( ratio - 1 )
285
286        # Median and median absolute differences
287        self.median_difference          = self.medd  = np.median( diff )
288        self.median_absolute_difference = self.medad = np.median( absdiff )
289
290        # Relative median differences
291        self.relative_median_difference          = self.rmedd  = self.median_difference / self.xmedian
292        self.relative_median_absolute_difference = self.rmedad = self.median_absolute_difference / self.xmedian
293
294        self.normalized_mean_bias_factor            = self.nmbf  = nmbf(x,y)
295        self.normalized_mean_absolute_error_factor  = self.nmaef = nmaef(x,y)
296
297        # RMS difference
298        self.root_mean_square_difference    = self.rmsd     = np.sqrt( np.mean( np.power( diff, 2) ) )
299
300        # Covariance, correlation
301        self.covariance = np.cov(x,y)[0][1]
302        self.correlation = self.correlation_pearson = self.R = self.r = self.pearsonr = \
303            np.corrcoef(x,y)[0][1]
304        self.correlation_spearman = self.spearmanr = stats.spearmanr(x,y).statistic
305        self.R2 = self.r2 = self.R**2
306
307    def __getitem__(self,key):
308        '''Accesses attribute values via object['key']'''
309        return getattr(self,key)
310
311    def fitline(self,method='sma',intercept=True,**kwargs):
312        '''Compute bivariate line fit
313        
314        Parameters
315        ----------
316        method : str
317            line fitting method: sma (default), ols, wls, York, sen, siegel
318        intercept : bool
319            defines whether non-zero intercept should be fitted
320        **kwargs 
321            passed to `acgc.stats.sma` (e.g. robust=True)
322
323        Returns
324        -------
325        result : dict
326            dictionary with keys:
327            - slope (float)
328                slope of fitted line
329            - intercept (float)
330                intercept of fitted line
331            - fittedvalues (array (N,))
332                values on fit line
333            - residuals (array (N,))
334                residual from fit line
335        '''
336
337        if method.lower()=='sma':
338            fit = sma(  self._x,
339                        self._y,
340                        self._w,
341                        intercept=intercept,
342                        **kwargs)
343            slope = fit['slope']
344            intercept= fit['intercept']
345
346        elif method.lower()=='ols':
347            if intercept:
348                ols = np.linalg.lstsq( np.vstack([self._x,np.ones(len(self._x))]).T, 
349                                      self._y, rcond=None )
350            else:
351                ols = np.linalg.lstsq( np.vstack([self._x]).T, self._y, rcond=None )
352            slope = ols[0][0]
353            intercept = ols[0][1]
354
355        elif method.lower() in ['theil','sen','theilsen']:
356            sen = stats.theilslopes( self._y,
357                                     self._x )
358            slope = sen.slope
359            intercept = sen.intercept
360
361        elif method.lower()=='siegel':
362            siegel = stats.siegelslopes( self._x,
363                                         self._y )
364            slope = siegel.slope
365            intercept = siegel.intercept
366
367        elif method.lower()=='wls':
368            raise NotImplementedError('WLS regression not implemented yet')
369
370        elif method.lower()=='york':
371            raise NotImplementedError('York regression not implemented yet')
372
373        else:
374            raise ValueError('Undefined method '+method)
375
376        line = dict( slope          = slope,
377                     intercept      = intercept,
378                     fittedvalues   = slope * self._x + intercept,
379                     residuals      = self._y - ( slope * self._x + intercept ) )
380
381        return line
382
383    def slope(self,method='sma',intercept=True,**kwargs):
384        '''Compute slope of bivariate line fit
385        
386        Parameters
387        ----------
388        method : str
389            line fitting method: sma (default), ols, wls
390        intercept : bool
391            defines whether non-zero intercept should be fitted
392        **kwargs 
393            passed to `fitline`
394
395        Returns
396        -------
397        slope : float
398            value of y intercept
399        '''
400        return self.fitline(method,intercept,**kwargs)['slope']
401
402    def intercept(self,method='sma',intercept=True,**kwargs):
403        '''Compute intercept of bivariate line fit
404        
405        Parameters
406        ----------
407        method : str
408            line fitting method: sma (default) or ols
409        intercept : bool
410            defines whether non-zero intercept should be fitted
411        **kwargs 
412            passed to `fitline`
413
414        Returns
415        -------
416        intercept : float
417            value of y intercept
418        '''
419        return self.fitline(method,intercept,**kwargs)['intercept']
420
421    def _expand_variables(self,variables):
422        '''Expand special strings into a list of variables
423        
424        Parameter
425        ---------
426        variables : list or str, default='common'
427            Special strings ("all","common") will be expanded to a list of variables
428            list arguments will not be modified
429
430        Returns
431        -------
432        list 
433            variable names
434        '''
435        if variables is None:
436            variables='common'
437        if variables=='all':
438            variables=['MD','MAD','RMD','RMAD','MRD','SMD','SMAD',
439                       'MedD','MedAD','RMedD','RMedAD','MedRD',
440                       'NMBF','NMAEF','RMSD',
441                       'R','R2','spearmanr','slope','intercept']
442        elif variables=='common':
443            variables=['MD','MAD','RMD','RMAD','MRD','R2','slope']
444        if not isinstance(variables,list):
445            raise ValueError(
446                'variables must be a list, None, or one of these strings: "all","common"')
447
448        return variables
449
450    def summary_dict(self, variables=None, fitline_kw=None ):
451        '''Summarize bivariate statistics into a dict
452
453        Parameters
454        ----------
455        vars : list or str, default='common'
456            names of attribute variables to include in summary
457            names are case insensitive            
458            The following strings are also accepted in place of a list 
459                "all" (displays all variables)
460                "common" (displays all measures of mean difference)
461        fitline_kw : dict, default=None)
462            keywords passed to self.fitline()
463        
464        Returns
465        -------
466        summary : dict
467            names and values of variables
468        '''
469
470        # List of variables
471        variables = self._expand_variables(variables)
472
473        if fitline_kw is None:
474            fitline_kw = {'method':'sma',
475                          'intercept':True}
476
477        # Construct the dict
478        summary = {}
479        for v in variables:
480            if v in ['slope','intercept']:
481                # These variables are object methods
482                func = getattr(self,v)
483                value = func(**fitline_kw)
484            else:
485                # Retrieve values
486                value = getattr(self,v.lower())
487
488            # summary += (stringformat+'='+floatformat+'\n').format(v,value)
489            summary[v] = value
490
491        return summary
492
493    def summary(self, variables=None, fitline_kw=None, 
494                floatformat='{:.4f}', stringlength=None ):
495        '''Summarize bivariate statistics
496
497        Parameters
498        ----------
499        vars : list or str, default='common'
500            names of attribute variables to include in summary
501            names are case insensitive            
502            The following strings are also accepted in place of a list 
503                "all" (displays all variables)
504                "common" (displays all measures of mean difference)
505        floatformat : str, default='{:.4f}'
506            format specifier for floating point values
507        stringlength : int, default=None
508            length of the variables on output
509            default (None) is to use the length of the longest variable name
510        fitline_kw : dict, default=None
511            keywords passed to `fitline`
512        
513        Returns
514        -------
515        summary : str
516            names and values of variables
517        '''
518        # List of variables
519        variables = self._expand_variables(variables)
520
521        if stringlength is None:
522            stringlength = np.max([len(v) for v in variables])
523        stringformat = '{:'+str(stringlength)+'s}'
524
525        # Get a dict containing the needed variables
526        summarydict = self.summary_dict( variables, fitline_kw )
527
528        # Extract length of the float numbers from floatformat
529        # import re
530        # floatlength = np.floor( float( re.findall("[-+]?(?:\d*\.*\d+)",
531        #       floatformat )[0] ) ).astype(int)
532
533        # summary = (stringformat+'{:>10s}').format('Variable','Value')
534        summarytext = ''
535        for k,v in summarydict.items():
536            summarytext += (stringformat+' = '+floatformat+'\n').format(k,v)
537
538        return summarytext
539
540    def summary_fig_inset(self, ax, variables=None, fitline_kw=None,
541                          floatformat='{:.3f}',
542                          loc=None, loc_units='axes',
543                          **kwargs):
544        '''Display bivariate statistics as a table inset on a plot axis
545
546        Parameters
547        ----------
548        ax : matplotlib.Figure.Axis 
549            axis where the table will be displayed
550        variables : list or str, default='common'
551            names of attribute variables to include in summary
552            names are case insensitive            
553            The following strings are also accepted in place of a list 
554                "all" (displays all variables)
555                "common" (displays all measures of mean difference)
556        fitline_kw : dict, default=None
557            keywords passed to `fitline`
558        floatformat : str, default='{:.3f}'
559            format specifier for floating point values
560        loc : tuple (x0,y0), default=(0.85, 0.05)
561            location on the axis where the table will be drawn
562            can be in data units or axes units [0-1]
563        loc_units : {'axes' (default), 'data'}
564            specifies whether loc has 'data' units or 'axes' units [0-1]
565                    
566        Returns
567        -------
568        text1, text2 : matplotlib text object
569            Artist for the two text boxes        
570        '''
571        # List of variables
572        variables = self._expand_variables(variables)
573
574        # Default location in lower right corner
575        if loc is None:
576            loc = (0.8,0.05)
577
578        # Coordinates for loc
579        if loc_units.lower()=='data':
580            coord=ax.transData
581        elif loc_units.lower() in ['axes','axis']:
582            coord=ax.transAxes
583        else:
584            raise ValueError('Display units should be "Data" or "Axes"')
585
586        # Get a dict containing the needed variables
587        summarydict = self.summary_dict( variables, fitline_kw )
588
589        # Column of label text
590        label_text = '\n'.join([_texify_name(key) for key in summarydict])
591        # Column of value text
592        value_text = '\n'.join([floatformat.format(value) for value in summarydict.values()])
593
594        # Check if horizontal alignment keyword is used
595        ha=''
596        try:
597            ha = kwargs['ha']
598        except KeyError:
599            pass
600        try:
601            ha = kwargs['horizontalalignment']
602        except KeyError:
603            pass
604
605        # For right alignment, align on values first
606        # Otherwise, align on labels
607        if ha=='right':
608            first_text = value_text
609            second_text = label_text
610            sign = -1
611        else:
612            first_text = label_text
613            second_text = value_text
614            sign = +1
615
616        # Add first column of text
617        t1=ax.text(loc[0],loc[1],
618                first_text,
619                transform=coord,
620                **kwargs
621                )
622
623        # Get width of first text column
624        bbox = t1.get_window_extent().transformed(coord.inverted())
625        width = bbox.x1-bbox.x0
626
627        # Add second column of text
628        t2 = ax.text(loc[0]+width*sign,loc[1],
629                     second_text,
630                     transform=coord,
631                     **kwargs
632                     )
633
634        ##################################
635        # Early version of this function using matplotlib.table.table()
636
637        # if isinstance(loc,(tuple,list)):
638        #     # Create an inset axis to contain the table
639        #     tableaxis = ax.inset_axes(loc)
640        #     table_width=1
641        # else:
642        #     tableaxis = ax
643
644        # # Display the table on the axis
645        # return mtable.table(
646        #     tableaxis,
647        #     cellText=[[floatformat.format(value)] for value in summarydict.values()],
648        #     rowLabels=[texify_name(key) for key in summarydict],
649        #     colWidths=[table_width/2]*2,
650        #     edges=edges,
651        #     loc=loc, bbox=bbox
652        #     )
653
654        return [t1,t2]

A suite of common statistics to quantify bivariate relationships

Class method 'summary' provides a formatted summary of these statistics

Attributes
  • xmean, ymean (float): mean of x and y variables
  • xmedian, ymedian (float): median of x and y variables
  • xstd, ystd (float): standard deviation of x and y variables
  • mean_difference, md (float): ymean - xmean
  • mean_absolute_difference, mad (float): mean( |y-x| )
  • relative_mean_difference, rmd (float): md / xmean
  • relative_mean_absolute_difference, rmad (float): mad / xmean
  • standardized_mean_difference, smd (float): md / xstd
  • standardized_mean_absolute_difference, smad (float): mad /xstd
  • mean_relative_difference, mrd (float): mean(y/x) - 1
  • median_difference, medd (float): median(y-x)
  • median_absolute_difference, medad (float): median(|y-x|)
  • relative_median_difference, rmedd (float): median(y-x) / xmedian
  • relative_median_absolute_difference, rmedad (float): median(|y-x|) / xmedian
  • median_relative_difference, medianrd, medrd (float): median(y/x)-1
  • normalized_mean_bias_factor, nmbf (float): see nmbf
  • normalized_mean_absolute_error_factor, nmaef (float): see nmaef
  • root_mean_square_difference, rmsd (float): $\sqrt{ \langle (y - x)^2 \rangle }$
  • covariance (float): cov(x,y)
  • correlation_pearson, correlation, pearsonr, R, r (float): Pearson linear correlation coefficient
  • correlation_spearman, spearmanr (float): Spearman, non-parametric rank correlation coefficient
  • R2, r2 (float): Linear coefficient of determination, $R^2$
BivariateStatistics(x, y, w=None, dropna=False, data=None)
207    def __init__(self,x,y,w=None,dropna=False,data=None):
208        '''Compute suite of bivariate statistics during initialization
209        
210        Statistic values are saved in attributes.
211        CAUTION: Weights w are ignored except in SMA fit
212
213        Parameters
214        ----------
215        x : ndarray or str
216            independent variable values
217        y : ndarray or str
218            dependent variable values, same size as x
219        w : ndarray or str, optional
220            weights for points (x,y), same size as x and y
221        dropna : bool, optional (default=False)
222            drops NaN values from x, y, and w
223        data : dict-like, optional
224            if x, y, or w are str, then they should be keys in data
225        '''
226
227        # Get values from data if needed
228        if data is None and (isinstance(x,str) or isinstance(y,str) or isinstance(w,str)):
229            raise ValueError( 'Data argument must be used if x, y, or w is a string')
230        if isinstance(x,str):
231            x = data[x]
232        if isinstance(y,str):
233            y = data[y]
234        if isinstance(w,str):
235            w = data[w]
236
237        #Ensure that x and y have same length
238        if len(x) != len(y):
239            raise ValueError( 'Arguments x and y must have the same length' )
240        if w is None:
241            w = np.ones_like(x)
242        if len(w) != len(x):
243            raise ValueError( 'Argument w (if present) must have the same length as x' )
244
245        # Drop NaN values
246        if dropna:
247            isna = np.isnan(x*y*w)
248            x = x[~isna]
249            y = y[~isna]
250            w = w[~isna]
251
252        diff = y - x
253        absdiff = np.abs( y - x )
254        # Ignore divide by zero and 0/0 while dividing
255        old_settings = np.seterr(divide='ignore',invalid='ignore')
256        ratio = y/x
257        np.seterr(**old_settings)
258
259        # Means, medians, and standard deviations
260        self.xmean = np.mean(x)
261        self.ymean = np.mean(y)
262        self.xmedian = np.median(x)
263        self.ymedian = np.median(y)
264        self.xstd   = np.std(x)
265        self.ystd   = np.std(y)
266
267        # Save values for use later
268        self._x = x
269        self._y = y
270        self._w = w
271
272        # Mean and mean absolute differences
273        self.mean_difference            = self.md   = self.ymean - self.xmean
274        self.mean_absolute_difference   = self.mad  = np.mean( absdiff )
275
276        # Relative and standardized differences
277        self.relative_mean_difference           = self.rmd  = self.mean_difference / self.xmean
278        self.relative_mean_absolute_difference  = self.rmad = self.mean_absolute_difference / self.xmean
279        self.standardized_mean_difference       = self.smd  = self.mean_difference / self.xstd
280        self.standardized_mean_absolute_difference  = self.smad = self.mean_absolute_difference / self.xstd
281
282        # Mean and median relative differences
283        self.mean_relative_difference   = self.mrd  = np.mean( ratio - 1 )
284        self.median_relative_difference = self.medianrd = self.medrd = np.median( ratio - 1 )
285
286        # Median and median absolute differences
287        self.median_difference          = self.medd  = np.median( diff )
288        self.median_absolute_difference = self.medad = np.median( absdiff )
289
290        # Relative median differences
291        self.relative_median_difference          = self.rmedd  = self.median_difference / self.xmedian
292        self.relative_median_absolute_difference = self.rmedad = self.median_absolute_difference / self.xmedian
293
294        self.normalized_mean_bias_factor            = self.nmbf  = nmbf(x,y)
295        self.normalized_mean_absolute_error_factor  = self.nmaef = nmaef(x,y)
296
297        # RMS difference
298        self.root_mean_square_difference    = self.rmsd     = np.sqrt( np.mean( np.power( diff, 2) ) )
299
300        # Covariance, correlation
301        self.covariance = np.cov(x,y)[0][1]
302        self.correlation = self.correlation_pearson = self.R = self.r = self.pearsonr = \
303            np.corrcoef(x,y)[0][1]
304        self.correlation_spearman = self.spearmanr = stats.spearmanr(x,y).statistic
305        self.R2 = self.r2 = self.R**2

Compute suite of bivariate statistics during initialization

Statistic values are saved in attributes. CAUTION: Weights w are ignored except in SMA fit

Parameters
  • x (ndarray or str): independent variable values
  • y (ndarray or str): dependent variable values, same size as x
  • w (ndarray or str, optional): weights for points (x,y), same size as x and y
  • dropna (bool, optional (default=False)): drops NaN values from x, y, and w
  • data (dict-like, optional): if x, y, or w are str, then they should be keys in data
xmean
ymean
xmedian
ymedian
xstd
ystd
covariance
def fitline(self, method='sma', intercept=True, **kwargs):
311    def fitline(self,method='sma',intercept=True,**kwargs):
312        '''Compute bivariate line fit
313        
314        Parameters
315        ----------
316        method : str
317            line fitting method: sma (default), ols, wls, York, sen, siegel
318        intercept : bool
319            defines whether non-zero intercept should be fitted
320        **kwargs 
321            passed to `acgc.stats.sma` (e.g. robust=True)
322
323        Returns
324        -------
325        result : dict
326            dictionary with keys:
327            - slope (float)
328                slope of fitted line
329            - intercept (float)
330                intercept of fitted line
331            - fittedvalues (array (N,))
332                values on fit line
333            - residuals (array (N,))
334                residual from fit line
335        '''
336
337        if method.lower()=='sma':
338            fit = sma(  self._x,
339                        self._y,
340                        self._w,
341                        intercept=intercept,
342                        **kwargs)
343            slope = fit['slope']
344            intercept= fit['intercept']
345
346        elif method.lower()=='ols':
347            if intercept:
348                ols = np.linalg.lstsq( np.vstack([self._x,np.ones(len(self._x))]).T, 
349                                      self._y, rcond=None )
350            else:
351                ols = np.linalg.lstsq( np.vstack([self._x]).T, self._y, rcond=None )
352            slope = ols[0][0]
353            intercept = ols[0][1]
354
355        elif method.lower() in ['theil','sen','theilsen']:
356            sen = stats.theilslopes( self._y,
357                                     self._x )
358            slope = sen.slope
359            intercept = sen.intercept
360
361        elif method.lower()=='siegel':
362            siegel = stats.siegelslopes( self._x,
363                                         self._y )
364            slope = siegel.slope
365            intercept = siegel.intercept
366
367        elif method.lower()=='wls':
368            raise NotImplementedError('WLS regression not implemented yet')
369
370        elif method.lower()=='york':
371            raise NotImplementedError('York regression not implemented yet')
372
373        else:
374            raise ValueError('Undefined method '+method)
375
376        line = dict( slope          = slope,
377                     intercept      = intercept,
378                     fittedvalues   = slope * self._x + intercept,
379                     residuals      = self._y - ( slope * self._x + intercept ) )
380
381        return line

Compute bivariate line fit

Parameters
  • method (str): line fitting method: sma (default), ols, wls, York, sen, siegel
  • intercept (bool): defines whether non-zero intercept should be fitted
  • **kwargs: passed to acgc.stats.sma (e.g. robust=True)
Returns
  • result (dict): dictionary with keys:
    • slope (float) slope of fitted line
    • intercept (float) intercept of fitted line
    • fittedvalues (array (N,)) values on fit line
    • residuals (array (N,)) residual from fit line
def slope(self, method='sma', intercept=True, **kwargs):
383    def slope(self,method='sma',intercept=True,**kwargs):
384        '''Compute slope of bivariate line fit
385        
386        Parameters
387        ----------
388        method : str
389            line fitting method: sma (default), ols, wls
390        intercept : bool
391            defines whether non-zero intercept should be fitted
392        **kwargs 
393            passed to `fitline`
394
395        Returns
396        -------
397        slope : float
398            value of y intercept
399        '''
400        return self.fitline(method,intercept,**kwargs)['slope']

Compute slope of bivariate line fit

Parameters
  • method (str): line fitting method: sma (default), ols, wls
  • intercept (bool): defines whether non-zero intercept should be fitted
  • **kwargs: passed to fitline
Returns
  • slope (float): slope of the fitted line
def intercept(self, method='sma', intercept=True, **kwargs):
402    def intercept(self,method='sma',intercept=True,**kwargs):
403        '''Compute intercept of bivariate line fit
404        
405        Parameters
406        ----------
407        method : str
408            line fitting method: sma (default) or ols
409        intercept : bool
410            defines whether non-zero intercept should be fitted
411        **kwargs 
412            passed to `fitline`
413
414        Returns
415        -------
416        intercept : float
417            value of y intercept
418        '''
419        return self.fitline(method,intercept,**kwargs)['intercept']

Compute intercept of bivariate line fit

Parameters
  • method (str): line fitting method: sma (default) or ols
  • intercept (bool): defines whether non-zero intercept should be fitted
  • **kwargs: passed to fitline
Returns
  • intercept (float): value of y intercept
def summary_dict(self, variables=None, fitline_kw=None):
450    def summary_dict(self, variables=None, fitline_kw=None ):
451        '''Summarize bivariate statistics into a dict
452
453        Parameters
454        ----------
455        vars : list or str, default='common'
456            names of attribute variables to include in summary
457            names are case insensitive            
458            The following strings are also accepted in place of a list 
459                "all" (displays all variables)
460                "common" (displays all measures of mean difference)
461        fitline_kw : dict, default=None)
462            keywords passed to self.fitline()
463        
464        Returns
465        -------
466        summary : dict
467            names and values of variables
468        '''
469
470        # List of variables
471        variables = self._expand_variables(variables)
472
473        if fitline_kw is None:
474            fitline_kw = {'method':'sma',
475                          'intercept':True}
476
477        # Construct the dict
478        summary = {}
479        for v in variables:
480            if v in ['slope','intercept']:
481                # These variables are object methods
482                func = getattr(self,v)
483                value = func(**fitline_kw)
484            else:
485                # Retrieve values
486                value = getattr(self,v.lower())
487
488            # summary += (stringformat+'='+floatformat+'\n').format(v,value)
489            summary[v] = value
490
491        return summary

Summarize bivariate statistics into a dict

Parameters
  • variables (list or str, default='common'): names of attribute variables to include in summary; names are case insensitive
    The following strings are also accepted in place of a list "all" (displays all variables) "common" (displays all measures of mean difference)
  • fitline_kw (dict, default=None): keywords passed to self.fitline()
Returns
  • summary (dict): names and values of variables
def summary( self, variables=None, fitline_kw=None, floatformat='{:.4f}', stringlength=None):
493    def summary(self, variables=None, fitline_kw=None, 
494                floatformat='{:.4f}', stringlength=None ):
495        '''Summarize bivariate statistics
496
497        Parameters
498        ----------
499        vars : list or str, default='common'
500            names of attribute variables to include in summary
501            names are case insensitive            
502            The following strings are also accepted in place of a list 
503                "all" (displays all variables)
504                "common" (displays all measures of mean difference)
505        floatformat : str, default='{:.4f}'
506            format specifier for floating point values
507        stringlength : int, default=None
508            length of the variables on output
509            default (None) is to use the length of the longest variable name
510        fitline_kw : dict, default=None
511            keywords passed to `fitline`
512        
513        Returns
514        -------
515        summary : str
516            names and values of variables
517        '''
518        # List of variables
519        variables = self._expand_variables(variables)
520
521        if stringlength is None:
522            stringlength = np.max([len(v) for v in variables])
523        stringformat = '{:'+str(stringlength)+'s}'
524
525        # Get a dict containing the needed variables
526        summarydict = self.summary_dict( variables, fitline_kw )
527
528        # Extract length of the float numbers from floatformat
529        # import re
530        # floatlength = np.floor( float( re.findall("[-+]?(?:\d*\.*\d+)",
531        #       floatformat )[0] ) ).astype(int)
532
533        # summary = (stringformat+'{:>10s}').format('Variable','Value')
534        summarytext = ''
535        for k,v in summarydict.items():
536            summarytext += (stringformat+' = '+floatformat+'\n').format(k,v)
537
538        return summarytext

Summarize bivariate statistics

Parameters
  • variables (list or str, default='common'): names of attribute variables to include in summary; names are case insensitive
    The following strings are also accepted in place of a list "all" (displays all variables) "common" (displays all measures of mean difference)
  • floatformat (str, default='{:.4f}'): format specifier for floating point values
  • stringlength (int, default=None): length of the variables on output default (None) is to use the length of the longest variable name
  • fitline_kw (dict, default=None): keywords passed to fitline
Returns
  • summary (str): names and values of variables
def summary_fig_inset( self, ax, variables=None, fitline_kw=None, floatformat='{:.3f}', loc=None, loc_units='axes', **kwargs):
540    def summary_fig_inset(self, ax, variables=None, fitline_kw=None,
541                          floatformat='{:.3f}',
542                          loc=None, loc_units='axes',
543                          **kwargs):
544        '''Display bivariate statistics as a table inset on a plot axis
545
546        Parameters
547        ----------
548        ax : matplotlib.Figure.Axis 
549            axis where the table will be displayed
550        variables : list or str, default='common'
551            names of attribute variables to include in summary
552            names are case insensitive            
553            The following strings are also accepted in place of a list 
554                "all" (displays all variables)
555                "common" (displays all measures of mean difference)
556        fitline_kw : dict, default=None
557            keywords passed to `fitline`
558        floatformat : str, default='{:.3f}'
559            format specifier for floating point values
560        loc : tuple (x0,y0), default=(0.85, 0.05)
561            location on the axis where the table will be drawn
562            can be in data units or axes units [0-1]
563        loc_units : {'axes' (default), 'data'}
564            specifies whether loc has 'data' units or 'axes' units [0-1]
565                    
566        Returns
567        -------
568        text1, text2 : matplotlib text object
569            Artist for the two text boxes        
570        '''
571        # List of variables
572        variables = self._expand_variables(variables)
573
574        # Default location in lower right corner
575        if loc is None:
576            loc = (0.8,0.05)
577
578        # Coordinates for loc
579        if loc_units.lower()=='data':
580            coord=ax.transData
581        elif loc_units.lower() in ['axes','axis']:
582            coord=ax.transAxes
583        else:
584            raise ValueError('Display units should be "Data" or "Axes"')
585
586        # Get a dict containing the needed variables
587        summarydict = self.summary_dict( variables, fitline_kw )
588
589        # Column of label text
590        label_text = '\n'.join([_texify_name(key) for key in summarydict])
591        # Column of value text
592        value_text = '\n'.join([floatformat.format(value) for value in summarydict.values()])
593
594        # Check if horizontal alignment keyword is used
595        ha=''
596        try:
597            ha = kwargs['ha']
598        except KeyError:
599            pass
600        try:
601            ha = kwargs['horizontalalignment']
602        except KeyError:
603            pass
604
605        # For right alignment, align on values first
606        # Otherwise, align on labels
607        if ha=='right':
608            first_text = value_text
609            second_text = label_text
610            sign = -1
611        else:
612            first_text = label_text
613            second_text = value_text
614            sign = +1
615
616        # Add first column of text
617        t1=ax.text(loc[0],loc[1],
618                first_text,
619                transform=coord,
620                **kwargs
621                )
622
623        # Get width of first text column
624        bbox = t1.get_window_extent().transformed(coord.inverted())
625        width = bbox.x1-bbox.x0
626
627        # Add second column of text
628        t2 = ax.text(loc[0]+width*sign,loc[1],
629                     second_text,
630                     transform=coord,
631                     **kwargs
632                     )
633
634        ##################################
635        # Early version of this function using matplotlib.table.table()
636
637        # if isinstance(loc,(tuple,list)):
638        #     # Create an inset axis to contain the table
639        #     tableaxis = ax.inset_axes(loc)
640        #     table_width=1
641        # else:
642        #     tableaxis = ax
643
644        # # Display the table on the axis
645        # return mtable.table(
646        #     tableaxis,
647        #     cellText=[[floatformat.format(value)] for value in summarydict.values()],
648        #     rowLabels=[texify_name(key) for key in summarydict],
649        #     colWidths=[table_width/2]*2,
650        #     edges=edges,
651        #     loc=loc, bbox=bbox
652        #     )
653
654        return [t1,t2]

Display bivariate statistics as a table inset on a plot axis

Parameters
  • ax (matplotlib.Figure.Axis): axis where the table will be displayed
  • variables (list or str, default='common'): names of attribute variables to include in summary names are case insensitive
    The following strings are also accepted in place of a list "all" (displays all variables) "common" (displays all measures of mean difference)
  • fitline_kw (dict, default=None): keywords passed to fitline
  • floatformat (str, default='{:.3f}'): format specifier for floating point values
  • loc (tuple (x0,y0), default=(0.8, 0.05)): location on the axis where the table will be drawn; can be in data units or axes units [0-1]
  • loc_units ({'axes' (default), 'data'}): specifies whether loc has 'data' units or 'axes' units [0-1]
Returns
  • text1, text2 (matplotlib text object): Artist for the two text boxes
def nmb(x0, x1):
def nmb( x0, x1 ):
    '''Compute Normalized Mean Bias (NMB)

    NMB = ( mean(x1) - mean(x0) ) / mean(x0)

    Evaluated as mean(x1) / mean(x0) - 1.

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    scalar
        ratio of the experiment mean to the reference mean, minus one
    '''

    # Both populations must be paired element by element
    assert len(x0) == len(x1), "Parameters x0 and x1 must have the same length"

    # Averages of the reference and experiment populations
    reference_avg = np.mean(x0)
    experiment_avg = np.mean(x1)

    # Relative difference of the experiment mean from the reference mean
    ratio = experiment_avg / reference_avg
    return ratio - 1

Compute Normalized Mean Bias (NMB)

NMB = ( mean(x1) - mean(x0) ) / mean(x0)

Parameters
  • x0 (array_like): reference values
  • x1 (array_like): experiment values
def nmae(x0, x1):
def nmae( x0, x1 ):
    '''Compute Normalized Mean Absolute Error (NMAE)

    NMAE = mean(abs(x1 - x0)) / abs(mean(x0))

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    scalar
        mean absolute difference between x1 and x0, divided by the
        absolute value of the mean of x0
    '''

    # Mean of the reference values
    x0_mean = np.mean(x0)

    # Mean absolute difference
    # np.subtract (rather than the "-" operator) lets plain lists and tuples
    # be used, as promised by the array_like parameter types
    abs_diff = np.mean( np.abs( np.subtract(x1, x0) ) )

    # Metric value
    return abs_diff / np.abs( x0_mean )

Compute Normalized Mean Absolute Error (NMAE)

NMAE = mean(abs(x1 - x0)) / abs(mean(x0))

Parameters
  • x0 (array_like): reference values
  • x1 (array_like): experiment values
def nmbf(x0, x1):
def nmbf( x0, x1 ):
    '''Compute Normalized Mean Bias Factor (NMBF)

    Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125

    NMBF = mean(x1) / mean(x0) - 1    if mean(x1) >= mean(x0)
    NMBF = 1 - mean(x0) / mean(x1)    otherwise

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    scalar
        normalized mean bias factor
    '''

    # Both populations must be paired element by element
    assert len(x0) == len(x1), "Parameters x0 and x1 must have the same length"

    # Averages of the reference and experiment populations
    reference_avg = np.mean(x0)
    experiment_avg = np.mean(x1)

    # Experiment mean at or above the reference mean:
    # normalize by the reference mean
    if experiment_avg >= reference_avg:
        return experiment_avg / reference_avg - 1

    # Experiment mean below the reference mean:
    # normalize by the experiment mean
    return 1 - reference_avg / experiment_avg

Compute Normalized Mean Bias Factor (NMBF)

Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125

Parameters
  • x0 (array_like): reference values
  • x1 (array_like): experiment values
def nmaef(x0, x1):
def nmaef( x0, x1 ):
    '''Compute Normalized Mean Absolute Error Factor (NMAEF)

    Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125

    NMAEF = mean(abs(x1 - x0)) / mean(x0)    if mean(x1) >= mean(x0)
    NMAEF = mean(abs(x1 - x0)) / mean(x1)    otherwise

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    scalar
        normalized mean absolute error factor
    '''

    # Ensure that arguments have the same length
    assert (len(x0) == len(x1)), \
        "Parameters x0 and x1 must have the same length"

    # Mean values
    x0_mean = np.mean(x0)
    x1_mean = np.mean(x1)

    # Mean absolute difference
    # np.subtract (rather than the "-" operator) lets plain lists and tuples
    # be used, as promised by the array_like parameter types
    abs_diff = np.mean( np.abs( np.subtract(x1, x0) ) )

    # Metric value: normalize by whichever mean is smaller
    if x1_mean >= x0_mean:
        result = abs_diff / x0_mean
    else:
        result = abs_diff / x1_mean

    return result

Compute Normalized Mean Absolute Error Factor (NMAEF)

Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125

Parameters
  • x0 (array_like): reference values
  • x1 (array_like): experiment values