acgc.stats.bivariate
Bivariate statistics
Statistical measures of relationships between two populations
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Bivariate statistics

Statistical measures of relationships between two populations
"""

import numpy as np
from scipy import stats
from .bivariate_lines import sma
# import xarray as xr

__all__ = [
    "BivariateStatistics",
    "nmb",
    "nmae",
    "nmbf",
    "nmaef"
]

def nmb( x0, x1 ):
    '''Compute Normalized Mean Bias (NMB)

    NMB = ( mean(x1) - mean(x0) ) / mean(x0)

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    float
        normalized mean bias
    '''

    assert (len(x0) == len(x1)), \
        "Parameters x0 and x1 must have the same length"

    # Mean values
    x0_mean = np.mean(x0)
    x1_mean = np.mean(x1)

    # Metric value; algebraically equal to (x1_mean - x0_mean) / x0_mean
    return x1_mean / x0_mean - 1

def nmae( x0, x1 ):
    '''Compute Normalized Mean Absolute Error (NMAE)

    NMAE = mean(abs(x1 - x0)) / abs(mean(x0))

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    float
        normalized mean absolute error
    '''

    # Mean values
    x0_mean = np.mean(x0)

    # Mean absolute difference
    # np.subtract (rather than the "-" operator) also accepts plain sequences
    abs_diff = np.mean( np.abs( np.subtract(x1, x0) ) )

    # Metric value
    return abs_diff / np.abs( x0_mean )


def nmbf( x0, x1 ):
    '''Compute Normalized Mean Bias Factor (NMBF)

    Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125

    NMBF = mean(x1) / mean(x0) - 1   if mean(x1) >= mean(x0)
    NMBF = 1 - mean(x0) / mean(x1)   otherwise

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    float
        normalized mean bias factor
    '''

    # Ensure that arguments have the same length
    assert (len(x0) == len(x1)), \
        "Parameters x0 and x1 must have the same length"

    # Mean values
    x0_mean = np.mean(x0)
    x1_mean = np.mean(x1)

    # Metric value
    if x1_mean >= x0_mean:
        result = x1_mean / x0_mean - 1
    else:
        result = 1 - x0_mean / x1_mean

    return result

def nmaef( x0, x1 ):
    '''Compute Normalized Mean Absolute Error Factor (NMAEF)

    Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125

    NMAEF = mean(abs(x1 - x0)) / mean(x0)   if mean(x1) >= mean(x0)
    NMAEF = mean(abs(x1 - x0)) / mean(x1)   otherwise

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    float
        normalized mean absolute error factor
    '''

    # Ensure that arguments have the same length
    assert (len(x0) == len(x1)), \
        "Parameters x0 and x1 must have the same length"

    # Mean values
    x0_mean = np.mean(x0)
    x1_mean = np.mean(x1)

    # Mean absolute difference
    # np.subtract (rather than the "-" operator) also accepts plain sequences
    abs_diff = np.mean( np.abs( np.subtract(x1, x0) ) )

    # Metric value: normalize by the smaller of the two means
    if x1_mean >= x0_mean:
        result = abs_diff / x0_mean
    else:
        result = abs_diff / x1_mean

    return result

def _texify_name(name):
    '''Return a LaTex formatted string for some variables

    Only 'R2' and 'r2' are converted; any other name is returned unchanged.

    Parameter
    ---------
    name : str

    Returns
    -------
    pretty_name : str
    '''
    return {'R2':'$R^2$', 'r2':'$r^2$'}.get(name, name)

class BivariateStatistics:
    '''A suite of common statistics to quantify bivariate relationships

    Method 'summary' provides a formatted summary of these statistics

    Attributes
    ----------
    xmean, ymean : float
        mean of x and y variables
    xmedian, ymedian :float
        median of x and y variables
    xstd, ystd : float
        standard deviation of x and y variables
    mean_difference, md : float
        ymean - xmean
    mean_absolute_difference, mad : float
        mean( |y-x| )
    relative_mean_difference, rmd : float
        md / xmean
    relative_mean_absolute_difference, rmad :float
        mad / xmean
    standardized_mean_difference, smd : float
        md / xstd
    standardized_mean_absolute_difference, smad : float
        mad /xstd
    mean_relative_difference, mrd : float
        mean(y/x) - 1
    median_difference, medd : float
        median(y-x)
    median_absolute_difference, medad : float
        median(|y-x|)
    relative_median_difference, rmedd : float
        median(y-x) / xmedian
    relative_median_absolute_difference, rmedad : float
        median(|y-x|) / xmedian
    median_relative_difference, medianrd, medrd : float
        median(y/x)-1
    normalized_mean_bias_factor, nmbf : float
        see `nmbf`
    normalized_mean_absolute_error_factor, nmaef : float
        see `nmaef`
    root_mean_square_difference, rmsd : float
        $\\sqrt{ \\langle (y - x)^2 \\rangle }$
    covariance : float
        cov(x,y)
    correlation_pearson, correlation, pearsonr, R, r : float
        Pearson linear correlation coefficient
    correlation_spearman, spearmanr : float
        Spearman, non-parametric rank correlation coefficient
    R2, r2 : float
        Linear coefficient of determination, $R^2$
    '''

    def __init__(self,x,y,w=None,dropna=False,data=None):
        '''Compute suite of bivariate statistics during initialization

        Statistic values are saved in attributes.
        CAUTION: Weights w are ignored except in SMA fit

        Parameters
        ----------
        x : ndarray or str
            independent variable values
        y : ndarray or str
            dependent variable values, same size as x
        w : ndarray or str, optional
            weights for points (x,y), same size as x and y
        dropna : bool, optional (default=False)
            drops NaN values from x, y, and w
        data : dict-like, optional
            if x, y, or w are str, then they should be keys in data

        Raises
        ------
        ValueError
            if x, y, or w is a str while data is None,
            or if x, y, and w do not all have the same length
        '''

        # Get values from data if needed
        if data is None and (isinstance(x,str) or isinstance(y,str) or isinstance(w,str)):
            raise ValueError( 'Data argument must be used if x, y, or w is a string')
        if isinstance(x,str):
            x = data[x]
        if isinstance(y,str):
            y = data[y]
        if isinstance(w,str):
            w = data[w]

        #Ensure that x and y have same length
        if len(x) != len(y):
            raise ValueError( 'Arguments x and y must have the same length' )
        if w is None:
            w = np.ones_like(x)
        if len(w) != len(x):
            raise ValueError( 'Argument w (if present) must have the same length as x' )

        # Drop NaN values; the product is NaN wherever any of x, y, w is NaN
        if dropna:
            isna = np.isnan(x*y*w)
            x = x[~isna]
            y = y[~isna]
            w = w[~isna]

        diff = y - x
        absdiff = np.abs( y - x )
        # Ignore divide by zero and 0/0 while dividing
        # The context manager restores the previous settings even on error
        with np.errstate(divide='ignore',invalid='ignore'):
            ratio = y/x

        # Means, medians, and standard deviations
        self.xmean = np.mean(x)
        self.ymean = np.mean(y)
        self.xmedian = np.median(x)
        self.ymedian = np.median(y)
        self.xstd = np.std(x)
        self.ystd = np.std(y)

        # Save values for use later
        self._x = x
        self._y = y
        self._w = w

        # Mean and mean absolute differences
        self.mean_difference = self.md = self.ymean - self.xmean
        self.mean_absolute_difference = self.mad = np.mean( absdiff )

        # Relative and standardized differences
        self.relative_mean_difference = self.rmd = self.mean_difference / self.xmean
        self.relative_mean_absolute_difference = self.rmad = self.mean_absolute_difference / self.xmean
        self.standardized_mean_difference = self.smd = self.mean_difference / self.xstd
        self.standardized_mean_absolute_difference = self.smad = self.mean_absolute_difference / self.xstd

        # Mean and median relative differences
        self.mean_relative_difference = self.mrd = np.mean( ratio - 1 )
        self.median_relative_difference = self.medianrd = self.medrd = np.median( ratio - 1 )

        # Median and median absolute differences
        self.median_difference = self.medd = np.median( diff )
        self.median_absolute_difference = self.medad = np.median( absdiff )

        # Relative median differences
        self.relative_median_difference = self.rmedd = self.median_difference / self.xmedian
        self.relative_median_absolute_difference = self.rmedad = self.median_absolute_difference / self.xmedian

        self.normalized_mean_bias_factor = self.nmbf = nmbf(x,y)
        self.normalized_mean_absolute_error_factor = self.nmaef = nmaef(x,y)

        # RMS difference
        self.root_mean_square_difference = self.rmsd = np.sqrt( np.mean( np.power( diff, 2) ) )

        # Covariance, correlation
        self.covariance = np.cov(x,y)[0][1]
        self.correlation = self.correlation_pearson = self.R = self.r = self.pearsonr = \
            np.corrcoef(x,y)[0][1]
        self.correlation_spearman = self.spearmanr = stats.spearmanr(x,y).statistic
        self.R2 = self.r2 = self.R**2

    def __getitem__(self,key):
        '''Accesses attribute values via object['key']'''
        return getattr(self,key)

    def fitline(self,method='sma',intercept=True,**kwargs):
        '''Compute bivariate line fit

        Parameters
        ----------
        method : str
            line fitting method: sma (default), ols, wls, York, sen, siegel
            names are case insensitive; wls and York are not implemented yet
        intercept : bool
            defines whether non-zero intercept should be fitted
            (used by sma and ols only; sen and siegel always fit an intercept)
        **kwargs
            passed to `acgc.stats.sma` (e.g. robust=True); used by sma only

        Returns
        -------
        result : dict
            dictionary with keys:
            - slope (float)
                slope of fitted line
            - intercept (float)
                intercept of fitted line
            - fittedvalues (array (N,))
                values on fit line
            - residuals (array (N,))
                residual from fit line

        Raises
        ------
        NotImplementedError
            for methods wls and York
        ValueError
            for any other unrecognized method
        '''

        fit_method = method.lower()

        if fit_method=='sma':
            fit = sma(  self._x,
                        self._y,
                        self._w,
                        intercept=intercept,
                        **kwargs)
            slope = fit['slope']
            yint  = fit['intercept']

        elif fit_method=='ols':
            if intercept:
                coef = np.linalg.lstsq( np.vstack([self._x,np.ones(len(self._x))]).T,
                                        self._y, rcond=None )[0]
                slope = coef[0]
                yint  = coef[1]
            else:
                # Only one coefficient is fitted; the line passes through the origin
                coef = np.linalg.lstsq( np.vstack([self._x]).T, self._y, rcond=None )[0]
                slope = coef[0]
                yint  = 0.0

        elif fit_method in ['theil','sen','theilsen']:
            # scipy expects the dependent variable first
            sen = stats.theilslopes( self._y,
                                     self._x )
            slope = sen.slope
            yint  = sen.intercept

        elif fit_method=='siegel':
            # scipy expects the dependent variable first
            siegel = stats.siegelslopes( self._y,
                                         self._x )
            slope = siegel.slope
            yint  = siegel.intercept

        elif fit_method=='wls':
            raise NotImplementedError('WLS regression not implemented yet')

        elif fit_method=='york':
            raise NotImplementedError('York regression not implemented yet')

        else:
            raise ValueError('Undefined method '+method)

        fitted = slope * self._x + yint
        line = dict( slope        = slope,
                     intercept    = yint,
                     fittedvalues = fitted,
                     residuals    = self._y - fitted )

        return line

    def slope(self,method='sma',intercept=True,**kwargs):
        '''Compute slope of bivariate line fit

        Parameters
        ----------
        method : str
            line fitting method, see `fitline`
        intercept : bool
            defines whether non-zero intercept should be fitted
        **kwargs
            passed to `fitline`

        Returns
        -------
        slope : float
            slope of the fitted line
        '''
        return self.fitline(method,intercept,**kwargs)['slope']

    def intercept(self,method='sma',intercept=True,**kwargs):
        '''Compute intercept of bivariate line fit

        Parameters
        ----------
        method : str
            line fitting method, see `fitline`
        intercept : bool
            defines whether non-zero intercept should be fitted
        **kwargs
            passed to `fitline`

        Returns
        -------
        intercept : float
            value of y intercept
        '''
        return self.fitline(method,intercept,**kwargs)['intercept']

    def _expand_variables(self,variables):
        '''Expand special strings into a list of variables

        Parameter
        ---------
        variables : list or str, default='common'
            Special strings ("all","common") will be expanded to a list of variables
            list arguments will not be modified

        Returns
        -------
        list
            variable names

        Raises
        ------
        ValueError
            if variables is not a list, None, "all", or "common"
        '''
        if variables is None:
            variables='common'
        if variables=='all':
            variables=['MD','MAD','RMD','RMAD','MRD','SMD','SMAD',
                       'MedD','MedAD','RMedD','RMedAD','MedRD',
                       'NMBF','NMAEF','RMSD',
                       'R','R2','spearmanr','slope','intercept']
        elif variables=='common':
            variables=['MD','MAD','RMD','RMAD','MRD','R2','slope']
        if not isinstance(variables,list):
            raise ValueError(
                'variables must be a list, None, or one of these strings: "all","common"')

        return variables

    def summary_dict(self, variables=None, fitline_kw=None ):
        '''Summarize bivariate statistics into a dict

        Parameters
        ----------
        variables : list or str, default='common'
            names of attribute variables to include in summary
            names are case insensitive
            The following strings are also accepted in place of a list
            "all" (displays all variables)
            "common" (displays all measures of mean difference)
        fitline_kw : dict, default=None
            keywords passed to self.fitline()
            default (None) uses method='sma' and intercept=True

        Returns
        -------
        summary : dict
            names and values of variables
            keys are the names exactly as given in `variables`
        '''

        # List of variables
        variables = self._expand_variables(variables)

        if fitline_kw is None:
            fitline_kw = {'method':'sma',
                          'intercept':True}

        # Construct the dict
        summary = {}
        for v in variables:
            attr = v.lower()
            if attr in ('slope','intercept'):
                # These variables are object methods
                value = getattr(self,attr)(**fitline_kw)
            else:
                # Retrieve values
                value = getattr(self,attr)

            summary[v] = value

        return summary

    def summary(self, variables=None, fitline_kw=None,
                floatformat='{:.4f}', stringlength=None ):
        '''Summarize bivariate statistics

        Parameters
        ----------
        variables : list or str, default='common'
            names of attribute variables to include in summary
            names are case insensitive
            The following strings are also accepted in place of a list
            "all" (displays all variables)
            "common" (displays all measures of mean difference)
        fitline_kw : dict, default=None
            keywords passed to `fitline`
        floatformat : str, default='{:.4f}'
            format specifier for floating point values
        stringlength : int, default=None
            length of the variables on output
            default (None) is to use the length of the longest variable name

        Returns
        -------
        summary : str
            names and values of variables, one "name = value" line per variable
        '''
        # List of variables
        variables = self._expand_variables(variables)

        if stringlength is None:
            # default=0 keeps an empty variable list from raising
            stringlength = max( (len(v) for v in variables), default=0 )
        stringformat = '{:'+str(stringlength)+'s}'

        # Get a dict containing the needed variables
        summarydict = self.summary_dict( variables, fitline_kw )

        lineformat = stringformat+' = '+floatformat+'\n'
        return ''.join( lineformat.format(k,v) for k,v in summarydict.items() )

    def summary_fig_inset(self, ax, variables=None, fitline_kw=None,
                          floatformat='{:.3f}',
                          loc=None, loc_units='axes',
                          **kwargs):
        '''Display bivariate statistics as a table inset on a plot axis

        The table is drawn as two adjacent text boxes: one column of
        variable names and one column of values.

        Parameters
        ----------
        ax : matplotlib axes
            axis where the table will be displayed
        variables : list or str, default='common'
            names of attribute variables to include in summary
            names are case insensitive
            The following strings are also accepted in place of a list
            "all" (displays all variables)
            "common" (displays all measures of mean difference)
        fitline_kw : dict, default=None
            keywords passed to `fitline`
        floatformat : str, default='{:.3f}'
            format specifier for floating point values
        loc : tuple (x0,y0), default=(0.8, 0.05)
            location on the axis where the table will be drawn
            can be in data units or axes units [0-1]
        loc_units : {'axes' (default), 'data'}
            specifies whether loc has 'data' units or 'axes' units [0-1]
        **kwargs
            passed to `ax.text` for both text boxes

        Returns
        -------
        text1, text2 : matplotlib text object
            Artist for the two text boxes

        Raises
        ------
        ValueError
            if loc_units is not 'data', 'axes', or 'axis'
        '''
        # List of variables
        variables = self._expand_variables(variables)

        # Default location in lower right corner
        if loc is None:
            loc = (0.8,0.05)

        # Coordinates for loc
        units = loc_units.lower()
        if units=='data':
            coord=ax.transData
        elif units in ['axes','axis']:
            coord=ax.transAxes
        else:
            raise ValueError('Display units should be "Data" or "Axes"')

        # Get a dict containing the needed variables
        summarydict = self.summary_dict( variables, fitline_kw )

        # Column of label text
        label_text = '\n'.join([_texify_name(key) for key in summarydict])
        # Column of value text
        value_text = '\n'.join([floatformat.format(value) for value in summarydict.values()])

        # Check if horizontal alignment keyword is used
        # 'horizontalalignment' takes precedence over 'ha' when both are given
        ha = kwargs.get('horizontalalignment', kwargs.get('ha',''))

        # For right alignment, align on values first
        # Otherwise, align on labels
        if ha=='right':
            first_text = value_text
            second_text = label_text
            sign = -1
        else:
            first_text = label_text
            second_text = value_text
            sign = +1

        # Add first column of text
        t1=ax.text(loc[0],loc[1],
                first_text,
                transform=coord,
                **kwargs
                )

        # Get width of first text column, converted to the units of loc
        bbox = t1.get_window_extent().transformed(coord.inverted())
        width = bbox.x1-bbox.x0

        # Add second column of text, offset by the width of the first column
        t2 = ax.text(loc[0]+width*sign,loc[1],
                     second_text,
                     transform=coord,
                     **kwargs
                     )

        return [t1,t2]
class BivariateStatistics:
    '''A suite of common statistics to quantify bivariate relationships

    Method 'summary' provides a formatted summary of these statistics

    Attributes
    ----------
    xmean, ymean : float
        mean of x and y variables
    xmedian, ymedian :float
        median of x and y variables
    xstd, ystd : float
        standard deviation of x and y variables
    mean_difference, md : float
        ymean - xmean
    mean_absolute_difference, mad : float
        mean( |y-x| )
    relative_mean_difference, rmd : float
        md / xmean
    relative_mean_absolute_difference, rmad :float
        mad / xmean
    standardized_mean_difference, smd : float
        md / xstd
    standardized_mean_absolute_difference, smad : float
        mad /xstd
    mean_relative_difference, mrd : float
        mean(y/x) - 1
    median_difference, medd : float
        median(y-x)
    median_absolute_difference, medad : float
        median(|y-x|)
    relative_median_difference, rmedd : float
        median(y-x) / xmedian
    relative_median_absolute_difference, rmedad : float
        median(|y-x|) / xmedian
    median_relative_difference, medianrd, medrd : float
        median(y/x)-1
    normalized_mean_bias_factor, nmbf : float
        see `nmbf`
    normalized_mean_absolute_error_factor, nmaef : float
        see `nmaef`
    root_mean_square_difference, rmsd : float
        $\\sqrt{ \\langle (y - x)^2 \\rangle }$
    covariance : float
        cov(x,y)
    correlation_pearson, correlation, pearsonr, R, r : float
        Pearson linear correlation coefficient
    correlation_spearman, spearmanr : float
        Spearman, non-parametric rank correlation coefficient
    R2, r2 : float
        Linear coefficient of determination, $R^2$
    '''

    def __init__(self,x,y,w=None,dropna=False,data=None):
        '''Compute suite of bivariate statistics during initialization

        Statistic values are saved in attributes.
        CAUTION: Weights w are ignored except in SMA fit

        Parameters
        ----------
        x : ndarray or str
            independent variable values
        y : ndarray or str
            dependent variable values, same size as x
        w : ndarray or str, optional
            weights for points (x,y), same size as x and y
        dropna : bool, optional (default=False)
            drops NaN values from x, y, and w
        data : dict-like, optional
            if x, y, or w are str, then they should be keys in data

        Raises
        ------
        ValueError
            if x, y, or w is a str while data is None,
            or if x, y, and w do not all have the same length
        '''

        # Get values from data if needed
        if data is None and (isinstance(x,str) or isinstance(y,str) or isinstance(w,str)):
            raise ValueError( 'Data argument must be used if x, y, or w is a string')
        if isinstance(x,str):
            x = data[x]
        if isinstance(y,str):
            y = data[y]
        if isinstance(w,str):
            w = data[w]

        #Ensure that x and y have same length
        if len(x) != len(y):
            raise ValueError( 'Arguments x and y must have the same length' )
        if w is None:
            w = np.ones_like(x)
        if len(w) != len(x):
            raise ValueError( 'Argument w (if present) must have the same length as x' )

        # Drop NaN values; the product is NaN wherever any of x, y, w is NaN
        if dropna:
            isna = np.isnan(x*y*w)
            x = x[~isna]
            y = y[~isna]
            w = w[~isna]

        diff = y - x
        absdiff = np.abs( y - x )
        # Ignore divide by zero and 0/0 while dividing
        # The context manager restores the previous settings even on error
        with np.errstate(divide='ignore',invalid='ignore'):
            ratio = y/x

        # Means, medians, and standard deviations
        self.xmean = np.mean(x)
        self.ymean = np.mean(y)
        self.xmedian = np.median(x)
        self.ymedian = np.median(y)
        self.xstd = np.std(x)
        self.ystd = np.std(y)

        # Save values for use later
        self._x = x
        self._y = y
        self._w = w

        # Mean and mean absolute differences
        self.mean_difference = self.md = self.ymean - self.xmean
        self.mean_absolute_difference = self.mad = np.mean( absdiff )

        # Relative and standardized differences
        self.relative_mean_difference = self.rmd = self.mean_difference / self.xmean
        self.relative_mean_absolute_difference = self.rmad = self.mean_absolute_difference / self.xmean
        self.standardized_mean_difference = self.smd = self.mean_difference / self.xstd
        self.standardized_mean_absolute_difference = self.smad = self.mean_absolute_difference / self.xstd

        # Mean and median relative differences
        self.mean_relative_difference = self.mrd = np.mean( ratio - 1 )
        self.median_relative_difference = self.medianrd = self.medrd = np.median( ratio - 1 )

        # Median and median absolute differences
        self.median_difference = self.medd = np.median( diff )
        self.median_absolute_difference = self.medad = np.median( absdiff )

        # Relative median differences
        self.relative_median_difference = self.rmedd = self.median_difference / self.xmedian
        self.relative_median_absolute_difference = self.rmedad = self.median_absolute_difference / self.xmedian

        self.normalized_mean_bias_factor = self.nmbf = nmbf(x,y)
        self.normalized_mean_absolute_error_factor = self.nmaef = nmaef(x,y)

        # RMS difference
        self.root_mean_square_difference = self.rmsd = np.sqrt( np.mean( np.power( diff, 2) ) )

        # Covariance, correlation
        self.covariance = np.cov(x,y)[0][1]
        self.correlation = self.correlation_pearson = self.R = self.r = self.pearsonr = \
            np.corrcoef(x,y)[0][1]
        self.correlation_spearman = self.spearmanr = stats.spearmanr(x,y).statistic
        self.R2 = self.r2 = self.R**2

    def __getitem__(self,key):
        '''Accesses attribute values via object['key']'''
        return getattr(self,key)

    def fitline(self,method='sma',intercept=True,**kwargs):
        '''Compute bivariate line fit

        Parameters
        ----------
        method : str
            line fitting method: sma (default), ols, wls, York, sen, siegel
            names are case insensitive; wls and York are not implemented yet
        intercept : bool
            defines whether non-zero intercept should be fitted
            (used by sma and ols only; sen and siegel always fit an intercept)
        **kwargs
            passed to `acgc.stats.sma` (e.g. robust=True); used by sma only

        Returns
        -------
        result : dict
            dictionary with keys:
            - slope (float)
                slope of fitted line
            - intercept (float)
                intercept of fitted line
            - fittedvalues (array (N,))
                values on fit line
            - residuals (array (N,))
                residual from fit line

        Raises
        ------
        NotImplementedError
            for methods wls and York
        ValueError
            for any other unrecognized method
        '''

        fit_method = method.lower()

        if fit_method=='sma':
            fit = sma(  self._x,
                        self._y,
                        self._w,
                        intercept=intercept,
                        **kwargs)
            slope = fit['slope']
            yint  = fit['intercept']

        elif fit_method=='ols':
            if intercept:
                coef = np.linalg.lstsq( np.vstack([self._x,np.ones(len(self._x))]).T,
                                        self._y, rcond=None )[0]
                slope = coef[0]
                yint  = coef[1]
            else:
                # Only one coefficient is fitted; the line passes through the origin
                coef = np.linalg.lstsq( np.vstack([self._x]).T, self._y, rcond=None )[0]
                slope = coef[0]
                yint  = 0.0

        elif fit_method in ['theil','sen','theilsen']:
            # scipy expects the dependent variable first
            sen = stats.theilslopes( self._y,
                                     self._x )
            slope = sen.slope
            yint  = sen.intercept

        elif fit_method=='siegel':
            # scipy expects the dependent variable first
            siegel = stats.siegelslopes( self._y,
                                         self._x )
            slope = siegel.slope
            yint  = siegel.intercept

        elif fit_method=='wls':
            raise NotImplementedError('WLS regression not implemented yet')

        elif fit_method=='york':
            raise NotImplementedError('York regression not implemented yet')

        else:
            raise ValueError('Undefined method '+method)

        fitted = slope * self._x + yint
        line = dict( slope        = slope,
                     intercept    = yint,
                     fittedvalues = fitted,
                     residuals    = self._y - fitted )

        return line

    def slope(self,method='sma',intercept=True,**kwargs):
        '''Compute slope of bivariate line fit

        Parameters
        ----------
        method : str
            line fitting method, see `fitline`
        intercept : bool
            defines whether non-zero intercept should be fitted
        **kwargs
            passed to `fitline`

        Returns
        -------
        slope : float
            slope of the fitted line
        '''
        return self.fitline(method,intercept,**kwargs)['slope']

    def intercept(self,method='sma',intercept=True,**kwargs):
        '''Compute intercept of bivariate line fit

        Parameters
        ----------
        method : str
            line fitting method, see `fitline`
        intercept : bool
            defines whether non-zero intercept should be fitted
        **kwargs
            passed to `fitline`

        Returns
        -------
        intercept : float
            value of y intercept
        '''
        return self.fitline(method,intercept,**kwargs)['intercept']

    def _expand_variables(self,variables):
        '''Expand special strings into a list of variables

        Parameter
        ---------
        variables : list or str, default='common'
            Special strings ("all","common") will be expanded to a list of variables
            list arguments will not be modified

        Returns
        -------
        list
            variable names

        Raises
        ------
        ValueError
            if variables is not a list, None, "all", or "common"
        '''
        if variables is None:
            variables='common'
        if variables=='all':
            variables=['MD','MAD','RMD','RMAD','MRD','SMD','SMAD',
                       'MedD','MedAD','RMedD','RMedAD','MedRD',
                       'NMBF','NMAEF','RMSD',
                       'R','R2','spearmanr','slope','intercept']
        elif variables=='common':
            variables=['MD','MAD','RMD','RMAD','MRD','R2','slope']
        if not isinstance(variables,list):
            raise ValueError(
                'variables must be a list, None, or one of these strings: "all","common"')

        return variables

    def summary_dict(self, variables=None, fitline_kw=None ):
        '''Summarize bivariate statistics into a dict

        Parameters
        ----------
        variables : list or str, default='common'
            names of attribute variables to include in summary
            names are case insensitive
            The following strings are also accepted in place of a list
            "all" (displays all variables)
            "common" (displays all measures of mean difference)
        fitline_kw : dict, default=None
            keywords passed to self.fitline()
            default (None) uses method='sma' and intercept=True

        Returns
        -------
        summary : dict
            names and values of variables
            keys are the names exactly as given in `variables`
        '''

        # List of variables
        variables = self._expand_variables(variables)

        if fitline_kw is None:
            fitline_kw = {'method':'sma',
                          'intercept':True}

        # Construct the dict
        summary = {}
        for v in variables:
            attr = v.lower()
            if attr in ('slope','intercept'):
                # These variables are object methods
                value = getattr(self,attr)(**fitline_kw)
            else:
                # Retrieve values
                value = getattr(self,attr)

            summary[v] = value

        return summary

    def summary(self, variables=None, fitline_kw=None,
                floatformat='{:.4f}', stringlength=None ):
        '''Summarize bivariate statistics

        Parameters
        ----------
        variables : list or str, default='common'
            names of attribute variables to include in summary
            names are case insensitive
            The following strings are also accepted in place of a list
            "all" (displays all variables)
            "common" (displays all measures of mean difference)
        fitline_kw : dict, default=None
            keywords passed to `fitline`
        floatformat : str, default='{:.4f}'
            format specifier for floating point values
        stringlength : int, default=None
            length of the variables on output
            default (None) is to use the length of the longest variable name

        Returns
        -------
        summary : str
            names and values of variables, one "name = value" line per variable
        '''
        # List of variables
        variables = self._expand_variables(variables)

        if stringlength is None:
            # default=0 keeps an empty variable list from raising
            stringlength = max( (len(v) for v in variables), default=0 )
        stringformat = '{:'+str(stringlength)+'s}'

        # Get a dict containing the needed variables
        summarydict = self.summary_dict( variables, fitline_kw )

        lineformat = stringformat+' = '+floatformat+'\n'
        return ''.join( lineformat.format(k,v) for k,v in summarydict.items() )

    def summary_fig_inset(self, ax, variables=None, fitline_kw=None,
                          floatformat='{:.3f}',
                          loc=None, loc_units='axes',
                          **kwargs):
        '''Display bivariate statistics as a table inset on a plot axis

        The table is drawn as two adjacent text boxes: one column of
        variable names and one column of values.

        Parameters
        ----------
        ax : matplotlib axes
            axis where the table will be displayed
        variables : list or str, default='common'
            names of attribute variables to include in summary
            names are case insensitive
            The following strings are also accepted in place of a list
            "all" (displays all variables)
            "common" (displays all measures of mean difference)
        fitline_kw : dict, default=None
            keywords passed to `fitline`
        floatformat : str, default='{:.3f}'
            format specifier for floating point values
        loc : tuple (x0,y0), default=(0.8, 0.05)
            location on the axis where the table will be drawn
            can be in data units or axes units [0-1]
        loc_units : {'axes' (default), 'data'}
            specifies whether loc has 'data' units or 'axes' units [0-1]
        **kwargs
            passed to `ax.text` for both text boxes

        Returns
        -------
        text1, text2 : matplotlib text object
            Artist for the two text boxes

        Raises
        ------
        ValueError
            if loc_units is not 'data', 'axes', or 'axis'
        '''
        # List of variables
        variables = self._expand_variables(variables)

        # Default location in lower right corner
        if loc is None:
            loc = (0.8,0.05)

        # Coordinates for loc
        units = loc_units.lower()
        if units=='data':
            coord=ax.transData
        elif units in ['axes','axis']:
            coord=ax.transAxes
        else:
            raise ValueError('Display units should be "Data" or "Axes"')

        # Get a dict containing the needed variables
        summarydict = self.summary_dict( variables, fitline_kw )

        # Column of label text
        label_text = '\n'.join([_texify_name(key) for key in summarydict])
        # Column of value text
        value_text = '\n'.join([floatformat.format(value) for value in summarydict.values()])

        # Check if horizontal alignment keyword is used
        # 'horizontalalignment' takes precedence over 'ha' when both are given
        ha = kwargs.get('horizontalalignment', kwargs.get('ha',''))

        # For right alignment, align on values first
        # Otherwise, align on labels
        if ha=='right':
            first_text = value_text
            second_text = label_text
            sign = -1
        else:
            first_text = label_text
            second_text = value_text
            sign = +1

        # Add first column of text
        t1=ax.text(loc[0],loc[1],
                first_text,
                transform=coord,
                **kwargs
                )

        # Get width of first text column, converted to the units of loc
        bbox = t1.get_window_extent().transformed(coord.inverted())
        width = bbox.x1-bbox.x0

        # Add second column of text, offset by the width of the first column
        t2 = ax.text(loc[0]+width*sign,loc[1],
                     second_text,
                     transform=coord,
                     **kwargs
                     )

        return [t1,t2]
A suite of common statistics to quantify bivariate relationships
Class method 'summary' provides a formatted summary of these statistics
Attributes
- xmean, ymean (float): mean of x and y variables
- xmedian, ymedian (float): median of x and y variables
- xstd, ystd (float): standard deviation of x and y variables
- mean_difference, md (float): ymean - xmean
- mean_absolute_difference, mad (float): mean( |y-x| )
- relative_mean_difference, rmd (float): md / xmean
- relative_mean_absolute_difference, rmad (float): mad / xmean
- standardized_mean_difference, smd (float): md / xstd
- standardized_mean_absolute_difference, smad (float): mad / xstd
- mean_relative_difference, mrd (float): mean(y/x) - 1
- median_difference, medd (float): median(y-x)
- median_absolute_difference, medad (float): median(|y-x|)
- relative_median_difference, rmedd (float): median(y-x) / xmedian
- relative_median_absolute_difference, rmedad (float): median(|y-x|) / xmedian
- median_relative_difference, medianrd, medrd (float): median(y/x)-1
- normalized_mean_bias_factor, nmbf (float):
see
nmbf
- normalized_mean_absolute_error_factor, nmaef (float):
see
nmaef
- root_mean_square_difference, rmsd (float): $\sqrt{ \langle (y - x)^2 \rangle }$
- covariance (float): cov(x,y)
- correlation_pearson, correlation, pearsonr, R, r (float): Pearson linear correlation coefficient
- correlation_spearman, spearmanr (float): Spearman, non-parametric rank correlation coefficient
- R2, r2 (float): Linear coefficient of determination, $R^2$
def __init__(self,x,y,w=None,dropna=False,data=None):
    '''Compute suite of bivariate statistics during initialization

    Statistic values are saved in attributes.
    CAUTION: Weights w are ignored except in SMA fit

    Parameters
    ----------
    x : ndarray or str
        independent variable values
    y : ndarray or str
        dependent variable values, same size as x
    w : ndarray or str, optional
        weights for points (x,y), same size as x and y
        If None, unit weights shaped like x are used
    dropna : bool, optional (default=False)
        drops points where x, y, or w is NaN
    data : dict-like, optional
        if x, y, or w are str, then they should be keys in data

    Raises
    ------
    ValueError
        if x, y, or w is a str but data is None, or if
        x, y, and w do not all have the same length
    '''

    # Get values from data if needed
    if data is None and any(isinstance(arg,str) for arg in (x,y,w)):
        raise ValueError( 'Data argument must be used if x, y, or w is a string')
    if isinstance(x,str):
        x = data[x]
    if isinstance(y,str):
        y = data[y]
    if isinstance(w,str):
        w = data[w]

    # Ensure that x and y have same length
    if len(x) != len(y):
        raise ValueError( 'Arguments x and y must have the same length' )
    if w is None:
        w = np.ones_like(x)
    if len(w) != len(x):
        raise ValueError( 'Argument w (if present) must have the same length as x' )

    # Drop NaN values
    if dropna:
        # Test each input separately. Testing the product x*y*w would also
        # discard valid points whose product is NaN (e.g. inf*0).
        isna = np.isnan(x) | np.isnan(y) | np.isnan(w)
        x = x[~isna]
        y = y[~isna]
        w = w[~isna]

    diff = y - x
    absdiff = np.abs( y - x )
    # Ignore divide by zero and 0/0 while dividing;
    # the context manager restores the error settings even if division raises
    with np.errstate(divide='ignore',invalid='ignore'):
        ratio = y/x

    # Means, medians, and standard deviations
    self.xmean = np.mean(x)
    self.ymean = np.mean(y)
    self.xmedian = np.median(x)
    self.ymedian = np.median(y)
    self.xstd   = np.std(x)
    self.ystd   = np.std(y)

    # Save values for use later (line fitting)
    self._x = x
    self._y = y
    self._w = w

    # Mean and mean absolute differences
    self.mean_difference            = self.md   = self.ymean - self.xmean
    self.mean_absolute_difference   = self.mad  = np.mean( absdiff )

    # Relative and standardized differences
    self.relative_mean_difference           = self.rmd  = self.mean_difference / self.xmean
    self.relative_mean_absolute_difference  = self.rmad = self.mean_absolute_difference / self.xmean
    self.standardized_mean_difference       = self.smd  = self.mean_difference / self.xstd
    self.standardized_mean_absolute_difference  = self.smad = self.mean_absolute_difference / self.xstd

    # Mean and median relative differences
    self.mean_relative_difference   = self.mrd  = np.mean( ratio - 1 )
    self.median_relative_difference = self.medianrd = self.medrd = np.median( ratio - 1 )

    # Median and median absolute differences
    self.median_difference          = self.medd  = np.median( diff )
    self.median_absolute_difference = self.medad = np.median( absdiff )

    # Relative median differences
    self.relative_median_difference          = self.rmedd  = self.median_difference / self.xmedian
    self.relative_median_absolute_difference = self.rmedad = self.median_absolute_difference / self.xmedian

    # Normalized mean bias factor and absolute error factor (x is the reference)
    self.normalized_mean_bias_factor            = self.nmbf  = nmbf(x,y)
    self.normalized_mean_absolute_error_factor  = self.nmaef = nmaef(x,y)

    # RMS difference
    self.root_mean_square_difference    = self.rmsd     = np.sqrt( np.mean( np.power( diff, 2) ) )

    # Covariance, correlation
    self.covariance = np.cov(x,y)[0][1]
    self.correlation = self.correlation_pearson = self.R = self.r = self.pearsonr = \
        np.corrcoef(x,y)[0][1]
    self.correlation_spearman = self.spearmanr = stats.spearmanr(x,y).statistic
    self.R2 = self.r2 = self.R**2
Compute suite of bivariate statistics during initialization
Statistic values are saved in attributes. CAUTION: Weights w are ignored except in SMA fit
Parameters
- x (ndarray or str): independent variable values
- y (ndarray or str): dependent variable values, same size as x
- w (ndarray or str, optional): weights for points (x,y), same size as x and y
- dropna (bool, optional (default=False)): drops NaN values from x, y, and w
- data (dict-like, optional): if x, y, or w are str, then they should be keys in data
def fitline(self,method='sma',intercept=True,**kwargs):
    '''Compute bivariate line fit

    Parameters
    ----------
    method : str
        line fitting method (case insensitive):
        sma (default), ols, sen (also theil, theilsen), siegel.
        wls and York are recognized but not implemented.
    intercept : bool
        defines whether non-zero intercept should be fitted.
        Used by the sma and ols methods; the sen and siegel
        methods always fit an intercept.
    **kwargs
        passed to `acgc.stats.sma` (e.g. robust=True)

    Returns
    -------
    result : dict
        dictionary with keys:
        - slope (float)
            slope of fitted line
        - intercept (float)
            intercept of fitted line
        - fittedvalues (array (N,))
            values on fit line
        - residuals (array (N,))
            residual from fit line

    Raises
    ------
    NotImplementedError
        if method is wls or York
    ValueError
        if method is not recognized
    '''

    fit_method = method.lower()

    if fit_method=='sma':
        fit = sma(  self._x,
                    self._y,
                    self._w,
                    intercept=intercept,
                    **kwargs)
        slope = fit['slope']
        intercept = fit['intercept']

    elif fit_method=='ols':
        if intercept:
            design = np.vstack([self._x,np.ones(len(self._x))]).T
            coef = np.linalg.lstsq( design, self._y, rcond=None )[0]
            slope = coef[0]
            intercept = coef[1]
        else:
            # Fit through the origin: only one coefficient is returned,
            # so the intercept is zero by construction
            design = np.vstack([self._x]).T
            coef = np.linalg.lstsq( design, self._y, rcond=None )[0]
            slope = coef[0]
            intercept = 0.0

    elif fit_method in ['theil','sen','theilsen']:
        sen = stats.theilslopes( self._y,
                                 self._x )
        slope = sen.slope
        intercept = sen.intercept

    elif fit_method=='siegel':
        # scipy expects the dependent variable first: siegelslopes(y, x)
        siegel = stats.siegelslopes( self._y,
                                     self._x )
        slope = siegel.slope
        intercept = siegel.intercept

    elif fit_method=='wls':
        raise NotImplementedError('WLS regression not implemented yet')

    elif fit_method=='york':
        raise NotImplementedError('York regression not implemented yet')

    else:
        raise ValueError('Undefined method '+method)

    fitted = slope * self._x + intercept
    line = dict( slope          = slope,
                 intercept      = intercept,
                 fittedvalues   = fitted,
                 residuals      = self._y - fitted )

    return line
Compute bivariate line fit
Parameters
- method (str): line fitting method: sma (default), ols, wls, York, sen, siegel
- intercept (bool): defines whether non-zero intercept should be fitted
- **kwargs: passed to
acgc.stats.sma
(e.g. robust=True)
Returns
- result (dict):
dictionary with keys:
- slope (float) slope of fitted line
- intercept (float) intercept of fitted line
- fittedvalues (array (N,)) values on fit line
- residuals (array (N,)) residual from fit line
def slope(self,method='sma',intercept=True,**kwargs):
    '''Compute slope of bivariate line fit

    Calls `fitline` and returns the 'slope' entry of its result.

    Parameters
    ----------
    method : str
        line fitting method passed to `fitline` (default 'sma')
    intercept : bool
        defines whether non-zero intercept should be fitted
    **kwargs
        passed to `fitline`

    Returns
    -------
    slope : float
        slope of fitted line
    '''
    line = self.fitline(method,intercept,**kwargs)
    return line['slope']
Compute slope of bivariate line fit
Parameters
- method (str): line fitting method: sma (default), ols, wls
- intercept (bool): defines whether non-zero intercept should be fitted
- **kwargs: passed to
fitline
Returns
- slope (float): slope of fitted line
def intercept(self,method='sma',intercept=True,**kwargs):
    '''Compute intercept of bivariate line fit

    Calls `fitline` and returns the 'intercept' entry of its result.

    Parameters
    ----------
    method : str
        line fitting method passed to `fitline` (default 'sma')
    intercept : bool
        defines whether non-zero intercept should be fitted
    **kwargs
        passed to `fitline`

    Returns
    -------
    intercept : float
        value of y intercept
    '''
    line = self.fitline(method,intercept,**kwargs)
    return line['intercept']
Compute intercept of bivariate line fit
Parameters
- method (str): line fitting method: sma (default) or ols
- intercept (bool): defines whether non-zero intercept should be fitted
- **kwargs: passed to
fitline
Returns
- intercept (float): value of y intercept
def summary_dict(self, variables=None, fitline_kw=None ):
    '''Summarize bivariate statistics into a dict

    Parameters
    ----------
    variables : list or str, default='common'
        names of attribute variables to include in summary
        names are case insensitive
        The following strings are also accepted in place of a list
            "all" (displays all variables)
            "common" (displays all measures of mean difference)
        The value is resolved by `_expand_variables`.
    fitline_kw : dict, default=None
        keywords passed to `slope` and `intercept` (and on to `fitline`)
        None uses method='sma', intercept=True

    Returns
    -------
    summary : dict
        names and values of variables; keys keep the
        capitalization given in `variables`
    '''

    # List of variables
    variables = self._expand_variables(variables)

    if fitline_kw is None:
        fitline_kw = {'method':'sma',
                      'intercept':True}

    # Construct the dict
    summary = {}
    for name in variables:
        # Attribute lookup is case insensitive for every name,
        # including the fit-line quantities
        key = name.lower()
        if key in ('slope','intercept'):
            # These variables are object methods that need fit options
            value = getattr(self,key)(**fitline_kw)
        else:
            # Plain attribute computed at initialization
            value = getattr(self,key)

        summary[name] = value

    return summary
Summarize bivariate statistics into a dict
Parameters
- variables (list or str, default='common'):
names of attribute variables to include in summary
names are case insensitive
The following strings are also accepted in place of a list: "all" (displays all variables), "common" (displays all measures of mean difference)
- fitline_kw (dict, default=None): keywords passed to self.fitline()
Returns
- summary (dict): names and values of variables
def summary(self, variables=None, fitline_kw=None,
            floatformat='{:.4f}', stringlength=None ):
    '''Summarize bivariate statistics

    Parameters
    ----------
    variables : list or str, default='common'
        names of attribute variables to include in summary
        names are case insensitive
        The following strings are also accepted in place of a list
            "all" (displays all variables)
            "common" (displays all measures of mean difference)
        The value is resolved by `_expand_variables`.
    fitline_kw : dict, default=None
        keywords passed to `summary_dict`
    floatformat : str, default='{:.4f}'
        format specifier for floating point values
    stringlength : int, default=None
        length of the variables on output
        default (None) is to use the length of the longest variable name

    Returns
    -------
    summary : str
        one "name = value" line per variable, each ending with a newline;
        empty string if there are no variables
    '''
    # List of variables
    variables = self._expand_variables(variables)

    if stringlength is None:
        # default=0 keeps an empty variable list from raising
        stringlength = max( (len(v) for v in variables), default=0 )
    stringformat = '{:'+str(stringlength)+'s}'

    # Get a dict containing the needed variables
    summarydict = self.summary_dict( variables, fitline_kw )

    # One line per variable: padded name, then formatted value
    lineformat = stringformat+' = '+floatformat+'\n'
    return ''.join( lineformat.format(k,v) for k,v in summarydict.items() )
Summarize bivariate statistics
Parameters
- variables (list or str, default='common'):
names of attribute variables to include in summary
names are case insensitive
The following strings are also accepted in place of a list: "all" (displays all variables), "common" (displays all measures of mean difference)
- floatformat (str, default='{:.4f}'): format specifier for floating point values
- stringlength (int, default=None): length of the variables on output default (None) is to use the length of the longest variable name
- fitline_kw (dict, default=None):
keywords passed to
fitline
Returns
- summary (str): names and values of variables
def summary_fig_inset(self, ax, variables=None, fitline_kw=None,
                      floatformat='{:.3f}',
                      loc=None, loc_units='axes',
                      **kwargs):
    '''Display bivariate statistics as a table inset on a plot axis

    The table is drawn as two text boxes placed side by side:
    one column of variable names and one column of values.

    Parameters
    ----------
    ax : matplotlib.Figure.Axis
        axis where the table will be displayed
    variables : list or str, default='common'
        names of attribute variables to include in summary
        names are case insensitive
        The following strings are also accepted in place of a list
            "all" (displays all variables)
            "common" (displays all measures of mean difference)
    fitline_kw : dict, default=None
        keywords passed to `summary_dict`
    floatformat : str, default='{:.3f}'
        format specifier for floating point values
    loc : tuple (x0,y0), default=(0.8, 0.05)
        location on the axis where the table will be drawn
        can be in data units or axes units [0-1]
    loc_units : {'axes' (default), 'data'}
        specifies whether loc has 'data' units or 'axes' units [0-1]
    **kwargs
        passed to `ax.text` for both columns. If `ha` or
        `horizontalalignment` is 'right', the value column is drawn
        at loc and the label column is placed to its left.

    Returns
    -------
    text1, text2 : matplotlib text object
        Artist for the two text boxes

    Raises
    ------
    ValueError
        if loc_units is not 'data', 'axes', or 'axis' (case insensitive)
    '''
    # List of variables
    variables = self._expand_variables(variables)

    # Default location in lower right corner
    if loc is None:
        loc = (0.8,0.05)

    # Coordinate transform matching the units of loc
    units = loc_units.lower()
    if units=='data':
        coord = ax.transData
    elif units in ['axes','axis']:
        coord = ax.transAxes
    else:
        raise ValueError('Display units should be "Data" or "Axes"')

    # Get a dict containing the needed variables
    summarydict = self.summary_dict( variables, fitline_kw )

    # Build the label column and the value column, one row per variable
    labels = [_texify_name(key) for key in summarydict]
    values = [floatformat.format(value) for value in summarydict.values()]
    label_text = '\n'.join(labels)
    value_text = '\n'.join(values)

    # Horizontal alignment keyword, if used;
    # the long spelling takes precedence over the short one
    ha = kwargs.get('horizontalalignment', kwargs.get('ha', ''))

    # For right alignment, align on values first
    # Otherwise, align on labels
    if ha=='right':
        first_text, second_text, sign = value_text, label_text, -1
    else:
        first_text, second_text, sign = label_text, value_text, +1

    # Add first column of text
    t1 = ax.text(loc[0],loc[1],
                 first_text,
                 transform=coord,
                 **kwargs
                 )

    # Width of first column, converted back into the units of loc
    bbox = t1.get_window_extent().transformed(coord.inverted())
    width = bbox.x1-bbox.x0

    # Add second column of text, offset by the width of the first
    t2 = ax.text(loc[0]+width*sign,loc[1],
                 second_text,
                 transform=coord,
                 **kwargs
                 )

    return [t1,t2]
Display bivariate statistics as a table inset on a plot axis
Parameters
- ax (matplotlib.Figure.Axis): axis where the table will be displayed
- variables (list or str, default='common'):
names of attribute variables to include in summary
names are case insensitive
The following strings are also accepted in place of a list "all" (displays all variables) "common" (displays all measures of mean difference) - fitline_kw (dict, default=None):
keywords passed to
fitline
- floatformat (str, default='{:.3f}'): format specifier for floating point values
- loc (tuple (x0,y0), default=(0.8, 0.05)): location on the axis where the table will be drawn; can be in data units or axes units [0-1]
- loc_units ({'axes' (default), 'data'}): specifies whether loc has 'data' units or 'axes' units [0-1]
Returns
- text1, text2 (matplotlib text object): Artist for the two text boxes
def nmb( x0, x1 ):
    '''Compute Normalized Mean Bias (NMB)

    NMB = ( mean(x1) - mean(x0) ) / mean(x0),
    evaluated as mean(x1) / mean(x0) - 1

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values, same length as x0

    Returns
    -------
    float
        normalized mean bias
    '''

    # Both samples must have the same number of elements
    assert (len(x0) == len(x1)), \
        "Parameters x0 and x1 must have the same length"

    # Ratio of experiment mean to reference mean, relative to unity
    return np.mean(x1) / np.mean(x0) - 1
Compute Normalized Mean Bias (NMB)
NMB = ( mean(x1) - mean(x0) ) / mean(x0)
Parameters
- x0 (array_like): reference values
- x1 (array_like): experiment values
def nmae( x0, x1 ):
    '''Compute Normalized Mean Absolute Error (NMAE)

    NMAE = mean(abs(x1 - x0)) / abs(mean(x0))

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    float
        normalized mean absolute error
    '''

    # Mean values
    x0_mean = np.mean(x0)

    # Element-wise difference. Plain sequences (list, tuple) do not
    # support subtraction, so convert them only when needed; inputs that
    # already subtract are left untouched.
    try:
        diff = x1 - x0
    except TypeError:
        diff = np.asarray(x1) - np.asarray(x0)

    # Mean absolute difference
    abs_diff = np.mean( np.abs(diff) )

    # Metric value
    return abs_diff / np.abs( x0_mean )
Compute Normalized Mean Absolute Error (NMAE)
NMAE = mean(abs(x1 - x0)) / abs(mean(x0))
Parameters
- x0 (array_like): reference values
- x1 (array_like): experiment values
def nmbf( x0, x1 ):
    '''Compute Normalized Mean Bias Factor (NMBF)

    Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125

    NMBF = mean(x1) / mean(x0) - 1   if mean(x1) >= mean(x0)
    NMBF = 1 - mean(x0) / mean(x1)   otherwise

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values, same length as x0

    Returns
    -------
    float
        normalized mean bias factor
    '''

    # Ensure that arguments have the same length
    assert (len(x0) == len(x1)), \
        "Parameters x0 and x1 must have the same length"

    ref_mean, exp_mean = np.mean(x0), np.mean(x1)

    # The branch depends on which mean is larger
    return (exp_mean / ref_mean - 1) if exp_mean >= ref_mean \
        else (1 - ref_mean / exp_mean)
Compute Normalized Mean Bias Factor (NMBF)
Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125
Parameters
- x0 (array_like): reference values
- x1 (array_like): experiment values
def nmaef( x0, x1 ):
    '''Compute Normalized Mean Absolute Error Factor (NMAEF)

    Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125

    NMAEF = mean(abs(x1 - x0)) / mean(x0)   if mean(x1) >= mean(x0)
    NMAEF = mean(abs(x1 - x0)) / mean(x1)   otherwise

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values, same length as x0

    Returns
    -------
    float
        normalized mean absolute error factor
    '''

    # Ensure that arguments have the same length
    assert (len(x0) == len(x1)), \
        "Parameters x0 and x1 must have the same length"

    # Mean values
    x0_mean = np.mean(x0)
    x1_mean = np.mean(x1)

    # Element-wise difference. Plain sequences (list, tuple) do not
    # support subtraction, so convert them only when needed; inputs that
    # already subtract are left untouched.
    try:
        diff = x1 - x0
    except TypeError:
        diff = np.asarray(x1) - np.asarray(x0)

    # Mean absolute difference
    abs_diff = np.mean( np.abs(diff) )

    # Metric value: normalize by the smaller of the two means
    if x1_mean >= x0_mean:
        result = abs_diff / x0_mean
    else:
        result = abs_diff / x1_mean

    return result
Compute Normalized Mean Absolute Error Factor (NMAEF)
Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125
Parameters
- x0 (array_like): reference values
- x1 (array_like): experiment values