acgc.stats.bivariate
Bivariate statistics
Statistical measures of relationships between two populations
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Bivariate statistics

Statistical measures of relationships between two populations
"""

import numpy as np
from scipy import stats
from .bivariate_lines import sma
# import xarray as xr

__all__ = [
    "BivariateStatistics",
    "nmb",
    "nmae",
    "nmbf",
    "nmaef"
]

def nmb( x0, x1 ):
    '''Compute Normalized Mean Bias (NMB)

    NMB = ( mean(x1) - mean(x0) ) / mean(x0)

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    float
        normalized mean bias
    '''

    # Ensure that arguments have the same length
    assert (len(x0) == len(x1)), \
        "Parameters x0 and x1 must have the same length"

    # Metric value; algebraically equal to (mean(x1) - mean(x0)) / mean(x0)
    return np.mean(x1) / np.mean(x0) - 1

def nmae( x0, x1 ):
    '''Compute Normalized Mean Absolute Error (NMAE)

    NMAE = mean(abs(x1 - x0)) / abs(mean(x0))

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    float
        normalized mean absolute error
    '''

    # Mean absolute difference
    # np.subtract (rather than the - operator) also accepts lists and tuples
    abs_diff = np.mean( np.abs( np.subtract(x1, x0) ) )

    # Metric value
    return abs_diff / np.abs( np.mean(x0) )


def nmbf( x0, x1 ):
    '''Compute Normalized Mean Bias Factor (NMBF)

    Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125

    NMBF = mean(x1) / mean(x0) - 1   if mean(x1) >= mean(x0)
    NMBF = 1 - mean(x0) / mean(x1)   otherwise

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    float
        normalized mean bias factor
    '''

    # Ensure that arguments have the same length
    assert (len(x0) == len(x1)), \
        "Parameters x0 and x1 must have the same length"

    # Mean values
    x0_mean = np.mean(x0)
    x1_mean = np.mean(x1)

    # Metric value; the larger mean is always in the numerator
    if x1_mean >= x0_mean:
        return x1_mean / x0_mean - 1
    return 1 - x0_mean / x1_mean

def nmaef( x0, x1 ):
    '''Compute Normalized Mean Absolute Error Factor (NMAEF)

    Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125

    NMAEF = mean(abs(x1 - x0)) / mean(x0)   if mean(x1) >= mean(x0)
    NMAEF = mean(abs(x1 - x0)) / mean(x1)   otherwise

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    float
        normalized mean absolute error factor
    '''

    # Ensure that arguments have the same length
    assert (len(x0) == len(x1)), \
        "Parameters x0 and x1 must have the same length"

    # Mean values
    x0_mean = np.mean(x0)
    x1_mean = np.mean(x1)

    # Mean absolute difference
    # np.subtract (rather than the - operator) also accepts lists and tuples
    abs_diff = np.mean( np.abs( np.subtract(x1, x0) ) )

    # Metric value; normalize by the smaller of the two means
    if x1_mean >= x0_mean:
        return abs_diff / x0_mean
    return abs_diff / x1_mean

def _texify_name(name):
    '''Return a LaTex formatted string for some variables

    Parameter
    ---------
    name : str
        variable name

    Returns
    -------
    pretty_name : str
        LaTeX string for "R2" and "r2"; any other name is returned unchanged
    '''
    if name=='R2':
        return '$R^2$'
    if name=='r2':
        return '$r^2$'
    return name

class BivariateStatistics:
    '''A suite of common statistics to quantify bivariate relationships

    The `summary` method provides a formatted summary of these statistics

    Attributes
    ----------
    xmean, ymean : float
        mean of x and y variables
    xmedian, ymedian : float
        median of x and y variables
    xstd, ystd : float
        standard deviation of x and y variables
    mean_difference, md : float
        ymean - xmean
    mean_absolute_difference, mad : float
        mean( |y-x| )
    relative_mean_difference, rmd : float
        md / xmean
    relative_mean_absolute_difference, rmad : float
        mad / xmean
    standardized_mean_difference, smd : float
        md / xstd
    standardized_mean_absolute_difference, smad : float
        mad / xstd
    mean_relative_difference, mrd : float
        mean(y/x) - 1
    median_difference, medd : float
        median(y-x)
    median_absolute_difference, medad : float
        median(|y-x|)
    relative_median_difference, rmedd : float
        median(y-x) / xmedian
    relative_median_absolute_difference, rmedad : float
        median(|y-x|) / xmedian
    median_relative_difference, medianrd, medrd : float
        median(y/x)-1
    normalized_mean_bias_factor, nmbf : float
        see `nmbf`
    normalized_mean_absolute_error_factor, nmaef : float
        see `nmaef`
    root_mean_square_difference, rmsd : float
        $\\sqrt{ \\langle (y - x)^2 \\rangle }$
    covariance : float
        cov(x,y)
    correlation_pearson, correlation, pearsonr, R, r : float
        Pearson linear correlation coefficient
    correlation_spearman, spearmanr : float
        Spearman, non-parametric rank correlation coefficient
    R2, r2 : float
        Linear coefficient of determination, $R^2$
    '''

    def __init__(self,x,y,w=None):
        '''Compute suite of bivariate statistics during initialization

        Statistic values are saved in attributes.
        CAUTION: Weights w are ignored except in SMA fit

        Parameters
        ----------
        x : ndarray
            independent variable values
        y : ndarray
            dependent variable values, same size as x
        w : ndarray, optional
            weights for points (x,y), same size as x and y

        Raises
        ------
        ValueError
            if x and y, or w when given, do not have the same length
        '''

        #Ensure that x and y have same length
        if len(x) != len(y):
            raise ValueError( 'Arguments x and y must have the same length' )
        if (w is not None) and (len(w) != len(x)):
            raise ValueError( 'Argument w (if present) must have the same length as x' )

        # Keep the inputs; they are needed later by fitline
        self._x = x
        self._y = y
        self._w = w

        diff = y - x
        absdiff = np.abs( diff )
        ratio = y/x

        # Means, medians, and standard deviations
        self.xmean = np.mean(x)
        self.ymean = np.mean(y)
        self.xmedian = np.median(x)
        self.ymedian = np.median(y)
        self.xstd = np.std(x)
        self.ystd = np.std(y)

        # Mean and mean absolute differences
        self.mean_difference = self.md = self.ymean - self.xmean
        self.mean_absolute_difference = self.mad = np.mean( absdiff )

        # Relative and standardized differences
        self.relative_mean_difference = self.rmd = self.mean_difference / self.xmean
        self.relative_mean_absolute_difference = self.rmad = \
            self.mean_absolute_difference / self.xmean
        self.standardized_mean_difference = self.smd = self.mean_difference / self.xstd
        self.standardized_mean_absolute_difference = self.smad = \
            self.mean_absolute_difference / self.xstd

        # Mean and median relative differences
        self.mean_relative_difference = self.mrd = np.mean( ratio - 1 )
        self.median_relative_difference = self.medianrd = self.medrd = np.median( ratio - 1 )

        # Median and median absolute differences
        self.median_difference = self.medd = np.median( diff )
        self.median_absolute_difference = self.medad = np.median( absdiff )

        # Relative median differences
        self.relative_median_difference = self.rmedd = self.median_difference / self.xmedian
        self.relative_median_absolute_difference = self.rmedad = \
            self.median_absolute_difference / self.xmedian

        # Normalized mean bias factor and mean absolute error factor
        self.normalized_mean_bias_factor = self.nmbf = nmbf(x,y)
        self.normalized_mean_absolute_error_factor = self.nmaef = nmaef(x,y)

        # RMS difference
        self.root_mean_square_difference = self.rmsd = np.sqrt( np.mean( np.power( diff, 2) ) )

        # Covariance, correlation
        self.covariance = np.cov(x,y)[0][1]
        self.correlation = self.correlation_pearson = self.R = self.r = self.pearsonr = \
            np.corrcoef(x,y)[0][1]
        self.correlation_spearman = self.spearmanr = stats.spearmanr(x,y).statistic
        self.R2 = self.r2 = self.R**2

    def __getitem__(self,key):
        '''Accesses attribute values via object['key']'''
        return getattr(self,key)

    def fitline(self,method='sma',intercept=True,**kwargs):
        '''Compute bivariate line fit

        Parameters
        ----------
        method : str
            line fitting method (case insensitive): sma (default), ols,
            sen (also "theil", "theilsen"), siegel.
            wls and York are recognized but not implemented
        intercept : bool
            defines whether non-zero intercept should be fitted
            used by the sma and ols methods only;
            sen and siegel always fit an intercept
        **kwargs
            passed to `acgc.stats.sma` (e.g. robust=True)
            used by the sma method only

        Returns
        -------
        result : dict
            dictionary with keys:
            - slope (float)
                slope of fitted line
            - intercept (float)
                intercept of fitted line
            - fittedvalues (array (N,))
                values on fit line
            - residuals (array (N,))
                residual from fit line

        Raises
        ------
        NotImplementedError
            if method is wls or York
        ValueError
            if method is not recognized
        '''

        method_name = method.lower()

        if method_name=='sma':
            fit = sma( self._x,
                       self._y,
                       self._w,
                       intercept=intercept,
                       **kwargs)
            fit_slope = fit['slope']
            fit_intercept = fit['intercept']

        elif method_name=='ols':
            if intercept:
                # Design matrix with columns [x, 1] solves for slope and intercept
                design = np.vstack([self._x,np.ones(len(self._x))]).T
                fit_slope, fit_intercept = np.linalg.lstsq( design, self._y, rcond=None )[0]
            else:
                # Single-column design matrix: the solution holds only the slope,
                # and the line is forced through the origin
                design = np.vstack([self._x]).T
                fit_slope = np.linalg.lstsq( design, self._y, rcond=None )[0][0]
                fit_intercept = 0.0

        elif method_name in ('theil','sen','theilsen'):
            # scipy expects the dependent variable first
            sen = stats.theilslopes( self._y,
                                     self._x )
            fit_slope = sen.slope
            fit_intercept = sen.intercept

        elif method_name=='siegel':
            # scipy expects the dependent variable first
            siegel = stats.siegelslopes( self._y,
                                         self._x )
            fit_slope = siegel.slope
            fit_intercept = siegel.intercept

        elif method_name=='wls':
            raise NotImplementedError('WLS regression not implemented yet')

        elif method_name=='york':
            raise NotImplementedError('York regression not implemented yet')

        else:
            raise ValueError('Undefined method '+method)

        fitted = fit_slope * self._x + fit_intercept

        return dict( slope = fit_slope,
                     intercept = fit_intercept,
                     fittedvalues = fitted,
                     residuals = self._y - fitted )

    def slope(self,method='sma',intercept=True,**kwargs):
        '''Compute slope of bivariate line fit

        Parameters
        ----------
        method : str
            line fitting method, see `fitline`; default is sma
        intercept : bool
            defines whether non-zero intercept should be fitted
        **kwargs
            passed to `fitline`

        Returns
        -------
        slope : float
            slope of the fitted line
        '''
        return self.fitline(method,intercept,**kwargs)['slope']

    def intercept(self,method='sma',intercept=True,**kwargs):
        '''Compute intercept of bivariate line fit

        Parameters
        ----------
        method : str
            line fitting method, see `fitline`; default is sma
        intercept : bool
            defines whether non-zero intercept should be fitted
        **kwargs
            passed to `fitline`

        Returns
        -------
        intercept : float
            value of y intercept
        '''
        return self.fitline(method,intercept,**kwargs)['intercept']

    def _expand_variables(self,variables):
        '''Expand special strings into a list of variables

        Parameter
        ---------
        variables : list or str, default='common'
            Special strings ("all","common") will be expanded to a list of variables
            list arguments will not be modified

        Returns
        -------
        list
            variable names

        Raises
        ------
        ValueError
            if variables is not a list, None, "all", or "common"
        '''
        if variables is None:
            variables='common'
        if variables=='all':
            variables=['MD','MAD','RMD','RMAD','MRD','SMD','SMAD',
                       'MedD','MedAD','RMedD','RMedAD','MedRD',
                       'NMBF','NMAEF','RMSD',
                       'R','R2','spearmanr','slope','intercept']
        elif variables=='common':
            variables=['MD','MAD','RMD','RMAD','MRD','R2','slope']
        if not isinstance(variables,list):
            raise ValueError(
                'variables must be a list, None, or one of these strings: "all","common"')

        return variables

    def summary_dict(self, variables=None, fitline_kw=None ):
        '''Summarize bivariate statistics into a dict

        Parameters
        ----------
        variables : list or str, default='common'
            names of attribute variables to include in summary
            names are case insensitive
            The following strings are also accepted in place of a list
            "all" (displays all variables)
            "common" (displays all measures of mean difference)
        fitline_kw : dict, default=None
            keywords passed to `slope` and `intercept`, and from there to `fitline`
            default (None) uses method='sma' and intercept=True

        Returns
        -------
        summary : dict
            names and values of variables; keys are spelled as in `variables`
        '''

        # List of variables
        variables = self._expand_variables(variables)

        if fitline_kw is None:
            fitline_kw = {'method':'sma',
                          'intercept':True}

        # Construct the dict
        summary = {}
        for v in variables:
            attribute = v.lower()
            if attribute in ('slope','intercept'):
                # These variables are object methods, so they must be called
                summary[v] = getattr(self,attribute)(**fitline_kw)
            else:
                # Retrieve values
                summary[v] = getattr(self,attribute)

        return summary

    def summary(self, variables=None, fitline_kw=None,
                floatformat='{:.4f}', stringlength=None ):
        '''Summarize bivariate statistics

        Parameters
        ----------
        variables : list or str, default='common'
            names of attribute variables to include in summary
            names are case insensitive
            The following strings are also accepted in place of a list
            "all" (displays all variables)
            "common" (displays all measures of mean difference)
        fitline_kw : dict, default=None
            keywords passed to `fitline`
        floatformat : str, default='{:.4f}'
            format specifier for floating point values
        stringlength : int, default=None
            length of the variables on output
            default (None) is to use the length of the longest variable name

        Returns
        -------
        summary : str
            names and values of variables, one "name = value" line per variable
        '''
        # List of variables
        variables = self._expand_variables(variables)

        if stringlength is None:
            # default=0 keeps an empty variable list from raising
            stringlength = max( (len(v) for v in variables), default=0 )
        lineformat = '{:'+str(stringlength)+'s} = '+floatformat+'\n'

        # Get a dict containing the needed variables
        summarydict = self.summary_dict( variables, fitline_kw )

        return ''.join( lineformat.format(k,v) for k,v in summarydict.items() )

    def summary_fig_table(self, ax, variables=None, fitline_kw=None,
                          floatformat='{:.3f}',
                          loc=None, loc_units='axes',
                          **kwargs):
        '''Display bivariate statistics as a table on a plot axis

        The table is drawn as two text boxes: one column of names, one column of values.

        Parameters
        ----------
        ax : matplotlib axes
            axis where the table will be displayed
        variables : list or str, default='common'
            names of attribute variables to include in summary
            names are case insensitive
            The following strings are also accepted in place of a list
            "all" (displays all variables)
            "common" (displays all measures of mean difference)
        fitline_kw : dict, default=None
            keywords passed to `fitline`
        floatformat : str, default='{:.3f}'
            format specifier for floating point values
        loc : tuple (x0,y0), default=(0.8, 0.05)
            location on the axis where the table will be drawn
            can be in data units or axes units [0-1]
        loc_units : {'axes' (default), 'data'}
            specifies whether loc has 'data' units or 'axes' units [0-1]
        **kwargs
            passed to `ax.text` for both columns

        Returns
        -------
        text1, text2 : matplotlib text object
            Artist for the two text boxes

        Raises
        ------
        ValueError
            if loc_units is not one of "data", "axes", "axis"
        '''
        # List of variables
        variables = self._expand_variables(variables)

        # Default location in lower right corner
        if loc is None:
            loc = (0.8,0.05)

        # Coordinates for loc
        units = loc_units.lower()
        if units=='data':
            coord=ax.transData
        elif units in ('axes','axis'):
            coord=ax.transAxes
        else:
            raise ValueError('Display units should be "Data" or "Axes"')

        # Get a dict containing the needed variables
        summarydict = self.summary_dict( variables, fitline_kw )

        # Column of label text
        label_text = '\n'.join([_texify_name(key) for key in summarydict])
        # Column of value text
        value_text = '\n'.join([floatformat.format(value) for value in summarydict.values()])

        # Check if horizontal alignment keyword is used
        # 'horizontalalignment' takes precedence over its short form 'ha'
        ha = kwargs.get('horizontalalignment', kwargs.get('ha',''))

        # For right alignment, align on values first
        # Otherwise, align on labels
        if ha=='right':
            first_text = value_text
            second_text = label_text
            sign = -1
        else:
            first_text = label_text
            second_text = value_text
            sign = +1

        # Add first column of text
        t1 = ax.text(loc[0],loc[1],
                     first_text,
                     transform=coord,
                     **kwargs
                     )

        # Get width of first text column, converted to the units of loc
        bbox = t1.get_window_extent().transformed(coord.inverted())
        width = bbox.x1-bbox.x0

        # Add second column of text, offset by the width of the first column
        t2 = ax.text(loc[0]+width*sign,loc[1],
                     second_text,
                     transform=coord,
                     **kwargs
                     )

        return [t1,t2]
class BivariateStatistics:
    '''A suite of common statistics to quantify bivariate relationships

    The `summary` method provides a formatted summary of these statistics

    Attributes
    ----------
    xmean, ymean : float
        mean of x and y variables
    xmedian, ymedian : float
        median of x and y variables
    xstd, ystd : float
        standard deviation of x and y variables
    mean_difference, md : float
        ymean - xmean
    mean_absolute_difference, mad : float
        mean( |y-x| )
    relative_mean_difference, rmd : float
        md / xmean
    relative_mean_absolute_difference, rmad : float
        mad / xmean
    standardized_mean_difference, smd : float
        md / xstd
    standardized_mean_absolute_difference, smad : float
        mad / xstd
    mean_relative_difference, mrd : float
        mean(y/x) - 1
    median_difference, medd : float
        median(y-x)
    median_absolute_difference, medad : float
        median(|y-x|)
    relative_median_difference, rmedd : float
        median(y-x) / xmedian
    relative_median_absolute_difference, rmedad : float
        median(|y-x|) / xmedian
    median_relative_difference, medianrd, medrd : float
        median(y/x)-1
    normalized_mean_bias_factor, nmbf : float
        see `nmbf`
    normalized_mean_absolute_error_factor, nmaef : float
        see `nmaef`
    root_mean_square_difference, rmsd : float
        $\\sqrt{ \\langle (y - x)^2 \\rangle }$
    covariance : float
        cov(x,y)
    correlation_pearson, correlation, pearsonr, R, r : float
        Pearson linear correlation coefficient
    correlation_spearman, spearmanr : float
        Spearman, non-parametric rank correlation coefficient
    R2, r2 : float
        Linear coefficient of determination, $R^2$
    '''

    def __init__(self,x,y,w=None):
        '''Compute suite of bivariate statistics during initialization

        Statistic values are saved in attributes.
        CAUTION: Weights w are ignored except in SMA fit

        Parameters
        ----------
        x : ndarray
            independent variable values
        y : ndarray
            dependent variable values, same size as x
        w : ndarray, optional
            weights for points (x,y), same size as x and y

        Raises
        ------
        ValueError
            if x and y, or w when given, do not have the same length
        '''

        #Ensure that x and y have same length
        if len(x) != len(y):
            raise ValueError( 'Arguments x and y must have the same length' )
        if (w is not None) and (len(w) != len(x)):
            raise ValueError( 'Argument w (if present) must have the same length as x' )

        # Keep the inputs; they are needed later by fitline
        self._x = x
        self._y = y
        self._w = w

        diff = y - x
        absdiff = np.abs( diff )
        ratio = y/x

        # Means, medians, and standard deviations
        self.xmean = np.mean(x)
        self.ymean = np.mean(y)
        self.xmedian = np.median(x)
        self.ymedian = np.median(y)
        self.xstd = np.std(x)
        self.ystd = np.std(y)

        # Mean and mean absolute differences
        self.mean_difference = self.md = self.ymean - self.xmean
        self.mean_absolute_difference = self.mad = np.mean( absdiff )

        # Relative and standardized differences
        self.relative_mean_difference = self.rmd = self.mean_difference / self.xmean
        self.relative_mean_absolute_difference = self.rmad = \
            self.mean_absolute_difference / self.xmean
        self.standardized_mean_difference = self.smd = self.mean_difference / self.xstd
        self.standardized_mean_absolute_difference = self.smad = \
            self.mean_absolute_difference / self.xstd

        # Mean and median relative differences
        self.mean_relative_difference = self.mrd = np.mean( ratio - 1 )
        self.median_relative_difference = self.medianrd = self.medrd = np.median( ratio - 1 )

        # Median and median absolute differences
        self.median_difference = self.medd = np.median( diff )
        self.median_absolute_difference = self.medad = np.median( absdiff )

        # Relative median differences
        self.relative_median_difference = self.rmedd = self.median_difference / self.xmedian
        self.relative_median_absolute_difference = self.rmedad = \
            self.median_absolute_difference / self.xmedian

        # Normalized mean bias factor and mean absolute error factor
        self.normalized_mean_bias_factor = self.nmbf = nmbf(x,y)
        self.normalized_mean_absolute_error_factor = self.nmaef = nmaef(x,y)

        # RMS difference
        self.root_mean_square_difference = self.rmsd = np.sqrt( np.mean( np.power( diff, 2) ) )

        # Covariance, correlation
        self.covariance = np.cov(x,y)[0][1]
        self.correlation = self.correlation_pearson = self.R = self.r = self.pearsonr = \
            np.corrcoef(x,y)[0][1]
        self.correlation_spearman = self.spearmanr = stats.spearmanr(x,y).statistic
        self.R2 = self.r2 = self.R**2

    def __getitem__(self,key):
        '''Accesses attribute values via object['key']'''
        return getattr(self,key)

    def fitline(self,method='sma',intercept=True,**kwargs):
        '''Compute bivariate line fit

        Parameters
        ----------
        method : str
            line fitting method (case insensitive): sma (default), ols,
            sen (also "theil", "theilsen"), siegel.
            wls and York are recognized but not implemented
        intercept : bool
            defines whether non-zero intercept should be fitted
            used by the sma and ols methods only;
            sen and siegel always fit an intercept
        **kwargs
            passed to `acgc.stats.sma` (e.g. robust=True)
            used by the sma method only

        Returns
        -------
        result : dict
            dictionary with keys:
            - slope (float)
                slope of fitted line
            - intercept (float)
                intercept of fitted line
            - fittedvalues (array (N,))
                values on fit line
            - residuals (array (N,))
                residual from fit line

        Raises
        ------
        NotImplementedError
            if method is wls or York
        ValueError
            if method is not recognized
        '''

        method_name = method.lower()

        if method_name=='sma':
            fit = sma( self._x,
                       self._y,
                       self._w,
                       intercept=intercept,
                       **kwargs)
            fit_slope = fit['slope']
            fit_intercept = fit['intercept']

        elif method_name=='ols':
            if intercept:
                # Design matrix with columns [x, 1] solves for slope and intercept
                design = np.vstack([self._x,np.ones(len(self._x))]).T
                fit_slope, fit_intercept = np.linalg.lstsq( design, self._y, rcond=None )[0]
            else:
                # Single-column design matrix: the solution holds only the slope,
                # and the line is forced through the origin
                design = np.vstack([self._x]).T
                fit_slope = np.linalg.lstsq( design, self._y, rcond=None )[0][0]
                fit_intercept = 0.0

        elif method_name in ('theil','sen','theilsen'):
            # scipy expects the dependent variable first
            sen = stats.theilslopes( self._y,
                                     self._x )
            fit_slope = sen.slope
            fit_intercept = sen.intercept

        elif method_name=='siegel':
            # scipy expects the dependent variable first
            siegel = stats.siegelslopes( self._y,
                                         self._x )
            fit_slope = siegel.slope
            fit_intercept = siegel.intercept

        elif method_name=='wls':
            raise NotImplementedError('WLS regression not implemented yet')

        elif method_name=='york':
            raise NotImplementedError('York regression not implemented yet')

        else:
            raise ValueError('Undefined method '+method)

        fitted = fit_slope * self._x + fit_intercept

        return dict( slope = fit_slope,
                     intercept = fit_intercept,
                     fittedvalues = fitted,
                     residuals = self._y - fitted )

    def slope(self,method='sma',intercept=True,**kwargs):
        '''Compute slope of bivariate line fit

        Parameters
        ----------
        method : str
            line fitting method, see `fitline`; default is sma
        intercept : bool
            defines whether non-zero intercept should be fitted
        **kwargs
            passed to `fitline`

        Returns
        -------
        slope : float
            slope of the fitted line
        '''
        return self.fitline(method,intercept,**kwargs)['slope']

    def intercept(self,method='sma',intercept=True,**kwargs):
        '''Compute intercept of bivariate line fit

        Parameters
        ----------
        method : str
            line fitting method, see `fitline`; default is sma
        intercept : bool
            defines whether non-zero intercept should be fitted
        **kwargs
            passed to `fitline`

        Returns
        -------
        intercept : float
            value of y intercept
        '''
        return self.fitline(method,intercept,**kwargs)['intercept']

    def _expand_variables(self,variables):
        '''Expand special strings into a list of variables

        Parameter
        ---------
        variables : list or str, default='common'
            Special strings ("all","common") will be expanded to a list of variables
            list arguments will not be modified

        Returns
        -------
        list
            variable names

        Raises
        ------
        ValueError
            if variables is not a list, None, "all", or "common"
        '''
        if variables is None:
            variables='common'
        if variables=='all':
            variables=['MD','MAD','RMD','RMAD','MRD','SMD','SMAD',
                       'MedD','MedAD','RMedD','RMedAD','MedRD',
                       'NMBF','NMAEF','RMSD',
                       'R','R2','spearmanr','slope','intercept']
        elif variables=='common':
            variables=['MD','MAD','RMD','RMAD','MRD','R2','slope']
        if not isinstance(variables,list):
            raise ValueError(
                'variables must be a list, None, or one of these strings: "all","common"')

        return variables

    def summary_dict(self, variables=None, fitline_kw=None ):
        '''Summarize bivariate statistics into a dict

        Parameters
        ----------
        variables : list or str, default='common'
            names of attribute variables to include in summary
            names are case insensitive
            The following strings are also accepted in place of a list
            "all" (displays all variables)
            "common" (displays all measures of mean difference)
        fitline_kw : dict, default=None
            keywords passed to `slope` and `intercept`, and from there to `fitline`
            default (None) uses method='sma' and intercept=True

        Returns
        -------
        summary : dict
            names and values of variables; keys are spelled as in `variables`
        '''

        # List of variables
        variables = self._expand_variables(variables)

        if fitline_kw is None:
            fitline_kw = {'method':'sma',
                          'intercept':True}

        # Construct the dict
        summary = {}
        for v in variables:
            attribute = v.lower()
            if attribute in ('slope','intercept'):
                # These variables are object methods, so they must be called
                summary[v] = getattr(self,attribute)(**fitline_kw)
            else:
                # Retrieve values
                summary[v] = getattr(self,attribute)

        return summary

    def summary(self, variables=None, fitline_kw=None,
                floatformat='{:.4f}', stringlength=None ):
        '''Summarize bivariate statistics

        Parameters
        ----------
        variables : list or str, default='common'
            names of attribute variables to include in summary
            names are case insensitive
            The following strings are also accepted in place of a list
            "all" (displays all variables)
            "common" (displays all measures of mean difference)
        fitline_kw : dict, default=None
            keywords passed to `fitline`
        floatformat : str, default='{:.4f}'
            format specifier for floating point values
        stringlength : int, default=None
            length of the variables on output
            default (None) is to use the length of the longest variable name

        Returns
        -------
        summary : str
            names and values of variables, one "name = value" line per variable
        '''
        # List of variables
        variables = self._expand_variables(variables)

        if stringlength is None:
            # default=0 keeps an empty variable list from raising
            stringlength = max( (len(v) for v in variables), default=0 )
        lineformat = '{:'+str(stringlength)+'s} = '+floatformat+'\n'

        # Get a dict containing the needed variables
        summarydict = self.summary_dict( variables, fitline_kw )

        return ''.join( lineformat.format(k,v) for k,v in summarydict.items() )

    def summary_fig_table(self, ax, variables=None, fitline_kw=None,
                          floatformat='{:.3f}',
                          loc=None, loc_units='axes',
                          **kwargs):
        '''Display bivariate statistics as a table on a plot axis

        The table is drawn as two text boxes: one column of names, one column of values.

        Parameters
        ----------
        ax : matplotlib axes
            axis where the table will be displayed
        variables : list or str, default='common'
            names of attribute variables to include in summary
            names are case insensitive
            The following strings are also accepted in place of a list
            "all" (displays all variables)
            "common" (displays all measures of mean difference)
        fitline_kw : dict, default=None
            keywords passed to `fitline`
        floatformat : str, default='{:.3f}'
            format specifier for floating point values
        loc : tuple (x0,y0), default=(0.8, 0.05)
            location on the axis where the table will be drawn
            can be in data units or axes units [0-1]
        loc_units : {'axes' (default), 'data'}
            specifies whether loc has 'data' units or 'axes' units [0-1]
        **kwargs
            passed to `ax.text` for both columns

        Returns
        -------
        text1, text2 : matplotlib text object
            Artist for the two text boxes

        Raises
        ------
        ValueError
            if loc_units is not one of "data", "axes", "axis"
        '''
        # List of variables
        variables = self._expand_variables(variables)

        # Default location in lower right corner
        if loc is None:
            loc = (0.8,0.05)

        # Coordinates for loc
        units = loc_units.lower()
        if units=='data':
            coord=ax.transData
        elif units in ('axes','axis'):
            coord=ax.transAxes
        else:
            raise ValueError('Display units should be "Data" or "Axes"')

        # Get a dict containing the needed variables
        summarydict = self.summary_dict( variables, fitline_kw )

        # Column of label text
        label_text = '\n'.join([_texify_name(key) for key in summarydict])
        # Column of value text
        value_text = '\n'.join([floatformat.format(value) for value in summarydict.values()])

        # Check if horizontal alignment keyword is used
        # 'horizontalalignment' takes precedence over its short form 'ha'
        ha = kwargs.get('horizontalalignment', kwargs.get('ha',''))

        # For right alignment, align on values first
        # Otherwise, align on labels
        if ha=='right':
            first_text = value_text
            second_text = label_text
            sign = -1
        else:
            first_text = label_text
            second_text = value_text
            sign = +1

        # Add first column of text
        t1 = ax.text(loc[0],loc[1],
                     first_text,
                     transform=coord,
                     **kwargs
                     )

        # Get width of first text column, converted to the units of loc
        bbox = t1.get_window_extent().transformed(coord.inverted())
        width = bbox.x1-bbox.x0

        # Add second column of text, offset by the width of the first column
        t2 = ax.text(loc[0]+width*sign,loc[1],
                     second_text,
                     transform=coord,
                     **kwargs
                     )

        return [t1,t2]
A suite of common statistics to quantify bivariate relationships
Class method 'summary' provides a formatted summary of these statistics
Attributes
- xmean, ymean (float): mean of x and y variables
- xmedian, ymedian (float): median of x and y variables
- xstd, ystd (float): standard deviation of x and y variables
- mean_difference, md (float): ymean - xmean
- mean_absolute_difference, mad (float): mean( |y-x| )
- relative_mean_difference, rmd (float): md / xmean
- relative_mean_absolute_difference, rmad (float): mad / xmean
- standardized_mean_difference, smd (float): md / xstd
- standardized_mean_absolute_difference, smad (float): mad /xstd
- mean_relative_difference, mrd (float): mean(y/x) - 1
- median_difference, medd (float): median(y-x)
- median_absolute_difference, medad (float): median(|y-x|)
- relative_median_difference, rmedd (float): median(y-x) / xmedian
- relative_median_absolute_difference, rmedad (float): median(|y-x|) / xmedian
- median_relative_difference, medianrd, medrd (float): median(y/x)-1
- normalized_mean_bias_factor, nmbf (float):
see
nmbf
- normalized_mean_absolute_error_factor, nmaef (float):
see
nmaef
- root_mean_square_difference, rmsd (float): $\sqrt{ \langle (y - x)^2 \rangle }$
- covariance (float): cov(x,y)
- correlation_pearson, correlation, pearsonr, R, r (float): Pearson linear correlation coefficient
- correlation_spearman, spearmanr (float): Spearman, non-parametric rank correlation coefficient
- R2, r2 (float): Linear coefficient of determination, $R^2$
def __init__(self, x, y, w=None):
    '''Compute the suite of bivariate statistics at construction time

    Each statistic is evaluated once and stored as an instance attribute;
    most are reachable under a long name and one or more short aliases.
    CAUTION: Weights w are ignored except in SMA fit

    Parameters
    ----------
    x : ndarray
        independent variable values
    y : ndarray
        dependent variable values, same size as x
    w : ndarray, optional
        weights for points (x,y), same size as x and y

    Raises
    ------
    ValueError
        if x and y differ in length, or if w is given and differs
        in length from x
    '''

    # Inputs must be paired point by point
    if len(x) != len(y):
        raise ValueError( 'Arguments x and y must have the same length' )
    if w is not None and len(w) != len(x):
        raise ValueError( 'Argument w (if present) must have the same length as x' )

    # Point-wise difference, absolute difference and relative difference
    delta = y - x
    abs_delta = np.abs( delta )
    rel_delta = y / x - 1

    # Means, medians, and standard deviations
    self.xmean, self.ymean = np.mean(x), np.mean(y)
    self.xmedian, self.ymedian = np.median(x), np.median(y)
    self.xstd, self.ystd = np.std(x), np.std(y)

    # Inputs are kept for the line-fitting methods
    self._x, self._y, self._w = x, y, w

    # Mean and mean absolute differences
    md = self.ymean - self.xmean
    mad = np.mean( abs_delta )
    self.mean_difference = self.md = md
    self.mean_absolute_difference = self.mad = mad

    # Differences relative to the mean of x and to the spread of x
    self.relative_mean_difference = self.rmd = md / self.xmean
    self.relative_mean_absolute_difference = self.rmad = mad / self.xmean
    self.standardized_mean_difference = self.smd = md / self.xstd
    self.standardized_mean_absolute_difference = self.smad = mad / self.xstd

    # Mean and median of the point-wise relative difference
    self.mean_relative_difference = self.mrd = np.mean( rel_delta )
    self.median_relative_difference = self.medianrd = self.medrd = np.median( rel_delta )

    # Median and median absolute differences
    medd = np.median( delta )
    medad = np.median( abs_delta )
    self.median_difference = self.medd = medd
    self.median_absolute_difference = self.medad = medad

    # Median differences relative to the median of x
    self.relative_median_difference = self.rmedd = medd / self.xmedian
    self.relative_median_absolute_difference = self.rmedad = medad / self.xmedian

    # Bias and error factors, with x as the reference
    self.normalized_mean_bias_factor = self.nmbf = nmbf(x,y)
    self.normalized_mean_absolute_error_factor = self.nmaef = nmaef(x,y)

    # RMS difference
    self.root_mean_square_difference = self.rmsd = np.sqrt( np.mean( np.power( delta, 2) ) )

    # Covariance: off-diagonal element of the 2x2 covariance matrix
    self.covariance = np.cov(x,y)[0][1]

    # Pearson correlation: off-diagonal element of the correlation matrix
    pearson = np.corrcoef(x,y)[0][1]
    self.correlation = self.correlation_pearson = self.R = self.r = self.pearsonr = pearson

    # Spearman rank correlation
    self.correlation_spearman = self.spearmanr = stats.spearmanr(x,y).statistic

    # Coefficient of determination
    self.R2 = self.r2 = self.R**2
Compute suite of bivariate statistics during initialization
Statistic values are saved in attributes. CAUTION: Weights w are ignored except in SMA fit
Parameters
- x (ndarray): independent variable values
- y (ndarray): dependent variable values, same size as x
- w (ndarray, optional): weights for points (x,y), same size as x and y
def fitline(self, method='sma', intercept=True, **kwargs):
    '''Compute bivariate line fit

    Parameters
    ----------
    method : str
        line fitting method (case insensitive):
        'sma' (default), 'ols', 'sen' (also 'theil', 'theilsen'), 'siegel'.
        'wls' and 'york' are recognized but not implemented.
    intercept : bool
        defines whether non-zero intercept should be fitted.
        Only the 'sma' and 'ols' methods use this argument;
        'sen' and 'siegel' always fit an intercept.
    **kwargs
        passed to `acgc.stats.sma` (e.g. robust=True);
        ignored by the other methods

    Returns
    -------
    result : dict
        dictionary with keys:
        - slope (float)
            slope of fitted line
        - intercept (float)
            intercept of fitted line (0 for 'ols' with intercept=False)
        - fittedvalues (array (N,))
            values on fit line
        - residuals (array (N,))
            residual from fit line

    Raises
    ------
    NotImplementedError
        if method is 'wls' or 'york'
    ValueError
        if method is not recognized
    '''

    fit_method = method.lower()
    x = self._x
    y = self._y

    if fit_method == 'sma':
        fit = sma( x,
                   y,
                   self._w,
                   intercept=intercept,
                   **kwargs)
        slope = fit['slope']
        intercept_value = fit['intercept']

    elif fit_method == 'ols':
        if intercept:
            # Design matrix with columns [x, 1] => coefficients [slope, intercept]
            design = np.vstack([x, np.ones(len(x))]).T
            coef = np.linalg.lstsq( design, y, rcond=None )[0]
            slope = coef[0]
            intercept_value = coef[1]
        else:
            # Only one coefficient is fitted, so the line passes through the origin
            coef = np.linalg.lstsq( np.vstack([x]).T, y, rcond=None )[0]
            slope = coef[0]
            intercept_value = 0.0

    elif fit_method in ('theil', 'sen', 'theilsen'):
        # scipy expects the dependent variable first
        sen = stats.theilslopes( y, x )
        slope = sen.slope
        intercept_value = sen.intercept

    elif fit_method == 'siegel':
        # scipy expects the dependent variable first
        siegel = stats.siegelslopes( y, x )
        slope = siegel.slope
        intercept_value = siegel.intercept

    elif fit_method == 'wls':
        raise NotImplementedError('WLS regression not implemented yet')

    elif fit_method == 'york':
        raise NotImplementedError('York regression not implemented yet')

    else:
        raise ValueError('Undefined method '+method)

    fitted = slope * x + intercept_value

    return dict( slope = slope,
                 intercept = intercept_value,
                 fittedvalues = fitted,
                 residuals = y - fitted )
Compute bivariate line fit
Parameters
- method (str): line fitting method: sma (default), ols, wls, York, sen, siegel
- intercept (bool): defines whether non-zero intercept should be fitted
- **kwargs: passed to
acgc.stats.sma
(e.g. robust=True)
Returns
- result (dict):
dictionary with keys:
- slope (float) slope of fitted line
- intercept (float) intercept of fitted line
- fittedvalues (array (N,)) values on fit line
- residuals (array (N,)) residual from fit line
def slope(self, method='sma', intercept=True, **kwargs):
    '''Compute slope of bivariate line fit

    Convenience wrapper that runs `fitline` and keeps only the slope.

    Parameters
    ----------
    method : str
        line fitting method, forwarded to `fitline` (default 'sma')
    intercept : bool
        defines whether non-zero intercept should be fitted
    **kwargs
        passed to `fitline`

    Returns
    -------
    slope : float
        slope of the fitted line
    '''
    fit = self.fitline(method, intercept, **kwargs)
    return fit['slope']
Compute slope of bivariate line fit
Parameters
- method (str): line fitting method: sma (default), ols, wls
- intercept (bool): defines whether non-zero intercept should be fitted
- **kwargs: passed to
fitline
Returns
- slope (float): slope of the fitted line
def intercept(self, method='sma', intercept=True, **kwargs):
    '''Compute intercept of bivariate line fit

    Convenience wrapper that runs `fitline` and keeps only the intercept.

    Parameters
    ----------
    method : str
        line fitting method, forwarded to `fitline` (default 'sma')
    intercept : bool
        defines whether non-zero intercept should be fitted
    **kwargs
        passed to `fitline`

    Returns
    -------
    intercept : float
        value of y intercept
    '''
    fit = self.fitline(method, intercept, **kwargs)
    return fit['intercept']
Compute intercept of bivariate line fit
Parameters
- method (str): line fitting method: sma (default) or ols
- intercept (bool): defines whether non-zero intercept should be fitted
- **kwargs: passed to
fitline
Returns
- intercept (float): value of y intercept
def summary_dict(self, variables=None, fitline_kw=None):
    '''Summarize bivariate statistics into a dict

    Parameters
    ----------
    variables : list or str, optional
        names of attribute variables to include in summary
        names are case insensitive
        The value is first expanded by `self._expand_variables`; the
        following strings are also accepted in place of a list
        "all" (displays all variables)
        "common" (displays all measures of mean difference)
        NOTE(review): None is presumably treated as "common" by
        `_expand_variables` -- confirm there.
    fitline_kw : dict, default=None
        keywords passed to self.slope() and self.intercept() when those
        variables are requested; None means method='sma', intercept=True

    Returns
    -------
    summary : dict
        names (as given in the expanded variable list) and values of variables
    '''

    # List of variables
    variables = self._expand_variables(variables)

    if fitline_kw is None:
        fitline_kw = {'method':'sma',
                      'intercept':True}

    # Construct the dict
    summary = {}
    for name in variables:
        # Attribute lookup is case insensitive
        attr = getattr(self, name.lower())
        if name.lower() in ('slope', 'intercept'):
            # These variables are object methods that must be called;
            # comparing the lower-cased name keeps e.g. 'Slope' from
            # returning the bound method itself
            summary[name] = attr(**fitline_kw)
        else:
            summary[name] = attr

    return summary
Summarize bivariate statistics into a dict
Parameters
- variables (list or str, default='common'):
names of attribute variables to include in summary
names are case insensitive
The following strings are also accepted in place of a list: "all" (displays all variables), "common" (displays all measures of mean difference)
- fitline_kw (dict, default=None): keywords passed to self.fitline()
Returns
- summary (dict): names and values of variables
def summary(self, variables=None, fitline_kw=None,
            floatformat='{:.4f}', stringlength=None):
    '''Summarize bivariate statistics

    Parameters
    ----------
    variables : list or str, optional
        names of attribute variables to include in summary
        names are case insensitive
        The value is first expanded by `self._expand_variables`; the
        following strings are also accepted in place of a list
        "all" (displays all variables)
        "common" (displays all measures of mean difference)
    fitline_kw : dict, default=None
        keywords passed to `fitline`
    floatformat : str, default='{:.4f}'
        format specifier for floating point values
    stringlength : int, default=None
        length of the variables on output
        default (None) is to use the length of the longest variable name

    Returns
    -------
    summary : str
        one line per variable, formatted as "name = value";
        empty string when no variables are requested
    '''
    # List of variables
    variables = self._expand_variables(variables)

    # Width of the name column; default=0 keeps an empty variable list
    # from raising
    if stringlength is None:
        stringlength = max((len(v) for v in variables), default=0)
    stringformat = '{:'+str(stringlength)+'s}'

    # Get a dict containing the needed variables
    summarydict = self.summary_dict( variables, fitline_kw )

    # One formatted row per variable
    rowformat = stringformat+' = '+floatformat+'\n'
    return ''.join( rowformat.format(k,v) for k,v in summarydict.items() )
Summarize bivariate statistics
Parameters
- variables (list or str, default='common'):
names of attribute variables to include in summary
names are case insensitive
The following strings are also accepted in place of a list: "all" (displays all variables), "common" (displays all measures of mean difference)
- floatformat (str, default='{:.4f}'): format specifier for floating point values
- stringlength (int, default=None): length of the variables on output default (None) is to use the length of the longest variable name
- fitline_kw (dict, default=None):
keywords passed to
fitline
Returns
- summary (str): names and values of variables
def summary_fig_table(self, ax, variables=None, fitline_kw=None,
                      floatformat='{:.3f}',
                      loc=None, loc_units='axes',
                      **kwargs):
    '''Display bivariate statistics as a table on a plot axis

    The table is drawn as two text artists placed side by side: one
    column of variable names and one column of formatted values.

    Parameters
    ----------
    ax : matplotlib.Figure.Axis
        axis where the table will be displayed
    variables : list or str, default='common'
        names of attribute variables to include in summary
        names are case insensitive
        The following strings are also accepted in place of a list
        "all" (displays all variables)
        "common" (displays all measures of mean difference)
    fitline_kw : dict, default=None
        keywords passed to `fitline`
    floatformat : str, default='{:.3f}'
        format specifier for floating point values
    loc : tuple (x0,y0), default=(0.8, 0.05)
        location on the axis where the table will be drawn
        can be in data units or axes units [0-1]
    loc_units : {'axes' (default), 'data'}
        specifies whether loc has 'data' units or 'axes' units [0-1]
        ('axis' is accepted as a synonym of 'axes'; case insensitive)
    **kwargs
        passed to both `ax.text` calls; 'ha' / 'horizontalalignment'
        is also inspected to decide the column order

    Returns
    -------
    text1, text2 : matplotlib text object
        Artist for the two text boxes, returned as a list

    Raises
    ------
    ValueError
        if loc_units is not one of 'data', 'axes', 'axis'
    '''
    # List of variables
    variables = self._expand_variables(variables)

    # Default location in lower right corner
    if loc is None:
        loc = (0.8,0.05)

    # Coordinate system in which loc is expressed
    units = loc_units.lower()
    if units == 'data':
        coord = ax.transData
    elif units in ['axes','axis']:
        coord = ax.transAxes
    else:
        raise ValueError('Display units should be "Data" or "Axes"')

    # Get a dict containing the needed variables
    summarydict = self.summary_dict( variables, fitline_kw )

    # One text block per column, one row per variable
    label_text = '\n'.join( _texify_name(key) for key in summarydict )
    value_text = '\n'.join( floatformat.format(value)
                            for value in summarydict.values() )

    # Horizontal alignment requested by the caller, if any;
    # the long keyword takes precedence over the short one
    ha = kwargs.get( 'horizontalalignment', kwargs.get('ha', '') )

    # For right alignment, the values are anchored at loc and the labels
    # are placed to their left; otherwise the labels are anchored at loc
    # and the values are placed to their right
    if ha == 'right':
        first_text, second_text, sign = value_text, label_text, -1
    else:
        first_text, second_text, sign = label_text, value_text, +1

    # Add first column of text
    t1 = ax.text( loc[0], loc[1],
                  first_text,
                  transform=coord,
                  **kwargs )

    # Width of the first column, converted from display units back
    # into the units of loc
    extent = t1.get_window_extent().transformed(coord.inverted())
    width = extent.x1 - extent.x0

    # Add second column of text, offset by the width of the first
    t2 = ax.text( loc[0]+width*sign, loc[1],
                  second_text,
                  transform=coord,
                  **kwargs )

    return [t1,t2]
Display bivariate statistics as a table on a plot axis
Parameters
- ax (matplotlib.Figure.Axis): axis where the table will be displayed
- variables (list or str, default='common'):
names of attribute variables to include in summary
names are case insensitive
The following strings are also accepted in place of a list: "all" (displays all variables), "common" (displays all measures of mean difference)
- fitline_kw (dict, default=None):
keywords passed to
fitline
- floatformat (str, default='{:.3f}'): format specifier for floating point values
- loc (tuple (x0,y0), default=(0.8, 0.05)): location on the axis where the table will be drawn; can be in data units or axes units [0-1]
- loc_units ({'axes' (default), 'data'}): specifies whether loc has 'data' units or 'axes' units [0-1]
Returns
- text1, text2 (matplotlib text object): Artist for the two text boxes
def nmb( x0, x1 ):
    '''Compute Normalized Mean Bias (NMB)

    NMB = ( mean(x1) - mean(x0) ) / mean(x0)

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    float
        normalized mean bias of x1 relative to x0
    '''

    # Both populations must be the same size
    assert (len(x0) == len(x1)), \
        "Parameters x0 and x1 must have the same length"

    # Ratio of means minus one is the same as (mean(x1)-mean(x0))/mean(x0)
    reference_mean = np.mean(x0)
    experiment_mean = np.mean(x1)
    return experiment_mean / reference_mean - 1
Compute Normalized Mean Bias (NMB)
NMB = ( mean(x1) - mean(x0) ) / mean(x0)
Parameters
- x0 (array_like): reference values
- x1 (array_like): experiment values
def nmae( x0, x1 ):
    '''Compute Normalized Mean Absolute Error (NMAE)

    NMAE = mean(abs(x1 - x0)) / abs(mean(x0))

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    float
        normalized mean absolute error of x1 relative to x0
    '''

    # Mean values
    x0_mean = np.mean(x0)

    # Mean absolute difference
    # np.subtract (rather than the - operator) also accepts plain
    # sequences such as lists, as promised by "array_like"
    abs_diff = np.mean( np.abs( np.subtract(x1, x0) ) )

    # Metric value
    return abs_diff / np.abs( x0_mean )
Compute Normalized Mean Absolute Error (NMAE)
NMAE = mean(abs(x1 - x0)) / abs(mean(x0))
Parameters
- x0 (array_like): reference values
- x1 (array_like): experiment values
def nmbf( x0, x1 ):
    '''Compute Normalized Mean Bias Factor (NMBF)

    Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125

    As implemented here:
    NMBF = mean(x1) / mean(x0) - 1    if mean(x1) >= mean(x0)
    NMBF = 1 - mean(x0) / mean(x1)    otherwise

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    float
        normalized mean bias factor of x1 relative to x0
    '''

    # Both populations must be the same size
    assert (len(x0) == len(x1)), \
        "Parameters x0 and x1 must have the same length"

    reference_mean = np.mean(x0)
    experiment_mean = np.mean(x1)

    # Experiment under-predicts: negative factor, normalized by experiment mean
    if not experiment_mean >= reference_mean:
        return 1 - reference_mean / experiment_mean

    # Experiment matches or over-predicts: normalized by reference mean
    return experiment_mean / reference_mean - 1
Compute Normalized Mean Bias Factor (NMBF)
Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125
Parameters
- x0 (array_like): reference values
- x1 (array_like): experiment values
def nmaef( x0, x1 ):
    '''Compute Normalized Mean Absolute Error Factor (NMAEF)

    Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125

    As implemented here:
    NMAEF = mean(abs(x1 - x0)) / mean(x0)    if mean(x1) >= mean(x0)
    NMAEF = mean(abs(x1 - x0)) / mean(x1)    otherwise

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    float
        normalized mean absolute error factor of x1 relative to x0
    '''

    # Ensure that arguments have the same length
    assert (len(x0) == len(x1)), \
        "Parameters x0 and x1 must have the same length"

    # Mean values
    x0_mean = np.mean(x0)
    x1_mean = np.mean(x1)

    # Mean absolute difference
    # np.subtract (rather than the - operator) also accepts plain
    # sequences such as lists, as promised by "array_like"
    abs_diff = np.mean( np.abs( np.subtract(x1, x0) ) )

    # Metric value: normalize by the smaller of the two means
    if x1_mean >= x0_mean:
        result = abs_diff / x0_mean
    else:
        result = abs_diff / x1_mean

    return result
Compute Normalized Mean Absolute Error Factor (NMAEF)
Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125
Parameters
- x0 (array_like): reference values
- x1 (array_like): experiment values