Package astLib :: Module astStats
[hide private]
[frames] | no frames]

Source Code for Module astLib.astStats

  1  # -*- coding: utf-8 -*- 
  2  """module for performing statistical calculations. 
  3   
  4  (c) 2007-2012 Matt Hilton  
  5   
  6  (c) 2013 Matt Hilton & Steven Boada 
  7   
  8  U{http://astlib.sourceforge.net} 
  9   
 10  This module (as you may notice) provides very few statistical routines. It does, however, provide 
 11  biweight (robust) estimators of location and scale, as described in Beers et al. 1990 (AJ, 100, 
 12  32), in addition to a robust least squares fitting routine that uses the biweight transform. 
 13   
 14  Some routines may fail if they are passed lists with few items and encounter a `divide by zero' 
 15  error. Where this occurs, the function will return None. An error message will be printed to the 
 16  console when this happens if astStats.REPORT_ERRORS=True (the default). Testing if an 
 17  astStats function returns None can be used to handle errors in scripts.  
 18   
 19  For extensive statistics modules, the Python bindings for GNU R (U{http://rpy.sourceforge.net}), or 
 20  SciPy (U{http://www.scipy.org}) are suggested. 
 21   
 22  """ 
 23   
 24  import math 
 25  import numpy 
 26  import sys 
 27   
 28  REPORT_ERRORS=True 
 29   
 30  #--------------------------------------------------------------------------------------------------- 
31 -def mean(dataList):
32 """Calculates the mean average of a list of numbers. 33 34 @type dataList: list 35 @param dataList: input data, must be a one dimensional list 36 @rtype: float 37 @return: mean average 38 39 """ 40 sum=0 41 for item in dataList: 42 sum=sum+float(item) 43 if len(dataList)>0: 44 mean=sum/float(len(dataList)) 45 else: 46 mean=0 47 return mean
48 49 #---------------------------------------------------------------------------------------------------
50 -def weightedMean(dataList):
51 """Calculates the weighted mean average of a two dimensional list (value, weight) of 52 numbers. 53 54 @type dataList: list 55 @param dataList: input data, must be a two dimensional list in format [value, weight] 56 @rtype: float 57 @return: weighted mean average 58 59 """ 60 sum=0 61 weightSum=0 62 for item in dataList: 63 sum=sum+float(item[0]*item[1]) 64 weightSum=weightSum+item[1] 65 if len(dataList)>0: 66 mean=sum/weightSum 67 else: 68 mean=0 69 return mean
70 71 #---------------------------------------------------------------------------------------------------
72 -def stdev(dataList):
73 """Calculates the (sample) standard deviation of a list of numbers. 74 75 @type dataList: list 76 @param dataList: input data, must be a one dimensional list 77 @rtype: float 78 @return: standard deviation 79 80 """ 81 listMean=mean(dataList) 82 sum=0 83 for item in dataList: 84 sum=sum+(float(item-listMean)*float(item-listMean)) 85 if len(dataList)>0: 86 stdev=math.sqrt(sum/(float(len(dataList))-1)) 87 else: 88 stdev=0 89 return stdev
90 91 #---------------------------------------------------------------------------------------------------
92 -def rms(dataList):
93 """Calculates the root mean square of a list of numbers. 94 95 @type dataList: list 96 @param dataList: input data, must be a one dimensional list 97 @rtype: float 98 @return: root mean square 99 100 """ 101 dataListSq=[] 102 for item in dataList: 103 dataListSq.append(item*item) 104 listMeanSq=mean(dataListSq) 105 rms=math.sqrt(listMeanSq) 106 107 return rms
108 109 #---------------------------------------------------------------------------------------------------
110 -def weightedStdev(dataList):
111 """Calculates the weighted (sample) standard deviation of a list of numbers. 112 113 @type dataList: list 114 @param dataList: input data, must be a two dimensional list in format [value, weight] 115 @rtype: float 116 @return: weighted standard deviation 117 118 @note: Returns None if an error occurs. 119 120 """ 121 listMean=weightedMean(dataList) 122 sum=0 123 wSum=0 124 wNonZero=0 125 for item in dataList: 126 if item[1]>0.0: 127 sum=sum+float((item[0]-listMean)/item[1])*float((item[0]-listMean)/item[1]) 128 wSum=wSum+float(1.0/item[1])*float(1.0/item[1]) 129 130 if len(dataList)>1: 131 nFactor=float(len(dataList))/float(len(dataList)-1) 132 stdev=math.sqrt(nFactor*(sum/wSum)) 133 else: 134 if REPORT_ERRORS==True: 135 print("""ERROR: astStats.weightedStdev() : dataList contains < 2 items.""") 136 stdev=None 137 return stdev
138 139 #---------------------------------------------------------------------------------------------------
140 -def median(dataList):
141 """Calculates the median of a list of numbers. 142 143 @type dataList: list 144 @param dataList: input data, must be a one dimensional list 145 @rtype: float 146 @return: median average 147 148 """ 149 dataList.sort() 150 midValue=float(len(dataList)/2.0) 151 fractPart=math.modf(midValue)[0] 152 153 if fractPart==0.5: # if odd number of items 154 midValue=math.ceil(midValue) 155 156 # Doesn't like it when handling a list with only one item in it! 157 if midValue<len(dataList)-1: 158 median=dataList[int(midValue)] 159 160 if fractPart!=0.5: # if even 161 prevItem=dataList[int(midValue)-1] 162 median=(median+prevItem)/2.0 163 164 else: 165 median=dataList[0] 166 167 return median
168 169 #---------------------------------------------------------------------------------------------------
170 -def modeEstimate(dataList):
171 """Returns an estimate of the mode of a set of values by mode=(3*median)-(2*mean). 172 173 @type dataList: list 174 @param dataList: input data, must be a one dimensional list 175 @rtype: float 176 @return: estimate of mode average 177 178 """ 179 mode=(3*median(dataList))-(2*mean(dataList)) 180 181 return mode
182 183 #---------------------------------------------------------------------------------------------------
184 -def MAD(dataList):
185 """Calculates the Median Absolute Deviation of a list of numbers. 186 187 @type dataList: list 188 @param dataList: input data, must be a one dimensional list 189 @rtype: float 190 @return: median absolute deviation 191 192 """ 193 listMedian=median(dataList) 194 195 # Calculate |x-M| values 196 diffModuli=[] 197 for item in dataList: 198 diffModuli.append(math.fabs(item-listMedian)) 199 diffModuli.sort() 200 201 midValue=float(len(diffModuli)/2.0) 202 fractPart=math.modf(midValue)[0] 203 204 if fractPart==0.5: # if odd number of items 205 midValue=math.ceil(midValue) 206 207 # Doesn't like it when handling a list with only one item in it! 208 if midValue<len(diffModuli)-1: 209 MAD=diffModuli[int(midValue)] 210 211 if fractPart!=0.5: # if even 212 prevItem=diffModuli[int(midValue)-1] 213 MAD=(MAD+prevItem)/2.0 214 215 else: 216 MAD=diffModuli[0] 217 218 return MAD
219 220 #---------------------------------------------------------------------------------------------------
221 -def biweightLocation(dataList, tuningConstant):
222 """Calculates the biweight location estimator (like a robust average) of a list of 223 numbers. 224 225 @type dataList: list 226 @param dataList: input data, must be a one dimensional list 227 @type tuningConstant: float 228 @param tuningConstant: 6.0 is recommended. 229 @rtype: float 230 @return: biweight location 231 232 @note: Returns None if an error occurs. 233 234 """ 235 C=tuningConstant 236 listMedian=median(dataList) 237 listMAD=MAD(dataList) 238 if listMAD!=0: 239 uValues=[] 240 for item in dataList: 241 uValues.append((item-listMedian)/(C*listMAD)) 242 243 top=0 # numerator equation (5) Beers et al if you like 244 bottom=0 # denominator 245 for i in range(len(uValues)): 246 if math.fabs(uValues[i])<=1.0: 247 top=top+((dataList[i]-listMedian) \ 248 *(1.0-(uValues[i]*uValues[i])) \ 249 *(1.0-(uValues[i]*uValues[i]))) 250 251 bottom=bottom+((1.0-(uValues[i]*uValues[i])) \ 252 *(1.0-(uValues[i]*uValues[i]))) 253 254 CBI=listMedian+(top/bottom) 255 256 else: 257 if REPORT_ERRORS==True: 258 print("""ERROR: astStats: biweightLocation() : MAD() returned 0.""") 259 return None 260 261 return CBI
262 263 #---------------------------------------------------------------------------------------------------
264 -def biweightScale(dataList, tuningConstant):
265 """Calculates the biweight scale estimator (like a robust standard deviation) of a list 266 of numbers. 267 268 @type dataList: list 269 @param dataList: input data, must be a one dimensional list 270 @type tuningConstant: float 271 @param tuningConstant: 9.0 is recommended. 272 @rtype: float 273 @return: biweight scale 274 275 @note: Returns None if an error occurs. 276 277 """ 278 C=tuningConstant 279 280 # Calculate |x-M| values and u values 281 listMedian=median(dataList) 282 listMAD=MAD(dataList) 283 diffModuli=[] 284 for item in dataList: 285 diffModuli.append(math.fabs(item-listMedian)) 286 uValues=[] 287 for item in dataList: 288 try: 289 uValues.append((item-listMedian)/(C*listMAD)) 290 except ZeroDivisionError: 291 if REPORT_ERRORS==True: 292 print("""ERROR: astStats.biweightScale() : divide by zero error.""") 293 return None 294 295 top=0 # numerator equation (9) Beers et al 296 bottom=0 297 valCount=0 # Count values where u<1 only 298 299 for i in range(len(uValues)): 300 # Skip u values >1 301 if math.fabs(uValues[i])<=1.0: 302 u2Term=1.0-(uValues[i]*uValues[i]) 303 u4Term=math.pow(u2Term, 4) 304 top=top+((diffModuli[i]*diffModuli[i])*u4Term) 305 bottom=bottom+(u2Term*(1.0-(5.0*(uValues[i]*uValues[i])))) 306 valCount=valCount+1 307 308 top=math.sqrt(top) 309 bottom=math.fabs(bottom) 310 311 SBI=math.pow(float(valCount), 0.5)*(top/bottom) 312 return SBI
313 314 #---------------------------------------------------------------------------------------------------
315 -def biweightClipped(dataList, tuningConstant, sigmaCut):
316 """Iteratively calculates biweight location and scale, using sigma clipping, for a list 317 of values. The calculation is performed on the first column of a multi-dimensional 318 list; other columns are ignored. 319 320 @type dataList: list 321 @param dataList: input data 322 @type tuningConstant: float 323 @param tuningConstant: 6.0 is recommended for location estimates, 9.0 is recommended for 324 scale estimates 325 @type sigmaCut: float 326 @param sigmaCut: sigma clipping to apply 327 @rtype: dictionary 328 @return: estimate of biweight location, scale, and list of non-clipped data, in the format 329 {'biweightLocation', 'biweightScale', 'dataList'} 330 331 @note: Returns None if an error occurs. 332 333 """ 334 335 iterations=0 336 clippedValues=[] 337 for row in dataList: 338 if type(row)==list: 339 clippedValues.append(row[0]) 340 else: 341 clippedValues.append(row) 342 343 while iterations<11 and len(clippedValues)>5: 344 345 cbi=biweightLocation(clippedValues, tuningConstant) 346 sbi=biweightScale(clippedValues, tuningConstant) 347 348 # check for either biweight routine falling over 349 # happens when feed in lots of similar numbers 350 # e.g. when bootstrapping with a small sample 351 if cbi==None or sbi==None: 352 353 if REPORT_ERRORS==True: 354 print("""ERROR: astStats : biweightClipped() : 355 divide by zero error.""") 356 357 return None 358 359 else: 360 361 clippedValues=[] 362 clippedData=[] 363 for row in dataList: 364 if type(row)==list: 365 if row[0]>cbi-(sigmaCut*sbi) \ 366 and row[0]<cbi+(sigmaCut*sbi): 367 clippedValues.append(row[0]) 368 clippedData.append(row) 369 else: 370 if row>cbi-(sigmaCut*sbi) \ 371 and row<cbi+(sigmaCut*sbi): 372 clippedValues.append(row) 373 clippedData.append(row) 374 375 iterations=iterations+1 376 377 return { 'biweightLocation':cbi , 378 'biweightScale':sbi, 379 'dataList':clippedData}
380 381 #---------------------------------------------------------------------------------------------------
382 -def biweightTransform(dataList, tuningConstant):
383 """Calculates the biweight transform for a set of values. Useful for using as weights in 384 robust line fitting. 385 386 @type dataList: list 387 @param dataList: input data, must be a one dimensional list 388 @type tuningConstant: float 389 @param tuningConstant: 6.0 is recommended for location estimates, 9.0 is recommended for 390 scale estimates 391 @rtype: list 392 @return: list of biweights 393 394 """ 395 C=tuningConstant 396 397 # Calculate |x-M| values and u values 398 listMedian=abs(median(dataList)) 399 cutoff=C*listMedian 400 biweights=[] 401 for item in dataList: 402 if abs(item)<cutoff: 403 biweights.append([item, 404 (1.0-((item/cutoff)*(item/cutoff))) \ 405 *(1.0-((item/cutoff)*(item/cutoff)))]) 406 else: 407 biweights.append([item, 0.0]) 408 409 return biweights
410 411 #---------------------------------------------------------------------------------------------------
412 -def OLSFit(dataList):
413 """Performs an ordinary least squares fit on a two dimensional list of numbers. 414 Minimum number of data points is 5. 415 416 @type dataList: list 417 @param dataList: input data, must be a two dimensional list in format [x, y] 418 @rtype: dictionary 419 @return: slope and intercept on y-axis, with associated errors, in the format 420 {'slope', 'intercept', 'slopeError', 'interceptError'} 421 422 @note: Returns None if an error occurs. 423 424 """ 425 sumX=0 426 sumY=0 427 sumXY=0 428 sumXX=0 429 n=float(len(dataList)) 430 if n > 2: 431 for item in dataList: 432 sumX=sumX+item[0] 433 sumY=sumY+item[1] 434 sumXY=sumXY+(item[0]*item[1]) 435 sumXX=sumXX+(item[0]*item[0]) 436 m=((n*sumXY)-(sumX*sumY))/((n*sumXX)-(sumX*sumX)) 437 c=((sumXX*sumY)-(sumX*sumXY))/((n*sumXX)-(sumX*sumX)) 438 439 sumRes=0 440 for item in dataList: 441 442 sumRes=sumRes+((item[1]-(m*item[0])-c) \ 443 *(item[1]-(m*item[0])-c)) 444 445 sigma=math.sqrt((1.0/(n-2))*sumRes) 446 447 try: 448 mSigma=(sigma*math.sqrt(n))/math.sqrt((n*sumXX)-(sumX*sumX)) 449 except: 450 mSigma=numpy.nan 451 try: 452 cSigma=(sigma*math.sqrt(sumXX))/math.sqrt((n*sumXX)-(sumX*sumX)) 453 except: 454 cSigma=numpy.nan 455 else: 456 if REPORT_ERRORS==True: 457 print("""ERROR: astStats.OLSFit() : dataList contains < 3 items.""") 458 459 return None 460 461 return {'slope':m, 462 'intercept':c, 463 'slopeError':mSigma, 464 'interceptError':cSigma}
465 466 #---------------------------------------------------------------------------------------------------
467 -def clippedMeanStdev(dataList, sigmaCut = 3.0, maxIterations = 10.0):
468 """Calculates the clipped mean and stdev of a list of numbers. 469 470 @type dataList: list 471 @param dataList: input data, one dimensional list of numbers 472 @type sigmaCut: float 473 @param sigmaCut: clipping in Gaussian sigma to apply 474 @type maxIterations: int 475 @param maxIterations: maximum number of iterations 476 @rtype: dictionary 477 @return: format {'clippedMean', 'clippedStdev', 'numPoints'} 478 479 """ 480 481 listCopy=[] 482 for d in dataList: 483 listCopy.append(d) 484 listCopy=numpy.array(listCopy) 485 486 iterations=0 487 while iterations < maxIterations and len(listCopy) > 4: 488 489 m=listCopy.mean() 490 s=listCopy.std() 491 492 listCopy=listCopy[numpy.less(abs(listCopy), abs(m+sigmaCut*s))] 493 494 iterations=iterations+1 495 496 return {'clippedMean': m, 'clippedStdev': s, 'numPoints': listCopy.shape[0]}
497 498 #---------------------------------------------------------------------------------------------------
499 -def clippedWeightedLSFit(dataList, sigmaCut):
500 """Performs a weighted least squares fit on a list of numbers with sigma clipping. Minimum number of data 501 points is 5. 502 503 @type dataList: list 504 @param dataList: input data, must be a three dimensional list in format [x, y, y weight] 505 @rtype: dictionary 506 @return: slope and intercept on y-axis, with associated errors, in the format 507 {'slope', 'intercept', 'slopeError', 'interceptError'} 508 509 @note: Returns None if an error occurs. 510 511 """ 512 513 iterations=0 514 clippedValues=[] 515 for row in dataList: 516 clippedValues.append(row) 517 518 while iterations<11 and len(clippedValues)>4: 519 520 fitResults=weightedLSFit(clippedValues, "errors") 521 522 if fitResults['slope'] == None: 523 524 if REPORT_ERRORS==True: 525 print("""ERROR: astStats : clippedWeightedLSFit() : 526 divide by zero error.""") 527 528 return None 529 530 else: 531 532 clippedValues=[] 533 for row in dataList: 534 535 # Trim points more than sigmaCut*sigma away from the fitted line 536 fit=fitResults['slope']*row[0]+fitResults['intercept'] 537 res=row[1]-fit 538 if abs(res)/row[2] < sigmaCut: 539 clippedValues.append(row) 540 541 iterations=iterations+1 542 543 # store the number of values that made it through the clipping process 544 fitResults['numDataPoints']=len(clippedValues) 545 546 return fitResults
547 548 #---------------------------------------------------------------------------------------------------
549 -def weightedLSFit(dataList, weightType):
550 """Performs a weighted least squares fit on a three dimensional list of numbers [x, y, y error]. 551 552 @type dataList: list 553 @param dataList: input data, must be a three dimensional list in format [x, y, y error] 554 @type weightType: string 555 @param weightType: if "errors", weights are calculated assuming the input data is in the 556 format [x, y, error on y]; if "weights", the weights are assumed to be already calculated and 557 stored in a fourth column [x, y, error on y, weight] (as used by e.g. L{astStats.biweightLSFit}) 558 @rtype: dictionary 559 @return: slope and intercept on y-axis, with associated errors, in the format 560 {'slope', 'intercept', 'slopeError', 'interceptError'} 561 562 @note: Returns None if an error occurs. 563 564 """ 565 if weightType == "weights": 566 sumW=0 567 sumWX=0 568 sumWY=0 569 sumWXY=0 570 sumWXX=0 571 n=float(len(dataList)) 572 if n > 4: 573 for item in dataList: 574 W=item[3] 575 sumWX=sumWX+(W*item[0]) 576 sumWY=sumWY+(W*item[1]) 577 sumWXY=sumWXY+(W*item[0]*item[1]) 578 sumWXX=sumWXX+(W*item[0]*item[0]) 579 sumW=sumW+W 580 #print sumW, sumWXX, sumWX 581 582 try: 583 m=((sumW*sumWXY)-(sumWX*sumWY)) \ 584 /((sumW*sumWXX)-(sumWX*sumWX)) 585 except ZeroDivisionError: 586 if REPORT_ERRORS == True: 587 print("ERROR: astStats.weightedLSFit() : divide by zero error.") 588 return None 589 590 try: 591 c=((sumWXX*sumWY)-(sumWX*sumWXY)) \ 592 /((sumW*sumWXX)-(sumWX*sumWX)) 593 except ZeroDivisionError: 594 if REPORT_ERRORS == True: 595 print("ERROR: astStats.weightedLSFit() : divide by zero error.") 596 return None 597 598 sumRes=0 599 for item in dataList: 600 601 sumRes=sumRes+((item[1]-(m*item[0])-c) \ 602 *(item[1]-(m*item[0])-c)) 603 604 sigma=math.sqrt((1.0/(n-2))*sumRes) 605 606 # Can get div0 errors here so check 607 # When biweight fitting converges this shouldn't happen 608 if (n*sumWXX)-(sumWX*sumWX)>0.0: 609 610 mSigma=(sigma*math.sqrt(n)) \ 611 /math.sqrt((n*sumWXX)-(sumWX*sumWX)) 612 613 cSigma=(sigma*math.sqrt(sumWXX)) \ 614 /math.sqrt((n*sumWXX)-(sumWX*sumWX)) 615 616 else: 617 618 if REPORT_ERRORS==True: 619 print("""ERROR: astStats.weightedLSFit() 620 : divide by zero error.""") 621 return None 622 623 else: 624 if REPORT_ERRORS==True: 625 print("""ERROR: astStats.weightedLSFit() : 626 dataList contains < 5 items.""") 627 return None 628 629 elif weightType == "errors": 630 sumX=0 631 sumY=0 632 sumXY=0 633 sumXX=0 634 sumSigma=0 635 n=float(len(dataList)) 636 for item in dataList: 637 sumX=sumX+(item[0]/(item[2]*item[2])) 638 sumY=sumY+(item[1]/(item[2]*item[2])) 639 sumXY=sumXY+((item[0]*item[1])/(item[2]*item[2])) 640 sumXX=sumXX+((item[0]*item[0])/(item[2]*item[2])) 641 sumSigma=sumSigma+(1.0/(item[2]*item[2])) 642 delta=(sumSigma*sumXX)-(sumX*sumX) 643 m=((sumSigma*sumXY)-(sumX*sumY))/delta 644 c=((sumXX*sumY)-(sumX*sumXY))/delta 645 mSigma=math.sqrt(sumSigma/delta) 646 cSigma=math.sqrt(sumXX/delta) 647 648 return {'slope':m, 649 'intercept':c, 650 'slopeError':mSigma, 651 'interceptError':cSigma}
652 653 #---------------------------------------------------------------------------------------------------
654 -def biweightLSFit(dataList, tuningConstant, sigmaCut = None):
655 """Performs a weighted least squares fit, where the weights used are the biweight 656 transforms of the residuals to the previous best fit .i.e. the procedure is iterative, 657 and converges very quickly (iterations is set to 10 by default). Minimum number of data 658 points is 10. 659 660 This seems to give slightly different results to the equivalent R routine, so use at your 661 own risk! 662 663 @type dataList: list 664 @param dataList: input data, must be a three dimensional list in format [x, y, y weight] 665 @type tuningConstant: float 666 @param tuningConstant: 6.0 is recommended for location estimates, 9.0 is recommended for 667 scale estimates 668 @type sigmaCut: float 669 @param sigmaCut: sigma clipping to apply (set to None if not required) 670 @rtype: dictionary 671 @return: slope and intercept on y-axis, with associated errors, in the format 672 {'slope', 'intercept', 'slopeError', 'interceptError'} 673 674 @note: Returns None if an error occurs. 675 676 """ 677 678 dataCopy=[] 679 for row in dataList: 680 dataCopy.append(row) 681 682 # First perform unweighted fit, then calculate residuals 683 results=OLSFit(dataCopy) 684 origLen=len(dataCopy) 685 for k in range(10): 686 m=results['slope'] 687 c=results['intercept'] 688 res=[] 689 for item in dataCopy: 690 res.append((m*item[0]+c)-item[1]) 691 692 if len(res)>5: 693 # For clipping, trim away things >3 sigma 694 # away from median 695 if sigmaCut != None: 696 absRes=[] 697 for item in res: 698 absRes.append(abs(item)) 699 sigma=stdev(absRes) 700 count=0 701 for item in absRes: 702 if item>(sigmaCut*sigma) \ 703 and len(dataCopy)>2: 704 del dataCopy[count] 705 del res[count] 706 707 # Index of datalist gets out of 708 # sync with absRes as we delete 709 # items 710 count=count-1 711 712 count=count+1 713 714 # Biweight transform residuals 715 weights=biweightTransform(res, tuningConstant) 716 717 # Perform weighted fit, using biweight transforms 718 # of residuals as weight 719 wData=[] 720 for i in range(len(dataCopy)): 721 wData.append([dataCopy[i][0], dataCopy[i][1], dataCopy[i][2], weights[i][1]]) 722 723 results=weightedLSFit(wData, "weights") 724 725 return results
726 727 #---------------------------------------------------------------------------------------------------
728 -def cumulativeBinner(data, binMin, binMax, binTotal):
729 """Bins the input data cumulatively. 730 731 @param data: input data, must be a one dimensional list 732 @type binMin: float 733 @param binMin: minimum value from which to bin data 734 @type binMax: float 735 @param binMax: maximum value from which to bin data 736 @type binTotal: int 737 @param binTotal: number of bins 738 @rtype: list 739 @return: binned data, in format [bin centre, frequency] 740 741 """ 742 #Bin data 743 binStep=float(binMax-binMin)/binTotal 744 bins=[] 745 totalItems=len(data) 746 for i in range(binTotal): 747 bins.append(0) 748 for item in data: 749 if item>(binMin+(i*binStep)): 750 bins[i]=bins[i]+1.0/totalItems 751 752 # Gnuplot requires points at bin midpoints 753 coords=[] 754 for i in range(binTotal): 755 coords.append([binMin+(float(i+0.5)*binStep), bins[i]]) 756 757 return coords
758 759 #---------------------------------------------------------------------------------------------------
760 -def binner(data, binMin, binMax, binTotal):
761 """Bins the input data.. 762 763 @param data: input data, must be a one dimensional list 764 @type binMin: float 765 @param binMin: minimum value from which to bin data 766 @type binMax: float 767 @param binMax: maximum value from which to bin data 768 @type binTotal: int 769 @param binTotal: number of bins 770 @rtype: list 771 @return: binned data, in format [bin centre, frequency] 772 773 """ 774 #Bin data 775 binStep=float(binMax-binMin)/binTotal 776 bins=[] 777 for i in range(binTotal): 778 bins.append(0) 779 for item in data: 780 if item>(binMin+(i*binStep)) \ 781 and item<=(binMin+((i+1)*binStep)): 782 bins[i]=bins[i]+1 783 784 # Gnuplot requires points at bin midpoints 785 coords=[] 786 for i in range(binTotal): 787 coords.append([binMin+(float(i+0.5)*binStep), bins[i]]) 788 789 return coords
790 791 #---------------------------------------------------------------------------------------------------
792 -def weightedBinner(data, weights, binMin, binMax, binTotal):
793 """Bins the input data, recorded frequency is sum of weights in bin. 794 795 @param data: input data, must be a one dimensional list 796 @type binMin: float 797 @param binMin: minimum value from which to bin data 798 @type binMax: float 799 @param binMax: maximum value from which to bin data 800 @type binTotal: int 801 @param binTotal: number of bins 802 @rtype: list 803 @return: binned data, in format [bin centre, frequency] 804 805 """ 806 #Bin data 807 binStep=float(binMax-binMin)/binTotal 808 bins=[] 809 for i in range(binTotal): 810 bins.append(0.0) 811 for item, weight in zip(data, weights): 812 if item>(binMin+(i*binStep)) \ 813 and item<=(binMin+((i+1)*binStep)): 814 bins[i]=bins[i]+weight 815 816 # Gnuplot requires points at bin midpoints 817 coords=[] 818 for i in range(binTotal): 819 coords.append([binMin+(float(i+0.5)*binStep), bins[i]]) 820 821 return coords
822 823 #--------------------------------------------------------------------------------------------------- 824