1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 __VERSION__="ete2-2.0rev96"
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47 """
48 pstat.py module
49
50 #################################################
51 ####### Written by: Gary Strangman ###########
52 ####### Last modified: Dec 18, 2007 ###########
53 #################################################
54
55 This module provides some useful list and array manipulation routines
56 modeled after those found in the |Stat package by Gary Perlman, plus a
57 number of other useful list/file manipulation functions. The list-based
58 functions include:
59
60 abut (source,*args)
61 simpleabut (source, addon)
62 colex (listoflists,cnums)
63 collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
64 dm (listoflists,criterion)
65 flat (l)
66 linexand (listoflists,columnlist,valuelist)
67 linexor (listoflists,columnlist,valuelist)
68 linedelimited (inlist,delimiter)
69 lineincols (inlist,colsize)
70 lineincustcols (inlist,colsizes)
71 list2string (inlist)
72 makelol(inlist)
73 makestr(x)
74 printcc (lst,extra=2)
75 printincols (listoflists,colsize)
76 pl (listoflists)
77 printl(listoflists)
78 replace (lst,oldval,newval)
79 recode (inlist,listmap,cols=None)
80 remap (listoflists,criterion)
81 roundlist (inlist,num_digits_to_round_floats_to)
82 sortby(listoflists,sortcols)
83 unique (inlist)
84 duplicates(inlist)
85 writedelimited (listoflists, delimiter, file, writetype='w')
86
87 Some of these functions have alternate versions which are defined only if
88 Numeric (NumPy) can be imported. These functions are generally named as
89 above, with an 'a' prefix.
90
91 aabut (source, *args)
92 acolex (a,indices,axis=1)
93 acollapse (a,keepcols,collapsecols,sterr=0,ns=0)
94 adm (a,criterion)
95 alinexand (a,columnlist,valuelist)
96 alinexor (a,columnlist,valuelist)
97 areplace (a,oldval,newval)
98 arecode (a,listmap,col='all')
99 arowcompare (row1, row2)
100 arowsame (row1, row2)
101 asortrows(a,axis=0)
102 aunique(inarray)
103 aduplicates(inarray)
104
105 Currently, the code is all but completely un-optimized. In many cases, the
106 array versions of functions amount simply to aliases to built-in array
107 functions/methods. Their inclusion here is for function name consistency.
108 """
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131 import stats
132 import string, copy
133 from types import *
134
135 __version__ = 0.4
136
137
138
139
140
141
142
def _repeat_to_length(seq, length):
    """
    Return seq (a list or tuple) cycled until it holds exactly `length` items.

    Raises ZeroDivisionError when seq is empty, because an empty sequence
    cannot be repeated up to a non-zero length.
    """
    # Ceiling division using integers only, so it is correct under both floor
    # ("/" in Python 2) and true ("/" in Python 3) division semantics.
    repeats = -(-length // len(seq))
    # The repeated part is built from a deep copy so the padding never aliases
    # the caller's nested rows.
    tile = copy.deepcopy(seq)
    return (seq + tile * (repeats - 1))[0:length]


def abut(source, *args):
    """
    Like the |Stat abut command.  It concatenates lists side-by-side and
    returns the result.  '2D' lists are also accommodated for either argument
    (source or addon).  CAUTION:  If one list is shorter, it will be repeated
    until it is as long as the longest list.  If this behavior is not desired,
    use pstat.simpleabut().

    Usage:   abut(source, args)   where args=any # of lists
    Returns: a list of lists as long as the LONGEST list passed, source on the
             'left', lists in <args> attached consecutively on the 'right'.
             With no <args>, source is returned (wrapped in a list if it was
             a scalar).
    Raises:  ZeroDivisionError if an empty list has to be repeated.
    """
    if not isinstance(source, (list, tuple)):
        source = [source]
    for addon in args:
        if not isinstance(addon, (list, tuple)):
            addon = [addon]
        # Pad whichever side is shorter by cycling through its own items.
        if len(addon) < len(source):
            addon = _repeat_to_length(addon, len(source))
        elif len(source) < len(addon):
            source = _repeat_to_length(source, len(addon))
        source = simpleabut(source, addon)
    return source
188
189
191 """
192 Concatenates two lists as columns and returns the result. '2D' lists
193 are also accommodated for either argument (source or addon). This DOES NOT
194 repeat either list to make the 2 lists of equal length. Beware of list pairs
195 with different lengths ... the resulting list will be the length of the
196 FIRST list passed.
197
198 Usage: simpleabut(source,addon) where source, addon=list (or list-of-lists)
199 Returns: a list of lists as long as source, with source on the 'left' and
200 addon on the 'right'
201 """
202 if type(source) not in [ListType,TupleType]:
203 source = [source]
204 if type(addon) not in [ListType,TupleType]:
205 addon = [addon]
206 minlen = min(len(source),len(addon))
207 list = copy.deepcopy(source)
208 if type(source[0]) not in [ListType,TupleType]:
209 if type(addon[0]) not in [ListType,TupleType]:
210 for i in range(minlen):
211 list[i] = [source[i]] + [addon[i]]
212 else:
213 for i in range(minlen):
214 list[i] = [source[i]] + addon[i]
215 else:
216 if type(addon[0]) not in [ListType,TupleType]:
217 for i in range(minlen):
218 list[i] = source[i] + [addon[i]]
219 else:
220 for i in range(minlen):
221 list[i] = source[i] + addon[i]
222 source = list
223 return source
224
225
def colex(listoflists, cnums):
    """
    Extracts from listoflists the columns specified by 'cnums'.

    cnums can be:
      - an integer: that single column is returned as a flat list;
      - a list/tuple of integers: the columns are returned side by side, in
        the order given (a one-element sequence yields a flat list);
      - a string holding an index/slice expression that is applied to each
        row, e.g. '[3:]' extracts columns 3 onward.  The leading 'x' is
        supplied by this function, so pass '[3:]', not 'x[3:]'.

    Usage:   colex (listoflists,cnums)
    Returns: a list (or list-of-lists) corresponding to the columns from
             listoflists specified by cnums
    Raises:  IndexError if cnums is an empty list/tuple.
    """
    if isinstance(cnums, (list, tuple)):
        first = cnums[0]  # deliberately raises IndexError for empty cnums
        column = [row[first] for row in listoflists]
        for col in cnums[1:]:
            column = abut(column, [row[col] for row in listoflists])
    elif isinstance(cnums, str):
        # NOTE(security): cnums is evaluated as Python source.  Never pass
        # text that comes from an untrusted origin.
        pick = eval('lambda x: x' + cnums)
        column = [pick(row) for row in listoflists]
    else:
        column = [row[cnums] for row in listoflists]
    return column
252
253
def collapse(listoflists, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None):
    """
    Collapses (by default: averages) the data in collapsecols for every
    unique combination of values found in keepcols.  Setting fcn1 and/or
    fcn2 to a function rather than None (e.g., stats.sterr, len) appends
    those results after each collapsed value; if such a function raises,
    'N/A' is recorded instead.  cfcn is the collapse function to apply
    (defaults to the arithmetic mean, defined here in the pstat module to
    avoid circular imports with stats.py, but harmonicmean or others could
    be passed).

    Usage:   collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: if keepcols is empty, one entry per collapse column (the cfcn
             result alone, or [result, fcn1 result, fcn2 result] when extra
             functions are given).  Otherwise a list of lists with all unique
             permutations of entries appearing in the columns ("conditions")
             specified by keepcols, abutted with the cfcn result (and the
             fcn1/fcn2 results) of each column specified by collapsecols.
    """
    def collmean(inlist):
        total = 0
        for value in inlist:
            total = total + value
        return total / float(len(inlist))

    def safe_apply(fcn, data):
        # An auxiliary statistic must not abort the whole collapse.
        try:
            return fcn(data)
        except Exception:
            return 'N/A'

    if not isinstance(keepcols, (list, tuple)):
        keepcols = [keepcols]
    if not isinstance(collapsecols, (list, tuple)):
        collapsecols = [collapsecols]
    if cfcn is None:
        cfcn = collmean

    if not keepcols:
        # No grouping columns: collapse each requested column as a whole.
        means = []
        for col in collapsecols:
            avgcol = colex(listoflists, col)
            extras = []
            if fcn1 is not None:
                extras.append(safe_apply(fcn1, avgcol))
            if fcn2 is not None:
                # The fcn2 result itself is reported; previously it was
                # computed and then len(avgcol) was stored in its place.
                extras.append(safe_apply(fcn2, avgcol))
            if extras:
                means.append([cfcn(avgcol)] + extras)
            else:
                means.append(cfcn(avgcol))
        return means

    values = colex(listoflists, keepcols)
    uniques = unique(values)
    uniques.sort()
    newlist = []
    for item in uniques:
        # Work on a fresh list so the unique keys are never mutated and
        # tuple keys are supported as well.
        if isinstance(item, (list, tuple)):
            row = list(item)
        else:
            row = [item]
        tmprows = linexand(listoflists, keepcols, row)
        for col in collapsecols:
            avgcol = colex(tmprows, col)
            row.append(cfcn(avgcol))
            if fcn1 is not None:
                row.append(safe_apply(fcn1, avgcol))
            if fcn2 is not None:
                row.append(safe_apply(fcn2, avgcol))
        newlist.append(row)
    return newlist
330
331
def dm(listoflists, criterion):
    """
    Returns rows from the passed list of lists that meet the criteria in
    the passed criterion expression (a string as a function of x; e.g.,
    'x[3]>=9' will return all rows where the 4th column>=9 and "x[2]=='N'"
    will return rows with column 2 equal to the string 'N').

    Usage:   dm (listoflists, criterion)
    Returns: a list of the rows from listoflists that meet the criterion.
    """
    # NOTE(security): criterion is evaluated as Python source.  Never pass
    # text that comes from an untrusted origin.
    keep = eval('lambda x: ' + criterion)
    # A comprehension (not filter) so a real list comes back on Python 3 too.
    return [row for row in listoflists if keep(row)]
345
346
348 """
349 Returns the flattened version of a '2D' list. List-correlate to the a.ravel()
350 method of NumPy arrays.
351
352 Usage: flat(l)
353 """
354 newl = []
355 for i in range(len(l)):
356 for j in range(len(l[i])):
357 newl.append(l[i][j])
358 return newl
359
360
def linexand(listoflists, columnlist, valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for EVERY pair of values (columnlist[i],valuelist[i]).
    len(columnlist) must equal len(valuelist).  Scalars are accepted for
    either argument and are treated as one-element lists.

    Values are compared directly with ==, so strings containing quotes,
    floats and arbitrary objects all match reliably.

    Usage:   linexand (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i
    Raises:  IndexError if valuelist is shorter than columnlist.
    """
    if not isinstance(columnlist, (list, tuple)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple)):
        valuelist = [valuelist]
    # Indexed positionally (rather than zipped) so a too-short valuelist is
    # reported instead of being silently truncated.
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    return [row for row in listoflists
            if all(row[col] == val for col, val in pairs)]
385
386
def linexor(listoflists, columnlist, valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for ANY pair of values (columnlist[i],valuelist[i]).
    One value is required for each column in columnlist.  If only one column
    is given but multiple values appear in valuelist, the valuelist values
    are all assumed to pertain to that same column.

    Values are compared directly with ==, so strings containing quotes,
    floats and arbitrary objects all match reliably.

    Usage:   linexor (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i
    Raises:  IndexError if valuelist is shorter than columnlist.
    """
    if not isinstance(columnlist, (list, tuple)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple)):
        valuelist = [valuelist]
    if len(columnlist) == 1 and len(valuelist) > 1:
        # One column tested against several alternative values.
        columnlist = columnlist * len(valuelist)
    # Indexed positionally (rather than zipped) so a too-short valuelist is
    # reported instead of being silently truncated.
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    return [row for row in listoflists
            if any(row[col] == val for col, val in pairs)]
415
416
418 """
419 Returns a string composed of elements in inlist, with each element
420 separated by 'delimiter.' Used by function writedelimited. Use '\t'
421 for tab-delimiting.
422
423 Usage: linedelimited (inlist,delimiter)
424 """
425 outstr = ''
426 for item in inlist:
427 if type(item) <> StringType:
428 item = str(item)
429 outstr = outstr + item + delimiter
430 outstr = outstr[0:-1]
431 return outstr
432
433
435 """
436 Returns a string composed of elements in inlist, with each element
437 right-aligned in columns of (fixed) colsize.
438
439 Usage: lineincols (inlist,colsize) where colsize is an integer
440 """
441 outstr = ''
442 for item in inlist:
443 if type(item) <> StringType:
444 item = str(item)
445 size = len(item)
446 if size <= colsize:
447 for i in range(colsize-size):
448 outstr = outstr + ' '
449 outstr = outstr + item
450 else:
451 outstr = outstr + item[0:colsize+1]
452 return outstr
453
454
456 """
457 Returns a string composed of elements in inlist, with each element
458 right-aligned in a column of width specified by a sequence colsizes. The
459 length of colsizes must be greater than or equal to the number of columns
460 in inlist.
461
462 Usage: lineincustcols (inlist,colsizes)
463 Returns: formatted string created from inlist
464 """
465 outstr = ''
466 for i in range(len(inlist)):
467 if type(inlist[i]) <> StringType:
468 item = str(inlist[i])
469 else:
470 item = inlist[i]
471 size = len(item)
472 if size <= colsizes[i]:
473 for j in range(colsizes[i]-size):
474 outstr = outstr + ' '
475 outstr = outstr + item
476 else:
477 outstr = outstr + item[0:colsizes[i]+1]
478 return outstr
479
480
482 """
483 Converts a 1D list to a single long string for file output, using
484 the string.join function.
485
486 Usage: list2string (inlist,delimit=' ')
487 Returns: the string created from inlist
488 """
489 stringlist = map(makestr,inlist)
490 return string.join(stringlist,delimit)
491
492
494 """
495 Converts a 1D list to a 2D list (i.e., a list-of-lists). Useful when you
496 want to use put() to write a 1D list one item per line in the file.
497
498 Usage: makelol(inlist)
499 Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
500 """
501 x = []
502 for item in inlist:
503 x.append([item])
504 return x
505
506
508 if type(x) <> StringType:
509 x = str(x)
510 return x
511
512
514 """
515 Prints a list of lists in columns, customized by the max size of items
516 within the columns (max size of items in col, plus 'extra' number of spaces).
517 Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines,
518 respectively.
519
520 Usage: printcc (lst,extra=2)
521 Returns: None
522 """
523 if type(lst[0]) not in [ListType,TupleType]:
524 lst = [lst]
525 rowstokill = []
526 list2print = copy.deepcopy(lst)
527 for i in range(len(lst)):
528 if lst[i] == ['\n'] or lst[i]=='\n' or lst[i]=='dashes' or lst[i]=='' or lst[i]==['']:
529 rowstokill = rowstokill + [i]
530 rowstokill.reverse()
531 for row in rowstokill:
532 del list2print[row]
533 maxsize = [0]*len(list2print[0])
534 for col in range(len(list2print[0])):
535 items = colex(list2print,col)
536 items = map(makestr,items)
537 maxsize[col] = max(map(len,items)) + extra
538 for row in lst:
539 if row == ['\n'] or row == '\n' or row == '' or row == ['']:
540 print
541 elif row == ['dashes'] or row == 'dashes':
542 dashes = [0]*len(maxsize)
543 for j in range(len(maxsize)):
544 dashes[j] = '-'*(maxsize[j]-2)
545 print lineincustcols(dashes,maxsize)
546 else:
547 print lineincustcols(row,maxsize)
548 return None
549
550
552 """
553 Prints a list of lists in columns of (fixed) colsize width, where
554 colsize is an integer.
555
556 Usage: printincols (listoflists,colsize)
557 Returns: None
558 """
559 for row in listoflists:
560 print lineincols(row,colsize)
561 return None
562
563
564 -def pl (listoflists):
565 """
566 Prints a list of lists, 1 list (row) at a time.
567
568 Usage: pl(listoflists)
569 Returns: None
570 """
571 for row in listoflists:
572 if row[-1] == '\n':
573 print row,
574 else:
575 print row
576 return None
577
578
580 """Alias for pl."""
581 pl(listoflists)
582 return
583
584
586 """
587 Replaces all occurrences of 'oldval' with 'newval', recursively.
588
589 Usage: replace (inlst,oldval,newval)
590 """
591 lst = inlst*1
592 for i in range(len(lst)):
593 if type(lst[i]) not in [ListType,TupleType]:
594 if lst[i]==oldval: lst[i]=newval
595 else:
596 lst[i] = replace(lst[i],oldval,newval)
597 return lst
598
599
def recode(inlist, listmap, cols=None):
    """
    Changes the values in a list-of-lists to a new set of values (useful
    when you need to recode data from (e.g.) strings to numbers).  cols
    defaults to None (meaning all columns of every row are recoded).

    listmap is a 2D list of [oldvalue, newvalue] pairs; when an old value
    appears more than once, the first pair wins.  Values without a matching
    pair are left untouched.  inlist itself is never modified.

    Usage:   recode (inlist,listmap,cols=None)  cols=recode cols, listmap=2D list
    Returns: a deep copy of inlist with the appropriate values replaced
    """
    lst = copy.deepcopy(inlist)
    # Build the lookup column once instead of once per cell.
    oldvals = [pair[0] for pair in listmap]

    def translate(row, col):
        try:
            idx = oldvals.index(lst[row][col])
        except ValueError:
            return  # no mapping for this value: keep it as is
        lst[row][col] = listmap[idx][1]

    if cols is not None:
        if not isinstance(cols, (list, tuple)):
            cols = [cols]
        for col in cols:
            for row in range(len(lst)):
                translate(row, col)
    else:
        for row in range(len(lst)):
            # Walk this row's own width; using the number of rows here
            # skipped columns of wide data and overran narrow data.
            for col in range(len(lst[row])):
                translate(row, col)
    return lst
629
630
def remap(listoflists, criterion):
    """
    Remaps every row of a 2D list (listoflists).  criterion is a string
    holding an expression in 'x' (the current row); the value of that
    expression becomes the corresponding entry of the result, i.e. the
    result equals [<criterion> for x in listoflists].

    Usage:   remap(listoflists,criterion)    criterion=string
    Returns: remapped version of listoflists (always a list)
    """
    # NOTE(security): criterion is evaluated as Python source.  Never pass
    # text that comes from an untrusted origin.
    transform = eval('lambda x: ' + criterion)
    # A comprehension (not map) so a real list comes back on Python 3 too.
    return [transform(row) for row in listoflists]
643
644
646 """
647 Goes through each element in a 1D or 2D inlist, and applies the following
648 function to all elements of FloatType ... round(element,digits).
649
650 Usage: roundlist(inlist,digits)
651 Returns: list with rounded floats
652 """
653 if type(inlist[0]) in [IntType, FloatType]:
654 inlist = [inlist]
655 l = inlist*1
656 for i in range(len(l)):
657 for j in range(len(l[i])):
658 if type(l[i][j])==FloatType:
659 l[i][j] = round(l[i][j],digits)
660 return l
661
662
def sortby(listoflists, sortcols):
    """
    Sorts a list of lists on the column(s) specified in the sequence
    sortcols.  The sort columns are copied to the front of every row, the
    decorated rows are sorted, and the copies are then stripped off again.

    Usage:   sortby(listoflists,sortcols)
    Returns: sorted list, unchanged column ordering
    """
    # A bare column number has no len(); it counts as a single key column.
    try:
        keycount = len(sortcols)
    except TypeError:
        keycount = 1
    decorated = abut(colex(listoflists, sortcols), listoflists)
    decorated.sort()
    # Drop the leading key columns that were added for sorting.
    return colex(decorated, '[' + str(keycount) + ':]')
680
681
683 """
684 Returns all unique items in the passed list. If the a list-of-lists
685 is passed, unique LISTS are found (i.e., items in the first dimension are
686 compared).
687
688 Usage: unique (inlist)
689 Returns: the unique elements (or rows) in inlist
690 """
691 uniques = []
692 for item in inlist:
693 if item not in uniques:
694 uniques.append(item)
695 return uniques
696
698 """
699 Returns duplicate items in the FIRST dimension of the passed list.
700
701 Usage: duplicates (inlist)
702 """
703 dups = []
704 for i in range(len(inlist)):
705 if inlist[i] in inlist[i+1:]:
706 dups.append(inlist[i])
707 return dups
708
709
711 """
712 Returns items that are NOT duplicated in the first dim of the passed list.
713
714 Usage: nonrepeats (inlist)
715 """
716 nonrepeats = []
717 for i in range(len(inlist)):
718 if inlist.count(inlist[i]) == 1:
719 nonrepeats.append(inlist[i])
720 return nonrepeats
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740 try:
741 import numpy as N
742
def aabut(source, *args):
    """
    Like the |Stat abut command.  It concatenates arrays column-wise and
    returns the result.  1D arrays are treated as single columns.  CAUTION:
    If one array is shorter, it will be repeated until it is as long as the
    other.

    Usage:   aabut (source, args)    where args=any # of arrays
    Returns: a 2D array as long as the LONGEST array passed, source appearing
             on the 'left', arrays in <args> attached on the 'right'.
    """
    if len(source.shape) == 1:
        source = N.resize(source, [source.shape[0], 1])
    for addon in args:
        if len(addon.shape) == 1:
            # Turn the vector into a column of ITS OWN length.  Resizing it
            # to the source length here would truncate a longer addon before
            # the length comparison below could extend the source.
            addon = N.resize(addon, [addon.shape[0], 1])
        if len(addon) < len(source):
            addon = N.resize(addon, [source.shape[0], addon.shape[1]])
        elif len(source) < len(addon):
            source = N.resize(source, [addon.shape[0], source.shape[1]])
        source = N.concatenate((source, addon), 1)
    return source
770
771
def acolex(a, indices, axis=1):
    """
    Extracts specified indices (a scalar, list, tuple or array) from the
    passed array, along the passed axis (column extraction is default).
    BEWARE:  A 1D array is presumed to be a column-array; indices and axis
    are then ignored and the whole array is returned as a single column.

    Usage:   acolex (a,indices,axis=1)
    Returns: the columns of a specified by indices (always 2D)
    """
    if not isinstance(indices, (list, tuple, N.ndarray)):
        indices = [indices]
    if len(N.shape(a)) == 1:
        cols = N.resize(a, [a.shape[0], 1])
    else:
        cols = N.take(a, indices, axis)
    return cols
788
789
def acollapse(a, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None):
    """
    Collapses (by default: averages) the data in collapsecols for every
    unique combination of values found in keepcols.  If fcn1 and/or fcn2
    are given (e.g., a standard-error function, len), their results are
    appended after each collapsed value; if such a function raises, 'N/A'
    is recorded instead.  cfcn is the collapse function used for the
    grouped case (defaults to the arithmetic mean).

    Usage:   acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: if keepcols is empty, the column-wise means of collapsecols
             (abutted with the fcn1/fcn2 results when given).  Otherwise the
             unique 'conditions' specified by the contents of the columns in
             keepcols, abutted with the collapsed value(s) of the column(s)
             specified by collapsecols (an object array if the rows cannot
             form a regular numeric array).
    """
    def acollmean(inarray):
        # Mean of all elements (the sum alone is not an average).
        values = N.ravel(inarray)
        return N.sum(values) / float(len(values))

    def safe_apply(fcn, data, fallback):
        # An auxiliary statistic must not abort the whole collapse.
        try:
            return fcn(data)
        except Exception:
            return fallback

    sequence_types = (list, tuple, N.ndarray)
    if not isinstance(keepcols, sequence_types):
        keepcols = [keepcols]
    if not isinstance(collapsecols, sequence_types):
        collapsecols = [collapsecols]
    if cfcn is None:
        cfcn = acollmean

    # len() rather than "== []": comparing an ndarray with [] is ambiguous.
    if len(keepcols) == 0:
        avgcol = acolex(a, collapsecols)
        # Sum down the rows (axis 0) so there is one mean per column.
        means = N.sum(avgcol, 0) / float(len(avgcol))
        for fcn in (fcn1, fcn2):
            if fcn is not None:
                fallback = N.array(['N/A'] * len(means))
                test = safe_apply(fcn, avgcol, fallback)
                # atleast_1d lets scalar results (e.g. from len) be abutted.
                means = aabut(means, N.atleast_1d(test))
        return means

    values = colex(a, keepcols)
    uniques = unique(values)
    uniques.sort()
    newlist = []
    for item in uniques:
        # Work on a fresh list so the unique keys are never mutated and
        # array/tuple keys can be extended.
        if isinstance(item, sequence_types):
            row = list(item)
        else:
            row = [item]
        tmprows = alinexand(a, keepcols, row)
        for col in collapsecols:
            avgcol = acolex(tmprows, col)
            row.append(cfcn(avgcol))  # honor the caller's collapse function
            if fcn1 is not None:
                row.append(safe_apply(fcn1, avgcol, 'N/A'))
            if fcn2 is not None:
                row.append(safe_apply(fcn2, avgcol, 'N/A'))
        newlist.append(row)
    try:
        new_a = N.array(newlist)
    except (TypeError, ValueError):
        # Mixed or ragged rows: fall back to an array of Python objects.
        new_a = N.array(newlist, 'O')
    return new_a
860
861
def adm(a, criterion):
    """
    Returns rows from the passed array that meet the criteria in the passed
    criterion expression (a string as a function of x, the current row).

    Usage:   adm (a,criterion)   where criterion is like 'x[2]==37'
    Returns: an array of the matching rows (an object array if the rows
             cannot form a regular array)
    """
    # NOTE(security): criterion is evaluated as Python source.  Never pass
    # text that comes from an untrusted origin.
    keep = eval('lambda x: ' + criterion)
    # Materialize the rows: N.array() on a lazy filter object (Python 3)
    # would wrap the iterator itself instead of its contents.
    lines = [row for row in a if keep(row)]
    try:
        return N.array(lines)
    except Exception:
        return N.array(lines, dtype='O')
876
877
879 if type(x)==StringType:
880 return 1
881 else:
882 return 0
883
884
886 """
887 Returns the rows of an array where col (from columnlist) = val
888 (from valuelist). One value is required for each column in columnlist.
889
890 Usage: alinexand (a,columnlist,valuelist)
891 Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
892 """
893 if type(columnlist) not in [ListType,TupleType,N.ndarray]:
894 columnlist = [columnlist]
895 if type(valuelist) not in [ListType,TupleType,N.ndarray]:
896 valuelist = [valuelist]
897 criterion = ''
898 for i in range(len(columnlist)):
899 if type(valuelist[i])==StringType:
900 critval = '\'' + valuelist[i] + '\''
901 else:
902 critval = str(valuelist[i])
903 criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' and'
904 criterion = criterion[0:-3]
905 return adm(a,criterion)
906
907
909 """
910 Returns the rows of an array where col (from columnlist) = val (from
911 valuelist). One value is required for each column in columnlist.
912 The exception is if either columnlist or valuelist has only 1 value,
913 in which case that item will be expanded to match the length of the
914 other list.
915
916 Usage: alinexor (a,columnlist,valuelist)
917 Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
918 """
919 if type(columnlist) not in [ListType,TupleType,N.ndarray]:
920 columnlist = [columnlist]
921 if type(valuelist) not in [ListType,TupleType,N.ndarray]:
922 valuelist = [valuelist]
923 criterion = ''
924 if len(columnlist) == 1 and len(valuelist) > 1:
925 columnlist = columnlist*len(valuelist)
926 elif len(valuelist) == 1 and len(columnlist) > 1:
927 valuelist = valuelist*len(columnlist)
928 for i in range(len(columnlist)):
929 if type(valuelist[i])==StringType:
930 critval = '\'' + valuelist[i] + '\''
931 else:
932 critval = str(valuelist[i])
933 criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' or'
934 criterion = criterion[0:-2]
935 return adm(a,criterion)
936
937
939 """
940 Replaces all occurrences of oldval with newval in array a.
941
942 Usage: areplace(a,oldval,newval)
943 """
944 return N.where(a==oldval,newval,a)
945
946
def arecode(a, listmap, col='all'):
    """
    Remaps the values in an array to a new set of values (useful when
    you need to recode data from (e.g.) strings to numbers as most stats
    packages require).  Can work on a SINGLE column of a 2D array, or on
    'all' columns at once.

    listmap is a sequence of [oldvalue, newvalue] pairs.  The pairs are
    applied one after another, so a value produced by an earlier pair can be
    remapped again by a later pair.  The input array is not modified.

    Usage:   arecode (a,listmap,col='all')
    Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1];
             an object array if strings are involved
    """
    a = N.asarray(a)
    recode_all = isinstance(col, str) and col == 'all'
    if recode_all:
        work = a.ravel()
    else:
        work = a[:, col]

    # Strings cannot live in a numeric array, so switch to Python objects
    # when the data or any replacement value is textual.
    use_objects = a.dtype.kind in 'OSU'
    for pair in listmap:
        if isinstance(pair[1], str):
            use_objects = True

    if use_objects:
        work = N.array(work, dtype='O')  # private copy, safe to edit in place
        a = N.array(a, dtype='O')
        for pair in listmap:
            for i in range(len(work)):
                if work[i] == pair[0]:
                    work[i] = pair[1]
    else:
        for pair in listmap:
            work = N.where(work == pair[0], pair[1], work)

    # Reassemble only after EVERY pair has been applied.
    if recode_all:
        return N.reshape(work, a.shape)
    return N.concatenate([a[:, 0:col], work[:, N.newaxis], a[:, col + 1:]], 1)
977
978
980 """
981 Compares two rows from an array, regardless of whether it is an
982 array of numbers or of python objects (which requires the cmp function).
983 @@@PURPOSE? 2007-11-26
984
985 Usage: arowcompare(row1,row2)
986 Returns: an array of equal length containing 1s where the two rows had
987 identical elements and 0 otherwise
988 """
989 return
990 if row1.dtype.char=='O' or row2.dtype=='O':
991 cmpvect = N.logical_not(abs(N.array(map(cmp,row1,row2))))
992 else:
993 cmpvect = N.equal(row1,row2)
994 return cmpvect
995
996
998 """
999 Compares two rows from an array, regardless of whether it is an
1000 array of numbers or of python objects (which requires the cmp function).
1001
1002 Usage: arowsame(row1,row2)
1003 Returns: 1 if the two rows are identical, 0 otherwise.
1004 """
1005 cmpval = N.alltrue(arowcompare(row1,row2))
1006 return cmpval
1007
1008
1010 """
1011 Sorts an array "by rows". This differs from the Numeric.sort() function,
1012 which sorts elements WITHIN the given axis. Instead, this function keeps
1013 the elements along the given axis intact, but shifts them 'up or down'
1014 relative to one another.
1015
1016 Usage: asortrows(a,axis=0)
1017 Returns: sorted version of a
1018 """
1019 return N.sort(a,axis=axis,kind='mergesort')
1020
1021
1023 """
1024 Returns unique items in the FIRST dimension of the passed array. Only
1025 works on arrays NOT including string items.
1026
1027 Usage: aunique (inarray)
1028 """
1029 uniques = N.array([inarray[0]])
1030 if len(uniques.shape) == 1:
1031 for item in inarray[1:]:
1032 if N.add.reduce(N.equal(uniques,item).ravel()) == 0:
1033 try:
1034 uniques = N.concatenate([uniques,N.array[N.newaxis,:]])
1035 except TypeError:
1036 uniques = N.concatenate([uniques,N.array([item])])
1037 else:
1038 if inarray.dtype.char != 'O':
1039 for item in inarray[1:]:
1040 if not N.sum(N.alltrue(N.equal(uniques,item),1)):
1041 try:
1042 uniques = N.concatenate( [uniques,item[N.newaxis,:]] )
1043 except TypeError:
1044 uniques = N.concatenate([uniques,N.array([item])])
1045 else:
1046 pass
1047 else:
1048 for item in inarray[1:]:
1049 newflag = 1
1050 for unq in uniques:
1051 test = N.sum(abs(N.array(map(cmp,item,unq))))
1052 if test == 0:
1053 newflag = 0
1054 break
1055 if newflag == 1:
1056 try:
1057 uniques = N.concatenate( [uniques,item[N.newaxis,:]] )
1058 except TypeError:
1059 uniques = N.concatenate([uniques,N.array([item])])
1060 return uniques
1061
1062
1064 """
1065 Returns duplicate items in the FIRST dimension of the passed array. Only
1066 works on arrays NOT including string items.
1067
1068 Usage: aduplicates (inarray)
1069 """
1070 inarray = N.array(inarray)
1071 if len(inarray.shape) == 1:
1072 dups = []
1073 inarray = inarray.tolist()
1074 for i in range(len(inarray)):
1075 if inarray[i] in inarray[i+1:]:
1076 dups.append(inarray[i])
1077 dups = aunique(dups)
1078 else:
1079 dups = []
1080 aslist = inarray.tolist()
1081 for i in range(len(aslist)):
1082 if aslist[i] in aslist[i+1:]:
1083 dups.append(aslist[i])
1084 dups = unique(dups)
1085 dups = N.array(dups)
1086 return dups
1087
1088 except ImportError:
1089 pass
1090