Module littletable
[frames] | no frames]

Source Code for Module littletable

   1  # 
   2  # 
   3  # littletable.py 
   4  #  
   5  # littletable is a simple in-memory database for ad-hoc or user-defined objects, 
   6  # supporting simple query and join operations - useful for ORM-like access 
   7  # to a collection of data objects, without dealing with SQL 
   8  # 
   9  # 
  10  # Copyright (c) 2010-2015  Paul T. McGuire 
  11  # 
  12  # Permission is hereby granted, free of charge, to any person obtaining 
  13  # a copy of this software and associated documentation files (the 
  14  # "Software"), to deal in the Software without restriction, including 
  15  # without limitation the rights to use, copy, modify, merge, publish, 
  16  # distribute, sublicense, and/or sell copies of the Software, and to 
  17  # permit persons to whom the Software is furnished to do so, subject to 
  18  # the following conditions: 
  19  # 
  20  # The above copyright notice and this permission notice shall be 
  21  # included in all copies or substantial portions of the Software. 
  22  # 
  23  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  24  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  25  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  26  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  27  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  28  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  29  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  30  # 
  31   
  32  __doc__ = """\ 
  33   
  34  C{littletable} - a Python module to give ORM-like access to a collection of objects 
  35   
  36  The C{littletable} module provides a low-overhead, schema-less, in-memory database access to a  
  37  collection of user objects.  C{littletable} provides a L{DataObject} class for ad hoc creation 
  38  of semi-immutable objects that can be stored in a C{littletable} L{Table}. 
  39   
  40  In addition to basic ORM-style insert/remove/query/delete access to the contents of a  
  41  Table, C{littletable} offers: 
  42   - simple indexing for improved retrieval performance, and optional enforcing key uniqueness 
  43   - access to objects using indexed attributes 
  44   - simplified joins using '+' operator syntax between annotated Tables 
  45   - the result of any query or join is a new first-class C{littletable} Table 
  46   
  47  C{littletable} Tables do not require an upfront schema definition, but simply work off of the 
  48  attributes in the stored values, and those referenced in any query parameters. 
  49   
  50  Here is a simple C{littletable} data storage/retrieval example:: 
  51   
  52      from littletable import Table, DataObject 
  53   
  54      customers = Table('customers') 
  55      customers.create_index("id", unique=True) 
  56      customers.insert(DataObject(id="0010", name="George Jetson")) 
  57      customers.insert(DataObject(id="0020", name="Wile E. Coyote")) 
  58      customers.insert(DataObject(id="0030", name="Jonny Quest")) 
  59   
  60      catalog = Table('catalog') 
  61      catalog.create_index("sku", unique=True) 
  62      catalog.insert(DataObject(sku="ANVIL-001", descr="1000lb anvil", unitofmeas="EA",unitprice=100)) 
  63      catalog.insert(DataObject(sku="BRDSD-001", descr="Bird seed", unitofmeas="LB",unitprice=3)) 
  64      catalog.insert(DataObject(sku="MAGNT-001", descr="Magnet", unitofmeas="EA",unitprice=8)) 
  65      catalog.insert(DataObject(sku="MAGLS-001", descr="Magnifying glass", unitofmeas="EA",unitprice=12)) 
  66   
  67      wishitems = Table('wishitems') 
  68      wishitems.create_index("custid") 
  69      wishitems.create_index("sku") 
  70      wishitems.insert(DataObject(custid="0020", sku="ANVIL-001")) 
  71      wishitems.insert(DataObject(custid="0020", sku="BRDSD-001")) 
  72      wishitems.insert(DataObject(custid="0020", sku="MAGNT-001")) 
  73      wishitems.insert(DataObject(custid="0030", sku="MAGNT-001")) 
  74      wishitems.insert(DataObject(custid="0030", sku="MAGLS-001")) 
  75   
  76      # print a particular customer name  
  77      # (unique indexes will return a single item; non-unique 
  78      # indexes will return a list of all matching items) 
  79      print customers.id["0030"].name 
  80   
  81      # print all items sold by the pound 
  82      for item in catalog.query(unitofmeas="LB"): 
  83          print item.sku, item.descr 
  84   
  85      # print all items that cost more than 10 
  86      for item in catalog.where(lambda o : o.unitprice>10): 
  87          print item.sku, item.descr, item.unitprice 
  88   
  89      # join tables to create queryable wishlists collection 
  90      wishlists = customers.join_on("id") + wishitems.join_on("custid") + catalog.join_on("sku") 
  91   
  92      # print all wishlist items with price > 10 
  93      bigticketitems = wishlists().where(lambda ob : ob.unitprice > 10) 
  94      for item in bigticketitems: 
  95          print item 
  96   
  97      # list all wishlist items in descending order by price 
  98      for item in wishlists().sort("unitprice desc"): 
  99          print item 
 100  """ 
 101   
 102  __version__ = "0.8" 
 103  __versionTime__ = "5 Dec 2015 20:03" 
 104  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
 105   
 106  import sys 
 107  from collections import defaultdict, deque, namedtuple 
 108  from itertools import groupby,ifilter,islice,starmap,repeat 
 109  from operator import attrgetter 
 110  import csv 
 111  import json 
 112  _consumer = deque(maxlen=0) 
 113  do_all = _consumer.extend 
 114   
 115  try: 
 116      from itertools import product 
 117  except ImportError: 
118 - def product(*seqs):
119 tupleseqs = [[(x,) for x in s] for s in seqs] 120 def _product(*seqs): 121 if len(seqs) == 1: 122 for x in seqs[0]: 123 yield x 124 else: 125 for x in seqs[0]: 126 for p in _product(*seqs[1:]): 127 yield x+p
128 for p in _product(*tupleseqs): 129 yield p 130 131 try: 132 t = basestring 133 except NameError: 134 basestring = str 135 136 __all__ = ["DataObject", "Table", "JoinTerm", "PivotTable"] 137
138 -def _object_attrnames(obj):
139 if hasattr(obj, "__dict__"): 140 # normal object 141 return obj.__dict__.keys() 142 elif isinstance(obj, tuple) and hasattr(obj, "_fields"): 143 # namedtuple 144 return obj._fields 145 elif hasattr(obj, "__slots__"): 146 return obj.__slots__ 147 else: 148 raise ValueError("object with unknown attributes")
149
150 -def _to_json(obj):
151 if hasattr(obj, "__dict__"): 152 # normal object 153 return json.dumps(obj.__dict__) 154 elif isinstance(obj, tuple) and hasattr(obj, "_fields"): 155 # namedtuple 156 return json.dumps(dict(zip(obj._fields, obj))) 157 elif hasattr(obj, "__slots__"): 158 return json.dumps({k:v for k,v in zip(obj.__slots__, 159 (getattr(obj,a) for a in obj.__slots__))}) 160 else: 161 raise ValueError("object with unknown attributes")
162
163 -class DataObject(object):
164 """A generic semi-mutable object for storing data values in a table. Attributes 165 can be set by passing in named arguments in the constructor, or by setting them 166 as C{object.attribute = value}. New attributes can be added any time, but updates 167 are ignored. Table joins are returned as a Table of DataObjects."""
168 - def __init__(self, **kwargs):
169 if kwargs: 170 self.__dict__.update(kwargs)
171 - def __repr__(self):
172 return repr(self.__dict__)
173 - def __setattr__(self, attr, val):
174 # make all attributes write-once 175 if attr not in self.__dict__: 176 super(DataObject,self).__setattr__(attr,val)
177 - def __getitem__(self, k):
178 if hasattr(self,k): 179 return getattr(self,k) 180 else: 181 raise KeyError("object has no such attribute " + k)
182 - def __eq__(self, other):
183 return self.__dict__ == other.__dict__
184
185 -class _ObjIndex(object):
186 - def __init__(self, attr):
187 self.attr = attr 188 self.obs = defaultdict(list) 189 self.is_unique = False
190 - def __setitem__(self, k, v):
191 self.obs[k].append(v)
192 - def __getitem__(self, k):
193 return self.obs.get(k,[])
194 - def __len__(self):
195 return len(self.obs)
196 - def __iter__(self):
197 return iter(self.obs)
198 - def keys(self):
199 return sorted(self.obs.keys())
200 - def items(self):
201 return self.obs.items()
202 - def remove(self, obj):
203 try: 204 k = getattr(obj, self.attr) 205 self.obs[k].remove(obj) 206 except (ValueError,AttributeError,KeyError): 207 pass
208 - def __contains__(self, key):
209 return key in self.obs
210 - def copy_template(self):
211 return self.__class__(self.attr)
212
213 -class _UniqueObjIndex(_ObjIndex):
214 - def __init__(self, attr, accept_none=False):
215 self.attr = attr 216 self.obs = {} 217 self.is_unique = True 218 self.accept_none = accept_none 219 self.none_values = set()
220 - def __setitem__(self, k, v):
221 if k: 222 if k not in self.obs: 223 self.obs[k] = v 224 else: 225 raise KeyError("duplicate key value %s" % k) 226 else: 227 self.none_values.add(v)
228 - def __getitem__(self, k):
229 if k: 230 return [self.obs.get(k)] if k in self.obs else [] 231 else: 232 return list(self.none_values)
233 - def __contains__(self, k):
234 if k: 235 return k in self.obs 236 else: 237 return self.accept_none and self.none_values
238 - def keys(self):
239 return sorted(self.obs.keys()) + ([None,] if self.none_values else [])
240 - def items(self):
241 return [(k,[v]) for k,v in self.obs.items()]
242 - def remove(self, obj):
243 k = getattr(obj, self.attr) 244 if k: 245 if k in self.obs: 246 del self.obs[k] 247 else: 248 self.none_values.discard(obj)
249
250 -class _ObjIndexWrapper(object):
251 - def __init__(self, ind):
252 self._index = ind
253 - def __getattr__(self, attr):
254 return getattr(self._index, attr)
255 - def __getitem__(self, k):
256 ret = Table() 257 if k in self._index: 258 ret.insert_many(self._index[k]) 259 return ret
260 - def __contains__(self, k):
261 return k in self._index
262
263 -class _UniqueObjIndexWrapper(object):
264 - def __init__(self, ind):
265 self._index = ind
266 - def __getattr__(self, attr):
267 return getattr(self._index, attr)
268 - def __contains__(self, k):
269 return k in self._index
270 - def __getitem__(self, k):
271 if k: 272 return self._index[k][0] 273 else: 274 ret = Table() 275 if k in self._index: 276 ret.insert_many(self._index[k]) 277 return ret
278
279 -class _IndexAccessor(object):
280 - def __init__(self, table):
281 self.table = table
282
283 - def __getattr__(self, attr):
284 """A quick way to query for matching records using their indexed attributes. The attribute 285 name is used to locate the index, and returns a wrapper on the index. This wrapper provides 286 dict-like access to the underlying records in the table, as in:: 287 288 employees.socsecnum["000-00-0000"] 289 customers.zipcode["12345"] 290 291 The behavior differs slightly for unique and non-unique indexes: 292 - if the index is unique, then retrieving a matching object, will return just the object; 293 if there is no matching object, C{KeyError} is raised (making a table with a unique 294 index behave very much like a Python dict) 295 - if the index is non-unique, then all matching objects will be returned in a new Table, 296 just as if a regular query had been performed; if no objects match the key value, an empty 297 Table is returned and no exception is raised. 298 299 If there is no index defined for the given attribute, then C{AttributeError} is raised. 300 """ 301 if attr in self.table._indexes: 302 ret = self.table._indexes[attr] 303 if isinstance(ret, _UniqueObjIndex): 304 ret = _UniqueObjIndexWrapper(ret) 305 if isinstance(ret, _ObjIndex): 306 ret = _ObjIndexWrapper(ret) 307 return ret 308 raise AttributeError("Table '%s' has no index '%s'" % (self.table_name, attr))
309 310
311 -class Table(object):
312 """Table is the main class in C{littletable}, for representing a collection of DataObjects or 313 user-defined objects with publicly accessible attributes or properties. Tables can be: 314 - created, with an optional name, using standard Python L{C{Table() constructor}<__init__>} 315 - indexed, with multiple indexes, with unique or non-unique values, see L{create_index} 316 - queried, specifying values to exact match in the desired records, see L{where} 317 - filtered (using L{where}), using a simple predicate function to match desired records; 318 useful for selecting using inequalities or compound conditions 319 - accessed directly for keyed values, using C{table.indexattribute[key]} - see L{__getattr__} 320 - joined, using L{join_on} to identify attribute to be used for joining with another table, and 321 L{join} or operator '+' to perform the actual join 322 - pivoted, using L{pivot} to create a nested structure of sub-tables grouping objects 323 by attribute values 324 - grouped, using L{groupby} to create a summary table of computed values, grouped by a key 325 attribute 326 - L{imported<csv_import>}/L{exported<csv_export>} to CSV-format files 327 Queries and joins return their results as new Table objects, so that queries and joins can 328 be easily performed as a succession of operations. 329 """
330 - def __init__(self, table_name=''):
331 """Create a new, empty Table. 332 @param table_name: name for Table 333 @type table_name: string (optional) 334 """ 335 self(table_name) 336 self.obs = [] 337 self._indexes = {} 338 self._uniqueIndexes = [] 339 self.by = _IndexAccessor(self)
340
341 - def __len__(self):
342 """Return the number of objects in the Table.""" 343 return len(self.obs)
344
345 - def __iter__(self):
346 """Create an iterator over the objects in the Table.""" 347 return iter(self.obs)
348
349 - def __getitem__(self, i):
350 """Provides direct indexed/sliced access to the Table's underlying list of objects.""" 351 if isinstance(i, slice): 352 ret = self.copy_template() 353 ret.insert_many(self.obs[i]) 354 return ret 355 else: 356 return self.obs[i]
357
358 - def __getattr__(self, attr):
359 """A quick way to query for matching records using their indexed attributes. The attribute 360 name is used to locate the index, and returns a wrapper on the index. This wrapper provides 361 dict-like access to the underlying records in the table, as in:: 362 363 employees.socsecnum["000-00-0000"] 364 customers.zipcode["12345"] 365 366 The behavior differs slightly for unique and non-unique indexes: 367 - if the index is unique, then retrieving a matching object, will return just the object; 368 if there is no matching object, C{KeyError} is raised (making a table with a unique 369 index behave very much like a Python dict) 370 - if the index is non-unique, then all matching objects will be returned in a new Table, 371 just as if a regular query had been performed; if no objects match the key value, an empty 372 Table is returned and no exception is raised. 373 374 If there is no index defined for the given attribute, then C{AttributeError} is raised. 375 """ 376 if attr in self._indexes: 377 ret = self._indexes[attr] 378 if isinstance(ret, _UniqueObjIndex): 379 ret = _UniqueObjIndexWrapper(ret) 380 if isinstance(ret, _ObjIndex): 381 ret = _ObjIndexWrapper(ret) 382 return ret 383 raise AttributeError("Table '%s' has no index '%s'" % (self.table_name, attr))
384
385 - def __bool__(self):
386 return bool(self.obs)
387 388 __nonzero__ = __bool__ 389
390 - def __add__(self, other):
391 """Support UNION of 2 tables using "+" operator.""" 392 if isinstance(other, JoinTerm): 393 # special case if added to a JoinTerm, do join, not union 394 return other + self 395 elif isinstance(other, Table): 396 # if other is another Table, just union them 397 return self.union(other) 398 else: 399 # assume other is a sequence of some sort, insert all elements 400 return self.clone().insert_many(other)
401
402 - def __iadd__(self, other):
403 """Support UNION of 2 tables using "+=" operator.""" 404 return self.insert_many(other)
405
406 - def union(self, other):
407 return self.clone().insert_many(other.obs)
408
409 - def __call__(self, table_name):
410 """A simple way to assign a name to a table, such as those 411 dynamically created by joins and queries. 412 @param table_name: name for Table 413 @type table_name: string 414 """ 415 self.table_name = table_name 416 return self
417
418 - def copy_template(self, name=None):
419 """Create empty copy of the current table, with copies of all 420 index definitions. 421 """ 422 ret = Table(self.table_name) 423 #~ for k,v in self._indexes.items(): 424 #~ ret._indexes[k] = v.copy_template() 425 ret._indexes.update(dict((k,v.copy_template()) for k,v in self._indexes.items())) 426 if name is not None: 427 ret(name) 428 return ret
429
430 - def clone(self, name=None):
431 """Create full copy of the current table, including table contents 432 and index definitions. 433 """ 434 ret = self.copy_template() 435 ret.insert_many(self.obs) 436 if name is not None: 437 ret(name) 438 return ret
439
440 - def create_index(self, attr, unique=False, accept_none=False):
441 """Create a new index on a given attribute. 442 If C{unique} is True and records are found in the table with duplicate 443 attribute values, the index is deleted and C{KeyError} is raised. 444 445 If the table already has an index on the given attribute, then no 446 action is taken and no exception is raised. 447 @param attr: the attribute to be used for indexed access and joins 448 @type attr: string 449 @param unique: flag indicating whether the indexed field values are 450 expected to be unique across table entries 451 @type unique: boolean 452 @param accept_none: flag indicating whether None is an acceptable 453 unique key value for this attribute 454 @type accept_none: boolean 455 """ 456 if attr in self._indexes: 457 return self 458 459 if unique: 460 self._indexes[attr] = _UniqueObjIndex(attr,accept_none) 461 self._uniqueIndexes = [ind for ind in self._indexes.values() if ind.is_unique] 462 else: 463 self._indexes[attr] = _ObjIndex(attr) 464 accept_none = True 465 ind = self._indexes[attr] 466 try: 467 for obj in self.obs: 468 if hasattr(obj, attr): 469 obval = getattr(obj, attr) or None 470 else: 471 obval = None 472 if obval or accept_none: 473 ind[obval] = obj 474 else: 475 raise KeyError("None is not an allowed key") 476 return self 477 478 except KeyError: 479 del self._indexes[attr] 480 self._uniqueIndexes = [ind for ind in self._indexes.values() if ind.is_unique] 481 raise
482
483 - def delete_index(self, attr):
484 """Deletes an index from the Table. Can be used to drop and rebuild an index, 485 or to convert a non-unique index to a unique index, or vice versa. 486 @param attr: name of an indexed attribute 487 @type attr: string 488 """ 489 if attr in self._indexes: 490 del self._indexes[attr] 491 self._uniqueIndexes = [ind for ind in self._indexes.values() if ind.is_unique]
492
493 - def insert(self, obj):
494 """Insert a new object into this Table. 495 @param obj: any Python object 496 Objects can be constructed using the defined DataObject type, or they can 497 be any Python object that does not use the Python C{__slots__} feature; C{littletable} 498 introspect's the object's C{__dict__} or C{_fields} attributes to obtain join and 499 index attributes and values. 500 501 If the table contains a unique index, and the record to be inserted would add 502 a duplicate value for the indexed attribute, then C{KeyError} is raised, and the 503 object is not inserted. 504 505 If the table has no unique indexes, then it is possible to insert duplicate 506 objects into the table. 507 """ 508 509 # verify new object doesn't duplicate any existing unique index values 510 uniqueIndexes = self._uniqueIndexes #[ind for ind in self._indexes.values() if ind.is_unique] 511 if any((getattr(obj, ind.attr, None) is None and not ind.accept_none) 512 or ( 513 hasattr(obj, ind.attr) and getattr(obj, ind.attr) in ind 514 ) 515 for ind in uniqueIndexes): 516 # had a problem, find which one 517 for ind in uniqueIndexes: 518 if (getattr(obj, ind.attr, None) is None and not ind.accept_none): 519 raise KeyError("unique key cannot be None or blank for index %s" % ind.attr, obj) 520 if getattr(obj, ind.attr) in ind: 521 raise KeyError("duplicate unique key value '%s' for index %s" % (getattr(obj,ind.attr), ind.attr), obj) 522 523 self.obs.append(obj) 524 for attr, ind in self._indexes.items(): 525 obval = getattr(obj, attr) 526 ind[obval] = obj 527 return self
528
529 - def insert_many(self, it):
530 """Inserts a collection of objects into the table.""" 531 #~ for ob in it: 532 #~ self.insert(ob) 533 do_all(self.insert(ob) for ob in it) 534 return self
535
536 - def remove(self, ob):
537 """Removes an object from the table. If object is not in the table, then 538 no action is taken and no exception is raised.""" 539 # remove from indexes 540 #~ for attr,ind in self._indexes.items(): 541 #~ ind.remove(ob) 542 do_all(ind.remove(ob) for attr,ind in self._indexes.items()) 543 544 # remove from main object list 545 self.obs.remove(ob)
546
547 - def remove_many(self, it):
548 """Removes a collection of objects from the table.""" 549 #~ for ob in it: 550 #~ self.remove(ob) 551 do_all(self.remove(ob) for ob in it)
552
553 - def _query_attr_sort_fn(self, attr_val):
554 attr,v = attr_val 555 if attr in self._indexes: 556 idx = self._indexes[attr] 557 if v in idx: 558 return len(idx[v]) 559 else: 560 return 0 561 else: 562 return 1e9
563
564 - def where(self, *args, **kwargs):
565 """ 566 Retrieves matching objects from the table, based on given 567 named parameters. If multiple named parameters are given, then 568 only objects that satisfy all of the query criteria will be returned. 569 570 Special named args: 571 - C{_orderby="attr,..."} - resulting table should sort content objects 572 by the C{attr}s given in a comma-separated string; to sort in 573 descending order, reference the attribute as C{attr desc}. 574 575 - C{_limit} - maximum number of records to return 576 577 @param wherefn: a method or lambda that returns a boolean result, as in:: 578 579 lambda ob : ob.unitprice > 10 580 581 @type wherefn: callable(object) returning boolean 582 583 @param kwargs: attributes for selecting records, given as additional 584 named arguments of the form C{attrname="attrvalue"}. 585 586 @return: a new Table containing the matching objects 587 """ 588 # extract meta keys 589 flags = dict((k,v) for k,v in kwargs.items() if k.startswith("_")) 590 for f in flags: 591 del kwargs[f] 592 593 if kwargs: 594 # order query criteria in ascending order of number of matching items 595 # for each individual given attribute; this will minimize the number 596 # of filtering records that each subsequent attribute will have to 597 # handle 598 kwargs = kwargs.items() 599 if len(kwargs) > 1 and len(self.obs) > 100: 600 kwargs = sorted(kwargs, key=self._query_attr_sort_fn) 601 602 ret = self 603 for k,v in kwargs: 604 newret = ret.copy_template() 605 if k in ret._indexes: 606 newret.insert_many(ret._indexes[k][v]) 607 else: 608 newret.insert_many( r for r in ret.obs 609 if hasattr(r,k) and getattr(r,k) == v ) 610 ret = newret 611 else: 612 ret = self.clone() 613 614 # apply flags 615 # sort before clip 616 if flags: 617 if '_orderby' in flags: 618 ret.sort(flags['_orderby']) 619 if '_limit' in flags: 620 del ret.obs[flags['_limit']:] 621 622 if args: 623 wherefn = args[0] 624 newret = ret.copy_template() 625 newret.insert_many(ifilter(wherefn, ret.obs)) 626 ret = newret 627 628 return ret
629
630 - def delete(self, **kwargs):
631 """Deletes matching objects from the table, based on given 632 named parameters. If multiple named parameters are given, then 633 only objects that satisfy all of the query criteria will be removed. 634 @param kwargs: attributes for selecting records, given as additional 635 named arguments of the form C{attrname="attrvalue"}. 636 @return: the number of objects removed from the table 637 """ 638 if not kwargs: 639 return 0 640 641 affected = self.where(**kwargs) 642 self.remove_many(affected) 643 return len(affected)
644
645 - def sort(self, key, reverse=False):
646 """Sort Table in place, using given fields as sort key. 647 @param key: if this is a string, it is a comma-separated list of field names, 648 optionally followed by 'desc' to indicate descending sort instead of the 649 default ascending sort; if a list or tuple, it is a list or tuple of field names 650 or field names with ' desc' appended; if it is a function, then it is the 651 function to be used as the sort key function 652 @return: self 653 """ 654 if isinstance(key, (basestring,list,tuple)): 655 if isinstance(key, basestring): 656 attrdefs = [s.strip() for s in key.split(',')] 657 # leftmost attr is the most primary sort key, so do succession of 658 # sorts from right to left 659 attr_orders = [(a.split()+['asc',])[:2] for a in attrdefs][::-1] 660 else: 661 # attr definitions were already resolved to a sequence by the caller 662 attr_orders = key 663 attrs = [attr for attr,order in attr_orders] 664 665 #special optimization if all orders are ascending or descending 666 if all(order=='asc' for attr,order in attr_orders): 667 self.obs.sort(key=attrgetter(*attrs), reverse=reverse) 668 elif all(order=='desc' for attr,order in attr_orders): 669 self.obs.sort(key=attrgetter(*attrs), reverse=not reverse) 670 else: 671 # mix of ascending and descending sorts, have to do succession of sorts 672 #~ for attr,order in attr_orders: 673 #~ self.obs.sort(key=lambda ob:getattr(ob,attr), reverse=(order=="desc")) 674 do_all(self.obs.sort(key=attrgetter(attr), reverse=(order=="desc")) 675 for attr,order in attr_orders) 676 else: 677 keyfn = key 678 self.obs.sort(key=keyfn, reverse=reverse) 679 return self
680
681 - def select(self, fields, **exprs):
682 """ 683 Create a new table containing a subset of attributes, with optionally 684 newly-added fields computed from each rec in the original table. 685 686 Special kwargs: 687 - C{_unique=True} - only return a set of unique rows 688 689 @param fields: list of strings, or single space-delimited string, listing attribute name to be included in the output 690 @type fields: list, or space-delimited string 691 @param exprs: one or more named callable arguments, to compute additional fields using the given function 692 @type exprs: C{name=callable}, callable takes the record as an argument, and returns the new attribute value 693 If a string is passed as a callable, this string will be used using string formatting, given the record 694 as a source of interpolation values. For instance, C{fullName = '%(lastName)s, %(firstName)s'} 695 696 """ 697 if isinstance(fields, basestring): 698 fields = fields.split() 699 700 unique = exprs.pop('_unique', False) 701 702 def _makeStringCallable(expr): 703 if isinstance(expr,basestring): 704 return lambda rec: expr % rec 705 else: 706 return expr
707 708 exprs = dict((k, _makeStringCallable(v)) for k,v in exprs) 709 710 raw_tuples = [] 711 for rec in self.obs: 712 attrvalues = tuple(getattr(rec, fieldname, None) for fieldname in fields) 713 if exprs: 714 attrvalues += tuple(expr(rec) for expr in exprs.values()) 715 raw_tuples.append(attrvalues) 716 717 if unique: 718 raw_tuples = list(set(raw_tuples)) 719 720 allNames = tuple(fields) + tuple(exprs.keys()) 721 return Table().insert_many(DataObject(**dict(zip(allNames, outtuple))) for outtuple in raw_tuples)
722
723 - def format(self, *fields, **exprs):
724 """ 725 Create a new table with all string formatted attribute values, typically in preparation for 726 formatted output. 727 @param fields: one or more strings, each string is an attribute name to be included in the output 728 @type fields: string (multiple) 729 @param exprs: one or more named string arguments, to format the given attribute with a formatting string 730 @type exprs: name=string 731 """ 732 #~ select_exprs = {} 733 #~ for f in fields: 734 #~ select_exprs[f] = lambda r : str(getattr,f,None) 735 fields = set(fields) 736 select_exprs = dict((f, lambda r,f=f : str(getattr,f,None)) for f in fields) 737 738 for ename,expr in exprs.items(): 739 if isinstance(expr, basestring): 740 if re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', expr): 741 select_exprs[ename] = lambda r : str(getattr(r, expr, None)) 742 else: 743 if "{}" in expr or "{0}" in expr: 744 select_exprs[ename] = lambda r : expr.format(r) 745 else: 746 select_exprs[ename] = lambda r : expr % getattr(r, ename) if hasattr(r, ename) else "None" 747 748 return self.select(**select_exprs)
749
750 - def join(self, other, attrlist=None, auto_create_indexes=True, **kwargs):
751 """ 752 Join the objects of one table with the objects of another, based on the given 753 matching attributes in the named arguments. The attrlist specifies the attributes to 754 be copied from the source tables - if omitted, all attributes will be copied. Entries 755 in the attrlist may be single attribute names, or if there are duplicate names in both 756 tables, then a C{(table,attributename)} tuple can be given to disambiguate which 757 attribute is desired. A C{(table,attributename,alias)} tuple can also be passed, to 758 rename an attribute from a source table. 759 760 This method may be called directly, or can be constructed using the L{join_on} method and 761 the '+' operator. Using this syntax, the join is specified using C{table.join_on("xyz")} 762 to create a JoinTerm containing both table and joining attribute. Multiple JoinTerm 763 or tables can be added to construct a compound join expression. When complete, the 764 join expression gets executed by calling the resulting join definition, 765 using C{join_expression([attrlist])}. 766 767 @param other: other table to join to 768 @param attrlist: list of attributes to be copied to the new joined table; if 769 none provided, all attributes of both tables will be used (taken from the first 770 object in each table) 771 @type attrlist: string, or list of strings or C{(table,attribute[,alias])} tuples 772 (list may contain both strings and tuples) 773 @param kwargs: attributes to join on, given as additional named arguments 774 of the form C{table1attr="table2attr"}, or a dict mapping attribute names. 775 @returns: a new Table containing the joined data as new DataObjects 776 """ 777 thiscol,othercol = kwargs.items()[0] 778 779 retname = ("(%s:%s^%s:%s)" % 780 (self.table_name, thiscol, other.table_name, othercol)) 781 # make sure both tables contain records to join - if not, just return empty list 782 if not (self.obs and other.obs): 783 return Table(retname) 784 785 if isinstance(attrlist, basestring): 786 attrlist = re.split(r'[,\s]+', attrlist) 787 788 # expand attrlist to full (table, name, alias) tuples 789 thisnames = set(_object_attrnames(self.obs[0])) 790 othernames = set(_object_attrnames(other.obs[0])) 791 fullcols = [] 792 if attrlist is not None: 793 for col in attrlist: 794 if isinstance(col, tuple): 795 # assume col contains at least (table, colname), fill in alias if missing 796 # to be same as colname 797 fullcols.append((col + (col[1],))[:3]) 798 else: 799 if col in thisnames: 800 fullcols.append( (self, col, col) ) 801 elif col in othernames: 802 fullcols.append( (other, col, col) ) 803 else: 804 raise ValueError("join attribute not found: " + col) 805 else: 806 fullcols = [(self,n,n) for n in thisnames] 807 fullcols += [(other,n,n) for n in othernames] 808 809 thiscols = list(ifilter(lambda o:o[0] is self, fullcols)) 810 othercols = list(ifilter(lambda o:o[0] is other, fullcols)) 811 812 if auto_create_indexes: 813 if thiscol not in self._indexes: 814 self.create_index(thiscol) 815 if othercol not in other._indexes: 816 other.create_index(othercol) 817 818 thiscolindex = othercolindex = None 819 if thiscol in self._indexes: 820 thiscolindex = self._indexes[thiscol] 821 else: 822 raise ValueError("indexed attribute required for join: "+thiscol) 823 if othercol in other._indexes: 824 othercolindex = other._indexes[othercol] 825 else: 826 raise ValueError("indexed attribute required for join: "+othercol) 827 828 # use table with fewer keys to drive join 829 if len(thiscolindex) < len(othercolindex): 830 shortindex, longindex = (thiscolindex, othercolindex) 831 swap = False 832 else: 833 shortindex, longindex = (othercolindex, thiscolindex) 834 swap = True 835 836 # find matching rows 837 #~ matchingrows = [] 838 #~ for key,rows in shortindex.items(): 839 #~ if key in longindex: 840 #~ if swap: 841 #~ matchingrows.append( (longindex[key], rows) ) 842 #~ else: 843 #~ matchingrows.append( (rows, longindex[key]) ) 844 matchingrows = list((longindex[key],rows) if swap else (rows, longindex[key]) 845 for key,rows in shortindex.items()) 846 847 joinrows = [] 848 for thisrows,otherrows in matchingrows: 849 for trow,orow in product(thisrows,otherrows): 850 retobj = DataObject() 851 #~ for _,c,a in thiscols: 852 #~ setattr(retobj, a, getattr(trow,c)) 853 do_all(setattr(retobj, a, getattr(trow,c)) for _,c,a in thiscols) 854 #~ for _,c,a in othercols: 855 #~ setattr(retobj, a, getattr(orow,c)) 856 do_all(setattr(retobj, a, getattr(orow,c)) for _,c,a in othercols) 857 joinrows.append(retobj) 858 859 ret = Table(retname) 860 for tbl,collist in zip([self,other],[thiscols,othercols]): 861 for _,c,a in collist: 862 if c in tbl._indexes: 863 ret.create_index(a) # no unique indexes in join results 864 ret.insert_many(joinrows) 865 return ret
866
867 - def join_on(self, attr):
868 """Creates a JoinTerm in preparation for joining with another table, to 869 indicate what attribute should be used in the join. Only indexed attributes 870 may be used in a join. 871 @param attr: attribute name to join from this table (may be different 872 from the attribute name in the table being joined to) 873 @type attr: string 874 @returns: L{JoinTerm}""" 875 if attr not in self._indexes: 876 raise ValueError("can only join on indexed attributes") 877 return JoinTerm(self, attr)
878
879 - def pivot(self, attrlist):
880 """Pivots the data using the given attributes, returning a L{PivotTable}. 881 @param attrlist: list of attributes to be used to construct the pivot table 882 @type attrlist: list of strings, or string of space-delimited attribute names 883 """ 884 if isinstance(attrlist, basestring): 885 attrlist = attrlist.split() 886 if all(a in self._indexes for a in attrlist): 887 return PivotTable(self,[],attrlist) 888 else: 889 raise ValueError("pivot can only be called using indexed attributes")
890
891 - def _import(self, source, transforms=None, reader=csv.DictReader):
892 close_on_exit = False 893 if isinstance(source, basestring): 894 source = open(source) 895 close_on_exit = True 896 try: 897 csvdata = reader(source) 898 self.insert_many(DataObject(**s) for s in csvdata) 899 if transforms: 900 for attr,fn in transforms.items(): 901 default = None 902 if isinstance(fn,tuple): 903 fn,default = fn 904 objfn = lambda obj : fn(getattr(obj,attr)) 905 self.add_field(attr, objfn, default) 906 finally: 907 if close_on_exit: 908 source.close()
909
910 - def csv_import(self, csv_source, transforms=None):
911 """Imports the contents of a CSV-formatted file into this table. 912 @param csv_source: CSV file - if a string is given, the file with that name will be 913 opened, read, and closed; if a file object is given, then that object 914 will be read as-is, and left for the caller to be closed. 915 @type csv_source: string or file 916 @param transforms: dict of functions by attribute name; if given, each 917 attribute will be transformed using the corresponding transform; if there is no 918 matching transform, the attribute will be read as a string (default); the 919 transform function can also be defined as a (function, default-value) tuple; if 920 there is an Exception raised by the transform function, then the attribute will 921 be set to the given default value 922 @type transforms: dict (optional) 923 """ 924 return self._import(csv_source, transforms)
925
926 - def _xsv_import(self, xsv_source, transforms=None, splitstr="\t"):
927 xsv_reader = lambda src: csv.DictReader(src, delimiter=splitstr) 928 return self._import(xsv_source, transforms, reader=xsv_reader)
929
930 - def tsv_import(self, xsv_source, transforms=None):
931 """Imports the contents of a tab-separated data file into this table. 932 @param xsv_source: tab-separated data file - if a string is given, the file with that name will be 933 opened, read, and closed; if a file object is given, then that object 934 will be read as-is, and left for the caller to be closed. 935 @type xsv_source: string or file 936 @param transforms: dict of functions by attribute name; if given, each 937 attribute will be transformed using the corresponding transform; if there is no 938 matching transform, the attribute will be read as a string (default); the 939 transform function can also be defined as a (function, default-value) tuple; if 940 there is an Exception raised by the transform function, then the attribute will 941 be set to the given default value 942 @type transforms: dict (optional) 943 """ 944 return self._xsv_import(xsv_source, transforms=transforms, splitstr="\t")
945
946 - def csv_export(self, csv_dest, fieldnames=None):
947 """Exports the contents of the table to a CSV-formatted file. 948 @param csv_dest: CSV file - if a string is given, the file with that name will be 949 opened, written, and closed; if a file object is given, then that object 950 will be written as-is, and left for the caller to be closed. 951 @type csv_dest: string or file 952 @param fieldnames: attribute names to be exported; can be given as a single 953 string with space-delimited names, or as a list of attribute names 954 """ 955 close_on_exit = False 956 if isinstance(csv_dest, basestring): 957 csv_dest = open(csv_dest,'wb') 958 close_on_exit = True 959 try: 960 if fieldnames is None: 961 fieldnames = list(_object_attrnames(self.obs[0])) 962 if isinstance(fieldnames, basestring): 963 fieldnames = fieldnames.split() 964 965 csv_dest.write(','.join(fieldnames) + '\n') 966 csvout = csv.DictWriter(csv_dest, fieldnames, extrasaction='ignore') 967 if hasattr(self.obs[0], "__dict__"): 968 #~ for o in self.obs: 969 #~ csvout.writerow(o.__dict__) 970 do_all(csvout.writerow(o.__dict__) for o in self.obs) 971 else: 972 #~ for o in self.obs: 973 #~ row = dict(starmap(lambda obj, fld: (fld, getattr(obj, fld)), 974 #~ zip(repeat(o), fieldnames))) 975 #~ csvout.writerow(row) 976 do_all(csvout.writerow(dict(starmap(lambda obj, fld: (fld, getattr(obj, fld)), 977 zip(repeat(o), fieldnames)))) for o in self.obs) 978 finally: 979 if close_on_exit: 980 csv_dest.close()
981
982 - def json_import(self, source, transforms=None):
983 """Imports the contents of a JSON data file into this table. 984 @param source: JSON data file - if a string is given, the file with that name will be 985 opened, read, and closed; if a file object is given, then that object 986 will be read as-is, and left for the caller to be closed. 987 @type source: string or file 988 @param transforms: dict of functions by attribute name; if given, each 989 attribute will be transformed using the corresponding transform; if there is no 990 matching transform, the attribute will be read as a string (default); the 991 transform function can also be defined as a (function, default-value) tuple; if 992 there is an Exception raised by the transform function, then the attribute will 993 be set to the given default value 994 @type transforms: dict (optional) 995 """ 996 class _JsonFileReader(object): 997 def __init__(self, src): 998 self.source = src
999 def __iter__(self): 1000 current = '' 1001 for line in self.source: 1002 if current: 1003 current += ' ' 1004 current += line 1005 try: 1006 yield json.loads(current) 1007 current = '' 1008 except Exception: 1009 pass 1010 return self._import(source, transforms=transforms, reader=_JsonFileReader) 1011
1012 - def json_export(self, dest, fieldnames=None):
1013 """Exports the contents of the table to a JSON-formatted file. 1014 @param dest: output file - if a string is given, the file with that name will be 1015 opened, written, and closed; if a file object is given, then that object 1016 will be written as-is, and left for the caller to be closed. 1017 @type dest: string or file 1018 @param fieldnames: attribute names to be exported; can be given as a single 1019 string with space-delimited names, or as a list of attribute names 1020 """ 1021 close_on_exit = False 1022 if isinstance(dest, basestring): 1023 dest = open(dest,'wb') 1024 close_on_exit = True 1025 try: 1026 if isinstance(fieldnames, basestring): 1027 fieldnames = fieldnames.split() 1028 1029 if fieldnames is None: 1030 do_all( 1031 dest.write(_to_json(o)+'\n') for o in self.obs 1032 ) 1033 else: 1034 do_all( 1035 dest.write(json.dumps({f:getattr(o, f) for f in fieldnames})+'\n') 1036 for o in self.obs 1037 ) 1038 finally: 1039 if close_on_exit: 1040 dest.close()
1041
1042 - def add_field(self, attrname, fn, default=None):
1043 """Computes a new attribute for each object in table, or replaces an 1044 existing attribute in each record with a computed value 1045 @param attrname: attribute to compute for each object 1046 @type attrname: string 1047 @param fn: function used to compute new attribute value, based on 1048 other values in the object, as in:: 1049 1050 lambda ob : ob.commission_pct/100.0 * ob.gross_sales 1051 1052 @type fn: function(obj) returns value 1053 @param default: value to use if an exception is raised while trying 1054 to evaluate fn 1055 """ 1056 #~ for rec in self: 1057 def _addFieldToRec(rec, fn=fn, default=default): 1058 try: 1059 val = fn(rec) 1060 except Exception: 1061 val = default 1062 if isinstance(rec, DataObject): 1063 object.__setattr__(rec, attrname, val) 1064 else: 1065 setattr(rec, attrname, val)
1066 do_all(_addFieldToRec(r) for r in self) 1067 return self 1068
1069 - def addfield(self, attrname, fn, default=None):
1070 # deprecated in favor of add_field 1071 return self.add_field(attrname, fn, default)
1072
1073 - def groupby(self, keyexpr, **outexprs):
1074 """simple prototype of group by, with support for expressions in the group-by clause 1075 and outputs 1076 @param keyexpr: grouping field and optional expression for computing the key value; 1077 if a string is passed 1078 @type keyexpr: string or tuple 1079 @param outexprs: named arguments describing one or more summary values to 1080 compute per key 1081 @type outexprs: callable, taking a sequence of objects as input and returning 1082 a single summary value 1083 """ 1084 if isinstance(keyexpr, basestring): 1085 keyattrs = keyexpr.split() 1086 keyfn = lambda o : tuple(getattr(o, k) for k in keyattrs) 1087 1088 elif isinstance(keyexpr, tuple): 1089 keyattrs = (keyexpr[0],) 1090 keyfn = keyexpr[1] 1091 1092 groupedobs = defaultdict(list) 1093 #~ for ob in self.obs: 1094 #~ groupedobs[keyfn(ob)].append(ob) 1095 do_all(groupedobs[keyfn(ob)].append(ob) for ob in self.obs) 1096 1097 tbl = Table() 1098 #~ for k in keyattrs: 1099 #~ tbl.create_index(k, unique=(len(keyattrs)==1)) 1100 do_all(tbl.create_index(k, unique=(len(keyattrs)==1)) for k in keyattrs) 1101 for key, recs in sorted(groupedobs.iteritems()): 1102 groupobj = DataObject(**dict(zip(keyattrs, key))) 1103 #~ for subkey, expr in outexprs.items(): 1104 #~ setattr(groupobj, subkey, expr(recs)) 1105 do_all(setattr(groupobj, subkey, expr(recs)) 1106 for subkey, expr in outexprs.items()) 1107 tbl.insert(groupobj) 1108 return tbl
1109
1110 - def run(self):
1111 return self
1112
1113 - def unique(self):
1114 ret = self.copy_template() 1115 seen = set() 1116 for rec in self: 1117 reckey = tuple(rec.__dict__.values()) 1118 if reckey not in seen: 1119 seen.add(reckey) 1120 ret.insert(rec) 1121 return ret
1122
1123 -class PivotTable(Table):
1124 """Enhanced Table containing pivot results from calling table.pivot(). 1125 """
1126 - def __init__(self, parent, attr_val_path, attrlist):
1127 """PivotTable initializer - do not create these directly, use 1128 L{Table.pivot}. 1129 """ 1130 super(PivotTable,self).__init__() 1131 self._attr_path = attr_val_path[:] 1132 self._pivot_attrs = attrlist[:] 1133 self._subtable_dict = {} 1134 1135 #~ for k,v in parent._indexes.items(): 1136 #~ self._indexes[k] = v.copy_template() 1137 self._indexes.update(dict((k,v.copy_template()) for k,v in parent._indexes.items())) 1138 if not attr_val_path: 1139 self.insert_many(parent.obs) 1140 else: 1141 attr,val = attr_val_path[-1] 1142 self.insert_many(parent.where(**{attr:val})) 1143 parent._subtable_dict[val] = self 1144 1145 if len(attrlist) > 0: 1146 this_attr = attrlist[0] 1147 sub_attrlist = attrlist[1:] 1148 ind = parent._indexes[this_attr] 1149 self.subtables = [ PivotTable(self, 1150 attr_val_path + [(this_attr,k)], 1151 sub_attrlist) for k in sorted(ind.keys()) ] 1152 else: 1153 self.subtables = []
1154
1155 - def __getitem__(self,val):
1156 if self._subtable_dict: 1157 return self._subtable_dict[val] 1158 else: 1159 return super(PivotTable,self).__getitem__(val)
1160
1161 - def keys(self):
1162 return sorted(self._subtable_dict.keys())
1163
1164 - def items(self):
1165 return sorted(self._subtable_dict.items())
1166
1167 - def values(self):
1168 return [self._subtable_dict.items[k] for k in self.keys()]
1169
1170 - def pivot_key(self):
1171 """Return the set of attribute-value pairs that define the contents of this 1172 table within the original source table. 1173 """ 1174 return self._attr_path
1175
1176 - def pivot_key_str(self):
1177 """Return the pivot_key as a displayable string. 1178 """ 1179 return '/'.join("%s:%s" % (attr,key) for attr,key in self._attr_path)
1180
1181 - def has_subtables(self):
1182 """Return whether this table has further subtables. 1183 """ 1184 return bool(self.subtables)
1185
1186 - def dump(self, out=sys.stdout, row_fn=repr, limit=-1, indent=0):
1187 """Dump out the contents of this table in a nested listing. 1188 @param out: output stream to write to 1189 @param row_fn: function to call to display individual rows 1190 @param limit: number of records to show at deepest level of pivot (-1=show all) 1191 @param indent: current nesting level 1192 """ 1193 NL = '\n' 1194 if indent: 1195 out.write(" "*indent + self.pivot_key_str()) 1196 else: 1197 out.write("Pivot: %s" % ','.join(self._pivot_attrs)) 1198 out.write(NL) 1199 if self.has_subtables(): 1200 #~ for sub in self.subtables: 1201 #~ if sub: 1202 #~ sub.dump(out, row_fn, limit, indent+1) 1203 do_all(sub.dump(out, row_fn, limit, indent+1) for sub in self.subtables if sub) 1204 else: 1205 if limit >= 0: 1206 showslice = slice(0,limit) 1207 else: 1208 showslice = slice(None,None) 1209 #~ for r in self.obs[showslice]: 1210 #~ out.write(" "*(indent+1) + row_fn(r) + NL) 1211 do_all(out.write(" "*(indent+1) + row_fn(r) + NL) 1212 for r in self.obs[showslice]) 1213 out.flush()
1214
1215 - def dump_counts(self, out=sys.stdout, count_fn=len, colwidth=10):
1216 """Dump out the summary counts of entries in this pivot table as a tabular listing. 1217 @param out: output stream to write to 1218 """ 1219 if len(self._pivot_attrs) == 1: 1220 out.write("Pivot: %s\n" % ','.join(self._pivot_attrs)) 1221 maxkeylen = max(len(str(k)) for k in self.keys()) 1222 maxvallen = colwidth 1223 keytally = {} 1224 for k, sub in self.items(): 1225 sub_v = count_fn(sub) 1226 maxvallen = max(maxvallen, len(str(sub_v))) 1227 keytally[k] = sub_v 1228 for k,sub in self.items(): 1229 out.write("%-*.*s " % (maxkeylen,maxkeylen,k)) 1230 out.write("%*s\n" % (maxvallen,keytally[k])) 1231 elif len(self._pivot_attrs) == 2: 1232 out.write("Pivot: %s\n" % ','.join(self._pivot_attrs)) 1233 maxkeylen = max(max(len(str(k)) for k in self.keys()),5) 1234 maxvallen = max(max(len(str(k)) for k in self.subtables[0].keys()),colwidth) 1235 keytally = dict((k,0) for k in self.subtables[0].keys()) 1236 out.write("%*s " % (maxkeylen,'')) 1237 out.write(' '.join("%*.*s" % (maxvallen,maxvallen,k) for k in self.subtables[0].keys())) 1238 out.write(' %*s\n' % (maxvallen, 'Total')) 1239 for k,sub in self.items(): 1240 out.write("%-*.*s " % (maxkeylen,maxkeylen,k)) 1241 for kk,ssub in sub.items(): 1242 ssub_v = count_fn(ssub) 1243 out.write("%*d " % (maxvallen,ssub_v)) 1244 keytally[kk] += ssub_v 1245 maxvallen = max(maxvallen, len(str(ssub_v))) 1246 sub_v = count_fn(sub) 1247 maxvallen = max(maxvallen, len(str(sub_v))) 1248 out.write("%*d\n" % (maxvallen,sub_v)) 1249 out.write('%-*.*s ' % (maxkeylen,maxkeylen,"Total")) 1250 out.write(' '.join("%*d" % (maxvallen,tally) for k,tally in sorted(keytally.items()))) 1251 out.write(" %*d\n" % (maxvallen,sum(tally for k,tally in keytally.items()))) 1252 else: 1253 raise ValueError("can only dump summary counts for 1 or 2-attribute pivots")
1254
1255 - def summary_counts(self, fn=None, col=None, summarycolname=None):
1256 """Dump out the summary counts of this pivot table as a Table. 1257 """ 1258 if summarycolname is None: 1259 summarycolname = col 1260 ret = Table() 1261 topattr = self._pivot_attrs[0] 1262 #~ for attr in self._pivot_attrs: 1263 #~ ret.create_index(attr) 1264 do_all(ret.create_index(attr) for attr in self._pivot_attrs) 1265 if len(self._pivot_attrs) == 1: 1266 for sub in self.subtables: 1267 subattr,subval = sub._attr_path[-1] 1268 attrdict = {subattr:subval} 1269 if fn is None: 1270 attrdict['Count'] = len(sub) 1271 else: 1272 attrdict[summarycolname] = fn(s[col] for s in sub) 1273 ret.insert(DataObject(**attrdict)) 1274 elif len(self._pivot_attrs) == 2: 1275 for sub in self.subtables: 1276 for ssub in sub.subtables: 1277 attrdict = dict(ssub._attr_path) 1278 if fn is None: 1279 attrdict['Count'] = len(ssub) 1280 else: 1281 attrdict[summarycolname] = fn(s[col] for s in ssub) 1282 ret.insert(DataObject(**attrdict)) 1283 elif len(self._pivot_attrs) == 3: 1284 for sub in self.subtables: 1285 for ssub in sub.subtables: 1286 for sssub in ssub.subtables: 1287 attrdict = dict(sssub._attr_path) 1288 if fn is None: 1289 attrdict['Count'] = len(sssub) 1290 else: 1291 attrdict[summarycolname] = fn(s[col] for s in sssub) 1292 ret.insert(DataObject(**attrdict)) 1293 else: 1294 raise ValueError("can only dump summary counts for 1 or 2-attribute pivots") 1295 return ret
1296
1297 -class JoinTerm(object):
1298 """Temporary object created while composing a join across tables using 1299 L{Table.join_on} and '+' addition. JoinTerm's are usually created by 1300 calling join_on on a Table object, as in:: 1301 1302 customers.join_on("id") + orders.join_on("custid") 1303 1304 This join expression would set up the join relationship 1305 equivalent to:: 1306 1307 customers.join(orders, id="custid") 1308 1309 If tables are being joined on attributes that have the same name in 1310 both tables, then a join expression could be created by adding a 1311 JoinTerm of one table directly to the other table:: 1312 1313 customers.join_on("custid") + orders 1314 1315 Once the join expression is composed, the actual join is performed 1316 using function call notation:: 1317 1318 customerorders = customers.join_on("custid") + orders 1319 for custord in customerorders(): 1320 print custord 1321 1322 When calling the join expression, you can optionally specify a 1323 list of attributes as defined in L{Table.join}. 1324 """
1325 - def __init__(self, sourceTable, joinfield):
1326 self.sourcetable = sourceTable 1327 self.joinfield = joinfield 1328 self.jointo = None
1329
1330 - def __add__(self, other):
1331 if isinstance(other, Table): 1332 other = other.join_on(self.joinfield) 1333 if isinstance(other, JoinTerm): 1334 if self.jointo is None: 1335 if other.jointo is None: 1336 self.jointo = other 1337 else: 1338 self.jointo = other() 1339 return self 1340 else: 1341 if other.jointo is None: 1342 return self() + other 1343 else: 1344 return self() + other() 1345 raise ValueError("cannot add object of type '%s' to JoinTerm" % other.__class__.__name__)
1346
1347 - def __radd__(self, other):
1348 if isinstance(other, Table): 1349 return other.join_on(self.joinfield) + self 1350 raise ValueError("cannot add object of type '%s' to JoinTerm" % other.__class__.__name__)
1351
1352 - def __call__(self, attrs=None):
1353 if self.jointo: 1354 other = self.jointo 1355 if isinstance(other, Table): 1356 other = other.join_on(self.joinfield) 1357 ret = self.sourcetable.join(other.sourcetable, attrs, 1358 **{self.joinfield : other.joinfield}) 1359 return ret 1360 else: 1361 return self.sourcetable.query()
1362
1363 - def join_on(self, col):
1364 return self().join_on(col)
1365 1366 1367 if __name__ == "__main__": 1368 1369 # import json in Python 2 or 3 compatible forms 1370 from functools import partial 1371 try: 1372 import simplejson as json 1373 json_dumps = partial(json.dumps, indent=' ') 1374 except ImportError: 1375 import json 1376 json_dumps = partial(json.dumps, indent=2) 1377 1378 1379 rawdata = """\ 1380 Phoenix:AZ:85001:KPHX 1381 Phoenix:AZ:85001:KPHY 1382 Phoenix:AZ:85001:KPHA 1383 Dallas:TX:75201:KDFW""".splitlines() 1384 1385 # load miniDB 1386 stations = Table() 1387 #~ stations.create_index("city") 1388 stations.create_index("stn", unique=True) 1389 1390 fields = "city state zip stn".split() 1391 for d in rawdata: 1392 ob = DataObject() 1393 for k,v in zip(fields, d.split(':')): 1394 setattr(ob,k,v.strip()) 1395 stations.insert(ob) 1396 1397 # perform some queries and deletes 1398 for queryargs in [ 1399 dict(city="Phoenix"), 1400 dict(city="Phoenix", stn="KPHX"), 1401 dict(stn="KPHA", city="Phoenix"), 1402 dict(state="TX"), 1403 dict(city="New York"), 1404 dict(city="Phoenix", _orderby="stn"), 1405 dict(city="Phoenix", _orderby="stn desc"), 1406 ]: 1407 print queryargs, 1408 result = stations.where(**queryargs) 1409 print len(result) 1410 for r in result: print r 1411 print 1412 #~ print stations.delete(city="Phoenix") 1413 #~ print stations.delete(city="Boston") 1414 print list(stations.where()) 1415 print 1416 1417 amfm = Table() 1418 amfm.create_index("stn", unique=True) 1419 amfm.insert(DataObject(stn="KPHY", band="AM")) 1420 amfm.insert(DataObject(stn="KPHX", band="FM")) 1421 amfm.insert(DataObject(stn="KPHA", band="FM")) 1422 amfm.insert(DataObject(stn="KDFW", band="FM")) 1423 1424 try: 1425 amfm.insert(DataObject(stn="KPHA", band="AM")) 1426 except KeyError: 1427 print "duplicate key not allowed" 1428 1429 print 1430 for rec in (stations.join_on("stn") + amfm.join_on("stn") 1431 )(["stn", "city", (amfm,"band","AMFM"), 1432 (stations,"state","st")]).sort("AMFM"): 1433 print repr(rec) 1434 1435 print 1436 for rec in (stations.join_on("stn") + amfm.join_on("stn") 1437 )(["stn", "city", (amfm,"band"), (stations,"state","st")]): 1438 print json_dumps(rec.__dict__) 1439 1440 print 1441 for rec in (stations.join_on("stn") + amfm.join_on("stn"))(): 1442 print json_dumps(rec.__dict__) 1443 1444 print 1445 stations.create_index("state") 1446 pivot = stations.pivot("state") 1447 pivot.dump_counts() 1448 1449 print 1450 amfm.create_index("band") 1451 pivot = (stations.join_on("stn") + amfm)().pivot("state band") 1452 pivot.dump_counts() 1453