Module littletable
[frames] | no frames]

Source Code for Module littletable

   1  # 
   2  # 
   3  # littletable.py 
   4  #  
   5  # littletable is a simple in-memory database for ad-hoc or user-defined objects, 
   6  # supporting simple query and join operations - useful for ORM-like access 
   7  # to a collection of data objects, without dealing with SQL 
   8  # 
   9  # 
  10  # Copyright (c) 2010-2016  Paul T. McGuire 
  11  # 
  12  # Permission is hereby granted, free of charge, to any person obtaining 
  13  # a copy of this software and associated documentation files (the 
  14  # "Software"), to deal in the Software without restriction, including 
  15  # without limitation the rights to use, copy, modify, merge, publish, 
  16  # distribute, sublicense, and/or sell copies of the Software, and to 
  17  # permit persons to whom the Software is furnished to do so, subject to 
  18  # the following conditions: 
  19  # 
  20  # The above copyright notice and this permission notice shall be 
  21  # included in all copies or substantial portions of the Software. 
  22  # 
  23  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  24  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  25  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  26  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  27  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  28  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  29  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  30  # 
  31   
  32  __doc__ = """\ 
  33   
  34  C{littletable} - a Python module to give ORM-like access to a collection of objects 
  35   
  36  The C{littletable} module provides a low-overhead, schema-less, in-memory database access to a  
  37  collection of user objects.  C{littletable} provides a L{DataObject} class for ad hoc creation 
  38  of semi-immutable objects that can be stored in a C{littletable} L{Table}. 
  39   
  40  In addition to basic insert/remove/query/delete access to the contents of a  
  41  Table, C{littletable} offers: 
  42   - simple indexing for improved retrieval performance, and optional enforcing key uniqueness 
  43   - access to objects using indexed attributes 
  44   - simplified joins using '+' operator syntax between annotated Tables 
  45   - the result of any query or join is a new first-class C{littletable} Table 
  46   - pivot on one or two attributes to gather tabulated data summaries 
  47   - easy import/export to CSV and JSON files 
  48   
  49  C{littletable} Tables do not require an upfront schema definition, but simply work off of the 
  50  attributes in the stored values, and those referenced in any query parameters. 
  51   
  52  Here is a simple C{littletable} data storage/retrieval example:: 
  53   
  54      from littletable import Table, DataObject 
  55   
  56      customers = Table('customers') 
  57      customers.create_index("id", unique=True) 
  58      customers.insert(DataObject(id="0010", name="George Jetson")) 
  59      customers.insert(DataObject(id="0020", name="Wile E. Coyote")) 
  60      customers.insert(DataObject(id="0030", name="Jonny Quest")) 
  61   
  62      catalog = Table('catalog') 
  63      catalog.create_index("sku", unique=True) 
  64      catalog.insert(DataObject(sku="ANVIL-001", descr="1000lb anvil", unitofmeas="EA",unitprice=100)) 
  65      catalog.insert(DataObject(sku="BRDSD-001", descr="Bird seed", unitofmeas="LB",unitprice=3)) 
  66      catalog.insert(DataObject(sku="MAGNT-001", descr="Magnet", unitofmeas="EA",unitprice=8)) 
  67      catalog.insert(DataObject(sku="MAGLS-001", descr="Magnifying glass", unitofmeas="EA",unitprice=12)) 
  68      print(catalog.by.sku["ANVIL-001"].descr) 
  69   
  70      wishitems = Table('wishitems') 
  71      wishitems.create_index("custid") 
  72      wishitems.create_index("sku") 
  73      wishitems.insert(DataObject(custid="0020", sku="ANVIL-001")) 
  74      wishitems.insert(DataObject(custid="0020", sku="BRDSD-001")) 
  75      wishitems.insert(DataObject(custid="0020", sku="MAGNT-001")) 
  76      wishitems.insert(DataObject(custid="0030", sku="MAGNT-001")) 
  77      wishitems.insert(DataObject(custid="0030", sku="MAGLS-001")) 
  78   
  79      # print a particular customer name  
  80      # (unique indexes will return a single item; non-unique 
  81      # indexes will return a list of all matching items) 
  82      print(customers.by.id["0030"].name) 
  83   
  84      # print all items sold by the pound 
  85      for item in catalog.where(unitofmeas="LB"): 
  86          print(item.sku, item.descr) 
  87   
  88      # print all items that cost more than 10 
  89      for item in catalog.where(lambda o : o.unitprice>10): 
  90          print(item.sku, item.descr, item.unitprice) 
  91   
  92      # join tables to create queryable wishlists collection 
  93      wishlists = customers.join_on("id") + wishitems.join_on("custid") + catalog.join_on("sku") 
  94   
  95      # print all wishlist items with price > 10 
  96      bigticketitems = wishlists().where(lambda ob : ob.unitprice > 10) 
  97      for item in bigticketitems: 
  98          print(item) 
  99   
 100      # list all wishlist items in descending order by price 
 101      for item in wishlists().sort("unitprice desc"): 
 102          print(item) 
 103  """ 
 104   
 105  __version__ = "0.10" 
 106  __versionTime__ = "14 Jul 2016 11:52" 
 107  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
 108   
 109  import sys 
 110  PY_2 = sys.version_info[0] == 2 
 111  PY_3 = sys.version_info[0] == 3 
 112   
 113  from collections import defaultdict, deque, namedtuple 
 114  from itertools import groupby,islice,starmap,repeat 
 115  if PY_2: 
 116      from itertools import ifilter as filter 
 117   
 118  try: 
 119      from collections import OrderedDict as ODict 
 120  except ImportError: 
 121      # best effort, just use dict, but won't preserve ordering of fields 
 122      # in tables or output files 
 123      ODict = dict 
 124   
 125  from operator import attrgetter 
 126  import csv 
 127  import json 
 128  import re 
 129   
 130  _consumer = deque(maxlen=0) 
 131  do_all = _consumer.extend 
 132   
 133  try: 
 134      from itertools import product 
 135  except ImportError: 
136 - def product(*seqs):
137 tupleseqs = [[(x,) for x in s] for s in seqs] 138 def _product(*seqs): 139 if len(seqs) == 1: 140 for x in seqs[0]: 141 yield x 142 else: 143 for x in seqs[0]: 144 for p in _product(*seqs[1:]): 145 yield x+p
146 for p in _product(*tupleseqs): 147 yield p 148 149 if PY_3: 150 basestring = str 151 152 __all__ = ["DataObject", "Table", "JoinTerm", "PivotTable"] 153
154 -def _object_attrnames(obj):
155 if hasattr(obj, "__dict__"): 156 # normal object 157 return obj.__dict__.keys() 158 elif isinstance(obj, tuple) and hasattr(obj, "_fields"): 159 # namedtuple 160 return obj._fields 161 elif hasattr(obj, "__slots__"): 162 return obj.__slots__ 163 else: 164 raise ValueError("object with unknown attributes")
165
166 -def _to_json(obj):
167 if hasattr(obj, "__dict__"): 168 # normal object 169 return json.dumps(obj.__dict__) 170 elif isinstance(obj, tuple) and hasattr(obj, "_fields"): 171 # namedtuple 172 return json.dumps(ODict(zip(obj._fields, obj))) 173 elif hasattr(obj, "__slots__"): 174 return json.dumps(ODict((k,v) for k,v in zip(obj.__slots__, 175 (getattr(obj,a) for a in obj.__slots__)))) 176 else: 177 raise ValueError("object with unknown attributes")
178
179 -class DataObject(object):
180 """A generic semi-mutable object for storing data values in a table. Attributes 181 can be set by passing in named arguments in the constructor, or by setting them 182 as C{object.attribute = value}. New attributes can be added any time, but updates 183 are ignored. Table joins are returned as a Table of DataObjects."""
184 - def __init__(self, **kwargs):
185 if kwargs: 186 self.__dict__.update(kwargs)
187 - def __repr__(self):
188 return '{' + ', '.join(("%r: %r" % k_v) for k_v in self.__dict__.items()) + '}'
189 - def __setattr__(self, attr, val):
190 # make all attributes write-once 191 if attr not in self.__dict__: 192 super(DataObject,self).__setattr__(attr,val) 193 else: 194 raise AttributeError("can't set existing attribute")
195 - def __getitem__(self, k):
196 if hasattr(self,k): 197 return getattr(self,k) 198 else: 199 raise KeyError("object has no such attribute " + k)
200 - def __setitem__(self, k, v):
201 if k not in self.__dict__: 202 self.__dict__[k] = v 203 else: 204 raise KeyError("attribute already exists")
205 - def __eq__(self, other):
206 return self.__dict__ == other.__dict__
207
208 -class _ObjIndex(object):
209 - def __init__(self, attr):
210 self.attr = attr 211 self.obs = defaultdict(list) 212 self.is_unique = False
213 - def __setitem__(self, k, v):
214 self.obs[k].append(v)
215 - def __getitem__(self, k):
216 return self.obs.get(k,[])
217 - def __len__(self):
218 return len(self.obs)
219 - def __iter__(self):
220 return iter(self.obs.keys())
221
222 - def keys(self):
223 return sorted(filter(None, self.obs.keys()))
224 - def items(self):
225 return self.obs.items()
226 - def remove(self, obj):
227 try: 228 k = getattr(obj, self.attr) 229 self.obs[k].remove(obj) 230 except (ValueError,AttributeError,KeyError): 231 pass
232 - def __contains__(self, key):
233 return key in self.obs
234 - def copy_template(self):
235 return self.__class__(self.attr)
236
237 -class _UniqueObjIndex(_ObjIndex):
238 - def __init__(self, attr, accept_none=False):
239 super(_UniqueObjIndex, self).__init__(attr) 240 self.obs = {} 241 self.is_unique = True 242 self.accept_none = accept_none 243 self.none_values = list()
244 - def __setitem__(self, k, v):
245 if k is not None: 246 if k not in self.obs: 247 self.obs[k] = v 248 else: 249 raise KeyError("duplicate key value %s" % k) 250 else: 251 if self.accept_none: 252 self.none_values.append(v) 253 else: 254 raise ValueError("None is not a valid index key")
255
256 - def __getitem__(self, k):
257 if k is not None: 258 return [self.obs.get(k)] if k in self.obs else [] 259 else: 260 return list(self.none_values)
261 - def __contains__(self, k):
262 if k is not None: 263 return k in self.obs 264 else: 265 return self.accept_none and self.none_values
266 - def keys(self):
267 return sorted(self.obs.keys()) + ([None,] if self.none_values else [])
268 - def items(self):
269 return [(k,[v]) for k,v in self.obs.items()]
270 - def remove(self, obj):
271 k = getattr(obj, self.attr) 272 if k is not None: 273 self.obs.pop(k, None) 274 else: 275 try: 276 self.none_values.remove(obj) 277 except ValueError: 278 pass
279
280 -class _ObjIndexWrapper(object):
281 - def __init__(self, ind):
282 self._index = ind
283 - def __getattr__(self, attr):
284 return getattr(self._index, attr)
285 - def __getitem__(self, k):
286 ret = Table() 287 if k in self._index: 288 ret.insert_many(self._index[k]) 289 return ret
290
291 - def __contains__(self, k):
292 return k in self._index
293 294
295 -class _UniqueObjIndexWrapper(_ObjIndexWrapper):
296 - def __getitem__(self, k):
297 if k is not None: 298 return self._index[k][0] 299 else: 300 ret = Table() 301 if k in self._index: 302 ret.insert_many(self._index[k]) 303 return ret
304
305 -class _IndexAccessor(object):
306 - def __init__(self, table):
307 self.table = table
308
309 - def __getattr__(self, attr):
310 """A quick way to query for matching records using their indexed attributes. The attribute 311 name is used to locate the index, and returns a wrapper on the index. This wrapper provides 312 dict-like access to the underlying records in the table, as in:: 313 314 employees.by.socsecnum["000-00-0000"] 315 customers.by.zipcode["12345"] 316 317 (C{'by'} is added as a pseudo-attribute on tables, to help indicate that the indexed attributes 318 are not attributes of the table, but of items in the table.) 319 320 The behavior differs slightly for unique and non-unique indexes: 321 - if the index is unique, then retrieving a matching object, will return just the object; 322 if there is no matching object, C{KeyError} is raised (making a table with a unique 323 index behave very much like a Python dict) 324 - if the index is non-unique, then all matching objects will be returned in a new Table, 325 just as if a regular query had been performed; if no objects match the key value, an empty 326 Table is returned and no exception is raised. 327 328 If there is no index defined for the given attribute, then C{AttributeError} is raised. 329 """ 330 if attr in self.table._indexes: 331 ret = self.table._indexes[attr] 332 if isinstance(ret, _UniqueObjIndex): 333 ret = _UniqueObjIndexWrapper(ret) 334 if isinstance(ret, _ObjIndex): 335 ret = _ObjIndexWrapper(ret) 336 return ret 337 raise AttributeError("Table '%s' has no index '%s'" % (self.table.table_name, attr))
338 339
340 -class Table(object):
341 """Table is the main class in C{littletable}, for representing a collection of DataObjects or 342 user-defined objects with publicly accessible attributes or properties. Tables can be: 343 - created, with an optional name, using standard Python L{C{Table() constructor}<__init__>} 344 - indexed, with multiple indexes, with unique or non-unique values, see L{create_index} 345 - queried, specifying values to exact match in the desired records, see L{where} 346 - filtered (using L{where}), using a simple predicate function to match desired records; 347 useful for selecting using inequalities or compound conditions 348 - accessed directly for keyed values, using C{table.indexattribute[key]} - see L{__getattr__} 349 - joined, using L{join_on} to identify attribute to be used for joining with another table, and 350 L{join} or operator '+' to perform the actual join 351 - pivoted, using L{pivot} to create a nested structure of sub-tables grouping objects 352 by attribute values 353 - grouped, using L{groupby} to create a summary table of computed values, grouped by a key 354 attribute 355 - L{imported<csv_import>}/L{exported<csv_export>} to CSV-format files 356 Queries and joins return their results as new Table objects, so that queries and joins can 357 be easily performed as a succession of operations. 358 """
359 - def __init__(self, table_name=''):
360 """Create a new, empty Table. 361 @param table_name: name for Table 362 @type table_name: string (optional) 363 """ 364 self(table_name) 365 self.obs = [] 366 self._indexes = {} 367 self._uniqueIndexes = [] 368 self.by = _IndexAccessor(self) 369 """ 370 C{'by'} is added as a pseudo-attribute on tables, to provide 371 dict-like access to the underlying records in the table by index key, as in:: 372 373 employees.by.socsecnum["000-00-0000"] 374 customers.by.zipcode["12345"] 375 376 The behavior differs slightly for unique and non-unique indexes: 377 - if the index is unique, then retrieving a matching object, will return just the object; 378 if there is no matching object, C{KeyError} is raised (making a table with a unique 379 index behave very much like a Python dict) 380 - if the index is non-unique, then all matching objects will be returned in a new Table, 381 just as if a regular query had been performed; if no objects match the key value, an empty 382 Table is returned and no exception is raised. 383 384 If there is no index defined for the given attribute, then C{AttributeError} is raised. 385 """
386
387 - def __len__(self):
388 """Return the number of objects in the Table.""" 389 return len(self.obs)
390
391 - def __iter__(self):
392 """Create an iterator over the objects in the Table.""" 393 return iter(self.obs)
394
395 - def __getitem__(self, i):
396 """Provides direct indexed/sliced access to the Table's underlying list of objects.""" 397 if isinstance(i, slice): 398 ret = self.copy_template() 399 ret.insert_many(self.obs[i]) 400 return ret 401 else: 402 return self.obs[i]
403
404 - def __getattr__(self, attr):
405 """(Deprecated) A quick way to query for matching records using their indexed attributes. The attribute 406 name is used to locate the index, and returns a wrapper on the index. This wrapper provides 407 dict-like access to the underlying records in the table, as in:: 408 409 employees.socsecnum["000-00-0000"] 410 customers.zipcode["12345"] 411 412 (L{by} is added as a pseudo-attribute on tables, to help indicate that the indexed attributes 413 are not attributes of the table, but of items in the table. Use of C{'by'} is preferred, and 414 will replace direct attribute access in a future release.):: 415 416 employees.by.socsecnum["000-00-0000"] 417 customers.by.zipcode["12345"] 418 419 The behavior differs slightly for unique and non-unique indexes: 420 - if the index is unique, then retrieving a matching object, will return just the object; 421 if there is no matching object, C{KeyError} is raised (making a table with a unique 422 index behave very much like a Python dict) 423 - if the index is non-unique, then all matching objects will be returned in a new Table, 424 just as if a regular query had been performed; if no objects match the key value, an empty 425 Table is returned and no exception is raised. 426 427 If there is no index defined for the given attribute, then C{AttributeError} is raised. 428 """ 429 if attr in self._indexes: 430 ret = self._indexes[attr] 431 if isinstance(ret, _UniqueObjIndex): 432 ret = _UniqueObjIndexWrapper(ret) 433 if isinstance(ret, _ObjIndex): 434 ret = _ObjIndexWrapper(ret) 435 return ret 436 raise AttributeError("Table '%s' has no index '%s'" % (self.table_name, attr))
437
438 - def __bool__(self):
439 return bool(self.obs)
440 441 __nonzero__ = __bool__ 442
443 - def __add__(self, other):
444 """Support UNION of 2 tables using "+" operator.""" 445 if isinstance(other, JoinTerm): 446 # special case if added to a JoinTerm, do join, not union 447 return other + self 448 elif isinstance(other, Table): 449 # if other is another Table, just union them 450 return self.union(other) 451 else: 452 # assume other is a sequence of some sort, insert all elements 453 return self.clone().insert_many(other)
454
455 - def __iadd__(self, other):
456 """Support UNION of 2 tables using "+=" operator.""" 457 return self.insert_many(other)
458
459 - def union(self, other):
460 return self.clone().insert_many(other.obs)
461
462 - def __call__(self, table_name=None):
463 """A simple way to assign a name to a table, such as those 464 dynamically created by joins and queries. 465 @param table_name: name for Table 466 @type table_name: string 467 """ 468 if table_name is not None: 469 self.table_name = table_name 470 return self
471
472 - def copy_template(self, name=None):
473 """Create empty copy of the current table, with copies of all 474 index definitions. 475 """ 476 ret = Table(self.table_name) 477 ret._indexes.update(dict((k,v.copy_template()) for k,v in self._indexes.items())) 478 ret(name) 479 return ret
480
481 - def clone(self, name=None):
482 """Create full copy of the current table, including table contents 483 and index definitions. 484 """ 485 ret = self.copy_template().insert_many(self.obs)(name) 486 return ret
487
488 - def create_index(self, attr, unique=False, accept_none=False):
489 """Create a new index on a given attribute. 490 If C{unique} is True and records are found in the table with duplicate 491 attribute values, the index is deleted and C{KeyError} is raised. 492 493 If the table already has an index on the given attribute, then no 494 action is taken and no exception is raised. 495 @param attr: the attribute to be used for indexed access and joins 496 @type attr: string 497 @param unique: flag indicating whether the indexed field values are 498 expected to be unique across table entries 499 @type unique: boolean 500 @param accept_none: flag indicating whether None is an acceptable 501 unique key value for this attribute 502 @type accept_none: boolean 503 """ 504 if attr in self._indexes: 505 return self 506 507 if unique: 508 self._indexes[attr] = _UniqueObjIndex(attr,accept_none) 509 self._uniqueIndexes = [ind for ind in self._indexes.values() if ind.is_unique] 510 else: 511 self._indexes[attr] = _ObjIndex(attr) 512 accept_none = True 513 ind = self._indexes[attr] 514 try: 515 for obj in self.obs: 516 obval = getattr(obj, attr, None) 517 if obval is not None or accept_none: 518 ind[obval] = obj 519 else: 520 raise KeyError("None is not an allowed key") 521 return self 522 523 except KeyError: 524 del self._indexes[attr] 525 self._uniqueIndexes = [ind for ind in self._indexes.values() if ind.is_unique] 526 raise
527
528 - def delete_index(self, attr):
529 """Deletes an index from the Table. Can be used to drop and rebuild an index, 530 or to convert a non-unique index to a unique index, or vice versa. 531 @param attr: name of an indexed attribute 532 @type attr: string 533 """ 534 if attr in self._indexes: 535 del self._indexes[attr] 536 self._uniqueIndexes = [ind for ind in self._indexes.values() if ind.is_unique] 537 return self
538
539 - def insert(self, obj):
540 """Insert a new object into this Table. 541 @param obj: any Python object - 542 Objects can be constructed using the defined DataObject type, or they can 543 be any Python object that does not use the Python C{__slots__} feature; C{littletable} 544 introspects the object's C{__dict__} or C{_fields} attributes to obtain join and 545 index attributes and values. 546 547 If the table contains a unique index, and the record to be inserted would add 548 a duplicate value for the indexed attribute, then C{KeyError} is raised, and the 549 object is not inserted. 550 551 If the table has no unique indexes, then it is possible to insert duplicate 552 objects into the table. 553 """ 554 555 # verify new object doesn't duplicate any existing unique index values 556 uniqueIndexes = self._uniqueIndexes #[ind for ind in self._indexes.values() if ind.is_unique] 557 NO_SUCH_ATTR = object() 558 if any((getattr(obj, ind.attr, None) is None and not ind.accept_none) 559 or (getattr(obj, ind.attr, NO_SUCH_ATTR) in ind) 560 for ind in uniqueIndexes): 561 # had a problem, find which one 562 for ind in uniqueIndexes: 563 if (getattr(obj, ind.attr, None) is None and not ind.accept_none): 564 raise KeyError("unique key cannot be None or blank for index %s" % ind.attr, obj) 565 if getattr(obj, ind.attr) in ind: 566 raise KeyError("duplicate unique key value '%s' for index %s" % (getattr(obj,ind.attr), ind.attr), obj) 567 568 self.obs.append(obj) 569 for attr, ind in self._indexes.items(): 570 obval = getattr(obj, attr) 571 ind[obval] = obj 572 return self
573
574 - def insert_many(self, it):
575 """Inserts a collection of objects into the table.""" 576 do_all(self.insert(ob) for ob in it) 577 return self
578
579 - def remove(self, ob):
580 """Removes an object from the table. If object is not in the table, then 581 no action is taken and no exception is raised.""" 582 # remove from indexes 583 do_all(ind.remove(ob) for attr,ind in self._indexes.items()) 584 585 # remove from main object list 586 self.obs.remove(ob) 587 588 return self
589
590 - def remove_many(self, it):
591 """Removes a collection of objects from the table.""" 592 do_all(self.remove(ob) for ob in it) 593 return self
594
595 - def _query_attr_sort_fn(self, attr_val):
596 """Used to order where keys by most selective key first""" 597 attr,v = attr_val 598 if attr in self._indexes: 599 idx = self._indexes[attr] 600 if v in idx: 601 return len(idx[v]) 602 else: 603 return 0 604 else: 605 return 1e9
606
607 - def where(self, wherefn=None, **kwargs):
608 """ 609 Retrieves matching objects from the table, based on given 610 named parameters. If multiple named parameters are given, then 611 only objects that satisfy all of the query criteria will be returned. 612 613 Special named args: 614 - C{_orderby="attr,..."} - (Deprecated) resulting table should sort content objects 615 by the C{attr}s given in a comma-separated string; to sort in 616 descending order, reference the attribute as C{attr desc}. 617 618 - C{_limit} - maximum number of records to return 619 620 @param wherefn: a method or lambda that returns a boolean result, as in:: 621 622 lambda ob : ob.unitprice > 10 623 624 @type wherefn: callable(object) returning boolean 625 626 @param kwargs: attributes for selecting records, given as additional 627 named arguments of the form C{attrname="attrvalue"}. 628 629 @return: a new Table containing the matching objects 630 """ 631 # extract meta keys 632 flags = dict((k, kwargs.pop(k)) for k,v in kwargs.items() if k.startswith("_")) 633 634 if kwargs: 635 # order query criteria in ascending order of number of matching items 636 # for each individual given attribute; this will minimize the number 637 # of filtering records that each subsequent attribute will have to 638 # handle 639 kwargs = kwargs.items() 640 if len(kwargs) > 1 and len(self) > 100: 641 kwargs = sorted(kwargs, key=self._query_attr_sort_fn) 642 643 ret = self 644 NO_SUCH_ATTR = object() 645 for k,v in kwargs: 646 newret = ret.copy_template() 647 if k in ret._indexes: 648 newret.insert_many(ret._indexes[k][v]) 649 else: 650 newret.insert_many(r for r in ret.obs if getattr(r, k, NO_SUCH_ATTR) == v ) 651 ret = newret 652 if not ret: 653 break 654 else: 655 ret = self.clone() 656 657 if ret and wherefn is not None: 658 newret = ret.copy_template() 659 newret.insert_many(filter(wherefn, ret.obs)) 660 ret = newret 661 662 if ret: 663 # apply flags 664 # sort before clip 665 if flags: 666 if '_orderby' in flags and len(ret) > 1: 667 ret.sort(flags['_orderby']) 668 if '_limit' in flags: 669 del ret.obs[flags['_limit']:] 670 671 return ret
672
673 - def delete(self, **kwargs):
674 """Deletes matching objects from the table, based on given 675 named parameters. If multiple named parameters are given, then 676 only objects that satisfy all of the query criteria will be removed. 677 @param kwargs: attributes for selecting records, given as additional 678 named arguments of the form C{attrname="attrvalue"}. 679 @return: the number of objects removed from the table 680 """ 681 if not kwargs: 682 return 0 683 684 affected = self.where(**kwargs) 685 self.remove_many(affected) 686 return len(affected)
687
688 - def sort(self, key, reverse=False):
689 """Sort Table in place, using given fields as sort key. 690 @param key: if this is a string, it is a comma-separated list of field names, 691 optionally followed by 'desc' to indicate descending sort instead of the 692 default ascending sort; if a list or tuple, it is a list or tuple of field names 693 or field names with ' desc' appended; if it is a function, then it is the 694 function to be used as the sort key function 695 @return: self 696 """ 697 if isinstance(key, (basestring,list,tuple)): 698 if isinstance(key, basestring): 699 attrdefs = [s.strip() for s in key.split(',')] 700 # leftmost attr is the most primary sort key, so do succession of 701 # sorts from right to left 702 attr_orders = [(a.split()+['asc',])[:2] for a in attrdefs][::-1] 703 else: 704 # attr definitions were already resolved to a sequence by the caller 705 attr_orders = key 706 attrs = [attr for attr,order in attr_orders] 707 708 #special optimization if all orders are ascending or descending 709 if all(order=='asc' for attr,order in attr_orders): 710 self.obs.sort(key=attrgetter(*attrs), reverse=reverse) 711 elif all(order=='desc' for attr,order in attr_orders): 712 self.obs.sort(key=attrgetter(*attrs), reverse=not reverse) 713 else: 714 # mix of ascending and descending sorts, have to do succession of sorts 715 do_all(self.obs.sort(key=attrgetter(attr), reverse=(order=="desc")) 716 for attr,order in attr_orders) 717 else: 718 keyfn = key 719 self.obs.sort(key=keyfn, reverse=reverse) 720 return self
721
722 - def select(self, fields, **exprs):
723 """ 724 Create a new table containing a subset of attributes, with optionally 725 newly-added fields computed from each rec in the original table. 726 727 Special kwargs: 728 - C{_unique=True} - (Deprecated) only return a set of unique rows 729 730 @param fields: list of strings, or single space-delimited string, listing attribute name to be included in the output 731 @type fields: list, or space-delimited string 732 @param exprs: one or more named callable arguments, to compute additional fields using the given function 733 @type exprs: C{name=callable}, callable takes the record as an argument, and returns the new attribute value 734 If a string is passed as a callable, this string will be used using string formatting, given the record 735 as a source of interpolation values. For instance, C{fullName = '%(lastName)s, %(firstName)s'} 736 737 """ 738 if isinstance(fields, basestring): 739 fields = fields.split() 740 741 unique = exprs.pop('_unique', False) 742 743 def _makeStringCallable(expr): 744 if isinstance(expr,basestring): 745 return lambda rec: expr % rec 746 else: 747 return expr
748 749 exprs = dict((k, _makeStringCallable(v)) for k,v in exprs.items()) 750 751 raw_tuples = [] 752 for rec in self.obs: 753 attrvalues = tuple(getattr(rec, fieldname, None) for fieldname in fields) 754 if exprs: 755 attrvalues += tuple(expr(rec) for expr in exprs.values()) 756 raw_tuples.append(attrvalues) 757 758 if unique: 759 raw_tuples = list(set(raw_tuples)) 760 761 allNames = tuple(fields) + tuple(exprs.keys()) 762 return Table().insert_many(DataObject(**dict(zip(allNames, outtuple))) for outtuple in raw_tuples)
763
764 - def format(self, *fields, **exprs):
765 """ 766 Create a new table with all string formatted attribute values, typically in preparation for 767 formatted output. 768 @param fields: one or more strings, each string is an attribute name to be included in the output 769 @type fields: string (multiple) 770 @param exprs: one or more named string arguments, to format the given attribute with a formatting string 771 @type exprs: name=string 772 """ 773 #~ select_exprs = {} 774 #~ for f in fields: 775 #~ select_exprs[f] = lambda r : str(getattr,f,None) 776 fields = set(fields) 777 select_exprs = ODict((f, lambda r,f=f : str(getattr,f,None)) for f in fields) 778 779 for ename,expr in exprs.items(): 780 if isinstance(expr, basestring): 781 if re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', expr): 782 select_exprs[ename] = lambda r : str(getattr(r, expr, None)) 783 else: 784 if "{}" in expr or "{0}" or "{0:" in expr: 785 select_exprs[ename] = lambda r : expr.format(r) 786 else: 787 select_exprs[ename] = lambda r : expr % getattr(r, ename, "None") 788 789 return self.select(**select_exprs)
790
791 - def join(self, other, attrlist=None, auto_create_indexes=True, **kwargs):
792 """ 793 Join the objects of one table with the objects of another, based on the given 794 matching attributes in the named arguments. The attrlist specifies the attributes to 795 be copied from the source tables - if omitted, all attributes will be copied. Entries 796 in the attrlist may be single attribute names, or if there are duplicate names in both 797 tables, then a C{(table,attributename)} tuple can be given to disambiguate which 798 attribute is desired. A C{(table,attributename,alias)} tuple can also be passed, to 799 rename an attribute from a source table. 800 801 This method may be called directly, or can be constructed using the L{join_on} method and 802 the '+' operator. Using this syntax, the join is specified using C{table.join_on("xyz")} 803 to create a JoinTerm containing both table and joining attribute. Multiple JoinTerm 804 or tables can be added to construct a compound join expression. When complete, the 805 join expression gets executed by calling the resulting join definition, 806 using C{join_expression([attrlist])}. 807 808 @param other: other table to join to 809 @param attrlist: list of attributes to be copied to the new joined table; if 810 none provided, all attributes of both tables will be used (taken from the first 811 object in each table) 812 @type attrlist: string, or list of strings or C{(table,attribute[,alias])} tuples 813 (list may contain both strings and tuples) 814 @param kwargs: attributes to join on, given as additional named arguments 815 of the form C{table1attr="table2attr"}, or a dict mapping attribute names. 816 @returns: a new Table containing the joined data as new DataObjects 817 """ 818 if not kwargs: 819 raise TypeError("must specify at least one join attribute as a named argument") 820 thiscol,othercol = next(iter(kwargs.items())) 821 822 retname = ("(%s:%s^%s:%s)" % 823 (self.table_name, thiscol, other.table_name, othercol)) 824 # make sure both tables contain records to join - if not, just return empty list 825 if not (self.obs and other.obs): 826 return Table(retname) 827 828 if isinstance(attrlist, basestring): 829 attrlist = re.split(r'[,\s]+', attrlist) 830 831 # expand attrlist to full (table, name, alias) tuples 832 thisnames = set(_object_attrnames(self.obs[0])) 833 othernames = set(_object_attrnames(other.obs[0])) 834 fullcols = [] 835 if attrlist is not None: 836 for col in attrlist: 837 if isinstance(col, tuple): 838 # assume col contains at least (table, colname), fill in alias if missing 839 # to be same as colname 840 fullcols.append((col + (col[1],))[:3]) 841 else: 842 if col in thisnames: 843 fullcols.append( (self, col, col) ) 844 elif col in othernames: 845 fullcols.append( (other, col, col) ) 846 else: 847 raise ValueError("join attribute not found: " + col) 848 else: 849 fullcols = [(self,n,n) for n in thisnames] 850 fullcols += [(other,n,n) for n in othernames] 851 852 thiscols = list(filter(lambda o:o[0] is self, fullcols)) 853 othercols = list(filter(lambda o:o[0] is other, fullcols)) 854 855 if auto_create_indexes: 856 if thiscol not in self._indexes: 857 self.create_index(thiscol) 858 if othercol not in other._indexes: 859 other.create_index(othercol) 860 861 thiscolindex = othercolindex = None 862 if thiscol in self._indexes: 863 thiscolindex = self._indexes[thiscol] 864 else: 865 raise ValueError("indexed attribute required for join: "+thiscol) 866 if othercol in other._indexes: 867 othercolindex = other._indexes[othercol] 868 else: 869 raise ValueError("indexed attribute required for join: "+othercol) 870 871 # use table with fewer keys to drive join 872 if len(thiscolindex) < len(othercolindex): 873 shortindex, longindex = (thiscolindex, othercolindex) 874 swap = False 875 else: 876 shortindex, longindex = (othercolindex, thiscolindex) 877 swap = True 878 879 # find matching rows 880 matchingrows = list((longindex[key],rows) if swap else (rows, longindex[key]) 881 for key,rows in shortindex.items()) 882 883 joinrows = [] 884 for thisrows,otherrows in matchingrows: 885 for trow,orow in product(thisrows,otherrows): 886 retobj = DataObject() 887 do_all(setattr(retobj, a, getattr(trow,c)) for _,c,a in thiscols) 888 do_all(setattr(retobj, a, getattr(orow,c)) for _,c,a in othercols if not hasattr(retobj,a)) 889 joinrows.append(retobj) 890 891 ret = Table(retname) 892 for tbl,collist in zip([self,other],[thiscols,othercols]): 893 for _,c,a in collist: 894 if c in tbl._indexes: 895 if a not in ret._indexes: 896 ret.create_index(a) # no unique indexes in join results 897 ret.insert_many(joinrows) 898 return ret
899
900 - def join_on(self, attr):
901 """Creates a JoinTerm in preparation for joining with another table, to 902 indicate what attribute should be used in the join. Only indexed attributes 903 may be used in a join. 904 @param attr: attribute name to join from this table (may be different 905 from the attribute name in the table being joined to) 906 @type attr: string 907 @returns: L{JoinTerm}""" 908 if attr not in self._indexes: 909 raise ValueError("can only join on indexed attributes") 910 return JoinTerm(self, attr)
911
912 - def pivot(self, attrlist):
913 """Pivots the data using the given attributes, returning a L{PivotTable}. 914 @param attrlist: list of attributes to be used to construct the pivot table 915 @type attrlist: list of strings, or string of space-delimited attribute names 916 """ 917 if isinstance(attrlist, basestring): 918 attrlist = attrlist.split() 919 if all(a in self._indexes for a in attrlist): 920 return PivotTable(self,[],attrlist) 921 else: 922 raise ValueError("pivot can only be called using indexed attributes")
923
924 - def _import(self, source, encoding, transforms=None, reader=csv.DictReader):
925 close_on_exit = False 926 if isinstance(source, basestring): 927 if PY_3: 928 source = open(source, encoding=encoding) 929 else: 930 source = open(source) 931 close_on_exit = True 932 try: 933 csvdata = reader(source) 934 self.insert_many(DataObject(**s) for s in csvdata) 935 if transforms: 936 for attr,fn in transforms.items(): 937 default = None 938 if isinstance(fn,tuple): 939 fn,default = fn 940 objfn = lambda obj : fn(getattr(obj,attr)) 941 self.add_field(attr, objfn, default) 942 finally: 943 if close_on_exit: 944 source.close()
945
946 - def csv_import(self, csv_source, encoding='UTF-8', transforms=None, **kwargs):
947 """Imports the contents of a CSV-formatted file into this table. 948 @param csv_source: CSV file - if a string is given, the file with that name will be 949 opened, read, and closed; if a file object is given, then that object 950 will be read as-is, and left for the caller to be closed. 951 @type csv_source: string or file 952 @param encoding: encoding to be used for reading source text if C{csv_source} is 953 passed as a string filename 954 @type encoding: string (default='UTF-8') 955 @param transforms: dict of functions by attribute name; if given, each 956 attribute will be transformed using the corresponding transform; if there is no 957 matching transform, the attribute will be read as a string (default); the 958 transform function can also be defined as a (function, default-value) tuple; if 959 there is an Exception raised by the transform function, then the attribute will 960 be set to the given default value 961 @type transforms: dict (optional) 962 @param kwargs: additional constructor arguments for csv C{DictReader} objects, such as C{delimiter} 963 or C{fieldnames}; these are passed directly through to the csv C{DictReader} constructor 964 @type kwargs: named arguments (optional) 965 """ 966 reader_args = dict((k,v) for k,v in kwargs.items() if k not in ['encoding', 'csv_source', 'transforms']) 967 reader = lambda src: csv.DictReader(src, **reader_args) 968 return self._import(csv_source, encoding, transforms, reader=reader)
969
970 - def _xsv_import(self, xsv_source, transforms=None, splitstr="\t"):
971 xsv_reader = lambda src: csv.DictReader(src, delimiter=splitstr) 972 return self._import(xsv_source, transforms, reader=xsv_reader)
973
974 - def tsv_import(self, xsv_source, transforms=None):
975 """Imports the contents of a tab-separated data file into this table. 976 @param xsv_source: tab-separated data file - if a string is given, the file with that name will be 977 opened, read, and closed; if a file object is given, then that object 978 will be read as-is, and left for the caller to be closed. 979 @type xsv_source: string or file 980 @param transforms: dict of functions by attribute name; if given, each 981 attribute will be transformed using the corresponding transform; if there is no 982 matching transform, the attribute will be read as a string (default); the 983 transform function can also be defined as a (function, default-value) tuple; if 984 there is an Exception raised by the transform function, then the attribute will 985 be set to the given default value 986 @type transforms: dict (optional) 987 """ 988 return self._xsv_import(xsv_source, transforms=transforms, splitstr="\t")
989
990 - def csv_export(self, csv_dest, fieldnames=None, encoding="UTF-8"):
991 """Exports the contents of the table to a CSV-formatted file. 992 @param csv_dest: CSV file - if a string is given, the file with that name will be 993 opened, written, and closed; if a file object is given, then that object 994 will be written as-is, and left for the caller to be closed. 995 @type csv_dest: string or file 996 @param fieldnames: attribute names to be exported; can be given as a single 997 string with space-delimited names, or as a list of attribute names 998 @type fieldnames: list of strings 999 @param encoding: string (default="UTF-8"); if csv_dest is provided as a string 1000 representing an output filename, an encoding argument can be provided (Python 3 only) 1001 @type encoding: string 1002 """ 1003 close_on_exit = False 1004 if isinstance(csv_dest, basestring): 1005 if PY_3: 1006 csv_dest = open(csv_dest,'w', encoding=encoding) 1007 else: 1008 csv_dest = open(csv_dest,'w') 1009 close_on_exit = True 1010 try: 1011 if fieldnames is None: 1012 fieldnames = list(_object_attrnames(self.obs[0])) 1013 if isinstance(fieldnames, basestring): 1014 fieldnames = fieldnames.split() 1015 1016 csv_dest.write(','.join(fieldnames) + '\n') 1017 csvout = csv.DictWriter(csv_dest, fieldnames, extrasaction='ignore') 1018 if hasattr(self.obs[0], "__dict__"): 1019 do_all(csvout.writerow(o.__dict__) for o in self.obs) 1020 else: 1021 do_all(csvout.writerow(ODict(starmap(lambda obj, fld: (fld, getattr(obj, fld)), 1022 zip(repeat(o), fieldnames)))) for o in self.obs) 1023 finally: 1024 if close_on_exit: 1025 csv_dest.close()
1026
1027 - def json_import(self, source, encoding="UTF-8", transforms=None):
1028 """Imports the contents of a JSON data file into this table. 1029 @param source: JSON data file - if a string is given, the file with that name will be 1030 opened, read, and closed; if a file object is given, then that object 1031 will be read as-is, and left for the caller to be closed. 1032 @type source: string or file 1033 @param transforms: dict of functions by attribute name; if given, each 1034 attribute will be transformed using the corresponding transform; if there is no 1035 matching transform, the attribute will be read as a string (default); the 1036 transform function can also be defined as a (function, default-value) tuple; if 1037 there is an Exception raised by the transform function, then the attribute will 1038 be set to the given default value 1039 @type transforms: dict (optional) 1040 """ 1041 class _JsonFileReader(object): 1042 def __init__(self, src): 1043 self.source = src
1044 def __iter__(self): 1045 current = '' 1046 for line in self.source: 1047 if current: 1048 current += ' ' 1049 current += line 1050 try: 1051 yield json.loads(current) 1052 current = '' 1053 except Exception: 1054 pass 1055 return self._import(source, encoding, transforms=transforms, reader=_JsonFileReader) 1056
1057 - def json_export(self, dest, fieldnames=None, encoding="UTF-8"):
1058 """Exports the contents of the table to a JSON-formatted file. 1059 @param dest: output file - if a string is given, the file with that name will be 1060 opened, written, and closed; if a file object is given, then that object 1061 will be written as-is, and left for the caller to be closed. 1062 @type dest: string or file 1063 @param fieldnames: attribute names to be exported; can be given as a single 1064 string with space-delimited names, or as a list of attribute names 1065 @type fieldnames: list of strings 1066 @param encoding: string (default="UTF-8"); if csv_dest is provided as a string 1067 representing an output filename, an encoding argument can be provided (Python 3 only) 1068 @type encoding: string 1069 """ 1070 close_on_exit = False 1071 if isinstance(dest, basestring): 1072 if PY_3: 1073 dest = open(dest,'w', encoding=encoding) 1074 else: 1075 dest = open(dest,'w') 1076 close_on_exit = True 1077 try: 1078 if isinstance(fieldnames, basestring): 1079 fieldnames = fieldnames.split() 1080 1081 if fieldnames is None: 1082 do_all( 1083 dest.write(_to_json(o)+'\n') for o in self.obs 1084 ) 1085 else: 1086 do_all( 1087 dest.write(json.dumps(ODict((f, getattr(o, f)) for f in fieldnames))+'\n') 1088 for o in self.obs 1089 ) 1090 finally: 1091 if close_on_exit: 1092 dest.close()
1093
1094 - def add_field(self, attrname, fn, default=None):
1095 """Computes a new attribute for each object in table, or replaces an 1096 existing attribute in each record with a computed value 1097 @param attrname: attribute to compute for each object 1098 @type attrname: string 1099 @param fn: function used to compute new attribute value, based on 1100 other values in the object, as in:: 1101 1102 lambda ob : ob.commission_pct/100.0 * ob.gross_sales 1103 1104 @type fn: function(obj) returns value 1105 @param default: value to use if an exception is raised while trying 1106 to evaluate fn 1107 """ 1108 #~ for rec in self: 1109 def _addFieldToRec(rec, fn=fn, default=default): 1110 try: 1111 val = fn(rec) 1112 except Exception: 1113 val = default 1114 if isinstance(rec, DataObject): 1115 rec.__dict__[attrname] = val 1116 else: 1117 setattr(rec, attrname, val)
1118 do_all(_addFieldToRec(r) for r in self) 1119 return self 1120 1121 addfield = add_field 1122 """(Deprecated) Legacy method to add a field to all objects in table; to be replaced by L{add_field}. 1123 """ 1124
1125 - def groupby(self, keyexpr, **outexprs):
1126 """simple prototype of group by, with support for expressions in the group-by clause 1127 and outputs 1128 @param keyexpr: grouping field and optional expression for computing the key value; 1129 if a string is passed 1130 @type keyexpr: string or tuple 1131 @param outexprs: named arguments describing one or more summary values to 1132 compute per key 1133 @type outexprs: callable, taking a sequence of objects as input and returning 1134 a single summary value 1135 """ 1136 if isinstance(keyexpr, basestring): 1137 keyattrs = keyexpr.split() 1138 keyfn = lambda o : tuple(getattr(o, k) for k in keyattrs) 1139 1140 elif isinstance(keyexpr, tuple): 1141 keyattrs = (keyexpr[0],) 1142 keyfn = keyexpr[1] 1143 1144 else: 1145 raise TypeError("keyexpr must be string or tuple") 1146 1147 groupedobs = defaultdict(list) 1148 do_all(groupedobs[keyfn(ob)].append(ob) for ob in self.obs) 1149 1150 tbl = Table() 1151 do_all(tbl.create_index(k, unique=(len(keyattrs)==1)) for k in keyattrs) 1152 for key, recs in sorted(groupedobs.iteritems()): 1153 groupobj = DataObject(**dict(zip(keyattrs, key))) 1154 do_all(setattr(groupobj, subkey, expr(recs)) 1155 for subkey, expr in outexprs.items()) 1156 tbl.insert(groupobj) 1157 return tbl
1158
1159 - def run(self):
1160 """(Deprecated) Returns the Table. Will be removed in a future release. 1161 """ 1162 return self
1163
1164 - def unique(self, key=None):
1165 """ 1166 Create a new table of objects,containing no duplicate values. 1167 1168 @param key: (default=None) optional callable for computing a representative unique key for each 1169 object in the table. If None, then a key will be composed as a tuple of all the values in the object. 1170 @type key: callable, takes the record as an argument, and returns the key value or tuple to be used 1171 to represent uniqueness. 1172 """ 1173 ret = self.copy_template() 1174 seen = set() 1175 for rec in self: 1176 if key is None: 1177 try: 1178 ob_dict = vars(rec) 1179 except TypeError: 1180 ob_dict = dict((k, getattr(rec, k)) for k in _object_attrnames(rec)) 1181 reckey = tuple(sorted(ob_dict.items())) 1182 else: 1183 reckey = key(rec) 1184 if reckey not in seen: 1185 seen.add(reckey) 1186 ret.insert(rec) 1187 return ret
1188
1189 -class PivotTable(Table):
1190 """Enhanced Table containing pivot results from calling table.pivot(). 1191 """
1192 - def __init__(self, parent, attr_val_path, attrlist):
1193 """PivotTable initializer - do not create these directly, use 1194 L{Table.pivot}. 1195 """ 1196 super(PivotTable,self).__init__() 1197 self._attr_path = attr_val_path[:] 1198 self._pivot_attrs = attrlist[:] 1199 self._subtable_dict = {} 1200 1201 #~ for k,v in parent._indexes.items(): 1202 #~ self._indexes[k] = v.copy_template() 1203 self._indexes.update(dict((k,v.copy_template()) for k,v in parent._indexes.items())) 1204 if not attr_val_path: 1205 self.insert_many(parent.obs) 1206 else: 1207 attr,val = attr_val_path[-1] 1208 self.insert_many(parent.where(**{attr:val})) 1209 parent._subtable_dict[val] = self 1210 1211 if len(attrlist) > 0: 1212 this_attr = attrlist[0] 1213 sub_attrlist = attrlist[1:] 1214 ind = parent._indexes[this_attr] 1215 self.subtables = [ PivotTable(self, 1216 attr_val_path + [(this_attr,k)], 1217 sub_attrlist) for k in sorted(ind.keys()) ] 1218 else: 1219 self.subtables = []
1220
1221 - def __getitem__(self,val):
1222 if self._subtable_dict: 1223 return self._subtable_dict[val] 1224 else: 1225 return super(PivotTable,self).__getitem__(val)
1226
1227 - def keys(self):
1228 return sorted(self._subtable_dict.keys())
1229
1230 - def items(self):
1231 return sorted(self._subtable_dict.items())
1232
1233 - def values(self):
1234 return [self._subtable_dict.items[k] for k in self.keys()]
1235
1236 - def pivot_key(self):
1237 """Return the set of attribute-value pairs that define the contents of this 1238 table within the original source table. 1239 """ 1240 return self._attr_path
1241
1242 - def pivot_key_str(self):
1243 """Return the pivot_key as a displayable string. 1244 """ 1245 return '/'.join("%s:%s" % (attr,key) for attr,key in self._attr_path)
1246
1247 - def has_subtables(self):
1248 """Return whether this table has further subtables. 1249 """ 1250 return bool(self.subtables)
1251
1252 - def dump(self, out=sys.stdout, row_fn=repr, limit=-1, indent=0):
1253 """Dump out the contents of this table in a nested listing. 1254 @param out: output stream to write to 1255 @param row_fn: function to call to display individual rows 1256 @param limit: number of records to show at deepest level of pivot (-1=show all) 1257 @param indent: current nesting level 1258 """ 1259 NL = '\n' 1260 if indent: 1261 out.write(" "*indent + self.pivot_key_str()) 1262 else: 1263 out.write("Pivot: %s" % ','.join(self._pivot_attrs)) 1264 out.write(NL) 1265 if self.has_subtables(): 1266 do_all(sub.dump(out, row_fn, limit, indent+1) for sub in self.subtables if sub) 1267 else: 1268 if limit >= 0: 1269 showslice = slice(0,limit) 1270 else: 1271 showslice = slice(None,None) 1272 do_all(out.write(" "*(indent+1) + row_fn(r) + NL) 1273 for r in self.obs[showslice]) 1274 out.flush()
1275
1276 - def dump_counts(self, out=sys.stdout, count_fn=len, colwidth=10):
1277 """Dump out the summary counts of entries in this pivot table as a tabular listing. 1278 @param out: output stream to write to 1279 """ 1280 if len(self._pivot_attrs) == 1: 1281 out.write("Pivot: %s\n" % ','.join(self._pivot_attrs)) 1282 maxkeylen = max(len(str(k)) for k in self.keys()) 1283 maxvallen = colwidth 1284 keytally = {} 1285 for k, sub in self.items(): 1286 sub_v = count_fn(sub) 1287 maxvallen = max(maxvallen, len(str(sub_v))) 1288 keytally[k] = sub_v 1289 for k,sub in self.items(): 1290 out.write("%-*.*s " % (maxkeylen,maxkeylen,k)) 1291 out.write("%*s\n" % (maxvallen,keytally[k])) 1292 elif len(self._pivot_attrs) == 2: 1293 out.write("Pivot: %s\n" % ','.join(self._pivot_attrs)) 1294 maxkeylen = max(max(len(str(k)) for k in self.keys()),5) 1295 maxvallen = max(max(len(str(k)) for k in self.subtables[0].keys()),colwidth) 1296 keytally = dict((k,0) for k in self.subtables[0].keys()) 1297 out.write("%*s " % (maxkeylen,'')) 1298 out.write(' '.join("%*.*s" % (maxvallen,maxvallen,k) for k in self.subtables[0].keys())) 1299 out.write(' %*s\n' % (maxvallen, 'Total')) 1300 for k,sub in self.items(): 1301 out.write("%-*.*s " % (maxkeylen,maxkeylen,k)) 1302 for kk,ssub in sub.items(): 1303 ssub_v = count_fn(ssub) 1304 out.write("%*d " % (maxvallen,ssub_v)) 1305 keytally[kk] += ssub_v 1306 maxvallen = max(maxvallen, len(str(ssub_v))) 1307 sub_v = count_fn(sub) 1308 maxvallen = max(maxvallen, len(str(sub_v))) 1309 out.write("%*d\n" % (maxvallen,sub_v)) 1310 out.write('%-*.*s ' % (maxkeylen,maxkeylen,"Total")) 1311 out.write(' '.join("%*d" % (maxvallen,tally) for k,tally in sorted(keytally.items()))) 1312 out.write(" %*d\n" % (maxvallen,sum(tally for k,tally in keytally.items()))) 1313 else: 1314 raise ValueError("can only dump summary counts for 1 or 2-attribute pivots")
1315
1316 - def summary_counts(self, fn=None, col=None, summarycolname=None):
1317 """Dump out the summary counts of this pivot table as a Table. 1318 """ 1319 if summarycolname is None: 1320 summarycolname = col 1321 ret = Table() 1322 topattr = self._pivot_attrs[0] 1323 do_all(ret.create_index(attr) for attr in self._pivot_attrs) 1324 if len(self._pivot_attrs) == 1: 1325 for sub in self.subtables: 1326 subattr,subval = sub._attr_path[-1] 1327 attrdict = {subattr:subval} 1328 if fn is None: 1329 attrdict['Count'] = len(sub) 1330 else: 1331 attrdict[summarycolname] = fn(s[col] for s in sub) 1332 ret.insert(DataObject(**attrdict)) 1333 elif len(self._pivot_attrs) == 2: 1334 for sub in self.subtables: 1335 for ssub in sub.subtables: 1336 attrdict = dict(ssub._attr_path) 1337 if fn is None: 1338 attrdict['Count'] = len(ssub) 1339 else: 1340 attrdict[summarycolname] = fn(s[col] for s in ssub) 1341 ret.insert(DataObject(**attrdict)) 1342 elif len(self._pivot_attrs) == 3: 1343 for sub in self.subtables: 1344 for ssub in sub.subtables: 1345 for sssub in ssub.subtables: 1346 attrdict = dict(sssub._attr_path) 1347 if fn is None: 1348 attrdict['Count'] = len(sssub) 1349 else: 1350 attrdict[summarycolname] = fn(s[col] for s in sssub) 1351 ret.insert(DataObject(**attrdict)) 1352 else: 1353 raise ValueError("can only dump summary counts for 1 or 2-attribute pivots") 1354 return ret
1355
1356 -class JoinTerm(object):
1357 """Temporary object created while composing a join across tables using 1358 L{Table.join_on} and '+' addition. JoinTerm's are usually created by 1359 calling join_on on a Table object, as in:: 1360 1361 customers.join_on("id") + orders.join_on("custid") 1362 1363 This join expression would set up the join relationship 1364 equivalent to:: 1365 1366 customers.join(orders, id="custid") 1367 1368 If tables are being joined on attributes that have the same name in 1369 both tables, then a join expression could be created by adding a 1370 JoinTerm of one table directly to the other table:: 1371 1372 customers.join_on("custid") + orders 1373 1374 Once the join expression is composed, the actual join is performed 1375 using function call notation:: 1376 1377 customerorders = customers.join_on("custid") + orders 1378 for custord in customerorders(): 1379 print custord 1380 1381 When calling the join expression, you can optionally specify a 1382 list of attributes as defined in L{Table.join}. 1383 """
1384 - def __init__(self, sourceTable, joinfield):
1385 self.sourcetable = sourceTable 1386 self.joinfield = joinfield 1387 self.jointo = None
1388
1389 - def __add__(self, other):
1390 if isinstance(other, Table): 1391 other = other.join_on(self.joinfield) 1392 if isinstance(other, JoinTerm): 1393 if self.jointo is None: 1394 if other.jointo is None: 1395 self.jointo = other 1396 else: 1397 self.jointo = other() 1398 return self 1399 else: 1400 if other.jointo is None: 1401 return self() + other 1402 else: 1403 return self() + other() 1404 raise ValueError("cannot add object of type '%s' to JoinTerm" % other.__class__.__name__)
1405
1406 - def __radd__(self, other):
1407 if isinstance(other, Table): 1408 return other.join_on(self.joinfield) + self 1409 raise ValueError("cannot add object of type '%s' to JoinTerm" % other.__class__.__name__)
1410
1411 - def __call__(self, attrs=None):
1412 if self.jointo: 1413 other = self.jointo 1414 if isinstance(other, Table): 1415 other = other.join_on(self.joinfield) 1416 ret = self.sourcetable.join(other.sourcetable, attrs, 1417 **{self.joinfield : other.joinfield}) 1418 return ret 1419 else: 1420 return self.sourcetable.query()
1421
1422 - def join_on(self, col):
1423 return self().join_on(col)
1424 1425 1426 if __name__ == "__main__": 1427 1428 # import json in Python 2 or 3 compatible forms 1429 from functools import partial 1430 try: 1431 import simplejson as json 1432 json_dumps = partial(json.dumps, indent=' ') 1433 except ImportError: 1434 import json 1435 json_dumps = partial(json.dumps, indent=2) 1436 1437 1438 rawdata = """\ 1439 Phoenix:AZ:85001:KPHX 1440 Phoenix:AZ:85001:KPHY 1441 Phoenix:AZ:85001:KPHA 1442 Dallas:TX:75201:KDFW""".splitlines() 1443 1444 # load miniDB 1445 stations = Table() 1446 #~ stations.create_index("city") 1447 stations.create_index("stn", unique=True) 1448 1449 fields = "city state zip stn".split() 1450 for d in rawdata: 1451 ob = DataObject() 1452 for k,v in zip(fields, d.split(':')): 1453 setattr(ob,k,v.strip()) 1454 stations.insert(ob) 1455 1456 # perform some queries and deletes 1457 for queryargs in [ 1458 dict(city="Phoenix"), 1459 dict(city="Phoenix", stn="KPHX"), 1460 dict(stn="KPHA", city="Phoenix"), 1461 dict(state="TX"), 1462 dict(city="New York"), 1463 ]: 1464 print(queryargs) 1465 result = stations.where(**queryargs) 1466 print(len(result)) 1467 for r in result: print (r) 1468 print('') 1469 #~ print stations.delete(city="Phoenix") 1470 #~ print stations.delete(city="Boston") 1471 print(list(stations.where())) 1472 print('') 1473 1474 amfm = Table() 1475 amfm.create_index("stn", unique=True) 1476 amfm.insert(DataObject(stn="KPHY", band="AM")) 1477 amfm.insert(DataObject(stn="KPHX", band="FM")) 1478 amfm.insert(DataObject(stn="KPHA", band="FM")) 1479 amfm.insert(DataObject(stn="KDFW", band="FM")) 1480 print(amfm.by.stn["KPHY"]) 1481 print(amfm.by.stn["KPHY"].band) 1482 1483 try: 1484 amfm.insert(DataObject(stn="KPHA", band="AM")) 1485 except KeyError: 1486 print("duplicate key not allowed") 1487 1488 print('') 1489 for rec in (stations.join_on("stn") + amfm.join_on("stn") 1490 )(["stn", "city", (amfm,"band","AMFM"), 1491 (stations,"state","st")]).sort("AMFM"): 1492 print(repr(rec)) 1493 1494 print('') 1495 for rec in (stations.join_on("stn") + amfm.join_on("stn") 1496 )(["stn", "city", (amfm,"band"), (stations,"state","st")]): 1497 print(json_dumps(vars(rec))) 1498 1499 print('') 1500 for rec in (stations.join_on("stn") + amfm.join_on("stn"))(): 1501 print(json_dumps(vars(rec))) 1502 1503 print('') 1504 stations.create_index("state") 1505 for az_stn in stations.by.state['AZ']: 1506 print(az_stn) 1507 1508 print('') 1509 pivot = stations.pivot("state") 1510 pivot.dump_counts() 1511 1512 print('') 1513 amfm.create_index("band") 1514 pivot = (stations.join_on("stn") + amfm)().pivot("state band") 1515 pivot.dump_counts() 1516