__doc__ = """\

C{littletable} - a Python module to give ORM-like access to a collection of objects

The C{littletable} module provides low-overhead, schema-less, in-memory database access to a
collection of user objects. C{littletable} provides a L{DataObject} class for ad hoc creation
of semi-immutable objects that can be stored in a C{littletable} L{Table}.

In addition to basic insert/remove/query/delete access to the contents of a
Table, C{littletable} offers:
 - simple indexing for improved retrieval performance, with optional enforcement of key uniqueness
 - access to objects using indexed attributes
 - simplified joins using '+' operator syntax between annotated Tables
 - the result of any query or join is a new first-class C{littletable} Table
 - pivoting on one or two attributes to gather tabulated data summaries
 - easy import/export to CSV and JSON files

C{littletable} Tables do not require an upfront schema definition; they simply work off of the
attributes of the stored values, and those referenced in any query parameters.

Here is a simple C{littletable} data storage/retrieval example::

    from littletable import Table, DataObject

    customers = Table('customers')
    customers.create_index("id", unique=True)
    customers.insert(DataObject(id="0010", name="George Jetson"))
    customers.insert(DataObject(id="0020", name="Wile E. Coyote"))
    customers.insert(DataObject(id="0030", name="Jonny Quest"))

    catalog = Table('catalog')
    catalog.create_index("sku", unique=True)
    catalog.insert(DataObject(sku="ANVIL-001", descr="1000lb anvil", unitofmeas="EA", unitprice=100))
    catalog.insert(DataObject(sku="BRDSD-001", descr="Bird seed", unitofmeas="LB", unitprice=3))
    catalog.insert(DataObject(sku="MAGNT-001", descr="Magnet", unitofmeas="EA", unitprice=8))
    catalog.insert(DataObject(sku="MAGLS-001", descr="Magnifying glass", unitofmeas="EA", unitprice=12))
    print(catalog.by.sku["ANVIL-001"].descr)

    wishitems = Table('wishitems')
    wishitems.create_index("custid")
    wishitems.create_index("sku")
    wishitems.insert(DataObject(custid="0020", sku="ANVIL-001"))
    wishitems.insert(DataObject(custid="0020", sku="BRDSD-001"))
    wishitems.insert(DataObject(custid="0020", sku="MAGNT-001"))
    wishitems.insert(DataObject(custid="0030", sku="MAGNT-001"))
    wishitems.insert(DataObject(custid="0030", sku="MAGLS-001"))

    # print a particular customer name
    # (unique indexes will return a single item; non-unique
    # indexes will return a list of all matching items)
    print(customers.by.id["0030"].name)

    # print all items sold by the pound
    for item in catalog.where(unitofmeas="LB"):
        print(item.sku, item.descr)

    # print all items that cost more than 10
    for item in catalog.where(lambda o: o.unitprice > 10):
        print(item.sku, item.descr, item.unitprice)

    # join tables to create queryable wishlists collection
    wishlists = customers.join_on("id") + wishitems.join_on("custid") + catalog.join_on("sku")

    # print all wishlist items with price > 10
    bigticketitems = wishlists().where(lambda ob: ob.unitprice > 10)
    for item in bigticketitems:
        print(item)

    # list all wishlist items in descending order by price
    for item in wishlists().sort("unitprice desc"):
        print(item)
"""

__version__ = "0.10"
__versionTime__ = "14 Jul 2016 11:52"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"

import sys
PY_2 = sys.version_info[0] == 2
PY_3 = sys.version_info[0] == 3

from collections import defaultdict, deque, namedtuple
from itertools import groupby, islice, starmap, repeat
if PY_2:
    from itertools import ifilter as filter

try:
    from collections import OrderedDict as ODict
except ImportError:
    # pre-2.7 Python - fall back to a plain dict (field ordering in
    # exports will not be preserved)
    ODict = dict

from operator import attrgetter
import csv
import json
import re

# a zero-length deque, used to consume generator expressions purely for their side effects
_consumer = deque(maxlen=0)
do_all = _consumer.extend

try:
    from itertools import product
except ImportError:
    # pre-2.6 Python - itertools.product is not available, define a simple replacement
    def product(*seqs):
        tupleseqs = [[(x,) for x in s] for s in seqs]
        def _product(*seqs):
            if len(seqs) == 1:
                for x in seqs[0]:
                    yield x
            else:
                for x in seqs[0]:
                    for p in _product(*seqs[1:]):
                        yield x+p
        for p in _product(*tupleseqs):
            yield p

if PY_3:
    basestring = str

__all__ = ["DataObject", "Table", "JoinTerm", "PivotTable"]

def _object_attrnames(obj):
    if hasattr(obj, "__dict__"):
        # an ordinary object - return its instance attribute names
        return obj.__dict__.keys()
    elif isinstance(obj, tuple) and hasattr(obj, "_fields"):
        # a namedtuple
        return obj._fields
    elif hasattr(obj, "__slots__"):
        return obj.__slots__
    else:
        raise ValueError("object with unknown attributes")

def _to_json(obj):
    if hasattr(obj, "__dict__"):
        # an ordinary object - serialize its instance attributes
        return json.dumps(obj.__dict__)
    elif isinstance(obj, tuple) and hasattr(obj, "_fields"):
        # a namedtuple
        return json.dumps(ODict(zip(obj._fields, obj)))
    elif hasattr(obj, "__slots__"):
        return json.dumps(ODict((k, v) for k, v in zip(obj.__slots__,
                                                       (getattr(obj, a) for a in obj.__slots__))))
    else:
        raise ValueError("object with unknown attributes")

class DataObject(object):
    """A generic semi-mutable object for storing data values in a table. Attributes
    can be set by passing in named arguments in the constructor, or by setting them
    as C{object.attribute = value}. New attributes can be added at any time, but updates
    are ignored. Table joins are returned as a Table of DataObjects."""
    def __init__(self, **kwargs):
        if kwargs:
            self.__dict__.update(kwargs)
    def __repr__(self):
        return '{' + ', '.join(("%r: %r" % k_v) for k_v in self.__dict__.items()) + '}'
    def __setattr__(self, attr, val):
        if attr not in self.__dict__:
            super(DataObject, self).__setattr__(attr, val)
        else:
            raise AttributeError("can't set existing attribute")
    def __getitem__(self, k):
        if hasattr(self, k):
            return getattr(self, k)
        else:
            raise KeyError("object has no such attribute " + k)
    def __setitem__(self, k, v):
        if k not in self.__dict__:
            self.__dict__[k] = v
        else:
            raise KeyError("attribute already exists")
    def __eq__(self, other):
        return self.__dict__ == other.__dict__

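# Example (illustrative sketch; the sample field names and values are hypothetical):
# DataObject attributes are write-once -- new attributes can be added at any time,
# but re-assigning an existing attribute raises AttributeError.
#
#   rec = DataObject(sku="ANVIL-001", unitprice=100)
#   rec.descr = "1000lb anvil"     # adding a new attribute is allowed
#   rec["color"] = "black"         # dict-style assignment of a new attribute
#   rec.unitprice = 150            # raises AttributeError("can't set existing attribute")
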
class _ObjIndex(object):
    def __init__(self, attr):
        self.attr = attr
        self.obs = defaultdict(list)
        self.is_unique = False
    def __setitem__(self, k, v):
        self.obs[k].append(v)
    def __getitem__(self, k):
        return self.obs.get(k, [])
    def __len__(self):
        return len(self.obs)
    def __iter__(self):
        return iter(self.obs.keys())

    def keys(self):
        return sorted(filter(None, self.obs.keys()))
    def items(self):
        return self.obs.items()
    def remove(self, obj):
        try:
            k = getattr(obj, self.attr)
            self.obs[k].remove(obj)
        except (ValueError, AttributeError, KeyError):
            pass
    def __contains__(self, key):
        return key in self.obs
    def copy_template(self):
        return self.__class__(self.attr)

class _UniqueObjIndex(_ObjIndex):
    def __init__(self, attr, accept_none=False):
        super(_UniqueObjIndex, self).__init__(attr)
        self.obs = {}
        self.is_unique = True
        self.accept_none = accept_none
        self.none_values = list()
    def __setitem__(self, k, v):
        if k is not None:
            if k not in self.obs:
                self.obs[k] = v
            else:
                raise KeyError("duplicate key value %s" % k)
        else:
            if self.accept_none:
                self.none_values.append(v)
            else:
                raise ValueError("None is not a valid index key")

    def __getitem__(self, k):
        if k is not None:
            return [self.obs.get(k)] if k in self.obs else []
        else:
            return list(self.none_values)
    def __contains__(self, k):
        if k is not None:
            return k in self.obs
        else:
            return self.accept_none and self.none_values
    def keys(self):
        return sorted(self.obs.keys()) + ([None, ] if self.none_values else [])
    def items(self):
        return [(k, [v]) for k, v in self.obs.items()]
    def remove(self, obj):
        k = getattr(obj, self.attr)
        if k is not None:
            self.obs.pop(k, None)
        else:
            try:
                self.none_values.remove(obj)
            except ValueError:
                pass

class _ObjIndexWrapper(object):
    def __init__(self, objindex):
        self._index = objindex
    def __getattr__(self, attr):
        return getattr(self._index, attr)
    def __getitem__(self, k):
        ret = Table()
        if k in self._index:
            ret.insert_many(self._index[k])
        return ret

    def __contains__(self, k):
        return k in self._index


class _UniqueObjIndexWrapper(_ObjIndexWrapper):
    def __getitem__(self, k):
        if k is not None:
            return self._index[k][0]
        else:
            ret = Table()
            if k in self._index:
                ret.insert_many(self._index[k])
            return ret

class _IndexAccessor(object):
    def __init__(self, table):
        self.table = table

    def __getattr__(self, attr):
        """A quick way to query for matching records using their indexed attributes. The attribute
        name is used to locate the index, and returns a wrapper on the index. This wrapper provides
        dict-like access to the underlying records in the table, as in::

           employees.by.socsecnum["000-00-0000"]
           customers.by.zipcode["12345"]

        (C{'by'} is added as a pseudo-attribute on tables, to help indicate that the indexed attributes
        are not attributes of the table, but of items in the table.)

        The behavior differs slightly for unique and non-unique indexes:
          - if the index is unique, then retrieving a matching object will return just the object;
            if there is no matching object, C{KeyError} is raised (making a table with a unique
            index behave very much like a Python dict)
          - if the index is non-unique, then all matching objects will be returned in a new Table,
            just as if a regular query had been performed; if no objects match the key value, an empty
            Table is returned and no exception is raised.

        If there is no index defined for the given attribute, then C{AttributeError} is raised.
        """
        if attr in self.table._indexes:
            ret = self.table._indexes[attr]
            if isinstance(ret, _UniqueObjIndex):
                ret = _UniqueObjIndexWrapper(ret)
            if isinstance(ret, _ObjIndex):
                ret = _ObjIndexWrapper(ret)
            return ret
        raise AttributeError("Table '%s' has no index '%s'" % (self.table.table_name, attr))


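# Example (illustrative sketch; the table and field names are hypothetical): with a unique
# index, '.by' behaves like a dict lookup returning a single object; with a non-unique
# index it returns a (possibly empty) Table of matches.
#
#   employees.create_index("socsecnum", unique=True)
#   employees.create_index("zipcode")
#   emp = employees.by.socsecnum["000-00-0000"]   # single matching object
#   local = employees.by.zipcode["12345"]         # Table of all matching objects
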
class Table(object):
    """Table is the main class in C{littletable}, for representing a collection of DataObjects or
    user-defined objects with publicly accessible attributes or properties. Tables can be:
     - created, with an optional name, using standard Python L{C{Table() constructor}<__init__>}
     - indexed, with multiple indexes, with unique or non-unique values, see L{create_index}
     - queried, specifying values to exactly match in the desired records, see L{where}
     - filtered (using L{where}), using a simple predicate function to match desired records;
       useful for selecting using inequalities or compound conditions
     - accessed directly for keyed values, using C{table.indexattribute[key]} - see L{__getattr__}
     - joined, using L{join_on} to identify the attribute to be used for joining with another table,
       and L{join} or operator '+' to perform the actual join
     - pivoted, using L{pivot} to create a nested structure of sub-tables grouping objects
       by attribute values
     - grouped, using L{groupby} to create a summary table of computed values, grouped by a key
       attribute
     - L{imported<csv_import>}/L{exported<csv_export>} to CSV-format files
    Queries and joins return their results as new Table objects, so that queries and joins can
    be easily performed as a succession of operations.
    """
    def __init__(self, table_name=''):
        """Create a new, empty Table.
        @param table_name: name for Table
        @type table_name: string (optional)
        """
        self(table_name)
        self.obs = []
        self._indexes = {}
        self._uniqueIndexes = []
        self.by = _IndexAccessor(self)
        """
        C{'by'} is added as a pseudo-attribute on tables, to provide
        dict-like access to the underlying records in the table by index key, as in::

           employees.by.socsecnum["000-00-0000"]
           customers.by.zipcode["12345"]

        The behavior differs slightly for unique and non-unique indexes:
          - if the index is unique, then retrieving a matching object will return just the object;
            if there is no matching object, C{KeyError} is raised (making a table with a unique
            index behave very much like a Python dict)
          - if the index is non-unique, then all matching objects will be returned in a new Table,
            just as if a regular query had been performed; if no objects match the key value, an empty
            Table is returned and no exception is raised.

        If there is no index defined for the given attribute, then C{AttributeError} is raised.
        """

    def __len__(self):
        """Return the number of objects in the Table."""
        return len(self.obs)

    def __iter__(self):
        """Create an iterator over the objects in the Table."""
        return iter(self.obs)

    def __getitem__(self, i):
        """Provides direct indexed/sliced access to the Table's underlying list of objects."""
        if isinstance(i, slice):
            ret = self.copy_template()
            ret.insert_many(self.obs[i])
            return ret
        else:
            return self.obs[i]

    def __getattr__(self, attr):
        """(Deprecated) A quick way to query for matching records using their indexed attributes. The attribute
        name is used to locate the index, and returns a wrapper on the index. This wrapper provides
        dict-like access to the underlying records in the table, as in::

           employees.socsecnum["000-00-0000"]
           customers.zipcode["12345"]

        (L{by} is added as a pseudo-attribute on tables, to help indicate that the indexed attributes
        are not attributes of the table, but of items in the table. Use of C{'by'} is preferred, and
        will replace direct attribute access in a future release.)::

           employees.by.socsecnum["000-00-0000"]
           customers.by.zipcode["12345"]

        The behavior differs slightly for unique and non-unique indexes:
          - if the index is unique, then retrieving a matching object will return just the object;
            if there is no matching object, C{KeyError} is raised (making a table with a unique
            index behave very much like a Python dict)
          - if the index is non-unique, then all matching objects will be returned in a new Table,
            just as if a regular query had been performed; if no objects match the key value, an empty
            Table is returned and no exception is raised.

        If there is no index defined for the given attribute, then C{AttributeError} is raised.
        """
        if attr in self._indexes:
            ret = self._indexes[attr]
            if isinstance(ret, _UniqueObjIndex):
                ret = _UniqueObjIndexWrapper(ret)
            if isinstance(ret, _ObjIndex):
                ret = _ObjIndexWrapper(ret)
            return ret
        raise AttributeError("Table '%s' has no index '%s'" % (self.table_name, attr))

    def __bool__(self):
        return bool(self.obs)

    __nonzero__ = __bool__

    def __add__(self, other):
        """Support UNION of 2 tables using "+" operator."""
        if isinstance(other, JoinTerm):
            # special case - if added to a JoinTerm, perform a join, not a union
            return other + self
        elif isinstance(other, Table):
            # if other is another Table, just union them
            return self.union(other)
        else:
            # assume other is a sequence of objects, insert them all into a copy of this table
            return self.clone().insert_many(other)

    def __iadd__(self, other):
        """Support UNION of 2 tables using "+=" operator."""
        return self.insert_many(other)

    def __call__(self, table_name=None):
        """A simple way to assign a name to a table, such as those
        dynamically created by joins and queries.
        @param table_name: name for Table
        @type table_name: string
        """
        if table_name is not None:
            self.table_name = table_name
        return self

    def copy_template(self, name=None):
        """Create an empty copy of the current table, with copies of all
        index definitions.
        """
        ret = Table(self.table_name)
        ret._indexes.update(dict((k, v.copy_template()) for k, v in self._indexes.items()))
        ret(name)
        return ret

    def clone(self, name=None):
        """Create a full copy of the current table, including table contents
        and index definitions.
        """
        ret = self.copy_template().insert_many(self.obs)(name)
        return ret

    def create_index(self, attr, unique=False, accept_none=False):
        """Create a new index on a given attribute.
        If C{unique} is True and records are found in the table with duplicate
        attribute values, the index is deleted and C{KeyError} is raised.

        If the table already has an index on the given attribute, then no
        action is taken and no exception is raised.
        @param attr: the attribute to be used for indexed access and joins
        @type attr: string
        @param unique: flag indicating whether the indexed field values are
            expected to be unique across table entries
        @type unique: boolean
        @param accept_none: flag indicating whether None is an acceptable
            unique key value for this attribute
        @type accept_none: boolean
        """
        if attr in self._indexes:
            return self

        if unique:
            self._indexes[attr] = _UniqueObjIndex(attr, accept_none)
            self._uniqueIndexes = [ind for ind in self._indexes.values() if ind.is_unique]
        else:
            self._indexes[attr] = _ObjIndex(attr)
            accept_none = True
        ind = self._indexes[attr]
        try:
            for obj in self.obs:
                obval = getattr(obj, attr, None)
                if obval is not None or accept_none:
                    ind[obval] = obj
                else:
                    raise KeyError("None is not an allowed key")
            return self

        except KeyError:
            del self._indexes[attr]
            self._uniqueIndexes = [ind for ind in self._indexes.values() if ind.is_unique]
            raise

    def delete_index(self, attr):
        """Deletes an index from the Table. Can be used to drop and rebuild an index,
        or to convert a non-unique index to a unique index, or vice versa.
        @param attr: name of an indexed attribute
        @type attr: string
        """
        if attr in self._indexes:
            del self._indexes[attr]
            self._uniqueIndexes = [ind for ind in self._indexes.values() if ind.is_unique]
        return self

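    # Example (illustrative sketch, reusing the catalog table from the module docstring):
    # a unique index enforces key uniqueness on insert; delete_index drops an index so it
    # can be rebuilt with different uniqueness.
    #
    #   catalog.create_index("sku", unique=True)
    #   catalog.create_index("unitofmeas")             # non-unique index
    #   catalog.delete_index("unitofmeas")             # drop it again
    #   catalog.insert(DataObject(sku="ANVIL-001"))    # raises KeyError if sku already present
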
    def insert(self, obj):
        """Insert a new object into this Table.
        @param obj: any Python object -
            Objects can be constructed using the defined DataObject type, or they can
            be any Python object that does not use the Python C{__slots__} feature; C{littletable}
            introspects the object's C{__dict__} or C{_fields} attributes to obtain join and
            index attributes and values.

            If the table contains a unique index, and the record to be inserted would add
            a duplicate value for the indexed attribute, then C{KeyError} is raised, and the
            object is not inserted.

            If the table has no unique indexes, then it is possible to insert duplicate
            objects into the table.
        """
        # verify new object against all unique indexes before inserting
        uniqueIndexes = self._uniqueIndexes
        NO_SUCH_ATTR = object()
        if any((getattr(obj, ind.attr, None) is None and not ind.accept_none)
                or (getattr(obj, ind.attr, NO_SUCH_ATTR) in ind)
                for ind in uniqueIndexes):
            # report which unique index raised the conflict
            for ind in uniqueIndexes:
                if getattr(obj, ind.attr, None) is None and not ind.accept_none:
                    raise KeyError("unique key cannot be None or blank for index %s" % ind.attr, obj)
                if getattr(obj, ind.attr) in ind:
                    raise KeyError("duplicate unique key value '%s' for index %s" %
                                   (getattr(obj, ind.attr), ind.attr), obj)

        self.obs.append(obj)
        for attr, ind in self._indexes.items():
            obval = getattr(obj, attr)
            ind[obval] = obj
        return self

    def insert_many(self, it):
        """Inserts a collection of objects into the table."""
        do_all(self.insert(ob) for ob in it)
        return self

    def remove(self, ob):
        """Removes an object from the table. If object is not in the table, then
        no action is taken and no exception is raised."""
        # remove from indexes
        do_all(ind.remove(ob) for attr, ind in self._indexes.items())

        # remove from main object list
        self.obs.remove(ob)

        return self

    def remove_many(self, it):
        """Removes a collection of objects from the table."""
        do_all(self.remove(ob) for ob in it)
        return self

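    # Example (illustrative sketch; the namedtuple type is hypothetical): any object with
    # a __dict__ or namedtuple _fields can be stored, so namedtuples work as records too.
    #
    #   from collections import namedtuple
    #   CatalogEntry = namedtuple("CatalogEntry", "sku descr unitprice")
    #   catalog.insert(CatalogEntry("MAGNT-001", "Magnet", 8))
    #   catalog.remove(catalog.by.sku["MAGNT-001"])
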
    def _query_attr_sort_fn(self, attr_val):
        """Used to order where keys by most selective key first"""
        attr, v = attr_val
        if attr in self._indexes:
            idx = self._indexes[attr]
            if v in idx:
                return len(idx[v])
            else:
                return 0
        else:
            return 1e9

    def where(self, wherefn=None, **kwargs):
        """
        Retrieves matching objects from the table, based on given
        named parameters. If multiple named parameters are given, then
        only objects that satisfy all of the query criteria will be returned.

        Special named args:
         - C{_orderby="attr,..."} - (Deprecated) resulting table should sort content objects
           by the C{attr}s given in a comma-separated string; to sort in
           descending order, reference the attribute as C{attr desc}.

         - C{_limit} - maximum number of records to return

        @param wherefn: a method or lambda that returns a boolean result, as in::

           lambda ob : ob.unitprice > 10

        @type wherefn: callable(object) returning boolean

        @param kwargs: attributes for selecting records, given as additional
          named arguments of the form C{attrname="attrvalue"}.

        @return: a new Table containing the matching objects
        """
        # extract any special flag arguments (those starting with '_') from the query kwargs;
        # snapshot items() with list() so kwargs can be modified during iteration
        flags = dict((k, kwargs.pop(k)) for k, v in list(kwargs.items()) if k.startswith("_"))

        if kwargs:
            # order the query criteria so that the most selective attribute (the one
            # matching the fewest records) is applied first
            kwargs = kwargs.items()
            if len(kwargs) > 1 and len(self) > 100:
                kwargs = sorted(kwargs, key=self._query_attr_sort_fn)

            ret = self
            NO_SUCH_ATTR = object()
            for k, v in kwargs:
                newret = ret.copy_template()
                if k in ret._indexes:
                    newret.insert_many(ret._indexes[k][v])
                else:
                    newret.insert_many(r for r in ret.obs if getattr(r, k, NO_SUCH_ATTR) == v)
                ret = newret
                if not ret:
                    break
        else:
            ret = self.clone()

        if ret and wherefn is not None:
            newret = ret.copy_template()
            newret.insert_many(filter(wherefn, ret.obs))
            ret = newret

        if ret:
            # apply any special flags to the result (sort ordering, record limit)
            if flags:
                if '_orderby' in flags and len(ret) > 1:
                    ret.sort(flags['_orderby'])
                if '_limit' in flags:
                    del ret.obs[flags['_limit']:]

        return ret

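    # Example (illustrative sketch, reusing the catalog table from the module docstring):
    # keyword criteria are exact-match and are ANDed together; wherefn adds an arbitrary
    # predicate; _limit caps the number of returned records.
    #
    #   each = catalog.where(unitofmeas="EA")
    #   pricey_each = catalog.where(lambda o: o.unitprice > 10, unitofmeas="EA")
    #   first_two = catalog.where(unitofmeas="EA", _limit=2)
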
    def delete(self, **kwargs):
        """Deletes matching objects from the table, based on given
        named parameters. If multiple named parameters are given, then
        only objects that satisfy all of the query criteria will be removed.
        @param kwargs: attributes for selecting records, given as additional
           named arguments of the form C{attrname="attrvalue"}.
        @return: the number of objects removed from the table
        """
        if not kwargs:
            return 0

        affected = self.where(**kwargs)
        self.remove_many(affected)
        return len(affected)

    def sort(self, key, reverse=False):
        """Sort Table in place, using given fields as sort key.
        @param key: if this is a string, it is a comma-separated list of field names,
           optionally followed by 'desc' to indicate descending sort instead of the
           default ascending sort; if a list or tuple, it is a list or tuple of field names
           or field names with ' desc' appended; if it is a function, then it is the
           function to be used as the sort key function
        @return: self
        """
        if isinstance(key, (basestring, list, tuple)):
            if isinstance(key, basestring):
                attrdefs = [s.strip() for s in key.split(',')]
                # the leftmost attr is the most significant sort key, so reverse the list
                # to apply the least significant sorts first
                attr_orders = [(a.split() + ['asc', ])[:2] for a in attrdefs][::-1]
            else:
                # attr definitions were already given as a sequence of (attr, order) entries
                attr_orders = key
            attrs = [attr for attr, order in attr_orders]

            # special optimization if all sorts are ascending or all are descending
            if all(order == 'asc' for attr, order in attr_orders):
                self.obs.sort(key=attrgetter(*attrs), reverse=reverse)
            elif all(order == 'desc' for attr, order in attr_orders):
                self.obs.sort(key=attrgetter(*attrs), reverse=not reverse)
            else:
                # mix of ascending and descending sorts - do a succession of sorts,
                # relying on the stability of the built-in sort
                do_all(self.obs.sort(key=attrgetter(attr), reverse=(order == "desc"))
                       for attr, order in attr_orders)
        else:
            keyfn = key
            self.obs.sort(key=keyfn, reverse=reverse)
        return self

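    # Example (illustrative sketch, reusing the catalog table from the module docstring):
    # sort keys may be given as a comma-separated string, with 'desc' for descending order,
    # or as a key function.
    #
    #   catalog.sort("unitprice desc")
    #   catalog.sort("unitofmeas, unitprice desc")
    #   catalog.sort(key=lambda o: len(o.descr))
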
    def select(self, fields, **exprs):
        """
        Create a new table containing a subset of attributes, with optionally
        newly-added fields computed from each rec in the original table.

        Special kwargs:
         - C{_unique=True} - (Deprecated) only return a set of unique rows

        @param fields: list of strings, or single space-delimited string, listing
           attribute names to be included in the output
        @type fields: list, or space-delimited string
        @param exprs: one or more named callable arguments, to compute additional fields using the given function
        @type exprs: C{name=callable}, callable takes the record as an argument, and returns the new attribute value
           If a string is passed as a callable, this string will be used using string formatting, given the record
           as a source of interpolation values. For instance, C{fullName = '%(lastName)s, %(firstName)s'}
        """
        if isinstance(fields, basestring):
            fields = fields.split()

        unique = exprs.pop('_unique', False)

        def _makeStringCallable(expr):
            if isinstance(expr, basestring):
                return lambda rec: expr % rec
            else:
                return expr

        exprs = dict((k, _makeStringCallable(v)) for k, v in exprs.items())

        raw_tuples = []
        for rec in self.obs:
            attrvalues = tuple(getattr(rec, fieldname, None) for fieldname in fields)
            if exprs:
                attrvalues += tuple(expr(rec) for expr in exprs.values())
            raw_tuples.append(attrvalues)

        if unique:
            raw_tuples = list(set(raw_tuples))

        allNames = tuple(fields) + tuple(exprs.keys())
        return Table().insert_many(DataObject(**dict(zip(allNames, outtuple))) for outtuple in raw_tuples)

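    # Example (illustrative sketch, reusing the catalog table from the module docstring):
    # select a subset of fields and add a computed field, either as a callable or as a
    # '%'-style template interpolated against each record.
    #
    #   brief = catalog.select("sku unitprice", label="%(descr)s @ %(unitprice)s")
    #   taxed = catalog.select("sku", with_tax=lambda rec: rec.unitprice * 1.08)
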

    def join(self, other, attrlist=None, auto_create_indexes=True, **kwargs):
        """
        Join the objects of one table with the objects of another, based on the given
        matching attributes in the named arguments. The attrlist specifies the attributes to
        be copied from the source tables - if omitted, all attributes will be copied. Entries
        in the attrlist may be single attribute names, or if there are duplicate names in both
        tables, then a C{(table, attributename)} tuple can be given to disambiguate which
        attribute is desired. A C{(table, attributename, alias)} tuple can also be passed, to
        rename an attribute from a source table.

        This method may be called directly, or can be constructed using the L{join_on} method and
        the '+' operator. Using this syntax, the join is specified using C{table.join_on("xyz")}
        to create a JoinTerm containing both table and joining attribute. Multiple JoinTerm
        or tables can be added to construct a compound join expression. When complete, the
        join expression gets executed by calling the resulting join definition,
        using C{join_expression([attrlist])}.

        @param other: other table to join to
        @param attrlist: list of attributes to be copied to the new joined table; if
            none provided, all attributes of both tables will be used (taken from the first
            object in each table)
        @type attrlist: string, or list of strings or C{(table, attribute[, alias])} tuples
            (list may contain both strings and tuples)
        @param kwargs: attributes to join on, given as additional named arguments
            of the form C{table1attr="table2attr"}, or a dict mapping attribute names.
        @returns: a new Table containing the joined data as new DataObjects
        """
        if not kwargs:
            raise TypeError("must specify at least one join attribute as a named argument")
        thiscol, othercol = next(iter(kwargs.items()))

        retname = ("(%s:%s^%s:%s)" %
                   (self.table_name, thiscol, other.table_name, othercol))
        # if either table is empty, there is nothing to join - return an empty table
        if not (self.obs and other.obs):
            return Table(retname)

        if isinstance(attrlist, basestring):
            attrlist = re.split(r'[,\s]+', attrlist)

        # expand attrlist into full (table, attribute, alias) tuples
        thisnames = set(_object_attrnames(self.obs[0]))
        othernames = set(_object_attrnames(other.obs[0]))
        fullcols = []
        if attrlist is not None:
            for col in attrlist:
                if isinstance(col, tuple):
                    # assume col contains at least (table, colname); if no alias is given,
                    # use the column name as the alias
                    fullcols.append((col + (col[1],))[:3])
                else:
                    if col in thisnames:
                        fullcols.append((self, col, col))
                    elif col in othernames:
                        fullcols.append((other, col, col))
                    else:
                        raise ValueError("join attribute not found: " + col)
        else:
            fullcols = [(self, n, n) for n in thisnames]
            fullcols += [(other, n, n) for n in othernames]

        thiscols = list(filter(lambda o: o[0] is self, fullcols))
        othercols = list(filter(lambda o: o[0] is other, fullcols))

        if auto_create_indexes:
            if thiscol not in self._indexes:
                self.create_index(thiscol)
            if othercol not in other._indexes:
                other.create_index(othercol)

        thiscolindex = othercolindex = None
        if thiscol in self._indexes:
            thiscolindex = self._indexes[thiscol]
        else:
            raise ValueError("indexed attribute required for join: " + thiscol)
        if othercol in other._indexes:
            othercolindex = other._indexes[othercol]
        else:
            raise ValueError("indexed attribute required for join: " + othercol)

        # use the shorter index to drive the join, to minimize the number of key lookups
        if len(thiscolindex) < len(othercolindex):
            shortindex, longindex = (thiscolindex, othercolindex)
            swap = False
        else:
            shortindex, longindex = (othercolindex, thiscolindex)
            swap = True

        # find matching rows from both tables for each key in the shorter index
        matchingrows = list((longindex[key], rows) if swap else (rows, longindex[key])
                            for key, rows in shortindex.items())

        joinrows = []
        for thisrows, otherrows in matchingrows:
            for trow, orow in product(thisrows, otherrows):
                retobj = DataObject()
                do_all(setattr(retobj, a, getattr(trow, c)) for _, c, a in thiscols)
                do_all(setattr(retobj, a, getattr(orow, c)) for _, c, a in othercols if not hasattr(retobj, a))
                joinrows.append(retobj)

        ret = Table(retname)
        for tbl, collist in zip([self, other], [thiscols, othercols]):
            for _, c, a in collist:
                if c in tbl._indexes:
                    if a not in ret._indexes:
                        ret.create_index(a)
        ret.insert_many(joinrows)
        return ret

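    # Example (illustrative sketch, reusing the tables from the module docstring):
    # equivalent direct-call and '+' operator forms of a join, with an attrlist that
    # renames one column via a (table, attribute, alias) tuple.
    #
    #   wished = customers.join(wishitems, id="custid")
    #   wished = (customers.join_on("id") + wishitems.join_on("custid"))(
    #               ["name", "sku", (customers, "id", "custid")])
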
    def join_on(self, attr):
        """Creates a JoinTerm in preparation for joining with another table, to
        indicate what attribute should be used in the join. Only indexed attributes
        may be used in a join.
        @param attr: attribute name to join from this table (may be different
            from the attribute name in the table being joined to)
        @type attr: string
        @returns: L{JoinTerm}"""
        if attr not in self._indexes:
            raise ValueError("can only join on indexed attributes")
        return JoinTerm(self, attr)

    def pivot(self, attrlist):
        """Pivots the data using the given attributes, returning a L{PivotTable}.
        @param attrlist: list of attributes to be used to construct the pivot table
        @type attrlist: list of strings, or string of space-delimited attribute names
        """
        if isinstance(attrlist, basestring):
            attrlist = attrlist.split()
        if all(a in self._indexes for a in attrlist):
            return PivotTable(self, [], attrlist)
        else:
            raise ValueError("pivot can only be called using indexed attributes")

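    # Example (illustrative sketch, reusing the catalog table from the module docstring):
    # pivot requires indexed attributes; the resulting PivotTable can summarize counts.
    #
    #   catalog.create_index("unitofmeas")
    #   by_uom = catalog.pivot("unitofmeas")
    #   by_uom.dump_counts()
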
    def _import(self, source, encoding, transforms=None, reader=csv.DictReader):
        close_on_exit = False
        if isinstance(source, basestring):
            if PY_3:
                source = open(source, encoding=encoding)
            else:
                source = open(source)
            close_on_exit = True
        try:
            csvdata = reader(source)
            self.insert_many(DataObject(**s) for s in csvdata)
            if transforms:
                for attr, fn in transforms.items():
                    default = None
                    if isinstance(fn, tuple):
                        fn, default = fn
                    objfn = lambda obj: fn(getattr(obj, attr))
                    self.add_field(attr, objfn, default)
        finally:
            if close_on_exit:
                source.close()

    def csv_import(self, csv_source, encoding='UTF-8', transforms=None, **kwargs):
        """Imports the contents of a CSV-formatted file into this table.
        @param csv_source: CSV file - if a string is given, the file with that name will be
            opened, read, and closed; if a file object is given, then that object
            will be read as-is, and left for the caller to be closed.
        @type csv_source: string or file
        @param encoding: encoding to be used for reading source text if C{csv_source} is
            passed as a string filename
        @type encoding: string (default='UTF-8')
        @param transforms: dict of functions by attribute name; if given, each
            attribute will be transformed using the corresponding transform; if there is no
            matching transform, the attribute will be read as a string (default); the
            transform function can also be defined as a (function, default-value) tuple; if
            there is an Exception raised by the transform function, then the attribute will
            be set to the given default value
        @type transforms: dict (optional)
        @param kwargs: additional constructor arguments for csv C{DictReader} objects, such as C{delimiter}
            or C{fieldnames}; these are passed directly through to the csv C{DictReader} constructor
        @type kwargs: named arguments (optional)
        """
        reader_args = dict((k, v) for k, v in kwargs.items() if k not in ['encoding', 'csv_source', 'transforms'])
        reader = lambda src: csv.DictReader(src, **reader_args)
        return self._import(csv_source, encoding, transforms, reader=reader)

    def _xsv_import(self, xsv_source, transforms=None, splitstr="\t"):
        xsv_reader = lambda src: csv.DictReader(src, delimiter=splitstr)
        # default to UTF-8 when opening by filename, matching csv_import
        return self._import(xsv_source, encoding='UTF-8', transforms=transforms, reader=xsv_reader)

    def tsv_import(self, xsv_source, transforms=None):
        """Imports the contents of a tab-separated data file into this table.
        @param xsv_source: tab-separated data file - if a string is given, the file with that name will be
            opened, read, and closed; if a file object is given, then that object
            will be read as-is, and left for the caller to be closed.
        @type xsv_source: string or file
        @param transforms: dict of functions by attribute name; if given, each
            attribute will be transformed using the corresponding transform; if there is no
            matching transform, the attribute will be read as a string (default); the
            transform function can also be defined as a (function, default-value) tuple; if
            there is an Exception raised by the transform function, then the attribute will
            be set to the given default value
        @type transforms: dict (optional)
        """
        return self._xsv_import(xsv_source, transforms=transforms, splitstr="\t")

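    # Example (illustrative sketch; the file name is hypothetical): CSV fields are read
    # as strings unless a transform is given; a (function, default) tuple supplies a
    # fallback value when the transform raises.
    #
    #   catalog = Table("catalog")
    #   catalog.csv_import("catalog.csv", transforms={"unitprice": (int, 0)})
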
    def csv_export(self, csv_dest, fieldnames=None, encoding="UTF-8"):
        """Exports the contents of the table to a CSV-formatted file.
        @param csv_dest: CSV file - if a string is given, the file with that name will be
            opened, written, and closed; if a file object is given, then that object
            will be written as-is, and left for the caller to be closed.
        @type csv_dest: string or file
        @param fieldnames: attribute names to be exported; can be given as a single
            string with space-delimited names, or as a list of attribute names
        @type fieldnames: list of strings
        @param encoding: string (default="UTF-8"); if csv_dest is provided as a string
            representing an output filename, an encoding argument can be provided (Python 3 only)
        @type encoding: string
        """
        close_on_exit = False
        if isinstance(csv_dest, basestring):
            if PY_3:
                csv_dest = open(csv_dest, 'w', encoding=encoding)
            else:
                csv_dest = open(csv_dest, 'w')
            close_on_exit = True
        try:
            if fieldnames is None:
                fieldnames = list(_object_attrnames(self.obs[0]))
            if isinstance(fieldnames, basestring):
                fieldnames = fieldnames.split()

            csv_dest.write(','.join(fieldnames) + '\n')
            csvout = csv.DictWriter(csv_dest, fieldnames, extrasaction='ignore')
            if hasattr(self.obs[0], "__dict__"):
                do_all(csvout.writerow(o.__dict__) for o in self.obs)
            else:
                do_all(csvout.writerow(ODict(starmap(lambda obj, fld: (fld, getattr(obj, fld)),
                                                     zip(repeat(o), fieldnames)))) for o in self.obs)
        finally:
            if close_on_exit:
                csv_dest.close()

    def json_import(self, source, encoding="UTF-8", transforms=None):
        """Imports the contents of a JSON data file into this table.
        @param source: JSON data file - if a string is given, the file with that name will be
            opened, read, and closed; if a file object is given, then that object
            will be read as-is, and left for the caller to be closed.
        @type source: string or file
        @param transforms: dict of functions by attribute name; if given, each
            attribute will be transformed using the corresponding transform; if there is no
            matching transform, the attribute will be read as a string (default); the
            transform function can also be defined as a (function, default-value) tuple; if
            there is an Exception raised by the transform function, then the attribute will
            be set to the given default value
        @type transforms: dict (optional)
        """
        class _JsonFileReader(object):
            def __init__(self, src):
                self.source = src
            def __iter__(self):
                current = ''
                for line in self.source:
                    if current:
                        current += ' '
                    current += line
                    try:
                        yield json.loads(current)
                        current = ''
                    except Exception:
                        pass
        return self._import(source, encoding, transforms=transforms, reader=_JsonFileReader)

    def json_export(self, dest, fieldnames=None, encoding="UTF-8"):
        """Exports the contents of the table to a JSON-formatted file.
        @param dest: output file - if a string is given, the file with that name will be
            opened, written, and closed; if a file object is given, then that object
            will be written as-is, and left for the caller to be closed.
        @type dest: string or file
        @param fieldnames: attribute names to be exported; can be given as a single
            string with space-delimited names, or as a list of attribute names
        @type fieldnames: list of strings
        @param encoding: string (default="UTF-8"); if dest is provided as a string
            representing an output filename, an encoding argument can be provided (Python 3 only)
        @type encoding: string
        """
        close_on_exit = False
        if isinstance(dest, basestring):
            if PY_3:
                dest = open(dest, 'w', encoding=encoding)
            else:
                dest = open(dest, 'w')
            close_on_exit = True
        try:
            if isinstance(fieldnames, basestring):
                fieldnames = fieldnames.split()

            if fieldnames is None:
                do_all(
                    dest.write(_to_json(o) + '\n') for o in self.obs
                )
            else:
                do_all(
                    dest.write(json.dumps(ODict((f, getattr(o, f)) for f in fieldnames)) + '\n')
                    for o in self.obs
                )
        finally:
            if close_on_exit:
                dest.close()

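    # Example (illustrative sketch; the file name is hypothetical): records are written
    # one JSON object per line, and json_import reads that same line-oriented format back.
    #
    #   catalog.json_export("catalog.json")
    #   restored = Table("catalog")
    #   restored.json_import("catalog.json")
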
    def add_field(self, attrname, fn, default=None):
        """Computes a new attribute for each object in table, or replaces an
        existing attribute in each record with a computed value
        @param attrname: attribute to compute for each object
        @type attrname: string
        @param fn: function used to compute new attribute value, based on
            other values in the object, as in::

            lambda ob : ob.commission_pct/100.0 * ob.gross_sales

        @type fn: function(obj) returns value
        @param default: value to use if an exception is raised while trying
            to evaluate fn
        """
        def _addFieldToRec(rec, fn=fn, default=default):
            try:
                val = fn(rec)
            except Exception:
                val = default
            if isinstance(rec, DataObject):
                rec.__dict__[attrname] = val
            else:
                setattr(rec, attrname, val)
        do_all(_addFieldToRec(r) for r in self)
        return self

    addfield = add_field
    """(Deprecated) Legacy method to add a field to all objects in table; to be replaced by L{add_field}.
    """

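    # Example (illustrative sketch, reusing the catalog table from the module docstring):
    # compute a derived attribute for every record; records where the function raises get
    # the default value instead.
    #
    #   catalog.add_field("price_band",
    #                     lambda ob: "high" if ob.unitprice > 10 else "low",
    #                     default="unknown")
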
    def groupby(self, keyexpr, **outexprs):
        """Simple prototype of group-by, with support for expressions in the group-by clause
        and outputs.
        @param keyexpr: grouping field and optional expression for computing the key value;
            if a string is passed, it is treated as a space-delimited list of attribute names;
            if a tuple is passed, it is an (attribute name, key function) pair
        @type keyexpr: string or tuple
        @param outexprs: named arguments describing one or more summary values to
            compute per key
        @type outexprs: callable, taking a sequence of objects as input and returning
            a single summary value
        """
        if isinstance(keyexpr, basestring):
            keyattrs = keyexpr.split()
            keyfn = lambda o: tuple(getattr(o, k) for k in keyattrs)

        elif isinstance(keyexpr, tuple):
            keyattrs = (keyexpr[0],)
            keyfn = keyexpr[1]

        else:
            raise TypeError("keyexpr must be string or tuple")

        groupedobs = defaultdict(list)
        do_all(groupedobs[keyfn(ob)].append(ob) for ob in self.obs)

        tbl = Table()
        do_all(tbl.create_index(k, unique=(len(keyattrs) == 1)) for k in keyattrs)
        for key, recs in sorted(groupedobs.items()):
            groupobj = DataObject(**dict(zip(keyattrs, key)))
            do_all(setattr(groupobj, subkey, expr(recs))
                   for subkey, expr in outexprs.items())
            tbl.insert(groupobj)
        return tbl

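    # Example (illustrative sketch, reusing the catalog table from the module docstring):
    # group by an attribute and compute per-group summary values from the grouped records.
    #
    #   by_uom = catalog.groupby("unitofmeas",
    #                            count=len,
    #                            total=lambda recs: sum(r.unitprice for r in recs))
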
    def query(self, **kwargs):
        """(Deprecated) Returns the Table. Will be removed in a future release.
        """
        return self

    def unique(self, key=None):
        """
        Create a new table of objects, containing no duplicate values.

        @param key: (default=None) optional callable for computing a representative unique key for each
            object in the table. If None, then a key will be composed as a tuple of all the values in the object.
        @type key: callable, takes the record as an argument, and returns the key value or tuple to be used
            to represent uniqueness.
        """
        ret = self.copy_template()
        seen = set()
        for rec in self:
            if key is None:
                try:
                    ob_dict = vars(rec)
                except TypeError:
                    ob_dict = dict((k, getattr(rec, k)) for k in _object_attrnames(rec))
                reckey = tuple(sorted(ob_dict.items()))
            else:
                reckey = key(rec)
            if reckey not in seen:
                seen.add(reckey)
                ret.insert(rec)
        return ret

class PivotTable(Table):
    """Enhanced Table containing pivot results from calling table.pivot().
    """
    def __init__(self, parent, attr_val_path, attrlist):
        """PivotTable initializer - do not create these directly, use
        L{Table.pivot}.
        """
        super(PivotTable, self).__init__()
        self._attr_path = attr_val_path[:]
        self._pivot_attrs = attrlist[:]
        self._subtable_dict = {}

        # attr_val_path is empty for the top-level PivotTable; for nested subtables it
        # gives the sequence of (attribute, value) pairs that selects this subtable
        self._indexes.update(dict((k, v.copy_template()) for k, v in parent._indexes.items()))
        if not attr_val_path:
            self.insert_many(parent.obs)
        else:
            attr, val = attr_val_path[-1]
            self.insert_many(parent.where(**{attr: val}))
            parent._subtable_dict[val] = self

        if len(attrlist) > 0:
            this_attr = attrlist[0]
            sub_attrlist = attrlist[1:]
            ind = parent._indexes[this_attr]
            self.subtables = [PivotTable(self,
                                         attr_val_path + [(this_attr, k)],
                                         sub_attrlist) for k in sorted(ind.keys())]
        else:
            self.subtables = []

    def __getitem__(self, val):
        if self._subtable_dict:
            return self._subtable_dict[val]
        else:
            return super(PivotTable, self).__getitem__(val)

    def keys(self):
        return sorted(self._subtable_dict.keys())

    def items(self):
        return sorted(self._subtable_dict.items())

    def values(self):
        return [self._subtable_dict[k] for k in self.keys()]

    def pivot_key(self):
        """Return the set of attribute-value pairs that define the contents of this
        table within the original source table.
        """
        return self._attr_path

    def pivot_key_str(self):
        """Return the pivot_key as a displayable string.
        """
        return '/'.join("%s:%s" % (attr, key) for attr, key in self._attr_path)

    def has_subtables(self):
        """Return whether this table has further subtables.
        """
        return bool(self.subtables)

    def dump(self, out=sys.stdout, row_fn=repr, limit=-1, indent=0):
        """Dump out the contents of this table in a nested listing.
        @param out: output stream to write to
        @param row_fn: function to call to display individual rows
        @param limit: number of records to show at deepest level of pivot (-1=show all)
        @param indent: current nesting level
        """
        NL = '\n'
        if indent:
            out.write(" " * indent + self.pivot_key_str())
        else:
            out.write("Pivot: %s" % ','.join(self._pivot_attrs))
        out.write(NL)
        if self.has_subtables():
            do_all(sub.dump(out, row_fn, limit, indent + 1) for sub in self.subtables if sub)
        else:
            if limit >= 0:
                showslice = slice(0, limit)
            else:
                showslice = slice(None, None)
            do_all(out.write(" " * (indent + 1) + row_fn(r) + NL)
                   for r in self.obs[showslice])
        out.flush()

    def dump_counts(self, out=sys.stdout, count_fn=len, colwidth=10):
        """Dump out the summary counts of entries in this pivot table as a tabular listing.
        @param out: output stream to write to
        @param count_fn: function used to tally the entries of each subtable (default=len)
        @param colwidth: minimum width of the count columns
        """
        if len(self._pivot_attrs) == 1:
            out.write("Pivot: %s\n" % ','.join(self._pivot_attrs))
            maxkeylen = max(len(str(k)) for k in self.keys())
            maxvallen = colwidth
            keytally = {}
            for k, sub in self.items():
                sub_v = count_fn(sub)
                maxvallen = max(maxvallen, len(str(sub_v)))
                keytally[k] = sub_v
            for k, sub in self.items():
                out.write("%-*.*s " % (maxkeylen, maxkeylen, k))
                out.write("%*s\n" % (maxvallen, keytally[k]))
        elif len(self._pivot_attrs) == 2:
            out.write("Pivot: %s\n" % ','.join(self._pivot_attrs))
            maxkeylen = max(max(len(str(k)) for k in self.keys()), 5)
            maxvallen = max(max(len(str(k)) for k in self.subtables[0].keys()), colwidth)
            keytally = dict((k, 0) for k in self.subtables[0].keys())
            out.write("%*s " % (maxkeylen, ''))
            out.write(' '.join("%*.*s" % (maxvallen, maxvallen, k) for k in self.subtables[0].keys()))
            out.write(' %*s\n' % (maxvallen, 'Total'))
            for k, sub in self.items():
                out.write("%-*.*s " % (maxkeylen, maxkeylen, k))
                for kk, ssub in sub.items():
                    ssub_v = count_fn(ssub)
                    out.write("%*d " % (maxvallen, ssub_v))
                    keytally[kk] += ssub_v
                    maxvallen = max(maxvallen, len(str(ssub_v)))
                sub_v = count_fn(sub)
                maxvallen = max(maxvallen, len(str(sub_v)))
                out.write("%*d\n" % (maxvallen, sub_v))
            out.write('%-*.*s ' % (maxkeylen, maxkeylen, "Total"))
            out.write(' '.join("%*d" % (maxvallen, tally) for k, tally in sorted(keytally.items())))
            out.write(" %*d\n" % (maxvallen, sum(tally for k, tally in keytally.items())))
        else:
            raise ValueError("can only dump summary counts for 1 or 2-attribute pivots")

    def summary_counts(self, fn=None, col=None, summarycolname=None):
        """Dump out the summary counts of this pivot table as a Table.
        """
        if summarycolname is None:
            summarycolname = col
        ret = Table()
        topattr = self._pivot_attrs[0]
        do_all(ret.create_index(attr) for attr in self._pivot_attrs)
        if len(self._pivot_attrs) == 1:
            for sub in self.subtables:
                subattr, subval = sub._attr_path[-1]
                attrdict = {subattr: subval}
                if fn is None:
                    attrdict['Count'] = len(sub)
                else:
                    attrdict[summarycolname] = fn(s[col] for s in sub)
                ret.insert(DataObject(**attrdict))
        elif len(self._pivot_attrs) == 2:
            for sub in self.subtables:
                for ssub in sub.subtables:
                    attrdict = dict(ssub._attr_path)
                    if fn is None:
                        attrdict['Count'] = len(ssub)
                    else:
                        attrdict[summarycolname] = fn(s[col] for s in ssub)
                    ret.insert(DataObject(**attrdict))
        elif len(self._pivot_attrs) == 3:
            for sub in self.subtables:
                for ssub in sub.subtables:
                    for sssub in ssub.subtables:
                        attrdict = dict(sssub._attr_path)
                        if fn is None:
                            attrdict['Count'] = len(sssub)
                        else:
                            attrdict[summarycolname] = fn(s[col] for s in sssub)
                        ret.insert(DataObject(**attrdict))
        else:
            raise ValueError("can only dump summary counts for 1, 2, or 3-attribute pivots")
        return ret


class JoinTerm(object):
    """Temporary object created while composing a join across tables using
    L{Table.join_on} and '+' addition. JoinTerms are usually created by
    calling join_on on a Table object, as in::

        customers.join_on("id") + orders.join_on("custid")

    This join expression would set up the join relationship
    equivalent to::

        customers.join(orders, id="custid")

    If tables are being joined on attributes that have the same name in
    both tables, then a join expression could be created by adding a
    JoinTerm of one table directly to the other table::

        customers.join_on("custid") + orders

    Once the join expression is composed, the actual join is performed
    using function call notation::

        customerorders = customers.join_on("custid") + orders
        for custord in customerorders():
            print(custord)

    When calling the join expression, you can optionally specify a
    list of attributes as defined in L{Table.join}.
    """
    def __init__(self, sourceTable, joinfield):
        self.sourcetable = sourceTable
        self.joinfield = joinfield
        self.jointo = None

    def __add__(self, other):
        if isinstance(other, Table):
            other = other.join_on(self.joinfield)
        if isinstance(other, JoinTerm):
            if self.jointo is None:
                if other.jointo is None:
                    self.jointo = other
                else:
                    self.jointo = other()
                return self
            else:
                if other.jointo is None:
                    return self() + other
                else:
                    return self() + other()
        raise ValueError("cannot add object of type '%s' to JoinTerm" % other.__class__.__name__)

    def __radd__(self, other):
        if isinstance(other, Table):
            return other.join_on(self.joinfield) + self
        raise ValueError("cannot add object of type '%s' to JoinTerm" % other.__class__.__name__)

    def __call__(self, attrs=None):
        if self.jointo:
            other = self.jointo
            if isinstance(other, Table):
                other = other.join_on(self.joinfield)
            ret = self.sourcetable.join(other.sourcetable, attrs,
                                        **{self.joinfield: other.joinfield})
            return ret
        else:
            return self.sourcetable.query()


if __name__ == "__main__":

    from functools import partial
    try:
        import simplejson as json
        json_dumps = partial(json.dumps, indent=' ')
    except ImportError:
        import json
        json_dumps = partial(json.dumps, indent=2)

    rawdata = """\
    Phoenix:AZ:85001:KPHX
    Phoenix:AZ:85001:KPHY
    Phoenix:AZ:85001:KPHA
    Dallas:TX:75201:KDFW""".splitlines()

    stations = Table()

    stations.create_index("stn", unique=True)

    fields = "city state zip stn".split()
    for d in rawdata:
        ob = DataObject()
        for k, v in zip(fields, d.split(':')):
            setattr(ob, k, v.strip())
        stations.insert(ob)

    for queryargs in [
            dict(city="Phoenix"),
            dict(city="Phoenix", stn="KPHX"),
            dict(stn="KPHA", city="Phoenix"),
            dict(state="TX"),
            dict(city="New York"),
            ]:
        print(queryargs)
        result = stations.where(**queryargs)
        print(len(result))
        for r in result: print(r)
        print('')

    print(list(stations.where()))
    print('')

    amfm = Table()
    amfm.create_index("stn", unique=True)
    amfm.insert(DataObject(stn="KPHY", band="AM"))
    amfm.insert(DataObject(stn="KPHX", band="FM"))
    amfm.insert(DataObject(stn="KPHA", band="FM"))
    amfm.insert(DataObject(stn="KDFW", band="FM"))
    print(amfm.by.stn["KPHY"])
    print(amfm.by.stn["KPHY"].band)

    try:
        amfm.insert(DataObject(stn="KPHA", band="AM"))
    except KeyError:
        print("duplicate key not allowed")

    print('')
    for rec in (stations.join_on("stn") + amfm.join_on("stn")
                )(["stn", "city", (amfm, "band", "AMFM"),
                   (stations, "state", "st")]).sort("AMFM"):
        print(repr(rec))

    print('')
    for rec in (stations.join_on("stn") + amfm.join_on("stn")
                )(["stn", "city", (amfm, "band"), (stations, "state", "st")]):
        print(json_dumps(vars(rec)))

    print('')
    for rec in (stations.join_on("stn") + amfm.join_on("stn"))():
        print(json_dumps(vars(rec)))

    print('')
    stations.create_index("state")
    for az_stn in stations.by.state['AZ']:
        print(az_stn)

    print('')
    pivot = stations.pivot("state")
    pivot.dump_counts()

    print('')
    amfm.create_index("band")
    pivot = (stations.join_on("stn") + amfm)().pivot("state band")
    pivot.dump_counts()
