1 import re
2 from copy import deepcopy
3 from xml.dom import minidom, getDOMImplementation
4
5 from intermine.util import openAnything, ReadableException
6 from intermine.pathfeatures import PathDescription, Join, SortOrder, SortOrderList
7 from intermine.model import Column, Class, Model, Reference, ConstraintNode
8 import constraints
9
10 """
11 Classes representing queries against webservices
12 ================================================
13
14 Representations of queries, and templates.
15
16 """
17
18 __author__ = "Alex Kalderimis"
19 __organization__ = "InterMine"
20 __license__ = "LGPL"
21 __contact__ = "dev@intermine.org"
22
23
24 -class Query(object):
25 """
26 A Class representing a structured database query
27 ================================================
28
29 Objects of this class have properties that model the
30 attributes of the query, and methods for performing
31 the request.
32
33 SYNOPSIS
34 --------
35
36 example:
37
38 >>> service = Service("http://www.flymine.org/query/service")
39 >>> query = service.new_query()
40 >>>
41 >>> query.add_view("Gene.symbol", "Gene.pathways.name", "Gene.proteins.symbol")
42 >>> query.add_sort_order("Gene.pathways.name")
43 >>>
44 >>> query.add_constraint("Gene", "LOOKUP", "eve")
45 >>> query.add_constraint("Gene.pathways.name", "=", "Phosphate*")
46 >>>
47 >>> query.set_logic("A or B")
48 >>>
49 >>> for row in query.rows():
50 ... handle_row(row)
51
52 OR, using an SQL style DSL:
53
54 >>> s = Service("www.flymine.org/query")
55 >>> query = s.query("Gene").\\
56 ... select("*", "pathways.*").\\
57 ... where("symbol", "=", "H").\\
58 ... outerjoin("pathways").\\
59 ... order_by("symbol")
60 >>> for row in query.rows(start=10, size=5):
61 ... handle_row(row)
62
63 OR, for a more SQL-alchemy, ORM style:
64
65 >>> for gene in s.query(s.model.Gene).filter(s.model.Gene.symbol == ["zen", "H", "eve"]).add_columns(s.model.Gene.alleles):
66 ... handle(gene)
67
68 Query objects represent structured requests for information over the database
69 housed at the datawarehouse whose webservice you are querying. They utilise
70 some of the concepts of relational databases, within an object-related
71 ORM context. If you don't know what that means, don't worry: you
72 don't need to write SQL, and the queries will be fast.
73
74 To make things slightly more familiar to those with knowledge of SQL, some syntactical
75 sugar is provided to make constructing queries a bit more recognisable.
76
77 PRINCIPLES
78 ----------
79
80 The data model represents tables in the databases as classes, with records
81 within tables as instances of that class. The columns of the database are the
82 fields of that object::
83
84 The Gene table - showing two records/objects
85 +---------------------------------------------------+
86 | id | symbol | length | cyto-location | organism |
87 +----------------------------------------+----------+
88 | 01 | eve | 1539 | 46C10-46C10 | 01 |
89 +----------------------------------------+----------+
90 | 02 | zen | 1331 | 84A5-84A5 | 01 |
91 +----------------------------------------+----------+
92 ...
93
94 The organism table - showing one record/object
95 +----------------------------------+
96 | id | name | taxon id |
97 +----------------------------------+
98 | 01 | D. melanogaster | 7227 |
99 +----------------------------------+
100
101 Columns that contain a meaningful value are known as 'attributes' (in the tables above, that is
102 everything except the id columns). The other columns (such as "organism" in the gene table)
103 are ones that reference records of other tables (ie. other objects), and are called
104 references. You can refer to any field in any class, that has a connection,
105 however tenuous, with a table, by using dotted path notation::
106
107 Gene.organism.name -> the name column in the organism table, referenced by a record in the gene table
108
109 These paths, and the connections between records and tables they represent,
110 are the basis for the structure of InterMine queries.
111
112 THE STRUCTURE OF A QUERY
113 ------------------------
114
115 A query has two principal sets of properties:
116 - its view: the set of output columns
117 - its constraints: the set of rules for what to include
118
119 A query must have at least one output column in its view, but constraints
120 are optional - if you don't include any, you will get back every record
121 from the table (every object of that type)
122
123 In addition, the query must be coherent: if you have information about
124 an organism, and you want a list of genes, then the "Gene" table
125 should be the basis for your query, and as such the Gene class, which
126 represents this table, should be the root of all the paths that appear in it:
127
128 So, to take a simple example::
129
130 I have an organism name, and I want a list of genes:
131
132 The view is the list of things I want to know about those genes:
133
134 >>> query.add_view("Gene.name")
135 >>> query.add_view("Gene.length")
136 >>> query.add_view("Gene.proteins.sequence.length")
137
138 Note I can freely mix attributes and references, as long as every view ends in
139 an attribute (a meaningful value). As a short-cut I can also write:
140
141 >>> query.add_views("Gene.name", "Gene.length", "Gene.proteins.sequence.length")
142
143 or:
144
145 >>> query.add_views("Gene.name Gene.length Gene.proteins.sequence.length")
146
147 They are all equivalent. You can also use common SQL style shortcuts such as "*" for all
148 attribute fields:
149
150 >>> query.add_views("Gene.*")
151
152 You can also use "select" as a synonym for "add_view"
153
154 Now I can add my constraints. As, we mentioned, I have information about an organism, so:
155
156 >>> query.add_constraint("Gene.organism.name", "=", "D. melanogaster")
157
158 (note, here I can use "where" as a synonym for "add_constraint")
159
160 If I run this query, I will get literally millions of results -
161 it needs to be filtered further:
162
163 >>> query.add_constraint("Gene.proteins.sequence.length", "<", 500)
164
165 If that doesn't restrict things enough I can add more filters:
166
167 >>> query.add_constraint("Gene.symbol", "ONE OF", ["eve", "zen", "h"])
168
169 Now I am guaranteed to get only information on genes I am interested in.
170
171 Note, though, that because I have included the link (or "join") from Gene -> Protein,
172 this, by default, means that I only want genes that have protein information associated
173 with them. If in fact I want information on all genes, and just want to know the
174 protein information if it is available, then I can specify that with:
175
176 >>> query.add_join("Gene.proteins", "OUTER")
177
178 And if perhaps my query is not as simple as a strict cumulative filter, but I want all
179 D. mel genes that EITHER have a short protein sequence OR come from one of my favourite genes
180 (as unlikely as that sounds), I can specify the logic for that too:
181
182 >>> query.set_logic("A and (B or C)")
183
184 Each letter refers to one of the constraints - the codes are assigned in the order you add
185 the constraints. If you want to be absolutely certain about the constraints you mean, you
186 can use the constraint objects themselves:
187
188 >>> gene_is_eve = query.add_constraint("Gene.symbol", "=", "eve")
189 >>> gene_is_zen = query.add_constraint("Gene.symbol", "=", "zen")
190 >>>
191 >>> query.set_logic(gene_is_eve | gene_is_zen)
192
193 By default the logic is a straight cumulative filter (ie: A and B and C and D and ...)
194
195 Putting it all together:
196
197 >>> query.add_view("Gene.name", "Gene.length", "Gene.proteins.sequence.length")
198 >>> query.add_constraint("Gene.organism.name", "=", "D. melanogaster")
199 >>> query.add_constraint("Gene.proteins.sequence.length", "<", 500)
200 >>> query.add_constraint("Gene.symbol", "ONE OF", ["eve", "zen", "h"])
201 >>> query.add_join("Gene.proteins", "OUTER")
202 >>> query.set_logic("A and (B or C)")
203
204 This can be made more concise and readable with a little DSL sugar:
205
206 >>> query = service.query("Gene")
207 >>> query.select("name", "length", "proteins.sequence.length").\
208 ... where('organism.name', '=', 'D. melanogaster').\
209 ... where("proteins.sequence.length", "<", 500).\
210 ... where('symbol', 'ONE OF', ['eve', 'h', 'zen']).\
211 ... outerjoin('proteins').\
212 ... set_logic("A and (B or C)")
213
214 And the query is defined.
215
216 Result Processing: Rows
217 -----------------------
218
219 calling ".rows()" on a query will return an iterator of rows, where each row
220 is a ResultRow object, which can be treated as both a list and a dictionary.
221
222 Which means you can refer to columns by name:
223
224 >>> for row in query.rows():
225 ... print "name is %s" % (row["name"])
226 ... print "length is %d" % (row["length"])
227
228 As well as using list indices:
229
230 >>> for row in query.rows():
231 ... print "The first column is %s" % (row[0])
232
233 Iterating over a row iterates over the cell values as a list:
234
235 >>> for row in query.rows():
236 ... for column in row:
237 ... do_something(column)
238
239 Here each row will have a gene name, a gene length, and a sequence length, eg:
240
241 >>> print row.to_l
242 ["even skipped", "1359", "376"]
243
244 To make that clearer, you can ask for a dictionary instead of a list:
245
246 >>> for row in query.rows():
247 ... print row.to_d
248 {"Gene.name":"even skipped","Gene.length":"1359","Gene.proteins.sequence.length":"376"}
249
250
251 If you just want the raw results, for printing to a file, or for piping to another program,
252 you can request strings instead:
253
254 >>> for row in query.result("string"):
255 ... print(row)
256
257 Result Processing: Results
258 --------------------------
259
260 Results can also be processed on a record by record basis. If you have a query that
261 has output columns of "Gene.symbol", "Gene.pathways.name" and "Gene.proteins.proteinDomains.primaryIdentifier",
262 then processing it by records will return one object per gene, and that gene will have a property
263 named "pathways" which contains objects which have a name property. Likewise there will be a
264 proteins property which holds a list of proteinDomains which all have a primaryIdentifier property, and so on.
265 This allows a more object orientated approach to database records, familiar to users of
266 other ORMs.
267
268 This is the format used when you choose to iterate over a query directly, or can be explicitly
269 chosen by invoking L{intermine.query.Query.results}:
270
271 >>> for gene in query:
272 ... print gene.name, map(lambda x: x.name, gene.pathways)
273
274 The structure of the object and the information it contains depends entirely
275 on the output columns selected. The values may be None, of course, but also any valid values of an object
276 (according to the data model) will also be None if they were not selected for output. Attempts
277 to access invalid properties (such as gene.favourite_colour) will cause exceptions to be thrown.
278
279 Getting us to Generate your Code
280 --------------------------------
281
282 Not that you have to actually write any of this! The webapp will happily
283 generate the code for any query (and template) you can build in it. A good way to get
284 started is to use the webapp to generate your code, and then run it as scripts
285 to speed up your queries. You can always tinker with and edit the scripts you download.
286
287 To get generated queries, look for the "python" link at the bottom of query-builder and
288 template form pages, it looks a bit like this::
289
290 . +=====================================+=============
291 | |
292 | Perl | Python | Java [Help] |
293 | |
294 +==============================================
295
296 """
297
# Splits a sort-order string on its direction keywords, keeping the keyword
# (capturing group) in the result.  The word boundaries stop the keywords from
# matching inside a path component such as "Gene.description".
SO_SPLIT_PATTERN = re.compile(r"\s*\b(asc|desc)\b\s*", re.I)
# Splits a logic string on operators and brackets, leaving the constraint codes.
LOGIC_SPLIT_PATTERN = re.compile(r"\s*(?:and|or|\(|\))\s*", re.I)
# An operator with nothing after it, at the end of the logic string.
TRAILING_OP_PATTERN = re.compile(r"\s*(and|or)\s*$", re.I)
# An operator with nothing before it, at the start of the logic string.
LEADING_OP_PATTERN = re.compile(r"^\s*(and|or)\s*", re.I)
# An operator directly inside an opening or closing bracket.
ORPHANED_OP_PATTERN = re.compile(r"(?:\(\s*(?:and|or)\s*|\s*(?:and|or)\s*\))", re.I)
LOGIC_OPS = ["and", "or"]
# Every ordered pair of operators.  Written out literally because a
# comprehension in a class body cannot see class-level names on Python 3.
LOGIC_PRODUCT = [("and", "and"), ("and", "or"), ("or", "and"), ("or", "or")]
305
def __init__(self, model, service=None, validate=True, root=None):
    """
    Construct a new Query
    =====================

    Set up an empty query: no views, joins, constraints,
    path descriptions, sort order or logic.

    Normally you would not call this constructor directly,
    but would use the factory method on the service object,
    which handles construction for you.

    @param model: the data model this query is built against. Required
    @param service: the service used to run the query. Optional; when
                    given, its prefetch_depth and prefetch_id_only
                    settings are copied onto the query, otherwise they
                    default to 1 and False.
    @param validate: a boolean - defaults to True. Stored as
                    do_verification; when false the query does not
                    validate paths as they are added.
    @param root: an optional path; when given, the query root is taken
                    from model.make_path(root).root, otherwise the
                    root starts out as None.
    """
    self.model = model
    self.root = None if root is None else model.make_path(root).root

    self.name = ''
    self.description = ''
    self.service = service

    # Prefetch settings are inherited from the service when there is one
    if service is None:
        self.prefetch_depth = 1
        self.prefetch_id_only = False
    else:
        self.prefetch_depth = service.prefetch_depth
        self.prefetch_id_only = service.prefetch_id_only

    self.do_verification = validate
    self.path_descriptions = []
    self.joins = []
    self.constraint_dict = {}
    self.uncoded_constraints = []
    self.views = []
    self._sort_order_list = SortOrderList()
    self._logic_parser = constraints.LogicParser(self)
    self._logic = None
    self.constraint_factory = constraints.ConstraintFactory()

    # Alternative names for existing methods, bound on the instance
    method_aliases = (
        ("c", "column"),
        ("filter", "where"),
        ("add_column", "add_view"),
        ("add_columns", "add_view"),
        ("add_views", "add_view"),
        ("add_to_select", "add_view"),
        ("order_by", "add_sort_order"),
        ("all", "get_results_list"),
        ("size", "count"),
        ("summarize", "summarise"),
    )
    for alias, target in method_aliases:
        setattr(self, alias, getattr(self, target))
359
361 """Return an iterator over all the objects returned by this query"""
362 return self.results("jsonobjects")
363
365 """Return the number of rows this query will return."""
366 return self.count()
367
369 """Construct a new list by subtracting the other query or list from this one"""
370 return self.service._list_manager.subtract([self], [other])
371
373 """Calculate the symmetric difference of this query and another"""
374 return self.service._list_manager.xor([self, other])
375
377 """
378 Intersect this query and another query or list
379 """
380 return self.service._list_manager.intersect([self, other])
381
383 """
384 Return the union of this query and another query or list.
385 """
386 return self.service._list_manager.union([self, other])
387
389 """
390 Return the union of this query and another query or list
391 """
392 return self.service._list_manager.union([self, other])
393
@classmethod
def from_xml(cls, xml, *args, **kwargs):
    """
    Deserialise a query serialised to XML
    =====================================

    This method is used to instantiate serialised queries.
    It can be used to read in queries you have saved to a file.

    The document must contain exactly one <query> element. Its view,
    path descriptions, joins, constraints, sort order and constraint
    logic are read in that order, with verification switched off, and
    the finished query is then verified as a whole.

    @param xml: The xml as a file name, url, or string
    @param args: positional arguments passed on to the constructor
    @param kwargs: keyword arguments passed on to the constructor

    @raise QueryParseError: if the query cannot be parsed
    @raise ModelError: if the query has illegal paths in it
    @raise ConstraintError: if the constraints don't make sense

    @rtype: L{Query}
    """
    obj = cls(*args, **kwargs)
    # Paths are checked once, by verify(), after everything has been read
    obj.do_verification = False
    f = openAnything(xml)
    try:
        doc = minidom.parse(f)
    finally:
        # Close the source even when the document is malformed
        f.close()

    queries = doc.getElementsByTagName('query')
    if len(queries) != 1:
        raise QueryParseError("wrong number of queries in xml. "
            + "Only one <query> element is allowed. Found %d" % len(queries))
    q = queries[0]
    obj.name = q.getAttribute('name')
    obj.description = q.getAttribute('description')
    obj.add_view(q.getAttribute('view'))
    for p in q.getElementsByTagName('pathDescription'):
        obj.add_path_description(p.getAttribute('pathString'),
                                 p.getAttribute('description'))
    for j in q.getElementsByTagName('join'):
        obj.add_join(j.getAttribute('path'), j.getAttribute('style'))

    # (constraint argument name, xml attribute name)
    attribute_map = (
        ('op', 'op'),
        ('value', 'value'),
        ('code', 'code'),
        ('subclass', 'type'),
        ('editable', 'editable'),
        ('optional', 'switchable'),
        ('extra_value', 'extraValue'),
        ('loopPath', 'loopPath'),
    )
    for c in q.getElementsByTagName('constraint'):
        con_args = {}
        con_args['path'] = c.getAttribute('path')
        # minidom returns '' (never None) for a missing attribute
        if not con_args['path']:
            if c.parentNode.tagName != "node":
                msg = "Constraints must have a path"
                raise QueryParseError(msg)
            con_args['path'] = c.parentNode.getAttribute('path')
        for arg_name, attr_name in attribute_map:
            con_args[arg_name] = c.getAttribute(attr_name)
        values = []
        for val_e in c.getElementsByTagName('value'):
            texts = [node.data for node in val_e.childNodes
                     if node.nodeType == node.TEXT_NODE]
            values.append(' '.join(texts))
        if values:
            con_args["values"] = values
        # Drop absent attributes; build a new dict rather than deleting
        # from the one being iterated
        con_args = dict((k, v) for k, v in con_args.items()
                        if v is not None and v != '')
        if "loopPath" in con_args:
            con_args["op"] = {
                "=" : "IS",
                "!=": "IS NOT"
            }.get(con_args["op"])
        con = obj.add_constraint(**con_args)
        if not con:
            raise ConstraintError("error adding constraint with args: " + repr(con_args))

    sort_order = q.getAttribute('sortOrder')
    if sort_order:
        # The split keeps the direction keywords, so the result alternates
        # path, direction, path, direction, ..., remainder
        tokens = Query.SO_SPLIT_PATTERN.split(sort_order)
        for path, direction in zip(tokens[0::2], tokens[1::2]):
            if path in obj.views:
                obj.add_sort_order(path, direction)
        # The remainder is '' when the string ends in a direction, or a
        # final path that was given without one
        remainder = tokens[-1]
        if remainder and remainder in obj.views:
            obj.add_sort_order(remainder)

    logic = q.getAttribute('constraintLogic')
    if logic:
        obj._set_questionable_logic(logic)

    obj.verify()

    return obj
491
493 """Attempts to sanity check the logic argument before it is set"""
494 logic = questionable_logic
495 used_codes = set(self.constraint_dict.keys())
496 logic_codes = set(Query.LOGIC_SPLIT_PATTERN.split(questionable_logic))
497 if "" in logic_codes:
498 logic_codes.remove("")
499 irrelevant_codes = logic_codes - used_codes
500 for c in irrelevant_codes:
501 pattern = re.compile("\\b" + c + "\\b", re.I)
502 logic = pattern.sub("", logic)
503
504 logic = re.sub("\((:?and|or|\s)*\)", "", logic)
505
506 logic = Query.LEADING_OP_PATTERN.sub("", logic)
507 logic = Query.TRAILING_OP_PATTERN.sub("", logic)
508 for x in range(2):
509 for left, right in Query.LOGIC_PRODUCT:
510 if left == right:
511 repl = left
512 else:
513 repl = "and"
514 pattern = re.compile(left + "\s*" + right, re.I)
515 logic = pattern.sub(repl, logic)
516 logic = Query.ORPHANED_OP_PATTERN.sub(lambda x: "(" if "(" in x.group(0) else ")", logic)
517 logic = logic.strip().lstrip()
518 logic = Query.LEADING_OP_PATTERN.sub("", logic)
519 logic = Query.TRAILING_OP_PATTERN.sub("", logic)
520 try:
521 if len(logic) > 0 and logic not in ["and", "or"]:
522 self.set_logic(logic)
523 except Exception, e:
524 raise Exception("Error parsing logic string "
525 + repr(questionable_logic)
526 + " (which is " + repr(logic) + " after irrelevant codes have been removed)"
527 + " with available codes: " + repr(list(used_codes))
528 + " because: " + e.message)
529
531 """Return the XML serialisation of this query"""
532 return self.to_xml()
533
535 """
536 Validate the query
537 ==================
538
539 Invalid queries will fail to run, and it is not always
540 obvious why. The validation routine checks to see that
541 the query will not cause errors on execution, and tries to
542 provide informative error messages.
543
544 This method is called immediately after a query is fully
545 deserialised.
546
547 @raise ModelError: if the paths are invalid
548 @raise QueryError: if there are errors in query construction
549 @raise ConstraintError: if there are errors in constraint construction
550
551 """
552 self.verify_views()
553 self.verify_constraint_paths()
554 self.verify_join_paths()
555 self.verify_pd_paths()
556 self.validate_sort_order()
557 self.do_verification = True
558
560 """
561 Replace the current selection of output columns with this one
562 =============================================================
563
564 example::
565
566 query.select("*", "proteins.name")
567
568 This method is intended to provide an API familiar to those
569 with experience of SQL or other ORM layers. This method, in
570 contrast to other view manipulation methods, replaces
571 the selection of output columns, rather than appending to it.
572
573 Note that any sort orders that are no longer in the view will
574 be removed.
575
576 @param paths: The output columns to add
577 """
578 self.views = []
579 self.add_view(*paths)
580 so_elems = self._sort_order_list
581 self._sort_order_list = SortOrderList()
582
583 for so in so_elems:
584 if so.path in self.views:
585 self._sort_order_list.append(so)
586 return self
587
589 """
590 Add one or more views to the list of output columns
591 ===================================================
592
593 example::
594
595 query.add_view("Gene.name Gene.organism.name")
596
597 This is the main method for adding views to the list
598 of output columns. As well as appending views, it
599 will also split a single, space or comma delimited
600 string into multiple paths, and flatten out lists, or any
601 combination. It will also immediately try to validate
602 the views.
603
604 Output columns must be valid paths according to the
605 data model, and they must represent attributes of tables
606
607 Also available as:
608 - add_views
609 - add_column
610 - add_columns
611 - add_to_select
612
613 @see: intermine.model.Model
614 @see: intermine.model.Path
615 @see: intermine.model.Attribute
616 """
617 views = []
618 for p in paths:
619 if isinstance(p, (set, list)):
620 views.extend(list(p))
621 elif isinstance(p, Class):
622 views.append(p.name + ".*")
623 elif isinstance(p, Column):
624 if p._path.is_attribute():
625 views.append(str(p))
626 else:
627 views.append(str(p) + ".*")
628 elif isinstance(p, Reference):
629 views.append(p.name + ".*")
630 else:
631 views.extend(re.split("(?:,?\s+|,)", str(p)))
632
633 views = map(self.prefix_path, views)
634
635 views_to_add = []
636 for view in views:
637 if view.endswith(".*"):
638 view = re.sub("\.\*$", "", view)
639 scd = self.get_subclass_dict()
640 def expand(p, level, id_only=False):
641 if level > 0:
642 path = self.model.make_path(p, scd)
643 cd = path.end_class
644 add_f = lambda x: p + "." + x.name
645 vs = [p + ".id"] if id_only and cd.has_id else map(add_f, cd.attributes)
646 next_level = level - 1
647 rs_and_cs = cd.references + cd.collections
648 for r in rs_and_cs:
649 rp = add_f(r)
650 if next_level:
651 self.outerjoin(rp)
652 vs.extend(expand(rp, next_level, self.prefetch_id_only))
653 return vs
654 else:
655 return []
656 depth = self.prefetch_depth
657 views_to_add.extend(expand(view, depth))
658 else:
659 views_to_add.append(view)
660
661 if self.do_verification:
662 self.verify_views(views_to_add)
663
664 self.views.extend(views_to_add)
665
666 return self
667
669 if self.root is None:
670 if self.do_verification:
671 if path.endswith(".*"):
672 trimmed = re.sub("\.\*$", "", path)
673 else:
674 trimmed = path
675 self.root = self.model.make_path(trimmed, self.get_subclass_dict()).root
676 return path
677 else:
678 if path.startswith(self.root.name):
679 return path
680 else:
681 return self.root.name + "." + path
682
684 """
685 Clear the output column list
686 ============================
687
688 Deletes all entries currently in the view list.
689 """
690 self.views = []
691
693 """
694 Check to see if the views given are valid
695 =========================================
696
697 This method checks to see if the views:
698 - are valid according to the model
699 - represent attributes
700
701 @see: L{intermine.model.Attribute}
702
703 @raise intermine.model.ModelError: if the paths are invalid
704 @raise ConstraintError: if the paths are not attributes
705 """
706 if views is None: views = self.views
707 for path in views:
708 path = self.model.make_path(path, self.get_subclass_dict())
709 if not path.is_attribute():
710 raise ConstraintError("'" + str(path)
711 + "' does not represent an attribute")
712
714 """
715 Add a constraint (filter on records)
716 ====================================
717
718 example::
719
720 query.add_constraint("Gene.symbol", "=", "zen")
721
722 This method will try to make a constraint from the arguments
723 given, trying each of the classes it knows of in turn
724 to see if they accept the arguments. This allows you
725 to add constraints of different types without having to know
726 or care what their classes or implementation details are.
727 All constraints derive from intermine.constraints.Constraint,
728 and they all have a path attribute, but are otherwise diverse.
729
730 Before adding the constraint to the query, this method
731 will also try to check that the constraint is valid by
732 calling Query.verify_constraint_paths()
733
734 @see: L{intermine.constraints}
735
736 @rtype: L{intermine.constraints.Constraint}
737 """
738 if len(args) == 1 and len(kwargs) == 0:
739 if isinstance(args[0], tuple):
740 con = self.constraint_factory.make_constraint(*args[0])
741 else:
742 try:
743 con = self.constraint_factory.make_constraint(*args[0].vargs, **args[0].kwargs)
744 except AttributeError:
745 con = args[0]
746 else:
747 if len(args) == 0 and len(kwargs) == 1:
748 k, v = kwargs.items()[0]
749 d = {"path": k}
750 if v in constraints.UnaryConstraint.OPS:
751 d["op"] = v
752 else:
753 d["op"] = "="
754 d["value"] = v
755 kwargs = d
756
757 con = self.constraint_factory.make_constraint(*args, **kwargs)
758
759 con.path = self.prefix_path(con.path)
760 if self.do_verification: self.verify_constraint_paths([con])
761 if hasattr(con, "code"):
762 self.constraint_dict[con.code] = con
763 else:
764 self.uncoded_constraints.append(con)
765
766 return con
767
def where(self, *cons, **kwargs):
    """
    Add a constraint to the query
    =============================

    In contrast to add_constraint, this method works on a clone
    of the query and returns that clone with the given constraint
    added.

    Each positional argument is first treated as a set of constraint
    descriptions: it is iterated, each element's vargs and kwargs are
    passed to add_constraint, and its as_logic() is appended to the
    existing logic with "AND". Keyword arguments are added as
    path = value constraints. If any of this raises an AttributeError,
    all the arguments are passed to add_constraint unchanged.

    Also available as Query.filter

    @return: the clone, with the constraints added
    """
    c = self.clone()
    try:
        for conset in cons:
            codeds = c.coded_constraints
            # Parsed as (str(...) + " AND ") if codeds else ""
            lstr = str(c.get_logic()) + " AND " if codeds else ""
            # Codes for the new constraints start after the last existing code
            start_c = chr(ord(codeds[-1].code) + 1) if codeds else 'A'
            for con in conset:
                c.add_constraint(*con.vargs, **con.kwargs)
            try:
                c.set_logic(lstr + conset.as_logic(start = start_c))
            except constraints.EmptyLogicError:
                pass
        for path, value in kwargs.items():
            c.add_constraint(path, "=", value)
    except AttributeError:
        # NOTE(review): this also catches an AttributeError raised part-way
        # through the loop above, after constraints may already have been
        # added to the clone - confirm that is intended.
        c.add_constraint(*cons, **kwargs)
    return c
795
797 """
798 Return a Column object suitable for using to construct constraints with
799 =======================================================================
800
801 This method is part of the SQLAlchemy style API.
802
803 Also available as Query.c
804 """
805 return self.model.column(self.prefix_path(str(col)), self.get_subclass_dict(), self)
806
808 """
809 Check that the constraints are valid
810 ====================================
811
812 This method will check the path attribute of each constraint.
813 In addition it will:
814 - Check that BinaryConstraints and MultiConstraints have an Attribute as their path
815 - Check that TernaryConstraints have a Reference as theirs
816 - Check that SubClassConstraints have a correct subclass relationship
817 - Check that LoopConstraints have a valid loopPath, of a compatible type
818 - Check that ListConstraints refer to an object
819 - Don't even try to check RangeConstraints: these have variable semantics
820
821 @param cons: The constraints to check (defaults to all constraints on the query)
822
823 @raise ModelError: if the paths are not valid
824 @raise ConstraintError: if the constraints do not satisfy the above rules
825
826 """
827 if cons is None: cons = self.constraints
828 for con in cons:
829 pathA = self.model.make_path(con.path, self.get_subclass_dict())
830 if isinstance(con, constraints.RangeConstraint):
831 pass
832 elif isinstance(con, constraints.TernaryConstraint):
833 if pathA.get_class() is None:
834 raise ConstraintError("'" + str(pathA) + "' does not represent a class, or a reference to a class")
835 elif isinstance(con, constraints.BinaryConstraint) or isinstance(con, constraints.MultiConstraint):
836 if not pathA.is_attribute():
837 raise ConstraintError("'" + str(pathA) + "' does not represent an attribute")
838 elif isinstance(con, constraints.SubClassConstraint):
839 pathB = self.model.make_path(con.subclass, self.get_subclass_dict())
840 if not pathB.get_class().isa(pathA.get_class()):
841 raise ConstraintError("'" + con.subclass + "' is not a subclass of '" + con.path + "'")
842 elif isinstance(con, constraints.LoopConstraint):
843 pathB = self.model.make_path(con.loopPath, self.get_subclass_dict())
844 for path in [pathA, pathB]:
845 if not path.get_class():
846 raise ConstraintError("'" + str(path) + "' does not refer to an object")
847 (classA, classB) = (pathA.get_class(), pathB.get_class())
848 if not classA.isa(classB) and not classB.isa(classA):
849 raise ConstraintError("the classes are of incompatible types: " + str(classA) + "," + str(classB))
850 elif isinstance(con, constraints.ListConstraint):
851 if not pathA.get_class():
852 raise ConstraintError("'" + str(pathA) + "' does not refer to an object")
853
854 @property
856 """
857 Returns the constraints of the query
858 ====================================
859
860 Query.constraints S{->} list(intermine.constraints.Constraint)
861
862 Constraints are returned in the order of their code (normally
863 the order they were added to the query) and with any
864 subclass constraints at the end.
865
866 @rtype: list(Constraint)
867 """
868 ret = sorted(self.constraint_dict.values(), key=lambda con: con.code)
869 ret.extend(self.uncoded_constraints)
870 return ret
871
873 """
874 Returns the constraint with the given code
875 ==========================================
876
877 Returns the constraint with the given code, if it exists.
878 If no such constraint exists, it throws a ConstraintError
879
880 @return: the constraint corresponding to the given code
881 @rtype: L{intermine.constraints.CodedConstraint}
882 """
883 if code in self.constraint_dict:
884 return self.constraint_dict[code]
885 else:
886 raise ConstraintError("There is no constraint with the code '"
887 + code + "' on this query")
888
890 """
891 Add a join statement to the query
892 =================================
893
894 example::
895
896 query.add_join("Gene.proteins", "OUTER")
897
898 A join statement is used to determine if references should
899 restrict the result set by only including those references
900 exist. For example, if one had a query with the view::
901
902 "Gene.name", "Gene.proteins.name"
903
904 Then in the normal case (that of an INNER join), we would only
905 get Genes that also have at least one protein that they reference.
906 Simply by asking for this output column you are placing a
907 restriction on the information you get back.
908
909 If in fact you wanted all genes, regardless of whether they had
910 proteins associated with them or not, but if they did
911 you would rather like to know _what_ proteins, then you need
912 to specify this reference to be an OUTER join::
913
914 query.add_join("Gene.proteins", "OUTER")
915
916 Now you will get many more rows of results, some of which will
917 have "null" values where the protein name would have been.
918
919 This method will also attempt to validate the join by calling
920 Query.verify_join_paths(). Joins must have a valid path, the
921 style can be either INNER or OUTER (defaults to OUTER,
922 as the user does not need to specify inner joins, since all
923 references start out as inner joins), and the path
924 must be a reference.
925
926 @raise ModelError: if the path is invalid
927 @raise TypeError: if the join style is invalid
928
929 @rtype: L{intermine.pathfeatures.Join}
930 """
931 join = Join(*args, **kwargs)
932 join.path = self.prefix_path(join.path)
933 if self.do_verification: self.verify_join_paths([join])
934 self.joins.append(join)
935 return self
936
938 """Alias for add_join(column, "OUTER")"""
939 return self.add_join(str(column), "OUTER")
940
942 """
943 Check that the joins are valid
944 ==============================
945
946 Joins must have valid paths, and they must refer to references.
947
948 @raise ModelError: if the paths are invalid
949 @raise QueryError: if the paths are not references
950 """
951 if joins is None: joins = self.joins
952 for join in joins:
953 path = self.model.make_path(join.path, self.get_subclass_dict())
954 if not path.is_reference():
955 raise QueryError("'" + join.path + "' is not a reference")
956
958 """
959 Add a path description to the query
960 ===================================
961
962 example::
963
964 query.add_path_description("Gene.proteins.proteinDomains", "Protein Domain")
965
966 This allows you to alias the components of long paths to
967 improve the way they display column headers in a variety of circumstances.
968 In the above example, if the view included the unwieldy path
969 "Gene.proteins.proteinDomains.primaryIdentifier", it would (depending on the
970 mine) be displayed as "Protein Domain > DB Identifier". These
971 settings are taken into account by the webservice when generating
972 column headers for flat-file results with the columnheaders parameter given, and
973 always supplied when requesting jsontable results.
974
975 @rtype: L{intermine.pathfeatures.PathDescription}
976
977 """
978 path_description = PathDescription(*args, **kwargs)
979 path_description.path = self.prefix_path(path_description.path)
980 if self.do_verification: self.verify_pd_paths([path_description])
981 self.path_descriptions.append(path_description)
982 return path_description
983
985 """
986 Check that the path of the path description is valid
987 ====================================================
988
989 Checks for consistency with the data model
990
991 @raise ModelError: if the paths are invalid
992 """
993 if pds is None: pds = self.path_descriptions
994 for pd in pds:
995 self.model.validate_path(pd.path, self.get_subclass_dict())
996
997 @property
999 """
1000 Returns the list of constraints that have a code
1001 ================================================
1002
1003 Query.coded_constraints S{->} list(intermine.constraints.CodedConstraint)
1004
1005 This returns an up to date list of the constraints that can
1006 be used in a logic expression. The only kind of constraint
1007 that this excludes, at present, is SubClassConstraints
1008
1009 @rtype: list(L{intermine.constraints.CodedConstraint})
1010 """
1011 return sorted(self.constraint_dict.values(), key=lambda con: con.code)
1012
1014 """
1015 Returns the logic expression for the query
1016 ==========================================
1017
1018 This returns the up to date logic expression. The default
1019 value is the representation of all coded constraints and'ed together.
1020
1021 If the logic is empty and there are no constraints, returns an
1022 empty string.
1023
1024 The LogicGroup object stringifies to a string that can be parsed to
1025 obtain itself (eg: "A and (B or C or D)").
1026
1027 @rtype: L{intermine.constraints.LogicGroup}
1028 """
1029 if self._logic is None:
1030 if len(self.coded_constraints) > 0:
1031 return reduce(lambda x, y: x+y, self.coded_constraints)
1032 else:
1033 return ""
1034 else:
1035 return self._logic
1036
1038 """
1039 Sets the Logic given the appropriate input
1040 ==========================================
1041
1042 example::
1043
1044 Query.set_logic("A and (B or C)")
1045
1046 This sets the logic to the appropriate value. If the value is
1047 already a LogicGroup, it is accepted, otherwise
1048 the string is tokenised and parsed.
1049
1050 The logic is then validated with a call to validate_logic()
1051
1052 @raise LogicParseError: if there is a syntax error in the logic
1053 """
1054 if isinstance(value, constraints.LogicGroup):
1055 logic = value
1056 else:
1057 try:
1058 logic = self._logic_parser.parse(value)
1059 except constraints.EmptyLogicError:
1060 if self.coded_constraints:
1061 raise
1062 else:
1063 return self
1064 if self.do_verification: self.validate_logic(logic)
1065 self._logic = logic
1066 return self
1067
1069 """
1070 Validates the query logic
1071 =========================
1072
1073 Attempts to validate the logic by checking
1074 that every coded_constraint is included
1075 at least once
1076
1077 @raise QueryError: if not every coded constraint is represented
1078 """
1079 if logic is None: logic = self._logic
1080 logic_codes = set(logic.get_codes())
1081 for con in self.coded_constraints:
1082 if con.code not in logic_codes:
1083 raise QueryError("Constraint " + con.code + repr(con)
1084 + " is not mentioned in the logic: " + str(logic))
1085
1087 """
1088 Gets the sort order when none has been specified
1089 ================================================
1090
1091 This method is called to determine the sort order if
1092 none is specified
1093
1094 @raise QueryError: if the view is empty
1095
1096 @rtype: L{intermine.pathfeatures.SortOrderList}
1097 """
1098 try:
1099 v0 = self.views[0]
1100 for j in self.joins:
1101 if j.style == "OUTER":
1102 if v0.startswith(j.path):
1103 return ""
1104 return SortOrderList((self.views[0], SortOrder.ASC))
1105 except IndexError:
1106 raise QueryError("Query view is empty")
1107
1109 """
1110 Return a sort order for the query
1111 =================================
1112
1113 This method returns the sort order if set, otherwise
1114 it returns the default sort order
1115
1116 @raise QueryError: if the view is empty
1117
1118 @rtype: L{intermine.pathfeatures.SortOrderList}
1119 """
1120 if self._sort_order_list.is_empty():
1121 return self.get_default_sort_order()
1122 else:
1123 return self._sort_order_list
1124
1126 """
1127 Adds a sort order to the query
1128 ==============================
1129
1130 example::
1131
1132 Query.add_sort_order("Gene.name", "DESC")
1133
1134 This method adds a sort order to the query.
1135 A query can have multiple sort orders, which are
1136 assessed in sequence.
1137
1138 If a query has two sort-orders, for example,
1139 the first being "Gene.organism.name asc",
1140 and the second being "Gene.name desc", you would have
1141 the list of genes grouped by organism, with the
1142 lists within those groupings in reverse alphabetical
1143 order by gene name.
1144
1145 This method will try to validate the sort order
1146 by calling validate_sort_order()
1147
1148 Also available as Query.order_by
1149 """
1150 so = SortOrder(str(path), direction)
1151 so.path = self.prefix_path(so.path)
1152 if self.do_verification: self.validate_sort_order(so)
1153 self._sort_order_list.append(so)
1154 return self
1155
1157 """
1158 Check the validity of the sort order
1159 ====================================
1160
1161 Checks that the sort order paths are:
1162 - valid paths
1163 - in the view
1164
1165 @raise QueryError: if the sort order is not in the view
1166 @raise ModelError: if the path is invalid
1167
1168 """
1169 if not so_elems:
1170 so_elems = self._sort_order_list
1171 from_paths = self._from_paths()
1172 for so in so_elems:
1173 p = self.model.make_path(so.path, self.get_subclass_dict())
1174 if p.prefix() not in from_paths:
1175 raise QueryError("Sort order element %s is not in the query" % so.path)
1176
1187
1189 """
1190 Return the current mapping of class to subclass
1191 ===============================================
1192
1193 This method returns a mapping of classes used
1194 by the model for assessing whether certain paths are valid. For
1195 instance, if you subclass MicroArrayResult to be FlyAtlasResult,
1196 you can refer to the .presentCall attributes of fly atlas results.
1197 MicroArrayResults do not have this attribute, and a path such as::
1198
1199 Gene.microArrayResult.presentCall
1200
1201 would be marked as invalid unless the dictionary is provided.
1202
1203 Users most likely will not need to ever call this method.
1204
1205 @rtype: dict(string, string)
1206 """
1207 subclass_dict = {}
1208 for c in self.constraints:
1209 if isinstance(c, constraints.SubClassConstraint):
1210 subclass_dict[c.path] = c.subclass
1211 return subclass_dict
1212
def results(self, row="object", start=0, size=None, summary_path=None):
    """
    Return an iterator over result rows
    ===================================

    Usage::

      >>> query = service.model.Gene.select("symbol", "length")
      >>> total = 0
      >>> for gene in query.results():
      ...    print gene.symbol                # handle strings
      ...    total += gene.length             # handle numbers
      >>> for row in query.results(row="rr"):
      ...    print row["symbol"]              # handle strings by dict index
      ...    total += row["length"]           # handle numbers by dict index
      ...    print row["Gene.symbol"]         # handle strings by full dict index
      ...    total += row["Gene.length"]      # handle numbers by full dict index
      ...    print row[0]                     # handle strings by list index
      ...    total += row[1]                  # handle numbers by list index
      >>> for d in query.results(row="dict"):
      ...    print d["Gene.symbol"]           # handle strings
      ...    total += d["Gene.length"]        # handle numbers
      >>> for l in query.results(row="list"):
      ...    print l[0]                       # handle strings
      ...    total += l[1]                    # handle numbers
      >>> import csv
      >>> csv_reader = csv.reader(query.results(row="csv"), delimiter=",", quotechar='"')
      >>> for row in csv_reader:
      ...    print row[0]                     # handle strings
      ...    total += int(row[1])             # handle numbers
      >>> tsv_reader = csv.reader(query.results(row="tsv"), delimiter="\\t")
      >>> for row in tsv_reader:
      ...    print row[0]                     # handle strings
      ...    total += int(row[1])             # handle numbers

    This is the general method that allows access to any of the available
    result formats. The example above shows the ways these differ in terms
    of accessing fields of the rows, as well as dealing with different
    data types. Results can either be retrieved as typed values (jsonobjects,
    rr ['ResultRows'], dict, list), or as lists of strings (csv, tsv) which
    then require further parsing. The default format for this method is
    "object", where information is grouped by its relationships. The other
    main format is "rr", which stands for 'ResultRows', and can be accessed
    directly through the L{rows} method.

    Note that when requesting object based results (the default), if your
    query contains any kind of collection, it is highly likely that start
    and size won't do what you think, as they operate only on the underlying
    rows used to build up the returned objects. If you want rows
    back, you are recommended to use the simpler rows method.

    The query itself is never modified: the request is built from a clone.
    If no views have been specified, the root class is added to the clone's
    view. For any format whose name contains "object", the path of every
    coded constraint that is not already covered by a view is added to the
    clone's view as well (the attribute itself, or the "id" of a reference).

    @param row: The format for each result. One of "object", "rr",
                "dict", "list", "tsv", "csv", "jsonrows", "jsonobjects"
    @type row: string
    @param start: the index of the first result to return (default = 0)
    @type start: int
    @param size: The maximum number of results to return (default = all).
                 A false value (None or 0) means the parameter is not sent.
    @type size: int
    @param summary_path: A column name to optionally summarise. Specifying a path
                         will force "jsonrows" format, and return an iterator over a list
                         of dictionaries. Use this when you are interested in processing
                         a summary in order of greatest count to smallest.
    @type summary_path: str or L{intermine.model.Path}

    @rtype: L{intermine.webservice.ResultIterator}

    @raise WebserviceError: if the request is unsuccessful
    """
    # Build the request from a copy so the caller's query stays untouched.
    to_run = self.clone()

    if len(to_run.views) == 0:
        to_run.add_view(to_run.root)

    if "object" in row:
        for con in self.coded_constraints:
            con_path = to_run.column(con.path)._path
            if con_path.end_class is not None:
                from_path = con_path
            else:
                from_path = con_path.prefix()
            wanted = str(from_path)
            # any() rather than "not filter(...)": a Python 3 filter object is
            # always truthy, which would silently disable this branch.
            if not any(view.startswith(wanted) for view in to_run.views):
                if con_path.is_attribute():
                    to_run.add_view(con_path)
                else:
                    to_run.add_view(con_path.append("id"))

    results_path = to_run.get_results_path()
    params = to_run.to_query_params()
    params["start"] = start
    if size:
        params["size"] = size
    if summary_path:
        params["summaryPath"] = to_run.prefix_path(summary_path)
        # Summaries are only served in this format.
        row = "jsonrows"

    return to_run.service.get_results(
        results_path, params, row, to_run.views, to_run.root)
1312
def rows(self, start=0, size=None):
    """
    Return the results as rows of data
    ==================================

    Convenience wrapper: delegates to L{results} with the row format
    fixed to "rr", passing the paging arguments through unchanged.

    Usage::

      >>> for row in query.rows(start=10, size=10):
      ...     print row["proteins.name"]

    @param start: the index of the first result to return (default = 0)
    @type start: int
    @param size: The maximum number of results to return (default = all)
    @type size: int
    @rtype: iterable<intermine.webservice.ResultRow>
    """
    paging = dict(start=start, size=size)
    return self.results(row="rr", **paging)
1332
def summarise(self, summary_path, **kwargs):
    """
    Return a summary of the results for this column.
    ================================================

    Usage::
      >>> query = service.select("Gene.*", "organism.*").where("Gene", "IN", "my-list")
      >>> print query.summarise("length")["average"]
      ... 12345.67890
      >>> print query.summarise("organism.name")["Drosophila simulans"]
      ... 98

    This method allows you to get statistics summarising the information
    from just one column of a query. For numerical columns you get a
    dictionary with four keys ('average', 'stdev', 'max', 'min'), and for
    non-numerical columns you get a dictionary where each item is a key and
    the values are the number of occurrences of this value in the column.

    Any key word arguments will be passed to the underlying results call -
    so you can limit the result size to the top 100 items by passing
    "size = 100" as part of the call.

    This method is sugar for particular combinations of calls to L{results}.

    @see: L{intermine.query.Query.results}

    @param summary_path: The column to summarise (either in long or short form)
    @type summary_path: str or L{intermine.model.Path}

    @rtype: dict
    """
    full_path = self.prefix_path(summary_path)
    p = self.model.make_path(full_path, self.get_subclass_dict())
    results = self.results(summary_path=summary_path, **kwargs)
    if p.end.type_name in Model.NUMERIC_TYPES:
        # Only the first row is read for numeric columns. next() and
        # .items() are used because they work on both Python 2 and 3.
        stats = next(results)
        return dict((k, float(v)) for k, v in stats.items())
    # Non-numeric columns: each row supplies an "item" and a "count".
    return dict((r["item"], r["count"]) for r in results)
1369
def one(self, row="jsonobjects"):
    """
    Return one result, and raise an error if the result size is not 1

    The count is requested first. When it is exactly 1 the value of
    first(row) is returned. Otherwise, for any format other than
    "jsonobjects", a QueryError is raised straight away; for "jsonobjects"
    the results are iterated and exactly one object must come back.

    @param row: the result format (default = "jsonobjects")
    @type row: string

    @raise QueryError: if there is not exactly one result
    """
    total = self.count()
    if total == 1:
        return self.first(row)

    if row != "jsonobjects":
        raise QueryError("Result size is not one: got %d results" % total)

    # NOTE(review): presumably count() reports rows while several rows can
    # collapse into one object, hence this second check - confirm.
    found = None
    for obj in self.results():
        if found is not None:
            raise QueryError("More than one result received")
        found = obj
    if found is None:
        raise QueryError("No results received")
    return found
1392
def first(self, row="jsonobjects", start=0, **kw):
    """
    Return the first result, or None if the results are empty

    The request is limited to a single row (size=1) for every format
    except "jsonobjects", which is requested without a size limit.

    @param row: the result format (default = "jsonobjects")
    @type row: string
    @param start: the index of the first result to return (default = 0)
    @type start: int
    @param kw: any further keyword arguments are passed on to L{results}

    @return: the first item of the result iterator, or None
    """
    size = None if row == "jsonobjects" else 1
    try:
        # The next() builtin works on both Python 2 and Python 3 iterators,
        # unlike the Python 2 only .next() method.
        return next(self.results(row, start=start, size=size, **kw))
    except StopIteration:
        return None
1403
1405 """
1406 Get a list of result rows
1407 =========================
1408
1409 This method is a shortcut so that you do not have to
1410 do a list comprehension yourself on the iterator that
1411 is normally returned. If you have a very large result
1412 set (and these can get up to hundreds of thousands of rows
1413 pretty easily) you will not want to
1414 have the whole list in memory at once, but there may
1415 be other circumstances when you might want to keep the whole
1416 list in one place.
1417
1418 It takes all the same arguments and parameters as Query.results
1419
1420 Also available as Query.all
1421
1422 @see: L{intermine.query.Query.results}
1423
1424 """
1425 rows = self.results(*args, **kwargs)
1426 return [r for r in rows]
1427
1430
1432 """
1433 Return the total number of rows this query returns
1434 ==================================================
1435
1436 Obtain the number of rows a particular query will
1437 return, without having to fetch and parse all the
1438 actual data. This method makes a request to the server
1439 to report the count for the query, and is sugar for a
1440 results call.
1441
1442 Also available as Query.size
1443
1444 @rtype: int
1445 @raise WebserviceError: if the request is unsuccessful.
1446 """
1447 count_str = ""
1448 for row in self.results(row = "count"):
1449 count_str += row
1450 try:
1451 return int(count_str)
1452 except ValueError:
1453 raise ResultError("Server returned a non-integer count: " + count_str)
1454
1456 """
1457 Returns the uri to use to create a list from this query
1458 =======================================================
1459
1460 Query.get_list_upload_uri() -> str
1461
1462 This method is used internally when performing list operations
1463 on queries.
1464
1465 @rtype: str
1466 """
1467 return self.service.root + self.service.QUERY_LIST_UPLOAD_PATH
1468
1470 """
1471 Returns the uri to use to append this query to a list
1472 =====================================================
1473
1474 Query.get_list_append_uri() -> str
1475
1476 This method is used internally when performing list operations
1477 on queries.
1478
1479 @rtype: str
1480 """
1481 return self.service.root + self.service.QUERY_LIST_APPEND_PATH
1482
1483
1485 """
1486 Returns the path section pointing to the REST resource
1487 ======================================================
1488
1489 Query.get_results_path() -> str
1490
1491 Internally, this just calls a constant property
1492 in intermine.service.Service
1493
1494 @rtype: str
1495 """
1496 return self.service.QUERY_PATH
1497
1498
1500 """
1501 Returns the child objects of the query
1502 ======================================
1503
1504 This method is used during the serialisation of queries
1505 to xml. It is unlikely you will need access to this as a whole.
1506 Consider using "path_descriptions", "joins", "constraints" instead
1507
1508 @see: Query.path_descriptions
1509 @see: Query.joins
1510 @see: Query.constraints
1511
1512 @return: the child element of this query
1513 @rtype: list
1514 """
1515 return sum([self.path_descriptions, self.joins, self.constraints], [])
1516
1518 """
1519 Implementation of trait that allows use of these objects as queries (casting).
1520 """
1521 return self
1522
1524 """
1525 Implementation of trait that allows use of these objects in list constraints
1526 """
1527 l = self.service.create_list(self)
1528 return ConstraintNode(path, op, l.name)
1529
1531 """
1532 Returns the parameters to be passed to the webservice
1533 =====================================================
1534
1535 The query is responsible for producing its own query
1536 parameters. These consist simply of:
1537 - query: the xml representation of the query
1538
1539 @rtype: dict
1540
1541 """
1542 xml = self.to_xml()
1543 params = {'query' : xml }
1544 return params
1545
1547 """
1548 Returns a DOM node representing the query
1549 =========================================
1550
1551 This is an intermediate step in the creation of the
1552 xml serialised version of the query. You probably
1553 won't need to call this directly.
1554
1555 @rtype: xml.minidom.Node
1556 """
1557 impl = getDOMImplementation()
1558 doc = impl.createDocument(None, "query", None)
1559 query = doc.documentElement
1560
1561 query.setAttribute('name', self.name)
1562 query.setAttribute('model', self.model.name)
1563 query.setAttribute('view', ' '.join(self.views))
1564 query.setAttribute('sortOrder', str(self.get_sort_order()))
1565 query.setAttribute('longDescription', self.description)
1566 if len(self.coded_constraints) > 1:
1567 query.setAttribute('constraintLogic', str(self.get_logic()))
1568
1569 for c in self.children():
1570 element = doc.createElement(c.child_type)
1571 for name, value in c.to_dict().items():
1572 if isinstance(value, (set, list)):
1573 for v in value:
1574 subelement = doc.createElement(name)
1575 text = doc.createTextNode(v)
1576 subelement.appendChild(text)
1577 element.appendChild(subelement)
1578 else:
1579 element.setAttribute(name, value)
1580 query.appendChild(element)
1581 return query
1582
1584 """
1585 Return an XML serialisation of the query
1586 ========================================
1587
1588 This method serialises the current state of the query to an
1589 xml string, suitable for storing, or sending over the
1590 internet to the webservice.
1591
1592 @return: the serialised xml string
1593 @rtype: string
1594 """
1595 n = self.to_Node()
1596 return n.toxml()
1597
1612
1614 """
1615 Performs a deep clone
1616 =====================
1617
1618 This method will produce a clone that is independent,
1619 and can be altered without affecting the original,
1620 but starts off with the exact same state as it.
1621
1622 The only shared elements should be the model
1623 and the service, which are shared by all queries
1624 that refer to the same webservice.
1625
1626 @return: same class as caller
1627 """
1628 newobj = self.__class__(self.model)
1629 for attr in ["joins", "views", "_sort_order_list", "_logic", "path_descriptions", "constraint_dict", "uncoded_constraints"]:
1630 setattr(newobj, attr, deepcopy(getattr(self, attr)))
1631
1632 for attr in ["name", "description", "service", "do_verification", "constraint_factory", "root"]:
1633 setattr(newobj, attr, getattr(self, attr))
1634 return newobj
1635
1637 """
1638 A Class representing a predefined query
1639 =======================================
1640
1641 Templates are ways of saving queries
1642 and allowing others to run them
1643 simply. They are the main interface
1644 to querying in the webapp
1645
1646 SYNOPSIS
1647 --------
1648
1649 example::
1650
1651 service = Service("http://www.flymine.org/query/service")
1652 template = service.get_template("Gene_Pathways")
1653 for row in template.results(A={"value":"eve"}):
1654 process_row(row)
1655 ...
1656
1657 A template is a subclass of query that comes predefined. They
1658 are typically retrieved from the webservice and run by specifying
1659 the values for their existing constraints. They are a concise
1660 and powerful way of running queries in the webapp.
1661
1662 Being subclasses of query, everything is true of them that is true
1663 of a query. They are just less work, as you don't have to design each
1664 one. Also, you can store your own templates in the web-app, and then
1665 access them as a private webservice method, from anywhere, making them
1666 a kind of query in the cloud - for this you will need to authenticate
1667 by providing log in details to the service.
1668
1669 The most significant difference is how constraint values are specified
1670 for each set of results.
1671
1672 @see: L{Template.results}
1673
1674 """
1676 """
1677 Constructor
1678 ===========
1679
1680 Instantiation is identical to that of queries. As with queries,
1681 these are best obtained from the intermine.webservice.Service
1682 factory methods.
1683
1684 @see: L{intermine.webservice.Service.get_template}
1685 """
1686 super(Template, self).__init__(*args, **kwargs)
1687 self.constraint_factory = constraints.TemplateConstraintFactory()
1688 @property
1690 """
1691 Return the list of constraints you can edit
1692 ===========================================
1693
1694 Template.editable_constraints -> list(intermine.constraints.Constraint)
1695
1696 Templates have a concept of editable constraints, which
1697 is a way of hiding complexity from users. An underlying query may have
1698 five constraints, but only expose the one that is actually
1699 interesting. This property returns this subset of constraints
1700 that have the editable flag set to true.
1701 """
1702 isEditable = lambda x: x.editable
1703 return filter(isEditable, self.constraints)
1704
def to_query_params(self):
    """
    Returns the query parameters needed for the webservice
    ======================================================

    Template.to_query_params() -> dict(string, string)

    Overrides the method of the same name in query to provide the
    parameters needed by the templates results service. These
    are slightly more complex:
        - name: The template's name
        - for each editable constraint that is switched on
          (where [i] is an integer incremented for each such constraint,
          starting at 1):
            - constraint[i]: the path
            - op[i]: the operator
            - value[i]: the value
            - code[i]: the code
            - extra[i]: the extra value for ternary constraints (optional)

    Every key of the constraint's to_dict() is emitted with the index
    appended; "path" is renamed to "constraint" and "extraValue" to "extra".
    Constraints that are switched off are left out and do not use up
    an index.

    @rtype: dict
    """
    params = {'name': self.name}
    index = 1
    for con in self.editable_constraints:
        if not con.switched_on:
            # The original statement here was a bare `next`, which is a
            # no-op expression, so switched-off constraints were still sent.
            continue
        suffix = str(index)
        for key, value in con.to_dict().items():
            if key == "extraValue":
                key = "extra"
            if key == "path":
                key = "constraint"
            params[key + suffix] = value
        index += 1
    return params
1736
1738 """
1739 Returns the path section pointing to the REST resource
1740 ======================================================
1741
1742 Template.get_results_path() S{->} str
1743
1744 Internally, this just calls a constant property
1745 in intermine.service.Service
1746
1747 This overrides the method of the same name in Query
1748
1749 @return: the path to the REST resource
1750 @rtype: string
1751 """
1752 return self.service.TEMPLATEQUERY_PATH
1753
1755 """
1756 Gets a template to run
1757 ======================
1758
1759 Template.get_adjusted_template(con_values) S{->} Template
1760
1761 When templates are run, they are first cloned, and their
1762 values are changed to those desired. This leaves the original
1763 template unchanged so it can be run again with different
1764 values. This method does the cloning and changing of constraint
1765 values
1766
1767 @raise ConstraintError: if the constraint values specify values for a non-editable constraint.
1768
1769 @rtype: L{Template}
1770 """
1771 clone = self.clone()
1772 for code, options in con_values.items():
1773 con = clone.get_constraint(code)
1774 if not con.editable:
1775 raise ConstraintError("There is a constraint '" + code
1776 + "' on this query, but it is not editable")
1777 try:
1778 for key, value in options.items():
1779 setattr(con, key, value)
1780 except AttributeError:
1781 setattr(con, "value", options)
1782 return clone
1783
def results(self, row="object", start=0, size=None, **con_values):
    """
    Get an iterator over result rows
    ================================

    This method returns the same values with the
    same options as the method of the same name in
    Query (see intermine.query.Query). The main difference is in the
    arguments.

    The template result methods also accept a set of key-word
    arguments that are used to supply values
    to the editable constraints. eg::

      template.results(
        A = {"value": "eve"},
        B = {"op": ">", "value": 5000}
      )

    The keys should be codes for editable constraints (you can inspect these
    with Template.editable_constraints) and the values should be a dictionary
    of constraint properties to replace. You can replace the values for
    "op" (operator), "value", and "extra_value" and "values" in the case of
    ternary and multi constraints.

    The values are applied to a copy obtained from get_adjusted_template,
    and the Query implementation of results is then run on that copy.

    @rtype: L{intermine.webservice.ResultIterator}
    """
    adjusted = self.get_adjusted_template(con_values)
    run_query = super(Template, adjusted).results
    return run_query(row, start, size)
1813
1815 """
1816 Get a list of result rows
1817 =========================
1818
1819 This method performs the same as the method of the
1820 same name in Query, and it shares the semantics of
1821 Template.results().
1822
1823 @see: L{intermine.query.Query.get_results_list}
1824 @see: L{intermine.query.Template.results}
1825
1826 @rtype: list
1827
1828 """
1829 clone = self.get_adjusted_template(con_values)
1830 return super(Template, clone).get_results_list(row, start, size)
1831
1836
1837 - def rows(self, start=0, size=None, **con_values):
1841
def count(self, **con_values):
    """
    Return the total number of rows this template returns
    =====================================================

    Obtain the number of rows a particular query will
    return, without having to fetch and parse all the
    actual data. This method makes a request to the server
    to report the count for the query, and is sugar for a
    results call.

    The key-word arguments supply constraint values, keyed by
    constraint code. They are applied to a copy obtained from
    get_adjusted_template, and the Query implementation of count
    is then run on that copy.

    @rtype: int
    @raise WebserviceError: if the request is unsuccessful.
    """
    adjusted = self.get_adjusted_template(con_values)
    count_rows = super(Template, adjusted).count
    return count_rows()
1858
1862
1865
1868
1871