1 import re
2 from copy import deepcopy
3 from xml.dom import minidom, getDOMImplementation
4
5 from intermine.util import openAnything, ReadableException
6 from intermine.pathfeatures import PathDescription, Join, SortOrder, SortOrderList
7 from intermine.model import Column, Class
8 import constraints
9
10 """
11 Classes representing queries against webservices
12 ================================================
13
14 Representations of queries, and templates.
15
16 """
17
18 __author__ = "Alex Kalderimis"
19 __organization__ = "InterMine"
20 __license__ = "LGPL"
21 __contact__ = "dev@intermine.org"
22
23
24 -class Query(object):
25 """
26 A Class representing a structured database query
27 ================================================
28
29 Objects of this class have properties that model the
30 attributes of the query, and methods for performing
31 the request.
32
33 SYNOPSIS
34 --------
35
36 example:
37
38 >>> service = Service("http://www.flymine.org/query/service")
39 >>> query = service.new_query()
40 >>>
41 >>> query.add_view("Gene.symbol", "Gene.pathways.name", "Gene.proteins.symbol")
42 >>> query.add_sort_order("Gene.pathways.name")
43 >>>
44 >>> query.add_constraint("Gene", "LOOKUP", "eve")
45 >>> query.add_constraint("Gene.pathways.name", "=", "Phosphate*")
46 >>>
47 >>> query.set_logic("A or B")
48 >>>
49 >>> for row in query.results():
50 ... handle_row(row)
51
52 OR, using an SQL style DSL:
53
54 >>> s = Service("www.flymine.org/query")
55 >>> query = s.query("Gene").\
56 ... select("*", "pathways.*").\
57 ... where("symbol", "=", "H").\
58 ... outerjoin("pathways").\
59 ... order_by("symbol")
60 >>> for row in query.results():
61 ... handle_row(row)
62
63 OR, for a more SQL-alchemy, ORM style:
64
65 >>>
66 >>> for gene in s.query(s.model.Gene).filter(s.model.Gene.symbol == ["zen", "H", "eve"]).add_columns(s.model.Gene.alleles):
67 ... handle(gene)
68
69 Query objects represent structured requests for information over the database
70 housed at the datawarehouse whose webservice you are querying. They utilise
71 some of the concepts of relational databases, within an object-related
72 ORM context. If you don't know what that means, don't worry: you
73 don't need to write SQL, and the queries will be fast.
74
75 To make things slightly more familiar to those with knowledge of SQL, some syntactical
76 sugar is provided to make constructing queries a bit more recognisable.
77
78 PRINCIPLES
79 ----------
80
81 The data model represents tables in the databases as classes, with records
82 within tables as instances of that class. The columns of the database are the
83 fields of that object::
84
85 The Gene table - showing two records/objects
86 +---------------------------------------------------+
87 | id | symbol | length | cyto-location | organism |
88 +----------------------------------------+----------+
89 | 01 | eve | 1539 | 46C10-46C10 | 01 |
90 +----------------------------------------+----------+
91 | 02 | zen | 1331 | 84A5-84A5 | 01 |
92 +----------------------------------------+----------+
93 ...
94
95 The organism table - showing one record/object
96 +----------------------------------+
97 | id | name | taxon id |
98 +----------------------------------+
99 | 01 | D. melanogaster | 7227 |
100 +----------------------------------+
101
102 Columns that contain a meaningful value are known as 'attributes' (in the tables above, that is
103 everything except the id columns). The other columns (such as "organism" in the gene table)
104 are ones that reference records of other tables (ie. other objects), and are called
105 references. You can refer to any field in any class, that has a connection,
106 however tenuous, with a table, by using dotted path notation::
107
108 Gene.organism.name -> the name column in the organism table, referenced by a record in the gene table
109
110 These paths, and the connections between records and tables they represent,
111 are the basis for the structure of InterMine queries.
112
113 THE STRUCTURE OF A QUERY
114 ------------------------
115
116 A query has two principal sets of properties:
117 - its view: the set of output columns
118 - its constraints: the set of rules for what to include
119
120 A query must have at least one output column in its view, but constraints
121 are optional - if you don't include any, you will get back every record
122 from the table (every object of that type)
123
124 In addition, the query must be coherent: if you have information about
125 an organism, and you want a list of genes, then the "Gene" table
126 should be the basis for your query, and as such the Gene class, which
127 represents this table, should be the root of all the paths that appear in it:
128
129 So, to take a simple example::
130
131 I have an organism name, and I want a list of genes:
132
133 The view is the list of things I want to know about those genes:
134
135 >>> query.add_view("Gene.name")
136 >>> query.add_view("Gene.length")
137 >>> query.add_view("Gene.proteins.sequence.length")
138
139 Note I can freely mix attributes and references, as long as every view ends in
140 an attribute (a meaningful value). As a short-cut I can also write:
141
142 >>> query.add_views("Gene.name", "Gene.length", "Gene.proteins.sequence.length")
143
144 or:
145
146 >>> query.add_views("Gene.name Gene.length Gene.proteins.sequence.length")
147
148 They are all equivalent. You can also use common SQL style shortcuts such as "*" for all
149 attribute fields:
150
151 >>> query.add_views("Gene.*")
152
153 You can also use "select" as a synonym for "add_view".
154
155 Now I can add my constraints. As, we mentioned, I have information about an organism, so:
156
157 >>> query.add_constraint("Gene.organism.name", "=", "D. melanogaster")
158
159 (note, here I can use "where" as a synonym for "add_constraint")
160
161 If I run this query, I will get literally millions of results -
162 it needs to be filtered further:
163
164 >>> query.add_constraint("Gene.proteins.sequence.length", "<", 500)
165
166 If that doesn't restrict things enough I can add more filters:
167
168 >>> query.add_constraint("Gene.symbol", "ONE OF", ["eve", "zen", "h"])
169
170 Now I am guaranteed to get only information on genes I am interested in.
171
172 Note, though, that because I have included the link (or "join") from Gene -> Protein,
173 this, by default, means that I only want genes that have protein information associated
174 with them. If in fact I want information on all genes, and just want to know the
175 protein information if it is available, then I can specify that with:
176
177 >>> query.add_join("Gene.proteins", "OUTER")
178
179 And if perhaps my query is not as simple as a strict cumulative filter, but I want all
180 D. mel genes that EITHER have a short protein sequence OR come from one of my favourite genes
181 (as unlikely as that sounds), I can specify the logic for that too:
182
183 >>> query.set_logic("A and (B or C)")
184
185 Each letter refers to one of the constraints - the codes are assigned in the order you add
186 the constraints. If you want to be absolutely certain about the constraints you mean, you
187 can use the constraint objects themselves:
188
189 >>> gene_is_eve = query.add_constraint("Gene.symbol", "=", "eve")
190 >>> gene_is_zen = query.add_constraint("Gene.symbol", "=", "zen")
191 >>>
192 >>> query.set_logic(gene_is_eve | gene_is_zen)
193
194 By default the logic is a straight cumulative filter (ie: A and B and C and D and ...)
195
196 Putting it all together:
197
198 >>> query.add_view("Gene.name", "Gene.length", "Gene.proteins.sequence.length")
199 >>> query.add_constraint("Gene.organism.name", "=", "D. melanogaster")
200 >>> query.add_constraint("Gene.proteins.sequence.length", "<", 500)
201 >>> query.add_constraint("Gene.symbol", "ONE OF", ["eve", "zen", "h"])
202 >>> query.add_join("Gene.proteins", "OUTER")
203 >>> query.set_logic("A and (B or C)")
204
205 This can be made more concise and readable with a little DSL sugar:
206
207 >>> query = service.query("Gene")
208 >>> query.select("name", "length", "proteins.sequence.length").\
209 ... where('organism.name', '=', 'D. melanogaster').\
210 ... where("proteins.sequence.length", "<", 500).\
211 ... where('symbol', 'ONE OF', ['eve', 'h', 'zen']).\
212 ... outerjoin('proteins').\
213 ... set_logic("A and (B or C)")
214
215 And the query is defined.
216
217 Result Processing
218 -----------------
219
220 calling ".results()" on a query will return an iterator of rows, where each row
221 is a ResultRow object, which can be treated as both a list and a dictionary.
222
223 Which means you can refer to columns by name:
224
225 >>> for row in query.results():
226 ... print "name is %s" % (row["name"])
227 ... print "length is %d" % (row["length"])
228
229 As well as using list indices:
230
231 >>> for row in query.results():
232 ... print "The first column is %s" % (row[0])
233
234 Iterating over a row iterates over the cell values as a list:
235
236 >>> for row in query.results():
237 ... for column in row:
238 ... do_something(column)
239
240 Here each row will have a gene name, a gene length, and a sequence length, eg:
241
242 >>> print row.to_l
243 ["even skipped", "1359", "376"]
244
245 To make that clearer, you can ask for a dictionary instead of a list:
246
247 >>> for row in query.results():
248 ... print row.to_d
249 {"Gene.name":"even skipped","Gene.length":"1359","Gene.proteins.sequence.length":"376"}
250
251
252 If you just want the raw results, for printing to a file, or for piping to another program,
253 you can request strings instead:
254
255 >>> for row in query.results("string"):
256 ... print(row)
257
258
259 Getting us to Generate your Code
260 --------------------------------
261
262 Not that you have to actually write any of this! The webapp will happily
263 generate the code for any query (and template) you can build in it. A good way to get
264 started is to use the webapp to generate your code, and then run it as scripts
265 to speed up your queries. You can always tinker with and edit the scripts you download.
266
267 To get generated queries, look for the "python" link at the bottom of query-builder and
268 template form pages, it looks a bit like this::
269
270 . +=====================================+=============
271 | |
272 | Perl | Python | Java [Help] |
273 | |
274 +==============================================
275
276 """
277
# Patterns used when deserialising queries from XML.  They are written as raw
# strings so that regex escapes such as \s and \( are not treated as
# (invalid) string escape sequences by the Python compiler.

# Splits a sort-order string on its direction keywords, keeping the keywords.
SO_SPLIT_PATTERN = re.compile(r"\s*(asc|desc)\s*", re.I)
# Splits a logic string on operators and brackets, leaving the codes.
LOGIC_SPLIT_PATTERN = re.compile(r"\s*(?:and|or|\(|\))\s*", re.I)
# Match a dangling operator at the end / start of a logic string.
TRAILING_OP_PATTERN = re.compile(r"\s*(and|or)\s*$", re.I)
LEADING_OP_PATTERN = re.compile(r"^\s*(and|or)\s*", re.I)
LOGIC_OPS = ["and", "or"]
# Every ordered pair of operators from LOGIC_OPS.  Spelled out explicitly
# because a nested comprehension cannot see sibling names when it is
# evaluated inside a class body on Python 3.
LOGIC_PRODUCT = [
    ("and", "and"),
    ("and", "or"),
    ("or", "and"),
    ("or", "or"),
]
284
def __init__(self, model, service=None, validate=True, root=None):
    """
    Construct a new Query
    =====================

    Build an empty query for making database queries
    against an InterMine data warehouse.

    Normally you would not need to use this constructor
    directly, but instead use the factory method on
    intermine.webservice.Service, which will handle construction
    for you.

    @param model: an instance of L{intermine.model.Model}. Required
    @param service: an instance of L{intermine.service.Service}. Optional,
        but you will not be able to make requests without one.
    @param validate: a boolean - defaults to True. If set to false, the query
        will not try and validate itself. You should not set this to false.
    @param root: optional. When given, it is resolved with model.make_path
        and the root of the resulting path is stored as the query's root;
        otherwise the query's root is None.

    """
    self.model = model
    # Only consult the model when a root was actually supplied.
    self.root = None if root is None else model.make_path(root).root

    self.name = ''
    self.description = ''
    self.service = service
    self.do_verification = validate

    # The parts of the query, all initially empty.
    self.path_descriptions = []
    self.joins = []
    self.constraint_dict = {}
    self.uncoded_constraints = []
    self.views = []
    self._sort_order_list = SortOrderList()

    self._logic_parser = constraints.LogicParser(self)
    self._logic = None
    self.constraint_factory = constraints.ConstraintFactory()

    # Per-instance aliases: (alias name, name of the method it points at).
    method_aliases = (
        ("c", "column"),
        ("filter", "where"),
        ("add_column", "add_view"),
        ("add_columns", "add_view"),
        ("add_views", "add_view"),
        ("select", "add_view"),
        ("order_by", "add_sort_order"),
        ("all", "get_results_list"),
        ("rows", "results"),
    )
    for alias, target in method_aliases:
        setattr(self, alias, getattr(self, target))
335
337 return self.results("jsonobjects")
338
@classmethod
def from_xml(cls, xml, *args, **kwargs):
    """
    Deserialise a query serialised to XML
    =====================================

    This method is used to instantiate serialised queries.
    It is used by intermine.webservice.Service objects
    to instantiate Template objects and it can be used
    to read in queries you have saved to a file.

    Any further positional and keyword arguments are passed
    straight through to the class constructor.

    @param xml: The xml as a file name, url, or string

    @raise QueryParseError: if the query cannot be parsed (this includes
        documents that do not contain exactly one query element, and
        constraints for which no path can be found)
    @raise ModelError: if the query has illegal paths in it
    @raise ConstraintError: if the constraints don't make sense

    @rtype: L{Query}
    """
    obj = cls(*args, **kwargs)
    # Switch off piecemeal verification while the parts are being read;
    # the whole query is verified in one go by obj.verify() at the end.
    obj.do_verification = False

    f = openAnything(xml)
    try:
        doc = minidom.parse(f)
    finally:
        # Always release the handle, even when the XML is malformed.
        f.close()

    queries = doc.getElementsByTagName('query')
    if len(queries) != 1:
        raise QueryParseError("wrong number of queries in xml")
    q = queries[0]

    obj.name = q.getAttribute('name')
    obj.description = q.getAttribute('description')
    obj.add_view(q.getAttribute('view'))

    for pd_node in q.getElementsByTagName('pathDescription'):
        obj.add_path_description(
            pd_node.getAttribute('pathString'),
            pd_node.getAttribute('description'))

    for join_node in q.getElementsByTagName('join'):
        obj.add_join(
            join_node.getAttribute('path'),
            join_node.getAttribute('style'))

    for con_node in q.getElementsByTagName('constraint'):
        path = con_node.getAttribute('path')
        if not path:
            # minidom returns '' (never None) for an absent attribute, so
            # test for emptiness.  A constraint without its own path may
            # take it from an enclosing <node> element.
            if con_node.parentNode.tagName != "node":
                raise QueryParseError("Constraints must have a path")
            path = con_node.parentNode.getAttribute('path')

        # Map the XML attribute names onto add_constraint's keyword names.
        con_args = {
            'path': path,
            'op': con_node.getAttribute('op'),
            'value': con_node.getAttribute('value'),
            'code': con_node.getAttribute('code'),
            'subclass': con_node.getAttribute('type'),
            'editable': con_node.getAttribute('editable'),
            'optional': con_node.getAttribute('switchable'),
            'extra_value': con_node.getAttribute('extraValue'),
            'loopPath': con_node.getAttribute('loopPath'),
        }

        # Multi-value constraints carry their values as <value> children.
        values = []
        for val_e in con_node.getElementsByTagName('value'):
            texts = [node.data for node in val_e.childNodes
                     if node.nodeType == node.TEXT_NODE]
            values.append(' '.join(texts))
        if values:
            con_args["values"] = values

        # Drop everything that was not supplied.  A new dict is built
        # rather than deleting from the one being iterated over.
        con_args = dict((k, v) for k, v in con_args.items()
                        if v is not None and v != '')

        if "loopPath" in con_args:
            # Loop constraints use IS / IS NOT in place of = / !=.
            con_args["op"] = {
                "=": "IS",
                "!=": "IS NOT",
            }.get(con_args["op"])

        con = obj.add_constraint(**con_args)
        if not con:
            raise ConstraintError(
                "error adding constraint with args: " + repr(con_args))

    sort_order = q.getAttribute('sortOrder')
    if sort_order:
        sos = Query.SO_SPLIT_PATTERN.split(sort_order)
        if len(sos) == 1:
            # A bare path with no direction keyword.
            if sos[0] in obj.views:
                obj.add_sort_order(sos[0])
        else:
            # Splitting on the direction keywords leaves a trailing
            # element after the last keyword; discard it and then walk
            # the remainder as (path, direction) pairs.
            sos.pop()
            for so_path, direction in zip(sos[0::2], sos[1::2]):
                if so_path in obj.views:
                    obj.add_sort_order(so_path, direction)

    original_logic = q.getAttribute('constraintLogic')
    if original_logic:
        logic = original_logic
        used_codes = set(obj.constraint_dict.keys())
        logic_codes = set(Query.LOGIC_SPLIT_PATTERN.split(logic))
        logic_codes.discard("")

        # Remove codes that do not belong to any constraint on the query,
        # together with an operator on either side of them.
        for code in logic_codes - used_codes:
            pattern = re.compile(
                r"((and|or)\s+)?\b" + re.escape(code) + r"\b(\s+(and|or))?",
                re.I)
            logic = pattern.sub("", logic)

        # Tidy up what the removals leave behind: empty brackets,
        # dangling leading/trailing operators, and doubled operators.
        logic = re.sub(r"\(\s*\)", "", logic)
        logic = Query.LEADING_OP_PATTERN.sub("", logic)
        logic = Query.TRAILING_OP_PATTERN.sub("", logic)
        for left, right in Query.LOGIC_PRODUCT:
            repl = left if left == right else "and"
            pattern = re.compile(left + r"\s*" + right, re.I)
            logic = pattern.sub(repl, logic)
        logic = logic.strip()

        try:
            if len(logic) > 0:
                obj.set_logic(logic)
        except Exception as e:
            raise Exception("Error parsing " + original_logic + " => " + repr(logic) + " with views: " + repr(used_codes) + str(e))

    obj.verify()

    return obj
460
463
465 """
466 Validate the query
467 ==================
468
469 Invalid queries will fail to run, and it is not always
470 obvious why. The validation routine checks to see that
471 the query will not cause errors on execution, and tries to
472 provide informative error messages.
473
474 This method is called immediately after a query is fully
475 deserialised.
476
477 @raise ModelError: if the paths are invalid
478 @raise QueryError: if there are errors in query construction
479 @raise ConstraintError: if there are errors in constraint construction
480
481 """
482 self.verify_views()
483 self.verify_constraint_paths()
484 self.verify_join_paths()
485 self.verify_pd_paths()
486 self.validate_sort_order()
487 self.do_verification = True
488
490 """
491 Add one or more views to the list of output columns
492 ===================================================
493
494 example::
495
496 query.add_view("Gene.name Gene.organism.name")
497
498 This is the main method for adding views to the list
499 of output columns. As well as appending views, it
500 will also split a single, space or comma delimited
501 string into multiple paths, and flatten out lists, or any
502 combination. It will also immediately try to validate
503 the views.
504
505 Output columns must be valid paths according to the
506 data model, and they must represent attributes of tables
507
508 @see: intermine.model.Model
509 @see: intermine.model.Path
510 @see: intermine.model.Attribute
511 """
512 views = []
513 for p in paths:
514 if isinstance(p, (set, list)):
515 views.extend(list(p))
516 elif isinstance(p, Class):
517 views.append(p.name + ".*")
518 elif isinstance(p, Column):
519 if p._path.is_attribute():
520 views.append(str(p))
521 else:
522 views.append(str(p) + ".*")
523 else:
524 views.extend(re.split("(?:,?\s+|,)", p))
525
526 views = map(self.prefix_path, views)
527
528 views_to_add = []
529 for view in views:
530 if view.endswith(".*"):
531 view = re.sub("\.\*$", "", view)
532 path = self.model.make_path(view, self.get_subclass_dict())
533 cd = path.end_class
534 attr_views = map(lambda x: view + "." + x.name, cd.attributes)
535 views_to_add.extend(attr_views)
536 else:
537 views_to_add.append(view)
538
539 if self.do_verification:
540 self.verify_views(views_to_add)
541
542 self.views.extend(views_to_add)
543
544 return self
545
555
557 """
558 Clear the output column list
559 ============================
560
561 Deletes all entries currently in the view list.
562 """
563 self.views = []
564
566 """
567 Check to see if the views given are valid
568 =========================================
569
570 This method checks to see if the views:
571 - are valid according to the model
572 - represent attributes
573
574 @see: L{intermine.model.Attribute}
575
576 @raise intermine.model.ModelError: if the paths are invalid
577 @raise ConstraintError: if the paths are not attributes
578 """
579 if views is None: views = self.views
580 for path in views:
581 path = self.model.make_path(path, self.get_subclass_dict())
582 if not path.is_attribute():
583 raise ConstraintError("'" + str(path)
584 + "' does not represent an attribute")
585
587 """
588 Add a constraint (filter on records)
589 ====================================
590
591 example::
592
593 query.add_constraint("Gene.symbol", "=", "zen")
594
595 This method will try to make a constraint from the arguments
596 given, trying each of the classes it knows of in turn
597 to see if they accept the arguments. This allows you
598 to add constraints of different types without having to know
599 or care what their classes or implementation details are.
600 All constraints derive from intermine.constraints.Constraint,
601 and they all have a path attribute, but are otherwise diverse.
602
603 Before adding the constraint to the query, this method
604 will also try to check that the constraint is valid by
605 calling Query.verify_constraint_paths()
606
607 @see: L{intermine.constraints}
608
609 @rtype: L{intermine.constraints.Constraint}
610 """
611 if len(args) == 1 and len(kwargs) == 0:
612 if isinstance(args[0], tuple):
613 con = self.constraint_factory.make_constraint(*args[0])
614 else:
615 con = args[0]
616 else:
617 con = self.constraint_factory.make_constraint(*args, **kwargs)
618
619 con.path = self.prefix_path(con.path)
620 if self.do_verification: self.verify_constraint_paths([con])
621 if hasattr(con, "code"):
622 self.constraint_dict[con.code] = con
623 else:
624 self.uncoded_constraints.append(con)
625
626 return con
627
def where(self, *args, **kwargs):
    """
    Add a constraint to the query
    =============================

    Chainable counterpart of add_constraint: the arguments are handed
    on to add_constraint unchanged, and the query itself is returned
    instead of the new constraint.

    If the view is still empty when this is called, the root of the
    query is first added to the view via add_view.
    """
    view_is_empty = not self.views
    if view_is_empty:
        self.add_view(self.root)

    self.add_constraint(*args, **kwargs)
    return self
641
644
646 """
647 Check that the constraints are valid
648 ====================================
649
650 This method will check the path attribute of each constraint.
651 In addition it will:
652 - Check that BinaryConstraints and MultiConstraints have an Attribute as their path
653 - Check that TernaryConstraints have a Reference as theirs
654 - Check that SubClassConstraints have a correct subclass relationship
655 - Check that LoopConstraints have a valid loopPath, of a compatible type
656 - Check that ListConstraints refer to an object
657
658 @param cons: The constraints to check (defaults to all constraints on the query)
659
660 @raise ModelError: if the paths are not valid
661 @raise ConstraintError: if the constraints do not satisfy the above rules
662
663 """
664 if cons is None: cons = self.constraints
665 for con in cons:
666 pathA = self.model.make_path(con.path, self.get_subclass_dict())
667 if isinstance(con, constraints.TernaryConstraint):
668 if pathA.get_class() is None:
669 raise ConstraintError("'" + str(pathA) + "' does not represent a class, or a reference to a class")
670 elif isinstance(con, constraints.BinaryConstraint) or isinstance(con, constraints.MultiConstraint):
671 if not pathA.is_attribute():
672 raise ConstraintError("'" + str(pathA) + "' does not represent an attribute")
673 elif isinstance(con, constraints.SubClassConstraint):
674 pathB = self.model.make_path(con.subclass, self.get_subclass_dict())
675 if not pathB.get_class().isa(pathA.get_class()):
676 raise ConstraintError("'" + con.subclass + "' is not a subclass of '" + con.path + "'")
677 elif isinstance(con, constraints.LoopConstraint):
678 pathB = self.model.make_path(con.loopPath, self.get_subclass_dict())
679 for path in [pathA, pathB]:
680 if not path.get_class():
681 raise ConstraintError("'" + str(path) + "' does not refer to an object")
682 (classA, classB) = (pathA.get_class(), pathB.get_class())
683 if not classA.isa(classB) and not classB.isa(classA):
684 raise ConstraintError("the classes are of incompatible types: " + str(classA) + "," + str(classB))
685 elif isinstance(con, constraints.ListConstraint):
686 if not pathA.get_class():
687 raise ConstraintError("'" + str(pathA) + "' does not refer to an object")
688
689 @property
691 """
692 Returns the constraints of the query
693 ====================================
694
695 Query.constraints S{->} list(intermine.constraints.Constraint)
696
697 Constraints are returned in the order of their code (normally
698 the order they were added to the query) and with any
699 subclass constraints at the end.
700
701 @rtype: list(Constraint)
702 """
703 ret = sorted(self.constraint_dict.values(), key=lambda con: con.code)
704 ret.extend(self.uncoded_constraints)
705 return ret
706
708 """
709 Returns the constraint with the given code
710 ==========================================
711
712 Returns the constraint with the given code, if it exists.
713 If no such constraint exists, it throws a ConstraintError
714
715 @return: the constraint corresponding to the given code
716 @rtype: L{intermine.constraints.CodedConstraint}
717 """
718 if code in self.constraint_dict:
719 return self.constraint_dict[code]
720 else:
721 raise ConstraintError("There is no constraint with the code '"
722 + code + "' on this query")
723
725 """
726 Add a join statement to the query
727 =================================
728
729 example::
730
731 query.add_join("Gene.proteins", "OUTER")
732
733 A join statement is used to determine whether a reference should
734 restrict the result set to only those records for which the
735 reference exists. For example, if one had a query with the view::
736
737 "Gene.name", "Gene.proteins.name"
738
739 Then in the normal case (that of an INNER join), we would only
740 get Genes that also have at least one protein that they reference.
741 Simply by asking for this output column you are placing a
742 restriction on the information you get back.
743
744 If in fact you wanted all genes, regardless of whether they had
745 proteins associated with them or not, but if they did
746 you would rather like to know _what_ proteins, then you need
747 to specify this reference to be an OUTER join::
748
749 query.add_join("Gene.proteins", "OUTER")
750
751 Now you will get many more rows of results, some of which will
752 have "null" values where the protein name would have been,
753
754 This method will also attempt to validate the join by calling
755 Query.verify_join_paths(). Joins must have a valid path, the
756 style can be either INNER or OUTER (defaults to OUTER,
757 as the user does not need to specify inner joins, since all
758 references start out as inner joins), and the path
759 must be a reference.
760
761 @raise ModelError: if the path is invalid
762 @raise TypeError: if the join style is invalid
763
764 @rtype: L{intermine.pathfeatures.Join}
765 """
766 join = Join(*args, **kwargs)
767 join.path = self.prefix_path(join.path)
768 if self.do_verification: self.verify_join_paths([join])
769 self.joins.append(join)
770 return self
771
773 """Alias for add_join(column, "OUTER")"""
774 return self.add_join(str(column), "OUTER")
775
777 """
778 Check that the joins are valid
779 ==============================
780
781 Joins must have valid paths, and they must refer to references.
782
783 @raise ModelError: if the paths are invalid
784 @raise QueryError: if the paths are not references
785 """
786 if joins is None: joins = self.joins
787 for join in joins:
788 path = self.model.make_path(join.path, self.get_subclass_dict())
789 if not path.is_reference():
790 raise QueryError("'" + join.path + "' is not a reference")
791
793 """
794 Add a path description to the query
795 ===================================
796
797 example::
798
799 query.add_path_description("Gene.proteins.proteinDomains", "Protein Domain")
800
801 This allows you to alias the components of long paths to
802 improve the way they display column headers in a variety of circumstances.
803 In the above example, if the view included the unwieldy path
804 "Gene.proteins.proteinDomains.primaryIdentifier", it would (depending on the
805 mine) be displayed as "Protein Domain > DB Identifier". These
806 settings are taken into account by the webservice when generating
807 column headers for flat-file results with the columnheaders parameter given, and
808 always supplied when requesting jsontable results.
809
810 @rtype: L{intermine.pathfeatures.PathDescription}
811
812 """
813 path_description = PathDescription(*args, **kwargs)
814 path_description.path = self.prefix_path(path_description.path)
815 if self.do_verification: self.verify_pd_paths([path_description])
816 self.path_descriptions.append(path_description)
817 return path_description
818
820 """
821 Check that the path of the path description is valid
822 ====================================================
823
824 Checks for consistency with the data model
825
826 @raise ModelError: if the paths are invalid
827 """
828 if pds is None: pds = self.path_descriptions
829 for pd in pds:
830 self.model.validate_path(pd.path, self.get_subclass_dict())
831
832 @property
834 """
835 Returns the list of constraints that have a code
836 ================================================
837
838 Query.coded_constraints S{->} list(intermine.constraints.CodedConstraint)
839
840 This returns an up to date list of the constraints that can
841 be used in a logic expression. The only kind of constraint
842 that this excludes, at present, is SubClassConstraints
843
844 @rtype: list(L{intermine.constraints.CodedConstraint})
845 """
846 return sorted(self.constraint_dict.values(), key=lambda con: con.code)
847
849 """
850 Returns the logic expression for the query
851 ==========================================
852
853 This returns the up to date logic expression. The default
854 value is the representation of all coded constraints and'ed together.
855
856 If the logic is empty and there are no constraints, returns an
857 empty string.
858
859 The LogicGroup object stringifies to a string that can be parsed to
860 obtain itself (eg: "A and (B or C or D)").
861
862 @rtype: L{intermine.constraints.LogicGroup}
863 """
864 if self._logic is None:
865 if len(self.coded_constraints) > 0:
866 return reduce(lambda x, y: x+y, self.coded_constraints)
867 else:
868 return ""
869 else:
870 return self._logic
871
873 """
874 Sets the Logic given the appropriate input
875 ==========================================
876
877 example::
878
879 Query.set_logic("A and (B or C)")
880
881 This sets the logic to the appropriate value. If the value is
882 already a LogicGroup, it is accepted, otherwise
883 the string is tokenised and parsed.
884
885 The logic is then validated with a call to validate_logic()
886
887 raise LogicParseError: if there is a syntax error in the logic
888 """
889 if isinstance(value, constraints.LogicGroup):
890 logic = value
891 else:
892 logic = self._logic_parser.parse(value)
893 if self.do_verification: self.validate_logic(logic)
894 self._logic = logic
895 return self
896
898 """
899 Validates the query logic
900 =========================
901
902 Attempts to validate the logic by checking
903 that every coded_constraint is included
904 at least once
905
906 @raise QueryError: if not every coded constraint is represented
907 """
908 if logic is None: logic = self._logic
909 logic_codes = set(logic.get_codes())
910 for con in self.coded_constraints:
911 if con.code not in logic_codes:
912 raise QueryError("Constraint " + con.code + repr(con)
913 + " is not mentioned in the logic: " + str(logic))
914
916 """
917 Gets the sort order when none has been specified
918 ================================================
919
920 This method is called to determine the sort order if
921 none is specified
922
923 @raise QueryError: if the view is empty
924
925 @rtype: L{intermine.pathfeatures.SortOrderList}
926 """
927 try:
928 return SortOrderList((self.views[0], SortOrder.ASC))
929 except IndexError:
930 raise QueryError("Query view is empty")
931
933 """
934 Return a sort order for the query
935 =================================
936
937 This method returns the sort order if set, otherwise
938 it returns the default sort order
939
940 @raise QueryError: if the view is empty
941
942 @rtype: L{intermine.pathfeatures.SortOrderList}
943 """
944 if self._sort_order_list.is_empty():
945 return self.get_default_sort_order()
946 else:
947 return self._sort_order_list
948
950 """
951 Adds a sort order to the query
952 ==============================
953
954 example::
955
956 Query.add_sort_order("Gene.name", "DESC")
957
958 This method adds a sort order to the query.
959 A query can have multiple sort orders, which are
960 assessed in sequence.
961
962 If a query has two sort-orders, for example,
963 the first being "Gene.organism.name asc",
964 and the second being "Gene.name desc", you would have
965 the list of genes grouped by organism, with the
966 lists within those groupings in reverse alphabetical
967 order by gene name.
968
969 This method will try to validate the sort order
970 by calling validate_sort_order()
971 """
972 so = SortOrder(str(path), direction)
973 so.path = self.prefix_path(so.path)
974 if self.do_verification: self.validate_sort_order(so)
975 self._sort_order_list.append(so)
976 return self
977
979 """
980 Check the validity of the sort order
981 ====================================
982
983 Checks that the sort order paths are:
984 - valid paths
985 - in the view
986
987 @raise QueryError: if the sort order is not in the view
988 @raise ModelError: if the path is invalid
989
990 """
991 if not so_elems:
992 so_elems = self._sort_order_list
993
994 for so in so_elems:
995 self.model.validate_path(so.path, self.get_subclass_dict())
996 if so.path not in self.views:
997 raise QueryError("Sort order element is not in the view: " + so.path)
998
1000 """
1001 Return the current mapping of class to subclass
1002 ===============================================
1003
1004 This method returns a mapping of classes used
1005 by the model for assessing whether certain paths are valid. For
1006 instance, if you subclass MicroArrayResult to be FlyAtlasResult,
1007 you can refer to the .presentCall attributes of fly atlas results.
1008 MicroArrayResults do not have this attribute, and a path such as::
1009
1010 Gene.microArrayResult.presentCall
1011
1012 would be marked as invalid unless the dictionary is provided.
1013
1014 Users most likely will not need to ever call this method.
1015
1016 @rtype: dict(string, string)
1017 """
1018 subclass_dict = {}
1019 for c in self.constraints:
1020 if isinstance(c, constraints.SubClassConstraint):
1021 subclass_dict[c.path] = c.subclass
1022 return subclass_dict
1023
def results(self, row="rr", start=0, size=None):
    """
    Return an iterator over result rows
    ===================================

    Usage::

      for row in query.results():
        do_sth_with(row)

    The request is built from get_results_path() and
    to_query_params(), and then handed to the service together
    with the row format, the views and the root of this query.

    @param row: the format for the row. Defaults to "rr". Valid options are
        "rr", "dict", "list", "jsonrows", "jsonobject", "tsv", "csv".
    @type row: string
    @param start: sent to the service as the "start" parameter. Defaults to 0
    @param size: sent to the service as the "size" parameter, but only
        when it is truthy (None and 0 are both left out of the request)

    @rtype: L{intermine.webservice.ResultIterator}

    @raise WebserviceError: if the request is unsuccessful
    """
    resource_path = self.get_results_path()
    request_params = self.to_query_params()
    request_params["start"] = start
    if size:
        request_params["size"] = size
    call_args = (resource_path, request_params, row, self.views, self.root)
    return self.service.get_results(*call_args)
1050
def one(self, row="rr"):
    """
    Return one result, and raise an error if the result size is not 1
    =================================================================

    The size of the result set is obtained with count() before
    the row itself is fetched with first().

    @param row: the row format, passed through to first(). Defaults to "rr"
    @type row: string

    @return: the single result row, as returned by first()

    @raise QueryError: if the query does not return exactly one result
    """
    c = self.count()
    if c != 1:
        # A bare string is not a valid exception object; raise the same
        # error type the rest of this module uses for query problems.
        raise QueryError("Result size is not one: got " + str(c) + " results")
    return self.first(row)
1058
def first(self, row="rr", start=0):
    """
    Return the first result, or None if the results are empty
    =========================================================

    @param row: the row format, passed through to results(). Defaults to "rr"
    @type row: string
    @param start: passed through to results() as its start argument.
        Defaults to 0

    @return: the first row produced by results(), or None if it produces none
    """
    # NOTE(review): no size limit is requested for "jsonobjects" rows -
    # presumably because one object is not guaranteed to map onto a single
    # row; confirm against the webservice. Every other format asks for
    # exactly one row.
    size = None if row == "jsonobjects" else 1
    # The builtin next() works on Python 2.6+ and 3, unlike the
    # Python-2-only ``.next()`` method; its default value replaces the
    # explicit StopIteration handler.
    return next(self.results(row, start=start, size=size), None)
1069
1071 """
1072 Get a list of result rows
1073 =========================
1074
1075 This method is a shortcut so that you do not have to
1076 do a list comprehension yourself on the iterator that
1077 is normally returned. If you have a very large result
1078 set (and these can get up to 100's of thousands of rows
1079 pretty easily) you will not want to
1080 have the whole list in memory at once, but there may
1081 be other circumstances when you might want to keep the whole
1082 list in one place.
1083
1084 It takes all the same arguments and parameters as Query.results
1085
1086 aliased as 'all'
1087
1088 @see: L{intermine.query.results}
1089
1090 """
1091 rows = self.results(*args, **kwargs)
1092 return [r for r in rows]
1093
1095 """
1096 Return the total number of rows this query returns
1097 ==================================================
1098
1099 Obtain the number of rows a particular query will
1100 return, without having to fetch and parse all the
1101 actual data. This method makes a request to the server
1102 to report the count for the query, and is sugar for a
1103 results call.
1104
1105 @rtype: int
1106 @raise WebserviceError: if the request is unsuccessful.
1107 """
1108 count_str = ""
1109 rows = self.results("count")
1110 for row in rows:
1111 count_str += row
1112 try:
1113 return int(count_str)
1114 except ValueError:
1115 raise WebserviceError("Server returned a non-integer count: " + count_str)
1116
1118 """
1119 Returns the uri to use to create a list from this query
1120 =======================================================
1121
1122 Query.get_list_upload_uri() -> str
1123
1124 This method is used internally when performing list operations
1125 on queries.
1126
1127 @rtype: str
1128 """
1129 return self.service.root + self.service.QUERY_LIST_UPLOAD_PATH
1130
1132 """
1133 Returns the uri to use to append this query to a list
1134 =====================================================
1135
1136 Query.get_list_append_uri() -> str
1137
1138 This method is used internally when performing list operations
1139 on queries.
1140
1141 @rtype: str
1142 """
1143 return self.service.root + self.service.QUERY_LIST_APPEND_PATH
1144
1145
1147 """
1148 Returns the path section pointing to the REST resource
1149 ======================================================
1150
1151 Query.get_results_path() -> str
1152
1153 Internally, this just calls a constant property
1154 in intermine.service.Service
1155
1156 @rtype: str
1157 """
1158 return self.service.QUERY_PATH
1159
1160
1162 """
1163 Returns the child objects of the query
1164 ======================================
1165
1166 This method is used during the serialisation of queries
1167 to xml. It is unlikely you will need access to this as a whole.
1168 Consider using "path_descriptions", "joins", "constraints" instead
1169
1170 @see: Query.path_descriptions
1171 @see: Query.joins
1172 @see: Query.constraints
1173
1174 @return: the child element of this query
1175 @rtype: list
1176 """
1177 return sum([self.path_descriptions, self.joins, self.constraints], [])
1178
1180 """
1181 Returns the parameters to be passed to the webservice
1182 =====================================================
1183
1184 The query is responsible for producing its own query
1185 parameters. These consist simply of:
1186 - query: the xml representation of the query
1187
1188 @rtype: dict
1189
1190 """
1191 xml = self.to_xml()
1192 params = {'query' : xml }
1193 return params
1194
1196 """
1197 Returns a DOM node representing the query
1198 =========================================
1199
1200 This is an intermediate step in the creation of the
1201 xml serialised version of the query. You probably
1202 won't need to call this directly.
1203
1204 @rtype: xml.minidom.Node
1205 """
1206 impl = getDOMImplementation()
1207 doc = impl.createDocument(None, "query", None)
1208 query = doc.documentElement
1209
1210 query.setAttribute('name', self.name)
1211 query.setAttribute('model', self.model.name)
1212 query.setAttribute('view', ' '.join(self.views))
1213 query.setAttribute('sortOrder', str(self.get_sort_order()))
1214 query.setAttribute('longDescription', self.description)
1215 if len(self.coded_constraints) > 1:
1216 query.setAttribute('constraintLogic', str(self.get_logic()))
1217
1218 for c in self.children():
1219 element = doc.createElement(c.child_type)
1220 for name, value in c.to_dict().items():
1221 if isinstance(value, (set, list)):
1222 for v in value:
1223 subelement = doc.createElement(name)
1224 text = doc.createTextNode(v)
1225 subelement.appendChild(text)
1226 element.appendChild(subelement)
1227 else:
1228 element.setAttribute(name, value)
1229 query.appendChild(element)
1230 return query
1231
1233 """
1234 Return an XML serialisation of the query
1235 ========================================
1236
1237 This method serialises the current state of the query to an
1238 xml string, suitable for storing, or sending over the
1239 internet to the webservice.
1240
1241 @return: the serialised xml string
1242 @rtype: string
1243 """
1244 n = self.to_Node()
1245 return n.toxml()
1246
1261
1263 """
1264 Performs a deep clone
1265 =====================
1266
1267 This method will produce a clone that is independent,
1268 and can be altered without affecting the original,
1269 but starts off with the exact same state as it.
1270
1271 The only shared elements should be the model
1272 and the service, which are shared by all queries
1273 that refer to the same webservice.
1274
1275 @return: same class as caller
1276 """
1277 newobj = self.__class__(self.model)
1278 for attr in ["joins", "views", "_sort_order_list", "_logic", "path_descriptions", "constraint_dict"]:
1279 setattr(newobj, attr, deepcopy(getattr(self, attr)))
1280
1281 for attr in ["name", "description", "service", "do_verification", "constraint_factory", "root"]:
1282 setattr(newobj, attr, getattr(self, attr))
1283 return newobj
1284
1286 """
1287 A Class representing a predefined query
1288 =======================================
1289
1290 Templates are ways of saving queries
1291 and allowing others to run them
1292 simply. They are the main interface
1293 to querying in the webapp
1294
1295 SYNOPSIS
1296 --------
1297
1298 example::
1299
1300 service = Service("http://www.flymine.org/query/service")
1301 template = service.get_template("Gene_Pathways")
1302 for row in template.results(A={"value":"eve"}):
1303 process_row(row)
1304 ...
1305
1306 A template is a subclass of query that comes predefined. They
1307 are typically retrieved from the webservice and run by specifying
1308 the values for their existing constraints. They are a concise
1309 and powerful way of running queries in the webapp.
1310
1311 Being subclasses of query, everything is true of them that is true
1312 of a query. They are just less work, as you don't have to design each
1313 one. Also, you can store your own templates in the web-app, and then
1314 access them as a private webservice method, from anywhere, making them
1315 a kind of query in the cloud - for this you will need to authenticate
1316 by providing log in details to the service.
1317
1318 The most significant difference is how constraint values are specified
1319 for each set of results.
1320
1321 @see: L{Template.results}
1322
1323 """
1325 """
1326 Constructor
1327 ===========
1328
1329 Instantiation is identical to that of queries. As with queries,
1330 these are best obtained from the intermine.webservice.Service
1331 factory methods.
1332
1333 @see: L{intermine.webservice.Service.get_template}
1334 """
1335 super(Template, self).__init__(*args, **kwargs)
1336 self.constraint_factory = constraints.TemplateConstraintFactory()
1337 @property
1339 """
1340 Return the list of constraints you can edit
1341 ===========================================
1342
1343 Template.editable_constraints -> list(intermine.constraints.Constraint)
1344
1345 Templates have a concept of editable constraints, which
1346 is a way of hiding complexity from users. An underlying query may have
1347 five constraints, but only expose the one that is actually
1348 interesting. This property returns this subset of constraints
1349 that have the editable flag set to true.
1350 """
1351 isEditable = lambda x: x.editable
1352 return filter(isEditable, self.constraints)
1353
def to_query_params(self):
    """
    Returns the query parameters needed for the webservice
    ======================================================

    Template.to_query_params() -> dict(string, string)

    Overrides the method of the same name in query to provide the
    parameters needed by the templates results service. These
    are slightly more complex:
      - name: The template's name
      - for each editable constraint that is switched on (where [i] is
        an integer starting at 1 and incremented for each such
        constraint), every entry of the constraint's to_dict(), e.g.:
          - constraint[i]: the path
          - op[i]: the operator
          - value[i]: the value
          - code[i]: the code
          - extra[i]: the extra value for ternary constraints (optional)

    Editable constraints that are switched off are left out and do
    not consume an index.

    @rtype: dict
    """
    # Keys that the template service expects under a different name.
    renamed_keys = {"extraValue": "extra", "path": "constraint"}
    params = {'name': self.name}
    index = 1
    for con in self.editable_constraints:
        if not con.switched_on:
            # The original wrote a bare ``next`` here, which is a no-op
            # expression, so switched-off constraints were still sent.
            continue
        suffix = str(index)
        for key, value in con.to_dict().items():
            params[renamed_keys.get(key, key) + suffix] = value
        index += 1
    return params
1385
1387 """
1388 Returns the path section pointing to the REST resource
1389 ======================================================
1390
1391 Template.get_results_path() S{->} str
1392
1393 Internally, this just calls a constant property
1394 in intermine.service.Service
1395
1396 This overrides the method of the same name in Query
1397
1398 @return: the path to the REST resource
1399 @rtype: string
1400 """
1401 return self.service.TEMPLATEQUERY_PATH
1402
1404 """
1405 Gets a template to run
1406 ======================
1407
1408 Template.get_adjusted_template(con_values) S{->} Template
1409
1410 When templates are run, they are first cloned, and their
1411 values are changed to those desired. This leaves the original
1412 template unchanged so it can be run again with different
1413 values. This method does the cloning and changing of constraint
1414 values
1415
1416 @raise ConstraintError: if the constraint values specify values for a non-editable constraint.
1417
1418 @rtype: L{Template}
1419 """
1420 clone = self.clone()
1421 for code, options in con_values.items():
1422 con = clone.get_constraint(code)
1423 if not con.editable:
1424 raise ConstraintError("There is a constraint '" + code
1425 + "' on this query, but it is not editable")
1426 for key, value in options.items():
1427 setattr(con, key, value)
1428 return clone
1429
def results(self, row="rr", start=0, size=None, **con_values):
    """
    Get an iterator over result rows
    ================================

    This method returns the same values with the
    same options as the method of the same name in
    Query (see intermine.query.Query). The main difference is in the
    arguments.

    The template result methods also accept a key-word pair
    set of arguments that are used to supply values
    to the editable constraints. eg::

      template.results(
        A = {"value": "eve"},
        B = {"op": ">", "value": 5000}
      )

    The keys should be codes for editable constraints (you can inspect these
    with Template.editable_constraints) and the values should be a dictionary
    of constraint properties to replace. You can replace the values for
    "op" (operator), "value", and "extra_value" and "values" in the case of
    ternary and multi constraints.

    The constraint values are applied to a copy obtained from
    get_adjusted_template(), and the results are requested from
    that copy.

    @rtype: L{intermine.webservice.ResultIterator}
    """
    return super(Template, self.get_adjusted_template(con_values)).results(row, start, size)
1459
1461 """
1462 Get a list of result rows
1463 =========================
1464
1465 This method performs the same as the method of the
1466 same name in Query, and it shares the semantics of
1467 Template.results().
1468
1469 @see: L{intermine.query.Query.get_results_list}
1470 @see: L{intermine.query.Template.results}
1471
1472 @rtype: list
1473
1474 """
1475 clone = self.get_adjusted_template(con_values)
1476 return super(Template, clone).get_results_list(row, start, size)
1477
def count(self, **con_values):
    """
    Return the total number of rows this template returns
    =====================================================

    Obtain the number of rows this template will return for
    the given constraint values, without having to fetch and
    parse all the actual data. The constraint values are applied
    to a copy obtained from get_adjusted_template(), and the
    count is requested from that copy.

    @param con_values: constraint codes mapped to dictionaries of the
        constraint properties to replace, as for Template.results

    @rtype: int
    @raise WebserviceError: if the request is unsuccessful.
    """
    return super(Template, self.get_adjusted_template(con_values)).count()
1494
1498
1501
1504