1 import re
2 from copy import deepcopy
3 from xml.dom import minidom, getDOMImplementation
4
5 from intermine.util import openAnything, ReadableException
6 from intermine.pathfeatures import PathDescription, Join, SortOrder, SortOrderList
7 from intermine.model import Column, Class
8 import constraints
9
10 """
11 Classes representing queries against webservices
12 ================================================
13
14 Representations of queries, and templates.
15
16 """
17
18 __author__ = "Alex Kalderimis"
19 __organization__ = "InterMine"
20 __license__ = "LGPL"
21 __contact__ = "dev@intermine.org"
22
23
24 -class Query(object):
25 """
26 A Class representing a structured database query
27 ================================================
28
29 Objects of this class have properties that model the
30 attributes of the query, and methods for performing
31 the request.
32
33 SYNOPSIS
34 --------
35
36 example:
37
38 >>> service = Service("http://www.flymine.org/query/service")
39 >>> query = service.new_query()
40 >>>
41 >>> query.add_view("Gene.symbol", "Gene.pathways.name", "Gene.proteins.symbol")
42 >>> query.add_sort_order("Gene.pathways.name")
43 >>>
44 >>> query.add_constraint("Gene", "LOOKUP", "eve")
45 >>> query.add_constraint("Gene.pathways.name", "=", "Phosphate*")
46 >>>
47 >>> query.set_logic("A or B")
48 >>>
49 >>> for row in query.rows():
50 ... handle_row(row)
51
52 OR, using an SQL style DSL:
53
54 >>> s = Service("www.flymine.org/query")
55 >>> query = s.query("Gene").\\
56 ... select("*", "pathways.*").\\
57 ... where("symbol", "=", "H").\\
58 ... outerjoin("pathways").\\
59 ... order_by("symbol")
60 >>> for row in query.rows(start=10, size=5):
61 ... handle_row(row)
62
63 OR, for a more SQL-alchemy, ORM style:
64
65 >>> for gene in s.query(s.model.Gene).filter(s.model.Gene.symbol == ["zen", "H", "eve"]).add_columns(s.model.Gene.alleles):
66 ... handle(gene)
67
68 Query objects represent structured requests for information over the database
69 housed at the datawarehouse whose webservice you are querying. They utilise
70 some of the concepts of relational databases, within an object-related
71 ORM context. If you don't know what that means, don't worry: you
72 don't need to write SQL, and the queries will be fast.
73
74 To make things slightly more familiar to those with knowledge of SQL, some syntactical
75 sugar is provided to make constructing queries a bit more recognisable.
76
77 PRINCIPLES
78 ----------
79
80 The data model represents tables in the databases as classes, with records
81 within tables as instances of that class. The columns of the database are the
82 fields of that object::
83
84 The Gene table - showing two records/objects
85 +---------------------------------------------------+
86 | id | symbol | length | cyto-location | organism |
87 +----------------------------------------+----------+
88 | 01 | eve | 1539 | 46C10-46C10 | 01 |
89 +----------------------------------------+----------+
90 | 02 | zen | 1331 | 84A5-84A5 | 01 |
91 +----------------------------------------+----------+
92 ...
93
94 The organism table - showing one record/object
95 +----------------------------------+
96 | id | name | taxon id |
97 +----------------------------------+
98 | 01 | D. melanogaster | 7227 |
99 +----------------------------------+
100
101 Columns that contain a meaningful value are known as 'attributes' (in the tables above, that is
102 everything except the id columns). The other columns (such as "organism" in the gene table)
103 are ones that reference records of other tables (ie. other objects), and are called
104 references. You can refer to any field in any class, that has a connection,
105 however tenuous, with a table, by using dotted path notation::
106
107 Gene.organism.name -> the name column in the organism table, referenced by a record in the gene table
108
109 These paths, and the connections between records and tables they represent,
110 are the basis for the structure of InterMine queries.
111
112 THE STRUCTURE OF A QUERY
113 ------------------------
114
115 A query has two principal sets of properties:
116 - its view: the set of output columns
117 - its constraints: the set of rules for what to include
118
119 A query must have at least one output column in its view, but constraints
120 are optional - if you don't include any, you will get back every record
121 from the table (every object of that type)
122
123 In addition, the query must be coherent: if you have information about
124 an organism, and you want a list of genes, then the "Gene" table
125 should be the basis for your query, and as such the Gene class, which
126 represents this table, should be the root of all the paths that appear in it:
127
128 So, to take a simple example::
129
130 I have an organism name, and I want a list of genes:
131
132 The view is the list of things I want to know about those genes:
133
134 >>> query.add_view("Gene.name")
135 >>> query.add_view("Gene.length")
136 >>> query.add_view("Gene.proteins.sequence.length")
137
138 Note I can freely mix attributes and references, as long as every view ends in
139 an attribute (a meaningful value). As a short-cut I can also write:
140
141 >>> query.add_views("Gene.name", "Gene.length", "Gene.proteins.sequence.length")
142
143 or:
144
145 >>> query.add_views("Gene.name Gene.length Gene.proteins.sequence.length")
146
147 They are all equivalent. You can also use common SQL style shortcuts such as "*" for all
148 attribute fields:
149
150 >>> query.add_views("Gene.*")
151
152 You can also use "select" as a synonym for "add_view"
153
154 Now I can add my constraints. As, we mentioned, I have information about an organism, so:
155
156 >>> query.add_constraint("Gene.organism.name", "=", "D. melanogaster")
157
158 (note, here I can use "where" as a synonym for "add_constraint")
159
160 If I run this query, I will get literally millions of results -
161 it needs to be filtered further:
162
163 >>> query.add_constraint("Gene.proteins.sequence.length", "<", 500)
164
165 If that doesn't restrict things enough I can add more filters:
166
167 >>> query.add_constraint("Gene.symbol", "ONE OF", ["eve", "zen", "h"])
168
169 Now I am guaranteed to get only information on genes I am interested in.
170
171 Note, though, that because I have included the link (or "join") from Gene -> Protein,
172 this, by default, means that I only want genes that have protein information associated
173 with them. If in fact I want information on all genes, and just want to know the
174 protein information if it is available, then I can specify that with:
175
176 >>> query.add_join("Gene.proteins", "OUTER")
177
178 And if perhaps my query is not as simple as a strict cumulative filter, but I want all
179 D. mel genes that EITHER have a short protein sequence OR come from one of my favourite genes
180 (as unlikely as that sounds), I can specify the logic for that too:
181
182 >>> query.set_logic("A and (B or C)")
183
184 Each letter refers to one of the constraints - the codes are assigned in the order you add
185 the constraints. If you want to be absolutely certain about the constraints you mean, you
186 can use the constraint objects themselves:
187
188 >>> gene_is_eve = query.add_constraint("Gene.symbol", "=", "eve")
189 >>> gene_is_zen = query.add_constraint("Gene.symbol", "=", "zen")
190 >>>
191 >>> query.set_logic(gene_is_eve | gene_is_zen)
192
193 By default the logic is a straight cumulative filter (ie: A and B and C and D and ...)
194
195 Putting it all together:
196
197 >>> query.add_view("Gene.name", "Gene.length", "Gene.proteins.sequence.length")
198 >>> query.add_constraint("Gene.organism.name", "=", "D. melanogaster")
199 >>> query.add_constraint("Gene.proteins.sequence.length", "<", 500)
200 >>> query.add_constraint("Gene.symbol", "ONE OF", ["eve", "zen", "h"])
201 >>> query.add_join("Gene.proteins", "OUTER")
202 >>> query.set_logic("A and (B or C)")
203
204 This can be made more concise and readable with a little DSL sugar:
205
206 >>> query = service.query("Gene")
207 >>> query.select("name", "length", "proteins.sequence.length").\
208 ... where('organism.name', '=', 'D. melanogaster').\
209 ... where("proteins.sequence.length", "<", 500).\
210 ... where('symbol', 'ONE OF', ['eve', 'h', 'zen']).\
211 ... outerjoin('proteins').\
212 ... set_logic("A and (B or C)")
213
214 And the query is defined.
215
216 Result Processing: Rows
217 -----------------------
218
219 calling ".rows()" on a query will return an iterator of rows, where each row
220 is a ResultRow object, which can be treated as both a list and a dictionary.
221
222 Which means you can refer to columns by name:
223
224 >>> for row in query.rows():
225 ... print "name is %s" % (row["name"])
226 ... print "length is %d" % (row["length"])
227
228 As well as using list indices:
229
230 >>> for row in query.rows():
231 ... print "The first column is %s" % (row[0])
232
233 Iterating over a row iterates over the cell values as a list:
234
235 >>> for row in query.rows():
236 ... for column in row:
237 ... do_something(column)
238
239 Here each row will have a gene name, a gene length, and a sequence length, eg:
240
241 >>> print row.to_l
242 ["even skipped", "1359", "376"]
243
244 To make that clearer, you can ask for a dictionary instead of a list:
245
246 >>> for row in query.rows():
247 ... print row.to_d
248 {"Gene.name":"even skipped","Gene.length":"1359","Gene.proteins.sequence.length":"376"}
249
250
251 If you just want the raw results, for printing to a file, or for piping to another program,
252 you can request strings instead:
253
254 >>> for row in query.results("string"):
255 ... print(row)
256
257 Result Processing: Results
258 --------------------------
259
260 Results can also be processed on a record by record basis. If you have a query that
261 has output columns of "Gene.symbol", "Gene.pathways.name" and "Gene.proteins.proteinDomains.primaryIdentifier",
262 then processing it by records will return one object per gene, and that gene will have a property
263 named "pathways" which contains objects which have a name property. Likewise there will be a
264 proteins property which holds a list of proteinDomains which all have a primaryIdentifier property, and so on.
265 This allows a more object orientated approach to database records, familiar to users of
266 other ORMs.
267
268 This is the format used when you choose to iterate over a query directly, or can be explicitly
269 chosen by invoking L{intermine.query.Query.results}:
270
271 >>> for gene in query:
272 ... print gene.name, map(lambda x: x.name, gene.pathways)
273
274 The structure of the object and the information it contains depends entirely
275 on the output columns selected. The values may be None, of course, but also any valid values of an object
276 (according to the data model) will also be None if they were not selected for output. Attempts
277 to access invalid properties (such as gene.favourite_colour) will cause exceptions to be thrown.
278
279 Getting us to Generate your Code
280 --------------------------------
281
282 Not that you have to actually write any of this! The webapp will happily
283 generate the code for any query (and template) you can build in it. A good way to get
284 started is to use the webapp to generate your code, and then run it as scripts
285 to speed up your queries. You can always tinker with and edit the scripts you download.
286
287 To get generated queries, look for the "python" link at the bottom of query-builder and
288 template form pages, it looks a bit like this::
289
290 . +=====================================+=============
291 | |
292 | Perl | Python | Java [Help] |
293 | |
294 +==============================================
295
296 """
297
# Pre-compiled patterns used when deserialising queries from XML.
# Raw strings are used so that regex escapes such as \s and \( reach the
# regex engine untouched instead of being treated as (invalid) string escapes.

# Splits a sort-order string on its direction keywords, keeping the keyword.
SO_SPLIT_PATTERN = re.compile(r"\s*(asc|desc)\s*", re.I)
# Splits a logic expression on operators and brackets, leaving the codes.
LOGIC_SPLIT_PATTERN = re.compile(r"\s*(?:and|or|\(|\))\s*", re.I)
# Match a dangling operator at the end / start of a logic expression.
TRAILING_OP_PATTERN = re.compile(r"\s*(and|or)\s*$", re.I)
LEADING_OP_PATTERN = re.compile(r"^\s*(and|or)\s*", re.I)
LOGIC_OPS = ["and", "or"]
# Every ordered pair of operators, used to collapse doubled-up operators.
LOGIC_PRODUCT = [(x, y) for x in LOGIC_OPS for y in LOGIC_OPS]
304
def __init__(self, model, service=None, validate=True, root=None):
    """
    Construct a new Query
    =====================

    Build an empty query for making database queries
    against an InterMine data warehouse.

    You would not normally call this constructor yourself;
    the factory method on intermine.webservice.Service
    handles construction for you.

    @param model: an instance of L{intermine.model.Model}. Required
    @param service: an instance of L{intermine.service.Service}. Optional,
        but you will not be able to make requests without one.
    @param validate: a boolean - defaults to True. If set to false, the query
        will not try and validate itself. You should not set this to false.
    @param root: Optional. When given, it is resolved through
        model.make_path(root).root and stored as the query root;
        otherwise the root is left as None.

    """
    self.model = model
    self.root = None if root is None else model.make_path(root).root

    # Descriptive metadata, and the service used to run requests.
    self.name = ''
    self.description = ''
    self.service = service
    self.do_verification = validate

    # Containers for the parts that make up the query.
    self.path_descriptions = []
    self.joins = []
    self.constraint_dict = {}
    self.uncoded_constraints = []
    self.views = []
    self._sort_order_list = SortOrderList()

    # Constraint logic helpers.
    self._logic_parser = constraints.LogicParser(self)
    self._logic = None
    self.constraint_factory = constraints.ConstraintFactory()

    # Alternative names for existing methods, bound on the instance.
    method_aliases = (
        ("c", "column"),
        ("filter", "where"),
        ("add_column", "add_view"),
        ("add_columns", "add_view"),
        ("add_views", "add_view"),
        ("add_to_select", "add_view"),
        ("order_by", "add_sort_order"),
        ("all", "get_results_list"),
        ("size", "count"),
    )
    for alias, target in method_aliases:
        setattr(self, alias, getattr(self, target))
355
357 """Return an iterator over all the objects returned by this query"""
358 return self.results("jsonobjects")
359
361 """Return the number of rows this query will return."""
362 return self.count()
363
@classmethod
def from_xml(cls, xml, *args, **kwargs):
    """
    Deserialise a query serialised to XML
    =====================================

    This method is used to instantiate serialised queries.
    It is used by intermine.webservice.Service objects
    to instantiate Template objects and it can be used
    to read in queries you have saved to a file.

    Verification is switched off while the query is being
    populated; the complete query is verified once at the end.

    @param xml: The xml as a file name, url, or string
    @param args: positional arguments passed on to the constructor
    @param kwargs: keyword arguments passed on to the constructor

    @raise QueryParseError: if the query cannot be parsed
    @raise ModelError: if the query has illegal paths in it
    @raise ConstraintError: if the constraints don't make sense

    @rtype: L{Query}
    """
    obj = cls(*args, **kwargs)
    obj.do_verification = False
    f = openAnything(xml)
    try:
        doc = minidom.parse(f)
    finally:
        # Release the handle even when the document fails to parse.
        f.close()

    queries = doc.getElementsByTagName('query')
    assert len(queries) == 1, "wrong number of queries in xml"
    q = queries[0]
    obj.name = q.getAttribute('name')
    obj.description = q.getAttribute('description')
    obj.add_view(q.getAttribute('view'))

    for p in q.getElementsByTagName('pathDescription'):
        path = p.getAttribute('pathString')
        description = p.getAttribute('description')
        obj.add_path_description(path, description)

    for j in q.getElementsByTagName('join'):
        path = j.getAttribute('path')
        style = j.getAttribute('style')
        obj.add_join(path, style)

    for c in q.getElementsByTagName('constraint'):
        con_args = {}
        con_args['path'] = c.getAttribute('path')
        # minidom returns '' (never None) for a missing attribute, so test
        # for emptiness before falling back to the enclosing <node> element.
        if not con_args['path']:
            if c.parentNode.tagName != "node":
                msg = "Constraints must have a path"
                raise QueryParseError(msg)
            con_args['path'] = c.parentNode.getAttribute('path')
        con_args['op'] = c.getAttribute('op')
        con_args['value'] = c.getAttribute('value')
        con_args['code'] = c.getAttribute('code')
        con_args['subclass'] = c.getAttribute('type')
        con_args['editable'] = c.getAttribute('editable')
        con_args['optional'] = c.getAttribute('switchable')
        con_args['extra_value'] = c.getAttribute('extraValue')
        con_args['loopPath'] = c.getAttribute('loopPath')

        # Multi-value constraints carry their values as <value> children.
        values = []
        for val_e in c.getElementsByTagName('value'):
            texts = [node.data for node in val_e.childNodes
                     if node.nodeType == node.TEXT_NODE]
            values.append(' '.join(texts))
        if values:
            con_args["values"] = values

        # Drop unset attributes by building a new dict, rather than
        # deleting from the dict while iterating over it.
        con_args = dict((k, v) for k, v in con_args.items()
                        if v is not None and v != '')

        if "loopPath" in con_args:
            con_args["op"] = {
                "=": "IS",
                "!=": "IS NOT"
            }.get(con_args["op"])
        con = obj.add_constraint(**con_args)
        if not con:
            raise ConstraintError(
                "error adding constraint with args: " + repr(con_args))

    # Splitting on the captured direction keyword yields
    # [path, direction, path, direction, ..., trailing remainder].
    sos = Query.SO_SPLIT_PATTERN.split(q.getAttribute('sortOrder'))
    if len(sos) == 1:
        if sos[0] in obj.views:
            obj.add_sort_order(sos[0])
    else:
        sos.pop()
        # Pair each path with its direction; an unpaired tail is ignored.
        for path, direction in zip(sos[0::2], sos[1::2]):
            if path in obj.views:
                obj.add_sort_order(path, direction)

    logic = q.getAttribute('constraintLogic')
    used_codes = set(obj.constraint_dict.keys())
    logic_codes = set(Query.LOGIC_SPLIT_PATTERN.split(logic))
    logic_codes.discard("")
    # Strip codes that do not correspond to a constraint on this query,
    # together with one adjoining operator on each side.
    for code in logic_codes - used_codes:
        pattern = re.compile(
            r"((and|or)\s+)?\b" + re.escape(code) + r"\b(\s+(and|or))?",
            re.I)
        logic = pattern.sub("", logic)

    # Remove brackets left empty by the stripping above.
    logic = re.sub(r"\(\s*\)", "", logic)

    logic = Query.LEADING_OP_PATTERN.sub("", logic)
    logic = Query.TRAILING_OP_PATTERN.sub("", logic)
    # Collapse doubled-up operators: identical pairs become that operator,
    # mixed pairs become "and".
    for left, right in Query.LOGIC_PRODUCT:
        if left == right:
            repl = left
        else:
            repl = "and"
        pattern = re.compile(left + r"\s*" + right, re.I)
        logic = pattern.sub(repl, logic)
    logic = logic.strip()
    try:
        if len(logic) > 0:
            obj.set_logic(logic)
    except Exception as e:
        raise Exception("Error parsing " + q.getAttribute('constraintLogic')
                        + " => " + repr(logic) + " with views: "
                        + repr(used_codes) + str(e))

    obj.verify()

    return obj
485
487 """Return the XML serialisation of this query"""
488 return self.to_xml()
489
491 """
492 Validate the query
493 ==================
494
495 Invalid queries will fail to run, and it is not always
496 obvious why. The validation routine checks to see that
497 the query will not cause errors on execution, and tries to
498 provide informative error messages.
499
500 This method is called immediately after a query is fully
501 deserialised.
502
503 @raise ModelError: if the paths are invalid
504 @raise QueryError: if there are errors in query construction
505 @raise ConstraintError: if there are errors in constraint construction
506
507 """
508 self.verify_views()
509 self.verify_constraint_paths()
510 self.verify_join_paths()
511 self.verify_pd_paths()
512 self.validate_sort_order()
513 self.do_verification = True
514
516 """
517 Replace the current selection of output columns with this one
518 =============================================================
519
520 example::
521
522 query.select("*", "proteins.name")
523
524 This method is intended to provide an API familiar to those
525 with experience of SQL or other ORM layers. This method, in
526 contrast to other view manipulation methods, replaces
527 the selection of output columns, rather than appending to it.
528
529 Note that any sort orders that are no longer in the view will
530 be removed.
531
532 @param paths: The output columns to add
533 """
534 self.views = []
535 self.add_view(*paths)
536 so_elems = self._sort_order_list
537 self._sort_order_list = SortOrderList()
538
539 for so in so_elems:
540 if so.path in self.views:
541 self._sort_order_list.append(so)
542 return self
543
545 """
546 Add one or more views to the list of output columns
547 ===================================================
548
549 example::
550
551 query.add_view("Gene.name Gene.organism.name")
552
553 This is the main method for adding views to the list
554 of output columns. As well as appending views, it
555 will also split a single, space or comma delimited
556 string into multiple paths, and flatten out lists, or any
557 combination. It will also immediately try to validate
558 the views.
559
560 Output columns must be valid paths according to the
561 data model, and they must represent attributes of tables
562
563 Also available as:
564 - add_views
565 - add_column
566 - add_columns
567 - add_to_select
568
569 @see: intermine.model.Model
570 @see: intermine.model.Path
571 @see: intermine.model.Attribute
572 """
573 views = []
574 for p in paths:
575 if isinstance(p, (set, list)):
576 views.extend(list(p))
577 elif isinstance(p, Class):
578 views.append(p.name + ".*")
579 elif isinstance(p, Column):
580 if p._path.is_attribute():
581 views.append(str(p))
582 else:
583 views.append(str(p) + ".*")
584 else:
585 views.extend(re.split("(?:,?\s+|,)", p))
586
587 views = map(self.prefix_path, views)
588
589 views_to_add = []
590 for view in views:
591 if view.endswith(".*"):
592 view = re.sub("\.\*$", "", view)
593 path = self.model.make_path(view, self.get_subclass_dict())
594 cd = path.end_class
595 attr_views = map(lambda x: view + "." + x.name, cd.attributes)
596 views_to_add.extend(attr_views)
597 else:
598 views_to_add.append(view)
599
600 if self.do_verification:
601 self.verify_views(views_to_add)
602
603 self.views.extend(views_to_add)
604
605 return self
606
616
618 """
619 Clear the output column list
620 ============================
621
622 Deletes all entries currently in the view list.
623 """
624 self.views = []
625
627 """
628 Check to see if the views given are valid
629 =========================================
630
631 This method checks to see if the views:
632 - are valid according to the model
633 - represent attributes
634
635 @see: L{intermine.model.Attribute}
636
637 @raise intermine.model.ModelError: if the paths are invalid
638 @raise ConstraintError: if the paths are not attributes
639 """
640 if views is None: views = self.views
641 for path in views:
642 path = self.model.make_path(path, self.get_subclass_dict())
643 if not path.is_attribute():
644 raise ConstraintError("'" + str(path)
645 + "' does not represent an attribute")
646
648 """
649 Add a constraint (filter on records)
650 ====================================
651
652 example::
653
654 query.add_constraint("Gene.symbol", "=", "zen")
655
656 This method will try to make a constraint from the arguments
657 given, trying each of the classes it knows of in turn
658 to see if they accept the arguments. This allows you
659 to add constraints of different types without having to know
660 or care what their classes or implementation details are.
661 All constraints derive from intermine.constraints.Constraint,
662 and they all have a path attribute, but are otherwise diverse.
663
664 Before adding the constraint to the query, this method
665 will also try to check that the constraint is valid by
666 calling Query.verify_constraint_paths()
667
668 @see: L{intermine.constraints}
669
670 @rtype: L{intermine.constraints.Constraint}
671 """
672 if len(args) == 1 and len(kwargs) == 0:
673 if isinstance(args[0], tuple):
674 con = self.constraint_factory.make_constraint(*args[0])
675 else:
676 con = args[0]
677 else:
678 con = self.constraint_factory.make_constraint(*args, **kwargs)
679
680 con.path = self.prefix_path(con.path)
681 if self.do_verification: self.verify_constraint_paths([con])
682 if hasattr(con, "code"):
683 self.constraint_dict[con.code] = con
684 else:
685 self.uncoded_constraints.append(con)
686
687 return con
688
def where(self, *args, **kwargs):
    """
    Add a constraint to the query
    =============================

    Chainable counterpart of add_constraint: when the view is
    still empty, the query root is first passed to add_view,
    then the arguments are handed on to add_constraint unchanged.
    Returns self rather than the new constraint, so that calls
    can be chained.

    Also available as Query.filter
    """
    if not self.views:
        self.add_view(self.root)

    self.add_constraint(*args, **kwargs)
    return self
704
706 """
707 Return a Column object suitable for using to construct constraints with
708 =======================================================================
709
710 This method is part of the SQLAlchemy compatible API.
711
712 Also available as Query.c
713 """
714 return self.model.column(self.prefix_path(string), self.get_subclass_dict(), self)
715
717 """
718 Check that the constraints are valid
719 ====================================
720
721 This method will check the path attribute of each constraint.
722 In addition it will:
723 - Check that BinaryConstraints and MultiConstraints have an Attribute as their path
724 - Check that TernaryConstraints have a Reference as theirs
725 - Check that SubClassConstraints have a correct subclass relationship
726 - Check that LoopConstraints have a valid loopPath, of a compatible type
727 - Check that ListConstraints refer to an object
728
729 @param cons: The constraints to check (defaults to all constraints on the query)
730
731 @raise ModelError: if the paths are not valid
732 @raise ConstraintError: if the constraints do not satisfy the above rules
733
734 """
735 if cons is None: cons = self.constraints
736 for con in cons:
737 pathA = self.model.make_path(con.path, self.get_subclass_dict())
738 if isinstance(con, constraints.TernaryConstraint):
739 if pathA.get_class() is None:
740 raise ConstraintError("'" + str(pathA) + "' does not represent a class, or a reference to a class")
741 elif isinstance(con, constraints.BinaryConstraint) or isinstance(con, constraints.MultiConstraint):
742 if not pathA.is_attribute():
743 raise ConstraintError("'" + str(pathA) + "' does not represent an attribute")
744 elif isinstance(con, constraints.SubClassConstraint):
745 pathB = self.model.make_path(con.subclass, self.get_subclass_dict())
746 if not pathB.get_class().isa(pathA.get_class()):
747 raise ConstraintError("'" + con.subclass + "' is not a subclass of '" + con.path + "'")
748 elif isinstance(con, constraints.LoopConstraint):
749 pathB = self.model.make_path(con.loopPath, self.get_subclass_dict())
750 for path in [pathA, pathB]:
751 if not path.get_class():
752 raise ConstraintError("'" + str(path) + "' does not refer to an object")
753 (classA, classB) = (pathA.get_class(), pathB.get_class())
754 if not classA.isa(classB) and not classB.isa(classA):
755 raise ConstraintError("the classes are of incompatible types: " + str(classA) + "," + str(classB))
756 elif isinstance(con, constraints.ListConstraint):
757 if not pathA.get_class():
758 raise ConstraintError("'" + str(pathA) + "' does not refer to an object")
759
760 @property
762 """
763 Returns the constraints of the query
764 ====================================
765
766 Query.constraints S{->} list(intermine.constraints.Constraint)
767
768 Constraints are returned in the order of their code (normally
769 the order they were added to the query) and with any
770 subclass constraints at the end.
771
772 @rtype: list(Constraint)
773 """
774 ret = sorted(self.constraint_dict.values(), key=lambda con: con.code)
775 ret.extend(self.uncoded_constraints)
776 return ret
777
779 """
780 Returns the constraint with the given code
781 ==========================================
782
783 Returns the constraint with the given code, if it exists.
784 If no such constraint exists, it throws a ConstraintError
785
786 @return: the constraint corresponding to the given code
787 @rtype: L{intermine.constraints.CodedConstraint}
788 """
789 if code in self.constraint_dict:
790 return self.constraint_dict[code]
791 else:
792 raise ConstraintError("There is no constraint with the code '"
793 + code + "' on this query")
794
796 """
797 Add a join statement to the query
798 =================================
799
800 example::
801
802 query.add_join("Gene.proteins", "OUTER")
803
804 A join statement is used to determine if references should
805 restrict the result set to only those records where the references
806 exist. For example, if one had a query with the view::
807
808 "Gene.name", "Gene.proteins.name"
809
810 Then in the normal case (that of an INNER join), we would only
811 get Genes that also have at least one protein that they reference.
812 Simply by asking for this output column you are placing a
813 restriction on the information you get back.
814
815 If in fact you wanted all genes, regardless of whether they had
816 proteins associated with them or not, but if they did
817 you would rather like to know _what_ proteins, then you need
818 to specify this reference to be an OUTER join::
819
820 query.add_join("Gene.proteins", "OUTER")
821
822 Now you will get many more rows of results, some of which will
823 have "null" values where the protein name would have been,
824
825 This method will also attempt to validate the join by calling
826 Query.verify_join_paths(). Joins must have a valid path, the
827 style can be either INNER or OUTER (defaults to OUTER,
828 as the user does not need to specify inner joins, since all
829 references start out as inner joins), and the path
830 must be a reference.
831
832 @raise ModelError: if the path is invalid
833 @raise TypeError: if the join style is invalid
834
835 @rtype: L{intermine.pathfeatures.Join}
836 """
837 join = Join(*args, **kwargs)
838 join.path = self.prefix_path(join.path)
839 if self.do_verification: self.verify_join_paths([join])
840 self.joins.append(join)
841 return self
842
844 """Alias for add_join(column, "OUTER")"""
845 return self.add_join(str(column), "OUTER")
846
848 """
849 Check that the joins are valid
850 ==============================
851
852 Joins must have valid paths, and they must refer to references.
853
854 @raise ModelError: if the paths are invalid
855 @raise QueryError: if the paths are not references
856 """
857 if joins is None: joins = self.joins
858 for join in joins:
859 path = self.model.make_path(join.path, self.get_subclass_dict())
860 if not path.is_reference():
861 raise QueryError("'" + join.path + "' is not a reference")
862
864 """
865 Add a path description to the query
866 ===================================
867
868 example::
869
870 query.add_path_description("Gene.proteins.proteinDomains", "Protein Domain")
871
872 This allows you to alias the components of long paths to
873 improve the way they display column headers in a variety of circumstances.
874 In the above example, if the view included the unwieldy path
875 "Gene.proteins.proteinDomains.primaryIdentifier", it would (depending on the
876 mine) be displayed as "Protein Domain > DB Identifer". These
877 settings are taken into account by the webservice when generating
878 column headers for flat-file results with the columnheaders parameter given, and
879 always supplied when requesting jsontable results.
880
881 @rtype: L{intermine.pathfeatures.PathDescription}
882
883 """
884 path_description = PathDescription(*args, **kwargs)
885 path_description.path = self.prefix_path(path_description.path)
886 if self.do_verification: self.verify_pd_paths([path_description])
887 self.path_descriptions.append(path_description)
888 return path_description
889
891 """
892 Check that the path of the path description is valid
893 ====================================================
894
895 Checks for consistency with the data model
896
897 @raise ModelError: if the paths are invalid
898 """
899 if pds is None: pds = self.path_descriptions
900 for pd in pds:
901 self.model.validate_path(pd.path, self.get_subclass_dict())
902
903 @property
905 """
906 Returns the list of constraints that have a code
907 ================================================
908
909 Query.coded_constraints S{->} list(intermine.constraints.CodedConstraint)
910
911 This returns an up to date list of the constraints that can
912 be used in a logic expression. The only kind of constraint
913 that this excludes, at present, is SubClassConstraints
914
915 @rtype: list(L{intermine.constraints.CodedConstraint})
916 """
917 return sorted(self.constraint_dict.values(), key=lambda con: con.code)
918
920 """
921 Returns the logic expression for the query
922 ==========================================
923
924 This returns the up to date logic expression. The default
925 value is the representation of all coded constraints and'ed together.
926
927 If the logic is empty and there are no constraints, returns an
928 empty string.
929
930 The LogicGroup object stringifies to a string that can be parsed to
931 obtain itself (eg: "A and (B or C or D)").
932
933 @rtype: L{intermine.constraints.LogicGroup}
934 """
935 if self._logic is None:
936 if len(self.coded_constraints) > 0:
937 return reduce(lambda x, y: x+y, self.coded_constraints)
938 else:
939 return ""
940 else:
941 return self._logic
942
944 """
945 Sets the Logic given the appropriate input
946 ==========================================
947
948 example::
949
950 Query.set_logic("A and (B or C)")
951
952 This sets the logic to the appropriate value. If the value is
953 already a LogicGroup, it is accepted, otherwise
954 the string is tokenised and parsed.
955
956 The logic is then validated with a call to validate_logic()
957
958 @raise LogicParseError: if there is a syntax error in the logic
959 """
960 if isinstance(value, constraints.LogicGroup):
961 logic = value
962 else:
963 logic = self._logic_parser.parse(value)
964 if self.do_verification: self.validate_logic(logic)
965 self._logic = logic
966 return self
967
969 """
970 Validates the query logic
971 =========================
972
973 Attempts to validate the logic by checking
974 that every coded_constraint is included
975 at least once
976
977 @raise QueryError: if not every coded constraint is represented
978 """
979 if logic is None: logic = self._logic
980 logic_codes = set(logic.get_codes())
981 for con in self.coded_constraints:
982 if con.code not in logic_codes:
983 raise QueryError("Constraint " + con.code + repr(con)
984 + " is not mentioned in the logic: " + str(logic))
985
987 """
988 Gets the sort order when none has been specified
989 ================================================
990
991 This method is called to determine the sort order if
992 none is specified
993
994 @raise QueryError: if the view is empty
995
996 @rtype: L{intermine.pathfeatures.SortOrderList}
997 """
998 try:
999 return SortOrderList((self.views[0], SortOrder.ASC))
1000 except IndexError:
1001 raise QueryError("Query view is empty")
1002
1004 """
1005 Return a sort order for the query
1006 =================================
1007
1008 This method returns the sort order if set, otherwise
1009 it returns the default sort order
1010
1011 @raise QueryError: if the view is empty
1012
1013 @rtype: L{intermine.pathfeatures.SortOrderList}
1014 """
1015 if self._sort_order_list.is_empty():
1016 return self.get_default_sort_order()
1017 else:
1018 return self._sort_order_list
1019
1021 """
1022 Adds a sort order to the query
1023 ==============================
1024
1025 example::
1026
1027 Query.add_sort_order("Gene.name", "DESC")
1028
1029 This method adds a sort order to the query.
1030 A query can have multiple sort orders, which are
1031 assessed in sequence.
1032
1033 If a query has two sort-orders, for example,
1034 the first being "Gene.organism.name asc",
1035 and the second being "Gene.name desc", you would have
1036 the list of genes grouped by organism, with the
1037 lists within those groupings in reverse alphabetical
1038 order by gene name.
1039
1040 This method will try to validate the sort order
1041 by calling validate_sort_order()
1042
1043 Also available as Query.order_by
1044 """
1045 so = SortOrder(str(path), direction)
1046 so.path = self.prefix_path(so.path)
1047 if self.do_verification: self.validate_sort_order(so)
1048 self._sort_order_list.append(so)
1049 return self
1050
1052 """
1053 Check the validity of the sort order
1054 ====================================
1055
1056 Checks that the sort order paths are:
1057 - valid paths
1058 - in the view
1059
1060 @raise QueryError: if the sort order is not in the view
1061 @raise ModelError: if the path is invalid
1062
1063 """
1064 if not so_elems:
1065 so_elems = self._sort_order_list
1066
1067 for so in so_elems:
1068 self.model.validate_path(so.path, self.get_subclass_dict())
1069 if so.path not in self.views:
1070 raise QueryError("Sort order element is not in the view: " + so.path)
1071
1073 """
1074 Return the current mapping of class to subclass
1075 ===============================================
1076
1077 This method returns a mapping of classes used
1078 by the model for assessing whether certain paths are valid. For
1079 instance, if you subclass MicroArrayResult to be FlyAtlasResult,
1080 you can refer to the .presentCall attributes of fly atlas results.
1081 MicroArrayResults do not have this attribute, and a path such as::
1082
1083 Gene.microArrayResult.presentCall
1084
1085 would be marked as invalid unless the dictionary is provided.
1086
1087 Users most likely will not need to ever call this method.
1088
1089 @rtype: dict(string, string)
1090 """
1091 subclass_dict = {}
1092 for c in self.constraints:
1093 if isinstance(c, constraints.SubClassConstraint):
1094 subclass_dict[c.path] = c.subclass
1095 return subclass_dict
1096
def results(self, row="object", start=0, size=None):
    """
    Return an iterator over result rows
    ===================================

    Usage::

      >>> for gene in query.results():
      ...    print gene.symbol

    Be aware that C{start} and C{size} page over the underlying rows
    from which the returned objects are assembled. For queries that
    include any kind of collection they are therefore unlikely to
    select the objects you expect; when you want rows back, the
    simpler rows method is recommended.

    @param row: the format for the row. Defaults to "object". Valid options are
        "rr", "dict", "list", "jsonrows", "object", "jsonobjects", "tsv", "csv".
    @type row: string
    @param start: sent to the service as the "start" parameter
    @param size: sent to the service as the "size" parameter; left out of
        the request entirely when it is None, 0 or otherwise falsy

    @rtype: L{intermine.webservice.ResultIterator}

    @raise WebserviceError: if the request is unsuccessful
    """
    resource = self.get_results_path()
    request = self.to_query_params()
    request["start"] = start
    # A falsy size means "no limit": the key is simply not sent.
    if size:
        request["size"] = size
    return self.service.get_results(resource, request, row, self.views, self.root)
1129
def rows(self, start=0, size=None):
    """
    Return the results as rows of data
    ==================================

    Convenience wrapper: delegates to results() with the row
    format fixed to "rr", forwarding C{start} and C{size} untouched.

    Usage::

      >>> for row in query.rows(start=10, size=10):
      ...     print row["proteins.name"]

    @rtype: iterable<intermine.webservice.ResultRow>
    """
    paging = dict(start=start, size=size)
    return self.results(row="rr", **paging)
1145
def one(self, row="jsonobjects"):
    """
    Return one result, and raise an error if the result size is not 1
    =================================================================

    For every format other than "jsonobjects" the count must be
    exactly 1, and the value of first(row) is returned.

    For "jsonobjects", a count of 1 also short-circuits to first(row).
    Otherwise the results() iterator is walked, and exactly one item
    must come out of it.

    @param row: the result format, handed on to first()
    @type row: string

    @raise QueryError: if there is not exactly one result
    """
    if row != "jsonobjects":
        total = self.count()
        if total == 1:
            return self.first(row)
        raise QueryError("Result size is not one: got %d results" % (total))

    if self.count() == 1:
        return self.first(row)

    # The count was not 1, so iterate and insist on a single item.
    found = None
    for candidate in self.results():
        if found is not None:
            raise QueryError("More than one result received")
        found = candidate
    if found is None:
        raise QueryError("No results received")
    return found
1168
def first(self, row="jsonobjects", start=0):
    """
    Return the first result, or None if the results are empty

    @param row: the result format, passed through to results()
    @type row: string
    @param start: passed through to results() as the start offset
    """
    # "jsonobjects" requests carry no size limit; every other format
    # asks for a single row. (Presumably because one object can span
    # several underlying rows - confirm against the service.)
    limit = None if row == "jsonobjects" else 1
    try:
        return self.results(row, start=start, size=limit).next()
    except StopIteration:
        return None
1179
1181 """
1182 Get a list of result rows
1183 =========================
1184
1185 This method is a shortcut so that you do not have to
1186 do a list comprehension yourself on the iterator that
1187 is normally returned. If you have a very large result
1188 set (and these can get up to hundreds of thousands of rows
1189 pretty easily) you will not want to
1190 have the whole list in memory at once, but there may
1191 be other circumstances when you might want to keep the whole
1192 list in one place.
1193
1194 It takes all the same arguments and parameters as Query.results
1195
1196 Also available as Query.all
1197
1198 @see: L{intermine.query.Query.results}
1199
1200 """
1201 rows = self.results(*args, **kwargs)
1202 return [r for r in rows]
1203
1206
1208 """
1209 Return the total number of rows this query returns
1210 ==================================================
1211
1212 Obtain the number of rows a particular query will
1213 return, without having to fetch and parse all the
1214 actual data. This method makes a request to the server
1215 to report the count for the query, and is sugar for a
1216 results call.
1217
1218 Also available as Query.size
1219
1220 @rtype: int
1221 @raise WebserviceError: if the request is unsuccessful.
1222 """
1223 count_str = ""
1224 rows = self.results("count")
1225 for row in rows:
1226 count_str += row
1227 try:
1228 return int(count_str)
1229 except ValueError:
1230 raise WebserviceError("Server returned a non-integer count: " + count_str)
1231
1233 """
1234 Returns the uri to use to create a list from this query
1235 =======================================================
1236
1237 Query.get_list_upload_uri() -> str
1238
1239 This method is used internally when performing list operations
1240 on queries.
1241
1242 @rtype: str
1243 """
1244 return self.service.root + self.service.QUERY_LIST_UPLOAD_PATH
1245
1247 """
1248 Returns the uri to use to append to a list from this query
1249 ==========================================================
1250
1251 Query.get_list_append_uri() -> str
1252
1253 This method is used internally when performing list operations
1254 on queries.
1255
1256 @rtype: str
1257 """
1258 return self.service.root + self.service.QUERY_LIST_APPEND_PATH
1259
1260
1262 """
1263 Returns the path section pointing to the REST resource
1264 ======================================================
1265
1266 Query.get_results_path() -> str
1267
1268 Internally, this just calls a constant property
1269 in intermine.service.Service
1270
1271 @rtype: str
1272 """
1273 return self.service.QUERY_PATH
1274
1275
1277 """
1278 Returns the child objects of the query
1279 ======================================
1280
1281 This method is used during the serialisation of queries
1282 to xml. It is unlikely you will need access to this as a whole.
1283 Consider using "path_descriptions", "joins", "constraints" instead
1284
1285 @see: Query.path_descriptions
1286 @see: Query.joins
1287 @see: Query.constraints
1288
1289 @return: the child element of this query
1290 @rtype: list
1291 """
1292 return sum([self.path_descriptions, self.joins, self.constraints], [])
1293
1295 """
1296 Returns the parameters to be passed to the webservice
1297 =====================================================
1298
1299 The query is responsible for producing its own query
1300 parameters. These consist simply of:
1301 - query: the xml representation of the query
1302
1303 @rtype: dict
1304
1305 """
1306 xml = self.to_xml()
1307 params = {'query' : xml }
1308 return params
1309
1311 """
1312 Returns a DOM node representing the query
1313 =========================================
1314
1315 This is an intermediate step in the creation of the
1316 xml serialised version of the query. You probably
1317 won't need to call this directly.
1318
1319 @rtype: xml.dom.minidom.Node
1320 """
1321 impl = getDOMImplementation()
1322 doc = impl.createDocument(None, "query", None)
1323 query = doc.documentElement
1324
1325 query.setAttribute('name', self.name)
1326 query.setAttribute('model', self.model.name)
1327 query.setAttribute('view', ' '.join(self.views))
1328 query.setAttribute('sortOrder', str(self.get_sort_order()))
1329 query.setAttribute('longDescription', self.description)
1330 if len(self.coded_constraints) > 1:
1331 query.setAttribute('constraintLogic', str(self.get_logic()))
1332
1333 for c in self.children():
1334 element = doc.createElement(c.child_type)
1335 for name, value in c.to_dict().items():
1336 if isinstance(value, (set, list)):
1337 for v in value:
1338 subelement = doc.createElement(name)
1339 text = doc.createTextNode(v)
1340 subelement.appendChild(text)
1341 element.appendChild(subelement)
1342 else:
1343 element.setAttribute(name, value)
1344 query.appendChild(element)
1345 return query
1346
1348 """
1349 Return an XML serialisation of the query
1350 ========================================
1351
1352 This method serialises the current state of the query to an
1353 xml string, suitable for storing, or sending over the
1354 internet to the webservice.
1355
1356 @return: the serialised xml string
1357 @rtype: string
1358 """
1359 n = self.to_Node()
1360 return n.toxml()
1361
1376
1378 """
1379 Performs a deep clone
1380 =====================
1381
1382 This method will produce a clone that is independent,
1383 and can be altered without affecting the original,
1384 but starts off with the exact same state as it.
1385
1386 The only shared elements should be the model
1387 and the service, which are shared by all queries
1388 that refer to the same webservice.
1389
1390 @return: same class as caller
1391 """
1392 newobj = self.__class__(self.model)
1393 for attr in ["joins", "views", "_sort_order_list", "_logic", "path_descriptions", "constraint_dict"]:
1394 setattr(newobj, attr, deepcopy(getattr(self, attr)))
1395
1396 for attr in ["name", "description", "service", "do_verification", "constraint_factory", "root"]:
1397 setattr(newobj, attr, getattr(self, attr))
1398 return newobj
1399
1401 """
1402 A Class representing a predefined query
1403 =======================================
1404
1405 Templates are ways of saving queries
1406 and allowing others to run them
1407 simply. They are the main interface
1408 to querying in the webapp
1409
1410 SYNOPSIS
1411 --------
1412
1413 example::
1414
1415 service = Service("http://www.flymine.org/query/service")
1416 template = service.get_template("Gene_Pathways")
1417 for row in template.results(A={"value":"eve"}):
1418 process_row(row)
1419 ...
1420
1421 A template is a subclass of query that comes predefined. They
1422 are typically retrieved from the webservice and run by specifying
1423 the values for their existing constraints. They are a concise
1424 and powerful way of running queries in the webapp.
1425
1426 Being subclasses of query, everything is true of them that is true
1427 of a query. They are just less work, as you don't have to design each
1428 one. Also, you can store your own templates in the web-app, and then
1429 access them as a private webservice method, from anywhere, making them
1430 a kind of query in the cloud - for this you will need to authenticate
1431 by providing log in details to the service.
1432
1433 The most significant difference is how constraint values are specified
1434 for each set of results.
1435
1436 @see: L{Template.results}
1437
1438 """
1440 """
1441 Constructor
1442 ===========
1443
1444 Instantiation is identical to that of queries. As with queries,
1445 these are best obtained from the intermine.webservice.Service
1446 factory methods.
1447
1448 @see: L{intermine.webservice.Service.get_template}
1449 """
1450 super(Template, self).__init__(*args, **kwargs)
1451 self.constraint_factory = constraints.TemplateConstraintFactory()
1452 @property
1454 """
1455 Return the list of constraints you can edit
1456 ===========================================
1457
1458 Template.editable_constraints -> list(intermine.constraints.Constraint)
1459
1460 Templates have a concept of editable constraints, which
1461 is a way of hiding complexity from users. An underlying query may have
1462 five constraints, but only expose the one that is actually
1463 interesting. This property returns this subset of constraints
1464 that have the editable flag set to true.
1465 """
1466 isEditable = lambda x: x.editable
1467 return filter(isEditable, self.constraints)
1468
def to_query_params(self):
    """
    Returns the query parameters needed for the webservice
    ======================================================

    Template.to_query_params() -> dict(string, string)

    Overrides the method of the same name in query to provide the
    parameters needed by the templates results service. These
    are slightly more complex:
      - name: The template's name
      - for each editable constraint that is switched on: (where [i] is
        an integer starting at 1 and incremented for each constraint sent)
        - constraint[i]: the path
        - op[i]: the operator
        - value[i]: the value
        - code[i]: the code
        - extra[i]: the extra value for ternary constraints (optional)

    Editable constraints that are switched off are left out of the
    parameters altogether and do not use up an index.

    @rtype: dict
    """
    # Keys from the constraint's to_dict() that the service expects
    # under a different name; every other key is passed through as is.
    renamed = {"extraValue": "extra", "path": "constraint"}

    params = {'name': self.name}
    index = 1
    for con in self.editable_constraints:
        # Must be `continue`: a bare `next` is only a reference to the
        # builtin and would let switched-off constraints through.
        if not con.switched_on:
            continue
        suffix = str(index)
        for key, value in con.to_dict().items():
            params[renamed.get(key, key) + suffix] = value
        index += 1
    return params
1500
1502 """
1503 Returns the path section pointing to the REST resource
1504 ======================================================
1505
1506 Template.get_results_path() S{->} str
1507
1508 Internally, this just calls a constant property
1509 in intermine.service.Service
1510
1511 This overrides the method of the same name in Query
1512
1513 @return: the path to the REST resource
1514 @rtype: string
1515 """
1516 return self.service.TEMPLATEQUERY_PATH
1517
1519 """
1520 Gets a template to run
1521 ======================
1522
1523 Template.get_adjusted_template(con_values) S{->} Template
1524
1525 When templates are run, they are first cloned, and their
1526 values are changed to those desired. This leaves the original
1527 template unchanged so it can be run again with different
1528 values. This method does the cloning and changing of constraint
1529 values
1530
1531 @raise ConstraintError: if the constraint values specify values for a non-editable constraint.
1532
1533 @rtype: L{Template}
1534 """
1535 clone = self.clone()
1536 for code, options in con_values.items():
1537 con = clone.get_constraint(code)
1538 if not con.editable:
1539 raise ConstraintError("There is a constraint '" + code
1540 + "' on this query, but it is not editable")
1541 for key, value in options.items():
1542 setattr(con, key, value)
1543 return clone
1544
def results(self, row="object", start=0, size=None, **con_values):
    """
    Get an iterator over result rows
    ================================

    Returns the same values, with the same options, as the method
    of the same name in Query (see intermine.query.Query). The main
    difference is in the arguments.

    In addition to C{row}, C{start} and C{size}, any further keyword
    arguments supply values for the editable constraints. eg::

      template.results(
        A = {"value": "eve"},
        B = {"op": ">", "value": 5000}
      )

    Each key should be the code of an editable constraint (inspect
    Template.editable_constraints to see them), and each value a
    dictionary of the constraint properties to replace. You can replace
    "op" (operator), "value", and "extra_value" and "values" in the
    case of ternary and multi constraints.

    The template itself is not modified: the values are applied to the
    copy returned by get_adjusted_template, and that copy is run.

    @rtype: L{intermine.webservice.ResultIterator}
    """
    adjusted = self.get_adjusted_template(con_values)
    # Run the parent class implementation on the adjusted copy.
    parent_view = super(Template, adjusted)
    return parent_view.results(row, start, size)
1574
1576 """
1577 Get a list of result rows
1578 =========================
1579
1580 This method performs the same as the method of the
1581 same name in Query, and it shares the semantics of
1582 Template.results().
1583
1584 @see: L{intermine.query.Query.get_results_list}
1585 @see: L{intermine.query.Template.results}
1586
1587 @rtype: list
1588
1589 """
1590 clone = self.get_adjusted_template(con_values)
1591 return super(Template, clone).get_results_list(row, start, size)
1592
1597
1598 - def rows(self, start=0, size=None, **con_values):
1602
def count(self, **con_values):
    """
    Return the total number of rows this template returns
    =====================================================

    Reports how many rows the template would return, without
    having to fetch and parse all the actual data. The keyword
    arguments are constraint values, applied through
    get_adjusted_template to a copy of this template; the count
    is then obtained from the parent class implementation, called
    on that copy.

    @rtype: int
    @raise WebserviceError: if the request is unsuccessful.
    """
    adjusted = self.get_adjusted_template(con_values)
    parent_view = super(Template, adjusted)
    return parent_view.count()
1619
1623
1626
1629