1 import re
2 from copy import deepcopy
3 from xml.dom import minidom, getDOMImplementation
4
5 from intermine.util import openAnything, ReadableException
6 from intermine.pathfeatures import PathDescription, Join, SortOrder, SortOrderList
7 from intermine.model import Column, Class
8 import constraints
9
10 """
11 Classes representing queries against webservices
12 ================================================
13
14 Representations of queries, and templates.
15
16 """
17
18 __author__ = "Alex Kalderimis"
19 __organization__ = "InterMine"
20 __license__ = "LGPL"
21 __contact__ = "dev@intermine.org"
22
23
24 -class Query(object):
25 """
26 A Class representing a structured database query
27 ================================================
28
29 Objects of this class have properties that model the
30 attributes of the query, and methods for performing
31 the request.
32
33 SYNOPSIS
34 --------
35
36 example:
37
38 >>> service = Service("http://www.flymine.org/query/service")
39 >>> query = service.new_query()
40 >>>
41 >>> query.add_view("Gene.symbol", "Gene.pathways.name", "Gene.proteins.symbol")
42 >>> query.add_sort_order("Gene.pathways.name")
43 >>>
44 >>> query.add_constraint("Gene", "LOOKUP", "eve")
45 >>> query.add_constraint("Gene.pathways.name", "=", "Phosphate*")
46 >>>
47 >>> query.set_logic("A or B")
48 >>>
49 >>> for row in query.rows():
50 ... handle_row(row)
51
52 OR, using an SQL style DSL:
53
54 >>> s = Service("www.flymine.org/query")
55 >>> query = s.query("Gene").\\
56 ... select("*", "pathways.*").\\
57 ... where("symbol", "=", "H").\\
58 ... outerjoin("pathways").\\
59 ... order_by("symbol")
60 >>> for row in query.rows(start=10, size=5):
61 ... handle_row(row)
62
63 OR, for a more SQL-alchemy, ORM style:
64
65 >>> for gene in s.query(s.model.Gene).filter(s.model.Gene.symbol == ["zen", "H", "eve"]).add_columns(s.model.Gene.alleles):
66 ... handle(gene)
67
68 Query objects represent structured requests for information over the database
69 housed at the datawarehouse whose webservice you are querying. They utilise
70 some of the concepts of relational databases, within an object-related
71 ORM context. If you don't know what that means, don't worry: you
72 don't need to write SQL, and the queries will be fast.
73
74 To make things slightly more familiar to those with knowledge of SQL, some syntactical
75 sugar is provided to make constructing queries a bit more recognisable.
76
77 PRINCIPLES
78 ----------
79
80 The data model represents tables in the databases as classes, with records
81 within tables as instances of that class. The columns of the database are the
82 fields of that object::
83
84 The Gene table - showing two records/objects
85 +---------------------------------------------------+
86 | id | symbol | length | cyto-location | organism |
87 +----------------------------------------+----------+
88 | 01 | eve | 1539 | 46C10-46C10 | 01 |
89 +----------------------------------------+----------+
90 | 02 | zen | 1331 | 84A5-84A5 | 01 |
91 +----------------------------------------+----------+
92 ...
93
94 The organism table - showing one record/object
95 +----------------------------------+
96 | id | name | taxon id |
97 +----------------------------------+
98 | 01 | D. melanogaster | 7227 |
99 +----------------------------------+
100
101 Columns that contain a meaningful value are known as 'attributes' (in the tables above, that is
102 everything except the id columns). The other columns (such as "organism" in the gene table)
103 are ones that reference records of other tables (ie. other objects), and are called
104 references. You can refer to any field in any class, that has a connection,
105 however tenuous, with a table, by using dotted path notation::
106
107 Gene.organism.name -> the name column in the organism table, referenced by a record in the gene table
108
109 These paths, and the connections between records and tables they represent,
110 are the basis for the structure of InterMine queries.
111
112 THE STRUCTURE OF A QUERY
113 ------------------------
114
115 A query has two principal sets of properties:
116 - its view: the set of output columns
117 - its constraints: the set of rules for what to include
118
119 A query must have at least one output column in its view, but constraints
120 are optional - if you don't include any, you will get back every record
121 from the table (every object of that type)
122
123 In addition, the query must be coherent: if you have information about
124 an organism, and you want a list of genes, then the "Gene" table
125 should be the basis for your query, and as such the Gene class, which
126 represents this table, should be the root of all the paths that appear in it:
127
128 So, to take a simple example::
129
130 I have an organism name, and I want a list of genes:
131
132 The view is the list of things I want to know about those genes:
133
134 >>> query.add_view("Gene.name")
135 >>> query.add_view("Gene.length")
136 >>> query.add_view("Gene.proteins.sequence.length")
137
138 Note I can freely mix attributes and references, as long as every view ends in
139 an attribute (a meaningful value). As a short-cut I can also write:
140
141 >>> query.add_views("Gene.name", "Gene.length", "Gene.proteins.sequence.length")
142
143 or:
144
145 >>> query.add_views("Gene.name Gene.length Gene.proteins.sequence.length")
146
147 They are all equivalent. You can also use common SQL style shortcuts such as "*" for all
148 attribute fields:
149
150 >>> query.add_views("Gene.*")
151
152 You can also use "select" as a synonym for "add_view".
153
154 Now I can add my constraints. As, we mentioned, I have information about an organism, so:
155
156 >>> query.add_constraint("Gene.organism.name", "=", "D. melanogaster")
157
158 (note, here I can use "where" as a synonym for "add_constraint")
159
160 If I run this query, I will get literally millions of results -
161 it needs to be filtered further:
162
163 >>> query.add_constraint("Gene.proteins.sequence.length", "<", 500)
164
165 If that doesn't restrict things enough I can add more filters:
166
167 >>> query.add_constraint("Gene.symbol", "ONE OF", ["eve", "zen", "h"])
168
169 Now I am guaranteed to get only information on genes I am interested in.
170
171 Note, though, that because I have included the link (or "join") from Gene -> Protein,
172 this, by default, means that I only want genes that have protein information associated
173 with them. If in fact I want information on all genes, and just want to know the
174 protein information if it is available, then I can specify that with:
175
176 >>> query.add_join("Gene.proteins", "OUTER")
177
178 And if perhaps my query is not as simple as a strict cumulative filter, but I want all
179 D. mel genes that EITHER have a short protein sequence OR come from one of my favourite genes
180 (as unlikely as that sounds), I can specify the logic for that too:
181
182 >>> query.set_logic("A and (B or C)")
183
184 Each letter refers to one of the constraints - the codes are assigned in the order you add
185 the constraints. If you want to be absolutely certain about the constraints you mean, you
186 can use the constraint objects themselves:
187
188 >>> gene_is_eve = query.add_constraint("Gene.symbol", "=", "eve")
189 >>> gene_is_zen = query.add_constraint("Gene.symbol", "=", "zen")
190 >>>
191 >>> query.set_logic(gene_is_eve | gene_is_zen)
192
193 By default the logic is a straight cumulative filter (ie: A and B and C and D and ...)
194
195 Putting it all together:
196
197 >>> query.add_view("Gene.name", "Gene.length", "Gene.proteins.sequence.length")
198 >>> query.add_constraint("Gene.organism.name", "=", "D. melanogaster")
199 >>> query.add_constraint("Gene.proteins.sequence.length", "<", 500)
200 >>> query.add_constraint("Gene.symbol", "ONE OF", ["eve", "zen", "h"])
201 >>> query.add_join("Gene.proteins", "OUTER")
202 >>> query.set_logic("A and (B or C)")
203
204 This can be made more concise and readable with a little DSL sugar:
205
206 >>> query = service.query("Gene")
207 >>> query.select("name", "length", "proteins.sequence.length").\
208 ... where('organism.name', '=', 'D. melanogaster').\
209 ... where("proteins.sequence.length", "<", 500).\
210 ... where('symbol', 'ONE OF', ['eve', 'h', 'zen']).\
211 ... outerjoin('proteins').\
212 ... set_logic("A and (B or C)")
213
214 And the query is defined.
215
216 Result Processing: Rows
217 -----------------------
218
219 calling ".rows()" on a query will return an iterator of rows, where each row
220 is a ResultRow object, which can be treated as both a list and a dictionary.
221
222 Which means you can refer to columns by name:
223
224 >>> for row in query.rows():
225 ... print "name is %s" % (row["name"])
226 ... print "length is %d" % (row["length"])
227
228 As well as using list indices:
229
230 >>> for row in query.rows():
231 ... print "The first column is %s" % (row[0])
232
233 Iterating over a row iterates over the cell values as a list:
234
235 >>> for row in query.rows():
236 ... for column in row:
237 ... do_something(column)
238
239 Here each row will have a gene name, a gene length, and a sequence length, eg:
240
241 >>> print row.to_l
242 ["even skipped", "1359", "376"]
243
244 To make that clearer, you can ask for a dictionary instead of a list:
245
246 >>> for row in query.rows():
247 ... print row.to_d
248 {"Gene.name":"even skipped","Gene.length":"1359","Gene.proteins.sequence.length":"376"}
249
250
251 If you just want the raw results, for printing to a file, or for piping to another program,
252 you can request strings instead:
253
254 >>> for row in query.results("string"):
255 ... print(row)
256
257 Result Processing: Results
258 --------------------------
259
260 Results can also be processed on a record by record basis. If you have a query that
261 has output columns of "Gene.symbol", "Gene.pathways.name" and "Gene.proteins.proteinDomains.primaryIdentifier",
262 then processing it by records will return one object per gene, and that gene will have a property
263 named "pathways" which contains objects which have a name property. Likewise there will be a
264 proteins property which holds a list of proteinDomains which all have a primaryIdentifier property, and so on.
265 This allows a more object orientated approach to database records, familiar to users of
266 other ORMs.
267
268 This is the format used when you choose to iterate over a query directly, or can be explicitly
269 chosen by invoking L{intermine.query.Query.results}:
270
271 >>> for gene in query:
272 ... print gene.name, map(lambda x: x.name, gene.pathways)
273
274 The structure of the object and the information it contains depends entirely
275 on the output columns selected. The values may be None, of course, but also any valid values of an object
276 (according to the data model) will also be None if they were not selected for output. Attempts
277 to access invalid properties (such as gene.favourite_colour) will cause exceptions to be thrown.
278
279 Getting us to Generate your Code
280 --------------------------------
281
282 Not that you have to actually write any of this! The webapp will happily
283 generate the code for any query (and template) you can build in it. A good way to get
284 started is to use the webapp to generate your code, and then run it as scripts
285 to speed up your queries. You can always tinker with and edit the scripts you download.
286
287 To get generated queries, look for the "python" link at the bottom of query-builder and
288 template form pages, it looks a bit like this::
289
290 . +=====================================+=============
291 | |
292 | Perl | Python | Java [Help] |
293 | |
294 +==============================================
295
296 """
297
# Regexes are written as raw strings so that "\s" and "\(" reach the regex
# engine verbatim instead of relying on Python leaving unknown string escapes
# untouched (which newer interpreters warn about).

# Splits a sort-order string on its direction keywords; the keyword itself is
# captured, so it is kept in the split result next to the path it follows.
SO_SPLIT_PATTERN = re.compile(r"\s*(asc|desc)\s*", re.I)
# Splits a logic expression on operators and brackets, leaving the codes.
LOGIC_SPLIT_PATTERN = re.compile(r"\s*(?:and|or|\(|\))\s*", re.I)
# A dangling operator at the very end / very start of a logic expression.
TRAILING_OP_PATTERN = re.compile(r"\s*(and|or)\s*$", re.I)
LEADING_OP_PATTERN = re.compile(r"^\s*(and|or)\s*", re.I)
LOGIC_OPS = ["and", "or"]
# Every ordered pair of logic operators, in the order a nested loop over
# LOGIC_OPS would produce them. Spelled out literally because, inside a class
# body, a nested comprehension cannot see the class-level name LOGIC_OPS on
# Python 3 and would raise NameError.
LOGIC_PRODUCT = [("and", "and"), ("and", "or"), ("or", "and"), ("or", "or")]
304
def __init__(self, model, service=None, validate=True, root=None):
    """
    Construct a new Query
    =====================

    Build an empty query (no views, joins, constraints, sort order
    or logic) against an InterMine data warehouse.

    You would not normally call this constructor yourself; the
    factory method on intermine.webservice.Service handles
    construction for you.

    @param model: an instance of L{intermine.model.Model}. Required
    @param service: an instance of L{intermine.service.Service}. Optional,
        but you will not be able to make requests without one.
    @param validate: a boolean - defaults to True. If set to false, the query
        will not try and validate itself. You should not set this to false.
    @param root: optional - defaults to None. When given, it is resolved
        with model.make_path, and the root of the resulting path is
        stored as the root of this query.

    """
    self.model = model
    self.root = None if root is None else model.make_path(root).root

    # Descriptive metadata, the service used for requests, and the
    # flag that switches eager validation on and off.
    self.name = ''
    self.description = ''
    self.service = service
    self.do_verification = validate

    # The components of the query all start out empty.
    self.path_descriptions = []
    self.joins = []
    self.constraint_dict = {}
    self.uncoded_constraints = []
    self.views = []
    self._sort_order_list = SortOrderList()
    self._logic_parser = constraints.LogicParser(self)
    self._logic = None
    self.constraint_factory = constraints.ConstraintFactory()

    # Instance-level aliases (alias name, name of the attribute it mirrors).
    aliases = (
        ("c", "column"),
        ("filter", "where"),
        ("add_column", "add_view"),
        ("add_columns", "add_view"),
        ("add_views", "add_view"),
        ("select", "add_view"),
        ("order_by", "add_sort_order"),
        ("all", "get_results_list"),
        ("size", "count"),
    )
    for alias, target in aliases:
        setattr(self, alias, getattr(self, target))
355
357 """Return an iterator over all the objects returned by this query"""
358 return self.results("jsonobjects")
359
361 """Return the number of rows this query will return."""
362 return self.count()
363
@classmethod
def from_xml(cls, xml, *args, **kwargs):
    """
    Deserialise a query serialised to XML
    =====================================

    This method is used to instantiate serialised queries.
    It is used by intermine.webservice.Service objects
    to instantiate Template objects and it can be used
    to read in queries you have saved to a file.

    The document must contain exactly one <query> element. Its
    view, path descriptions, joins, constraints, sort order and
    constraint logic are read in that order. Validation is switched
    off while the document is read, and the finished query is
    validated once, with verify(), before it is returned.

    A constraint without a path attribute takes its path from an
    enclosing <node> element.

    Sort order entries whose path is not in the view are ignored, and
    codes mentioned in the constraint logic that do not belong to any
    constraint on the query are removed from the logic before it is set.

    @param xml: The xml as a file name, url, or string
    @param args: positional arguments passed on to the constructor
    @param kwargs: keyword arguments passed on to the constructor

    @raise QueryParseError: if the query cannot be parsed
    @raise ModelError: if the query has illegal paths in it
    @raise ConstraintError: if the constraints don't make sense

    @rtype: L{Query}
    """
    obj = cls(*args, **kwargs)
    # Paths are checked once, by verify(), after the whole document is read.
    obj.do_verification = False

    f = openAnything(xml)
    try:
        doc = minidom.parse(f)
    finally:
        # Close the source even when the document cannot be parsed.
        f.close()

    queries = doc.getElementsByTagName('query')
    assert len(queries) == 1, "wrong number of queries in xml"
    q = queries[0]
    obj.name = q.getAttribute('name')
    obj.description = q.getAttribute('description')
    obj.add_view(q.getAttribute('view'))

    for p in q.getElementsByTagName('pathDescription'):
        path = p.getAttribute('pathString')
        description = p.getAttribute('description')
        obj.add_path_description(path, description)

    for j in q.getElementsByTagName('join'):
        path = j.getAttribute('path')
        style = j.getAttribute('style')
        obj.add_join(path, style)

    for c in q.getElementsByTagName('constraint'):
        con_args = {}
        con_args['path'] = c.getAttribute('path')
        # minidom's getAttribute returns "" (never None) for a missing
        # attribute, so emptiness is what signals "no path here".
        if not con_args['path']:
            if c.parentNode.tagName != "node":
                msg = "Constraints must have a path"
                raise QueryParseError(msg)
            con_args['path'] = c.parentNode.getAttribute('path')
        con_args['op'] = c.getAttribute('op')
        con_args['value'] = c.getAttribute('value')
        con_args['code'] = c.getAttribute('code')
        con_args['subclass'] = c.getAttribute('type')
        con_args['editable'] = c.getAttribute('editable')
        con_args['optional'] = c.getAttribute('switchable')
        con_args['extra_value'] = c.getAttribute('extraValue')
        con_args['loopPath'] = c.getAttribute('loopPath')

        # Multi-value constraints carry their values as <value> children.
        values = []
        for val_e in c.getElementsByTagName('value'):
            texts = [node.data for node in val_e.childNodes
                     if node.nodeType == node.TEXT_NODE]
            values.append(' '.join(texts))
        if len(values) > 0:
            con_args["values"] = values

        # Drop absent attributes. A new dict is built because deleting
        # keys while iterating over the dict is an error on Python 3.
        con_args = dict((k, v) for k, v in con_args.items()
                        if v is not None and v != '')

        if "loopPath" in con_args:
            # Loop constraints are serialised with "=" / "!=".
            con_args["op"] = {
                "=": "IS",
                "!=": "IS NOT"
            }.get(con_args["op"])

        con = obj.add_constraint(**con_args)
        if not con:
            # repr() is required here: a dict cannot be concatenated to a str.
            raise ConstraintError(
                "error adding constraint with args: " + repr(con_args))

    # The split keeps the captured direction keywords, so a well formed
    # sort order becomes [path, direction, path, direction, ..., ""].
    sos = Query.SO_SPLIT_PATTERN.split(q.getAttribute('sortOrder'))
    if len(sos) == 1:
        if sos[0] in obj.views:
            obj.add_sort_order(sos[0])
    else:
        sos.pop()
        # Pair each path with the direction that follows it; an unpaired
        # trailing element is ignored.
        for path, direction in zip(sos[0::2], sos[1::2]):
            if path in obj.views:
                obj.add_sort_order(path, direction)

    original_logic = q.getAttribute('constraintLogic')
    logic = original_logic
    used_codes = set(obj.constraint_dict.keys())
    logic_codes = set(Query.LOGIC_SPLIT_PATTERN.split(logic))
    logic_codes.discard("")
    irrelevant_codes = logic_codes - used_codes
    for code in irrelevant_codes:
        # NOTE(review): a code that sits between two operators loses both
        # of them ("A and B and C" -> "A  C" when B is removed) - confirm
        # that the logic parser copes with that.
        pattern = re.compile(
            r"((and|or)\s+)?\b" + re.escape(code) + r"\b(\s+(and|or))?",
            re.I)
        logic = pattern.sub("", logic)

    # Tidy up what the removals leave behind: empty brackets, dangling
    # operators at either end, and doubled operators.
    logic = re.sub(r"\(\s*\)", "", logic)
    logic = Query.LEADING_OP_PATTERN.sub("", logic)
    logic = Query.TRAILING_OP_PATTERN.sub("", logic)
    for left, right in Query.LOGIC_PRODUCT:
        repl = left if left == right else "and"
        pattern = re.compile(left + r"\s*" + right, re.I)
        logic = pattern.sub(repl, logic)
    logic = logic.strip()

    if len(logic) > 0:
        try:
            obj.set_logic(logic)
        except Exception as e:
            raise Exception("Error parsing " + original_logic + " => " + repr(logic) + " with views: " + repr(used_codes) + str(e))

    obj.verify()

    return obj
485
487 """Return the XML serialisation of this query"""
488 return self.to_xml()
489
491 """
492 Validate the query
493 ==================
494
495 Invalid queries will fail to run, and it is not always
496 obvious why. The validation routine checks to see that
497 the query will not cause errors on execution, and tries to
498 provide informative error messages.
499
500 This method is called immediately after a query is fully
501 deserialised.
502
503 @raise ModelError: if the paths are invalid
504 @raise QueryError: if there are errors in query construction
505 @raise ConstraintError: if there are errors in constraint construction
506
507 """
508 self.verify_views()
509 self.verify_constraint_paths()
510 self.verify_join_paths()
511 self.verify_pd_paths()
512 self.validate_sort_order()
513 self.do_verification = True
514
516 """
517 Add one or more views to the list of output columns
518 ===================================================
519
520 example::
521
522 query.add_view("Gene.name Gene.organism.name")
523
524 This is the main method for adding views to the list
525 of output columns. As well as appending views, it
526 will also split a single, space or comma delimited
527 string into multiple paths, and flatten out lists, or any
528 combination. It will also immediately try to validate
529 the views.
530
531 Output columns must be valid paths according to the
532 data model, and they must represent attributes of tables
533
534 @see: intermine.model.Model
535 @see: intermine.model.Path
536 @see: intermine.model.Attribute
537 """
538 views = []
539 for p in paths:
540 if isinstance(p, (set, list)):
541 views.extend(list(p))
542 elif isinstance(p, Class):
543 views.append(p.name + ".*")
544 elif isinstance(p, Column):
545 if p._path.is_attribute():
546 views.append(str(p))
547 else:
548 views.append(str(p) + ".*")
549 else:
550 views.extend(re.split("(?:,?\s+|,)", p))
551
552 views = map(self.prefix_path, views)
553
554 views_to_add = []
555 for view in views:
556 if view.endswith(".*"):
557 view = re.sub("\.\*$", "", view)
558 path = self.model.make_path(view, self.get_subclass_dict())
559 cd = path.end_class
560 attr_views = map(lambda x: view + "." + x.name, cd.attributes)
561 views_to_add.extend(attr_views)
562 else:
563 views_to_add.append(view)
564
565 if self.do_verification:
566 self.verify_views(views_to_add)
567
568 self.views.extend(views_to_add)
569
570 return self
571
581
583 """
584 Clear the output column list
585 ============================
586
587 Deletes all entries currently in the view list.
588 """
589 self.views = []
590
592 """
593 Check to see if the views given are valid
594 =========================================
595
596 This method checks to see if the views:
597 - are valid according to the model
598 - represent attributes
599
600 @see: L{intermine.model.Attribute}
601
602 @raise intermine.model.ModelError: if the paths are invalid
603 @raise ConstraintError: if the paths are not attributes
604 """
605 if views is None: views = self.views
606 for path in views:
607 path = self.model.make_path(path, self.get_subclass_dict())
608 if not path.is_attribute():
609 raise ConstraintError("'" + str(path)
610 + "' does not represent an attribute")
611
613 """
614 Add a constraint (filter on records)
615 ====================================
616
617 example::
618
619 query.add_constraint("Gene.symbol", "=", "zen")
620
621 This method will try to make a constraint from the arguments
622 given, trying each of the classes it knows of in turn
623 to see if they accept the arguments. This allows you
624 to add constraints of different types without having to know
625 or care what their classes or implementation details are.
626 All constraints derive from intermine.constraints.Constraint,
627 and they all have a path attribute, but are otherwise diverse.
628
629 Before adding the constraint to the query, this method
630 will also try to check that the constraint is valid by
631 calling Query.verify_constraint_paths()
632
633 @see: L{intermine.constraints}
634
635 @rtype: L{intermine.constraints.Constraint}
636 """
637 if len(args) == 1 and len(kwargs) == 0:
638 if isinstance(args[0], tuple):
639 con = self.constraint_factory.make_constraint(*args[0])
640 else:
641 con = args[0]
642 else:
643 con = self.constraint_factory.make_constraint(*args, **kwargs)
644
645 con.path = self.prefix_path(con.path)
646 if self.do_verification: self.verify_constraint_paths([con])
647 if hasattr(con, "code"):
648 self.constraint_dict[con.code] = con
649 else:
650 self.uncoded_constraints.append(con)
651
652 return con
653
def where(self, *args, **kwargs):
    """
    Add a constraint to the query
    =============================

    Chainable counterpart of add_constraint: the arguments are handed
    to add_constraint unchanged, but this method returns the query
    itself rather than the new constraint.

    If the query has no views yet, the query root is first passed to
    add_view, so that the query has some output columns.

    @return: this query, to support method chaining
    """
    has_no_views = not self.views
    if has_no_views:
        self.add_view(self.root)

    self.add_constraint(*args, **kwargs)
    return self
667
670
672 """
673 Check that the constraints are valid
674 ====================================
675
676 This method will check the path attribute of each constraint.
677 In addition it will:
678 - Check that BinaryConstraints and MultiConstraints have an Attribute as their path
679 - Check that TernaryConstraints have a Reference as theirs
680 - Check that SubClassConstraints have a correct subclass relationship
681 - Check that LoopConstraints have a valid loopPath, of a compatible type
682 - Check that ListConstraints refer to an object
683
684 @param cons: The constraints to check (defaults to all constraints on the query)
685
686 @raise ModelError: if the paths are not valid
687 @raise ConstraintError: if the constraints do not satisfy the above rules
688
689 """
690 if cons is None: cons = self.constraints
691 for con in cons:
692 pathA = self.model.make_path(con.path, self.get_subclass_dict())
693 if isinstance(con, constraints.TernaryConstraint):
694 if pathA.get_class() is None:
695 raise ConstraintError("'" + str(pathA) + "' does not represent a class, or a reference to a class")
696 elif isinstance(con, constraints.BinaryConstraint) or isinstance(con, constraints.MultiConstraint):
697 if not pathA.is_attribute():
698 raise ConstraintError("'" + str(pathA) + "' does not represent an attribute")
699 elif isinstance(con, constraints.SubClassConstraint):
700 pathB = self.model.make_path(con.subclass, self.get_subclass_dict())
701 if not pathB.get_class().isa(pathA.get_class()):
702 raise ConstraintError("'" + con.subclass + "' is not a subclass of '" + con.path + "'")
703 elif isinstance(con, constraints.LoopConstraint):
704 pathB = self.model.make_path(con.loopPath, self.get_subclass_dict())
705 for path in [pathA, pathB]:
706 if not path.get_class():
707 raise ConstraintError("'" + str(path) + "' does not refer to an object")
708 (classA, classB) = (pathA.get_class(), pathB.get_class())
709 if not classA.isa(classB) and not classB.isa(classA):
710 raise ConstraintError("the classes are of incompatible types: " + str(classA) + "," + str(classB))
711 elif isinstance(con, constraints.ListConstraint):
712 if not pathA.get_class():
713 raise ConstraintError("'" + str(pathA) + "' does not refer to an object")
714
715 @property
717 """
718 Returns the constraints of the query
719 ====================================
720
721 Query.constraints S{->} list(intermine.constraints.Constraint)
722
723 Constraints are returned in the order of their code (normally
724 the order they were added to the query) and with any
725 subclass constraints at the end.
726
727 @rtype: list(Constraint)
728 """
729 ret = sorted(self.constraint_dict.values(), key=lambda con: con.code)
730 ret.extend(self.uncoded_constraints)
731 return ret
732
734 """
735 Returns the constraint with the given code
736 ==========================================
737
738 Returns the constraint with the given code, if it exists.
739 If no such constraint exists, it throws a ConstraintError
740
741 @return: the constraint corresponding to the given code
742 @rtype: L{intermine.constraints.CodedConstraint}
743 """
744 if code in self.constraint_dict:
745 return self.constraint_dict[code]
746 else:
747 raise ConstraintError("There is no constraint with the code '"
748 + code + "' on this query")
749
751 """
752 Add a join statement to the query
753 =================================
754
755 example::
756
757 query.add_join("Gene.proteins", "OUTER")
758
759 A join statement is used to determine if references should
760 restrict the result set by only including records for which those
761 references exist. For example, if one had a query with the view::
762
763 "Gene.name", "Gene.proteins.name"
764
765 Then in the normal case (that of an INNER join), we would only
766 get Genes that also have at least one protein that they reference.
767 Simply by asking for this output column you are placing a
768 restriction on the information you get back.
769
770 If in fact you wanted all genes, regardless of whether they had
771 proteins associated with them or not, but if they did
772 you would rather like to know _what_ proteins, then you need
773 to specify this reference to be an OUTER join::
774
775 query.add_join("Gene.proteins", "OUTER")
776
777 Now you will get many more rows of results, some of which will
778 have "null" values where the protein name would have been,
779
780 This method will also attempt to validate the join by calling
781 Query.verify_join_paths(). Joins must have a valid path, the
782 style can be either INNER or OUTER (defaults to OUTER,
783 as the user does not need to specify inner joins, since all
784 references start out as inner joins), and the path
785 must be a reference.
786
787 @raise ModelError: if the path is invalid
788 @raise TypeError: if the join style is invalid
789
790 @rtype: L{intermine.pathfeatures.Join}
791 """
792 join = Join(*args, **kwargs)
793 join.path = self.prefix_path(join.path)
794 if self.do_verification: self.verify_join_paths([join])
795 self.joins.append(join)
796 return self
797
799 """Alias for add_join(column, "OUTER")"""
800 return self.add_join(str(column), "OUTER")
801
803 """
804 Check that the joins are valid
805 ==============================
806
807 Joins must have valid paths, and they must refer to references.
808
809 @raise ModelError: if the paths are invalid
810 @raise QueryError: if the paths are not references
811 """
812 if joins is None: joins = self.joins
813 for join in joins:
814 path = self.model.make_path(join.path, self.get_subclass_dict())
815 if not path.is_reference():
816 raise QueryError("'" + join.path + "' is not a reference")
817
819 """
820 Add a path description to the query
821 ===================================
822
823 example::
824
825 query.add_path_description("Gene.proteins.proteinDomains", "Protein Domain")
826
827 This allows you to alias the components of long paths to
828 improve the way they display column headers in a variety of circumstances.
829 In the above example, if the view included the unwieldy path
830 "Gene.proteins.proteinDomains.primaryIdentifier", it would (depending on the
831 mine) be displayed as "Protein Domain > DB Identifer". These
832 settings are taken into account by the webservice when generating
833 column headers for flat-file results with the columnheaders parameter given, and
834 always supplied when requesting jsontable results.
835
836 @rtype: L{intermine.pathfeatures.PathDescription}
837
838 """
839 path_description = PathDescription(*args, **kwargs)
840 path_description.path = self.prefix_path(path_description.path)
841 if self.do_verification: self.verify_pd_paths([path_description])
842 self.path_descriptions.append(path_description)
843 return path_description
844
846 """
847 Check that the path of the path description is valid
848 ====================================================
849
850 Checks for consistency with the data model
851
852 @raise ModelError: if the paths are invalid
853 """
854 if pds is None: pds = self.path_descriptions
855 for pd in pds:
856 self.model.validate_path(pd.path, self.get_subclass_dict())
857
858 @property
860 """
861 Returns the list of constraints that have a code
862 ================================================
863
864 Query.coded_constraints S{->} list(intermine.constraints.CodedConstraint)
865
866 This returns an up to date list of the constraints that can
867 be used in a logic expression. The only kind of constraint
868 that this excludes, at present, is SubClassConstraints
869
870 @rtype: list(L{intermine.constraints.CodedConstraint})
871 """
872 return sorted(self.constraint_dict.values(), key=lambda con: con.code)
873
875 """
876 Returns the logic expression for the query
877 ==========================================
878
879 This returns the up to date logic expression. The default
880 value is the representation of all coded constraints and'ed together.
881
882 If the logic is empty and there are no constraints, returns an
883 empty string.
884
885 The LogicGroup object stringifies to a string that can be parsed to
886 obtain itself (eg: "A and (B or C or D)").
887
888 @rtype: L{intermine.constraints.LogicGroup}
889 """
890 if self._logic is None:
891 if len(self.coded_constraints) > 0:
892 return reduce(lambda x, y: x+y, self.coded_constraints)
893 else:
894 return ""
895 else:
896 return self._logic
897
899 """
900 Sets the Logic given the appropriate input
901 ==========================================
902
903 example::
904
905 Query.set_logic("A and (B or C)")
906
907 This sets the logic to the appropriate value. If the value is
908 already a LogicGroup, it is accepted, otherwise
909 the string is tokenised and parsed.
910
911 The logic is then validated with a call to validate_logic()
912
913 @raise LogicParseError: if there is a syntax error in the logic
914 """
915 if isinstance(value, constraints.LogicGroup):
916 logic = value
917 else:
918 logic = self._logic_parser.parse(value)
919 if self.do_verification: self.validate_logic(logic)
920 self._logic = logic
921 return self
922
def validate_logic(self, logic=None):
    """
    Validates the query logic
    =========================

    Attempts to validate the logic by checking
    that every coded_constraint is included
    at least once

    @param logic: the logic to check; when omitted, the logic
                  currently set on this query is checked instead

    @raise QueryError: if not every coded constraint is represented
    """
    if logic is None:
        logic = self._logic
    if logic is None:
        # No explicit logic has been set at all, so there is no
        # expression to check the constraint codes against.
        return

    logic_codes = set(logic.get_codes())
    for con in self.coded_constraints:
        if con.code not in logic_codes:
            raise QueryError("Constraint " + con.code + repr(con)
                    + " is not mentioned in the logic: " + str(logic))
940
942 """
943 Gets the sort order when none has been specified
944 ================================================
945
946 This method is called to determine the sort order if
947 none is specified
948
949 @raise QueryError: if the view is empty
950
951 @rtype: L{intermine.pathfeatures.SortOrderList}
952 """
953 try:
954 return SortOrderList((self.views[0], SortOrder.ASC))
955 except IndexError:
956 raise QueryError("Query view is empty")
957
959 """
960 Return a sort order for the query
961 =================================
962
963 This method returns the sort order if set, otherwise
964 it returns the default sort order
965
966 @raise QueryError: if the view is empty
967
968 @rtype: L{intermine.pathfeatures.SortOrderList}
969 """
970 if self._sort_order_list.is_empty():
971 return self.get_default_sort_order()
972 else:
973 return self._sort_order_list
974
976 """
977 Adds a sort order to the query
978 ==============================
979
980 example::
981
982 Query.add_sort_order("Gene.name", "DESC")
983
984 This method adds a sort order to the query.
985 A query can have multiple sort orders, which are
986 assessed in sequence.
987
988 If a query has two sort-orders, for example,
989 the first being "Gene.organism.name asc",
990 and the second being "Gene.name desc", you would have
991 the list of genes grouped by organism, with the
992 lists within those groupings in reverse alphabetical
993 order by gene name.
994
995 This method will try to validate the sort order
996 by calling validate_sort_order()
997 """
998 so = SortOrder(str(path), direction)
999 so.path = self.prefix_path(so.path)
1000 if self.do_verification: self.validate_sort_order(so)
1001 self._sort_order_list.append(so)
1002 return self
1003
1005 """
1006 Check the validity of the sort order
1007 ====================================
1008
1009 Checks that the sort order paths are:
1010 - valid paths
1011 - in the view
1012
1013 @raise QueryError: if the sort order is not in the view
1014 @raise ModelError: if the path is invalid
1015
1016 """
1017 if not so_elems:
1018 so_elems = self._sort_order_list
1019
1020 for so in so_elems:
1021 self.model.validate_path(so.path, self.get_subclass_dict())
1022 if so.path not in self.views:
1023 raise QueryError("Sort order element is not in the view: " + so.path)
1024
1026 """
1027 Return the current mapping of class to subclass
1028 ===============================================
1029
1030 This method returns a mapping of classes used
1031 by the model for assessing whether certain paths are valid. For
1032 instance, if you subclass MicroArrayResult to be FlyAtlasResult,
1033 you can refer to the .presentCall attributes of fly atlas results.
1034 MicroArrayResults do not have this attribute, and a path such as::
1035
1036 Gene.microArrayResult.presentCall
1037
1038 would be marked as invalid unless the dictionary is provided.
1039
1040 Users most likely will not need to ever call this method.
1041
1042 @rtype: dict(string, string)
1043 """
1044 subclass_dict = {}
1045 for c in self.constraints:
1046 if isinstance(c, constraints.SubClassConstraint):
1047 subclass_dict[c.path] = c.subclass
1048 return subclass_dict
1049
def results(self, row="object", start=0, size=None):
    """
    Return an iterator over result rows
    ===================================

    Usage::

      >>> for gene in query.results():
      ...     handle(gene)

    The request is built from this query's results path and query
    parameters, and handed to the service together with the row
    format, the views and the root class.

    Note that if your query contains any kind of collection,
    start and size are unlikely to do what you expect, as they
    operate only on the underlying rows used to build up the
    returned objects. If you want rows back, you are recommended
    to use the simpler rows method.

    @param row: the format for the row. Defaults to "object". Valid options are
        "rr", "dict", "list", "jsonrows", "object", "jsonobjects", "tsv", "csv".
    @type row: string
    @param start: sent to the service as the "start" parameter
    @param size: sent to the service as the "size" parameter; left
        out of the request when it is None or otherwise false

    @rtype: L{intermine.webservice.ResultIterator}

    @raise WebserviceError: if the request is unsuccessful
    """
    resource = self.get_results_path()
    request_params = self.to_query_params()
    request_params["start"] = start
    if size:
        request_params["size"] = size
    return self.service.get_results(
        resource, request_params, row, self.views, self.root)
1082
def rows(self, start=0, size=None):
    """
    Return the results as rows of data
    ==================================

    This is a shortcut for results("rr"); start and size are
    passed straight through.

    Usage::

      >>> for row in query.rows(start=10, size=10):
      ...     handle_row(row["proteins.name"])

    @rtype: iterable<intermine.webservice.ResultRow>
    """
    row_format = "rr"
    return self.results(row=row_format, start=start, size=size)
1098
def one(self, row="jsonobjects"):
    """
    Return one result, and raise an error if the result size is not 1
    =================================================================

    When the count for the query is exactly one, the first result
    is returned in the requested format.

    Otherwise, for every format except "jsonobjects" this is an
    error. For "jsonobjects" the results are iterated instead, and
    exactly one object must come back (presumably because a single
    object can be built up from several rows - see the note on
    start and size in Query.results).

    @param row: the format for the result. Defaults to "jsonobjects"
    @type row: string

    @raise QueryError: if there is not exactly one result
    """
    count = self.count()
    if count == 1:
        return self.first(row)

    if row != "jsonobjects":
        raise QueryError("Result size is not one: got %d results" % (count))

    found = None
    # Iterate in the requested format, so that the value returned
    # here has the same type as the one returned by first(row) above.
    for obj in self.results(row):
        if found is not None:
            raise QueryError("More than one result received")
        found = obj
    if found is None:
        raise QueryError("No results received")
    return found
1121
def first(self, row="jsonobjects", start=0):
    """
    Return the first result, or None if the results are empty
    =========================================================

    @param row: the format for the result. Defaults to "jsonobjects"
    @type row: string
    @param start: passed on to results() as the start index

    @return: the first item of the result iterator, or None
    """
    # No size limit is requested for "jsonobjects"; every other
    # format only asks the service for a single row.
    # NOTE(review): presumably one object may span several rows - confirm.
    size = None if row == "jsonobjects" else 1
    try:
        # The builtin next() works on Python 2.6+ and Python 3,
        # unlike the Python-2-only iterator.next() method.
        return next(self.results(row, start=start, size=size))
    except StopIteration:
        return None
1132
1134 """
1135 Get a list of result rows
1136 =========================
1137
1138 This method is a shortcut so that you do not have to
1139 do a list comprehension yourself on the iterator that
1140 is normally returned. If you have a very large result
1141 set (and these can get up to 100's of thousands of rows
1142 pretty easily) you will not want to
1143 have the whole list in memory at once, but there may
1144 be other circumstances when you might want to keep the whole
1145 list in one place.
1146
1147 It takes all the same arguments and parameters as Query.results
1148
1149 aliased as 'all'
1150
1151 @see: L{intermine.query.Query.results}
1152
1153 """
1154 rows = self.results(*args, **kwargs)
1155 return [r for r in rows]
1156
1159
1161 """
1162 Return the total number of rows this query returns
1163 ==================================================
1164
1165 Obtain the number of rows a particular query will
1166 return, without having to fetch and parse all the
1167 actual data. This method makes a request to the server
1168 to report the count for the query, and is sugar for a
1169 results call.
1170
1171 @rtype: int
1172 @raise WebserviceError: if the request is unsuccessful.
1173 """
1174 count_str = ""
1175 rows = self.results("count")
1176 for row in rows:
1177 count_str += row
1178 try:
1179 return int(count_str)
1180 except ValueError:
1181 raise WebserviceError("Server returned a non-integer count: " + count_str)
1182
1184 """
1185 Returns the uri to use to create a list from this query
1186 =======================================================
1187
1188 Query.get_list_upload_uri() -> str
1189
1190 This method is used internally when performing list operations
1191 on queries.
1192
1193 @rtype: str
1194 """
1195 return self.service.root + self.service.QUERY_LIST_UPLOAD_PATH
1196
1198 """
1199 Returns the uri to use to create a list from this query
1200 =======================================================
1201
1202 Query.get_list_append_uri() -> str
1203
1204 This method is used internally when performing list operations
1205 on queries.
1206
1207 @rtype: str
1208 """
1209 return self.service.root + self.service.QUERY_LIST_APPEND_PATH
1210
1211
1213 """
1214 Returns the path section pointing to the REST resource
1215 ======================================================
1216
1217 Query.get_results_path() -> str
1218
1219 Internally, this just calls a constant property
1220 in intermine.service.Service
1221
1222 @rtype: str
1223 """
1224 return self.service.QUERY_PATH
1225
1226
1228 """
1229 Returns the child objects of the query
1230 ======================================
1231
1232 This method is used during the serialisation of queries
1233 to xml. It is unlikely you will need access to this as a whole.
1234 Consider using "path_descriptions", "joins", "constraints" instead
1235
1236 @see: Query.path_descriptions
1237 @see: Query.joins
1238 @see: Query.constraints
1239
1240 @return: the child element of this query
1241 @rtype: list
1242 """
1243 return sum([self.path_descriptions, self.joins, self.constraints], [])
1244
1246 """
1247 Returns the parameters to be passed to the webservice
1248 =====================================================
1249
1250 The query is responsible for producing its own query
1251 parameters. These consist simply of:
1252 - query: the xml representation of the query
1253
1254 @rtype: dict
1255
1256 """
1257 xml = self.to_xml()
1258 params = {'query' : xml }
1259 return params
1260
1262 """
1263 Returns a DOM node representing the query
1264 =========================================
1265
1266 This is an intermediate step in the creation of the
1267 xml serialised version of the query. You probably
1268 won't need to call this directly.
1269
1270 @rtype: xml.minidom.Node
1271 """
1272 impl = getDOMImplementation()
1273 doc = impl.createDocument(None, "query", None)
1274 query = doc.documentElement
1275
1276 query.setAttribute('name', self.name)
1277 query.setAttribute('model', self.model.name)
1278 query.setAttribute('view', ' '.join(self.views))
1279 query.setAttribute('sortOrder', str(self.get_sort_order()))
1280 query.setAttribute('longDescription', self.description)
1281 if len(self.coded_constraints) > 1:
1282 query.setAttribute('constraintLogic', str(self.get_logic()))
1283
1284 for c in self.children():
1285 element = doc.createElement(c.child_type)
1286 for name, value in c.to_dict().items():
1287 if isinstance(value, (set, list)):
1288 for v in value:
1289 subelement = doc.createElement(name)
1290 text = doc.createTextNode(v)
1291 subelement.appendChild(text)
1292 element.appendChild(subelement)
1293 else:
1294 element.setAttribute(name, value)
1295 query.appendChild(element)
1296 return query
1297
1299 """
1300 Return an XML serialisation of the query
1301 ========================================
1302
1303 This method serialises the current state of the query to an
1304 xml string, suitable for storing, or sending over the
1305 internet to the webservice.
1306
1307 @return: the serialised xml string
1308 @rtype: string
1309 """
1310 n = self.to_Node()
1311 return n.toxml()
1312
1327
1329 """
1330 Performs a deep clone
1331 =====================
1332
1333 This method will produce a clone that is independent,
1334 and can be altered without affecting the original,
1335 but starts off with the exact same state as it.
1336
1337 The only shared elements should be the model
1338 and the service, which are shared by all queries
1339 that refer to the same webservice.
1340
1341 @return: same class as caller
1342 """
1343 newobj = self.__class__(self.model)
1344 for attr in ["joins", "views", "_sort_order_list", "_logic", "path_descriptions", "constraint_dict"]:
1345 setattr(newobj, attr, deepcopy(getattr(self, attr)))
1346
1347 for attr in ["name", "description", "service", "do_verification", "constraint_factory", "root"]:
1348 setattr(newobj, attr, getattr(self, attr))
1349 return newobj
1350
1352 """
1353 A Class representing a predefined query
1354 =======================================
1355
1356 Templates are ways of saving queries
1357 and allowing others to run them
1358 simply. They are the main interface
1359 to querying in the webapp
1360
1361 SYNOPSIS
1362 --------
1363
1364 example::
1365
1366 service = Service("http://www.flymine.org/query/service")
1367 template = service.get_template("Gene_Pathways")
1368 for row in template.results(A={"value":"eve"}):
1369 process_row(row)
1370 ...
1371
1372 A template is a subclass of query that comes predefined. They
1373 are typically retrieved from the webservice and run by specifying
1374 the values for their existing constraints. They are a concise
1375 and powerful way of running queries in the webapp.
1376
1377 Being subclasses of query, everything is true of them that is true
1378 of a query. They are just less work, as you don't have to design each
1379 one. Also, you can store your own templates in the web-app, and then
1380 access them as a private webservice method, from anywhere, making them
1381 a kind of query in the cloud - for this you will need to authenticate
1382 by providing log in details to the service.
1383
1384 The most significant difference is how constraint values are specified
1385 for each set of results.
1386
1387 @see: L{Template.results}
1388
1389 """
1391 """
1392 Constructor
1393 ===========
1394
1395 Instantiation is identical to that of queries. As with queries,
1396 these are best obtained from the intermine.webservice.Service
1397 factory methods.
1398
1399 @see: L{intermine.webservice.Service.get_template}
1400 """
1401 super(Template, self).__init__(*args, **kwargs)
1402 self.constraint_factory = constraints.TemplateConstraintFactory()
@property
def editable_constraints(self):
    """
    Return the list of constraints you can edit
    ===========================================

    Template.editable_constraints -> list(intermine.constraints.Constraint)

    Templates have a concept of editable constraints, which
    is a way of hiding complexity from users. An underlying query may have
    five constraints, but only expose the one that is actually
    interesting. This property returns this subset of constraints
    that have the editable flag set to true.

    A real list is built on every access, so the result can be
    indexed, measured and iterated more than once.

    @rtype: list
    """
    return [con for con in self.constraints if con.editable]
1419
def to_query_params(self):
    """
    Returns the query parameters needed for the webservice
    ======================================================

    Template.to_query_params() -> dict(string, string)

    Overrides the method of the same name in query to provide the
    parameters needed by the templates results service. These
    are slightly more complex:
        - name: The template's name
        - for each editable constraint that is switched on
          (where [i] is an integer incremented for each such constraint)
            - constraint[i]: the path
            - op[i]: the operator
            - value[i]: the value
            - code[i]: the code
            - extra[i]: the extra value for ternary constraints (optional)

    Editable constraints that are switched off are left out, and
    do not use up an index.

    @rtype: dict
    """
    params = {'name': self.name}
    index = 1
    for con in self.editable_constraints:
        if not con.switched_on:
            # Skip this constraint entirely (the original statement here
            # was a bare "next", which is a no-op in Python).
            continue
        for key, value in con.to_dict().items():
            # Rename the constraint's own keys to the parameter
            # names used by this request.
            if key == "extraValue":
                key = "extra"
            if key == "path":
                key = "constraint"
            params[key + str(index)] = value
        index += 1
    return params
1451
1453 """
1454 Returns the path section pointing to the REST resource
1455 ======================================================
1456
1457 Template.get_results_path() S{->} str
1458
1459 Internally, this just calls a constant property
1460 in intermine.service.Service
1461
1462 This overrides the method of the same name in Query
1463
1464 @return: the path to the REST resource
1465 @rtype: string
1466 """
1467 return self.service.TEMPLATEQUERY_PATH
1468
1470 """
1471 Gets a template to run
1472 ======================
1473
1474 Template.get_adjusted_template(con_values) S{->} Template
1475
1476 When templates are run, they are first cloned, and their
1477 values are changed to those desired. This leaves the original
1478 template unchanged so it can be run again with different
1479 values. This method does the cloning and changing of constraint
1480 values
1481
1482 @raise ConstraintError: if the constraint values specify values for a non-editable constraint.
1483
1484 @rtype: L{Template}
1485 """
1486 clone = self.clone()
1487 for code, options in con_values.items():
1488 con = clone.get_constraint(code)
1489 if not con.editable:
1490 raise ConstraintError("There is a constraint '" + code
1491 + "' on this query, but it is not editable")
1492 for key, value in options.items():
1493 setattr(con, key, value)
1494 return clone
1495
def results(self, row="object", start=0, size=None, **con_values):
    """
    Get an iterator over result rows
    ================================

    This method returns the same values with the
    same options as the method of the same name in
    Query (see intermine.query.Query). The main difference is in the
    arguments.

    The template result methods also accept a key-word pair
    set of arguments that are used to supply values
    to the editable constraints. eg::

      template.results(
        A = {"value": "eve"},
        B = {"op": ">", "value": 5000}
      )

    The keys should be codes for editable constraints (you can inspect these
    with Template.editable_constraints) and the values should be a dictionary
    of constraint properties to replace. You can replace the values for
    "op" (operator), "value", and "extra_value" and "values" in the case of
    ternary and multi constraints.

    The values are applied to an adjusted copy obtained from
    get_adjusted_template(), and the Query implementation of
    results() is then run on that copy.

    @rtype: L{intermine.webservice.ResultIterator}
    """
    adjusted = self.get_adjusted_template(con_values)
    as_query = super(Template, adjusted)
    return as_query.results(row, start, size)
1525
1527 """
1528 Get a list of result rows
1529 =========================
1530
1531 This method performs the same as the method of the
1532 same name in Query, and it shares the semantics of
1533 Template.results().
1534
1535 @see: L{intermine.query.Query.get_results_list}
1536 @see: L{intermine.query.Template.results}
1537
1538 @rtype: list
1539
1540 """
1541 clone = self.get_adjusted_template(con_values)
1542 return super(Template, clone).get_results_list(row, start, size)
1543
1548
1549 - def rows(self, start=0, size=None, **con_values):
1553
def count(self, **con_values):
    """
    Return the total number of rows this template returns
    =====================================================

    Obtain the number of rows the template will return for the
    given constraint values, without having to fetch and parse
    all the actual data. The values are applied to an adjusted
    copy obtained from get_adjusted_template(), and the Query
    implementation of count() is then run on that copy.

    @param con_values: replacement properties for editable
        constraints, keyed by constraint code

    @rtype: int
    @raise WebserviceError: if the request is unsuccessful.
    """
    return super(Template, self.get_adjusted_template(con_values)).count()
1570
1574
1577
1580