1 import re
2 from copy import deepcopy
3 from xml.dom import minidom, getDOMImplementation
4
5 from .util import openAnything, ReadableException
6 from .pathfeatures import PathDescription, Join, SortOrder, SortOrderList
7 from .model import Column, Class
8 import constraints
9
10 """
11 Classes representing queries against webservices
12 ================================================
13
14 Representations of queries, and templates.
15
16 """
17
18 __author__ = "Alex Kalderimis"
19 __organization__ = "InterMine"
20 __license__ = "LGPL"
21 __contact__ = "dev@intermine.org"
22
23
24 -class Query(object):
25 """
26 A Class representing a structured database query
27 ================================================
28
29 Objects of this class have properties that model the
30 attributes of the query, and methods for performing
31 the request.
32
33 SYNOPSIS
34 --------
35
36 example:
37
38 >>> service = Service("http://www.flymine.org/query/service")
39 >>> query = service.new_query()
40 >>>
41 >>> query.add_view("Gene.symbol", "Gene.pathways.name", "Gene.proteins.symbol")
42 >>> query.add_sort_order("Gene.pathways.name")
43 >>>
44 >>> query.add_constraint("Gene", "LOOKUP", "eve")
45 >>> query.add_constraint("Gene.pathways.name", "=", "Phosphate*")
46 >>>
47 >>> query.set_logic("A or B")
48 >>>
49 >>> for row in query.results():
50 ... handle_row(row)
51
52 OR, using an SQL style DSL:
53
54 >>> s = Service("www.flymine.org/query")
55 >>> query = s.query("Gene").\
56 ... select("*", "pathways.*").\
57 ... where("symbol", "=", "H").\
58 ... outerjoin("pathways").\
59 ... order_by("symbol")
60 >>> for row in query.results():
61 ... handle_row(row)
62
63 OR, for a more SQL-alchemy, ORM style:
64
65 >>>
66 >>> for gene in s.query(s.model.Gene).filter(s.model.Gene.symbol == ["zen", "H", "eve"]).add_columns(s.model.Gene.alleles):
67 ... handle(gene)
68
69 Query objects represent structured requests for information over the database
70 housed at the datawarehouse whose webservice you are querying. They utilise
71 some of the concepts of relational databases, within an object-related
72 ORM context. If you don't know what that means, don't worry: you
73 don't need to write SQL, and the queries will be fast.
74
75 To make things slightly more familiar to those with knowledge of SQL, some syntactical
76 sugar is provided to make constructing queries a bit more recognisable.
77
78 PRINCIPLES
79 ----------
80
81 The data model represents tables in the databases as classes, with records
82 within tables as instances of that class. The columns of the database are the
83 fields of that object::
84
85 The Gene table - showing two records/objects
86 +---------------------------------------------------+
87 | id | symbol | length | cyto-location | organism |
88 +----------------------------------------+----------+
89 | 01 | eve | 1539 | 46C10-46C10 | 01 |
90 +----------------------------------------+----------+
91 | 02 | zen | 1331 | 84A5-84A5 | 01 |
92 +----------------------------------------+----------+
93 ...
94
95 The organism table - showing one record/object
96 +----------------------------------+
97 | id | name | taxon id |
98 +----------------------------------+
99 | 01 | D. melanogaster | 7227 |
100 +----------------------------------+
101
102 Columns that contain a meaningful value are known as 'attributes' (in the tables above, that is
103 everything except the id columns). The other columns (such as "organism" in the gene table)
104 are ones that reference records of other tables (ie. other objects), and are called
105 references. You can refer to any field in any class, that has a connection,
106 however tenuous, with a table, by using dotted path notation::
107
108 Gene.organism.name -> the name column in the organism table, referenced by a record in the gene table
109
110 These paths, and the connections between records and tables they represent,
111 are the basis for the structure of InterMine queries.
112
113 THE STRUCTURE OF A QUERY
114 ------------------------
115
116 A query has two principal sets of properties:
117 - its view: the set of output columns
118 - its constraints: the set of rules for what to include
119
120 A query must have at least one output column in its view, but constraints
121 are optional - if you don't include any, you will get back every record
122 from the table (every object of that type)
123
124 In addition, the query must be coherent: if you have information about
125 an organism, and you want a list of genes, then the "Gene" table
126 should be the basis for your query, and as such the Gene class, which
127 represents this table, should be the root of all the paths that appear in it:
128
129 So, to take a simple example::
130
131 I have an organism name, and I want a list of genes:
132
133 The view is the list of things I want to know about those genes:
134
135 >>> query.add_view("Gene.name")
136 >>> query.add_view("Gene.length")
137 >>> query.add_view("Gene.proteins.sequence.length")
138
139 Note I can freely mix attributes and references, as long as every view ends in
140 an attribute (a meaningful value). As a short-cut I can also write:
141
142 >>> query.add_views("Gene.name", "Gene.length", "Gene.proteins.sequence.length")
143
144 or:
145
146 >>> query.add_views("Gene.name Gene.length Gene.proteins.sequence.length")
147
148 They are all equivalent. You can also use common SQL style shortcuts such as "*" for all
149 attribute fields:
150
151 >>> query.add_views("Gene.*")
152
153 You can also use "select" as a synonym for "add_view"
154
155 Now I can add my constraints. As, we mentioned, I have information about an organism, so:
156
157 >>> query.add_constraint("Gene.organism.name", "=", "D. melanogaster")
158
159 (note, here I can use "where" as a synonym for "add_constraint")
160
161 If I run this query, I will get literally millions of results -
162 it needs to be filtered further:
163
164 >>> query.add_constraint("Gene.proteins.sequence.length", "<", 500)
165
166 If that doesn't restrict things enough I can add more filters:
167
168 >>> query.add_constraint("Gene.symbol", "ONE OF", ["eve", "zen", "h"])
169
170 Now I am guaranteed to get only information on genes I am interested in.
171
172 Note, though, that because I have included the link (or "join") from Gene -> Protein,
173 this, by default, means that I only want genes that have protein information associated
174 with them. If in fact I want information on all genes, and just want to know the
175 protein information if it is available, then I can specify that with:
176
177 >>> query.add_join("Gene.proteins", "OUTER")
178
179 And if perhaps my query is not as simple as a strict cumulative filter, but I want all
180 D. mel genes that EITHER have a short protein sequence OR come from one of my favourite genes
181 (as unlikely as that sounds), I can specify the logic for that too:
182
183 >>> query.set_logic("A and (B or C)")
184
185 Each letter refers to one of the constraints - the codes are assigned in the order you add
186 the constraints. If you want to be absolutely certain about the constraints you mean, you
187 can use the constraint objects themselves:
188
189 >>> gene_is_eve = query.add_constraint("Gene.symbol", "=", "eve")
190 >>> gene_is_zen = query.add_constraint("Gene.symbol", "=", "zen")
191 >>>
192 >>> query.set_logic(gene_is_eve | gene_is_zen)
193
194 By default the logic is a straight cumulative filter (ie: A and B and C and D and ...)
195
196 Putting it all together:
197
198 >>> query.add_view("Gene.name", "Gene.length", "Gene.proteins.sequence.length")
199 >>> query.add_constraint("Gene.organism.name", "=", "D. melanogaster")
200 >>> query.add_constraint("Gene.proteins.sequence.length", "<", 500)
201 >>> query.add_constraint("Gene.symbol", "ONE OF", ["eve", "zen", "h"])
202 >>> query.add_join("Gene.proteins", "OUTER")
203 >>> query.set_logic("A and (B or C)")
204
205 This can be made more concise and readable with a little DSL sugar:
206
207 >>> query = service.query("Gene")
208 >>> query.select("name", "length", "proteins.sequence.length").\
209 ... where('organism.name', '=', 'D. melanogaster').\
210 ... where("proteins.sequence.length", "<", 500).\
211 ... where('symbol', 'ONE OF', ['eve', 'h', 'zen']).\
212 ... outerjoin('proteins').\
213 ... set_logic("A and (B or C)")
214
215 And the query is defined.
216
217 Result Processing
218 -----------------
219
220 calling ".results()" on a query will return an iterator of rows, where each row
221 is a ResultRow object, which can be treated as both a list and a dictionary.
222
223 Which means you can refer to columns by name:
224
225 >>> for row in query.results():
226 ... print "name is %s" % (row["name"])
227 ... print "length is %d" % (row["length"])
228
229 As well as using list indices:
230
231 >>> for row in query.results():
232 ... print "The first column is %s" % (row[0])
233
234 Iterating over a row iterates over the cell values as a list:
235
236 >>> for row in query.results():
237 ... for column in row:
238 ... do_something(column)
239
240 Here each row will have a gene name, a gene length, and a sequence length, eg:
241
242 >>> print row.to_l
243 ["even skipped", "1359", "376"]
244
245 To make that clearer, you can ask for a dictionary instead of a list:
246
247 >>> for row in query.results():
248 ... print row.to_d
249 {"Gene.name":"even skipped","Gene.length":"1359","Gene.proteins.sequence.length":"376"}
250
251
252 If you just want the raw results, for printing to a file, or for piping to another program,
253 you can request strings instead:
254
255 >>> for row in query.results("string"):
256 ... print(row)
257
258
259 Getting us to Generate your Code
260 --------------------------------
261
262 Not that you have to actually write any of this! The webapp will happily
263 generate the code for any query (and template) you can build in it. A good way to get
264 started is to use the webapp to generate your code, and then run it as scripts
265 to speed up your queries. You can always tinker with and edit the scripts you download.
266
267 To get generated queries, look for the "python" link at the bottom of query-builder and
268 template form pages, it looks a bit like this::
269
270 . +=====================================+=============
271 | |
272 | Perl | Python | Java [Help] |
273 | |
274 +==============================================
275
276 """
277 - def __init__(self, model, service=None, validate=True, root=None):
278 """
279 Construct a new Query
280 =====================
281
282 Construct a new query for making database queries
283 against an InterMine data warehouse.
284
285 Normally you would not need to use this constructor
286 directly, but instead use the factory method on
287 intermine.webservice.Service, which will handle construction
288 for you.
289
290 @param model: an instance of L{intermine.model.Model}. Required
291 @param service: an instance of l{intermine.service.Service}. Optional,
292 but you will not be able to make requests without one.
293 @param validate: a boolean - defaults to True. If set to false, the query
294 will not try and validate itself. You should not set this to false.
295
296 """
297 self.model = model
298 if root is None:
299 self.root = root
300 else:
301 self.root = model.make_path(root).root
302
303 self.name = ''
304 self.description = ''
305 self.service = service
306 self.do_verification = validate
307 self.path_descriptions = []
308 self.joins = []
309 self.constraint_dict = {}
310 self.uncoded_constraints = []
311 self.views = []
312 self._sort_order_list = SortOrderList()
313 self._logic_parser = constraints.LogicParser(self)
314 self._logic = None
315 self.constraint_factory = constraints.ConstraintFactory()
316
317
318 self.c = self.column
319 self.filter = self.where
320 self.add_column = self.add_view
321 self.add_columns = self.add_view
322 self.add_views = self.add_view
323 self.select = self.add_view
324 self.order_by = self.add_sort_order
325 self.all = self.get_results_list
326 self.rows = self.results
327
329 return self.results("jsonobjects")
330
331 @classmethod
332 - def from_xml(cls, xml, *args, **kwargs):
333 """
334 Deserialise a query serialised to XML
335 =====================================
336
337 This method is used to instantiate serialised queries.
338 It is used by intermine.webservice.Service objects
339 to instantiate Template objects and it can be used
340 to read in queries you have saved to a file.
341
342 @param xml: The xml as a file name, url, or string
343
344 @raise QueryParseError: if the query cannot be parsed
345 @raise ModelError: if the query has illegal paths in it
346 @raise ConstraintError: if the constraints don't make sense
347
348 @rtype: L{Query}
349 """
350 obj = cls(*args, **kwargs)
351 obj.do_verification = False
352 f = openAnything(xml)
353 doc = minidom.parse(f)
354 f.close()
355
356 queries = doc.getElementsByTagName('query')
357 assert len(queries) == 1, "wrong number of queries in xml"
358 q = queries[0]
359 obj.name = q.getAttribute('name')
360 obj.description = q.getAttribute('description')
361 obj.add_view(q.getAttribute('view'))
362 for p in q.getElementsByTagName('pathDescription'):
363 path = p.getAttribute('pathString')
364 description = p.getAttribute('description')
365 obj.add_path_description(path, description)
366 for j in q.getElementsByTagName('join'):
367 path = j.getAttribute('path')
368 style = j.getAttribute('style')
369 obj.add_join(path, style)
370 for c in q.getElementsByTagName('constraint'):
371 args = {}
372 args['path'] = c.getAttribute('path')
373 if args['path'] is None:
374 if c.parentNode.tagName != "node":
375 msg = "Constraints must have a path"
376 raise QueryParseError(msg)
377 args['path'] = c.parentNode.getAttribute('path')
378 args['op'] = c.getAttribute('op')
379 args['value'] = c.getAttribute('value')
380 args['code'] = c.getAttribute('code')
381 args['subclass'] = c.getAttribute('type')
382 args['editable'] = c.getAttribute('editable')
383 args['optional'] = c.getAttribute('switchable')
384 args['extra_value'] = c.getAttribute('extraValue')
385 args['loopPath'] = c.getAttribute('loopPath')
386 values = []
387 for val_e in c.getElementsByTagName('value'):
388 texts = []
389 for node in val_e.childNodes:
390 if node.nodeType == node.TEXT_NODE: texts.append(node.data)
391 values.append(' '.join(texts))
392 if len(values) > 0: args["values"] = values
393 for k, v in args.items():
394 if v is None or v == '': del args[k]
395 if "loopPath" in args:
396 args["op"] = {
397 "=" : "IS",
398 "!=": "IS NOT"
399 }.get(args["op"])
400 con = obj.add_constraint(**args)
401 if not con:
402 raise ConstraintError("error adding constraint with args: " + args)
403 obj.verify()
404
405 return obj
406
408 """
409 Validate the query
410 ==================
411
412 Invalid queries will fail to run, and it is not always
413 obvious why. The validation routine checks to see that
414 the query will not cause errors on execution, and tries to
415 provide informative error messages.
416
417 This method is called immediately after a query is fully
418 deserialised.
419
420 @raise ModelError: if the paths are invalid
421 @raise QueryError: if there are errors in query construction
422 @raise ConstraintError: if there are errors in constraint construction
423
424 """
425 self.verify_views()
426 self.verify_constraint_paths()
427 self.verify_join_paths()
428 self.verify_pd_paths()
429 self.validate_sort_order()
430 self.do_verification = True
431
433 """
434 Add one or more views to the list of output columns
435 ===================================================
436
437 example::
438
439 query.add_view("Gene.name Gene.organism.name")
440
441 This is the main method for adding views to the list
442 of output columns. As well as appending views, it
443 will also split a single, space or comma delimited
444 string into multiple paths, and flatten out lists, or any
445 combination. It will also immediately try to validate
446 the views.
447
448 Output columns must be valid paths according to the
449 data model, and they must represent attributes of tables
450
451 @see: intermine.model.Model
452 @see: intermine.model.Path
453 @see: intermine.model.Attribute
454 """
455 views = []
456 for p in paths:
457 if isinstance(p, (set, list)):
458 views.extend(list(p))
459 elif isinstance(p, Class):
460 views.append(p.name + ".*")
461 elif isinstance(p, Column):
462 if p._path.is_attribute():
463 views.append(str(p))
464 else:
465 views.append(str(p) + ".*")
466 else:
467 views.extend(re.split("(?:,?\s+|,)", p))
468
469 views = map(self.prefix_path, views)
470
471 views_to_add = []
472 for view in views:
473 if view.endswith(".*"):
474 view = re.sub("\.\*$", "", view)
475 path = self.model.make_path(view, self.get_subclass_dict())
476 cd = path.end_class
477 attr_views = map(lambda x: view + "." + x.name, cd.attributes)
478 views_to_add.extend(attr_views)
479 else:
480 views_to_add.append(view)
481
482 if self.do_verification:
483 self.verify_views(views_to_add)
484
485 self.views.extend(views_to_add)
486
487 return self
488
498
500 """
501 Clear the output column list
502 ============================
503
504 Deletes all entries currently in the view list.
505 """
506 self.views = []
507
509 """
510 Check to see if the views given are valid
511 =========================================
512
513 This method checks to see if the views:
514 - are valid according to the model
515 - represent attributes
516
517 @see: L{intermine.model.Attribute}
518
519 @raise intermine.model.ModelError: if the paths are invalid
520 @raise ConstraintError: if the paths are not attributes
521 """
522 if views is None: views = self.views
523 for path in views:
524 path = self.model.make_path(path, self.get_subclass_dict())
525 if not path.is_attribute():
526 raise ConstraintError("'" + str(path)
527 + "' does not represent an attribute")
528
530 """
531 Add a constraint (filter on records)
532 ====================================
533
534 example::
535
536 query.add_constraint("Gene.symbol", "=", "zen")
537
538 This method will try to make a constraint from the arguments
539 given, trying each of the classes it knows of in turn
540 to see if they accept the arguments. This allows you
541 to add constraints of different types without having to know
542 or care what their classes or implementation details are.
543 All constraints derive from intermine.constraints.Constraint,
544 and they all have a path attribute, but are otherwise diverse.
545
546 Before adding the constraint to the query, this method
547 will also try to check that the constraint is valid by
548 calling Query.verify_constraint_paths()
549
550 @see: L{intermine.constraints}
551
552 @rtype: L{intermine.constraints.Constraint}
553 """
554 if len(args) == 1 and len(kwargs) == 0:
555 if isinstance(args[0], tuple):
556 con = self.constraint_factory.make_constraint(*args[0])
557 else:
558 con = args[0]
559 else:
560 con = self.constraint_factory.make_constraint(*args, **kwargs)
561
562 con.path = self.prefix_path(con.path)
563 if self.do_verification: self.verify_constraint_paths([con])
564 if hasattr(con, "code"):
565 self.constraint_dict[con.code] = con
566 else:
567 self.uncoded_constraints.append(con)
568
569 return con
570
571 - def where(self, *args, **kwargs):
572 """
573 Add a constraint to the query
574 =============================
575
576 In contrast to add_constraint, this method also adds all attributes to the query
577 if no view has been set, and returns self to support method chaining.
578 """
579 if len(self.views) == 0:
580 self.add_view(self.root)
581
582 self.add_constraint(*args, **kwargs)
583 return self
584
587
589 """
590 Check that the constraints are valid
591 ====================================
592
593 This method will check the path attribute of each constraint.
594 In addition it will:
595 - Check that BinaryConstraints and MultiConstraints have an Attribute as their path
596 - Check that TernaryConstraints have a Reference as theirs
597 - Check that SubClassConstraints have a correct subclass relationship
598 - Check that LoopConstraints have a valid loopPath, of a compatible type
599 - Check that ListConstraints refer to an object
600
601 @param cons: The constraints to check (defaults to all constraints on the query)
602
603 @raise ModelError: if the paths are not valid
604 @raise ConstraintError: if the constraints do not satisfy the above rules
605
606 """
607 if cons is None: cons = self.constraints
608 for con in cons:
609 pathA = self.model.make_path(con.path, self.get_subclass_dict())
610 if isinstance(con, constraints.TernaryConstraint):
611 if pathA.get_class() is None:
612 raise ConstraintError("'" + str(pathA) + "' does not represent a class, or a reference to a class")
613 elif isinstance(con, constraints.BinaryConstraint) or isinstance(con, constraints.MultiConstraint):
614 if not pathA.is_attribute():
615 raise ConstraintError("'" + str(pathA) + "' does not represent an attribute")
616 elif isinstance(con, constraints.SubClassConstraint):
617 pathB = self.model.make_path(con.subclass, self.get_subclass_dict())
618 if not pathB.get_class().isa(pathA.get_class()):
619 raise ConstraintError("'" + con.subclass + "' is not a subclass of '" + con.path + "'")
620 elif isinstance(con, constraints.LoopConstraint):
621 pathB = self.model.make_path(con.loopPath, self.get_subclass_dict())
622 for path in [pathA, pathB]:
623 if not path.get_class():
624 raise ConstraintError("'" + str(path) + "' does not refer to an object")
625 (classA, classB) = (pathA.get_class(), pathB.get_class())
626 if not classA.isa(classB) and not classB.isa(classA):
627 raise ConstraintError("the classes are of incompatible types: " + str(classA) + "," + str(classB))
628 elif isinstance(con, constraints.ListConstraint):
629 if not pathA.get_class():
630 raise ConstraintError("'" + str(pathA) + "' does not refer to an object")
631
632 @property
634 """
635 Returns the constraints of the query
636 ====================================
637
638 Query.constraints S{->} list(intermine.constraints.Constraint)
639
640 Constraints are returned in the order of their code (normally
641 the order they were added to the query) and with any
642 subclass constraints at the end.
643
644 @rtype: list(Constraint)
645 """
646 ret = sorted(self.constraint_dict.values(), key=lambda con: con.code)
647 ret.extend(self.uncoded_constraints)
648 return ret
649
651 """
652 Returns the constraint with the given code
653 ==========================================
654
655 Returns the constraint with the given code, if it exists.
656 If no such constraint exists, it throws a ConstraintError
657
658 @return: the constraint corresponding to the given code
659 @rtype: L{intermine.constraints.CodedConstraint}
660 """
661 if code in self.constraint_dict:
662 return self.constraint_dict[code]
663 else:
664 raise ConstraintError("There is no constraint with the code '"
665 + code + "' on this query")
666
668 """
669 Add a join statement to the query
670 =================================
671
672 example::
673
674 query.add_join("Gene.proteins", "OUTER")
675
676 A join statement is used to determine if references should
677 restrict the result set by only including records where those references
678 exist. For example, if one had a query with the view::
679
680 "Gene.name", "Gene.proteins.name"
681
682 Then in the normal case (that of an INNER join), we would only
683 get Genes that also have at least one protein that they reference.
684 Simply by asking for this output column you are placing a
685 restriction on the information you get back.
686
687 If in fact you wanted all genes, regardless of whether they had
688 proteins associated with them or not, but if they did
689 you would rather like to know _what_ proteins, then you need
690 to specify this reference to be an OUTER join::
691
692 query.add_join("Gene.proteins", "OUTER")
693
694 Now you will get many more rows of results, some of which will
695 have "null" values where the protein name would have been,
696
697 This method will also attempt to validate the join by calling
698 Query.verify_join_paths(). Joins must have a valid path, the
699 style can be either INNER or OUTER (defaults to OUTER,
700 as the user does not need to specify inner joins, since all
701 references start out as inner joins), and the path
702 must be a reference.
703
704 @raise ModelError: if the path is invalid
705 @raise TypeError: if the join style is invalid
706
707 @rtype: L{intermine.pathfeatures.Join}
708 """
709 join = Join(*args, **kwargs)
710 join.path = self.prefix_path(join.path)
711 if self.do_verification: self.verify_join_paths([join])
712 self.joins.append(join)
713 return self
714
716 """Alias for add_join(column, "OUTER")"""
717 return self.add_join(str(column), "OUTER")
718
720 """
721 Check that the joins are valid
722 ==============================
723
724 Joins must have valid paths, and they must refer to references.
725
726 @raise ModelError: if the paths are invalid
727 @raise QueryError: if the paths are not references
728 """
729 if joins is None: joins = self.joins
730 for join in joins:
731 path = self.model.make_path(join.path, self.get_subclass_dict())
732 if not path.is_reference():
733 raise QueryError("'" + join.path + "' is not a reference")
734
736 """
737 Add a path description to the query
738 ===================================
739
740 example::
741
742 query.add_path_description("Gene.symbol", "The symbol for this gene")
743
744 If you wish you can add annotations to your query that describe
745 what the component paths are and what they do - this is only really
746 useful if you plan to keep your query (perhaps as xml) or store it
747 as a template.
748
749 @rtype: L{intermine.pathfeatures.PathDescription}
750
751 """
752 path_description = PathDescription(*args, **kwargs)
753 path_description.path = self.prefix_path(path_description.path)
754 if self.do_verification: self.verify_pd_paths([path_description])
755 self.path_descriptions.append(path_description)
756 return path_description
757
759 """
760 Check that the path of the path description is valid
761 ====================================================
762
763 Checks for consistency with the data model
764
765 @raise ModelError: if the paths are invalid
766 """
767 if pds is None: pds = self.path_descriptions
768 for pd in pds:
769 self.model.validate_path(pd.path, self.get_subclass_dict())
770
771 @property
773 """
774 Returns the list of constraints that have a code
775 ================================================
776
777 Query.coded_constraints S{->} list(intermine.constraints.CodedConstraint)
778
779 This returns an up to date list of the constraints that can
780 be used in a logic expression. The only kind of constraint
781 that this excludes, at present, is SubClassConstraints
782
783 @rtype: list(L{intermine.constraints.CodedConstraint})
784 """
785 return sorted(self.constraint_dict.values(), key=lambda con: con.code)
786
788 """
789 Returns the logic expression for the query
790 ==========================================
791
792 This returns the up to date logic expression. The default
793 value is the representation of all coded constraints and'ed together.
794
795 The LogicGroup object stringifies to a string that can be parsed to
796 obtain itself (eg: "A and (B or C or D)").
797
798 @rtype: L{intermine.constraints.LogicGroup}
799 """
800 if self._logic is None:
801 return reduce(lambda x, y: x+y, self.coded_constraints)
802 else:
803 return self._logic
804
806 """
807 Sets the Logic given the appropriate input
808 ==========================================
809
810 example::
811
812 Query.set_logic("A and (B or C)")
813
814 This sets the logic to the appropriate value. If the value is
815 already a LogicGroup, it is accepted, otherwise
816 the string is tokenised and parsed.
817
818 The logic is then validated with a call to validate_logic()
819
820 @raise LogicParseError: if there is a syntax error in the logic
821 """
822 if isinstance(value, constraints.LogicGroup):
823 logic = value
824 else:
825 logic = self._logic_parser.parse(value)
826 if self.do_verification: self.validate_logic(logic)
827 self._logic = logic
828 return self
829
831 """
832 Validates the query logic
833 =========================
834
835 Attempts to validate the logic by checking
836 that every coded_constraint is included
837 at least once
838
839 @raise QueryError: if not every coded constraint is represented
840 """
841 if logic is None: logic = self._logic
842 logic_codes = set(logic.get_codes())
843 for con in self.coded_constraints:
844 if con.code not in logic_codes:
845 raise QueryError("Constraint " + con.code + repr(con)
846 + " is not mentioned in the logic: " + str(logic))
847
849 """
850 Gets the sort order when none has been specified
851 ================================================
852
853 This method is called to determine the sort order if
854 none is specified
855
856 @raise QueryError: if the view is empty
857
858 @rtype: L{intermine.pathfeatures.SortOrderList}
859 """
860 try:
861 return SortOrderList((self.views[0], SortOrder.ASC))
862 except IndexError:
863 raise QueryError("Query view is empty")
864
866 """
867 Return a sort order for the query
868 =================================
869
870 This method returns the sort order if set, otherwise
871 it returns the default sort order
872
873 @raise QueryError: if the view is empty
874
875 @rtype: L{intermine.pathfeatures.SortOrderList}
876 """
877 if self._sort_order_list.is_empty():
878 return self.get_default_sort_order()
879 else:
880 return self._sort_order_list
881
883 """
884 Adds a sort order to the query
885 ==============================
886
887 example::
888
889 Query.add_sort_order("Gene.name", "DESC")
890
891 This method adds a sort order to the query.
892 A query can have multiple sort orders, which are
893 assessed in sequence.
894
895 If a query has two sort-orders, for example,
896 the first being "Gene.organism.name asc",
897 and the second being "Gene.name desc", you would have
898 the list of genes grouped by organism, with the
899 lists within those groupings in reverse alphabetical
900 order by gene name.
901
902 This method will try to validate the sort order
903 by calling validate_sort_order()
904 """
905 so = SortOrder(str(path), direction)
906 so.path = self.prefix_path(so.path)
907 if self.do_verification: self.validate_sort_order(so)
908 self._sort_order_list.append(so)
909 return self
910
912 """
913 Check the validity of the sort order
914 ====================================
915
916 Checks that the sort order paths are:
917 - valid paths
918 - in the view
919
920 @raise QueryError: if the sort order is not in the view
921 @raise ModelError: if the path is invalid
922
923 """
924 if not so_elems:
925 so_elems = self._sort_order_list
926
927 for so in so_elems:
928 self.model.validate_path(so.path, self.get_subclass_dict())
929 if so.path not in self.views:
930 raise QueryError("Sort order element is not in the view: " + so.path)
931
933 """
934 Return the current mapping of class to subclass
935 ===============================================
936
937 This method returns a mapping of classes used
938 by the model for assessing whether certain paths are valid. For
939 instance, if you subclass MicroArrayResult to be FlyAtlasResult,
940 you can refer to the .presentCall attributes of fly atlas results.
941 MicroArrayResults do not have this attribute, and a path such as::
942
943 Gene.microArrayResult.presentCall
944
945 would be marked as invalid unless the dictionary is provided.
946
947 Users most likely will not need to ever call this method.
948
949 @rtype: dict(string, string)
950 """
951 subclass_dict = {}
952 for c in self.constraints:
953 if isinstance(c, constraints.SubClassConstraint):
954 subclass_dict[c.path] = c.subclass
955 return subclass_dict
956
def results(self, row="rr", start=0, size=None):
    """
    Return an iterator over result rows
    ===================================

    Usage::

      for row in query.results():
        do_sth_with(row)

    The request is assembled from get_results_path() and
    to_query_params(), and then handed to the service's
    get_results method along with the current views and root.

    @param row: the format for the row. Defaults to "rr". Valid options are
                "rr", "dict", "list", "jsonrows", "jsonobject", "tsv", "csv".
    @type row: string
    @param start: sent to the service as the "start" parameter on
                  every request. Defaults to 0.
    @type start: int
    @param size: sent to the service as the "size" parameter, but only
                 when it is truthy: None (the default) and 0 both leave
                 the parameter out of the request.
    @type size: int

    @rtype: L{intermine.webservice.ResultIterator}

    @raise WebserviceError: if the request is unsuccessful
    """
    resource = self.get_results_path()
    request_params = self.to_query_params()
    request_params["start"] = start
    if size:
        request_params["size"] = size
    return self.service.get_results(
        resource, request_params, row, self.views, self.root)
983
def one(self, row="rr"):
    """
    Return one result, and raise an error if the result size is not 1

    The count is requested first (via count()); the row itself is only
    fetched, through first(), when that count is exactly one.

    @param row: the format for the row, passed on to first(). Defaults to "rr".
    @type row: string

    @return: whatever first() returns for the given row format

    @raise QueryError: if the result size is not exactly 1
    """
    c = self.count()
    if c != 1:
        # A bare string is not a valid exception; raise a real exception
        # class so callers get this message instead of a TypeError.
        raise QueryError("Result size is not one: got " + str(c) + " results")
    return self.first(row)
991
def first(self, row="rr", start=0):
    """
    Return the first result, or None if the results are empty

    @param row: the format for the row, passed on to results(). Defaults to "rr".
    @type row: string
    @param start: passed on to results() as its start argument. Defaults to 0.
    @type start: int

    @return: the first item produced by results(), or None if it
             produces nothing
    """
    # "jsonobjects" requests are made without a size limit; every other
    # format is limited to a single row.
    size = None if row == "jsonobjects" else 1
    # The next() builtin works with both Python 2 (.next) and Python 3
    # (__next__) iterators, and its default replaces the StopIteration catch.
    return next(self.results(row, start=start, size=size), None)
999
1001 """
1002 Get a list of result rows
1003 =========================
1004
1005 This method is a shortcut so that you do not have to
1006 do a list comprehension yourself on the iterator that
1007 is normally returned. If you have a very large result
1008 set (and these can get up to hundreds of thousands of rows
1009 pretty easily) you will not want to
1010 have the whole list in memory at once, but there may
1011 be other circumstances when you might want to keep the whole
1012 list in one place.
1013
1014 It takes all the same arguments and parameters as Query.results
1015
1016 aliased as 'all'
1017
1018 @see: L{intermine.query.results}
1019
1020 """
1021 rows = self.results(*args, **kwargs)
1022 return [r for r in rows]
1023
1025 """
1026 Return the total number of rows this query returns
1027 ==================================================
1028
1029 Obtain the number of rows a particular query will
1030 return, without having to fetch and parse all the
1031 actual data. This method makes a request to the server
1032 to report the count for the query, and is sugar for a
1033 results call.
1034
1035 @rtype: int
1036 @raise WebserviceError: if the request is unsuccessful.
1037 """
1038 count_str = ""
1039 rows = self.results("count")
1040 for row in rows:
1041 count_str += row
1042 try:
1043 return int(count_str)
1044 except ValueError:
1045 raise WebserviceError("Server returned a non-integer count: " + count_str)
1046
1048 """
1049 Returns the uri to use to create a list from this query
1050 =======================================================
1051
1052 Query.get_list_upload_uri() -> str
1053
1054 This method is used internally when performing list operations
1055 on queries.
1056
1057 @rtype: str
1058 """
1059 return self.service.root + self.service.QUERY_LIST_UPLOAD_PATH
1060
1062 """
1063 Returns the uri to use to append to a list from this query
1064 ==========================================================
1065
1066 Query.get_list_append_uri() -> str
1067
1068 This method is used internally when performing list operations
1069 on queries.
1070
1071 @rtype: str
1072 """
1073 return self.service.root + self.service.QUERY_LIST_APPEND_PATH
1074
1075
1077 """
1078 Returns the path section pointing to the REST resource
1079 ======================================================
1080
1081 Query.get_results_path() -> str
1082
1083 Internally, this just calls a constant property
1084 in intermine.service.Service
1085
1086 @rtype: str
1087 """
1088 return self.service.QUERY_PATH
1089
1090
1092 """
1093 Returns the child objects of the query
1094 ======================================
1095
1096 This method is used during the serialisation of queries
1097 to xml. It is unlikely you will need access to this as a whole.
1098 Consider using "path_descriptions", "joins", "constraints" instead
1099
1100 @see: Query.path_descriptions
1101 @see: Query.joins
1102 @see: Query.constraints
1103
1104 @return: the child element of this query
1105 @rtype: list
1106 """
1107 return sum([self.path_descriptions, self.joins, self.constraints], [])
1108
1110 """
1111 Returns the parameters to be passed to the webservice
1112 =====================================================
1113
1114 The query is responsible for producing its own query
1115 parameters. These consist simply of:
1116 - query: the xml representation of the query
1117
1118 @rtype: dict
1119
1120 """
1121 xml = self.to_xml()
1122 params = {'query' : xml }
1123 return params
1124
1126 """
1127 Returns a DOM node representing the query
1128 =========================================
1129
1130 This is an intermediate step in the creation of the
1131 xml serialised version of the query. You probably
1132 won't need to call this directly.
1133
1134 @rtype: xml.minidom.Node
1135 """
1136 impl = getDOMImplementation()
1137 doc = impl.createDocument(None, "query", None)
1138 query = doc.documentElement
1139
1140 query.setAttribute('name', self.name)
1141 query.setAttribute('model', self.model.name)
1142 query.setAttribute('view', ' '.join(self.views))
1143 query.setAttribute('sortOrder', str(self.get_sort_order()))
1144 query.setAttribute('longDescription', self.description)
1145 if len(self.coded_constraints) > 1:
1146 query.setAttribute('constraintLogic', str(self.get_logic()))
1147
1148 for c in self.children():
1149 element = doc.createElement(c.child_type)
1150 for name, value in c.to_dict().items():
1151 if isinstance(value, (set, list)):
1152 for v in value:
1153 subelement = doc.createElement(name)
1154 text = doc.createTextNode(v)
1155 subelement.appendChild(text)
1156 element.appendChild(subelement)
1157 else:
1158 element.setAttribute(name, value)
1159 query.appendChild(element)
1160 return query
1161
1163 """
1164 Return an XML serialisation of the query
1165 ========================================
1166
1167 This method serialises the current state of the query to an
1168 xml string, suitable for storing, or sending over the
1169 internet to the webservice.
1170
1171 @return: the serialised xml string
1172 @rtype: string
1173 """
1174 n = self.to_Node()
1175 return n.toxml()
1190
1192 """
1193 Performs a deep clone
1194 =====================
1195
1196 This method will produce a clone that is independent,
1197 and can be altered without affecting the original,
1198 but starts off with the exact same state as it.
1199
1200 The only shared elements should be the model
1201 and the service, which are shared by all queries
1202 that refer to the same webservice.
1203
1204 @return: same class as caller
1205 """
1206 newobj = self.__class__(self.model)
1207 for attr in ["joins", "views", "_sort_order_list", "_logic", "path_descriptions", "constraint_dict"]:
1208 setattr(newobj, attr, deepcopy(getattr(self, attr)))
1209
1210 for attr in ["name", "description", "service", "do_verification", "constraint_factory", "root"]:
1211 setattr(newobj, attr, getattr(self, attr))
1212 return newobj
1213
1215 """
1216 A Class representing a predefined query
1217 =======================================
1218
1219 Templates are ways of saving queries
1220 and allowing others to run them
1221 simply. They are the main interface
1222 to querying in the webapp
1223
1224 SYNOPSIS
1225 --------
1226
1227 example::
1228
1229 service = Service("http://www.flymine.org/query/service")
1230 template = service.get_template("Gene_Pathways")
1231 for row in template.results(A={"value":"eve"}):
1232 process_row(row)
1233 ...
1234
1235 A template is a subclass of query that comes predefined. They
1236 are typically retrieved from the webservice and run by specifying
1237 the values for their existing constraints. They are a concise
1238 and powerful way of running queries in the webapp.
1239
1240 Being subclasses of query, everything is true of them that is true
1241 of a query. They are just less work, as you don't have to design each
1242 one. Also, you can store your own templates in the web-app, and then
1243 access them as a private webservice method, from anywhere, making them
1244 a kind of query in the cloud - for this you will need to authenticate
1245 by providing log in details to the service.
1246
1247 The most significant difference is how constraint values are specified
1248 for each set of results.
1249
1250 @see: L{Template.results}
1251
1252 """
1254 """
1255 Constructor
1256 ===========
1257
1258 Instantiation is identical to that of queries. As with queries,
1259 these are best obtained from the intermine.webservice.Service
1260 factory methods.
1261
1262 @see: L{intermine.webservice.Service.get_template}
1263 """
1264 super(Template, self).__init__(*args, **kwargs)
1265 self.constraint_factory = constraints.TemplateConstraintFactory()
@property
def editable_constraints(self):
    """
    Return the list of constraints you can edit
    ===========================================

    Template.editable_constraints -> list(intermine.constraints.Constraint)

    Templates have a concept of editable constraints, which
    is a way of hiding complexity from users. An underlying query may have
    five constraints, but only expose the one that is actually
    interesting. This property returns this subset of constraints
    that have the editable flag set to true.

    @return: the members of self.constraints whose editable attribute
             is truthy, in their original order
    @rtype: list
    """
    # Build a real list: filter() only returns a list on Python 2, and
    # the documented return type of this property is a list.
    return [con for con in self.constraints if con.editable]
1282
def to_query_params(self):
    """
    Returns the query parameters needed for the webservice
    ======================================================

    Template.to_query_params() -> dict(string, string)

    Overrides the method of the same name in query to provide the
    parameters needed by the templates results service. These
    are slightly more complex:
        - name: The template's name
        - for each editable constraint that is switched on, every
          entry of its to_dict() output, with the key suffixed by an
          integer [i] that is incremented for each such constraint
          (starting at 1). Two keys are renamed on the way:
            - "path" is sent as constraint[i]
            - "extraValue" is sent as extra[i]

    Editable constraints that are not switched on contribute no
    parameters and do not consume an index.

    @rtype: dict
    """
    params = {'name': self.name}
    index = 1
    for con in self.editable_constraints:
        if not con.switched_on:
            # The original statement here was a bare `next`, which does
            # nothing in Python; skipping the constraint is the intent.
            continue
        for key, value in con.to_dict().items():
            if key == "extraValue":
                key = "extra"
            elif key == "path":
                key = "constraint"
            params[key + str(index)] = value
        index += 1
    return params
1314
1316 """
1317 Returns the path section pointing to the REST resource
1318 ======================================================
1319
1320 Template.get_results_path() S{->} str
1321
1322 Internally, this just calls a constant property
1323 in intermine.service.Service
1324
1325 This overrides the method of the same name in Query
1326
1327 @return: the path to the REST resource
1328 @rtype: string
1329 """
1330 return self.service.TEMPLATEQUERY_PATH
1331
1333 """
1334 Gets a template to run
1335 ======================
1336
1337 Template.get_adjusted_template(con_values) S{->} Template
1338
1339 When templates are run, they are first cloned, and their
1340 values are changed to those desired. This leaves the original
1341 template unchanged so it can be run again with different
1342 values. This method does the cloning and changing of constraint
1343 values
1344
1345 @raise ConstraintError: if the constraint values specify values for a non-editable constraint.
1346
1347 @rtype: L{Template}
1348 """
1349 clone = self.clone()
1350 for code, options in con_values.items():
1351 con = clone.get_constraint(code)
1352 if not con.editable:
1353 raise ConstraintError("There is a constraint '" + code
1354 + "' on this query, but it is not editable")
1355 for key, value in options.items():
1356 setattr(con, key, value)
1357 return clone
1358
def results(self, row="rr", start=0, size=None, **con_values):
    """
    Get an iterator over result rows
    ================================

    This method returns the same values with the
    same options as the method of the same name in
    Query (see intermine.query.Query). The main difference is in the
    arguments.

    The template result methods also accept a key-word pair
    set of arguments that are used to supply values
    to the editable constraints. eg::

      template.results(
        A = {"value": "eve"},
        B = {"op": ">", "value": 5000}
      )

    The keys should be codes for editable constraints (you can inspect these
    with Template.editable_constraints) and the values should be a dictionary
    of constraint properties to replace. You can replace the values for
    "op" (operator), "value", and "extra_value" and "values" in the case of
    ternary and multi constraints.

    The replacement values are applied by get_adjusted_template(), and
    the Query implementation of results() is then run on the template
    it returns, with row, start and size passed through positionally.

    @param row: the format for the row. Defaults to "rr".
    @type row: string
    @param start: passed through to Query.results. Defaults to 0.
    @param size: passed through to Query.results. Defaults to None.
    @param con_values: constraint code S{->} dict of properties to replace

    @rtype: L{intermine.webservice.ResultIterator}
    """
    adjusted = self.get_adjusted_template(con_values)
    run_query = super(Template, adjusted).results
    return run_query(row, start, size)
1388
1390 """
1391 Get a list of result rows
1392 =========================
1393
1394 This method performs the same as the method of the
1395 same name in Query, and it shares the semantics of
1396 Template.results().
1397
1398 @see: L{intermine.query.Query.get_results_list}
1399 @see: L{intermine.query.Template.results}
1400
1401 @rtype: list
1402
1403 """
1404 clone = self.get_adjusted_template(con_values)
1405 return super(Template, clone).get_results_list(row, start, size)
1406
def count(self, **con_values):
    """
    Return the total number of rows this template returns
    =====================================================

    Obtain the number of rows a particular query will
    return, without having to fetch and parse all the
    actual data. This method makes a request to the server
    to report the count for the query, and is sugar for a
    results call.

    The keyword arguments are handed to get_adjusted_template(), and
    the Query implementation of count() is then run on the template
    it returns.

    @param con_values: constraint code S{->} dict of properties to
                       replace, as accepted by Template.results

    @rtype: int
    @raise WebserviceError: if the request is unsuccessful.
    """
    adjusted = self.get_adjusted_template(con_values)
    count_rows = super(Template, adjusted).count
    return count_rows()
1423
1427
1430
1433