1 import re
2 from copy import deepcopy
3 from xml.dom import minidom, getDOMImplementation
4
5 from .util import openAnything, ReadableException
6 from .pathfeatures import PathDescription, Join, SortOrder, SortOrderList
7 import constraints
8
9 """
10 Classes representing queries against webservices
11 ================================================
12
13 Representations of queries, and templates.
14
15 """
16
17 __author__ = "Alex Kalderimis"
18 __organization__ = "InterMine"
19 __license__ = "LGPL"
20 __contact__ = "dev@intermine.org"
21
22
23 -class Query(object):
24 """
25 A Class representing a structured database query
26 ================================================
27
28 Objects of this class have properties that model the
29 attributes of the query, and methods for performing
30 the request.
31
32 SYNOPSIS
33 --------
34
35 example:
36
37 >>> service = Service("http://www.flymine.org/query/service")
38 >>> query = service.new_query()
39 >>>
40 >>> query.add_view("Gene.symbol", "Gene.pathways.name", "Gene.proteins.symbol")
41 >>> query.add_sort_order("Gene.pathways.name")
42 >>>
43 >>> query.add_constraint("Gene", "LOOKUP", "eve")
44 >>> query.add_constraint("Gene.pathways.name", "=", "Phosphate*")
45 >>>
46 >>> query.set_logic("A or B")
47 >>>
48 >>> for row in query.results():
49 ... handle_row(row)
50
51 Query objects represent structured requests for information over the database
52 housed at the datawarehouse whose webservice you are querying. They utilise
53 some of the concepts of relational databases, within an object-related
54 ORM context. If you don't know what that means, don't worry: you
55 don't need to write SQL, and the queries will be fast.
56
57 PRINCIPLES
58 ----------
59
60 The data model represents tables in the databases as classes, with records
61 within tables as instances of that class. The columns of the database are the
62 fields of that object::
63
64 The Gene table - showing two records/objects
65 +---------------------------------------------------+
66 | id | symbol | length | cyto-location | organism |
67 +----------------------------------------+----------+
68 | 01 | eve | 1539 | 46C10-46C10 | 01 |
69 +----------------------------------------+----------+
70 | 02 | zen | 1331 | 84A5-84A5 | 01 |
71 +----------------------------------------+----------+
72 ...
73
74 The organism table - showing one record/object
75 +----------------------------------+
76 | id | name | taxon id |
77 +----------------------------------+
78 | 01 | D. melanogaster | 7227 |
79 +----------------------------------+
80
81 Columns that contain a meaningful value are known as 'attributes' (in the tables above, that is
82 everything except the id columns). The other columns (such as "organism" in the gene table)
83 are ones that reference records of other tables (ie. other objects), and are called
84 references. You can refer to any field in any class, that has a connection,
85 however tenuous, with a table, by using dotted path notation::
86
87 Gene.organism.name -> the name column in the organism table, referenced by a record in the gene table
88
89 These paths, and the connections between records and tables they represent,
90 are the basis for the structure of InterMine queries.
91
92 THE STRUCTURE OF A QUERY
93 -----------------------
94
95 A query has two principal sets of properties:
96 - its view: the set of output columns
97 - its constraints: the set of rules for what to include
98
99 A query must have at least one output column in its view, but constraints
100 are optional - if you don't include any, you will get back every record
101 from the table (every object of that type)
102
103 In addition, the query must be coherent: if you have information about
104 an organism, and you want a list of genes, then the "Gene" table
105 should be the basis for your query, and as such the Gene class, which
106 represents this table, should be the root of all the paths that appear in it:
107
108 So, to take a simple example::
109
110 I have an organism name, and I want a list of genes:
111
112 The view is the list of things I want to know about those genes:
113
114 >>> query.add_view("Gene.name")
115 >>> query.add_view("Gene.length")
116 >>> query.add_view("Gene.proteins.sequence.length")
117
118 Note I can freely mix attributes and references, as long as every view ends in
119 an attribute (a meaningful value). As a short-cut I can also write:
120
121 >>> query.add_view("Gene.name", "Gene.length", "Gene.proteins.sequence.length")
122
123 or:
124
125 >>> query.add_view("Gene.name Gene.length Gene.proteins.sequence.length")
126
127 They are all equivalent.
128
129 Now I can add my constraints. As we mentioned, I have information about an organism, so:
130
131 >>> query.add_constraint("Gene.organism.name", "=", "D. melanogaster")
132
133 If I run this query, I will get literally millions of results -
134 it needs to be filtered further:
135
136 >>> query.add_constraint("Gene.proteins.sequence.length", "<", 500)
137
138 If that doesn't restrict things enough I can add more filters:
139
140 >>> query.add_constraint("Gene.symbol", "ONE OF", ["eve", "zen", "h"])
141
142 Now I am guaranteed to get only information on genes I am interested in.
143
144 Note, though, that because I have included the link (or "join") from Gene -> Protein,
145 this, by default, means that I only want genes that have protein information associated
146 with them. If in fact I want information on all genes, and just want to know the
147 protein information if it is available, then I can specify that with:
148
149 >>> query.add_join("Gene.proteins", "OUTER")
150
151 And if perhaps my query is not as simple as a strict cumulative filter, but I want all
152 D. mel genes that EITHER have a short protein sequence OR come from one of my favourite genes
153 (as unlikely as that sounds), I can specify the logic for that too:
154
155 >>> query.set_logic("A and (B or C)")
156
157 Each letter refers to one of the constraints - the codes are assigned in the order you add
158 the constraints. If you want to be absolutely certain about the constraints you mean, you
159 can use the constraint objects themselves:
160
161 >>> gene_is_eve = query.add_constraint("Gene.symbol", "=", "eve")
162 >>> gene_is_zen = query.add_constraint("Gene.symbol", "=", "zen")
163 >>>
164 >>> query.set_logic(gene_is_eve | gene_is_zen)
165
166 By default the logic is a straight cumulative filter (ie: A and B and C and D and ...)
167
168 Putting it all together:
169
170 >>> query.add_view("Gene.name", "Gene.length", "Gene.proteins.sequence.length")
171 >>> query.add_constraint("Gene.organism.name", "=", "D. melanogaster")
172 >>> query.add_constraint("Gene.proteins.sequence.length", "<", 500)
173 >>> query.add_constraint("Gene.symbol", "ONE OF", ["eve", "zen", "h"])
174 >>> query.add_join("Gene.proteins", "OUTER")
175 >>> query.set_logic("A and (B or C)")
176
177 And the query is defined.
178
179 Result Processing
180 -----------------
181
182 calling ".results()" on a query will return an iterator of rows, where each row
183 is a list of values, one for each field in the output columns (view) you selected.
184
185 To process these simply use normal iteration syntax:
186
187 >>> for row in query.results():
188 ... for column in row:
189 ... do_something(column)
190
191 Here each row will have a gene name, a gene length, and a sequence length, eg:
192
193 >>> print row
194 ["even skipped", "1359", "376"]
195
196 To make that clearer, you can ask for a dictionary instead of a list:
197
198 >>> for row in query.results("dict"):
199 ... print row
200 {"Gene.name":"even skipped","Gene.length":"1359","Gene.proteins.sequence.length":"376"}
201
202 Which means you can refer to columns by name:
203
204 >>> for row in query.results("dict"):
205 ... print "name is", row["Gene.name"]
206 ... print "length is", row["Gene.length"]
207
208 If you just want the raw results, for printing to a file, or for piping to another program,
209 you can request strings instead:
210
211 >>> for row in query.results("string"):
212 ... print(row)
213
214
215 Getting us to Generate your Code
216 --------------------------------
217
218 Not that you have to actually write any of this! The webapp will happily
219 generate the code for any query (and template) you can build in it. A good way to get
220 started is to use the webapp to generate your code, and then run it as scripts
221 to speed up your queries. You can always tinker with and edit the scripts you download.
222
223 To get generated queries, look for the "python" link at the bottom of query-builder and
224 template form pages, it looks a bit like this::
225
226 . +=====================================+=============
227 | |
228 | Perl | Python | Java [Help] |
229 | |
230 +==============================================
231
232 """
def __init__(self, model, service=None, validate=True):
    """
    Construct a new Query
    =====================

    Set up an empty query against an InterMine data warehouse.

    Queries are normally obtained from the factory method on
    intermine.webservice.Service, which handles construction,
    rather than by calling this constructor directly.

    @param model: an instance of L{intermine.model.Model}. Required
    @param service: an instance of L{intermine.service.Service}. Optional,
        but no requests can be made without one.
    @param validate: a boolean - defaults to True. When false, the query
        does not validate its parts as they are added. You should not
        set this to false.

    """
    # What the query is run against.
    self.model = model
    self.service = service

    # Descriptive metadata.
    self.name = ''
    self.description = ''

    # Whether each addition is verified at the time it is made.
    self.do_verification = validate

    # The component parts of the query, all initially empty.
    self.views = []
    self.joins = []
    self.path_descriptions = []
    self.constraint_dict = {}        # constraints that have a code, keyed by it
    self.uncoded_constraints = []    # constraints that have no code
    self._sort_order_list = SortOrderList()

    # Constraint logic; None until set_logic() stores an explicit value.
    self._logic = None
    self.constraint_factory = constraints.ConstraintFactory()
    self._logic_parser = constraints.LogicParser(self)
267
@classmethod
def from_xml(cls, xml, *args, **kwargs):
    """
    Deserialise a query serialised to XML
    =====================================

    This method is used to instantiate serialised queries.
    It is used by intermine.webservice.Service objects
    to instantiate Template objects and it can be used
    to read in queries you have saved to a file.

    Any further positional and keyword arguments are passed
    straight through to the class constructor.

    @param xml: The xml as a file name, url, or string

    @raise QueryParseError: if the query cannot be parsed
    @raise ModelError: if the query has illegal paths in it
    @raise ConstraintError: if the constraints don't make sense

    @rtype: L{Query}
    """
    obj = cls(*args, **kwargs)
    # Parts are added unchecked; the whole query is verified once at the end.
    obj.do_verification = False

    f = openAnything(xml)
    try:
        doc = minidom.parse(f)
    finally:
        # Always release the handle, even when the XML is malformed.
        f.close()

    queries = doc.getElementsByTagName('query')
    assert len(queries) == 1, "wrong number of queries in xml"
    q = queries[0]
    obj.name = q.getAttribute('name')
    obj.description = q.getAttribute('description')
    obj.add_view(q.getAttribute('view'))

    for p in q.getElementsByTagName('pathDescription'):
        obj.add_path_description(
            p.getAttribute('pathString'),
            p.getAttribute('description'))

    for j in q.getElementsByTagName('join'):
        obj.add_join(j.getAttribute('path'), j.getAttribute('style'))

    for c in q.getElementsByTagName('constraint'):
        path = c.getAttribute('path')
        # minidom returns '' (never None) for an absent attribute, so test
        # for emptiness to fall back on the enclosing <node> element's path.
        if not path:
            if c.parentNode.tagName != "node":
                raise QueryParseError("Constraints must have a path")
            path = c.parentNode.getAttribute('path')

        con_args = {
            'path': path,
            'op': c.getAttribute('op'),
            'value': c.getAttribute('value'),
            'code': c.getAttribute('code'),
            'subclass': c.getAttribute('type'),
            'editable': c.getAttribute('editable'),
            'optional': c.getAttribute('switchable'),
            'extra_value': c.getAttribute('extraValue'),
            'loopPath': c.getAttribute('loopPath'),
        }

        # Multi-value constraints carry their values as <value> children.
        values = []
        for val_e in c.getElementsByTagName('value'):
            texts = [node.data for node in val_e.childNodes
                     if node.nodeType == node.TEXT_NODE]
            values.append(' '.join(texts))
        if values:
            con_args["values"] = values

        # Drop unset attributes. A new dict is built because deleting
        # keys while iterating over items() is an error.
        con_args = dict((k, v) for k, v in con_args.items()
                        if v is not None and v != '')

        if "loopPath" in con_args:
            # Loop constraints use IS / IS NOT in place of = / !=.
            con_args["op"] = {
                "=": "IS",
                "!=": "IS NOT",
            }.get(con_args.get("op"))

        con = obj.add_constraint(**con_args)
        if not con:
            raise ConstraintError(
                "error adding constraint with args: " + repr(con_args))

    obj.verify()

    return obj
343
def verify(self):
    """
    Validate the query
    ==================

    Invalid queries will fail to run, and it is not always
    obvious why. This routine runs every individual check
    (views, constraints, joins, path descriptions and sort
    order) so that problems surface with informative messages,
    and then switches on verification of later additions.

    This method is called immediately after a query is fully
    deserialised.

    @raise ModelError: if the paths are invalid
    @raise QueryError: if there are errors in query construction
    @raise ConstraintError: if there are errors in constraint construction

    """
    self.verify_views()
    self.verify_constraint_paths()
    self.verify_join_paths()
    self.verify_pd_paths()
    self.validate_sort_order()
    # Only reached when every check above passed.
    self.do_verification = True
368
def add_view(self, *paths):
    """
    Add one or more views to the list of output columns
    ===================================================

    example::

        query.add_view("Gene.name Gene.organism.name")

    This is the main method for adding views to the list
    of output columns. Each argument may be a single path,
    a space and/or comma delimited string of paths, or a
    list, set or tuple of paths; any combination is flattened
    into one sequence. Empty fragments (for example from
    leading or trailing whitespace) are ignored. If verification
    is switched on the new views are validated before being added.

    Output columns must be valid paths according to the
    data model, and they must represent attributes of tables

    @see: intermine.model.Model
    @see: intermine.model.Path
    @see: intermine.model.Attribute
    """
    views = []
    for p in paths:
        if isinstance(p, (set, list, tuple)):
            views.extend(p)
        else:
            # Split on whitespace and/or commas, discarding empty pieces.
            views.extend(v for v in re.split(r"(?:,?\s+|,)", p) if v)
    if self.do_verification:
        self.verify_views(views)
    self.views.extend(views)
400
def verify_views(self, views=None):
    """
    Check to see if the views given are valid
    =========================================

    This method checks to see if the views:
      - are valid according to the model
      - represent attributes

    @param views: the paths to check (defaults to all views on the query)

    @see: L{intermine.model.Attribute}

    @raise intermine.model.ModelError: if the paths are invalid
    @raise ConstraintError: if the paths are not attributes
    """
    if views is None:
        views = self.views
    # The subclass mapping is the same for every path, so build it once.
    subclasses = self.get_subclass_dict()
    for view in views:
        path = self.model.make_path(view, subclasses)
        if not path.is_attribute():
            raise ConstraintError("'" + str(path)
                + "' does not represent an attribute")
421
def add_constraint(self, *args, **kwargs):
    """
    Add a constraint (filter on records)
    ====================================

    example::

        query.add_constraint("Gene.symbol", "=", "zen")

    The arguments are handed to the query's constraint factory,
    which builds a constraint from them. This allows you to add
    constraints of different types without having to know
    or care what their classes or implementation details are.

    If verification is switched on, the new constraint is checked
    with Query.verify_constraint_paths() before it is stored.
    Constraints that have a code are stored under that code;
    all others are kept in the list of uncoded constraints.

    @see: L{intermine.constraints}

    @rtype: L{intermine.constraints.Constraint}
    """
    con = self.constraint_factory.make_constraint(*args, **kwargs)
    if self.do_verification:
        self.verify_constraint_paths([con])
    if hasattr(con, "code"):
        self.constraint_dict[con.code] = con
    else:
        self.uncoded_constraints.append(con)
    return con
455
def verify_constraint_paths(self, cons=None):
    """
    Check that the constraints are valid
    ====================================

    This method will check the path attribute of each constraint.
    In addition it will:
      - Check that BinaryConstraints and MultiConstraints have an Attribute as their path
      - Check that TernaryConstraints have a Reference as theirs
      - Check that SubClassConstraints have a correct subclass relationship
      - Check that LoopConstraints have a valid loopPath, of a compatible type
      - Check that ListConstraints refer to an object

    @param cons: The constraints to check (defaults to all constraints on the query)

    @raise ModelError: if the paths are not valid
    @raise ConstraintError: if the constraints do not satisfy the above rules

    """
    if cons is None:
        cons = self.constraints
    # The subclass mapping does not change while checking, so build it once.
    subclasses = self.get_subclass_dict()
    for con in cons:
        pathA = self.model.make_path(con.path, subclasses)
        # TernaryConstraint is tested before BinaryConstraint, so it takes
        # precedence when a constraint is an instance of both.
        if isinstance(con, constraints.TernaryConstraint):
            if pathA.get_class() is None:
                raise ConstraintError("'" + str(pathA) + "' does not represent a class, or a reference to a class")
        elif isinstance(con, (constraints.BinaryConstraint, constraints.MultiConstraint)):
            if not pathA.is_attribute():
                raise ConstraintError("'" + str(pathA) + "' does not represent an attribute")
        elif isinstance(con, constraints.SubClassConstraint):
            pathB = self.model.make_path(con.subclass, subclasses)
            if not pathB.get_class().isa(pathA.get_class()):
                raise ConstraintError("'" + con.subclass + "' is not a subclass of '" + con.path + "'")
        elif isinstance(con, constraints.LoopConstraint):
            pathB = self.model.make_path(con.loopPath, subclasses)
            for path in [pathA, pathB]:
                if not path.get_class():
                    raise ConstraintError("'" + str(path) + "' does not refer to an object")
            (classA, classB) = (pathA.get_class(), pathB.get_class())
            # The two ends are compatible if either class is a kind of the other.
            if not classA.isa(classB) and not classB.isa(classA):
                raise ConstraintError("the classes are of incompatible types: " + str(classA) + "," + str(classB))
        elif isinstance(con, constraints.ListConstraint):
            if not pathA.get_class():
                raise ConstraintError("'" + str(pathA) + "' does not refer to an object")
499
@property
def constraints(self):
    """
    Returns the constraints of the query
    ====================================

    Query.constraints S{->} list(intermine.constraints.Constraint)

    The constraints that have a code come first, sorted by
    that code (normally the order they were added to the
    query), followed by the constraints without a code in
    the order they were added.

    @rtype: list(Constraint)
    """
    ret = sorted(self.constraint_dict.values(), key=lambda con: con.code)
    ret.extend(self.uncoded_constraints)
    return ret
517
def get_constraint(self, code):
    """
    Returns the constraint with the given code
    ==========================================

    Returns the constraint with the given code, if it exists.
    If no such constraint exists, it throws a ConstraintError

    @param code: the code of the constraint to look up

    @return: the constraint corresponding to the given code
    @rtype: L{intermine.constraints.CodedConstraint}

    @raise ConstraintError: if no constraint has the given code
    """
    try:
        return self.constraint_dict[code]
    except KeyError:
        # str() keeps the message buildable when code is not a string.
        raise ConstraintError("There is no constraint with the code '"
            + str(code) + "' on this query")
534
def add_join(self, *args, **kwargs):
    """
    Add a join statement to the query
    =================================

    example::

        query.add_join("Gene.proteins", "OUTER")

    A join statement determines whether a reference restricts
    the result set to records for which that reference exists.
    For example, if one had a query with the view::

        "Gene.name", "Gene.proteins.name"

    Then in the normal case (that of an INNER join), we would only
    get Genes that also have at least one protein that they reference.
    Simply by asking for this output column you are placing a
    restriction on the information you get back.

    If in fact you wanted all genes, regardless of whether they had
    proteins associated with them or not, but would like to know
    _what_ proteins where there are any, then you need
    to specify this reference to be an OUTER join::

        query.add_join("Gene.proteins", "OUTER")

    Now you will get many more rows of results, some of which will
    have "null" values where the protein name would have been.

    The arguments are passed to L{intermine.pathfeatures.Join}. If
    verification is switched on, the join is checked with
    Query.verify_join_paths() before it is stored: the path must
    be valid and must be a reference.

    @raise ModelError: if the path is invalid
    @raise TypeError: if the join style is invalid

    @rtype: L{intermine.pathfeatures.Join}
    """
    join = Join(*args, **kwargs)
    if self.do_verification:
        self.verify_join_paths([join])
    self.joins.append(join)
    return join
581
def verify_join_paths(self, joins=None):
    """
    Check that the joins are valid
    ==============================

    Joins must have valid paths, and they must refer to references.

    @param joins: the joins to check (defaults to all joins on the query)

    @raise ModelError: if the paths are invalid
    @raise QueryError: if the paths are not references
    """
    if joins is None:
        joins = self.joins
    for join in joins:
        path = self.model.make_path(join.path, self.get_subclass_dict())
        if not path.is_reference():
            raise QueryError("'" + join.path + "' is not a reference")
597
def add_path_description(self, *args, **kwargs):
    """
    Add a path description to the query
    ===================================

    example::

        query.add_path_description("Gene.symbol", "The symbol for this gene")

    If you wish you can add annotations to your query that describe
    what the component paths are and what they do - this is only really
    useful if you plan to keep your query (perhaps as xml) or store it
    as a template.

    The arguments are passed to L{intermine.pathfeatures.PathDescription}.
    If verification is switched on, the description is checked with
    Query.verify_pd_paths() before it is stored.

    @rtype: L{intermine.pathfeatures.PathDescription}

    """
    path_description = PathDescription(*args, **kwargs)
    if self.do_verification:
        self.verify_pd_paths([path_description])
    self.path_descriptions.append(path_description)
    return path_description
619
def verify_pd_paths(self, pds=None):
    """
    Check that the path of the path description is valid
    ====================================================

    Checks for consistency with the data model

    @param pds: the path descriptions to check (defaults to all
        path descriptions on the query)

    @raise ModelError: if the paths are invalid
    """
    if pds is None:
        pds = self.path_descriptions
    for pd in pds:
        self.model.validate_path(pd.path, self.get_subclass_dict())
632
@property
def coded_constraints(self):
    """
    Returns the list of constraints that have a code
    ================================================

    Query.coded_constraints S{->} list(intermine.constraints.CodedConstraint)

    This returns an up to date list, sorted by code, of the
    constraints that can be used in a logic expression. The only
    kind of constraint that this excludes, at present, is
    SubClassConstraints

    @rtype: list(L{intermine.constraints.CodedConstraint})
    """
    return sorted(self.constraint_dict.values(), key=lambda con: con.code)
648
def get_logic(self):
    """
    Returns the logic expression for the query
    ==========================================

    This returns the up to date logic expression. When no logic
    has been set explicitly, the default is all the coded
    constraints combined with "+" in code order, or an empty
    string if the query has no coded constraints.

    The LogicGroup object stringifies to a string that can be parsed to
    obtain itself (eg: "A and (B or C or D)").

    @rtype: L{intermine.constraints.LogicGroup}
    """
    if self._logic is not None:
        return self._logic
    coded = self.coded_constraints
    if not coded:
        # reduce() raises TypeError on an empty sequence; with nothing to
        # combine, the logic is simply empty.
        return ""
    # reduce is a builtin only on Python 2; functools has it on both 2 and 3.
    from functools import reduce
    return reduce(lambda x, y: x + y, coded)
666
def set_logic(self, value):
    """
    Sets the Logic given the appropriate input
    ==========================================

    example::

        Query.set_logic("A and (B or C)")

    This sets the logic to the appropriate value. If the value is
    already a LogicGroup, it is accepted as it is, otherwise
    it is handed to the query's logic parser.

    If verification is switched on, the logic is then validated
    with a call to validate_logic() before it is stored.

    @param value: a LogicGroup, or a logic expression to parse

    @raise LogicParseError: if there is a syntax error in the logic
    """
    if isinstance(value, constraints.LogicGroup):
        logic = value
    else:
        logic = self._logic_parser.parse(value)
    if self.do_verification:
        self.validate_logic(logic)
    self._logic = logic
690
def validate_logic(self, logic=None):
    """
    Validates the query logic
    =========================

    Attempts to validate the logic by checking
    that every coded constraint is included
    at least once.

    When no logic is given and none has been set on the
    query there is nothing to check: the default logic is
    built from every coded constraint.

    @param logic: the logic to check (defaults to the logic set on the query)

    @raise QueryError: if not every coded constraint is represented
    """
    if logic is None:
        logic = self._logic
    if logic is None:
        # No explicit logic anywhere; previously this crashed on
        # None.get_codes().
        return
    logic_codes = set(logic.get_codes())
    for con in self.coded_constraints:
        if con.code not in logic_codes:
            raise QueryError("Constraint " + con.code + repr(con)
                + " is not mentioned in the logic: " + str(logic))
708
def get_default_sort_order(self):
    """
    Gets the sort order when none has been specified
    ================================================

    The default is to sort by the first view in ascending order.

    @raise QueryError: if the view is empty

    @rtype: L{intermine.pathfeatures.SortOrderList}
    """
    try:
        return SortOrderList((self.views[0], SortOrder.ASC))
    except IndexError:
        # There is no first view to sort by.
        raise QueryError("Query view is empty")
725
def get_sort_order(self):
    """
    Return a sort order for the query
    =================================

    This method returns the sort order if one has been set,
    otherwise it returns the default sort order

    @raise QueryError: if the view is empty

    @rtype: L{intermine.pathfeatures.SortOrderList}
    """
    if self._sort_order_list.is_empty():
        return self.get_default_sort_order()
    return self._sort_order_list
742
def add_sort_order(self, path, direction=None):
    """
    Adds a sort order to the query
    ==============================

    example::

        Query.add_sort_order("Gene.name", "DESC")

    This method adds a sort order to the query.
    A query can have multiple sort orders, which are
    assessed in sequence.

    If a query has two sort-orders, for example,
    the first being "Gene.organism.name asc",
    and the second being "Gene.name desc", you would have
    the list of genes grouped by organism, with the
    lists within those groupings in reverse alphabetical
    order by gene name.

    If verification is switched on, the sort order is checked
    with validate_sort_order() before it is stored.

    @param path: the path to sort by
    @param direction: the direction to sort in. NOTE(review): the
        original signature line is not visible; ascending is
        assumed to be the default - confirm.
    """
    if direction is None:
        direction = SortOrder.ASC
    so = SortOrder(path, direction)
    if self.do_verification:
        self.validate_sort_order(so)
    self._sort_order_list.append(so)
769
def validate_sort_order(self, *so_elems):
    """
    Check the validity of the sort order
    ====================================

    Checks that the sort order paths are:
      - valid paths
      - in the view

    @param so_elems: the sort order elements to check (defaults to
        all the sort order elements on the query)

    @raise QueryError: if the sort order is not in the view
    @raise ModelError: if the path is invalid

    """
    if not so_elems:
        so_elems = self._sort_order_list

    for so in so_elems:
        self.model.validate_path(so.path, self.get_subclass_dict())
        if so.path not in self.views:
            raise QueryError("Sort order element is not in the view: " + so.path)
790
def get_subclass_dict(self):
    """
    Return the current mapping of class to subclass
    ===============================================

    This method returns a mapping, built from the subclass
    constraints on the query, which the model uses for assessing
    whether certain paths are valid. For instance, if you subclass
    MicroArrayResult to be FlyAtlasResult, you can refer to the
    .presentCall attributes of fly atlas results.
    MicroArrayResults do not have this attribute, and a path such as::

        Gene.microArrayResult.presentCall

    would be marked as invalid unless the dictionary is provided.

    Users most likely will not need to ever call this method.

    @rtype: dict(string, string)
    """
    subclass_dict = {}
    for c in self.constraints:
        if isinstance(c, constraints.SubClassConstraint):
            # Maps the constrained path to the subclass it is constrained to.
            subclass_dict[c.path] = c.subclass
    return subclass_dict
815
def results(self, row="list"):
    """
    Return an iterator over result rows
    ===================================

    Usage::

        for row in query.results():
            do_sth_with(row)

    The request is delegated to the query's service, which is
    given the results path, the query parameters, the row
    format and the views.

    @param row: the format for the row. Defaults to "list". Valid options are
        "dict", "list", "jsonrows", "jsonobject", "tsv", "csv".
    @type row: string

    @rtype: L{intermine.webservice.ResultIterator}

    @raise WebserviceError: if the request is unsuccessful
    """
    path = self.get_results_path()
    params = self.to_query_params()
    view = self.views
    return self.service.get_results(path, params, row, view)
838
def get_results_path(self):
    """
    Returns the path section pointing to the REST resource
    ======================================================

    Query.get_results_path() -> str

    Internally, this just returns the QUERY_PATH attribute
    of the query's service.

    @rtype: str
    """
    return self.service.QUERY_PATH
852
def get_results_list(self, rowformat="list"):
    """
    Get a list of result rows
    =========================

    This method is a shortcut so that you do not have to
    do a list comprehension yourself on the iterator that
    is normally returned. If you have a very large result
    set (in the millions of rows) you will not want to
    have the whole list in memory at once, but there may
    be other circumstances when you might want to keep the whole
    list in one place.

    @param rowformat: the format for the row. Defaults to "list". Valid options are
        "dict", "list", "jsonrows", "jsonobject", "tsv", "csv".
    @type rowformat: string

    @rtype: list

    @raise WebserviceError: if the request is unsuccessful

    """
    path = self.get_results_path()
    params = self.to_query_params()
    return self.service.get_results_list(path, params, rowformat, self.views)
880
def children(self):
    """
    Returns the child objects of the query
    ======================================

    This method is used during the serialisation of queries
    to xml. It is unlikely you will need access to this as a whole.
    Consider using "path_descriptions", "joins", "constraints" instead

    The result is a new list holding the path descriptions,
    then the joins, then the constraints.

    @see: Query.path_descriptions
    @see: Query.joins
    @see: Query.constraints

    @return: the child elements of this query
    @rtype: list
    """
    return sum([self.path_descriptions, self.joins, self.constraints], [])
898
def to_query_params(self):
    """
    Returns the parameters to be passed to the webservice
    =====================================================

    The query is responsible for producing its own query
    parameters. These consist simply of:
      - query: the xml representation of the query

    @rtype: dict

    """
    return {'query': self.to_xml()}
914
def to_Node(self):
    """
    Returns a DOM node representing the query
    =========================================

    This is an intermediate step in the creation of the
    xml serialised version of the query. You probably
    won't need to call this directly.

    The root "query" element carries the name, model, view,
    sort order and description as attributes, plus the constraint
    logic when there is more than one coded constraint. Each child
    of the query becomes a child element.

    @rtype: xml.minidom.Node
    """
    impl = getDOMImplementation()
    doc = impl.createDocument(None, "query", None)
    query = doc.documentElement

    query.setAttribute('name', self.name)
    query.setAttribute('model', self.model.name)
    query.setAttribute('view', ' '.join(self.views))
    query.setAttribute('sortOrder', str(self.get_sort_order()))
    query.setAttribute('longDescription', self.description)
    # Logic is only written out when there is more than one coded constraint.
    if len(self.coded_constraints) > 1:
        query.setAttribute('constraintLogic', str(self.get_logic()))

    for c in self.children():
        element = doc.createElement(c.child_type)
        for name, value in c.to_dict().items():
            if isinstance(value, (set, list)):
                # Collections become one sub-element per member...
                for v in value:
                    subelement = doc.createElement(name)
                    subelement.appendChild(doc.createTextNode(v))
                    element.appendChild(subelement)
            else:
                # ...and everything else becomes an attribute.
                element.setAttribute(name, value)
        query.appendChild(element)
    return query
951
def to_xml(self):
    """
    Return an XML serialisation of the query
    ========================================

    This method serialises the current state of the query to an
    xml string, suitable for storing, or sending over the
    internet to the webservice. It is the result of calling
    toxml() on the node built by to_Node.

    @return: the serialised xml string
    @rtype: string
    """
    return self.to_Node().toxml()
980
def clone(self):
    """
    Performs a deep clone
    =====================

    This method will produce a clone that is independent,
    and can be altered without affecting the original,
    but starts off with the exact same state as it.

    The new object is built by calling the caller's own class with
    the model. The joins, views, sort order list, logic, path
    descriptions and constraint dictionary are deep-copied; the name,
    description, service, verification flag and constraint factory
    are assigned by reference.

    @return: same class as caller
    """
    newobj = self.__class__(self.model)

    # Mutable query state: each clone gets its own independent copy.
    for attr in ["joins", "views", "_sort_order_list", "_logic",
                 "path_descriptions", "constraint_dict"]:
        setattr(newobj, attr, deepcopy(getattr(self, attr)))

    # Everything else is handed over as the same object.
    for attr in ["name", "description", "service", "do_verification",
                 "constraint_factory"]:
        setattr(newobj, attr, getattr(self, attr))
    return newobj
1003
1005 """
1006 A Class representing a predefined query
1007 =======================================
1008
1009 Templates are ways of saving queries
1010 and allowing others to run them
1011 simply. They are the main interface
1012 to querying in the webapp
1013
1014 SYNOPSIS
1015 --------
1016
1017 example::
1018
1019 service = Service("http://www.flymine.org/query/service")
1020 template = service.get_template("Gene_Pathways")
1021 for row in template.results(A={"value":"eve"}):
1022 process_row(row)
1023 ...
1024
1025 A template is a subclass of query that comes predefined. They
1026 are typically retrieved from the webservice and run by specifying
1027 the values for their existing constraints. They are a concise
1028 and powerful way of running queries in the webapp.
1029
1030 Being subclasses of query, everything is true of them that is true
1031 of a query. They are just less work, as you don't have to design each
1032 one. Also, you can store your own templates in the web-app, and then
1033 access them as a private webservice method, from anywhere, making them
1034 a kind of query in the cloud - for this you will need to authenticate
1035 by providing log in details to the service.
1036
1037 The most significant difference is how constraint values are specified
1038 for each set of results.
1039
1040 @see: L{Template.results}
1041
1042 """
def __init__(self, *args, **kwargs):
    """
    Constructor
    ===========

    Instantiation is identical to that of queries: all arguments are
    passed straight through to the parent constructor. As with queries,
    these are best obtained from the intermine.webservice.Service
    factory methods.

    After the parent constructor has run, the constraint factory is
    replaced with a constraints.TemplateConstraintFactory.

    @see: L{intermine.webservice.Service.get_template}
    """
    super(Template, self).__init__(*args, **kwargs)
    self.constraint_factory = constraints.TemplateConstraintFactory()
@property
def editable_constraints(self):
    """
    Return the list of constraints you can edit
    ===========================================

    Template.editable_constraints -> list(intermine.constraints.Constraint)

    Templates have a concept of editable constraints, which
    is a way of hiding complexity from users. An underlying query may have
    five constraints, but only expose the one that is actually
    interesting. This property returns this subset of constraints
    that have the editable flag set to true, in the same order as
    they appear in the constraints of the template.

    @rtype: list
    """
    # A comprehension (rather than filter) guarantees a real list on
    # every Python version, as the documented return type promises.
    return [con for con in self.constraints if con.editable]
1072
def to_query_params(self):
    """
    Returns the query parameters needed for the webservice
    ======================================================

    Template.to_query_params() -> dict(string, string)

    Overrides the method of the same name in query to provide the
    parameters needed by the templates results service. These
    are slightly more complex:
      - name: The template's name
      - for each editable constraint that is switched on
        (where [i] is an integer, starting at 1, incremented
        for each constraint included):
          - constraint[i]: the path
          - op[i]: the operator
          - value[i]: the value
          - code[i]: the code
          - extra[i]: the extra value for ternary constraints (optional)

    Any other key produced by the constraint's to_dict() is passed
    through under its own name with the same numeric suffix.
    Constraints that are switched off are left out entirely and do
    not consume an index.

    @rtype: dict
    """
    params = {'name': self.name}
    index = 1
    for con in self.editable_constraints:
        if not con.switched_on:
            # Skip disabled constraints (a bare `next` here would be a
            # no-op expression and would not skip anything).
            continue
        for key, value in con.to_dict().items():
            # The webservice uses different names for these two keys.
            if key == "extraValue":
                key = "extra"
            elif key == "path":
                key = "constraint"
            params[key + str(index)] = value
        index += 1
    return params
1102
def get_results_path(self):
    """
    Returns the path section pointing to the REST resource
    ======================================================

    Template.get_results_path() S{->} str

    Internally, this just reads the TEMPLATEQUERY_PATH
    attribute of the service this template is attached to.

    This overrides the method of the same name in Query

    @return: the path to the REST resource
    @rtype: string
    """
    return self.service.TEMPLATEQUERY_PATH
1119
def get_adjusted_template(self, con_values):
    """
    Gets a template to run
    ======================

    Template.get_adjusted_template(con_values) S{->} Template

    When templates are run, they are first cloned, and their
    values are changed to those desired. This leaves the original
    template unchanged so it can be run again with different
    values. This method does the cloning and changing of constraint
    values.

    @param con_values: a mapping from constraint code to a dictionary
        of attribute names and the new values to set on that constraint
    @type con_values: dict

    @raise ConstraintError: if the constraint values specify values for a non-editable constraint.

    @return: the adjusted clone; the template itself is not modified here
    @rtype: L{Template}
    """
    clone = self.clone()
    for code, options in con_values.items():
        con = clone.get_constraint(code)
        if not con.editable:
            raise ConstraintError("There is a constraint '" + code
                                  + "' on this query, but it is not editable")
        # Each option overwrites the attribute of the same name
        # on the cloned constraint.
        for key, value in options.items():
            setattr(con, key, value)
    return clone
1146
def results(self, row="list", **con_values):
    """
    Get an iterator over result rows
    ================================

    This method returns the same values with the
    same options as the method of the same name in
    Query (see intermine.query.Query). The main difference is in the
    arguments.

    The template result methods also accept a key-word pair
    set of arguments that are used to supply values
    to the editable constraints. eg::

      template.results(
        A = {"value": "eve"},
        B = {"op": ">", "value": 5000}
      )

    The keys should be codes for editable constraints (you can inspect these
    with Template.editable_constraints) and the values should be a dictionary
    of constraint properties to replace. You can replace the values for
    "op" (operator), "value", and "extra_value" and "values" in the case of
    ternary and multi constraints.

    The template itself is left unchanged: the values are applied to
    a clone obtained from get_adjusted_template, and the request is
    run against that clone.

    @param row: the row format, passed through to Query.results
    @rtype: L{intermine.webservice.ResultIterator}
    """
    clone = self.get_adjusted_template(con_values)
    # Call the parent implementation on the clone, not on self.
    return super(Template, clone).results(row)
1176
def get_results_list(self, row="list", **con_values):
    """
    Get a list of result rows
    =========================

    This method performs the same as the method of the
    same name in Query, and it shares the semantics of
    Template.results(): the keyword arguments supply replacement
    values for editable constraints, which are applied to a clone
    of this template before the request is run.

    @see: L{intermine.query.Query.get_results_list}
    @see: L{intermine.query.Template.results}

    @param row: the row format, passed through to Query.get_results_list
    @rtype: list

    """
    clone = self.get_adjusted_template(con_values)
    # Call the parent implementation on the clone, not on self.
    return super(Template, clone).get_results_list(row)
1194
1197
1200
1203