1 import re
2 import string
3 from intermine.pathfeatures import PathFeature, PATH_PATTERN
4 from intermine.util import ReadableException
7 """
8 A class representing constraints on a query
9 ===========================================
10
11 All constraints inherit from this class, which
12 simply defines the type of element for the
13 purposes of serialisation.
14 """
15 child_type = "constraint"
16
18 """
19 A class representing nodes in a logic graph
20 ===========================================
21
22 Objects which can be represented as nodes
23 in the AST of a constraint logic graph should
24 inherit from this class, which defines
25 methods for overloading built-in operations.
26 """
27
29 """
30 Overloads +
31 ===========
32
33 Logic may be defined by using addition to sum
34 logic nodes::
35
36 > query.set_logic(con_a + con_b + con_c)
37 > str(query.logic)
38 ... A and B and C
39
40 """
41 if not isinstance(other, LogicNode):
42 return NotImplemented
43 else:
44 return LogicGroup(self, 'AND', other)
45
47 """
48 Overloads &
49 ===========
50
51 Logic may be defined by using the & operator::
52
53 > query.set_logic(con_a & con_b)
54 > str(query.logic)
55 ... A and B
56
57 """
58 if not isinstance(other, LogicNode):
59 return NotImplemented
60 else:
61 return LogicGroup(self, 'AND', other)
62
64 """
65 Overloads |
66 ===========
67
68 Logic may be defined by using the | operator::
69
70 > query.set_logic(con_a | con_b)
71 > str(query.logic)
72 ... A or B
73
74 """
75 if not isinstance(other, LogicNode):
76 return NotImplemented
77 else:
78 return LogicGroup(self, 'OR', other)
79
81 """
82 A logic node that represents two sub-nodes joined in some way
83 =============================================================
84
85 A logic group is a logic node with two child nodes, which are
86 either connected by AND or by OR logic.
87 """
88
89 LEGAL_OPS = frozenset(['AND', 'OR'])
90
def __init__(self, left, op, right, parent=None):
    """
    Constructor
    ===========

    Builds a new node out of two child nodes (left and right)
    joined by a logical operator.

    Any child that is itself a LogicGroup has its parent
    reference pointed at the newly built group.

    @param left: The left-hand child node
    @param op: The joining operator - must be in LEGAL_OPS
    @type op: string
    @param right: The right-hand child node
    @param parent: The group that contains this one, if any (default = None)

    @raise TypeError: if op is not a legal logical operation
    """
    if op not in self.LEGAL_OPS:
        raise TypeError(op + " is not a legal logical operation")
    self.left, self.right = left, right
    self.op = op
    self.parent = parent
    # Adopt nested groups: only groups track a parent, plain
    # constraints are left untouched.
    for child in (left, right):
        if isinstance(child, LogicGroup):
            child.parent = self
110
112 """
113 Provide a sensible representation of a node
114 """
115 return '<' + self.__class__.__name__ + ': ' + str(self) + '>'
116
118 """
119 Provide a human readable version of the group. The
120 string version should be able to be parsed back into the
121 original logic group.
122 """
123 core = ' '.join(map(str, [self.left, self.op.lower(), self.right]))
124 if self.parent and self.op != self.parent.op:
125 return '(' + core + ')'
126 else:
127 return core
129 """
130 Get a list of all constraint codes used in this group.
131 """
132 codes = []
133 for node in [self.left, self.right]:
134 if isinstance(node, LogicGroup):
135 codes.extend(node.get_codes())
136 else:
137 codes.append(node.code)
138 return codes
139
141 """
142 An error representing problems in parsing constraint logic.
143 """
144 pass
145
147 """
148 Parses logic strings into logic groups
149 ======================================
150
151 Instances of this class are used to parse logic strings into
152 abstract syntax trees, and then logic groups. This aims to provide
153 robust parsing of logic strings, with the ability to identify syntax
154 errors in such strings.
155 """
156
158 """
159 Constructor
160 ===========
161
162 Parsers need access to the query they are parsing for, in
163 order to reference the constraints on the query.
164
165 @param query: The parent query object
166 @type query: intermine.query.Query
167 """
168 self._query = query
169
171 """
172 Get the constraint with the given code
173 ======================================
174
175 This method fetches the constraint from the
176 parent query with the matching code.
177
178 @see: intermine.query.Query.get_constraint
179 @rtype: intermine.constraints.CodedConstraint
180 """
181 return self._query.get_constraint(code)
182
184 """
185 Get the priority for a given operator
186 =====================================
187
188 Operators have a specific precedence, from highest
189 to lowest:
190 - ()
191 - AND
192 - OR
193
194 This method returns an integer which can be
195 used to compare operator priorities.
196
197 @rtype: int
198 """
199 return {
200 "AND": 2,
201 "OR" : 1,
202 "(" : 3,
203 ")" : 3
204 }.get(op)
205
206 ops = {
207 "AND" : "AND",
208 "&" : "AND",
209 "&&" : "AND",
210 "OR" : "OR",
211 "|" : "OR",
212 "||" : "OR",
213 "(" : "(",
214 ")" : ")"
215 }
216
def parse(self, logic_str):
    """
    Parse a logic string into an abstract syntax tree
    =================================================

    Takes a string such as "A and B or C and D", and parses it
    into a structure which represents this logic as a binary
    abstract syntax tree. The above string would parse to
    "(A and B) or (C and D)", as AND binds more tightly than OR.

    The input is upper-cased before it is tokenised, so parsing is
    case-insensitive. Tokens are separated by whitespace, except
    for brackets, which may be written directly against their
    neighbours (eg. "(A or B)"). Operator aliases such as "&&"
    and "|" are replaced by their canonical names.

    Note that only singly rooted trees are parsed.

    @param logic_str: The logic definition as a string
    @type logic_str: string

    @rtype: LogicGroup

    @raise LogicParseError: if there is a syntax error in the logic
    """
    tokens = []
    # str.split() with no arguments discards leading and trailing
    # whitespace, so padded input does not produce empty tokens.
    for chunk in logic_str.upper().split():
        # The capturing group keeps each bracket as a token of its own
        # and leaves multi-character codes in one piece.
        for piece in re.split(r"([()])", chunk):
            if piece:
                tokens.append(self.ops.get(piece, piece))
    if not tokens:
        # An empty string used to reach the later stages as a single
        # empty token; keep doing so rather than change what callers see.
        tokens = [""]
    self.check_syntax(tokens)
    postfix_tokens = self.infix_to_postfix(tokens)
    return self.postfix_to_tree(postfix_tokens)
264
266 """
267 Check the syntax for errors before parsing
268 ==========================================
269
270 Syntax is checked before parsing to provide better errors,
271 which should hopefully lead to more informative error messages.
272
273 This checks for:
274 - correct operator positions (cannot put two codes next to each other without intervening operators)
275 - correct grouping (all brackets are matched, and contain valid expressions)
276
277 @param infix_tokens: The input parsed into a list of tokens.
278 @type infix_tokens: iterable
279
280 @raise LogicParseError: if there is a problem.
281 """
282 need_an_op = False
283 need_binary_op_or_closing_bracket = False
284 processed = []
285 open_brackets = 0
286 for token in infix_tokens:
287 if token not in self.ops:
288 if need_an_op:
289 raise LogicParseError("Expected an operator after: '" + ' '.join(processed) + "'"
290 + " - but got: '" + token + "'")
291 if need_binary_op_or_closing_bracket:
292 raise LogicParseError("Logic grouping error after: '" + ' '.join(processed) + "'"
293 + " - expected an operator or a closing bracket")
294
295 need_an_op = True
296 else:
297 need_an_op = False
298 if token == "(":
299 if processed and processed[-1] not in self.ops:
300 raise LogicParseError("Logic grouping error after: '" + ' '.join(processed) + "'"
301 + " - got an unexpeced opening bracket")
302 if need_binary_op_or_closing_bracket:
303 raise LogicParseError("Logic grouping error after: '" + ' '.join(processed) + "'"
304 + " - expected an operator or a closing bracket")
305
306 open_brackets += 1
307 elif token == ")":
308 need_binary_op_or_closing_bracket = True
309 open_brackets -= 1
310 else:
311 need_binary_op_or_closing_bracket = False
312 processed.append(token)
313 if open_brackets != 0:
314 if open_brackets < 0:
315 message = "Unmatched closing bracket in: "
316 else:
317 message = "Unmatched opening bracket in: "
318 raise LogicParseError(message + '"' + ' '.join(infix_tokens) + '"')
319
def infix_to_postfix(self, infix_tokens):
    """
    Convert a list of infix tokens to postfix notation
    ==================================================

    Take in a set of infix tokens and return the set parsed
    to a postfix sequence.

    Operators are held back on a stack until their operands have
    been emitted. An operator is only released by an incoming
    operator that binds no more tightly than it does, so AND groups
    before OR, and operators of equal priority group left to right.
    Brackets are consumed here and never appear in the output for
    balanced input.

    @param infix_tokens: The list of tokens
    @type infix_tokens: iterable

    @rtype: list
    """
    stack = []
    postfix_tokens = []
    for token in infix_tokens:
        if token not in self.ops:
            # Operands (constraint codes) go straight to the output
            postfix_tokens.append(token)
        elif token == "(":
            stack.append(token)
        elif token == ")":
            # Flush the operators of this bracketed group, then discard
            # the opening bracket. Operators that precede the group
            # stay on the stack, so that their priority is still
            # compared against whatever follows the closing bracket.
            while stack:
                last_op = stack.pop()
                if last_op == "(":
                    break
                postfix_tokens.append(last_op)
        else:
            # An opening bracket acts as a barrier: operators outside
            # a group are never released from inside it.
            while (stack and stack[-1] != "("
                   and self.get_priority(stack[-1]) >= self.get_priority(token)):
                postfix_tokens.append(stack.pop())
            stack.append(token)
    while stack:
        postfix_tokens.append(stack.pop())
    return postfix_tokens
359
def postfix_to_tree(self, postfix_tokens):
    """
    Convert a set of structured tokens to a single LogicGroup
    =========================================================

    Walks a list of tokens in postfix notation, combining the two
    most recent operands each time an operator is met, until a
    single root remains.

    Operands are looked up as constraints only at the point where
    they are combined, so a sequence holding a single code and no
    operator is returned as that bare code.

    @param postfix_tokens: A list of tokens in postfix notation.
    @type postfix_tokens: list

    @rtype: LogicGroup

    @raise AssertionError: if the tree doesn't have a unique root.
    """
    def as_node(item):
        # Groups built earlier are reused as they are; anything else
        # is still a code and has to be looked up.
        if isinstance(item, LogicGroup):
            return item
        return self.get_constraint(item)

    pending = []
    for token in postfix_tokens:
        if token in self.ops:
            # Operands come off the stack in reverse order
            rhs = pending.pop()
            lhs = pending.pop()
            rhs = as_node(rhs)
            lhs = as_node(lhs)
            pending.append(LogicGroup(lhs, token, rhs))
        else:
            pending.append(token)
    assert len(pending) == 1, "Tree doesn't have a unique root"
    return pending.pop()
388
390 """
391 A parent class for all constraints that have codes
392 ==================================================
393
394 Constraints that have codes are the principal logical
395 filters on queries, and need to be referred to individually
396 (hence the codes). They will all have a logical operation they
397 embody, and so have a reference to an operator.
398
399 This class is not meant to be instantiated directly, but instead
400 inherited from to supply default behaviour.
401 """
402
403 OPS = set([])
404
def __init__(self, path, op, code="A"):
    """
    Constructor
    ===========

    @param path: The path to constrain
    @type path: string

    @param op: The operation to apply - must be in the OPS set
    @type op: string

    @param code: The code for this constraint (default = "A")
    @type code: string

    @raise TypeError: if op is not one of the operators in OPS
    """
    # The factory in this module tries each constraint class in turn
    # and moves on when it catches a TypeError, so the exception type
    # here matters.
    if op not in self.OPS:
        raise TypeError(op + " not in " + str(self.OPS))
    self.code = code
    self.op = op
    super(CodedConstraint, self).__init__(path)
421
423 """
424 Stringify to the code they are refered to by.
425 """
426 return self.code
428 """
429 Provide a human readable representation of the logic.
430 This method is called by repr.
431 """
432 s = super(CodedConstraint, self).to_string()
433 return " ".join([s, self.op])
434
436 """
437 Return a dict object which can be used to construct a
438 DOM element with the appropriate attributes.
439 """
440 d = super(CodedConstraint, self).to_dict()
441 d.update(op=self.op, code=self.code)
442 return d
443
445 """
446 Constraints which have just a path and an operator
447 ==================================================
448
449 These constraints are simple assertions about the
450 object/value refered to by the path. The set of valid
451 operators is:
452 - IS NULL
453 - IS NOT NULL
454
455 """
456 OPS = set(['IS NULL', 'IS NOT NULL'])
457
459 """
460 Constraints which have an operator and a value
461 ==============================================
462
463 These constraints assert a relationship between the
464 value represented by the path (it must be a representation
465 of a value, ie an Attribute) and another value - ie. the
466 operator takes two parameters.
467
468 In all case the 'left' side of the relationship is the path,
469 and the 'right' side is the supplied value.
470
471 Valid operators are:
472 - = (equal to)
473 - != (not equal to)
474 - < (less than)
475 - > (greater than)
476 - <= (less than or equal to)
477 - >= (greater than or equal to)
478 - LIKE (same as equal to, but with implied wildcards)
479 - CONTAINS (same as equal to, but with implied wildcards)
480 - NOT LIKE (same as not equal to, but with implied wildcards)
481
482 """
483 OPS = set(['=', '!=', '<', '>', '<=', '>=', 'LIKE', 'NOT LIKE', 'CONTAINS'])
def __init__(self, path, op, value, code="A"):
    """
    Constructor
    ===========

    @param path: The path to constrain
    @type path: string

    @param op: How the value at the path relates to the supplied
               value (must be a valid operator)
    @type op: string

    @param value: The value that the stored value is compared to
    @type value: string or number

    @param code: The code for this constraint (default = "A")
    @type code: string
    """
    # Record the right-hand side, then let the parent constructor
    # deal with the path, operator and code.
    self.value = value
    super(BinaryConstraint, self).__init__(path, op, code)
503
505 """
506 Provide a human readable representation of the logic.
507 This method is called by repr.
508 """
509 s = super(BinaryConstraint, self).to_string()
510 return " ".join([s, str(self.value)])
512 """
513 Return a dict object which can be used to construct a
514 DOM element with the appropriate attributes.
515 """
516 d = super(BinaryConstraint, self).to_dict()
517 d.update(value=str(self.value))
518 return d
519
521 """
522 Constraints which refer to an objects membership of lists
523 =========================================================
524
525 These constraints assert a membership relationship between the
526 object represented by the path (it must always be an object, ie.
527 a Reference or a Class) and a List. Lists are collections of
528 objects in the database which are stored in InterMine
529 datawarehouses. These lists must be set up before the query is run, either
530 manually in the webapp or by using the webservice API list
531 upload feature.
532
533 Valid operators are:
534 - IN
535 - NOT IN
536
537 """
538 OPS = set(['IN', 'NOT IN'])
539 - def __init__(self, path, op, list_name, code="A"):
542
544 """
545 Provide a human readable representation of the logic.
546 This method is called by repr.
547 """
548 s = super(ListConstraint, self).to_string()
549 return " ".join([s, str(self.list_name)])
551 """
552 Return a dict object which can be used to construct a
553 DOM element with the appropriate attributes.
554 """
555 d = super(ListConstraint, self).to_dict()
556 d.update(value=str(self.list_name))
557 return d
558
560 """
561 Constraints that refer to object identity
562 =========================================
563
564 These constraints assert that two paths refer to the same
565 object.
566
567 Valid operators:
568 - IS
569 - IS NOT
570
571 The operators IS and IS NOT map to the ops "=" and "!=" when they
572 are used in XML serialisation.
573
574 """
575 OPS = set(['IS', 'IS NOT'])
576 SERIALISED_OPS = {'IS':'=', 'IS NOT':'!='}
def __init__(self, path, op, loopPath, code="A"):
    """
    Constructor
    ===========

    @param path: The path to constrain
    @type path: string

    @param op: How the path relates to the other path
               (must be a valid operator)
    @type op: string

    @param loopPath: The other path, which is checked for identity
                     with the constrained path
    @type loopPath: string

    @param code: The code for this constraint (default = "A")
    @type code: string
    """
    # Record the second path, then let the parent constructor
    # deal with the path, operator and code.
    self.loopPath = loopPath
    super(LoopConstraint, self).__init__(path, op, code)
596
598 """
599 Provide a human readable representation of the logic.
600 This method is called by repr.
601 """
602 s = super(LoopConstraint, self).to_string()
603 return " ".join([s, self.loopPath])
605 """
606 Return a dict object which can be used to construct a
607 DOM element with the appropriate attributes.
608 """
609 d = super(LoopConstraint, self).to_dict()
610 d.update(loopPath=self.loopPath, op=self.SERIALISED_OPS[self.op])
611 return d
612
614 """
615 Constraints for broad, general searching over all fields
616 ========================================================
617
618 These constraints request a wide-ranging search for matching
619 fields over all aspects of an object, including up to coercion
620 from related classes.
621
622 Valid operators:
623 - LOOKUP
624
625 To aid disambiguation, Ternary constraints accept an extra_value as
626 well as the main value.
627 """
628 OPS = set(['LOOKUP'])
def __init__(self, path, op, value, extra_value=None, code="A"):
    """
    Constructor
    ===========

    @param path: The path to constrain. Here it must be a class,
                 or a reference to a class.
    @type path: string

    @param op: How the path relates to the supplied value
               (must be a valid operator)
    @type op: string

    @param value: The value to check other fields against.
    @type value: string

    @param extra_value: A further value for disambiguation. What it
                        means varies by class and configuration. For
                        example, if the class of the object is Gene,
                        then extra_value will refer to the Organism.
                        (default = None)
    @type extra_value: string

    @param code: The code for this constraint (default = "A")
    @type code: string
    """
    # Only the disambiguating value is stored here; everything else
    # is passed on to the parent constructor.
    self.extra_value = extra_value
    super(TernaryConstraint, self).__init__(path, op, value, code)
653
655 """
656 Provide a human readable representation of the logic.
657 This method is called by repr.
658 """
659 s = super(TernaryConstraint, self).to_string()
660 if self.extra_value is None:
661 return s
662 else:
663 return " ".join([s, 'IN', self.extra_value])
665 """
666 Return a dict object which can be used to construct a
667 DOM element with the appropriate attributes.
668 """
669 d = super(TernaryConstraint, self).to_dict()
670 if self.extra_value is not None:
671 d.update(extraValue=self.extra_value)
672 return d
673
675 """
676 Constraints for checking membership of a set of values
677 ======================================================
678
679 These constraints require the value they constrain to be
680 either a member of a set of values, or not a member.
681
682 Valid operators:
683 - ONE OF
684 - NONE OF
685
686 These constraints are similar in use to List constraints, with
687 the following differences:
688 - The list in this case is a defined set of values that is passed
689 along with the query itself, rather than anything stored
690 independently on a server.
691 - The object of the constraint is the value of an attribute, rather
692 than an object's identity.
693 """
694 OPS = set(['ONE OF', 'NONE OF'])
def __init__(self, path, op, values, code="A"):
    """
    Constructor
    ===========

    @param path: The path to constrain. Here it must be an attribute
                 of some object.
    @type path: string

    @param op: How the value at the path relates to the supplied
               values (must be a valid operator)
    @type op: string

    @param values: The set of values which the object of the
                   constraint either must or must not belong to.
    @type values: set or list

    @param code: The code for this constraint (default = "A")
    @type code: string

    @raise TypeError: if values is neither a set nor a list
    """
    if isinstance(values, (set, list)):
        self.values = values
        super(MultiConstraint, self).__init__(path, op, code)
    else:
        raise TypeError("values must be a set or a list, not " + str(type(values)))
716
718 """
719 Provide a human readable representation of the logic.
720 This method is called by repr.
721 """
722 s = super(MultiConstraint, self).to_string()
723 return ' '.join([s, str(self.values)])
725 """
726 Return a dict object which can be used to construct a
727 DOM element with the appropriate attributes.
728 """
729 d = super(MultiConstraint, self).to_dict()
730 d.update(value=self.values)
731 return d
732
734 """
735 Constraints on the class of a reference
736 =======================================
737
738 If an object has a reference X to another object of type A,
739 and type B extends type A, then any object of type B may be
740 the value of the reference X. If you only want to see X's
741 which are B's, this may be achieved with subclass constraints,
742 which allow the type of an object to be limited to one of the
743 subclasses (at any depth) of the class type required
744 by the attribute.
745
746 These constraints do not use operators. Since they cannot be
747 conditional (eg. "A is a B or A is a C" would not be possible
748 in an InterMine query), they do not have codes
749 and cannot be referenced in logic expressions.
750 """
752 """
753 Constructor
754 ===========
755
756 @param path: The path to constrain. This must refer to a class or a reference to a class.
757 @type path: str
758
759 @param subclass: The class to subclass the path to. This must be a simple class name (not a dotted name)
760 @type subclass: str
761 """
762 if not PATH_PATTERN.match(subclass):
763 raise TypeError
764 self.subclass = subclass
765 super(SubClassConstraint, self).__init__(path)
767 """
768 Provide a human readable representation of the logic.
769 This method is called by repr.
770 """
771 s = super(SubClassConstraint, self).to_string()
772 return s + ' ISA ' + self.subclass
774 """
775 Return a dict object which can be used to construct a
776 DOM element with the appropriate attributes.
777 """
778 d = super(SubClassConstraint, self).to_dict()
779 d.update(type=self.subclass)
780 return d
781
784 """
785 A mixin to supply the behaviour and state of constraints on templates
786 =====================================================================
787
788 Constraints on templates can also be designated as "on", "off" or "locked", which refers
789 to whether they are active or not. Inactive constraints are still configured, but behave
790 as if absent for the purpose of results. In addition, template constraints can be
791 editable or not. Only values for editable constraints can be provided when requesting results,
792 and only constraints that can participate in logic expressions can be editable.
793 """
794 REQUIRED = "locked"
795 OPTIONAL_ON = "on"
796 OPTIONAL_OFF = "off"
def __init__(self, editable=True, optional="locked"):
    """
    Constructor
    ===========

    Translates the single "optional" setting into two flags:
    whether the constraint is optional at all, and whether it is
    currently switched on.

    @param editable: Whether or not this constraint should accept new values.
    @type editable: bool

    @param optional: Whether a value for this constraint must be provided when running.
    @type optional: "locked", "on" or "off"

    @raise TypeError: if optional is not one of the three allowed values
    """
    self.editable = editable
    if optional == TemplateConstraint.REQUIRED:
        # Locked constraints are not optional and are always active
        self.optional, self.switched_on = False, True
        return
    self.optional = True
    if optional == TemplateConstraint.OPTIONAL_ON:
        self.switched_on = True
    elif optional == TemplateConstraint.OPTIONAL_OFF:
        self.switched_on = False
    else:
        raise TypeError("Bad value for optional")
820
821 @property
823 """
824 True if a value must be provided for this constraint.
825
826 @rtype: bool
827 """
828 return not self.optional
829
830 @property
832 """
833 True if this constraint is currently inactive.
834
835 @rtype: bool
836 """
837 return not self.switched_on
838
840 """
841 Returns either "locked", "on" or "off".
842 """
843 if not self.optional:
844 return "locked"
845 else:
846 if self.switched_on:
847 return "on"
848 else:
849 return "off"
850
852 """
853 Make sure this constraint is active
854 ===================================
855
856 @raise ValueError: if the constraint is not editable and optional
857 """
858 if self.editable and self.optional:
859 self.switched_on = True
860 else:
861 raise ValueError, "This constraint is not switchable"
862
864 """
865 Make sure this constraint is inactive
866 =====================================
867
868 @raise ValueError: if the constraint is not editable and optional
869 """
870 if self.editable and self.optional:
871 self.switched_on = False
872 else:
873 raise ValueError, "This constraint is not switchable"
874
876 """
877 Provide a template specific human readable representation of the
878 constraint. This method is called by repr.
879 """
880 if self.editable:
881 editable = "editable"
882 else:
883 editable = "non-editable"
884 return '(' + editable + ", " + self.get_switchable_status() + ')'
886 """
887 A static function to use when building template constraints.
888 ============================================================
889
890 dict -> (dict, dict)
891
892 Splits a dictionary of arguments into two separate dictionaries, one with
893 arguments for the main constraint, and one with arguments for the template
894 portion of the behaviour
895 """
896 c_args = {}
897 t_args = {}
898 for k, v in args.items():
899 if k == "editable":
900 t_args[k] = v == "true"
901 elif k == "optional":
902 t_args[k] = v
903 else:
904 c_args[k] = v
905 return (c_args, t_args)
906
919
932
945
958
971
984
997
999 """
1000 A factory for creating constraints from a set of arguments.
1001 ===========================================================
1002
1003 A constraint factory is responsible for finding an appropriate
1004 constraint class for the given arguments and instantiating the
1005 constraint.
1006 """
1007 CONSTRAINT_CLASSES = set([
1008 UnaryConstraint, BinaryConstraint, TernaryConstraint,
1009 MultiConstraint, SubClassConstraint, LoopConstraint,
1010 ListConstraint])
1011
1013 """
1014 Constructor
1015 ===========
1016
1017 Creates a new ConstraintFactory
1018 """
1019 self._codes = iter(string.ascii_uppercase)
1020
1022 """
1023 Return the available constraint code.
1024
1025 @return: A single uppercase character
1026 @rtype: str
1027 """
1028 return self._codes.next()
1029
1031 """
1032 Create a constraint from a set of arguments.
1033 ============================================
1034
1035 Finds a suitable constraint class, and instantiates it.
1036
1037 @rtype: Constraint
1038 """
1039 for CC in self.CONSTRAINT_CLASSES:
1040 try:
1041 c = CC(*args, **kwargs)
1042 if hasattr(c, "code"): c.code = self.get_next_code()
1043 return c
1044 except TypeError, e:
1045 pass
1046 raise TypeError("No matching constraint class found for "
1047 + str(args) + ", " + str(kwargs))
1048
1065