1 import re
2 import string
3 from .pathfeatures import PathFeature, PATH_PATTERN
4 from .util import ReadableException
7 """
8 A class representing constraints on a query
9 ===========================================
10
11 All constraints inherit from this class, which
12 simply defines the type of element for the
13 purposes of serialisation.
14 """
15 child_type = "constraint"
16
18 """
19 A class representing nodes in a logic graph
20 ===========================================
21
22 Objects which can be represented as nodes
23 in the AST of a constraint logic graph should
24 inherit from this class, which defines
25 methods for overloading built-in operations.
26 """
27
29 """
30 Overloads +
31 ===========
32
33 Logic may be defined by using addition to sum
34 logic nodes::
35
36 > query.set_logic(con_a + con_b + con_c)
37 > str(query.logic)
38 ... A and B and C
39
40 """
41 if not isinstance(other, LogicNode):
42 return NotImplemented
43 else:
44 return LogicGroup(self, 'AND', other)
45
47 """
48 Overloads &
49 ===========
50
51 Logic may be defined by using the & operator::
52
53 > query.set_logic(con_a & con_b)
54 > str(query.logic)
55 ... A and B
56
57 """
58 if not isinstance(other, LogicNode):
59 return NotImplemented
60 else:
61 return LogicGroup(self, 'AND', other)
62
64 """
65 Overloads |
66 ===========
67
68 Logic may be defined by using the | operator::
69
70 > query.set_logic(con_a | con_b)
71 > str(query.logic)
72 ... A or B
73
74 """
75 if not isinstance(other, LogicNode):
76 return NotImplemented
77 else:
78 return LogicGroup(self, 'OR', other)
79
81 """
82 A logic node that represents two sub-nodes joined in some way
83 =============================================================
84
85 A logic group is a logic node with two child nodes, which are
86 either connected by AND or by OR logic.
87 """
88
89 LEGAL_OPS = frozenset(['AND', 'OR'])
90
def __init__(self, left, op, right, parent=None):
    """
    Constructor
    ===========

    Builds a new node composed of two child nodes (left and right)
    joined by a logical operator.

    Groups may hold a reference to their parent; any child that is
    itself a group is re-parented to this new node.

    @param left: The left-hand child node
    @param op: The operator joining the children - must be in LEGAL_OPS
    @type op: string
    @param right: The right-hand child node
    @param parent: The group containing this one, if any

    @raise TypeError: if op is not a legal logical operation
    """
    if op not in self.LEGAL_OPS:
        raise TypeError(op + " is not a legal logical operation")
    self.parent, self.left, self.right, self.op = parent, left, right, op
    # Child groups need to know their parent so that they can decide
    # whether to bracket themselves when stringified.
    for child in (self.left, self.right):
        if isinstance(child, LogicGroup):
            child.parent = self
110
112 """
113 Provide a sensible representation of a node
114 """
115 return '<' + self.__class__.__name__ + ': ' + str(self) + '>'
116
118 """
119 Provide a human readable version of the group. The
120 string version should be able to be parsed back into the
121 original logic group.
122 """
123 core = ' '.join(map(str, [self.left, self.op.lower(), self.right]))
124 return '(' + core + ')' if self.parent and self.op != self.parent.op else core
126 """
127 Get a list of all constraint codes used in this group.
128 """
129 codes = []
130 for node in [self.left, self.right]:
131 if isinstance(node, LogicGroup):
132 codes.extend(node.get_codes())
133 else:
134 codes.append(node.code)
135 return codes
136
138 """
139 An error representing problems in parsing constraint logic.
140 """
141 pass
142
144 """
145 Parses logic strings into logic groups
146 ======================================
147
148 Instances of this class are used to parse logic strings into
149 abstract syntax trees, and then logic groups. This aims to provide
150 robust parsing of logic strings, with the ability to identify syntax
151 errors in such strings.
152 """
153
155 """
156 Constructor
157 ===========
158
159 Parsers need access to the query they are parsing for, in
160 order to reference the constraints on the query.
161
162 @param query: The parent query object
163 @type query: intermine.query.Query
164 """
165 self._query = query
166
168 """
169 Get the constraint with the given code
170 ======================================
171
172 This method fetches the constraint from the
173 parent query with the matching code.
174
175 @see: intermine.query.Query.get_constraint
176 @rtype: intermine.constraints.CodedConstraint
177 """
178 return self._query.get_constraint(code)
179
181 """
182 Get the priority for a given operator
183 =====================================
184
185 Operators have a specific precedence, from highest
186 to lowest:
187 - ()
188 - AND
189 - OR
190
191 This method returns an integer which can be
192 used to compare operator priorities.
193
194 @rtype: int
195 """
196 return {
197 "AND": 2,
198 "OR" : 1,
199 "(" : 3,
200 ")" : 3
201 }.get(op)
202
203 ops = {
204 "AND" : "AND",
205 "&" : "AND",
206 "&&" : "AND",
207 "OR" : "OR",
208 "|" : "OR",
209 "||" : "OR",
210 "(" : "(",
211 ")" : ")"
212 }
213
def parse(self, logic_str):
    """
    Parse a logic string into an abstract syntax tree
    =================================================

    Takes a string such as "A and B or C and D", and parses it
    into a structure which represents this logic as a binary
    abstract syntax tree. The above string would parse to
    "(A and B) or (C and D)", as AND binds more tightly than OR.

    Parsing is case insensitive. Brackets and the symbolic operators
    (&, &&, |, ||) are recognised whether or not they are separated
    from their neighbours by whitespace, and leading or trailing
    whitespace is ignored.

    Note that only singly rooted trees are parsed.

    @param logic_str: The logic definition as a string
    @type logic_str: string

    @rtype: LogicGroup

    @raise LogicParseError: if there is a syntax error in the logic
    """
    # A raw string is essential here: in an ordinary string literal
    # "\b" is a backspace character rather than a word boundary.
    # Alternatives are tried in order, so "&&" must precede "&".
    token_pattern = r"\(|\)|&&|\|\||&|\||[^\s()&|]+"
    raw_tokens = re.findall(token_pattern, logic_str.upper())
    # Normalise operator aliases (eg. "&&" -> "AND"); codes pass through.
    tokens = [self.ops.get(token, token) for token in raw_tokens]
    if not tokens:
        # Blank input has always been handed on as a single empty
        # token, which the later stages pass straight through.
        tokens = [""]
    self.check_syntax(tokens)
    postfix_tokens = self.infix_to_postfix(tokens)
    return self.postfix_to_tree(postfix_tokens)
251
253 """
254 Check the syntax for errors before parsing
255 ==========================================
256
257 Syntax is checked before parsing to provide better errors,
258 which should hopefully lead to more informative error messages.
259
260 This checks for:
261 - correct operator positions (cannot put two codes next to each other without intervening operators)
262 - correct grouping (all brackets are matched, and contain valid expressions)
263
264 @param infix_tokens: The input parsed into a list of tokens.
265 @type infix_tokens: iterable
266
267 @raise LogicParseError: if there is a problem.
268 """
269 need_an_op = False
270 need_binary_op_or_closing_bracket = False
271 processed = []
272 open_brackets = 0
273 for token in infix_tokens:
274 if token not in self.ops:
275 if need_an_op:
276 raise LogicParseError("Expected an operator after: '" + ' '.join(processed) + "'"
277 + " - but got: '" + token + "'")
278 if need_binary_op_or_closing_bracket:
279 raise LogicParseError("Logic grouping error after: '" + ' '.join(processed) + "'"
280 + " - expected an operator or a closing bracket")
281
282 need_an_op = True
283 else:
284 need_an_op = False
285 if token == "(":
286 if processed and processed[-1] not in self.ops:
287 raise LogicParseError("Logic grouping error after: '" + ' '.join(processed) + "'"
288 + " - got an unexpeced opening bracket")
289 if need_binary_op_or_closing_bracket:
290 raise LogicParseError("Logic grouping error after: '" + ' '.join(processed) + "'"
291 + " - expected an operator or a closing bracket")
292
293 open_brackets += 1
294 elif token == ")":
295 need_binary_op_or_closing_bracket = True
296 open_brackets -= 1
297 else:
298 need_binary_op_or_closing_bracket = False
299 processed.append(token)
300 if open_brackets != 0:
301 if open_brackets < 0:
302 message = "Unmatched closing bracket in: "
303 else:
304 message = "Unmatched opening bracket in: "
305 raise LogicParseError(message + '"' + ' '.join(infix_tokens) + '"')
306
def infix_to_postfix(self, infix_tokens):
    """
    Convert a list of infix tokens to postfix notation
    ==================================================

    Take in a sequence of infix tokens and return the same
    expression as a postfix (reverse Polish) sequence, using
    the shunting-yard algorithm. AND binds more tightly than OR,
    operators of equal priority associate to the left, and
    brackets override both.

    @param infix_tokens: The list of tokens
    @type infix_tokens: iterable

    @rtype: list
    """
    stack = []
    postfix_tokens = []
    for token in infix_tokens:
        if token not in self.ops:
            # Constraint codes go straight to the output
            postfix_tokens.append(token)
        elif token == "(":
            stack.append(token)
        elif token == ")":
            # Flush everything back to the matching opening bracket,
            # which is itself discarded.
            while stack:
                last_op = stack.pop()
                if last_op == "(":
                    break
                postfix_tokens.append(last_op)
        else:
            # An opening bracket is a barrier: operators waiting outside
            # the current group must not be emitted until it closes.
            while (stack and stack[-1] != "("
                   and self.get_priority(stack[-1]) >= self.get_priority(token)):
                postfix_tokens.append(stack.pop())
            stack.append(token)
    while stack:
        last_op = stack.pop()
        # Brackets never belong in postfix output
        if last_op != "(":
            postfix_tokens.append(last_op)
    return postfix_tokens
346
def postfix_to_tree(self, postfix_tokens):
    """
    Convert a set of structured tokens to a single LogicGroup
    =========================================================

    Folds a sequence of tokens in postfix notation into a tree:
    each operator consumes the two most recent operands and
    replaces them with a LogicGroup. Operands that are still raw
    codes at that point are looked up with get_constraint.

    @param postfix_tokens: A list of tokens in postfix notation.
    @type postfix_tokens: list

    @rtype: LogicGroup

    @raise AssertionError: if the tree doesn't have a unique root.
    """
    def as_node(operand):
        # Sub-trees built earlier are used as they are; anything else
        # is a constraint code to be resolved.
        if isinstance(operand, LogicGroup):
            return operand
        return self.get_constraint(operand)

    pending = []
    for token in postfix_tokens:
        if token in self.ops:
            # The most recently pushed operand is the right-hand side
            rhs, lhs = pending.pop(), pending.pop()
            rhs, lhs = as_node(rhs), as_node(lhs)
            pending.append(LogicGroup(lhs, token, rhs))
        else:
            pending.append(token)
    assert len(pending) == 1, "Tree doesn't have a unique root"
    return pending.pop()
375
377 """
378 A parent class for all constraints that have codes
379 ==================================================
380
381 Constraints that have codes are the principal logical
382 filters on queries, and need to be referred to individually
383 (hence the codes). They will all have a logical operation they
384 embody, and so have a reference to an operator.
385
386 This class is not meant to be instantiated directly, but instead
387 inherited from to supply default behaviour.
388 """
389
390 OPS = set([])
391
def __init__(self, path, op, code="A"):
    """
    Constructor
    ===========

    Validates the operator against this class's OPS set, records
    the operator and code, and passes the path to the parent
    constructor.

    @param path: The path to constrain
    @type path: string

    @param op: The operation to apply - must be in the OPS set
    @type op: string

    @param code: The code for this constraint (default = "A")
    @type code: string

    @raise TypeError: if op is not in the OPS set
    """
    legal_ops = self.OPS
    if not (op in legal_ops):
        raise TypeError(op + " not in " + str(legal_ops))
    self.op, self.code = op, code
    super(CodedConstraint, self).__init__(path)
408
410 """
411 Stringify to the code they are referred to by.
412 """
413 return self.code
415 """
416 Provide a human readable representation of the logic.
417 This method is called by repr.
418 """
419 s = super(CodedConstraint, self).to_string()
420 return " ".join([s, self.op])
421
423 """
424 Return a dict object which can be used to construct a
425 DOM element with the appropriate attributes.
426 """
427 d = super(CodedConstraint, self).to_dict()
428 d.update(op=self.op, code=self.code)
429 return d
430
432 """
433 Constraints which have just a path and an operator
434 ==================================================
435
436 These constraints are simple assertions about the
437 object/value referred to by the path. The set of valid
438 operators is:
439 - IS NULL
440 - IS NOT NULL
441
442 """
443 OPS = set(['IS NULL', 'IS NOT NULL'])
444
446 """
447 Constraints which have an operator and a value
448 ==============================================
449
450 These constraints assert a relationship between the
451 value represented by the path (it must be a representation
452 of a value, ie an Attribute) and another value - ie. the
453 operator takes two parameters.
454
455 In all cases the 'left' side of the relationship is the path,
456 and the 'right' side is the supplied value.
457
458 Valid operators are:
459 - = (equal to)
460 - != (not equal to)
461 - < (less than)
462 - > (greater than)
463 - <= (less than or equal to)
464 - >= (greater than or equal to)
465 - LIKE (same as equal to, but with implied wildcards)
466 - CONTAINS (same as equal to, but with implied wildcards)
467 - NOT LIKE (same as not equal to, but with implied wildcards)
468
469 """
470 OPS = set(['=', '!=', '<', '>', '<=', '>=', 'LIKE', 'NOT LIKE', 'CONTAINS'])
def __init__(self, path, op, value, code="A"):
    """
    Constructor
    ===========

    Stores the comparison value, then hands the path, operator
    and code to the parent constructor.

    @param path: The path to constrain
    @type path: string

    @param op: The relationship between the value represented by the path
               and the value provided (must be a valid operator)
    @type op: string

    @param value: The value to compare the stored value to
    @type value: string or number

    @param code: The code for this constraint (default = "A")
    @type code: string
    """
    self.value = value
    parent_init = super(BinaryConstraint, self).__init__
    parent_init(path, op, code)
490
492 """
493 Provide a human readable representation of the logic.
494 This method is called by repr.
495 """
496 s = super(BinaryConstraint, self).to_string()
497 return " ".join([s, str(self.value)])
499 """
500 Return a dict object which can be used to construct a
501 DOM element with the appropriate attributes.
502 """
503 d = super(BinaryConstraint, self).to_dict()
504 d.update(value=str(self.value))
505 return d
506
508 """
509 Constraints which refer to an object's membership of lists
510 =========================================================
511
512 These constraints assert a membership relationship between the
513 object represented by the path (it must always be an object, ie.
514 a Reference or a Class) and a List. Lists are collections of
515 objects in the database which are stored in InterMine
516 datawarehouses. These lists must be set up before the query is run, either
517 manually in the webapp or by using the webservice API list
518 upload feature.
519
520 Valid operators are:
521 - IN
522 - NOT IN
523
524 """
525 OPS = set(['IN', 'NOT IN'])
526 - def __init__(self, path, op, list_name, code="A"):
529
531 """
532 Provide a human readable representation of the logic.
533 This method is called by repr.
534 """
535 s = super(ListConstraint, self).to_string()
536 return " ".join([s, str(self.list_name)])
538 """
539 Return a dict object which can be used to construct a
540 DOM element with the appropriate attributes.
541 """
542 d = super(ListConstraint, self).to_dict()
543 d.update(value=str(self.list_name))
544 return d
545
547 """
548 Constraints which refer to object identity
549 ==========================================
550
551 These constraints assert that two paths refer to the same
552 object.
553
554 Valid operators:
555 - IS
556 - IS NOT
557
558 The operators IS and IS NOT map to the ops "=" and "!=" when they
559 are used in XML serialisation.
560
561 """
562 OPS = set(['IS', 'IS NOT'])
563 SERIALISED_OPS = {'IS':'=', 'IS NOT':'!='}
def __init__(self, path, op, loopPath, code="A"):
    """
    Constructor
    ===========

    Stores the path to compare against, then hands the path,
    operator and code to the parent constructor.

    @param path: The path to constrain
    @type path: string

    @param op: The relationship between the path and the path provided
               (must be a valid operator)
    @type op: string

    @param loopPath: The path to check for identity against
    @type loopPath: string

    @param code: The code for this constraint (default = "A")
    @type code: string
    """
    self.loopPath = loopPath
    parent_init = super(LoopConstraint, self).__init__
    parent_init(path, op, code)
583
585 """
586 Provide a human readable representation of the logic.
587 This method is called by repr.
588 """
589 s = super(LoopConstraint, self).to_string()
590 return " ".join([s, self.loopPath])
592 """
593 Return a dict object which can be used to construct a
594 DOM element with the appropriate attributes.
595 """
596 d = super(LoopConstraint, self).to_dict()
597 d.update(loopPath=self.loopPath, op=self.SERIALISED_OPS[self.op])
598 return d
599
601 """
602 Constraints for broad, general searching over all fields
603 ========================================================
604
605 These constraints request a wide-ranging search for matching
606 fields over all aspects of an object, including up to coercion
607 from related classes.
608
609 Valid operators:
610 - LOOKUP
611
612 To aid disambiguation, Ternary constraints accept an extra_value as
613 well as the main value.
614 """
615 OPS = set(['LOOKUP'])
def __init__(self, path, op, value, extra_value=None, code="A"):
    """
    Constructor
    ===========

    Stores the optional disambiguating value, then hands the path,
    operator, value and code to the parent constructor.

    @param path: The path to constrain. Here it must be a class, or a reference to a class.
    @type path: string

    @param op: The relationship between the path and the value provided
               (must be a valid operator)
    @type op: string

    @param value: The value to check other fields against.
    @type value: string

    @param extra_value: A further value for disambiguation. The meaning of this value varies by class
                        and configuration. For example, if the class of the object is Gene, then
                        extra_value will refer to the Organism.
    @type extra_value: string

    @param code: The code for this constraint (default = "A")
    @type code: string
    """
    self.extra_value = extra_value
    parent_init = super(TernaryConstraint, self).__init__
    parent_init(path, op, value, code)
640
642 """
643 Provide a human readable representation of the logic.
644 This method is called by repr.
645 """
646 s = super(TernaryConstraint, self).to_string()
647 if self.extra_value is None:
648 return s
649 else:
650 return " ".join([s, 'IN', self.extra_value])
652 """
653 Return a dict object which can be used to construct a
654 DOM element with the appropriate attributes.
655 """
656 d = super(TernaryConstraint, self).to_dict()
657 if self.extra_value is not None:
658 d.update(extraValue=self.extra_value)
659 return d
660
662 """
663 Constraints for checking membership of a set of values
664 ======================================================
665
666 These constraints require the value they constrain to be
667 either a member of a set of values, or not a member.
668
669 Valid operators:
670 - ONE OF
671 - NONE OF
672
673 These constraints are similar in use to List constraints, with
674 the following differences:
675 - The list in this case is a defined set of values that is passed
676 along with the query itself, rather than anything stored
677 independently on a server.
678 - The object of the constraint is the value of an attribute, rather
679 than an object's identity.
680 """
681 OPS = set(['ONE OF', 'NONE OF'])
def __init__(self, path, op, values, code="A"):
    """
    Constructor
    ===========

    Checks that the values arrive as a set or a list, stores them,
    then hands the path, operator and code to the parent constructor.

    @param path: The path to constrain. Here it must be an attribute of some object.
    @type path: string

    @param op: The relationship between the path and the values provided
               (must be a valid operator)
    @type op: string

    @param values: The set of values which the object of the constraint either must or must not belong to.
    @type values: set or list

    @param code: The code for this constraint (default = "A")
    @type code: string

    @raise TypeError: if values is neither a set nor a list
    """
    accepted_containers = (set, list)
    if isinstance(values, accepted_containers):
        self.values = values
        super(MultiConstraint, self).__init__(path, op, code)
        return
    raise TypeError("values must be a set or a list, not " + str(type(values)))
703
705 """
706 Provide a human readable representation of the logic.
707 This method is called by repr.
708 """
709 s = super(MultiConstraint, self).to_string()
710 return ' '.join([s, str(self.values)])
712 """
713 Return a dict object which can be used to construct a
714 DOM element with the appropriate attributes.
715 """
716 d = super(MultiConstraint, self).to_dict()
717 d.update(value=self.values)
718 return d
719
721 """
722 Constraints on the class of a reference
723 =======================================
724
725 If an object has a reference X to another object of type A,
726 and type B extends type A, then any object of type B may be
727 the value of the reference X. If you only want to see X's
728 which are B's, this may be achieved with subclass constraints,
729 which allow the type of an object to be limited to one of the
730 subclasses (at any depth) of the class type required
731 by the attribute.
732
733 These constraints do not use operators. Since they cannot be
734 conditional (eg. "A is a B or A is a C" would not be possible
735 in an InterMine query), they do not have codes
736 and cannot be referenced in logic expressions.
737 """
739 """
740 Constructor
741 ===========
742
743 @param path: The path to constrain. This must refer to a class or a reference to a class.
744 @type path: str
745
746 @param subclass: The class to subclass the path to. This must be a simple class name (not a dotted name)
747 @type subclass: str
748 """
749 if not PATH_PATTERN.match(subclass):
750 raise TypeError
751 self.subclass = subclass
752 super(SubClassConstraint, self).__init__(path)
754 """
755 Provide a human readable representation of the logic.
756 This method is called by repr.
757 """
758 s = super(SubClassConstraint, self).to_string()
759 return s + ' ISA ' + self.subclass
761 """
762 Return a dict object which can be used to construct a
763 DOM element with the appropriate attributes.
764 """
765 d = super(SubClassConstraint, self).to_dict()
766 d.update(type=self.subclass)
767 return d
768
771 """
772 A mixin to supply the behaviour and state of constraints on templates
773 =====================================================================
774
775 Constraints on templates can also be designated as "on", "off" or "locked", which refers
776 to whether they are active or not. Inactive constraints are still configured, but behave
777 as if absent for the purpose of results. In addition, template constraints can be
778 editable or not. Only values for editable constraints can be provided when requesting results,
779 and only constraints that can participate in logic expressions can be editable.
780 """
781 REQUIRED = "locked"
782 OPTIONAL_ON = "on"
783 OPTIONAL_OFF = "off"
def __init__(self, editable=True, optional="locked"):
    """
    Constructor
    ===========

    Records whether the constraint is editable, and translates the
    three-state optional setting into the two flags kept on the
    instance: optional and switched_on.

    @param editable: Whether or not this constraint should accept new values.
    @type editable: bool

    @param optional: Whether a value for this constraint must be provided when running.
    @type optional: "locked", "on" or "off"

    @raise TypeError: if optional is not one of the three recognised values
    """
    self.editable = editable
    is_locked = optional == TemplateConstraint.REQUIRED
    # A locked constraint is never optional and is always active
    self.optional = not is_locked
    if is_locked or optional == TemplateConstraint.OPTIONAL_ON:
        self.switched_on = True
    elif optional == TemplateConstraint.OPTIONAL_OFF:
        self.switched_on = False
    else:
        raise TypeError("Bad value for optional")
807
808 @property
810 """
811 True if a value must be provided for this constraint.
812
813 @rtype: bool
814 """
815 return not self.optional
816
817 @property
819 """
820 True if this constraint is currently inactive.
821
822 @rtype: bool
823 """
824 return not self.switched_on
825
827 """
828 Returns either "locked", "on" or "off".
829 """
830 if not self.optional:
831 return "locked"
832 else:
833 switch = "on" if self.switched_on else "off"
834 return switch
835
837 """
838 Provide a template specific human readable representation of the
839 constraint. This method is called by repr.
840 """
841 editable = "editable" if self.editable else "non-editable"
842 return '(' + editable + ", " + self.get_switchable_status() + ')'
844 """
845 A static function to use when building template constraints.
846 ------------------------------------------------------------
847
848 dict -> (dict, dict)
849
850 Splits a dictionary of arguments into two separate dictionaries, one with
851 arguments for the main constraint, and one with arguments for the template
852 portion of the behaviour
853 """
854 c_args = {}
855 t_args = {}
856 for k, v in args.items():
857 if k == "editable":
858 t_args[k] = v == "true"
859 elif k == "optional":
860 t_args[k] = v
861 else:
862 c_args[k] = v
863 return (c_args, t_args)
864
877
890
903
916
929
942
955
957 """
958 A factory for creating constraints from a set of arguments.
959 ===========================================================
960
961 A constraint factory is responsible for finding an appropriate
962 constraint class for the given arguments and instantiating the
963 constraint.
964 """
965 CONSTRAINT_CLASSES = set([
966 UnaryConstraint, BinaryConstraint, TernaryConstraint,
967 MultiConstraint, SubClassConstraint, LoopConstraint,
968 ListConstraint])
969
971 """
972 Constructor
973 -----------
974
975 Creates a new ConstraintFactory
976 """
977 self._codes = iter(string.ascii_uppercase)
978
980 """
981 Return the available constraint code.
982
983 @return: A single uppercase character
984 @rtype: str
985 """
986 return self._codes.next()
987
989 """
990 Create a constraint from a set of arguments.
991 --------------------------------------------
992
993 Finds a suitable constraint class, and instantiates it.
994
995 @rtype: Constraint
996 """
997 for CC in self.CONSTRAINT_CLASSES:
998 try:
999 c = CC(*args, **kwargs)
1000 if hasattr(c, "code"): c.code = self.get_next_code()
1001 return c
1002 except TypeError, e:
1003 pass
1004 raise TypeError("No matching constraint class found for "
1005 + str(args) + ", " + str(kwargs))
1006
1023