1 from xml.dom import minidom
2 import weakref
3 import re
4
5 from intermine.util import openAnything, ReadableException
6
7 """
8 Classes representing the data model
9 ===================================
10
11 Representations of tables and columns, and behaviour
12 for validating connections between them.
13
14 """
15
16 __author__ = "Alex Kalderimis"
17 __organization__ = "InterMine"
18 __license__ = "LGPL"
19 __contact__ = "dev@intermine.org"
22 """
23 A class representing columns on database tables
24 ===============================================
25
26 The base class for attributes, references and collections. All
27 columns in DB tables are represented by fields
28
29 SYNOPSIS
30 --------
31
32 >>> service = Service("http://www.flymine.org/query/service")
33 >>> model = service.model
34 >>> cd = model.get_class("Gene")
35 >>> print "Gene has", len(cd.fields), "fields"
36 >>> for field in cd.fields:
37 ... print " - ", field
38 Gene has 45 fields
39 - CDSs is a group of CDS objects, which link back to this as gene
40 - GLEANRsymbol is a String
41 - UTRs is a group of UTR objects, which link back to this as gene
42 - alleles is a group of Allele objects, which link back to this as gene
43 - chromosome is a Chromosome
44 - chromosomeLocation is a Location
45 - clones is a group of CDNAClone objects, which link back to this as gene
46 - crossReferences is a group of CrossReference objects, which link back to this as subject
47 - cytoLocation is a String
48 - dataSets is a group of DataSet objects, which link back to this as bioEntities
49 - downstreamIntergenicRegion is a IntergenicRegion
50 - exons is a group of Exon objects, which link back to this as gene
51 - flankingRegions is a group of GeneFlankingRegion objects, which link back to this as gene
52 - goAnnotation is a group of GOAnnotation objects
53 - homologues is a group of Homologue objects, which link back to this as gene
54 - id is a Integer
55 - interactions is a group of Interaction objects, which link back to this as gene
56 - length is a Integer
57 ...
58
59 @see: L{Attribute}
60 @see: L{Reference}
61 @see: L{Collection}
62 """
def __init__(self, name, type_name, class_origin):
    """
    Constructor - internal use only
    ===============================

    THIS CLASS IS NOT MEANT TO BE INSTANTIATED DIRECTLY

    Fields are built while a model is parsed, so it is
    recommended that you reach them through the class
    descriptors of a model rather than creating them yourself.

    @param name: The name of this field
    @param type_name: The name of the type (or model.Class) of this field
    @param class_origin: The model.Class this field was declared in

    """
    self.declared_in = class_origin
    # Only the name of the type is known at construction time; the
    # class object itself is attached afterwards, so start out empty.
    self.type_class = None
    self.type_name = type_name
    self.name = name
84 return self.name + " is a " + self.type_name
87
89 """
90 Attributes represent columns that contain actual data
91 =====================================================
92
93 The Attribute class inherits all the behaviour of L{intermine.model.Field}
94 """
95 pass
96
98 """
99 References represent columns that refer to records in other tables
100 ==================================================================
101
102 In addition to the behaviour and properties of Field, references
103 may also have a reverse reference, if the other record points
104 back to this one as well. And all references will have their
105 type upgraded to a type_class during parsing
106 """
def __init__(self, name, type_name, class_origin, reverse_ref=None):
    """
    Constructor
    ===========

    Takes the same parameters as Field, plus the optional
    name (str) of the reference that points back at this one.

    @param name: The name of the reference
    @param type_name: The name of the model.Class this refers to
    @param class_origin: The model.Class this was declared in
    @param reverse_ref: The name of the reverse reference (default: None)

    """
    # Remember only the *name* of the reverse reference for now.
    self.reverse_reference_name = reverse_ref
    super(Reference, self).__init__(name, type_name, class_origin)
    # The field object behind that name is not known yet.
    self.reverse_reference = None
125 """
126 Return a string representation
127 ==============================
128
129 @rtype: str
130 """
131 s = super(Reference, self).__repr__()
132 if self.reverse_reference is None:
133 return s
134 else:
135 return s + ", which links back to this as " + self.reverse_reference.name
136
138 """
139 Collections are references which refer to groups of objects
140 ===========================================================
141
142 Collections have all the same behaviour and properties as References
143 """
145 """Return a string representation"""
146 ret = super(Collection, self).__repr__().replace(" is a ", " is a group of ")
147 if self.reverse_reference is None:
148 return ret + " objects"
149 else:
150 return ret.replace(", which links", " objects, which link")
151
154 """
155 An abstraction of database tables in the data model
156 ===================================================
157
158 These objects refer to the table objects in the
159 InterMine ORM layer.
160
161 SYNOPSIS
162 --------
163
164 >>> service = Service("http://www.flymine.org/query/service")
165 >>> model = service.model
166 >>>
167 >>> if "Gene" in model.classes:
168 ... gene_cd = model.get_class("Gene")
169 ... print "Gene has", len(gene_cd.fields), "fields"
170 ... for field in gene_cd.fields:
171 ... print " - ", field.name
172
173 OVERVIEW
174 --------
175
176 Each class can have attributes (columns) of various types,
177 and can have references to other classes (tables), on either
178 a one-to-one (references) or one-to-many (collections) basis
179
180 Classes should not be instantiated by hand, but rather used
181 as part of the model they belong to.
182
183 """
184
185
def __init__(self, name, parents, model):
    """
    Constructor - Creates a new Class descriptor
    ============================================

      >>> cd = intermine.model.Class("Gene", ["SequenceFeature"], model)
      <intermine.model.Class: Gene>

    Class descriptors are created when the model is
    deserialised - there should be no need to build them by hand.

    @param name: The name of this class
    @param parents: a list of parental names
    @param model: the model this class belongs to

    """
    self.name, self.parents, self.model = name, parents, model
    self.parent_classes = []
    self.field_dict = {}
    # A class gets an implicit integer "id" attribute unless "Object"
    # is listed among its declared parents.
    self.has_id = "Object" not in parents
    if not self.has_id:
        return
    self.field_dict["id"] = Attribute("id", "Integer", self)
211
213 return "<%s.%s %s.%s>" % (self.__module__, self.__class__.__name__,
214 self.model.package_name if hasattr(self.model, 'package_name') else "__test__", self.name)
215
216 @property
218 """
219 The fields of this class
220 ========================
221
222 The fields are returned sorted by name. Fields
223 includes all Attributes, References and Collections
224
225 @rtype: list(L{Field})
226 """
227 return sorted(self.field_dict.values(), key=lambda field: field.name)
228
230 for f in self.field_dict.values():
231 yield f
232
234 if isinstance(item, Field):
235 return item in self.field_dict.values()
236 else:
237 return str(item) in self.field_dict
238
239 @property
241 """
242 The fields of this class which contain data
243 ===========================================
244
245 @rtype: list(L{Attribute})
246 """
247 return filter(lambda x: isinstance(x, Attribute), self.fields)
248
249 @property
251 """
252 fields which reference other objects
253 ====================================
254
255 @rtype: list(L{Reference})
256 """
257 def isRef(x): return isinstance(x, Reference) and not isinstance(x, Collection)
258 return filter(isRef, self.fields)
259
260 @property
262 """
263 fields which reference many other objects
264 =========================================
265
266 @rtype: list(L{Collection})
267 """
268 return filter(lambda x: isinstance(x, Collection), self.fields)
269
271 """
272 Get a field by name
273 ===================
274
275 The standard way of retrieving a field
276
277 @raise ModelError: if the Class does not have such a field
278
279 @rtype: subclass of L{intermine.model.Field}
280 """
281 if name in self.field_dict:
282 return self.field_dict[name]
283 else:
284 raise ModelError("There is no field called %s in %s" % (name, self.name))
285
def isa(self, other):
    """
    Check if self is, or inherits from other
    ========================================

    Answers questions about inheritance: the result is true
    when "other" is this class itself, one of its declared
    parents, or anywhere further up its ancestry.

    Other can be passed as a name (str), or as the class object itself

    @rtype: boolean
    """
    target = other.name if isinstance(other, Class) else other
    if self.name == target or target in self.parents:
        return True
    # Not a direct match: ask each resolved parent class in turn.
    return any(parent.isa(other) for parent in self.parent_classes)
311
312
313 -class Path(object):
314 """
315 A class representing a validated dotted string path
316 ===================================================
317
318 A path represents a connection between records and fields
319
320 SYNOPSIS
321 --------
322
323 >>> service = Service("http://www.flymine.org/query/service")
324 model = service.model
325 path = model.make_path("Gene.organism.name")
326 path.is_attribute()
327 ... True
328 >>> path2 = model.make_path("Gene.proteins")
329 path2.is_attribute()
330 ... False
331 >>> path2.is_reference()
332 ... True
333 >>> path2.get_class()
334 ... <intermine.model.Class: gene>
335
336 OVERVIEW
337 --------
338
339 This class is used for performing validation on dotted path strings.
340 The simple act of parsing it into existence will validate the path
341 to some extent, but there are additional methods for verifying certain
342 relationships as well
343 """
def __init__(self, path, model, subclasses=None):
    """
    Constructor
    ===========

      >>> path = Path("Gene.name", model)

    You will not need to use this constructor directly. Instead,
    use the "make_path" method on the model to construct paths for you.

    @param path: the dotted path string (eg: Gene.proteins.name),
                 or a class descriptor to use as a one-element path
    @type path: str
    @param model: the model to validate the path against
    @type model: L{Model}
    @param subclasses: a dict which maps subclasses (defaults to a new empty dict)
    @type subclasses: dict
    """
    # A literal {} default would be created once and shared by every
    # path built without an explicit mapping, so create one per call.
    if subclasses is None:
        subclasses = {}
    # Hold the model weakly so a path does not keep it alive.
    self.model = weakref.proxy(model)
    self.subclasses = subclasses
    if isinstance(path, Class):
        self._string = path.name
        self.parts = [path]
    else:
        self._string = str(path)
        self.parts = model.parse_path_string(self._string, subclasses)
369
372
374 return '<' + self.__module__ + "." + self.__class__.__name__ + ": " + self._string + '>'
375
377 """
378 The path one step above this path.
379 ==================================
380
381 >>> p1 = Path("Gene.exons.name", model)
382 >>> p2 = p1.prefix()
383 >>> print p2
384 ... Gene.exons
385
386 """
387 parts = list(self.parts)
388 parts.pop()
389 if len(parts) < 1:
390 raise PathParseError(str(self) + " does not have a prefix")
391 s = ".".join(map(lambda x: x.name, parts))
392 return Path(s, self.model._unproxied(), self.subclasses)
393
395 """
396 Construct a new path by adding elements to the end of this one.
397 ===============================================================
398
399 >>> p1 = Path("Gene.exons", model)
400 >>> p2 = p1.append("name")
401 >>> print p2
402 ... Gene.exons.name
403
404 This is the inverse of prefix.
405 """
406 s = str(self) + "." + ".".join(elements)
407 return Path(s, self.model._unproxied(), self.subclasses)
408
409 @property
411 """
412 The descriptor for the first part of the string. This should always be a class descriptor.
413
414 @rtype: L{intermine.model.Class}
415 """
416 return self.parts[0]
417
418 @property
420 """
421 The descriptor for the last part of the string.
422
423 @rtype: L{model.Class} or L{model.Field}
424 """
425 return self.parts[-1]
426
428 """
429 Return the class object for this path, if it refers to a class
430 or a reference. Attribute paths return None
431
432 @rtype: L{model.Class}
433 """
434 if self.is_class():
435 return self.end
436 elif self.is_reference():
437 if str(self) in self.subclasses:
438 return self.model.get_class(self.subclasses[str(self)])
439 return self.end.type_class
440 else:
441 return None
442
443 end_class = property(get_class)
444
446 """
447 Return true if the path is a reference, eg: Gene.organism or Gene.proteins
448 Note: Collections are ALSO references
449
450 @rtype: boolean
451 """
452 return isinstance(self.end, Reference)
453
455 """
456 Return true if the path just refers to a class, eg: Gene
457
458 @rtype: boolean
459 """
460 return isinstance(self.end, Class)
461
463 """
464 Return true if the path refers to an attribute, eg: Gene.length
465
466 @rtype: boolean
467 """
468 return isinstance(self.end, Attribute)
469
471 return str(self) == str(other)
472
474 i = hash(str(self))
475 return reduce(lambda a, b: a ^ b, [hash(k) ^ hash(v) for k, v in self.subclasses.items()], i)
476
478
480 self.op = op
481 self.left = left
482 self.right = right
483
486
489
491 for n in [self.left, self.right]:
492 for subn in n:
493 yield subn
494
def as_logic(self, codes = None, start = 'A'):
    """
    Render this node and both of its operands as a logic string
    ===========================================================

    The same code iterator is handed to the left and then the
    right operand, so codes are consumed in left-to-right order.

    @param codes: an iterator of constraint codes to draw from
                  (default: a new one running from start to 'Z')
    @param start: the first code to use when codes is not given
    @rtype: str
    """
    if codes is None:
        # range() stops before its upper bound: go one past 'Z' so that
        # 'Z' itself is available as a code.
        codes = (chr(c) for c in range(ord(start), ord('Z') + 1))
    return "(%s %s %s)" % (self.left.as_logic(codes), self.op, self.right.as_logic(codes))
499
501
503 self.vargs = args
504 self.kwargs = kwargs
505
508
def as_logic(self, codes = None, start = 'A'):
    """
    Return the next free constraint code for this node
    ==================================================

    @param codes: an iterator of constraint codes to draw from
                  (default: a new one running from start to 'Z')
    @param start: the first code to use when codes is not given
    @rtype: str
    """
    if codes is None:
        # range() stops before its upper bound: go one past 'Z' so that
        # 'Z' itself is available as a code.
        codes = (chr(c) for c in range(ord(start), ord('Z') + 1))
    # The builtin works with any iterator, unlike the .next() method.
    return next(codes)
513
515
516 - def as_logic(self, code = None, start = 'A'):
518
520 """
521 A representation of a path in a query that can be constrained
522 =============================================================
523
524 Column objects allow constraints to be constructed in something
525 close to a declarative style
526 """
527
def __init__(self, path, model, subclasses=None, query=None, parent = None):
    """
    Constructor
    ===========

    @param path: a dotted path string, or an existing Path object
    @param model: the model used to build the path and new queries
    @param subclasses: a dict of subclass constraints, shared with any
                       column it is explicitly passed to
                       (defaults to a new empty dict)
    @param query: the query this column belongs to, if any
    @param parent: the column this one was reached from, if any
    """
    # Columns write subclass constraints into this dict (see the "<"
    # operator), so a literal {} default would leak them between
    # unrelated columns. Create one per call instead.
    if subclasses is None:
        subclasses = {}
    self._model = model
    self._query = query
    self._subclasses = subclasses
    self._parent = parent
    self.filter = self.where
    if isinstance(path, Path):
        self._path = path
    else:
        self._path = model.make_path(path, subclasses)
    # Cache of child columns, keyed by field name.
    self._branches = {}
539
541 """
542 Create a new query with this column as the base class, selecting the given fields.
543 """
544 q = self._model.service.new_query(str(self))
545 q.select(*cols)
546 return q
547
def where(self, *args, **kwargs):
    """
    Start a new query rooted at this column and apply the given
    constraint to it.

    All arguments are passed on unchanged to the "where" method of
    the new query, and whatever that call returns is returned.

    also available as "filter"
    """
    service = self._model.service
    return service.new_query(str(self)).where(*args, **kwargs)
556
558 """
559 Return a query for all objects of this class in the given webservice
560 """
561 q = self.select("*")
562 return iter(q)
563
565 if name in self._branches:
566 return self._branches[name]
567 cld = self._path.get_class()
568 if cld is not None:
569 try:
570 fld = cld.get_field(name)
571 branch = Column(str(self) + "." + name, self._model, self._subclasses, self._query, self)
572 self._branches[name] = branch
573 return branch
574 except ModelError, e:
575 raise AttributeError(str(e))
576 raise AttributeError("No attribute '" + name + "'")
577
579 return str(self._path)
580
582 if isinstance(other, tuple):
583 return ConstraintNode(str(self), 'LOOKUP', *other)
584 else:
585 return ConstraintNode(str(self), 'LOOKUP', str(other))
586
589
590 __lshift__ = __rshift__
591
593 if other is None:
594 return ConstraintNode(str(self), "IS NULL")
595 elif isinstance(other, Column):
596 return ConstraintNode(str(self), "IS", str(other))
597 elif hasattr(other, "make_list_constraint"):
598 return other.make_list_constraint(str(self), "IN")
599 elif isinstance(other, list):
600 return ConstraintNode(str(self), "ONE OF", other)
601 else:
602 return ConstraintNode(str(self), "=", other)
603
605 if other is None:
606 return ConstraintNode(str(self), "IS NOT NULL")
607 elif isinstance(other, Column):
608 return ConstraintNode(str(self), "IS NOT", str(other))
609 elif hasattr(other, "make_list_constraint"):
610 return other.make_list_constraint(str(self), "NOT IN")
611 elif isinstance(other, list):
612 return ConstraintNode(str(self), "NONE OF", other)
613 else:
614 return ConstraintNode(str(self), "!=", other)
615
617 if hasattr(other, "make_list_constraint"):
618 return other.make_list_constraint(str(self), "NOT IN")
619 elif isinstance(other, list):
620 return ConstraintNode(str(self), "NONE OF", other)
621 raise TypeError("Invalid argument for xor: %r" % other)
622
def in_(self, other):
    """
    Build a membership constraint on this column.

    Objects providing "make_list_constraint" are asked to build an
    "IN" constraint themselves; a plain list becomes a "ONE OF"
    constraint.

    @raise TypeError: if other is neither of those
    """
    if hasattr(other, "make_list_constraint"):
        return other.make_list_constraint(str(self), "IN")
    if not isinstance(other, list):
        raise TypeError("Invalid argument for in_: %r" % other)
    return ConstraintNode(str(self), "ONE OF", other)
629
631 if isinstance(other, Column):
632 self._parent._subclasses[str(self)] = str(other)
633 self._parent._branches = {}
634 return CodelessNode(str(self), str(other))
635 try:
636 return self.in_(other)
637 except TypeError:
638 return ConstraintNode(str(self), "<", other)
639
641 if isinstance(other, Column):
642 return CodelessNode(str(self), str(other))
643 try:
644 return self.in_(other)
645 except TypeError:
646 return ConstraintNode(str(self), "<=", other)
647
650
653
655 """
656 A class for representing the data model of an InterMine datawarehouse
657 =====================================================================
658
659 An abstraction of the database schema
660
661 SYNOPSIS
662 --------
663
664 >>> service = Service("http://www.flymine.org/query/service")
665 >>> model = service.model
666 >>> model.get_class("Gene")
667 <intermine.model.Class: Gene>
668
669 OVERVIEW
670 --------
671
672 This class represents the data model - ie. an abstraction
673 of the database schema. It can be used to introspect what
674 data is available and how it is inter-related
675 """
676
677 NUMERIC_TYPES = frozenset(["int", "Integer", "float", "Float", "double", "Double", "long", "Long", "short", "Short"])
678
def __init__(self, source, service=None):
    """
    Constructor
    ===========

      >>> model = Model(xml)

    You will most likely not need to create a model directly;
    get one from the Service object instead:

    @see: L{intermine.webservice.Service}

    @param source: the model.xml, as a local file, string, or url
    @param service: the service this model belongs to (default: None)
    """
    assert source is not None
    self.source = source
    # Only a weak proxy to the service is kept when one is given.
    self.service = weakref.proxy(service) if service is not None else service
    self.classes= {}
    self.parse_model(source)
    self.vivify()

    # "table" is offered as a second name for "column".
    self.table = self.column
705
707 """
708 Create classes, attributes, references and collections from the model.xml
709 =========================================================================
710
711 The xml can be provided as a file, url or string. This method
712 is called during instantiation - it does not need to be called
713 directly.
714
715 @param source: the model.xml, as a local file, string, or url
716 @raise ModelParseError: if there is a problem parsing the source
717 """
718 try:
719 io = openAnything(source)
720 doc = minidom.parse(io)
721 for node in doc.getElementsByTagName('model'):
722 self.name = node.getAttribute('name')
723 self.package_name = node.getAttribute('package')
724 assert node.nextSibling is None, "More than one model element"
725 assert self.name and self.package_name, "No model name or package name"
726
727 for c in doc.getElementsByTagName('class'):
728 class_name = c.getAttribute('name')
729 assert class_name, "Name not defined in" + c.toxml()
730 def strip_java_prefix(x):
731 return re.sub(r'.*\.', '', x)
732 parents = map(strip_java_prefix,
733 c.getAttribute('extends').split(' '))
734 cl = Class(class_name, parents, self)
735 for a in c.getElementsByTagName('attribute'):
736 name = a.getAttribute('name')
737 type_name = strip_java_prefix(a.getAttribute('type'))
738 at = Attribute(name, type_name, cl)
739 cl.field_dict[name] = at
740 for r in c.getElementsByTagName('reference'):
741 name = r.getAttribute('name')
742 type_name = r.getAttribute('referenced-type')
743 linked_field_name = r.getAttribute('reverse-reference')
744 ref = Reference(name, type_name, cl, linked_field_name)
745 cl.field_dict[name] = ref
746 for co in c.getElementsByTagName('collection'):
747 name = co.getAttribute('name')
748 type_name = co.getAttribute('referenced-type')
749 linked_field_name = co.getAttribute('reverse-reference')
750 col = Collection(name, type_name, cl, linked_field_name)
751 cl.field_dict[name] = col
752 self.classes[class_name] = cl
753 except Exception, error:
754 raise ModelParseError("Error parsing model", source, error)
755
757 """
758 Make names point to instances and insert inherited fields
759 =========================================================
760
761 This method ensures the model is internally consistent. This method
762 is called during instantiation. It does not need to be called
763 directly.
764
765 @raise ModelError: if the names point to non-existent objects
766 """
767 for c in self.classes.values():
768 c.parent_classes = self.to_ancestry(c)
769 for pc in c.parent_classes:
770 c.field_dict.update(pc.field_dict)
771 for f in c.fields:
772 f.type_class = self.classes.get(f.type_name)
773 if hasattr(f, 'reverse_reference_name') and f.reverse_reference_name != '':
774 rrn = f.reverse_reference_name
775 f.reverse_reference = f.type_class.field_dict[rrn]
776
778 """
779 Returns the lineage of the class
780 ================================
781
782 >>> classes = Model.to_ancestry(cd)
783
784 Returns the class' parents, and all the class' parents' parents
785
786 @rtype: list(L{intermine.model.Class})
787 """
788 parents = cd.parents
789 def defined(x): return x is not None
790 def to_class(x): return self.classes.get(x)
791 ancestry = filter(defined, map(to_class, parents))
792 for ancestor in ancestry:
793 ancestry.extend(self.to_ancestry(ancestor))
794 return ancestry
795
797 """
798 take a list of class names and return a list of classes
799 =======================================================
800
801 >>> classes = model.to_classes(["Gene", "Protein", "Organism"])
802
803 This simply maps from a list of strings to a list of
804 classes in the calling model.
805
806 @raise ModelError: if the list of class names includes ones that don't exist
807
808 @rtype: list(L{intermine.model.Class})
809 """
810 return map(self.get_class, classnames)
811
def column(self, path, *rest):
    """
    Build a Column for the given path, bound to this model.

    Any further positional arguments are passed to the Column
    constructor after the model.

    @param path: the path the column should represent
    """
    ctor_args = (path, self) + rest
    return Column(*ctor_args)
814
817
819 """
820 Get a class by its name, or by a dotted path
821 ============================================
822
823 >>> model = Model("http://www.flymine.org/query/service/model")
824 >>> model.get_class("Gene")
825 <intermine.model.Class: Gene>
826 >>> model.get_class("Gene.proteins")
827 <intermine.model.Class: Protein>
828
829 This is the recommended way of retrieving a class from
830 the model. As well as handling class names, you can also
831 pass in a path such as "Gene.proteins" and get the
832 corresponding class back (<intermine.model.Class: Protein>)
833
834 @raise ModelError: if the class name refers to a non-existent object
835
836 @rtype: L{intermine.model.Class}
837 """
838 if name.find(".") != -1:
839 path = self.make_path(name)
840 if path.is_attribute():
841 raise ModelError("'" + str(path) + "' is not a class")
842 else:
843 return path.get_class()
844 if name in self.classes:
845 return self.classes[name]
846 else:
847 raise ModelError("'" + name + "' is not a class in this model")
848
850 """
851 Return a path object for the given path string
852 ==============================================
853
854 >>> path = Model.make_path("Gene.organism.name")
855 <intermine.model.Path: Gene.organism.name>
856
857 This is the recommended manner of constructing path objects.
858
859 @type path: str
860 @type subclasses: dict
861
862 @raise PathParseError: if there is a problem parsing the path string
863
864 @rtype: L{intermine.model.Path}
865 """
866 return Path(path, self, subclasses)
867
869 """
870 Validate a path
871 ===============
872
873 >>> try:
874 ... model.validate_path("Gene.symbol")
875 ... return "path is valid"
876 ... except PathParseError:
877 ... return "path is invalid"
878 "path is valid"
879
880 When you don't need to interrogate relationships
881 between paths, simply using this method to validate
882 a path string is enough. It guarantees that there
883 is a descriptor for each section of the string,
884 with the appropriate relationships
885
886 @raise PathParseError: if there is a problem parsing the path string
887 """
888 try:
889 self.parse_path_string(path_string, subclasses)
890 return True
891 except PathParseError, e:
892 raise PathParseError("Error parsing '%s' (subclasses: %s)"
893 % ( path_string, str(subclasses) ), e )
894
896 """
897 Parse a path string into a list of descriptors - one for each section
898 =====================================================================
899
900 >>> parts = Model.parse_path_string(string)
901
902 This method is used when making paths from a model, and
903 when validating path strings. It probably won't need to
904 be called directly.
905
906 @see: L{intermine.model.Model.make_path}
907 @see: L{intermine.model.Model.validate_path}
908 @see: L{intermine.model.Path}
909 """
910 descriptors = []
911 names = path_string.split('.')
912 root_name = names.pop(0)
913
914 root_descriptor = self.get_class(root_name)
915 descriptors.append(root_descriptor)
916
917 if root_name in subclasses:
918 current_class = self.get_class(subclasses[root_name])
919 else:
920 current_class = root_descriptor
921
922 for field_name in names:
923 field = current_class.get_field(field_name)
924 descriptors.append(field)
925
926 if isinstance(field, Reference):
927 key = '.'.join(map(lambda x: x.name, descriptors))
928 if key in subclasses:
929 current_class = self.get_class(subclasses[key])
930 else:
931 current_class = field.type_class
932 else:
933 current_class = None
934
935 return descriptors
936
939
942
945
947
948 - def __init__(self, message, source, cause=None):
951
953 base = repr(self.message) + ":" + repr(self.source)
954 if self.cause is None:
955 return base
956 else:
957 return base + repr(self.cause)
958