1 from xml.dom import minidom
2 import weakref
3 import re
4
5 from intermine.util import openAnything, ReadableException
6 from intermine.lists.list import List
7
8 """
9 Classes representing the data model
10 ===================================
11
12 Representations of tables and columns, and behaviour
13 for validating connections between them.
14
15 """
16
17 __author__ = "Alex Kalderimis"
18 __organization__ = "InterMine"
19 __license__ = "LGPL"
20 __contact__ = "dev@intermine.org"
21
22
23
24 -class Class(object):
25 """
26 An abstraction of database tables in the data model
27 ===================================================
28
29 These objects refer to the table objects in the
30 InterMine ORM layer.
31
32 SYNOPSIS
33 --------
34
35 >>> service = Service("http://www.flymine.org/query/service")
36 >>> model = service.model
37 >>>
38 >>> if "Gene" in model.classes:
39 ... gene_cd = model.get_class("Gene")
40 ... print "Gene has", len(gene_cd.fields), "fields"
41 ... for field in gene_cd.fields:
42 ... print " - ", field.name
43
44 OVERVIEW
45 --------
46
47 Each class can have attributes (columns) of various types,
48 and can have references to other classes (tables), on either
49 a one-to-one (references) or one-to-many (collections) basis
50
51 Classes should not be instantiated by hand, but rather used
52 as part of the model they belong to.
53
54 """
def __init__(self, name, parents, model):
    """
    Constructor - Creates a new Class descriptor
    ============================================

        >>> cd = intermine.model.Class("Gene", ["SequenceFeature"], model)
        <intermine.model.Class: Gene>

    This constructor is invoked while the model is being
    deserialised - there should be no need to build Classes by hand.

    Every new class starts with an empty list of parent classes
    and a field dictionary holding a single Integer attribute
    called "id".

    @param name: The name of this class
    @param parents: a list of parental names
    @param model: The model object, stored as C{self.model}

    """
    self.model = model
    self.name = name
    self.parents = parents
    self.parent_classes = []
    self.field_dict = {}
    # Deliberately not called "id", so the builtin id() stays usable here.
    id_attribute = Attribute("id", "Integer", self)
    self.field_dict["id"] = id_attribute
77
79 return '<' + self.__module__ + "." + self.__class__.__name__ + ': ' + self.name + '>'
80
81 @property
83 """
84 The fields of this class
85 ========================
86
87 The fields are returned sorted by name. Fields
88 includes all Attributes, References and Collections
89
90 @rtype: list(L{Field})
91 """
92 return sorted(self.field_dict.values(), key=lambda field: field.name)
93
94 @property
96 """
97 The fields of this class which contain data
98 ===========================================
99
100 @rtype: list(L{Attribute})
101 """
102 return filter(lambda x: isinstance(x, Attribute), self.fields)
103
104 @property
106 """
107 fields which reference other objects
108 ====================================
109
110 @rtype: list(L{Reference})
111 """
112 def isRef(x): return isinstance(x, Reference) and not isinstance(x, Collection)
113 return filter(isRef, self.fields)
114
115 @property
117 """
118 fields which reference many other objects
119 =========================================
120
121 @rtype: list(L{Collection})
122 """
123 return filter(lambda x: isinstance(x, Collection), self.fields)
124
126 """
127 Get a field by name
128 ===================
129
130 The standard way of retrieving a field
131
132 @raise ModelError: if the Class does not have such a field
133
134 @rtype: subclass of L{intermine.model.Field}
135 """
136 if name in self.field_dict:
137 return self.field_dict[name]
138 else:
139 raise ModelError("There is no field called %s in %s" % (name, self.name))
140
141 - def isa(self, other):
142 """
143 Check if self is, or inherits from other
144 ========================================
145
146 This method validates statements about inheritance.
147 Returns true if the "other" is, or is within the
148 ancestry of, this class
149
150 Other can be passed as a name (str), or as the class object itself
151
152 @rtype: boolean
153 """
154 if isinstance(other, Class):
155 other_name = other.name
156 else:
157 other_name = other
158 if self.name == other_name:
159 return True
160 if other_name in self.parents:
161 return True
162 for p in self.parent_classes:
163 if p.isa(other):
164 return True
165 return False
166
169 """
170 A class representing columns on database tables
171 ===============================================
172
173 The base class for attributes, references and collections. All
174 columns in DB tables are represented by fields
175
176 SYNOPSIS
177 --------
178
179 >>> service = Service("http://www.flymine.org/query/service")
180 >>> model = service.model
181 >>> cd = model.get_class("Gene")
182 >>> print "Gene has", len(cd.fields), "fields"
183 >>> for field in gene_cd.fields:
184 ... print " - ", field
185 Gene has 45 fields
186 - CDSs is a group of CDS objects, which link back to this as gene
187 - GLEANRsymbol is a String
188 - UTRs is a group of UTR objects, which link back to this as gene
189 - alleles is a group of Allele objects, which link back to this as gene
190 - chromosome is a Chromosome
191 - chromosomeLocation is a Location
192 - clones is a group of CDNAClone objects, which link back to this as gene
193 - crossReferences is a group of CrossReference objects, which link back to this as subject
194 - cytoLocation is a String
195 - dataSets is a group of DataSet objects, which link back to this as bioEntities
196 - downstreamIntergenicRegion is a IntergenicRegion
197 - exons is a group of Exon objects, which link back to this as gene
198 - flankingRegions is a group of GeneFlankingRegion objects, which link back to this as gene
199 - goAnnotation is a group of GOAnnotation objects
200 - homologues is a group of Homologue objects, which link back to this as gene
201 - id is a Integer
202 - interactions is a group of Interaction objects, which link back to this as gene
203 - length is a Integer
204 ...
205
206 @see: L{Attribute}
207 @see: L{Reference}
208 @see: L{Collection}
209 """
def __init__(self, name, type_name, class_origin):
    """
    Constructor - DO NOT USE
    ========================

    THIS CLASS IS NOT MEANT TO BE INSTANTIATED DIRECTLY

    You are unlikely to need to do so anyway: the recommended
    route is to reach fields through the classes generated by
    the model.

    The constructor only records its arguments; C{type_class}
    starts out as None.

    @param name: The name of the field
    @param type_name: The name of the type of this field
    @param class_origin: The model.Class this was declared in

    """
    self.declared_in = class_origin
    self.name, self.type_name = name, type_name
    # Only the type's name is known at construction time.
    self.type_class = None
231 return self.name + " is a " + self.type_name
234
237 """
238 Attributes represent columns that contain actual data
239 =====================================================
240
241 The Attribute class inherits all the behaviour of L{intermine.model.Field}
242 """
243 pass
244
246 """
247 References represent columns that refer to records in other tables
248 ==================================================================
249
250 In addition to the behaviour and properties of Field, references
251 may also have a reverse reference, if the other record points
252 back to this one as well. And all references will have their
253 type upgraded to a type_class during parsing
254 """
def __init__(self, name, type_name, class_origin, reverse_ref=None):
    """
    Constructor
    ===========

    On top of the parameters accepted by Field, a Reference
    also accepts an optional reverse reference name (str).

    Only the name of the reverse reference is stored here;
    C{reverse_reference} itself starts out as None.

    @param name: The name of the reference
    @param type_name: The name of the model.Class this refers to
    @param class_origin: The model.Class this was declared in
    @param reverse_ref: The name of the reverse reference (default: None)

    """
    super(Reference, self).__init__(name, type_name, class_origin)
    self.reverse_reference_name = reverse_ref
    self.reverse_reference = None
273 """
274 Return a string representation
275 ==============================
276
277 @rtype: str
278 """
279 s = super(Reference, self).toString()
280 if self.reverse_reference is None:
281 return s
282 else:
283 return s + ", which links back to this as " + self.reverse_reference.name
284
286 """
287 Collections are references which refer to groups of objects
288 ===========================================================
289
290 Collections have all the same behaviour and properties as References
291 """
293 """Return a string representation"""
294 ret = super(Collection, self).toString().replace(" is a ", " is a group of ")
295 if self.reverse_reference is None:
296 return ret + " objects"
297 else:
298 return ret.replace(", which links", " objects, which link")
299
300
301 -class Path(object):
302 """
303 A class representing a validated dotted string path
304 ===================================================
305
306 A path represents a connection between records and fields
307
308 SYNOPSIS
309 --------
310
311 >>> service = Service("http://www.flymine.org/query/service")
312 model = service.model
313 path = model.make_path("Gene.organism.name")
314 path.is_attribute()
315 ... True
316 >>> path2 = model.make_path("Gene.proteins")
317 path2.is_attribute()
318 ... False
319 >>> path2.is_reference()
320 ... True
321 >>> path2.get_class()
322 ... <intermine.model.Class: gene>
323
324 OVERVIEW
325 --------
326
327 This class is used for performing validation on dotted path strings.
328 The simple act of parsing it into existence will validate the path
329 to some extent, but there are additional methods for verifying certain
330 relationships as well
331 """
def __init__(self, path, model, subclasses=None):
    """
    Constructor
    ===========

        >>> path = Path("Gene.name", model)

    You will not need to use this constructor directly. Instead,
    use the "make_path" method on the model to construct paths for you.

    If C{path} is already a L{Class}, the path consists of just that
    class and its string form is the class name. Otherwise C{path} is
    converted to a string and parsed by C{model.parse_path_string}.

    The model is kept as a weak proxy, which does not keep the
    model object alive.

    @param path: the dotted path string (eg: Gene.proteins.name), or a class
    @type path: str or L{Class}
    @param model: the model to validate the path against
    @type model: L{Model}
    @param subclasses: a dict which maps subclasses (defaults to an empty dict)
    @type subclasses: dict
    """
    # A fresh dict per call: a literal {} default would be one object
    # shared by every Path constructed without subclasses.
    if subclasses is None:
        subclasses = {}
    self.model = weakref.proxy(model)
    if isinstance(path, Class):
        self._string = path.name
        self.parts = [path]
    else:
        self._string = str(path)
        self.parts = model.parse_path_string(self._string, subclasses)
356
359
361 return '<' + self.__module__ + "." + self.__class__.__name__ + ": " + self._string + '>'
362
363 @property
365 """
366 The descriptor for the first part of the string. This should always be a class descriptor.
367
368 @rtype: L{intermine.model.Class}
369 """
370 return self.parts[0]
371
372 @property
374 """
375 The descriptor for the last part of the string.
376
377 @rtype: L{model.Class} or L{model.Field}
378 """
379 return self.parts[-1]
380
382 """
383 Return the class object for this path, if it refers to a class
384 or a reference. Attribute paths return None
385
386 @rtype: L{model.Class}
387 """
388 if self.is_class():
389 return self.end
390 elif self.is_reference():
391 return self.end.type_class
392 else:
393 return None
394 end_class = property(get_class)
395
397 """
398 Return true if the path is a reference, eg: Gene.organism or Gene.proteins
399 Note: Collections are ALSO references
400
401 @rtype: boolean
402 """
403 return isinstance(self.end, Reference)
404
406 """
407 Return true if the path just refers to a class, eg: Gene
408
409 @rtype: boolean
410 """
411 return isinstance(self.end, Class)
412
414 """
415 Return true if the path refers to an attribute, eg: Gene.length
416
417 @rtype: boolean
418 """
419 return isinstance(self.end, Attribute)
420
422
def __init__(self, path, model, subclasses=None, query=None):
    """
    Constructor
    ===========

    Wraps a path so that it can be used to build queries.

    If C{path} is already a L{Path} it is used as given; otherwise
    it is turned into one with C{model.make_path(path, subclasses)}.

    @param path: a L{Path}, or a value that model.make_path accepts
    @param model: the model this column belongs to
    @param subclasses: a dict which maps subclasses (defaults to an empty dict)
    @type subclasses: dict
    @param query: stored as C{self._query} (default: None)
    """
    # The dict is stored on the instance, so a literal {} default
    # would be shared by every Column created without subclasses.
    if subclasses is None:
        subclasses = {}
    self._model = model
    self._query = query
    self._subclasses = subclasses
    if isinstance(path, Path):
        self._path = path
    else:
        self._path = model.make_path(path, subclasses)
431
433 q = self._model.service.new_query(str(self))
434 q.select(*cols)
435 return q
436
438 cld = self._path.get_class()
439 if cld is not None:
440 try:
441 fld = cld.get_field(name)
442 return Column(str(self) + "." + name, self._model, self._subclasses, self._query)
443 except ModelError, e:
444 raise AttributeError(str(e))
445 raise AttributeError("No attribute '" + name + "'")
446
448 return str(self._path)
449
451 if isinstance(other, tuple):
452 l = [str(self), "LOOKUP"]
453 l.extend(other)
454 return tuple(l)
455 else:
456 return (str(self), 'LOOKUP', str(other))
457
459 return (str(self), str(other))
460
462 if isinstance(other, Column):
463 return (str(self), "IS", str(other))
464 elif other is None:
465 return (str(self), "IS NULL")
466 elif isinstance(other, list):
467 return (str(self), "ONE OF", other)
468 elif isinstance(other, List):
469 return (str(self), "IN", other.name)
470 else:
471 return (str(self), "=", other)
472
474 if isinstance(other, Column):
475 return (str(self), "IS NOT", str(other))
476 elif other is None:
477 return (str(self), "IS NOT NULL")
478 elif isinstance(other, list):
479 return (str(self), "NONE OF", other)
480 elif isinstance(other, List):
481 return (str(self), "NOT IN", other.name)
482 else:
483 return (str(self), "!=", other)
484
486 return (str(self), "<", other)
487
489 return (str(self), "<=", other)
490
492 return (str(self), ">", other)
493
495 return (str(self), ">=", other)
496
498 """
499 A class for representing the data model of an InterMine datawarehouse
500 =====================================================================
501
502 An abstraction of the database schema
503
504 SYNOPSIS
505 --------
506
507 >>> service = Service("http://www.flymine.org/query/service")
508 >>> model = service.model
509 >>> model.get_class("Gene")
510 <intermine.model.Class: Gene>
511
512 OVERVIEW
513 --------
514
515 This class represents the data model - ie. an abstraction
516 of the database schema. It can be used to introspect what
517 data is available and how it is inter-related
518 """
def __init__(self, source, service=None):
    """
    Constructor
    ===========

        >>> model = Model(xml)

    You will most likely not need to create a model directly,
    instead get one from the Service object:

    @see: L{intermine.webservice.Service}

    Construction parses the source into C{self.classes} and then
    calls C{vivify} on the result. C{table} is made available as
    an alias for C{column}.

    @param source: the model.xml, as a local file, string, or url
    @param service: the service this model belongs to (default: None)
    """
    assert source is not None
    self.source = source
    # A weak proxy does not keep the service object alive.
    self.service = weakref.proxy(service) if service is not None else None
    self.classes = {}
    self.parse_model(source)
    self.vivify()

    # Make sugary aliases
    self.table = self.column
545
547 """
548 Create classes, attributes, references and collections from the model.xml
549 =========================================================================
550
551 The xml can be provided as a file, url or string. This method
552 is called during instantiation - it does not need to be called
553 directly.
554
555 @param source: the model.xml, as a local file, string, or url
556 @raise ModelParseError: if there is a problem parsing the source
557 """
558 try:
559 io = openAnything(source)
560 doc = minidom.parse(io)
561 for node in doc.getElementsByTagName('model'):
562 self.name = node.getAttribute('name')
563 self.package_name = node.getAttribute('package')
564 assert node.nextSibling is None, "More than one model element"
565 assert self.name and self.package_name, "No model name or package name"
566
567 for c in doc.getElementsByTagName('class'):
568 class_name = c.getAttribute('name')
569 assert class_name, "Name not defined in" + c.toxml()
570 def strip_java_prefix(x):
571 return re.sub(r'.*\.', '', x)
572 parents = map(strip_java_prefix,
573 c.getAttribute('extends').split(' '))
574 cl = Class(class_name, parents, self)
575 for a in c.getElementsByTagName('attribute'):
576 name = a.getAttribute('name')
577 type_name = strip_java_prefix(a.getAttribute('type'))
578 at = Attribute(name, type_name, cl)
579 cl.field_dict[name] = at
580 for r in c.getElementsByTagName('reference'):
581 name = r.getAttribute('name')
582 type_name = r.getAttribute('referenced-type')
583 linked_field_name = r.getAttribute('reverse-reference')
584 ref = Reference(name, type_name, cl, linked_field_name)
585 cl.field_dict[name] = ref
586 for co in c.getElementsByTagName('collection'):
587 name = co.getAttribute('name')
588 type_name = co.getAttribute('referenced-type')
589 linked_field_name = co.getAttribute('reverse-reference')
590 col = Collection(name, type_name, cl, linked_field_name)
591 cl.field_dict[name] = col
592 self.classes[class_name] = cl
593 except Exception, error:
594 raise ModelParseError("Error parsing model", source, error)
595
597 """
598 Make names point to instances and insert inherited fields
599 =========================================================
600
601 This method ensures the model is internally consistent. This method
602 is called during instantiation. It does not need to be called
603 directly.
604
605 @raise ModelError: if the names point to non-existent objects
606 """
607 for c in self.classes.values():
608 c.parent_classes = self.to_ancestry(c)
609 for pc in c.parent_classes:
610 c.field_dict.update(pc.field_dict)
611 for f in c.fields:
612 f.type_class = self.classes.get(f.type_name)
613 if hasattr(f, 'reverse_reference_name') and f.reverse_reference_name != '':
614 rrn = f.reverse_reference_name
615 f.reverse_reference = f.type_class.field_dict[rrn]
616
618 """
619 Returns the lineage of the class
620 ================================
621
622 >>> classes = Model.to_ancestry(cd)
623
624 Returns the class' parents, and all the class' parents' parents
625
626 @rtype: list(L{intermine.model.Class})
627 """
628 parents = cd.parents
629 def defined(x): return x is not None
630 def to_class(x): return self.classes.get(x)
631 ancestry = filter(defined, map(to_class, parents))
632 for ancestor in ancestry:
633 ancestry.extend(self.to_ancestry(ancestor))
634 return ancestry
635
637 """
638 take a list of class names and return a list of classes
639 =======================================================
640
641 >>> classes = model.to_classes(["Gene", "Protein", "Organism"])
642
643 This simply maps from a list of strings to a list of
644 classes in the calling model.
645
646 @raise ModelError: if the list of class names includes ones that don't exist
647
648 @rtype: list(L{intermine.model.Class})
649 """
650 return map(self.get_class, classnames)
651
def column(self, path, *rest):
    """
    Make a Column for the given path on this model
    ==============================================

    Any further positional arguments are handed on to the
    Column constructor after the path and this model.

    @param path: the path to wrap in a Column
    @rtype: L{Column}
    """
    constructor_args = (path, self) + rest
    return Column(*constructor_args)
654
657
659 """
660 Get a class by its name, or by a dotted path
661 ============================================
662
663 >>> model = Model("http://www.flymine.org/query/service/model")
664 >>> model.get_class("Gene")
665 <intermine.model.Class: Gene>
666 >>> model.get_class("Gene.proteins")
667 <intermine.model.Class: Protein>
668
669 This is the recommended way of retrieving a class from
670 the model. As well as handling class names, you can also
671 pass in a path such as "Gene.proteins" and get the
672 corresponding class back (<intermine.model.Class: Protein>)
673
674 @raise ModelError: if the class name refers to a non-existent object
675
676 @rtype: L{intermine.model.Class}
677 """
678 if name.find(".") != -1:
679 path = self.make_path(name)
680 if path.is_attribute():
681 raise ModelError("'" + str(path) + "' is not a class")
682 else:
683 return path.get_class()
684 if name in self.classes:
685 return self.classes[name]
686 else:
687 raise ModelError("'" + name + "' is not a class in this model")
688
690 """
691 Return a path object for the given path string
692 ==============================================
693
694 >>> path = Model.make_path("Gene.organism.name")
695 <intermine.model.Path: Gene.organism.name>
696
697 This is recommended manner of constructing path objects.
698
699 @type path: str
700 @type subclasses: dict
701
702 @raise PathParseError: if there is a problem parsing the path string
703
704 @rtype: L{intermine.model.Path}
705 """
706 return Path(path, self, subclasses)
707
709 """
710 Validate a path
711 ===============
712
713 >>> try:
714 ... model.validate_path("Gene.symbol")
715 ... return "path is valid"
716 ... except PathParseError:
717 ... return "path is invalid"
718 "path is valid"
719
720 When you don't need to interrogate relationships
721 between paths, simply using this method to validate
722 a path string is enough. It guarantees that there
723 is a descriptor for each section of the string,
724 with the appropriate relationships
725
726 @raise PathParseError: if there is a problem parsing the path string
727 """
728 try:
729 self.parse_path_string(path_string, subclasses)
730 return True
731 except PathParseError, e:
732 raise PathParseError("Error parsing '%s' (subclasses: %s)"
733 % ( path_string, str(subclasses) ), e )
734
736 """
737 Parse a path string into a list of descriptors - one for each section
738 =====================================================================
739
740 >>> parts = Model.parse_path_string(string)
741
742 This method is used when making paths from a model, and
743 when validating path strings. It probably won't need to
744 be called directly.
745
746 @see: L{intermine.model.Model.make_path}
747 @see: L{intermine.model.Model.validate_path}
748 @see: L{intermine.model.Path}
749 """
750 descriptors = []
751 names = path_string.split('.')
752 root_name = names.pop(0)
753
754 root_descriptor = self.get_class(root_name)
755 descriptors.append(root_descriptor)
756
757 if root_name in subclasses:
758 current_class = self.get_class(subclasses[root_name])
759 else:
760 current_class = root_descriptor
761
762 for field_name in names:
763 field = current_class.get_field(field_name)
764 descriptors.append(field)
765
766 if isinstance(field, Reference):
767 key = '.'.join(map(lambda x: x.name, descriptors))
768 if key in subclasses:
769 current_class = self.get_class(subclasses[key])
770 else:
771 current_class = field.type_class
772 else:
773 current_class = None
774
775 return descriptors
776
779
782
784
785 - def __init__(self, message, source, cause=None):
788
790 base = repr(self.message) + ":" + repr(self.source)
791 if self.cause is None:
792 return base
793 else:
794 return base + repr(self.cause)
795