1 from xml.dom import minidom
2 import re
3
4 from .util import openAnything, ReadableException
5
6 """
7 Classes representing the data model
8 ===================================
9
10 Representations of tables and columns, and behaviour
11 for validating connections between them.
12
13 """
14
15 __author__ = "Alex Kalderimis"
16 __organization__ = "InterMine"
17 __license__ = "LGPL"
18 __contact__ = "dev@intermine.org"
19
20
21
class Class(object):
    """
    An abstraction of database tables in the data model
    ===================================================

    These objects refer to the table objects in the
    InterMine ORM layer.

    SYNOPSIS
    --------

      >>> service = Service("http://www.flymine.org/query/service")
      >>> model = service.model
      >>>
      >>> if "Gene" in model.classes:
      ...     gene_cd = model.get_class("Gene")
      ...     print "Gene has", len(gene_cd.fields), "fields"
      ...     for field in gene_cd.fields:
      ...         print " - ", field.name

    OVERVIEW
    --------

    Each class can have attributes (columns) of various types,
    and can have references to other classes (tables), on either
    a one-to-one (references) or one-to-many (collections) basis

    Classes should not be instantiated by hand, but rather used
    as part of the model they belong to.

    """

    def __init__(self, name, parents):
        """
        Constructor - Creates a new Class descriptor
        ============================================

          >>> intermine.model.Class("Gene", ["SequenceFeature"])
          <intermine.model.Class: Gene>

        This constructor is called when deserialising the
        model - you should have no need to create Classes by hand

        @param name: The name of this class
        @param parents: a list of parental names

        """
        self.name = name
        self.parents = parents
        # Filled in later with the resolved ancestor Class objects.
        self.parent_classes = []
        self.field_dict = {}
        # Every class starts out with an integer "id" attribute.
        self.field_dict["id"] = Attribute("id", "Integer", self)

    def __repr__(self):
        return '<' + self.__module__ + "." + self.__class__.__name__ + ': ' + self.name + '>'

    @property
    def fields(self):
        """
        The fields of this class
        ========================

        The fields are returned sorted by name. Fields
        includes all Attributes, References and Collections

        @rtype: list(L{Field})
        """
        return sorted(self.field_dict.values(), key=lambda field: field.name)

    @property
    def attributes(self):
        """
        The fields of this class which contain data
        ===========================================

        @rtype: list(L{Attribute})
        """
        # A comprehension (not filter) so a real list is returned on
        # every Python version.
        return [f for f in self.fields if isinstance(f, Attribute)]

    @property
    def references(self):
        """
        fields which reference other objects
        ====================================

        Collections are excluded, even though they are also References.

        @rtype: list(L{Reference})
        """
        return [f for f in self.fields
                if isinstance(f, Reference) and not isinstance(f, Collection)]

    @property
    def collections(self):
        """
        fields which reference many other objects
        =========================================

        @rtype: list(L{Collection})
        """
        return [f for f in self.fields if isinstance(f, Collection)]

    def get_field(self, name):
        """
        Get a field by name
        ===================

        The standard way of retrieving a field

        @param name: the name of the field to look up

        @raise ModelError: if the Class does not have such a field

        @rtype: subclass of L{intermine.model.Field}
        """
        if name not in self.field_dict:
            raise ModelError("There is no field called %s in %s" % (name, self.name))
        return self.field_dict[name]

    def isa(self, other):
        """
        Check if self is, or inherits from other
        ========================================

        This method validates statements about inheritance.
        Returns true if the "other" is, or is within the
        ancestry of, this class

        Other can be passed as a name (str), or as the class object itself

        @rtype: boolean
        """
        other_name = other.name if isinstance(other, Class) else other
        if other_name == self.name or other_name in self.parents:
            return True
        # Otherwise ask each resolved ancestor in turn.
        return any(parent.isa(other) for parent in self.parent_classes)
163
166 """
167 A class representing columns on database tables
168 ===============================================
169
170 The base class for attributes, references and collections. All
171 columns in DB tables are represented by fields
172
173 SYNOPSIS
174 --------
175
176 >>> service = Service("http://www.flymine.org/query/service")
177 >>> model = service.model
178 >>> cd = model.get_class("Gene")
179 >>> print "Gene has", len(cd.fields), "fields"
180 >>> for field in cd.fields:
181 ... print " - ", field
182 Gene has 45 fields
183 - CDSs is a group of CDS objects, which link back to this as gene
184 - GLEANRsymbol is a String
185 - UTRs is a group of UTR objects, which link back to this as gene
186 - alleles is a group of Allele objects, which link back to this as gene
187 - chromosome is a Chromosome
188 - chromosomeLocation is a Location
189 - clones is a group of CDNAClone objects, which link back to this as gene
190 - crossReferences is a group of CrossReference objects, which link back to this as subject
191 - cytoLocation is a String
192 - dataSets is a group of DataSet objects, which link back to this as bioEntities
193 - downstreamIntergenicRegion is a IntergenicRegion
194 - exons is a group of Exon objects, which link back to this as gene
195 - flankingRegions is a group of GeneFlankingRegion objects, which link back to this as gene
196 - goAnnotation is a group of GOAnnotation objects
197 - homologues is a group of Homologue objects, which link back to this as gene
198 - id is a Integer
199 - interactions is a group of Interaction objects, which link back to this as gene
200 - length is a Integer
201 ...
202
203 @see: L{Attribute}
204 @see: L{Reference}
205 @see: L{Collection}
206 """
def __init__(self, name, type_name, class_origin):
    """
    Constructor - DO NOT USE
    ========================

    THIS CLASS IS NOT MEANT TO BE INSTANTIATED DIRECTLY

    Fields are normally obtained from the classes generated
    by the model, so there should be no need to build one by hand.

    @param name: The name of the reference
    @param type_name: The name of the model.Class this refers to
    @param class_origin: The model.Class this was declared in

    """
    self.declared_in = class_origin
    self.name = name
    self.type_name = type_name
    # No class object is attached at construction time.
    self.type_class = None
def toString(self):
    """
    Return a string representation
    ==============================

    The field's name and type name, joined by " is a ".

    @rtype: str
    """
    return " is a ".join((self.name, self.type_name))
231
234 """
235 Attributes represent columns that contain actual data
236 =====================================================
237
238 The Attribute class inherits all the behaviour of L{intermine.model.Field}
239 """
240 pass
241
243 """
244 References represent columns that refer to records in other tables
245 ==================================================================
246
247 In addition to the behaviour and properties of Field, references
248 may also have a reverse reference, if the other record points
249 back to this one as well. And all references will have their
250 type upgraded to a type_class during parsing
251 """
def __init__(self, name, type_name, class_origin, reverse_ref=None):
    """
    Constructor
    ===========

    In addition to the parameters of Field, Reference also
    takes an optional reverse reference name (str)

    @param name: The name of the reference
    @param type_name: The name of the model.Class this refers to
    @param class_origin: The model.Class this was declared in
    @param reverse_ref: The name of the reverse reference (default: None)

    """
    super(Reference, self).__init__(name, type_name, class_origin)
    # Only the name of the reverse reference is known here; the
    # reverse_reference object itself starts out unset.
    self.reverse_reference_name = reverse_ref
    self.reverse_reference = None
def toString(self):
    """
    Return a string representation
    ==============================

    Extends the parent class's description with the name of the
    reverse reference, when one is set.

    @rtype: str
    """
    description = super(Reference, self).toString()
    if self.reverse_reference is not None:
        description = description + ", which links back to this as " + self.reverse_reference.name
    return description
281
283 """
284 Collections are references which refer to groups of objects
285 ===========================================================
286
287 Collections have all the same behaviour and properties as References
288 """
def toString(self):
    """
    Return a string representation

    Rewrites the parent class's description into its plural
    ("group of ... objects") form.

    @rtype: str
    """
    text = super(Collection, self).toString().replace(" is a ", " is a group of ")
    if self.reverse_reference is not None:
        # Pluralise the reverse-reference clause as well.
        return text.replace(", which links", " objects, which link")
    return text + " objects"
296
297
class Path(object):
    """
    A class representing a validated dotted string path
    ===================================================

    A path represents a connection between records and fields

    SYNOPSIS
    --------

      >>> service = Service("http://www.flymine.org/query/service")
      >>> model = service.model
      >>> path = model.make_path("Gene.organism.name")
      >>> path.is_attribute()
      True
      >>> path2 = model.make_path("Gene.proteins")
      >>> path2.is_attribute()
      False
      >>> path2.is_reference()
      True
      >>> path2.get_class()
      <intermine.model.Class: Protein>

    OVERVIEW
    --------

    This class is used for performing validation on dotted path strings.
    The simple act of parsing it into existence will validate the path
    to some extent, but there are additional methods for verifying certain
    relationships as well
    """

    def __init__(self, path_string, model, subclasses=None):
        """
        Constructor
        ===========

          >>> path = Path("Gene.name", model)

        You will not need to use this constructor directly. Instead,
        use the "make_path" method on the model to construct paths for you.

        @param path_string: the dotted path string (eg: Gene.proteins.name)
        @type path_string: str
        @param model: the model to validate the path against
        @type model: L{Model}
        @param subclasses: a dict which maps subclasses (defaults to an empty dict)
        @type subclasses: dict
        """
        # A fresh dict per call: a literal {} default would be one
        # object shared by every Path built without subclasses.
        if subclasses is None:
            subclasses = {}
        self._string = path_string
        self.parts = model.parse_path_string(path_string, subclasses)

    def __str__(self):
        """
        Return the dotted path string this path was built from

        @rtype: str
        """
        return self._string

    def __repr__(self):
        return '<' + self.__module__ + "." + self.__class__.__name__ + ": " + self._string + '>'

    @property
    def end(self):
        """
        The descriptor for the last part of the string.

        @rtype: L{model.Class} or L{model.Field}
        """
        return self.parts[-1]

    def get_class(self):
        """
        Return the class object for this path, if it refers to a class
        or a reference. Attribute paths return None

        @rtype: L{model.Class}
        """
        if self.is_class():
            return self.end
        if self.is_reference():
            return self.end.type_class
        return None

    def is_reference(self):
        """
        Return true if the path is a reference, eg: Gene.organism or Gene.proteins
        Note: Collections are ALSO references

        @rtype: boolean
        """
        return isinstance(self.end, Reference)

    def is_class(self):
        """
        Return true if the path just refers to a class, eg: Gene

        @rtype: boolean
        """
        return isinstance(self.end, Class)

    def is_attribute(self):
        """
        Return true if the path refers to an attribute, eg: Gene.length

        @rtype: boolean
        """
        return isinstance(self.end, Attribute)
402
404 """
405 A class for representing the data model of an InterMine datawarehouse
406 =====================================================================
407
408 An abstraction of the database schema
409
410 SYNOPSIS
411 --------
412
413 >>> service = Service("http://www.flymine.org/query/service")
414 >>> model = service.model
415 >>> model.get_class("Gene")
416 <intermine.model.Class: Gene>
417
418 OVERVIEW
419 --------
420
421 This class represents the data model - ie. an abstraction
422 of the database schema. It can be used to introspect what
423 data is available and how it is inter-related
424 """
def __init__(self, source):
    """
    Constructor
    ===========

      >>> model = Model(xml)

    You will most likely not need to create a model directly,
    instead get one from the Service object:

    @see: L{intermine.webservice.Service}

    @param source: the model.xml, as a local file, string, or url
    """
    assert source is not None
    self.source = source
    # Filled by parse_model, then cross-linked by vivify.
    self.classes = {}
    self.parse_model(source)
    self.vivify()
444
def parse_model(self, source):
    """
    Create classes, attributes, references and collections from the model.xml
    =========================================================================

    The xml can be provided as a file, url or string. This method
    is called during instantiation - it does not need to be called
    directly.

    Sets self.name and self.package_name from the model element and
    adds one L{Class} per class element to self.classes.

    @param source: the model.xml, as a local file, string, or url
    @raise ModelParseError: if there is a problem parsing the source
    """
    def strip_java_prefix(x):
        # "org.intermine.model.bio.Gene" -> "Gene"
        return re.sub(r'.*\.', '', x)

    try:
        handle = openAnything(source)
        doc = minidom.parse(handle)
        for node in doc.getElementsByTagName('model'):
            self.name = node.getAttribute('name')
            self.package_name = node.getAttribute('package')
            # Explicit checks rather than assert, so validation is
            # still performed when Python runs with -O.
            if node.nextSibling is not None:
                raise ValueError("More than one model element")
            if not (self.name and self.package_name):
                raise ValueError("No model name or package name")

        for c in doc.getElementsByTagName('class'):
            class_name = c.getAttribute('name')
            if not class_name:
                raise ValueError("Name not defined in" + c.toxml())
            # split() without an argument yields [] for a missing or
            # empty "extends" attribute, instead of a bogus '' parent.
            parents = [strip_java_prefix(p)
                       for p in c.getAttribute('extends').split()]
            cl = Class(class_name, parents)
            for a in c.getElementsByTagName('attribute'):
                name = a.getAttribute('name')
                type_name = strip_java_prefix(a.getAttribute('type'))
                cl.field_dict[name] = Attribute(name, type_name, cl)
            # References and collections are read identically; only
            # the descriptor type differs.
            for tag, field_type in (('reference', Reference),
                                    ('collection', Collection)):
                for r in c.getElementsByTagName(tag):
                    name = r.getAttribute('name')
                    type_name = r.getAttribute('referenced-type')
                    linked_field_name = r.getAttribute('reverse-reference')
                    cl.field_dict[name] = field_type(
                        name, type_name, cl, linked_field_name)
            self.classes[class_name] = cl
    except Exception as error:
        # Everything that goes wrong is reported as a ModelParseError
        # carrying the source and the underlying error.
        raise ModelParseError("Error parsing model", source, error)
494
def vivify(self):
    """
    Make names point to instances and insert inherited fields
    =========================================================

    This method ensures the model is internally consistent. This method
    is called during instantiation. It does not need to be called
    directly.

    @raise ModelError: if the names point to non-existent objects
    """
    # Pass 1: resolve ancestry and copy inherited fields into every
    # class first, so that pass 2 does not depend on the order in
    # which the classes happen to be visited.
    for c in self.classes.values():
        c.parent_classes = self.to_ancestry(c)
        for pc in c.parent_classes:
            c.field_dict.update(pc.field_dict)

    # Pass 2: point type names and reverse-reference names at objects.
    for c in self.classes.values():
        for f in c.fields:
            f.type_class = self.classes.get(f.type_name)
            # Fields without the attribute, and reverse names of '' or
            # None, have nothing to link back to.
            rrn = getattr(f, 'reverse_reference_name', None)
            if not rrn:
                continue
            if f.type_class is None:
                raise ModelError(
                    "%s.%s refers to %s, which is not a class in this model"
                    % (c.name, f.name, f.type_name))
            if rrn not in f.type_class.field_dict:
                raise ModelError(
                    "%s.%s links back as %s, but there is no such field in %s"
                    % (c.name, f.name, rrn, f.type_name))
            f.reverse_reference = f.type_class.field_dict[rrn]
515
def to_ancestry(self, cd):
    """
    Returns the lineage of the class
    ================================

      >>> classes = Model.to_ancestry(cd)

    Returns the class' parents, and all the class' parents' parents.
    Each ancestor appears once: direct parents first, followed by the
    ancestors of each parent. Parent names that are not classes in
    this model are ignored.

    @param cd: the class whose ancestors are wanted
    @type cd: L{intermine.model.Class}

    @rtype: list(L{intermine.model.Class})
    """
    lineage = []
    # Names already handled. Seeding it with the class itself stops a
    # cyclic "extends" chain from recursing forever.
    seen = set([cd.name])

    def collect(class_desc):
        added = []
        for parent_name in class_desc.parents:
            parent = self.classes.get(parent_name)
            if parent is None or parent_name in seen:
                continue
            seen.add(parent_name)
            added.append(parent)
        lineage.extend(added)
        # Recurse over the local list, never over the list being grown.
        for parent in added:
            collect(parent)

    collect(cd)
    return lineage
534
def to_classes(self, classnames):
    """
    take a list of class names and return a list of classes
    =======================================================

      >>> classes = model.to_classes(["Gene", "Protein", "Organism"])

    This simply maps from a list of strings to a list of
    classes in the calling model.

    @param classnames: the names of the classes to look up

    @raise ModelError: if the list of class names includes ones that don't exist

    @rtype: list(L{intermine.model.Class})
    """
    # A comprehension rather than map(), so a real list is returned
    # on every Python version.
    return [self.get_class(name) for name in classnames]
550
def get_class(self, name):
    """
    Get a class by its name, or by a dotted path
    ============================================

      >>> model = Model("http://www.flymine.org/query/service/model")
      >>> model.get_class("Gene")
      <intermine.model.Class: Gene>
      >>> model.get_class("Gene.proteins")
      <intermine.model.Class: Protein>

    This is the recommended way of retrieving a class from
    the model. As well as handling class names, you can also
    pass in a path such as "Gene.proteins" and get the
    corresponding class back (<intermine.model.Class: Protein>)

    @param name: a class name, or a dotted path ending in a class or reference

    @raise ModelError: if the class name refers to a non-existent object

    @rtype: L{intermine.model.Class}
    """
    if "." in name:
        # Dotted names are resolved through a path object.
        path = self.make_path(name)
        if not path.is_attribute():
            return path.get_class()
        raise ModelError("'" + str(path) + "' is not a class")

    if name not in self.classes:
        raise ModelError("'" + name + "' is not a class in this model")
    return self.classes[name]
581
def make_path(self, path, subclasses=None):
    """
    Return a path object for the given path string
    ==============================================

      >>> path = Model.make_path("Gene.organism.name")
      <intermine.model.Path: Gene.organism.name>

    This is the recommended manner of constructing path objects.

    @param path: the dotted path string
    @type path: str
    @param subclasses: a dict which maps subclasses (defaults to an empty dict)
    @type subclasses: dict

    @raise PathParseError: if there is a problem parsing the path string

    @rtype: L{intermine.model.Path}
    """
    # Always hand Path a real dict of its own, never a shared default.
    if subclasses is None:
        subclasses = {}
    return Path(path, self, subclasses)
600
def validate_path(self, path_string, subclasses=None):
    """
    Validate a path
    ===============

      >>> try:
      ...     model.validate_path("Gene.symbol")
      ...     return "path is valid"
      ... except PathParseError:
      ...     return "path is invalid"
      "path is valid"

    When you don't need to interrogate relationships
    between paths, simply using this method to validate
    a path string is enough. It guarantees that there
    is a descriptor for each section of the string,
    with the appropriate relationships

    @param path_string: the dotted path string to check
    @type path_string: str
    @param subclasses: a dict which maps subclasses (defaults to an empty dict)
    @type subclasses: dict

    @return: True when the path parses

    @raise PathParseError: if there is a problem parsing the path string
    """
    if subclasses is None:
        subclasses = {}
    try:
        self.parse_path_string(path_string, subclasses)
    except PathParseError as e:
        # Re-raise with the offending path and subclass constraints
        # in the message, keeping the original error as the cause.
        raise PathParseError("Error parsing '%s' (subclasses: %s)"
                             % (path_string, str(subclasses)), e)
    return True
627
def parse_path_string(self, path_string, subclasses=None):
    """
    Parse a path string into a list of descriptors - one for each section
    =====================================================================

      >>> parts = Model.parse_path_string(string)

    This method is used when making paths from a model, and
    when validating path strings. It probably won't need to
    be called directly.

    The first descriptor is the root class; each later one is the
    field named by the corresponding section. An entry in subclasses,
    keyed by the dotted path so far, replaces the class used to look
    up the next section.

    @param path_string: the dotted path string (eg: Gene.proteins.name)
    @type path_string: str
    @param subclasses: a dict which maps subclasses (defaults to an empty dict)
    @type subclasses: dict

    @raise PathParseError: if the path continues past a part that
        does not lead to a class

    @rtype: list

    @see: L{intermine.model.Model.make_path}
    @see: L{intermine.model.Model.validate_path}
    @see: L{intermine.model.Path}
    """
    if subclasses is None:
        subclasses = {}

    names = path_string.split('.')
    root_name = names.pop(0)

    root_descriptor = self.get_class(root_name)
    descriptors = [root_descriptor]

    if root_name in subclasses:
        current_class = self.get_class(subclasses[root_name])
    else:
        current_class = root_descriptor

    for field_name in names:
        if current_class is None:
            # The previous part was not a reference, or its type_class
            # is unset, so there is nothing to look the next field up on.
            raise PathParseError(
                "Cannot resolve '%s' in '%s': '%s' does not lead to a class"
                % (field_name, path_string, descriptors[-1].name), None)

        field = current_class.get_field(field_name)
        descriptors.append(field)

        if isinstance(field, Reference):
            key = '.'.join([d.name for d in descriptors])
            if key in subclasses:
                current_class = self.get_class(subclasses[key])
            else:
                current_class = field.type_class
        else:
            current_class = None

    return descriptors
669
672
675
677
678 - def __init__(self, message, source, cause=None):
681
683 base = repr(self.message) + ":" + repr(self.source)
684 if self.cause is None:
685 return base
686 else:
687 return base + repr(self.cause)
688