Package intermine :: Module model
[hide private]
[frames] | no frames]

Source Code for Module intermine.model

  1  from xml.dom import minidom 
  2  import weakref 
  3  import re 
  4   
  5  from .util import openAnything, ReadableException 
  6  from .lists.list import List 
  7   
  8  """ 
  9  Classes representing the data model 
 10  =================================== 
 11   
 12  Representations of tables and columns, and behaviour 
 13  for validating connections between them. 
 14   
 15  """ 
 16   
 17  __author__ = "Alex Kalderimis" 
 18  __organization__ = "InterMine" 
 19  __license__ = "LGPL" 
 20  __contact__ = "dev@intermine.org" 
class Class(object):
    """
    An abstraction of database tables in the data model
    ===================================================

    These objects refer to the table objects in the
    InterMine ORM layer.

    SYNOPSIS
    --------

    >>> service = Service("http://www.flymine.org/query/service")
    >>> model = service.model
    >>>
    >>> if "Gene" in model.classes:
    ...     gene_cd = model.get_class("Gene")
    ...     print "Gene has", len(gene_cd.fields), "fields"
    ...     for field in gene_cd.fields:
    ...         print " - ", field.name

    OVERVIEW
    --------

    Each class can have attributes (columns) of various types,
    and can have references to other classes (tables), on either
    a one-to-one (references) or one-to-many (collections) basis

    Classes should not be instantiated by hand, but rather used
    as part of the model they belong to.

    """

    def __init__(self, name, parents, model):
        """
        Constructor - Creates a new Class descriptor
        ============================================

        >>> cd = intermine.model.Class("Gene", ["SequenceFeature"], model)
        <intermine.model.Class: Gene>

        This constructor is called when deserialising the
        model - you should have no need to create Classes by hand

        @param name: The name of this class
        @param parents: a list of parental names
        @param model: The model this class belongs to

        """
        self.name = name
        self.parents = parents
        self.model = model
        # Filled in with the resolved ancestor classes by Model.vivify
        self.parent_classes = []
        # All classes have the id attr
        self.field_dict = {"id": Attribute("id", "Integer", self)}

    def __repr__(self):
        return '<' + self.__module__ + "." + self.__class__.__name__ + ': ' + self.name + '>'

    @property
    def fields(self):
        """
        The fields of this class
        ========================

        The fields are returned sorted by name. Fields
        includes all Attributes, References and Collections

        @rtype: list(L{Field})
        """
        return sorted(self.field_dict.values(), key=lambda field: field.name)

    @property
    def attributes(self):
        """
        The fields of this class which contain data
        ===========================================

        @rtype: list(L{Attribute})
        """
        # A comprehension (rather than filter) so that a real list
        # is returned on Python 3 as well as Python 2
        return [f for f in self.fields if isinstance(f, Attribute)]

    @property
    def references(self):
        """
        fields which reference other objects
        ====================================

        Collections are a kind of Reference, but they are
        not included here - see L{collections}

        @rtype: list(L{Reference})
        """
        return [f for f in self.fields
                if isinstance(f, Reference) and not isinstance(f, Collection)]

    @property
    def collections(self):
        """
        fields which reference many other objects
        =========================================

        @rtype: list(L{Collection})
        """
        return [f for f in self.fields if isinstance(f, Collection)]

    def get_field(self, name):
        """
        Get a field by name
        ===================

        The standard way of retrieving a field

        @param name: The name of the field to look up

        @raise ModelError: if the Class does not have such a field

        @rtype: subclass of L{intermine.model.Field}
        """
        if name in self.field_dict:
            return self.field_dict[name]
        raise ModelError("There is no field called %s in %s" % (name, self.name))

    def isa(self, other):
        """
        Check if self is, or inherits from other
        ========================================

        This method validates statements about inheritance.
        Returns true if the "other" is, or is within the
        ancestry of, this class

        Other can be passed as a name (str), or as the class object itself

        @rtype: boolean
        """
        other_name = other.name if isinstance(other, Class) else other
        if other_name == self.name or other_name in self.parents:
            return True
        # Not this class or a direct parent: ask each resolved ancestor
        for ancestor in self.parent_classes:
            if ancestor.isa(other):
                return True
        return False
166
class Field(object):
    """
    A class representing columns on database tables
    ===============================================

    The base class for attributes, references and collections. All
    columns in DB tables are represented by fields

    SYNOPSIS
    --------

        >>> service = Service("http://www.flymine.org/query/service")
        >>> model = service.model
        >>> cd = model.get_class("Gene")
        >>> print "Gene has", len(cd.fields), "fields"
        >>> for field in cd.fields:
        ...     print " - ", field
        Gene has 45 fields
            -  CDSs is a group of CDS objects, which link back to this as gene
            -  GLEANRsymbol is a String
            -  UTRs is a group of UTR objects, which link back to this as gene
            -  alleles is a group of Allele objects, which link back to this as gene
            -  chromosome is a Chromosome
            -  chromosomeLocation is a Location
            -  clones is a group of CDNAClone objects, which link back to this as gene
            -  crossReferences is a group of CrossReference objects, which link back to this as subject
            -  cytoLocation is a String
            -  dataSets is a group of DataSet objects, which link back to this as bioEntities
            -  downstreamIntergenicRegion is a IntergenicRegion
            -  exons is a group of Exon objects, which link back to this as gene
            -  flankingRegions is a group of GeneFlankingRegion objects, which link back to this as gene
            -  goAnnotation is a group of GOAnnotation objects
            -  homologues is a group of Homologue objects, which link back to this as gene
            -  id is a Integer
            -  interactions is a group of Interaction objects, which link back to this as gene
            -  length is a Integer
            ...

    @see: L{Attribute}
    @see: L{Reference}
    @see: L{Collection}
    """

    def __init__(self, name, type_name, class_origin):
        """
        Constructor - DO NOT USE
        ========================

        THIS CLASS IS NOT MEANT TO BE INSTANTIATED DIRECTLY

        You are unlikely to need to do so anyway: it is
        recommended you access fields through the classes
        generated by the model

        @param name: The name of the field
        @param type_name: The name of the type of this field
        @param class_origin: The model.Class this was declared in

        """
        self.declared_in = class_origin
        self.name = name
        self.type_name = type_name
        # No class object is attached at construction time
        self.type_class = None

    def toString(self):
        """
        Return a string representation
        ==============================

        The result has the form "<name> is a <type name>"

        @rtype: str
        """
        return "".join([self.name, " is a ", self.type_name])

    def __str__(self):
        # Delegates to toString so that subclasses only override one method
        return self.toString()
234
class Attribute(Field):
    """
    Attributes represent columns that contain actual data
    =====================================================

    The Attribute class adds nothing of its own: it inherits all
    its behaviour from L{intermine.model.Field}, and exists so that
    data columns can be told apart from references by type.
    """
244
class Reference(Field):
    """
    References represent columns that refer to records in other tables
    ==================================================================

    In addition to the behaviour and properties of Field, references
    may also have a reverse reference, if the other record points
    back to this one as well. The reverse reference is stored by
    name at construction time; the field object itself starts out
    as None and is attached later.
    """

    def __init__(self, name, type_name, class_origin, reverse_ref=None):
        """
        Constructor
        ===========

        In addition to the parameters of Field, Reference also
        takes an optional reverse reference name (str)

        @param name: The name of the reference
        @param type_name: The name of the model.Class this refers to
        @param class_origin: The model.Class this was declared in
        @param reverse_ref: The name of the reverse reference (default: None)

        """
        super(Reference, self).__init__(name, type_name, class_origin)
        self.reverse_reference_name = reverse_ref
        # The field object for the reverse reference, once it is known
        self.reverse_reference = None

    def toString(self):
        """
        Return a string representation
        ==============================

        The description from Field, followed by a note about the
        reverse reference when one has been attached.

        @rtype: str
        """
        description = super(Reference, self).toString()
        if self.reverse_reference is not None:
            description = (description + ", which links back to this as "
                           + self.reverse_reference.name)
        return description
284
class Collection(Reference):
    """
    Collections are references which refer to groups of objects
    ===========================================================

    Collections have all the same behaviour and properties as References;
    only the string representation differs.
    """

    def toString(self):
        """
        Return a string representation
        ==============================

        Rewords the Reference description in the plural, eg:
        "exons is a group of Exon objects, which link back to this as gene"

        @rtype: str
        """
        text = super(Collection, self).toString()
        text = text.replace(" is a ", " is a group of ")
        if self.reverse_reference is not None:
            # Slot "objects" in before the reverse reference clause
            return text.replace(", which links", " objects, which link")
        return text + " objects"
299
class Path(object):
    """
    A class representing a validated dotted string path
    ===================================================

    A path represents a connection between records and fields

    SYNOPSIS
    --------

        >>> service = Service("http://www.flymine.org/query/service")
        >>> model = service.model
        >>> path = model.make_path("Gene.organism.name")
        >>> path.is_attribute()
        ... True
        >>> path2 = model.make_path("Gene.proteins")
        >>> path2.is_attribute()
        ... False
        >>> path2.is_reference()
        ... True
        >>> path2.get_class()
        ... <intermine.model.Class: gene>

    OVERVIEW
    --------

    This class is used for performing validation on dotted path strings.
    The simple act of parsing it into existence will validate the path
    to some extent, but there are additional methods for verifying certain
    relationships as well
    """

    def __init__(self, path, model, subclasses={}):
        """
        Constructor
        ===========

          >>> path = Path("Gene.name", model)

        You will not need to use this constructor directly. Instead,
        use the "make_path" method on the model to construct paths for you.

        @param path: the dotted path string (eg: Gene.proteins.name),
                     or a L{Class}, which makes a path of just that class
        @type path: str or L{Class}
        @param model: the model to validate the path against
        @type model: L{Model}
        @param subclasses: a dict which maps subclasses (defaults to an empty dict)
        @type subclasses: dict
        """
        # Only a weak proxy to the model is stored on the path
        self.model = weakref.proxy(model)
        if not isinstance(path, Class):
            self._string = str(path)
            self.parts = model.parse_path_string(str(path), subclasses)
            return
        # A class descriptor is a complete path in its own right
        self._string = path.name
        self.parts = [path]

    def __str__(self):
        return self._string

    def __repr__(self):
        return "<%s.%s: %s>" % (self.__module__, self.__class__.__name__, self._string)

    @property
    def root(self):
        """
        The descriptor for the first part of the string. This should always be a class descriptor.

        @rtype: L{intermine.model.Class}
        """
        first = self.parts[0]
        return first

    @property
    def end(self):
        """
        The descriptor for the last part of the string.

        @rtype: L{model.Class} or L{model.Field}
        """
        last = self.parts[-1]
        return last

    def get_class(self):
        """
        Return the class object for this path, if it refers to a class
        or a reference. Attribute paths return None

        @rtype: L{model.Class}
        """
        if self.is_class():
            return self.end
        if self.is_reference():
            return self.end.type_class
        return None

    # The same lookup, available as an attribute
    end_class = property(get_class)

    def is_reference(self):
        """
        Return true if the path is a reference, eg: Gene.organism or Gene.proteins
        Note: Collections are ALSO references

        @rtype: boolean
        """
        tail = self.end
        return isinstance(tail, Reference)

    def is_class(self):
        """
        Return true if the path just refers to a class, eg: Gene

        @rtype: boolean
        """
        tail = self.end
        return isinstance(tail, Class)

    def is_attribute(self):
        """
        Return true if the path refers to an attribute, eg: Gene.length

        @rtype: boolean
        """
        tail = self.end
        return isinstance(tail, Attribute)
420
class Column(object):
    """
    A path in the model, wrapped for use in query building
    ======================================================

    A Column wraps a L{Path}. Attribute access walks further down
    the path (eg: model.Gene.organism.name), and the comparison
    and arithmetic operators are overloaded so that they return
    plain tuples of the form (path, operator, value).
    NOTE(review): those tuples are presumably meant to be passed to
    a query as constraint arguments - confirm against the query API.

    Because the comparison operators return tuples rather than
    booleans, columns cannot meaningfully be compared to each other.
    """

    # Names that are real instance attributes. __getattr__ is only
    # reached for one of these when it has not been set yet.
    _OWN_ATTRIBUTES = ("_model", "_query", "_subclasses", "_path")

    def __init__(self, path, model, subclasses={}, query=None):
        """
        Constructor
        ===========

        @param path: a L{Path}, or anything model.make_path accepts
        @param model: the L{Model} this column belongs to
        @param subclasses: a dict which maps subclasses; it is only
                           stored and passed on, never modified here
        @type subclasses: dict
        @param query: stored and passed on to the columns created
                      by attribute access (default: None)
        """
        self._model = model
        self._query = query
        self._subclasses = subclasses
        if isinstance(path, Path):
            self._path = path
        else:
            self._path = model.make_path(path, subclasses)

    def select(self, *cols):
        """
        Create a new query rooted at this column, and select cols on it

        @return: the new query, as returned by the model's service
        """
        q = self._model.service.new_query(str(self))
        q.select(*cols)
        return q

    def __getattr__(self, name):
        """
        Return the column for the field called name on this column's class

        @raise AttributeError: if this column does not refer to a class,
                               or if the class has no such field
        """
        # Special methods and our own (not yet set) attributes can never be
        # fields. Without this guard, looking up self._path on a half-built
        # instance (as copy and pickle do) recurses without end.
        if name.startswith("__") or name in self._OWN_ATTRIBUTES:
            raise AttributeError(name)
        cld = self._path.get_class()
        if cld is not None:
            try:
                cld.get_field(name)  # only called to validate the name
                return Column(str(self) + "." + name, self._model,
                              self._subclasses, self._query)
            except ModelError as e:
                raise AttributeError(str(e))
        raise AttributeError("No attribute '" + name + "'")

    def __str__(self):
        return str(self._path)

    # __eq__ is overloaded below, which on Python 3 would otherwise make
    # columns unhashable. Keep the identity hash that Python 2 provides.
    __hash__ = object.__hash__

    def __mod__(self, other):
        """col % value returns a LOOKUP tuple; a tuple value is spliced in"""
        if isinstance(other, tuple):
            return (str(self), "LOOKUP") + other
        return (str(self), 'LOOKUP', str(other))

    def __rshift__(self, other):
        """col >> other returns the pair (str(col), str(other))"""
        return (str(self), str(other))

    def __eq__(self, other):
        """col == value returns a tuple whose operator depends on the value's type"""
        if isinstance(other, Column):
            return (str(self), "IS", str(other))
        if other is None:
            return (str(self), "IS NULL")
        if isinstance(other, list):
            return (str(self), "ONE OF", other)
        if isinstance(other, List):
            return (str(self), "IN", other.name)
        return (str(self), "=", other)

    def __ne__(self, other):
        """col != value returns the negated counterpart of the == tuple"""
        if isinstance(other, Column):
            return (str(self), "IS NOT", str(other))
        if other is None:
            return (str(self), "IS NOT NULL")
        if isinstance(other, list):
            return (str(self), "NONE OF", other)
        if isinstance(other, List):
            return (str(self), "NOT IN", other.name)
        return (str(self), "!=", other)

    def __lt__(self, other):
        return (str(self), "<", other)

    def __le__(self, other):
        return (str(self), "<=", other)

    def __gt__(self, other):
        return (str(self), ">", other)

    def __ge__(self, other):
        return (str(self), ">=", other)
496
class Model(object):
    """
    A class for representing the data model of an InterMine datawarehouse
    =====================================================================

    An abstraction of the database schema

    SYNOPSIS
    --------

        >>> service = Service("http://www.flymine.org/query/service")
        >>> model = service.model
        >>> model.get_class("Gene")
        <intermine.model.Class: Gene>

    OVERVIEW
    --------

    This class represents the data model - ie. an abstraction
    of the database schema. It can be used to introspect what
    data is available and how it is inter-related
    """

    def __init__(self, source, service=None):
        """
        Constructor
        ===========

          >>> model = Model(xml)

        You will most likely not need to create a model directly,
        instead get one from the Service object:

        @see: L{intermine.webservice.Service}

        @param source: the model.xml, as a local file, string, or url
        @param service: the service this model belongs to (default: None).
                        Only a weak proxy to it is kept.
        """
        assert source is not None
        self.source = source
        self.service = weakref.proxy(service) if service is not None else service
        self.classes = {}
        self.parse_model(source)
        self.vivify()

        # Make sugary aliases
        self.table = self.column

    def parse_model(self, source):
        """
        Create classes, attributes, references and collections from the model.xml
        =========================================================================

        The xml can be provided as a file, url or string. This method
        is called during instantiation - it does not need to be called
        directly.

        @param source: the model.xml, as a local file, string, or url
        @raise ModelParseError: if there is a problem parsing the source
        """
        def strip_java_prefix(x):
            return re.sub(r'.*\.', '', x)

        try:
            # NOTE(review): the stream is deliberately not closed here, as
            # openAnything may hand back a stream owned by the caller - confirm
            io = openAnything(source)
            doc = minidom.parse(io)
            for node in doc.getElementsByTagName('model'):
                self.name = node.getAttribute('name')
                self.package_name = node.getAttribute('package')
                # Explicit checks rather than asserts, so that the model
                # is still validated when Python runs with -O
                if node.nextSibling is not None:
                    raise ValueError("More than one model element")
                if not (self.name and self.package_name):
                    raise ValueError("No model name or package name")

            for c in doc.getElementsByTagName('class'):
                class_name = c.getAttribute('name')
                if not class_name:
                    raise ValueError("Name not defined in" + c.toxml())
                # split() drops the empty name an absent "extends" would give,
                # and the comprehension makes a real list on Python 3 too
                parents = [strip_java_prefix(p)
                           for p in c.getAttribute('extends').split()]
                cl = Class(class_name, parents, self)
                for a in c.getElementsByTagName('attribute'):
                    name = a.getAttribute('name')
                    type_name = strip_java_prefix(a.getAttribute('type'))
                    cl.field_dict[name] = Attribute(name, type_name, cl)
                for r in c.getElementsByTagName('reference'):
                    name = r.getAttribute('name')
                    type_name = r.getAttribute('referenced-type')
                    linked_field_name = r.getAttribute('reverse-reference')
                    cl.field_dict[name] = Reference(name, type_name, cl, linked_field_name)
                for co in c.getElementsByTagName('collection'):
                    name = co.getAttribute('name')
                    type_name = co.getAttribute('referenced-type')
                    linked_field_name = co.getAttribute('reverse-reference')
                    cl.field_dict[name] = Collection(name, type_name, cl, linked_field_name)
                self.classes[class_name] = cl
        except Exception as error:
            raise ModelParseError("Error parsing model", source, error)

    def vivify(self):
        """
        Make names point to instances and insert inherited fields
        =========================================================

        This method ensures the model is internally consistent. This method
        is called during instantiation. It does not need to be called
        directly.

        @raise ModelError: if a reverse reference points to a non-existent
                           class or field
        """
        # First pass: give every class its inherited fields. This must be
        # finished for all classes before reverse references are resolved,
        # because a reverse reference may be inherited by the target class.
        for c in self.classes.values():
            c.parent_classes = self.to_ancestry(c)
            for pc in c.parent_classes:
                c.field_dict.update(pc.field_dict)

        # Second pass: make type names and reverse reference names
        # point to the objects they name
        for c in self.classes.values():
            for f in c.fields:
                f.type_class = self.classes.get(f.type_name)
                # Only references have this; it is '' or None when unset
                rrn = getattr(f, 'reverse_reference_name', None)
                if not rrn:
                    continue
                if f.type_class is None:
                    raise ModelError(
                        "%s.%s refers to %s, which is not a class in this model"
                        % (c.name, f.name, f.type_name))
                if rrn not in f.type_class.field_dict:
                    raise ModelError(
                        "%s.%s has the reverse reference %s, but there is no such field in %s"
                        % (c.name, f.name, rrn, f.type_name))
                f.reverse_reference = f.type_class.field_dict[rrn]

    def _direct_parents(self, cd):
        """Return the classes of this model named as parents of cd"""
        found = [self.classes.get(name) for name in cd.parents]
        # weeds out the java classes
        return [parent for parent in found if parent is not None]

    def to_ancestry(self, cd):
        """
        Returns the lineage of the class
        ================================

            >>> classes = model.to_ancestry(cd)

        Returns the class' parents, and all the class' parents' parents.
        Each ancestor appears exactly once, and always after every
        class in the lineage that inherits from it.

        @rtype: list(L{intermine.model.Class})
        """
        lineage = []
        generation = self._direct_parents(cd)
        # A sane hierarchy is no deeper than the number of classes, so
        # this bound only cuts short a model with circular inheritance
        for _ in range(len(self.classes)):
            generation = [c for c in generation if c is not cd]
            if not generation:
                break
            next_generation = []
            for ancestor in generation:
                # Reached again by a longer route: move it to the end,
                # so that it stays behind all of its descendants
                if ancestor in lineage:
                    lineage.remove(ancestor)
                lineage.append(ancestor)
                for parent in self._direct_parents(ancestor):
                    if parent not in next_generation:
                        next_generation.append(parent)
            generation = next_generation
        return lineage

    def to_classes(self, classnames):
        """
        take a list of class names and return a list of classes
        =======================================================

            >>> classes = model.to_classes(["Gene", "Protein", "Organism"])

        This simply maps from a list of strings to a list of
        classes in the calling model.

        @raise ModelError: if the list of class names includes ones that don't exist

        @rtype: list(L{intermine.model.Class})
        """
        return [self.get_class(name) for name in classnames]

    def column(self, path, *rest):
        """
        Return a L{Column} for the given path in this model

        @param path: a path string or L{Path}
        @param rest: passed on to the Column constructor after the model
        """
        return Column(path, self, *rest)

    def __getattr__(self, name):
        """
        Treat unknown attributes as paths, eg: model.Gene

        @raise AttributeError: for special and private names
        @raise ModelError: if the name is not a class in this model
        """
        # Special/private names and our own class table are never paths.
        # Without this guard, a lookup on a half-built instance (as copy
        # and pickle do) recurses without end through get_class.
        if name.startswith("_") or name == "classes":
            raise AttributeError(name)
        return self.column(name)

    def get_class(self, name):
        """
        Get a class by its name, or by a dotted path
        ============================================

            >>> model = Model("http://www.flymine.org/query/service/model")
            >>> model.get_class("Gene")
            <intermine.model.Class: Gene>
            >>> model.get_class("Gene.proteins")
            <intermine.model.Class: Protein>

        This is the recommended way of retrieving a class from
        the model. As well as handling class names, you can also
        pass in a path such as "Gene.proteins" and get the
        corresponding class back (<intermine.model.Class: Protein>)

        @raise ModelError: if the class name refers to a non-existent object

        @rtype: L{intermine.model.Class}
        """
        if "." in name:
            path = self.make_path(name)
            if path.is_attribute():
                raise ModelError("'" + str(path) + "' is not a class")
            return path.get_class()
        if name in self.classes:
            return self.classes[name]
        raise ModelError("'" + name + "' is not a class in this model")

    def make_path(self, path, subclasses={}):
        """
        Return a path object for the given path string
        ==============================================

            >>> path = model.make_path("Gene.organism.name")
            <intermine.model.Path: Gene.organism.name>

        This is the recommended manner of constructing path objects.

        @type path: str
        @type subclasses: dict

        @raise PathParseError: if there is a problem parsing the path string

        @rtype: L{intermine.model.Path}
        """
        return Path(path, self, subclasses)

    def validate_path(self, path_string, subclasses={}):
        """
        Validate a path
        ===============

            >>> try:
            ...     model.validate_path("Gene.symbol")
            ...     return "path is valid"
            ... except PathParseError:
            ...     return "path is invalid"
            "path is valid"

        When you don't need to interrogate relationships
        between paths, simply using this method to validate
        a path string is enough. It guarantees that there
        is a descriptor for each section of the string,
        with the appropriate relationships

        @raise PathParseError: if there is a problem parsing the path string
        """
        try:
            self.parse_path_string(path_string, subclasses)
            return True
        except PathParseError as e:
            raise PathParseError("Error parsing '%s' (subclasses: %s)"
                                 % (path_string, str(subclasses)), e)

    def parse_path_string(self, path_string, subclasses={}):
        """
        Parse a path string into a list of descriptors - one for each section
        =====================================================================

            >>> parts = model.parse_path_string(string)

        This method is used when making paths from a model, and
        when validating path strings. It probably won't need to
        be called directly.

        @param path_string: the dotted path string (eg: Gene.proteins.name)
        @param subclasses: a dict from path strings to the names of the
                           classes those paths should be treated as
        @raise PathParseError: if the path carries on past a section
                               that does not refer to a class
        @raise ModelError: if a class or field in the path does not exist

        @see: L{intermine.model.Model.make_path}
        @see: L{intermine.model.Model.validate_path}
        @see: L{intermine.model.Path}
        """
        descriptors = []
        names = path_string.split('.')
        root_name = names.pop(0)

        root_descriptor = self.get_class(root_name)
        descriptors.append(root_descriptor)

        if root_name in subclasses:
            current_class = self.get_class(subclasses[root_name])
        else:
            current_class = root_descriptor

        for field_name in names:
            if current_class is None:
                # The previous section was an attribute, or a reference to
                # a class that is not in this model: nothing can follow it
                raise PathParseError(
                    "Cannot get '%s' in '%s': '%s' does not refer to a class"
                    % (field_name, path_string, descriptors[-1].name))
            field = current_class.get_field(field_name)
            descriptors.append(field)

            if isinstance(field, Reference):
                key = '.'.join([d.name for d in descriptors])
                if key in subclasses:
                    current_class = self.get_class(subclasses[key])
                else:
                    current_class = field.type_class
            else:
                current_class = None

        return descriptors
773
class ModelError(ReadableException):
    """
    The base class for errors raised in this module: it is raised
    for missing classes and fields, and is the parent of
    L{PathParseError} and L{ModelParseError}
    """
776
class PathParseError(ModelError):
    """
    The error raised when there is a problem parsing a path string
    """
779
class ModelParseError(ModelError):
    """
    The error raised when the model source cannot be parsed
    =======================================================

    As well as the message and cause, this error records
    the source that the model was being read from.
    """

    def __init__(self, message, source, cause=None):
        """
        Constructor
        ===========

        @param message: a description of what went wrong
        @param source: the source the model was being parsed from
        @param cause: the underlying error, if any (default: None)
        """
        self.source = source
        super(ModelParseError, self).__init__(message, cause)

    def __str__(self):
        # NOTE(review): message and cause are presumably set by the
        # ReadableException constructor - confirm in intermine.util
        pieces = [repr(self.message), ":", repr(self.source)]
        if self.cause is not None:
            pieces.append(repr(self.cause))
        return "".join(pieces)
792