Package intermine :: Module model
[hide private]
[frames] | no frames]

Source Code for Module intermine.model

  1  from xml.dom import minidom 
  2  import weakref 
  3  import re 
  4   
  5  from intermine.util import openAnything, ReadableException 
  6  from intermine.lists.list import List 
  7   
  8  """ 
  9  Classes representing the data model 
 10  =================================== 
 11   
 12  Representations of tables and columns, and behaviour 
 13  for validating connections between them. 
 14   
 15  """ 
 16   
 17  __author__ = "Alex Kalderimis" 
 18  __organization__ = "InterMine" 
 19  __license__ = "LGPL" 
 20  __contact__ = "dev@intermine.org" 
class Class(object):
    """
    An abstraction of database tables in the data model
    ===================================================

    These objects refer to the table objects in the
    InterMine ORM layer.

    SYNOPSIS
    --------

    >>> service = Service("http://www.flymine.org/query/service")
    >>> model = service.model
    >>>
    >>> if "Gene" in model.classes:
    ...     gene_cd = model.get_class("Gene")
    ...     for field in gene_cd.fields:
    ...         print(field.name)

    OVERVIEW
    --------

    Each class can have attributes (columns) of various types,
    and can have references to other classes (tables), on either
    a one-to-one (references) or one-to-many (collections) basis

    Classes should not be instantiated by hand, but rather used
    as part of the model they belong to.

    """

    def __init__(self, name, parents, model):
        """
        Constructor - Creates a new Class descriptor
        ============================================

        >>> cd = intermine.model.Class("Gene", ["SequenceFeature"], model)
        <intermine.model.Class: Gene>

        This constructor is called when deserialising the
        model - you should have no need to create Classes by hand

        @param name: The name of this class
        @param parents: a list of parental names
        @param model: The model this class belongs to

        """
        self.name = name
        self.parents = parents
        self.model = model
        # Filled in later with the resolved ancestor descriptors.
        self.parent_classes = []
        # All classes have the id attribute.
        self.field_dict = {"id": Attribute("id", "Integer", self)}

    def __repr__(self):
        return '<' + self.__module__ + "." + self.__class__.__name__ + ': ' + self.name + '>'

    @property
    def fields(self):
        """
        The fields of this class
        ========================

        The fields are returned sorted by name. Fields
        includes all Attributes, References and Collections

        @rtype: list(L{Field})
        """
        return sorted(self.field_dict.values(), key=lambda field: field.name)

    @property
    def attributes(self):
        """
        The fields of this class which contain data
        ===========================================

        @rtype: list(L{Attribute})
        """
        # A comprehension (not filter) so that a real list is returned on
        # every Python version.
        return [f for f in self.fields if isinstance(f, Attribute)]

    @property
    def references(self):
        """
        fields which reference other objects
        ====================================

        Collections are excluded, even though they are a kind of Reference.

        @rtype: list(L{Reference})
        """
        return [f for f in self.fields
                if isinstance(f, Reference) and not isinstance(f, Collection)]

    @property
    def collections(self):
        """
        fields which reference many other objects
        =========================================

        @rtype: list(L{Collection})
        """
        return [f for f in self.fields if isinstance(f, Collection)]

    def get_field(self, name):
        """
        Get a field by name
        ===================

        The standard way of retrieving a field

        @param name: the name of the field to look up
        @raise ModelError: if the Class does not have such a field

        @rtype: subclass of L{intermine.model.Field}
        """
        if name in self.field_dict:
            return self.field_dict[name]
        raise ModelError("There is no field called %s in %s" % (name, self.name))

    def isa(self, other):
        """
        Check if self is, or inherits from other
        ========================================

        This method validates statements about inheritance.
        Returns true if the "other" is, or is within the
        ancestry of, this class

        Other can be passed as a name (str), or as the class object itself

        @rtype: boolean
        """
        other_name = other.name if isinstance(other, Class) else other
        if other_name == self.name or other_name in self.parents:
            return True
        return any(parent.isa(other) for parent in self.parent_classes)
166
class Field(object):
    """
    A class representing columns on database tables
    ===============================================

    The base class for attributes, references and collections. All
    columns in DB tables are represented by fields

    SYNOPSIS
    --------

    >>> service = Service("http://www.flymine.org/query/service")
    >>> model = service.model
    >>> cd = model.get_class("Gene")
    >>> for field in cd.fields:
    ...     print(field)
    CDSs is a group of CDS objects, which link back to this as gene
    GLEANRsymbol is a String
    chromosome is a Chromosome
    id is a Integer
    length is a Integer
    ...

    @see: L{Attribute}
    @see: L{Reference}
    @see: L{Collection}
    """

    def __init__(self, name, type_name, class_origin):
        """
        Constructor - DO NOT USE
        ========================

        THIS CLASS IS NOT MEANT TO BE INSTANTIATED DIRECTLY

        You are unlikely to need to do so anyway: it is recommended
        that you access fields through the classes generated by the model

        @param name: The name of the field
        @param type_name: The name of the type of this field
        @param class_origin: The model.Class this was declared in

        """
        self.declared_in = class_origin
        # The type starts out as a bare name; type_class stays None here.
        self.type_class = None
        self.type_name = type_name
        self.name = name

    def toString(self):
        """
        Return a string representation: "<name> is a <type_name>"

        @rtype: str
        """
        return " is a ".join((self.name, self.type_name))

    def __str__(self):
        return self.toString()
234
class Attribute(Field):
    """
    Attributes represent columns that contain actual data
    =====================================================

    An Attribute adds nothing of its own: all of its behaviour
    is inherited from L{intermine.model.Field}
    """
244
class Reference(Field):
    """
    References represent columns that refer to records in other tables
    ==================================================================

    In addition to the behaviour and properties of Field, references
    may also have a reverse reference, if the other record points
    back to this one as well. The constructor only records the name of
    the reverse reference; the reverse_reference attribute itself
    starts out as None.
    """

    def __init__(self, name, type_name, class_origin, reverse_ref=None):
        """
        Constructor
        ===========

        In addition to the parameters of Field, Reference also
        takes an optional reverse reference name (str)

        @param name: The name of the reference
        @param type_name: The name of the model.Class this refers to
        @param class_origin: The model.Class this was declared in
        @param reverse_ref: The name of the reverse reference (default: None)

        """
        super(Reference, self).__init__(name, type_name, class_origin)
        self.reverse_reference_name = reverse_ref
        # No reverse reference object is known at construction time.
        self.reverse_reference = None

    def toString(self):
        """
        Return a string representation
        ==============================

        The description from Field, followed by the name of the
        reverse reference when one has been set.

        @rtype: str
        """
        described = super(Reference, self).toString()
        if self.reverse_reference is not None:
            described = described + ", which links back to this as " + self.reverse_reference.name
        return described
284
class Collection(Reference):
    """
    Collections are references which refer to groups of objects
    ===========================================================

    Collections have all the same behaviour and properties as References;
    only the string representation is worded differently.
    """

    def toString(self):
        """
        Return a string representation

        The Reference description is reworded into the plural:
        "x is a group of Y objects[, which link back to this as z]"

        @rtype: str
        """
        plural = super(Collection, self).toString().replace(" is a ", " is a group of ")
        if self.reverse_reference is not None:
            return plural.replace(", which links", " objects, which link")
        return plural + " objects"
299
class Path(object):
    """
    A class representing a validated dotted string path
    ===================================================

    A path represents a connection between records and fields

    SYNOPSIS
    --------

    >>> service = Service("http://www.flymine.org/query/service")
    >>> model = service.model
    >>> path = model.make_path("Gene.organism.name")
    >>> path.is_attribute()
    True
    >>> path2 = model.make_path("Gene.proteins")
    >>> path2.is_attribute()
    False
    >>> path2.is_reference()
    True
    >>> path2.get_class()
    <intermine.model.Class: Protein>

    OVERVIEW
    --------

    This class is used for performing validation on dotted path strings.
    The simple act of parsing it into existence will validate the path
    to some extent, but there are additional methods for verifying certain
    relationships as well
    """

    def __init__(self, path, model, subclasses=None):
        """
        Constructor
        ===========

        >>> path = Path("Gene.name", model)

        You will not need to use this constructor directly. Instead,
        use the "make_path" method on the model to construct paths for you.

        @param path: the dotted path string (eg: Gene.proteins.name),
                     or a L{Class}, which becomes a one-part path
        @type path: str
        @param model: the model to validate the path against
        @type model: L{Model}
        @param subclasses: a dict which maps subclasses (defaults to an empty dict)
        @type subclasses: dict
        """
        # A fresh dict per call, rather than one shared mutable default.
        if subclasses is None:
            subclasses = {}
        # Only a weak proxy to the model is kept on the path.
        self.model = weakref.proxy(model)
        if isinstance(path, Class):
            self._string = path.name
            self.parts = [path]
        else:
            self._string = str(path)
            self.parts = model.parse_path_string(self._string, subclasses)

    def __str__(self):
        return self._string

    def __repr__(self):
        return '<' + self.__module__ + "." + self.__class__.__name__ + ": " + self._string + '>'

    @property
    def root(self):
        """
        The descriptor for the first part of the string. This should always be a class descriptor.

        @rtype: L{intermine.model.Class}
        """
        return self.parts[0]

    @property
    def end(self):
        """
        The descriptor for the last part of the string.

        @rtype: L{model.Class} or L{model.Field}
        """
        return self.parts[-1]

    def get_class(self):
        """
        Return the class object for this path, if it refers to a class
        or a reference. Attribute paths return None

        @rtype: L{model.Class}
        """
        if self.is_class():
            return self.end
        if self.is_reference():
            return self.end.type_class
        return None

    # Property alias for get_class().
    end_class = property(get_class)

    def is_reference(self):
        """
        Return true if the path is a reference, eg: Gene.organism or Gene.proteins
        Note: Collections are ALSO references

        @rtype: boolean
        """
        return isinstance(self.end, Reference)

    def is_class(self):
        """
        Return true if the path just refers to a class, eg: Gene

        @rtype: boolean
        """
        return isinstance(self.end, Class)

    def is_attribute(self):
        """
        Return true if the path refers to an attribute, eg: Gene.length

        @rtype: boolean
        """
        return isinstance(self.end, Attribute)
420
class Column(object):
    """
    A path wrapper whose operators build tuples describing constraints
    ==================================================================

    A Column holds a L{Path} together with the model, subclass map and
    query it was created with. Attribute access walks down the path
    (column.organism.name), and the comparison operators return plain
    tuples of the form (path string, operator, value) instead of booleans.

    >>> model.Gene.symbol == "eve"
    ('Gene.symbol', '=', 'eve')
    """

    def __init__(self, path, model, subclasses=None, query=None):
        """
        @param path: a L{Path}, or anything accepted by model.make_path
        @param model: the model used to build and extend the path
        @param subclasses: a dict which maps subclasses (defaults to an empty dict)
        @param query: stored and handed on to derived columns (default: None)
        """
        # A fresh dict per call, rather than one shared mutable default.
        if subclasses is None:
            subclasses = {}
        self._model = model
        self._query = query
        self._subclasses = subclasses
        if isinstance(path, Path):
            self._path = path
        else:
            self._path = model.make_path(path, subclasses)

    def select(self, *cols):
        """
        Create a new query from the model's service, passing this
        column's path string to new_query, and call select on it
        with the given columns.

        @return: the new query object
        """
        q = self._model.service.new_query(str(self))
        q.select(*cols)
        return q

    def __getattr__(self, name):
        """
        Return a Column for the field "name" of this column's class.

        @raise AttributeError: if this column does not end in a class or
                               reference, or the class has no such field
        """
        # __getattr__ only runs when normal lookup has failed. Read _path
        # straight from the instance dict so that an instance without it
        # (eg. created with __new__) fails cleanly instead of recursing.
        try:
            path = self.__dict__["_path"]
        except KeyError:
            raise AttributeError(name)
        cld = path.get_class()
        if cld is not None:
            try:
                cld.get_field(name)  # called only to validate the name
                return Column(str(self) + "." + name, self._model, self._subclasses, self._query)
            except ModelError as e:
                raise AttributeError(str(e))
        raise AttributeError("No attribute '" + name + "'")

    def __str__(self):
        return str(self._path)

    def __mod__(self, other):
        """
        Build a LOOKUP tuple: column % "value" or column % ("value", "extra")

        @rtype: tuple
        """
        if isinstance(other, tuple):
            return (str(self), "LOOKUP") + other
        return (str(self), 'LOOKUP', str(other))

    def __rshift__(self, other):
        """
        Return the pair (this path string, str(other))

        @rtype: tuple
        """
        return (str(self), str(other))

    def __eq__(self, other):
        """
        Build an equality-style tuple; the operator depends on the
        type of other: IS (Column), IS NULL (None), ONE OF (list),
        IN (List, by name) or = (anything else)

        @rtype: tuple
        """
        if isinstance(other, Column):
            return (str(self), "IS", str(other))
        elif other is None:
            return (str(self), "IS NULL")
        elif isinstance(other, list):
            return (str(self), "ONE OF", other)
        elif isinstance(other, List):
            return (str(self), "IN", other.name)
        else:
            return (str(self), "=", other)

    def __ne__(self, other):
        """
        The negated counterpart of __eq__: IS NOT, IS NOT NULL,
        NONE OF, NOT IN or !=

        @rtype: tuple
        """
        if isinstance(other, Column):
            return (str(self), "IS NOT", str(other))
        elif other is None:
            return (str(self), "IS NOT NULL")
        elif isinstance(other, list):
            return (str(self), "NONE OF", other)
        elif isinstance(other, List):
            return (str(self), "NOT IN", other.name)
        else:
            return (str(self), "!=", other)

    # Defining __eq__ removes the inherited hash on Python 3. Restore the
    # identity based hash, which is what Python 2 uses for this class.
    __hash__ = object.__hash__

    def __lt__(self, other):
        return (str(self), "<", other)

    def __le__(self, other):
        return (str(self), "<=", other)

    def __gt__(self, other):
        return (str(self), ">", other)

    def __ge__(self, other):
        return (str(self), ">=", other)
496
class Model(object):
    """
    A class for representing the data model of an InterMine datawarehouse
    =====================================================================

    An abstraction of the database schema

    SYNOPSIS
    --------

    >>> service = Service("http://www.flymine.org/query/service")
    >>> model = service.model
    >>> model.get_class("Gene")
    <intermine.model.Class: Gene>

    OVERVIEW
    --------

    This class represents the data model - ie. an abstraction
    of the database schema. It can be used to introspect what
    data is available and how it is inter-related
    """

    def __init__(self, source, service=None):
        """
        Constructor
        ===========

        >>> model = Model(xml)

        You will most likely not need to create a model directly,
        instead get one from the Service object:

        @see: L{intermine.webservice.Service}

        @param source: the model.xml, as a local file, string, or url
        @param service: the service this model belongs to; only a weak
                        proxy to it is stored (default: None)
        """
        assert source is not None
        self.source = source
        if service is not None:
            self.service = weakref.proxy(service)
        else:
            self.service = service
        self.classes = {}
        self.parse_model(source)
        self.vivify()

        # Make sugary aliases
        self.table = self.column

    def parse_model(self, source):
        """
        Create classes, attributes, references and collections from the model.xml
        =========================================================================

        The xml can be provided as a file, url or string. This method
        is called during instantiation - it does not need to be called
        directly.

        @param source: the model.xml, as a local file, string, or url
        @raise ModelParseError: if there is a problem parsing the source
        """
        def strip_java_prefix(x):
            # "org.intermine.model.Gene" -> "Gene"
            return re.sub(r'.*\.', '', x)

        try:
            # NOTE(review): the stream returned by openAnything is not closed
            # here; it may be a caller-owned object - confirm against the helper.
            io = openAnything(source)
            doc = minidom.parse(io)
            for node in doc.getElementsByTagName('model'):
                self.name = node.getAttribute('name')
                self.package_name = node.getAttribute('package')
                # Explicit raises rather than asserts: asserts are stripped
                # under -O. They still surface as ModelParseError below.
                if node.nextSibling is not None:
                    raise ModelError("More than one model element")
                if not (self.name and self.package_name):
                    raise ModelError("No model name or package name")

            for c in doc.getElementsByTagName('class'):
                class_name = c.getAttribute('name')
                if not class_name:
                    raise ModelError("Name not defined in" + c.toxml())
                # split() without an argument yields no names for a missing
                # or empty "extends", instead of a single empty parent name.
                parents = [strip_java_prefix(p)
                           for p in c.getAttribute('extends').split()]
                cl = Class(class_name, parents, self)
                for a in c.getElementsByTagName('attribute'):
                    name = a.getAttribute('name')
                    type_name = strip_java_prefix(a.getAttribute('type'))
                    cl.field_dict[name] = Attribute(name, type_name, cl)
                for r in c.getElementsByTagName('reference'):
                    name = r.getAttribute('name')
                    type_name = r.getAttribute('referenced-type')
                    linked_field_name = r.getAttribute('reverse-reference')
                    cl.field_dict[name] = Reference(name, type_name, cl, linked_field_name)
                for co in c.getElementsByTagName('collection'):
                    name = co.getAttribute('name')
                    type_name = co.getAttribute('referenced-type')
                    linked_field_name = co.getAttribute('reverse-reference')
                    cl.field_dict[name] = Collection(name, type_name, cl, linked_field_name)
                self.classes[class_name] = cl
        except Exception as error:
            raise ModelParseError("Error parsing model", source, error)

    def vivify(self):
        """
        Make names point to instances and insert inherited fields
        =========================================================

        This method ensures the model is internally consistent. This method
        is called during instantiation. It does not need to be called
        directly.

        @raise ModelError: if a reverse reference points to a non-existent
                           class or field
        """
        classes = list(self.classes.values())

        # Pass 1: give every class its ancestry and its inherited fields.
        # This must be complete for all classes before reverse references
        # are resolved, because a reverse reference may name a field that
        # its target class only has by inheritance.
        for c in classes:
            c.parent_classes = self.to_ancestry(c)
            for pc in c.parent_classes:
                # Same-named fields from an ancestor replace those already
                # present on the class.
                c.field_dict.update(pc.field_dict)

        # Pass 2: turn type names and reverse reference names into objects.
        for c in classes:
            for f in c.fields:
                f.type_class = self.classes.get(f.type_name)
                # Attributes have no reverse_reference_name at all; for
                # references both '' and None mean "no reverse reference".
                rrn = getattr(f, 'reverse_reference_name', None)
                if not rrn:
                    continue
                if f.type_class is None:
                    raise ModelError(
                        "%s.%s refers to '%s', which is not a class in this model"
                        % (c.name, f.name, f.type_name))
                if rrn not in f.type_class.field_dict:
                    raise ModelError(
                        "%s.%s has the reverse reference '%s', but there is no such field in %s"
                        % (c.name, f.name, rrn, f.type_name))
                f.reverse_reference = f.type_class.field_dict[rrn]

    def to_ancestry(self, cd):
        """
        Returns the lineage of the class
        ================================

        >>> classes = model.to_ancestry(cd)

        Returns the class' parents, and all the class' parents' parents.
        Each ancestor appears once: direct parents first, followed by
        the ancestry of each parent in turn. Parent names that are not
        classes in this model are ignored.

        @rtype: list(L{intermine.model.Class})
        """
        direct = []
        for parent_name in cd.parents:
            parent = self.classes.get(parent_name)
            # None weeds out the java classes; a class naming itself as
            # its own parent is skipped so it cannot recurse forever.
            if parent is not None and parent is not cd:
                direct.append(parent)

        lineage = list(direct)
        for parent in direct:
            lineage.extend(self.to_ancestry(parent))

        # Drop repeats (shared ancestors) while keeping first-seen order.
        seen = set()
        ancestry = []
        for ancestor in lineage:
            if ancestor not in seen:
                seen.add(ancestor)
                ancestry.append(ancestor)
        return ancestry

    def to_classes(self, classnames):
        """
        take a list of class names and return a list of classes
        =======================================================

        >>> classes = model.to_classes(["Gene", "Protein", "Organism"])

        This simply maps from a list of strings to a list of
        classes in the calling model.

        @raise ModelError: if the list of class names includes ones that don't exist

        @rtype: list(L{intermine.model.Class})
        """
        return [self.get_class(name) for name in classnames]

    def column(self, path, *rest):
        """
        Return a L{Column} for the given path in this model. Any further
        arguments are passed on to the Column constructor.

        @rtype: L{intermine.model.Column}
        """
        return Column(path, self, *rest)

    def __getattr__(self, name):
        """
        Treat unknown attributes as paths: model.Gene is model.column("Gene")

        @raise AttributeError: for double-underscore names, and for any name
                               on an instance that has not been initialised
        """
        # Protocol lookups (copying, pickling, ...) expect AttributeError,
        # and without "classes" the lookup below would recurse endlessly.
        if name.startswith('__') or 'classes' not in self.__dict__:
            raise AttributeError(name)
        return self.column(name)

    def get_class(self, name):
        """
        Get a class by its name, or by a dotted path
        ============================================

        >>> model = Model("http://www.flymine.org/query/service/model")
        >>> model.get_class("Gene")
        <intermine.model.Class: Gene>
        >>> model.get_class("Gene.proteins")
        <intermine.model.Class: Protein>

        This is the recommended way of retrieving a class from
        the model. As well as handling class names, you can also
        pass in a path such as "Gene.proteins" and get the
        corresponding class back (<intermine.model.Class: Protein>)

        @raise ModelError: if the class name refers to a non-existent object

        @rtype: L{intermine.model.Class}
        """
        if "." in name:
            path = self.make_path(name)
            if path.is_attribute():
                raise ModelError("'" + str(path) + "' is not a class")
            return path.get_class()
        if name in self.classes:
            return self.classes[name]
        raise ModelError("'" + name + "' is not a class in this model")

    def make_path(self, path, subclasses=None):
        """
        Return a path object for the given path string
        ==============================================

        >>> path = model.make_path("Gene.organism.name")
        <intermine.model.Path: Gene.organism.name>

        This is the recommended manner of constructing path objects.

        @type path: str
        @param subclasses: a dict which maps subclasses (defaults to an empty dict)
        @type subclasses: dict

        @raise PathParseError: if there is a problem parsing the path string

        @rtype: L{intermine.model.Path}
        """
        if subclasses is None:
            subclasses = {}
        return Path(path, self, subclasses)

    def validate_path(self, path_string, subclasses=None):
        """
        Validate a path
        ===============

        >>> model.validate_path("Gene.symbol")
        True

        When you don't need to interrogate relationships
        between paths, simply using this method to validate
        a path string is enough. It guarantees that there
        is a descriptor for each section of the string,
        with the appropriate relationships

        @param subclasses: a dict which maps subclasses (defaults to an empty dict)
        @return: True if the path could be parsed
        @raise PathParseError: if there is a problem parsing the path string
        """
        if subclasses is None:
            subclasses = {}
        try:
            self.parse_path_string(path_string, subclasses)
            return True
        except PathParseError as e:
            raise PathParseError("Error parsing '%s' (subclasses: %s)"
                                 % (path_string, str(subclasses)), e)

    def parse_path_string(self, path_string, subclasses=None):
        """
        Parse a path string into a list of descriptors - one for each section
        =====================================================================

        >>> parts = model.parse_path_string(string)

        This method is used when making paths from a model, and
        when validating path strings. It probably won't need to
        be called directly.

        @param path_string: the dotted path (eg: Gene.proteins.name)
        @param subclasses: a dict mapping path prefixes to the name of the
                           class to use at that point (defaults to an empty dict)
        @raise ModelError: if the root is not a class, or a field does not exist
        @raise PathParseError: if the path continues past a section that
                               does not lead to a class

        @see: L{intermine.model.Model.make_path}
        @see: L{intermine.model.Model.validate_path}
        @see: L{intermine.model.Path}
        """
        if subclasses is None:
            subclasses = {}
        descriptors = []
        names = path_string.split('.')
        root_name = names.pop(0)

        root_descriptor = self.get_class(root_name)
        descriptors.append(root_descriptor)

        if root_name in subclasses:
            current_class = self.get_class(subclasses[root_name])
        else:
            current_class = root_descriptor

        for field_name in names:
            if current_class is None:
                # The previous section was an attribute, or a reference
                # whose type is not in the model: nothing to descend into.
                raise PathParseError(
                    "Cannot resolve '%s' in '%s': '%s' does not lead to a class"
                    % (field_name, path_string,
                       '.'.join(d.name for d in descriptors)))
            field = current_class.get_field(field_name)
            descriptors.append(field)

            if isinstance(field, Reference):
                key = '.'.join(d.name for d in descriptors)
                if key in subclasses:
                    current_class = self.get_class(subclasses[key])
                else:
                    current_class = field.type_class
            else:
                current_class = None

        return descriptors
776
class ModelError(ReadableException):
    """
    The base class for errors raised by the data model, for
    example when a class or a field cannot be found.
    """
779
class PathParseError(ModelError):
    """
    A ModelError raised when there is a problem parsing a path string.
    """
782
class ModelParseError(ModelError):
    """
    A ModelError raised when the model source cannot be parsed. It
    remembers the source that was being read.
    """

    def __init__(self, message, source, cause=None):
        """
        @param message: a description of the problem
        @param source: the model source that was being parsed
        @param cause: the underlying exception, if any (default: None)
        """
        self.source = source
        super(ModelParseError, self).__init__(message, cause)

    def __str__(self):
        """
        Return the reprs of the message and the source, joined by ":",
        directly followed by the repr of the cause when there is one.

        @rtype: str
        """
        # message and cause are presumably stored by the parent
        # constructor - confirm against ReadableException.
        text = ":".join([repr(self.message), repr(self.source)])
        if self.cause is not None:
            text = text + repr(self.cause)
        return text
795