Package intermine :: Module model
[hide private]
[frames | no frames]

Source Code for Module intermine.model

  1  from xml.dom import minidom 
  2  import re 
  3   
  4  from .util import openAnything, ReadableException 
  5   
  6  """ 
  7  Classes representing the data model 
  8  =================================== 
  9   
 10  Representations of tables and columns, and behaviour 
 11  for validating connections between them. 
 12   
 13  """ 
 14   
 15  __author__ = "Alex Kalderimis" 
 16  __organization__ = "InterMine" 
 17  __license__ = "LGPL" 
 18  __contact__ = "dev@intermine.org" 
class Class(object):
    """
    An abstraction of database tables in the data model
    ===================================================

    These objects refer to the table objects in the
    InterMine ORM layer.

    SYNOPSIS
    --------

      >>> service = Service("http://www.flymine.org/query/service")
      >>> model = service.model
      >>>
      >>> if "Gene" in model.classes:
      ...     gene_cd = model.get_class("Gene")
      ...     print("Gene has %d fields" % len(gene_cd.fields))
      ...     for field in gene_cd.fields:
      ...         print(" - " + field.name)

    OVERVIEW
    --------

    Each class can have attributes (columns) of various types,
    and can have references to other classes (tables), on either
    a one-to-one (references) or one-to-many (collections) basis

    Classes should not be instantiated by hand, but rather used
    as part of the model they belong to.

    """

    def __init__(self, name, parents):
        """
        Constructor - Creates a new Class descriptor
        ============================================

          >>> cd = intermine.model.Class("Gene", ["SequenceFeature"])
          >>> cd
          <intermine.model.Class: Gene>

        This constructor is called when deserialising the
        model - you should have no need to create Classes by hand

        @param name: The name of this class
        @param parents: a list of parental names

        """
        self.name = name
        self.parents = parents
        # Filled in with Class objects by Model.vivify
        self.parent_classes = []
        # Maps field name -> Field; all classes have the id attribute
        self.field_dict = {"id": Attribute("id", "Integer", self)}

    def __repr__(self):
        return '<' + self.__module__ + "." + self.__class__.__name__ + ': ' + self.name + '>'

    @property
    def fields(self):
        """
        The fields of this class
        ========================

        The fields are returned sorted by name. Fields
        includes all Attributes, References and Collections

        @rtype: list(L{Field})
        """
        return sorted(self.field_dict.values(), key=lambda field: field.name)

    @property
    def attributes(self):
        """
        The fields of this class which contain data
        ===========================================

        Always a real list (not a lazy filter object), so the
        result can be indexed, measured and iterated repeatedly.

        @rtype: list(L{Attribute})
        """
        return [f for f in self.fields if isinstance(f, Attribute)]

    @property
    def references(self):
        """
        fields which reference other objects
        ====================================

        Collections are excluded, even though Collection is a
        subclass of Reference.

        @rtype: list(L{Reference})
        """
        return [f for f in self.fields
                if isinstance(f, Reference) and not isinstance(f, Collection)]

    @property
    def collections(self):
        """
        fields which reference many other objects
        =========================================

        @rtype: list(L{Collection})
        """
        return [f for f in self.fields if isinstance(f, Collection)]

    def get_field(self, name):
        """
        Get a field by name
        ===================

        The standard way of retrieving a field

        @param name: the name of the field to look up

        @raise ModelError: if the Class does not have such a field

        @rtype: subclass of L{intermine.model.Field}
        """
        if name not in self.field_dict:
            raise ModelError("There is no field called %s in %s" % (name, self.name))
        return self.field_dict[name]

    def isa(self, other):
        """
        Check if self is, or inherits from other
        ========================================

        This method validates statements about inheritance.
        Returns true if the "other" is, or is within the
        ancestry of, this class

        Other can be passed as a name (str), or as the class object itself

        @rtype: boolean
        """
        other_name = other.name if isinstance(other, Class) else other
        if other_name == self.name or other_name in self.parents:
            return True
        # Not this class or a direct parent: ask each known ancestor in turn
        return any(p.isa(other) for p in self.parent_classes)

class Field(object):
    """
    A class representing columns on database tables
    ===============================================

    The base class for attributes, references and collections. All
    columns in DB tables are represented by fields

    SYNOPSIS
    --------

      >>> service = Service("http://www.flymine.org/query/service")
      >>> model = service.model
      >>> cd = model.get_class("Gene")
      >>> print("Gene has %d fields" % len(cd.fields))
      >>> for field in cd.fields:
      ...     print(" - " + str(field))
      Gene has 45 fields
       - CDSs is a group of CDS objects, which link back to this as gene
       - GLEANRsymbol is a String
       - UTRs is a group of UTR objects, which link back to this as gene
       - alleles is a group of Allele objects, which link back to this as gene
       - chromosome is a Chromosome
       - chromosomeLocation is a Location
       - clones is a group of CDNAClone objects, which link back to this as gene
       - crossReferences is a group of CrossReference objects, which link back to this as subject
       - cytoLocation is a String
       - dataSets is a group of DataSet objects, which link back to this as bioEntities
       - downstreamIntergenicRegion is a IntergenicRegion
       - exons is a group of Exon objects, which link back to this as gene
       - flankingRegions is a group of GeneFlankingRegion objects, which link back to this as gene
       - goAnnotation is a group of GOAnnotation objects
       - homologues is a group of Homologue objects, which link back to this as gene
       - id is a Integer
       - interactions is a group of Interaction objects, which link back to this as gene
       - length is a Integer
       ...

    @see: L{Attribute}
    @see: L{Reference}
    @see: L{Collection}
    """

    def __init__(self, name, type_name, class_origin):
        """
        Constructor - DO NOT USE
        ========================

        THIS CLASS IS NOT MEANT TO BE INSTANTIATED DIRECTLY

        You are unlikely to need to do so anyway: it is
        recommended you access fields through the classes
        generated by the model

        @param name: The name of the field
        @param type_name: The name of the type of this field
        @param class_origin: The model.Class this was declared in

        """
        self.declared_in = class_origin
        self.name = name
        self.type_name = type_name
        # Starts unresolved; only the type name is known at construction
        self.type_class = None

    def toString(self):
        """
        Return a string representation, eg: "length is a Integer"

        @rtype: str
        """
        return " is a ".join((self.name, self.type_name))

    def __str__(self):
        """Delegates to toString, so subclasses only need to override that."""
        return self.toString()

class Attribute(Field):
    """
    Attributes represent columns that contain actual data
    =====================================================

    The Attribute class inherits all the behaviour of L{intermine.model.Field}
    and adds nothing of its own; it exists so that data columns can be
    told apart from references by type.
    """

class Reference(Field):
    """
    References represent columns that refer to records in other tables
    ==================================================================

    In addition to the behaviour and properties of Field, references
    may also have a reverse reference, if the other record points
    back to this one as well. And all references will have their
    type upgraded to a type_class during parsing
    """

    def __init__(self, name, type_name, class_origin, reverse_ref=None):
        """
        Constructor
        ===========

        In addition to the parameters of Field, Reference also
        takes an optional reverse reference name (str)

        @param name: The name of the reference
        @param type_name: The name of the model.Class this refers to
        @param class_origin: The model.Class this was declared in
        @param reverse_ref: The name of the reverse reference (default: None)

        """
        super(Reference, self).__init__(name, type_name, class_origin)
        self.reverse_reference_name = reverse_ref
        # Only the name is known here; the field object itself starts unset
        self.reverse_reference = None

    def toString(self):
        """
        Return a string representation
        ==============================

        The Field description, followed by the name of the
        reverse reference when one has been set.

        @rtype: str
        """
        description = super(Reference, self).toString()
        if self.reverse_reference is not None:
            description = (description + ", which links back to this as "
                           + self.reverse_reference.name)
        return description

class Collection(Reference):
    """
    Collections are references which refer to groups of objects
    ===========================================================

    Collections have all the same behaviour and properties as References;
    only the string representation differs.
    """

    def toString(self):
        """
        Return a string representation

        Rewords the Reference description into the plural form, eg:
        "exons is a group of Exon objects, which link back to this as gene"

        @rtype: str
        """
        singular = super(Collection, self).toString()
        plural = singular.replace(" is a ", " is a group of ")
        if self.reverse_reference is not None:
            # The Reference text already carries the ", which links ..." tail
            return plural.replace(", which links", " objects, which link")
        return plural + " objects"

class Path(object):
    """
    A class representing a validated dotted string path
    ===================================================

    A path represents a connection between records and fields

    SYNOPSIS
    --------

      >>> service = Service("http://www.flymine.org/query/service")
      >>> model = service.model
      >>> path = model.make_path("Gene.organism.name")
      >>> path.is_attribute()
      True
      >>> path2 = model.make_path("Gene.proteins")
      >>> path2.is_attribute()
      False
      >>> path2.is_reference()
      True
      >>> path2.get_class()
      <intermine.model.Class: Protein>

    OVERVIEW
    --------

    This class is used for performing validation on dotted path strings.
    The simple act of parsing it into existence will validate the path
    to some extent, but there are additional methods for verifying certain
    relationships as well
    """

    def __init__(self, path_string, model, subclasses=None):
        """
        Constructor
        ===========

          >>> path = Path("Gene.name", model)

        You will not need to use this constructor directly. Instead,
        use the "make_path" method on the model to construct paths for you.

        @param path_string: the dotted path string (eg: Gene.proteins.name)
        @type path_string: str
        @param model: the model to validate the path against
        @type model: L{Model}
        @param subclasses: a dict which maps subclasses (defaults to an empty dict)
        @type subclasses: dict
        """
        # A fresh dict per call: a literal {} default would be one object
        # shared by every Path built without an explicit mapping.
        if subclasses is None:
            subclasses = {}
        self._string = path_string
        # One descriptor per dotted section, as returned by the model
        self.parts = model.parse_path_string(path_string, subclasses)

    def __str__(self):
        return self._string

    def __repr__(self):
        return '<' + self.__module__ + "." + self.__class__.__name__ + ": " + self._string + '>'

    @property
    def end(self):
        """
        The descriptor for the last part of the string.

        @rtype: L{model.Class} or L{model.Field}
        """
        return self.parts[-1]

    def get_class(self):
        """
        Return the class object for this path, if it refers to a class
        or a reference. Attribute paths return None

        @rtype: L{model.Class}
        """
        if self.is_class():
            return self.end
        if self.is_reference():
            return self.end.type_class
        return None

    def is_reference(self):
        """
        Return true if the path is a reference, eg: Gene.organism or Gene.proteins
        Note: Collections are ALSO references

        @rtype: boolean
        """
        return isinstance(self.end, Reference)

    def is_class(self):
        """
        Return true if the path just refers to a class, eg: Gene

        @rtype: boolean
        """
        return isinstance(self.end, Class)

    def is_attribute(self):
        """
        Return true if the path refers to an attribute, eg: Gene.length

        @rtype: boolean
        """
        return isinstance(self.end, Attribute)

class Model(object):
    """
    A class for representing the data model of an InterMine datawarehouse
    =====================================================================

    An abstraction of the database schema

    SYNOPSIS
    --------

      >>> service = Service("http://www.flymine.org/query/service")
      >>> model = service.model
      >>> model.get_class("Gene")
      <intermine.model.Class: Gene>

    OVERVIEW
    --------

    This class represents the data model - ie. an abstraction
    of the database schema. It can be used to introspect what
    data is available and how it is inter-related
    """

    def __init__(self, source):
        """
        Constructor
        ===========

          >>> model = Model(xml)

        You will most likely not need to create a model directly,
        instead get one from the Service object:

        @see: L{intermine.webservice.Service}

        @param source: the model.xml, as a local file, string, or url
        """
        assert source is not None
        self.source = source
        self.classes = {}
        self.parse_model(source)
        self.vivify()

    @staticmethod
    def _strip_java_prefix(name):
        """Drop everything up to and including the last dot (java.lang.String -> String)."""
        return re.sub(r'.*\.', '', name)

    def parse_model(self, source):
        """
        Create classes, attributes, references and collections from the model.xml
        =========================================================================

        The xml can be provided as a file, url or string. This method
        is called during instantiation - it does not need to be called
        directly.

        Every failure (unreadable source, malformed xml, missing model
        or class names) is reported as a ModelParseError wrapping the
        underlying exception.

        @param source: the model.xml, as a local file, string, or url
        @raise ModelParseError: if there is a problem parsing the source
        """
        try:
            stream = openAnything(source)
            try:
                doc = minidom.parse(stream)
            finally:
                # NOTE(review): assumes openAnything returns a closeable
                # file-like object - confirm against intermine.util
                stream.close()

            for node in doc.getElementsByTagName('model'):
                self.name = node.getAttribute('name')
                self.package_name = node.getAttribute('package')
                # Explicit checks rather than assert, so that validation
                # still happens when python runs with -O
                if node.nextSibling is not None:
                    raise ValueError("More than one model element")
                if not (self.name and self.package_name):
                    raise ValueError("No model name or package name")

            for c in doc.getElementsByTagName('class'):
                class_name = c.getAttribute('name')
                if not class_name:
                    raise ValueError("Name not defined in" + c.toxml())
                # split() without a separator yields [] for a missing
                # "extends", rather than a phantom parent named ''
                parents = [self._strip_java_prefix(p)
                           for p in c.getAttribute('extends').split()]
                cl = Class(class_name, parents)
                for a in c.getElementsByTagName('attribute'):
                    name = a.getAttribute('name')
                    type_name = self._strip_java_prefix(a.getAttribute('type'))
                    cl.field_dict[name] = Attribute(name, type_name, cl)
                for r in c.getElementsByTagName('reference'):
                    name = r.getAttribute('name')
                    type_name = r.getAttribute('referenced-type')
                    linked_field_name = r.getAttribute('reverse-reference')
                    cl.field_dict[name] = Reference(name, type_name, cl, linked_field_name)
                for co in c.getElementsByTagName('collection'):
                    name = co.getAttribute('name')
                    type_name = co.getAttribute('referenced-type')
                    linked_field_name = co.getAttribute('reverse-reference')
                    cl.field_dict[name] = Collection(name, type_name, cl, linked_field_name)
                self.classes[class_name] = cl
        except Exception as error:
            raise ModelParseError("Error parsing model", source, error)

    def vivify(self):
        """
        Make names point to instances and insert inherited fields
        =========================================================

        This method ensures the model is internally consistent. This method
        is called during instantiation. It does not need to be called
        directly.

        @raise ModelError: if the names point to non-existent objects
        """
        # Pass 1: give every class its ancestry and inherited fields.
        # This must finish for ALL classes before reverse references are
        # looked up, since the target field may itself be inherited.
        for c in self.classes.values():
            c.parent_classes = self.to_ancestry(c)
            for pc in c.parent_classes:
                c.field_dict.update(pc.field_dict)

        # Pass 2: resolve type names and reverse reference names
        for c in self.classes.values():
            for f in c.fields:
                f.type_class = self.classes.get(f.type_name)
                # Attributes have no reverse reference name at all;
                # references without one carry '' or None
                rrn = getattr(f, 'reverse_reference_name', None)
                if not rrn:
                    continue
                if f.type_class is None:
                    raise ModelError(
                        "%s.%s refers to %s, which is not a class in this model"
                        % (c.name, f.name, f.type_name))
                if rrn not in f.type_class.field_dict:
                    raise ModelError(
                        "%s.%s has the reverse reference %s, but there is no such field in %s"
                        % (c.name, f.name, rrn, f.type_name))
                f.reverse_reference = f.type_class.field_dict[rrn]

    def to_ancestry(self, cd):
        """
        Returns the lineage of the class
        ================================

          >>> classes = model.to_ancestry(cd)

        Returns the class' parents, and all the class' parents' parents.
        Direct parents come first, followed by each parent's own lineage;
        every ancestor is listed once only. Parent names that are not
        classes in this model (eg: java classes) are ignored.

        @param cd: the class to find the ancestors of
        @type cd: L{intermine.model.Class}

        @rtype: list(L{intermine.model.Class})
        """
        direct = [self.classes[name] for name in cd.parents
                  if self.classes.get(name) is not None]
        lineage = list(direct)
        # Iterate the snapshot, not the list being extended
        for parent in direct:
            lineage.extend(self.to_ancestry(parent))

        ancestry = []
        for ancestor in lineage:
            if not any(ancestor is known for known in ancestry):
                ancestry.append(ancestor)
        return ancestry

    def to_classes(self, classnames):
        """
        take a list of class names and return a list of classes
        =======================================================

          >>> classes = model.to_classes(["Gene", "Protein", "Organism"])

        This simply maps from a list of strings to a list of
        classes in the calling model.

        @raise ModelError: if the list of class names includes ones that don't exist

        @rtype: list(L{intermine.model.Class})
        """
        return [self.get_class(name) for name in classnames]

    def get_class(self, name):
        """
        Get a class by its name, or by a dotted path
        ============================================

          >>> model = Model("http://www.flymine.org/query/service/model")
          >>> model.get_class("Gene")
          <intermine.model.Class: Gene>
          >>> model.get_class("Gene.proteins")
          <intermine.model.Class: Protein>

        This is the recommended way of retrieving a class from
        the model. As well as handling class names, you can also
        pass in a path such as "Gene.proteins" and get the
        corresponding class back (<intermine.model.Class: Protein>)

        @raise ModelError: if the class name refers to a non-existent object

        @rtype: L{intermine.model.Class}
        """
        if "." in name:
            path = self.make_path(name)
            if path.is_attribute():
                raise ModelError("'" + str(path) + "' is not a class")
            return path.get_class()
        if name in self.classes:
            return self.classes[name]
        raise ModelError("'" + name + "' is not a class in this model")

    def make_path(self, path, subclasses=None):
        """
        Return a path object for the given path string
        ==============================================

          >>> path = model.make_path("Gene.organism.name")
          >>> path
          <intermine.model.Path: Gene.organism.name>

        This is the recommended manner of constructing path objects.

        @type path: str
        @param subclasses: maps paths to subclass names (defaults to an empty dict)
        @type subclasses: dict

        @raise ModelError: if a section of the path does not exist
        @raise PathParseError: if the path continues past an attribute

        @rtype: L{intermine.model.Path}
        """
        if subclasses is None:
            subclasses = {}
        return Path(path, self, subclasses)

    def validate_path(self, path_string, subclasses=None):
        """
        Validate a path
        ===============

          >>> def check(model):
          ...     try:
          ...         model.validate_path("Gene.symbol")
          ...         return "path is valid"
          ...     except ModelError:
          ...         return "path is invalid"
          >>> check(model)
          'path is valid'

        When you don't need to interrogate relationships
        between paths, simply using this method to validate
        a path string is enough. It guarantees that there
        is a descriptor for each section of the string,
        with the appropriate relationships

        @param subclasses: maps paths to subclass names (defaults to an empty dict)
        @type subclasses: dict

        @return: True, if the path could be parsed
        @raise ModelError: if a section of the path does not exist
        @raise PathParseError: if there is a problem parsing the path string
        """
        if subclasses is None:
            subclasses = {}
        try:
            self.parse_path_string(path_string, subclasses)
            return True
        except PathParseError as e:
            raise PathParseError("Error parsing '%s' (subclasses: %s)"
                                 % (path_string, str(subclasses)), e)

    def parse_path_string(self, path_string, subclasses=None):
        """
        Parse a path string into a list of descriptors - one for each section
        =====================================================================

          >>> parts = model.parse_path_string(string)

        This method is used when making paths from a model, and
        when validating path strings. It probably won't need to
        be called directly.

        @param path_string: the dotted path string (eg: Gene.proteins.name)
        @type path_string: str
        @param subclasses: maps paths to subclass names (defaults to an empty dict)
        @type subclasses: dict

        @raise ModelError: if a class or field named in the path does not exist
        @raise PathParseError: if the path continues past a section
            that has no fields (eg: an attribute)

        @rtype: list of L{intermine.model.Class} and L{intermine.model.Field}

        @see: L{intermine.model.Model.make_path}
        @see: L{intermine.model.Model.validate_path}
        @see: L{intermine.model.Path}
        """
        if subclasses is None:
            subclasses = {}
        names = path_string.split('.')
        root_name = names.pop(0)

        root_descriptor = self.get_class(root_name)
        descriptors = [root_descriptor]

        # A subclass constraint on the root changes which fields are visible
        if root_name in subclasses:
            current_class = self.get_class(subclasses[root_name])
        else:
            current_class = root_descriptor

        for field_name in names:
            if current_class is None:
                # The previous section was an attribute, or a reference to
                # a type which is not in the model: nothing can follow it
                raise PathParseError(
                    "'%s' in '%s' has no fields, so cannot be followed by '%s'"
                    % (descriptors[-1].name, path_string, field_name))
            field = current_class.get_field(field_name)
            descriptors.append(field)

            if isinstance(field, Reference):
                key = '.'.join(d.name for d in descriptors)
                if key in subclasses:
                    current_class = self.get_class(subclasses[key])
                else:
                    current_class = field.type_class
            else:
                current_class = None

        return descriptors

class ModelError(ReadableException):
    """
    The base class for the errors raised by this module
    ===================================================

    Raised directly when a class or field that was asked for
    does not exist. Adds no behaviour to ReadableException.
    """

class PathParseError(ModelError):
    """
    An error signalling a problem with a dotted path string
    =======================================================

    A kind of ModelError, so handlers for ModelError will
    also receive these. Adds no behaviour of its own.
    """

class ModelParseError(ModelError):
    """
    An error signalling that the model source could not be parsed
    =============================================================

    Keeps hold of the source that was being parsed, so that
    it can be included in the string representation.
    """

    def __init__(self, message, source, cause=None):
        """
        Constructor
        ===========

        @param message: a description of the problem
        @param source: the model source that was being parsed
        @param cause: the underlying exception, if any (default: None)
        """
        self.source = source
        # message and cause are handed on to the parent constructor;
        # presumably that is what stores self.message and self.cause,
        # which __str__ reads - confirm against ReadableException
        super(ModelParseError, self).__init__(message, cause)

    def __str__(self):
        """
        Return the message and source (and the cause, when there is one)

        @rtype: str
        """
        pieces = [repr(self.message), ":", repr(self.source)]
        if self.cause is not None:
            pieces.append(repr(self.cause))
        return "".join(pieces)
