Package intermine :: Module results
[hide private]
[frames | no frames]

Source Code for Module intermine.results

  1  try: 
  2      import simplejson as json # Prefer this as it is faster 
  3  except ImportError: # pragma: no cover 
  4      try: 
  5          import json 
  6      except ImportError: 
  7          raise ImportError("Could not find any JSON module to import - " 
  8              + "please install simplejson or jsonlib to continue") 
  9   
 10  import urllib 
 11  import httplib 
 12  import re 
 13  import copy 
 14  import base64 
 15  from urlparse import urlparse 
 16  from itertools import groupby 
 17  import UserDict 
 18   
 19  from intermine.errors import WebserviceError 
 20  from intermine.model import Attribute, Reference, Collection 
 21   
 22  USER_AGENT = 'WebserviceInterMinePerlAPIClient' 
class EnrichmentLine(UserDict.UserDict):
    """
    An object that represents a result returned from the enrichment service.
    ========================================================================

    These objects operate as dictionaries as well as objects with predefined
    properties.

    Keys of the underlying dictionary can also be read as attributes; an
    underscore in the attribute name stands for a hyphen in the key, so
    C{line.p_value} reads the key C{"p-value"}.
    """

    def __str__(self):
        """Return the string form of the underlying dictionary."""
        return str(self.data)

    def __repr__(self):
        """Return a constructor-like representation of this line."""
        # The one-element tuple guarantees the data is formatted as a single
        # value whatever its type.
        return "EnrichmentLine(%s)" % (self.data,)

    def __getattr__(self, name):
        """
        Resolve unknown attributes against the underlying dictionary.

        @param name: the attribute name; underscores are mapped to hyphens
        @type name: string

        @raise AttributeError: if there is no matching key
        """
        # This hook only runs once normal lookup has failed. The backing
        # dict is read straight from the instance __dict__ because going
        # through self.data / self.keys() would re-enter this method without
        # end on an instance whose "data" has not been set yet (as happens
        # while an instance is being copied or unpickled).
        data = self.__dict__.get("data")
        if data is not None:
            key_name = name.replace('_', '-')
            if key_name in data:
                return data[key_name]
        raise AttributeError(name)
45
class ResultObject(object):
    """
    An object used to represent result records as returned in jsonobjects format
    ============================================================================

    These objects are backed by a row of data and the class descriptor that
    describes the object. They allow access in standard object style:

        >>> for gene in query.results():
        ...    print gene.symbol
        ...    print map(lambda x: x.name, gene.pathways)

    All objects will have "id" and "type" properties. The type refers to the
    actual type of this object: if it is a subclass of the one requested, the
    subclass name will be returned. The "id" refers to the internal database id
    of the object, and is a guarantor of object identity.

    """

    # Keys of the raw record that are bookkeeping rather than displayable fields.
    _HIDDEN_KEYS = frozenset(["objectId", "class"])

    def __init__(self, data, cld, view=None):
        """
        Constructor
        ===========

        @param data: the raw record backing this object
        @type data: dict
        @param cld: the class descriptor of the requested class; swapped for
            the descriptor named by data["class"] when that names another class
        @param view: the selected paths, each starting with the root class
        @type view: list
        """
        # Drop the leading path segment (the root class) from every view.
        stripped = [v[v.find(".") + 1:] for v in (view or [])]
        self.selected_attributes = [v for v in stripped if "." not in v]
        # Group the paths by their first segment (with its trailing dot).
        # Plain accumulation is used rather than itertools.groupby, which only
        # merges adjacent items and therefore lost paths whenever the views
        # of one reference were not listed consecutively.
        grouped = {}
        for path in stripped:
            head = path[:path.find(".") + 1]
            grouped.setdefault(head, []).append(path)
        self.reference_paths = grouped
        self._data = data
        if "class" not in data or cld.name == data["class"]:
            self._cld = cld
        else:
            self._cld = cld.model.get_class(data["class"])
        self._attr_cache = {}

    def __str__(self):
        """Return the class name with the scalar fields of the raw record."""
        shown = ["%s = %r" % (k, v) for k, v in self._data.items()
                 if not isinstance(v, (dict, list)) and k not in self._HIDDEN_KEYS]
        return "%s(%s)" % (self._cld.name, ", ".join(shown))

    def __repr__(self):
        """
        Return the class name with every field of the raw record.

        Values are read through attribute access, so references are shown
        as ResultObjects.
        """
        shown = ["%s = %r" % (k, getattr(self, k)) for k in self._data.keys()
                 if k not in self._HIDDEN_KEYS]
        return "%s(%s)" % (self._cld.name, ", ".join(shown))

    def __getattr__(self, name):
        """
        Resolve a model field by name
        =============================

        Attributes are returned as plain values, references as ResultObjects
        and collections as lists of ResultObjects. Resolved values are cached.

        @raise AttributeError: if this instance has not been initialised
        @raise WebserviceError: if the field is neither an attribute nor a reference
        """
        # Read the cache from __dict__ directly: on an instance that never
        # went through __init__ (copying, unpickling) self._attr_cache would
        # re-enter this method without end.
        try:
            cache = self.__dict__["_attr_cache"]
        except KeyError:
            raise AttributeError(name)

        if name in cache:
            return cache[name]

        if name == "type":
            # Records without a "class" entry are accepted by the
            # constructor; report the descriptor's name for those.
            return self._data.get("class", self._cld.name)

        fld = self._cld.get_field(name)
        attr = None
        if isinstance(fld, Attribute):
            if name in self._data:
                attr = self._data[name]
            if attr is None:
                attr = self._fetch_attr(fld)
        elif isinstance(fld, Reference):
            ref_paths = self._get_ref_paths(fld)
            if name in self._data:
                data = self._data[name]
            else:
                data = self._fetch_reference(fld)
            if isinstance(fld, Collection):
                if data is None:
                    attr = []
                else:
                    attr = [ResultObject(x, fld.type_class, ref_paths) for x in data]
            elif data is not None:
                attr = ResultObject(data, fld.type_class, ref_paths)
        else:
            raise WebserviceError("Inconsistent model - This should never happen")
        cache[name] = attr
        return attr

    def _get_ref_paths(self, fld):
        """Return the selected paths that pass through the given reference."""
        return self.reference_paths.get(fld.name + ".", [])

    @property
    def id(self):
        """Return the internal DB identifier of this object. Or None if this is not an InterMine object"""
        return self._data.get('objectId')

    def _fetch_attr(self, fld):
        """Fetch the value of an attribute that was absent from the record, or None."""
        if fld.name in self.selected_attributes:
            return None # Was originally selected - no point asking twice
        c = self._cld
        if "id" not in c:
            return None # Cannot reliably fetch anything without access to the objectId.
        q = c.model.service.query(c, fld).where(id = self.id)
        r = q.first()
        if r is None:
            return None # NOTE(review): assumes first() yields None for an empty result - confirm
        return r._data.get(fld.name)

    def _fetch_reference(self, ref):
        """Fetch the data of a reference that was absent from the record, or None."""
        if ref.name + "." in self.reference_paths:
            return None # Was originally selected - no point asking twice.
        c = self._cld
        if "id" not in c:
            return None # Cannot reliably fetch anything without access to the objectId.
        q = c.model.service.query(ref).outerjoin(ref).where(id = self.id)
        r = q.first()
        if r is None:
            return None # NOTE(review): assumes first() yields None for an empty result - confirm
        return r._data.get(ref.name)
148
class ResultRow(object):
    """
    An object for representing a row of data received back from the server.
    =======================================================================

    ResultRows provide access to the fields of the row through index lookup. However,
    for convenience both list indexes and dictionary keys can be used. So the
    following all work:

        >>> # Assuming the view is "Gene.symbol", "Gene.organism.name":
        >>> row[0] == row["symbol"] == row["Gene.symbol"]
        ... True

    """

    # Matches the root class and its dot at the start of a view. The dot is
    # escaped so that a view without any dot is left untouched instead of
    # being swallowed whole.
    _ROOT_PREFIX = re.compile(r"^[^.]+\.")

    def __init__(self, data, views):
        """
        @param data: the cells of this row
        @type data: list
        @param views: the column paths, in the same order as the cells
        @type views: list
        """
        self.data = data
        self.views = views
        self.index_map = None # built lazily on the first lookup by name

    def __len__(self):
        """Return the number of cells in this row"""
        return len(self.data)

    def __iter__(self):
        """Return the list view of the row, so each cell can be processed"""
        return iter(self.to_l())

    def _get_index_for(self, key):
        """
        Return the position of the column with the given name.

        Columns are known both by their full view and by the view
        with the root class removed.

        @raise KeyError: if no column has that name
        """
        if self.index_map is None:
            index_map = {}
            for i, view in enumerate(self.views):
                index_map[view] = i
                index_map[self._ROOT_PREFIX.sub("", view)] = i
            self.index_map = index_map

        return self.index_map[key]

    def __str__(self):
        """Return the row as "Root: path=value path=value ..." """
        # The root is whatever precedes the first dot of the first view; a
        # row without any views has no root at all.
        root = self.views[0].partition(".")[0] if self.views else ""
        parts = [root + ":"]
        for view in self.views:
            short_form = self._ROOT_PREFIX.sub("", view)
            parts.append(short_form + "=" + repr(self[view]))
        return " ".join(parts)

    def __getitem__(self, key):
        """Return the cell(s) at an integer index, a slice or a column name."""
        if isinstance(key, (int, slice)):
            return self.data[key]
        return self.data[self._get_index_for(key)]

    def to_l(self):
        """Return a list view of this row"""
        return list(self.data)

    def to_d(self):
        """Return a dictionary view of this row"""
        return dict((view, self[view]) for view in self.views)

    def items(self):
        """Return a list of (view, value) pairs."""
        return [(view, self[view]) for view in self.views]

    def iteritems(self):
        """Iterate over the (view, value) pairs."""
        for view in self.views:
            yield (view, self[view])

    def keys(self):
        """Return a copy of the views."""
        return copy.copy(self.views)

    def values(self):
        """Return the values of this row as a list."""
        return self.to_l()

    def itervalues(self):
        """Iterate over the values of this row."""
        return iter(self.to_l())

    def iterkeys(self):
        """Iterate over the views."""
        return iter(self.views)

    def has_key(self, key):
        """Return True if the key names one of the columns of this row."""
        try:
            self._get_index_for(key)
            return True
        except KeyError:
            return False
244
class TableResultRow(ResultRow):
    """
    A class for parsing results from the jsonrows data format.

    Every cell of the row is a mapping, and the content of the cell is
    whatever is stored under its "value" key.
    """

    def __getitem__(self, key):
        """Return the value(s) at an integer index, a slice or a column name."""
        if isinstance(key, slice):
            return [cell["value"] for cell in self.data[key]]
        if not isinstance(key, int):
            # Anything else is a column name that has to be resolved first.
            key = self._get_index_for(key)
        return self.data[key]["value"]

    def to_l(self):
        """Return a list view of this row"""
        return [cell["value"] for cell in self.data]
263
class ResultIterator(object):
    """
    A facade over the internal iterator object
    ==========================================

    These objects handle the iteration over results
    in the formats requested by the user. They are responsible
    for generating an appropriate parser,
    connecting the parser to the results, and delegating
    iteration appropriately.
    """

    PARSED_FORMATS = frozenset(["rr", "list", "dict"])
    STRING_FORMATS = frozenset(["tsv", "csv", "count"])
    JSON_FORMATS = frozenset(["jsonrows", "jsonobjects", "json"])
    ROW_FORMATS = PARSED_FORMATS | STRING_FORMATS | JSON_FORMATS

    def __init__(self, service, path, params, rowformat, view, cld=None):
        """
        Constructor
        ===========

        Services are responsible for getting result iterators. You will
        not need to create one manually.

        @param service: The service to query; its root, version and opener are used
        @param path: The resource path (eg: "/query/results")
        @type path: string
        @param params: The query parameters for this request (left unmodified)
        @type params: dict
        @param rowformat: One of "rr", "object", "count", "dict", "list", "tsv", "csv", "jsonrows", "jsonobjects", "json"
        @type rowformat: string
        @param view: The output columns
        @type view: list
        @param cld: The class descriptor handed to the objects built for the "jsonobjects" format

        @raise ValueError: if the row format is incorrect
        """
        if rowformat.startswith("object"): # Accept "object", "objects", "objectformat", etc...
            rowformat = "jsonobjects" # these are synonymous
        if rowformat not in self.ROW_FORMATS:
            raise ValueError("'%s' is not one of the valid row formats (%s)"
                    % (rowformat, repr(list(self.ROW_FORMATS))))

        self.row = ResultRow if service.version >= 8 else TableResultRow

        # The parsed formats are built client-side from a JSON format; which
        # one is requested depends on the version of the service.
        if rowformat in self.PARSED_FORMATS:
            wire_format = "json" if service.version >= 8 else "jsonrows"
        else:
            wire_format = rowformat

        # Work on a copy so that the caller's dictionary is not changed
        # behind its back.
        params = dict(params)
        params["format"] = wire_format

        self.url = service.root + path
        self.data = urllib.urlencode(params)
        self.view = view
        self.opener = service.opener
        self.cld = cld
        self.rowformat = rowformat
        self._it = None

    def __len__(self):
        """
        Return the number of items in this iterator
        ===========================================

        Note that this requires iterating over the full result set.
        """
        return sum(1 for _ in self)

    def __iter__(self):
        """
        Return an iterator over the results
        ===================================

        Opens a connection with this iterator's opener and returns the
        internal iterator object reading from it, so every call performs
        a new request.

        @raise WebserviceError: if the internal iterator reports one
        @raise Exception: if the internal iterator cannot be created for any other reason
        """
        con = self.opener.open(self.url, self.data)
        identity = lambda x: x
        flat_file_parser = lambda: FlatFileIterator(con, identity)
        simple_json_parser = lambda: JSONIterator(con, identity)
        readers = {
            "tsv" : flat_file_parser,
            "csv" : flat_file_parser,
            "count" : flat_file_parser,
            "json" : simple_json_parser,
            "jsonrows" : simple_json_parser,
            "list" : lambda: JSONIterator(con, lambda x: self.row(x, self.view).to_l()),
            "rr" : lambda: JSONIterator(con, lambda x: self.row(x, self.view)),
            "dict" : lambda: JSONIterator(con, lambda x: self.row(x, self.view).to_d()),
            "jsonobjects" : lambda: JSONIterator(con, lambda x: ResultObject(x, self.cld, self.view))
        }

        try:
            return readers[self.rowformat]()
        except WebserviceError:
            # Already specific: let callers see the real error type rather
            # than hiding it inside a generic Exception.
            raise
        except Exception as e:
            raise Exception("Couldn't get iterator for " + self.rowformat + str(e))

    def next(self):
        """
        Returns the next row, in the appropriate format

        The internal iterator is created on the first call and discarded
        once it is exhausted, so a later call starts a new request.

        @rtype: whatever the rowformat was determined to be
        """
        if self._it is None:
            self._it = iter(self)
        try:
            return self._it.next()
        except StopIteration:
            self._it = None
            raise
382
383 -class FlatFileIterator(object):
384 """ 385 An iterator for handling results returned as a flat file (TSV/CSV). 386 =================================================================== 387 388 This iterator can be used as the sub iterator in a ResultIterator 389 """ 390
391 - def __init__(self, connection, parser):
392 """ 393 Constructor 394 =========== 395 396 @param connection: The source of data 397 @type connection: socket.socket 398 @param parser: a handler for each row of data 399 @type parser: Parser 400 """ 401 self.connection = connection 402 self.parser = parser
403
404 - def __iter__(self):
405 return self
406
407 - def next(self):
408 """Return a parsed line of data""" 409 line = self.connection.next().strip() 410 if line.startswith("[ERROR]"): 411 raise WebserviceError(line) 412 return self.parser(line)
413
414 -class JSONIterator(object):
415 """ 416 An iterator for handling results returned in the JSONRows format 417 ================================================================ 418 419 This iterator can be used as the sub iterator in a ResultIterator 420 """ 421
422 - def __init__(self, connection, parser):
423 """ 424 Constructor 425 =========== 426 427 @param connection: The source of data 428 @type connection: socket.socket 429 @param parser: a handler for each row of data 430 @type parser: Parser 431 """ 432 self.connection = connection 433 self.parser = parser 434 self.header = "" 435 self.footer = "" 436 self.parse_header() 437 self._is_finished = False
438
439 - def __iter__(self):
440 return self
441
442 - def next(self):
443 """Returns a parsed row of data""" 444 if self._is_finished: 445 raise StopIteration 446 return self.get_next_row_from_connection()
447
448 - def parse_header(self):
449 """Reads out the header information from the connection""" 450 try: 451 line = self.connection.next().strip() 452 self.header += line 453 if not line.endswith('"results":['): 454 self.parse_header() 455 except StopIteration: 456 raise WebserviceError("The connection returned a bad header" + self.header)
457
458 - def check_return_status(self):
459 """ 460 Perform status checks 461 ===================== 462 463 The footer containts information as to whether the result 464 set was successfully transferred in its entirety. This 465 method makes sure we don't silently accept an 466 incomplete result set. 467 468 @raise WebserviceError: if the footer indicates there was an error 469 """ 470 container = self.header + self.footer 471 info = None 472 try: 473 info = json.loads(container) 474 except: 475 raise WebserviceError("Error parsing JSON container: " + container) 476 477 if not info["wasSuccessful"]: 478 raise WebserviceError(info["statusCode"], info["error"])
479
481 """ 482 Reads the connection to get the next row, and sends it to the parser 483 484 @raise WebserviceError: if the connection is interrupted 485 """ 486 next_row = None 487 try: 488 line = self.connection.next() 489 if line.startswith("]"): 490 self.footer += line; 491 for otherline in self.connection: 492 self.footer += line 493 self.check_return_status() 494 else: 495 line = line.strip().strip(',') 496 if len(line) > 0: 497 try: 498 row = json.loads(line) 499 except json.decoder.JSONDecodeError, e: 500 raise WebserviceError("Error parsing line from results: '" 501 + line + "' - " + str(e)) 502 next_row = self.parser(row) 503 except StopIteration: 504 raise WebserviceError("Connection interrupted") 505 506 if next_row is None: 507 self._is_finished = True 508 raise StopIteration 509 else: 510 return next_row
511
class InterMineURLOpener(urllib.FancyURLopener):
    """
    Specific implementation of urllib.FancyURLOpener for this client
    ================================================================

    Provides user agent and authentication headers, and handling of errors
    """
    version = "InterMine-Python-Client-0.96.00"

    def __init__(self, credentials=None, token=None):
        """
        Constructor
        ===========

        InterMineURLOpener((username, password)) S{->} InterMineURLOpener

        Return a new url-opener with the appropriate credentials

        @param credentials: an optional (username, password) pair
        @param token: an optional token, appended to every url as a parameter
        """
        urllib.FancyURLopener.__init__(self)
        self.token = token
        self.plain_post_header = {
            "Content-Type": "text/plain; charset=utf-8",
            "User-Agent": USER_AGENT
        }
        if credentials and len(credentials) == 2:
            auth = self._basic_auth_header(credentials)
            self.addheader("Authorization", auth)
            self.plain_post_header["Authorization"] = auth
            self.using_authentication = True
        else:
            self.using_authentication = False

    @staticmethod
    def _basic_auth_header(credentials):
        """Return the Authorization header value for a (username, password) pair."""
        raw = '%s:%s' % tuple(credentials)
        if not isinstance(raw, bytes):
            raw = raw.encode("utf-8")
        # b64encode never inserts line breaks, unlike encodestring, which
        # wraps its output and so put newlines into the header of long
        # credentials.
        encoded = base64.b64encode(raw)
        if not isinstance(encoded, str):
            encoded = encoded.decode("ascii")
        # The authentication scheme has to precede the encoded credentials.
        return "Basic " + encoded

    @staticmethod
    def _read_and_close(fp):
        """Return the whole content of fp, closing it whatever happens."""
        try:
            return fp.read()
        finally:
            fp.close()

    @staticmethod
    def _extract_error(content):
        """Return the "error" entry of a JSON body, or the body itself if there is none."""
        try:
            return json.loads(content)["error"]
        except (ValueError, KeyError, TypeError):
            return content

    def _request_plain(self, method, url, body):
        """
        Send a request with the plain text headers and return the response body.

        @raise WebserviceError: if the response status is not 200
        """
        url = self.prepare_url(url)
        o = urlparse(url)
        # Honour the scheme of the url instead of always talking plain HTTP.
        if o.scheme == "https":
            con = httplib.HTTPSConnection(o.hostname, o.port)
        else:
            con = httplib.HTTPConnection(o.hostname, o.port)
        try:
            con.request(method, url, body, self.plain_post_header)
            resp = con.getresponse()
            content = resp.read()
        finally:
            con.close()
        if resp.status != 200:
            raise WebserviceError(resp.status, resp.reason, content)
        return content

    def post_plain_text(self, url, body):
        """
        POST the body as plain text and return the content of the response.

        @raise WebserviceError: if the response status is not 200
        """
        return self._request_plain('POST', url, body)

    def open(self, url, data=None):
        """Open the url, with the token parameter added if there is a token."""
        url = self.prepare_url(url)
        return urllib.FancyURLopener.open(self, url, data)

    def prepare_url(self, url):
        """Return the url with the token appended as a query parameter, if there is a token."""
        if not self.token:
            return url
        separator = "&" if urlparse(url).query else "?"
        return url + separator + "token=" + self.token

    def delete(self, url):
        """
        Send a DELETE request and return the content of the response.

        @raise WebserviceError: if the response status is not 200
        """
        return self._request_plain('DELETE', url, None)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Re-implementation of http_error_default, with content now supplied by default"""
        content = self._read_and_close(fp)
        raise WebserviceError(errcode, errmsg, content)

    def http_error_400(self, url, fp, errcode, errmsg, headers, data=None):
        """
        Handle 400 HTTP errors, attempting to return informative error messages
        =======================================================================

        400 errors indicate that something about our request was incorrect

        @raise WebserviceError: in all circumstances

        """
        message = self._extract_error(self._read_and_close(fp))
        raise WebserviceError("There was a problem with our request", errcode, errmsg, message)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """
        Handle 401 HTTP errors, attempting to return informative error messages
        =======================================================================

        401 errors indicate we don't have sufficient permission for the resource
        we requested - usually a list or a template

        @raise WebserviceError: in all circumstances

        """
        content = self._read_and_close(fp)
        if self.using_authentication:
            raise WebserviceError("Insufficient permissions", errcode, errmsg, content)
        else:
            raise WebserviceError("No permissions - not logged in", errcode, errmsg, content)

    def http_error_403(self, url, fp, errcode, errmsg, headers, data=None):
        """
        Handle 403 HTTP errors, attempting to return informative error messages
        =======================================================================

        403 errors indicate we don't have sufficient permission for the resource
        we requested - usually a list or a template

        @raise WebserviceError: in all circumstances

        """
        message = self._extract_error(self._read_and_close(fp))
        if self.using_authentication:
            raise WebserviceError("Insufficient permissions", errcode, errmsg, message)
        else:
            raise WebserviceError("No permissions - not logged in", errcode, errmsg, message)

    def http_error_404(self, url, fp, errcode, errmsg, headers, data=None):
        """
        Handle 404 HTTP errors, attempting to return informative error messages
        =======================================================================

        404 errors indicate that the requested resource does not exist - usually
        a template that is no longer available.

        @raise WebserviceError: in all circumstances

        """
        message = self._extract_error(self._read_and_close(fp))
        raise WebserviceError("Missing resource", errcode, errmsg, message)

    def http_error_500(self, url, fp, errcode, errmsg, headers, data=None):
        """
        Handle 500 HTTP errors, attempting to return informative error messages
        =======================================================================

        500 errors indicate that the server borked during the request - ie: it wasn't
        our fault.

        @raise WebserviceError: in all circumstances

        """
        message = self._extract_error(self._read_and_close(fp))
        raise WebserviceError("Internal server error", errcode, errmsg, message)
683