Package intermine :: Module results
[hide private]
[frames] | no frames]

Source Code for Module intermine.results

  1  try: 
  2      import simplejson as json # Prefer this as it is faster 
  3  except ImportError: # pragma: no cover 
  4      try: 
  5          import json 
  6      except ImportError: 
  7          raise ImportError("Could not find any JSON module to import - " 
  8              + "please install simplejson or jsonlib to continue") 
  9   
 10  import urllib 
 11  import httplib 
 12  import re 
 13  import copy 
 14  import base64 
 15  from urlparse import urlparse 
 16  from itertools import groupby 
 17  import UserDict 
 18   
 19  from intermine.errors import WebserviceError 
 20  from intermine.model import Attribute, Reference, Collection 
 21   
 22  USER_AGENT = 'WebserviceInterMinePerlAPIClient' 
class EnrichmentLine(UserDict.UserDict):
    """
    An object that represents a result returned from the enrichment service.
    ========================================================================

    These objects operate as dictionaries as well as objects with predefined
    properties.

    Keys that contain hyphens can be read as attributes by writing an
    underscore in place of each hyphen, so C{line.p_value} returns the
    value stored under C{"p-value"}.
    """

    def __str__(self):
        """Return the string form of the underlying dictionary."""
        return str(self.data)

    def __repr__(self):
        """Return a representation of the form C{EnrichmentLine({...})}."""
        return "EnrichmentLine(%s)" % self.data

    def __getattr__(self, name):
        """
        Resolve an attribute that normal lookup could not find
        ======================================================

        Underscores in the name are replaced with hyphens and the result
        is looked up as a key of the dictionary.

        @param name: The attribute name
        @type name: string

        @raise AttributeError: if there is no key matching the name
        """
        # Read the backing dict from the instance dictionary directly.
        # Going through self.data or self.keys() on an instance that has
        # no "data" attribute yet (e.g. while it is being copied or
        # unpickled) would re-enter __getattr__ and recurse without end.
        data = self.__dict__.get("data")
        if data is not None and name is not None:
            key_name = name.replace('_', '-')
            if key_name in data:
                return data[key_name]
        raise AttributeError(name)
45
class ResultObject(object):
    """
    An object used to represent result records as returned in jsonobjects format
    ============================================================================

    These objects are backed by a row of data and the class descriptor that
    describes the object. They allow access in standard object style:

        >>> for gene in query.results():
        ...    print gene.symbol
        ...    print map(lambda x: x.name, gene.pathways)

    All objects will have "id" and "type" properties. The type refers to the
    actual type of this object: if it is a subclass of the one requested, the
    subclass name will be returned. The "id" refers to the internal database id
    of the object, and is a guarantor of object identity.

    """

    def __init__(self, data, cld, view=None):
        """
        Constructor
        ===========

        @param data: The record for this object, keyed by field name
        @type data: dict
        @param cld: The class descriptor for the requested class
        @param view: The output columns that were selected (headed paths,
            eg: "Gene.organism.name")
        @type view: list
        """
        if view is None:
            view = []
        # Drop the leading path element ("Gene.symbol" -> "symbol").
        stripped = [v[v.find(".") + 1:] for v in view]
        self.selected_attributes = [v for v in stripped if "." not in v]
        # Group the paths by their first element (including the trailing
        # dot; plain attributes land under ""). Accumulating into lists
        # keeps every path even when the paths of one reference are not
        # adjacent in the view, which a groupby over the unsorted list
        # would silently lose.
        self.reference_paths = {}
        for path in stripped:
            prefix = path[:path.find(".") + 1]
            self.reference_paths.setdefault(prefix, []).append(path)
        self._data = data
        if "class" not in data or cld.name == data["class"]:
            self._cld = cld
        else:
            # The record is of a different class from the one requested.
            self._cld = cld.model.get_class(data["class"])
        self._attr_cache = {}

    def __str__(self):
        """Return the class name and the scalar fields of this record."""
        dont_show = set(["objectId", "class"])
        return "%s(%s)" % (self._cld.name, ", ".join("%s = %r" % (k, v) for k, v in self._data.items()
            if not isinstance(v, dict) and not isinstance(v, list) and k not in dont_show))

    def __repr__(self):
        """Return the class name and all fields, resolved as attributes."""
        dont_show = set(["objectId", "class"])
        return "%s(%s)" % (self._cld.name, ", ".join("%s = %r" % (k, getattr(self, k)) for k in self._data.keys()
            if k not in dont_show))

    def __getattr__(self, name):
        """
        Resolve a model field as an attribute of this object
        ====================================================

        Attributes are read from the backing data, references are wrapped
        in ResultObjects and collections in lists of ResultObjects. Values
        missing from the data are fetched, and every resolved value is
        cached.

        @raise AttributeError: for names starting with an underscore
        @raise WebserviceError: if the field is not an attribute or reference
        """
        # Private and special names are never looked up as model fields.
        # This also stops the lookups of self._attr_cache below from
        # re-entering this method without end on an instance whose
        # __init__ has not run (e.g. during copying or unpickling).
        if name.startswith("_"):
            raise AttributeError(name)

        if name in self._attr_cache:
            return self._attr_cache[name]

        if name == "type":
            # Fall back to the class descriptor when the record carries no
            # explicit class, mirroring what __init__ accepts.
            return self._data.get("class", self._cld.name)

        fld = self._cld.get_field(name)
        attr = None
        if isinstance(fld, Attribute):
            if name in self._data:
                attr = self._data[name]
            if attr is None:
                attr = self._fetch_attr(fld)
        elif isinstance(fld, Reference):
            ref_paths = self._get_ref_paths(fld)
            if name in self._data:
                data = self._data[name]
            else:
                data = self._fetch_reference(fld)
            if isinstance(fld, Collection):
                if data is None:
                    attr = []
                else:
                    attr = [ResultObject(x, fld.type_class, ref_paths) for x in data]
            else:
                if data is None:
                    attr = None
                else:
                    attr = ResultObject(data, fld.type_class, ref_paths)
        else:
            raise WebserviceError("Inconsistent model - This should never happen")
        self._attr_cache[name] = attr
        return attr

    def _get_ref_paths(self, fld):
        """Return the selected paths below the given reference, or an empty list."""
        return self.reference_paths.get(fld.name + ".", [])

    @property
    def id(self):
        """Return the internal DB identifier of this object. Or None if this is not an InterMine object"""
        return self._data.get('objectId')

    def _fetch_attr(self, fld):
        """Query for an attribute that was not part of the original data."""
        if fld.name in self.selected_attributes:
            return None # Was originally selected - no point asking twice
        c = self._cld
        if "id" not in c:
            return None # Cannot reliably fetch anything without access to the objectId.
        q = c.model.service.query(c, fld).where(id = self.id)
        r = q.first()
        # NOTE(review): first() is assumed to be able to return None when
        # the query has no results - confirm against the query class.
        if r is None:
            return None
        return r._data[fld.name] if fld.name in r._data else None

    def _fetch_reference(self, ref):
        """Query for a reference that was not part of the original data."""
        if ref.name + "." in self.reference_paths:
            return None # Was originally selected - no point asking twice.
        c = self._cld
        if "id" not in c:
            return None # Cannot reliably fetch anything without access to the objectId.
        q = c.model.service.query(ref).outerjoin(ref).where(id = self.id)
        r = q.first()
        # NOTE(review): first() is assumed to be able to return None when
        # the query has no results - confirm against the query class.
        if r is None:
            return None
        return r._data[ref.name] if ref.name in r._data else None
148
class ResultRow(object):
    """
    An object for representing a row of data received back from the server.
    =======================================================================

    ResultRows provide access to the fields of the row through index lookup. However,
    for convenience both list indexes and dictionary keys can be used. So the
    following all work:

      >>> # Assuming the view is "Gene.symbol", "Gene.organism.name":
      >>> row[0] == row["symbol"] == row["Gene.symbol"]
      ... True

    """

    def __init__(self, data, views):
        """
        Constructor
        ===========

        @param data: The cells of this row
        @type data: list
        @param views: The column paths, in the same order as the cells
        @type views: list
        """
        self.data = data
        self.views = views
        self.index_map = None  # built lazily on the first lookup by key

    def __len__(self):
        """Return the number of cells in this row"""
        return len(self.data)

    def __iter__(self):
        """Return the list view of the row, so each cell can be processed"""
        return iter(self.to_l())

    def _get_index_for(self, key):
        """
        Return the cell index for a view, given with or without its root.

        @raise KeyError: if the key does not name a column of this row
        """
        if self.index_map is None:
            self.index_map = {}
            for i, view in enumerate(self.views):
                # The dot is escaped so that only a real "Root." prefix is
                # removed; a view without any dot keeps its full name
                # instead of being aliased to the empty string.
                headless_view = re.sub(r"^[^.]+\.", "", view)
                self.index_map[view] = i
                self.index_map[headless_view] = i

        return self.index_map[key]

    def __str__(self):
        """Return the row as "Root: path=value path=value ..." """
        # An empty row has no root to report.
        root = re.sub(r"\..*$", "", self.views[0]) if self.views else ""
        parts = [root + ":"]
        for view in self.views:
            short_form = re.sub(r"^[^.]+\.", "", view)
            value = self[view]
            parts.append(short_form + "=" + repr(value))
        return " ".join(parts)

    def __getitem__(self, key):
        """Return the cell(s) for an integer index, a slice or a view name."""
        if isinstance(key, (int, slice)):
            return self.data[key]
        index = self._get_index_for(key)
        return self.data[index]

    def to_l(self):
        """Return a list view of this row"""
        return list(self.data)

    def to_d(self):
        """Return a dictionary view of this row"""
        return dict((view, self[view]) for view in self.views)

    def items(self):
        """Return a list of (view, value) pairs, in column order"""
        return [(view, self[view]) for view in self.views]

    def iteritems(self):
        """Iterate over the (view, value) pairs, in column order"""
        for view in self.views:
            yield (view, self[view])

    def keys(self):
        """Return a copy of the views of this row"""
        return copy.copy(self.views)

    def values(self):
        """Return the values of this row as a list"""
        return self.to_l()

    def itervalues(self):
        """Iterate over the values of this row"""
        return iter(self.to_l())

    def iterkeys(self):
        """Iterate over the views of this row"""
        return iter(self.views)

    def has_key(self, key):
        """Return True if the key names a column, with or without its root"""
        try:
            self._get_index_for(key)
            return True
        except KeyError:
            return False
244
class TableResultRow(ResultRow):
    """
    A class for parsing results from the jsonrows data format.

    Each cell of the backing data is a mapping, and the cell's content is
    read from its "value" entry.
    """

    def __getitem__(self, key):
        """Return the value(s) for an integer index, a slice or a view name."""
        if isinstance(key, slice):
            return [cell["value"] for cell in self.data[key]]
        if not isinstance(key, int):
            # Anything else is treated as a view name.
            key = self._get_index_for(key)
        return self.data[key]["value"]

    def to_l(self):
        """Return a list view of this row"""
        return [cell["value"] for cell in self.data]
263
class ResultIterator(object):
    """
    A facade over the internal iterator object
    ==========================================

    These objects handle the iteration over results
    in the formats requested by the user. They are responsible
    for generating an appropriate parser,
    connecting the parser to the results, and delegating
    iteration appropriately.
    """

    PARSED_FORMATS = frozenset(["rr", "list", "dict"])
    STRING_FORMATS = frozenset(["tsv", "csv", "count"])
    JSON_FORMATS = frozenset(["jsonrows", "jsonobjects", "json"])
    ROW_FORMATS = PARSED_FORMATS | STRING_FORMATS | JSON_FORMATS

    def __init__(self, service, path, params, rowformat, view, cld=None):
        """
        Constructor
        ===========

        Services are responsible for getting result iterators. You will
        not need to create one manually.

        @param service: The service to request results from; its root,
            version and opener are used
        @param path: The resource path (eg: "/query/results")
        @type path: string
        @param params: The query parameters for this request
        @type params: dict
        @param rowformat: One of "rr", "object", "count", "dict", "list", "tsv", "csv", "jsonrows", "jsonobjects", "json"
        @type rowformat: string
        @param view: The output columns
        @type view: list
        @param cld: The class descriptor used to build result objects
            (only used by the "jsonobjects" format)

        @raise ValueError: if the row format is incorrect
        """
        if rowformat.startswith("object"): # Accept "object", "objects", "objectformat", etc...
            rowformat = "jsonobjects" # these are synonymous
        if rowformat not in self.ROW_FORMATS:
            raise ValueError("'%s' is not one of the valid row formats (%s)"
                    % (rowformat, repr(list(self.ROW_FORMATS))))

        self.row = ResultRow if service.version >= 8 else TableResultRow

        # Work on a copy, so that the "format" parameter added below does
        # not leak into the mapping owned by the caller.
        params = copy.copy(params)
        if rowformat in self.PARSED_FORMATS:
            if service.version >= 8:
                params.update({"format": "json"})
            else:
                params.update({"format" : "jsonrows"})
        elif rowformat == 'tsv':
            params.update({"format": "tab"})
        else:
            params.update({"format" : rowformat})

        self.url = service.root + path
        self.data = urllib.urlencode(params)
        self.view = view
        self.opener = service.opener
        self.cld = cld
        self.rowformat = rowformat
        self._it = None

    def __len__(self):
        """
        Return the number of items in this iterator
        ===========================================

        Note that this requires iterating over the full result set.
        """
        c = 0
        for x in self:
            c += 1
        return c

    def __iter__(self):
        """
        Return an iterator over the results
        ===================================

        Opens a new connection and returns the internal iterator object
        that parses it in the requested row format.

        @raise Exception: if the parser for the format cannot be created
        """
        con = self.opener.open(self.url, self.data)
        identity = lambda x: x
        flat_file_parser = lambda: FlatFileIterator(con, identity)
        simple_json_parser = lambda: JSONIterator(con, identity)

        try:
            reader = {
                "tsv" : flat_file_parser,
                "csv" : flat_file_parser,
                "count" : flat_file_parser,
                "json" : simple_json_parser,
                "jsonrows" : simple_json_parser,
                "list" : lambda: JSONIterator(con, lambda x: self.row(x, self.view).to_l()),
                "rr" : lambda: JSONIterator(con, lambda x: self.row(x, self.view)),
                "dict" : lambda: JSONIterator(con, lambda x: self.row(x, self.view).to_d()),
                "jsonobjects" : lambda: JSONIterator(con, lambda x: ResultObject(x, self.cld, self.view))
            }.get(self.rowformat)()
        except Exception as e:
            # The separator keeps the format name and the cause readable.
            raise Exception("Couldn't get iterator for " + self.rowformat + ": " + str(e))
        return reader

    def next(self):
        """
        Returns the next row, in the appropriate format

        The internal iterator is created on the first call, and discarded
        once it is exhausted, so that a later call starts a new request.

        @rtype: whatever the rowformat was determined to be
        """
        if self._it is None:
            self._it = iter(self)
        try:
            return self._it.next()
        except StopIteration:
            self._it = None
            raise StopIteration
384
385 -class FlatFileIterator(object):
386 """ 387 An iterator for handling results returned as a flat file (TSV/CSV). 388 =================================================================== 389 390 This iterator can be used as the sub iterator in a ResultIterator 391 """ 392
393 - def __init__(self, connection, parser):
394 """ 395 Constructor 396 =========== 397 398 @param connection: The source of data 399 @type connection: socket.socket 400 @param parser: a handler for each row of data 401 @type parser: Parser 402 """ 403 self.connection = connection 404 self.parser = parser
405
406 - def __iter__(self):
407 return self
408
409 - def next(self):
410 """Return a parsed line of data""" 411 line = self.connection.next().strip() 412 if line.startswith("[ERROR]"): 413 raise WebserviceError(line) 414 return self.parser(line)
415
416 -class JSONIterator(object):
417 """ 418 An iterator for handling results returned in the JSONRows format 419 ================================================================ 420 421 This iterator can be used as the sub iterator in a ResultIterator 422 """ 423
424 - def __init__(self, connection, parser):
425 """ 426 Constructor 427 =========== 428 429 @param connection: The source of data 430 @type connection: socket.socket 431 @param parser: a handler for each row of data 432 @type parser: Parser 433 """ 434 self.connection = connection 435 self.parser = parser 436 self.header = "" 437 self.footer = "" 438 self.parse_header() 439 self._is_finished = False
440
441 - def __iter__(self):
442 return self
443
444 - def next(self):
445 """Returns a parsed row of data""" 446 if self._is_finished: 447 raise StopIteration 448 return self.get_next_row_from_connection()
449
450 - def parse_header(self):
451 """Reads out the header information from the connection""" 452 try: 453 line = self.connection.next().strip() 454 self.header += line 455 if not line.endswith('"results":['): 456 self.parse_header() 457 except StopIteration: 458 raise WebserviceError("The connection returned a bad header" + self.header)
459
460 - def check_return_status(self):
461 """ 462 Perform status checks 463 ===================== 464 465 The footer containts information as to whether the result 466 set was successfully transferred in its entirety. This 467 method makes sure we don't silently accept an 468 incomplete result set. 469 470 @raise WebserviceError: if the footer indicates there was an error 471 """ 472 container = self.header + self.footer 473 info = None 474 try: 475 info = json.loads(container) 476 except: 477 raise WebserviceError("Error parsing JSON container: " + container) 478 479 if not info["wasSuccessful"]: 480 raise WebserviceError(info["statusCode"], info["error"])
481
483 """ 484 Reads the connection to get the next row, and sends it to the parser 485 486 @raise WebserviceError: if the connection is interrupted 487 """ 488 next_row = None 489 try: 490 line = self.connection.next() 491 if line.startswith("]"): 492 self.footer += line; 493 for otherline in self.connection: 494 self.footer += line 495 self.check_return_status() 496 else: 497 line = line.strip().strip(',') 498 if len(line) > 0: 499 try: 500 row = json.loads(line) 501 except json.decoder.JSONDecodeError, e: 502 raise WebserviceError("Error parsing line from results: '" 503 + line + "' - " + str(e)) 504 next_row = self.parser(row) 505 except StopIteration: 506 raise WebserviceError("Connection interrupted") 507 508 if next_row is None: 509 self._is_finished = True 510 raise StopIteration 511 else: 512 return next_row
513
class InterMineURLOpener(urllib.FancyURLopener):
    """
    Specific implementation of urllib.FancyURLOpener for this client
    ================================================================

    Provides user agent and authentication headers, and handling of errors
    """
    version = "InterMine-Python-Client-0.96.00"

    def __init__(self, credentials=None, token=None):
        """
        Constructor
        ===========

          InterMineURLOpener((username, password)) S{->} InterMineURLOpener

        Return a new url-opener with the appropriate credentials

        @param credentials: A (username, password) pair; anything that is
            not a pair means the requests are not authenticated
        @param token: A token to append to every url as the "token" parameter
        @type token: string
        """
        urllib.FancyURLopener.__init__(self)
        self.token = token
        self.plain_post_header = {
            "Content-Type": "text/plain; charset=utf-8",
            "User-Agent": USER_AGENT
        }
        if credentials and len(credentials) == 2:
            # tuple() lets a list be given as well as a tuple.
            raw = '%s:%s' % tuple(credentials)
            if not isinstance(raw, bytes):
                raw = raw.encode('utf-8')
            # b64encode never inserts line breaks, which would corrupt the
            # header for long credentials.
            encoded = base64.b64encode(raw)
            if not isinstance(encoded, str):
                encoded = encoded.decode('ascii')
            # HTTP basic authentication requires the scheme name in front
            # of the encoded credentials.
            auth_header = "Basic " + encoded
            self.addheader("Authorization", auth_header)
            self.plain_post_header["Authorization"] = auth_header
            self.using_authentication = True
        else:
            self.using_authentication = False

    def _plain_request(self, method, url, body):
        """
        Send a request with the plain text headers and return the content.

        @raise WebserviceError: if the response status is not 200
        """
        url = self.prepare_url(url)
        o = urlparse(url)
        # Use a secure connection when the url asks for one.
        if o.scheme == "https":
            con = httplib.HTTPSConnection(o.hostname, o.port)
        else:
            con = httplib.HTTPConnection(o.hostname, o.port)
        try:
            con.request(method, url, body, self.plain_post_header)
            resp = con.getresponse()
            content = resp.read()
        finally:
            # Release the connection even if the request failed.
            con.close()
        if resp.status != 200:
            raise WebserviceError(resp.status, resp.reason, content)
        return content

    def _read_content(self, fp):
        """Read everything from an error response, and close it."""
        try:
            return fp.read()
        finally:
            fp.close()

    def _read_message(self, fp):
        """
        Read an error response and return its most informative part.

        This is the "error" value if the content is a JSON object which
        has one, and the whole content otherwise.
        """
        content = self._read_content(fp)
        try:
            return json.loads(content)["error"]
        except (ValueError, KeyError, TypeError):
            return content

    def post_plain_text(self, url, body):
        """
        POST the body as plain text, and return the content of the response

        @raise WebserviceError: if the response status is not 200
        """
        return self._plain_request('POST', url, body)

    def open(self, url, data=None):
        """Open the url, after adding the token to it if there is one"""
        url = self.prepare_url(url)
        return urllib.FancyURLopener.open(self, url, data)

    def prepare_url(self, url):
        """Return the url, with the token appended as a query parameter if there is one"""
        if self.token:
            token_param = "token=" + self.token
            o = urlparse(url)
            if o.query:
                url += "&" + token_param
            else:
                url += "?" + token_param

        return url

    def delete(self, url):
        """
        Send a DELETE request, and return the content of the response

        @raise WebserviceError: if the response status is not 200
        """
        return self._plain_request('DELETE', url, None)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Re-implementation of http_error_default, with content now supplied by default"""
        content = self._read_content(fp)
        raise WebserviceError(errcode, errmsg, content)

    def http_error_400(self, url, fp, errcode, errmsg, headers, data=None):
        """
        Handle 400 HTTP errors, attempting to return informative error messages
        =======================================================================

        400 errors indicate that something about our request was incorrect

        @raise WebserviceError: in all circumstances

        """
        message = self._read_message(fp)
        raise WebserviceError("There was a problem with our request", errcode, errmsg, message)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """
        Handle 401 HTTP errors, attempting to return informative error messages
        =======================================================================

        401 errors indicate we don't have sufficient permission for the resource
        we requested - usually a list or a template

        @raise WebserviceError: in all circumstances

        """
        content = self._read_content(fp)
        if self.using_authentication:
            raise WebserviceError("Insufficient permissions", errcode, errmsg, content)
        else:
            raise WebserviceError("No permissions - not logged in", errcode, errmsg, content)

    def http_error_403(self, url, fp, errcode, errmsg, headers, data=None):
        """
        Handle 403 HTTP errors, attempting to return informative error messages
        =======================================================================

        403 errors indicate we don't have sufficient permission for the resource
        we requested - usually a list or a template

        @raise WebserviceError: in all circumstances

        """
        message = self._read_message(fp)
        if self.using_authentication:
            raise WebserviceError("Insufficient permissions", errcode, errmsg, message)
        else:
            raise WebserviceError("No permissions - not logged in", errcode, errmsg, message)

    def http_error_404(self, url, fp, errcode, errmsg, headers, data=None):
        """
        Handle 404 HTTP errors, attempting to return informative error messages
        =======================================================================

        404 errors indicate that the requested resource does not exist - usually
        a template that is no longer available.

        @raise WebserviceError: in all circumstances

        """
        message = self._read_message(fp)
        raise WebserviceError("Missing resource", errcode, errmsg, message)

    def http_error_500(self, url, fp, errcode, errmsg, headers, data=None):
        """
        Handle 500 HTTP errors, attempting to return informative error messages
        =======================================================================

        500 errors indicate that the server borked during the request - ie: it wasn't
        our fault.

        @raise WebserviceError: in all circumstances

        """
        message = self._read_message(fp)
        raise WebserviceError("Internal server error", errcode, errmsg, message)
685