Package intermine :: Module webservice
[hide private]
[frames | no frames]

Source Code for Module intermine.webservice

  1  from urlparse import urlunsplit, urljoin 
  2  from xml.dom import minidom 
  3  import urllib 
  4  import csv 
  5  import base64 
  6   
  7  # Use core json for 2.6+, simplejson for <=2.5 
  8  try: 
  9      import json 
 10  except ImportError: 
 11      import simplejson as json 
 12   
 13  # Local intermine imports 
 14  from .query import Query, Template 
 15  from .model import Model 
 16  from .util import ReadableException 
 17   
 18  """ 
 19  Webservice Interaction Routines for InterMine Webservices 
 20  ========================================================= 
 21   
 22  Classes for dealing with communication with an InterMine 
 23  RESTful webservice. 
 24   
 25  """ 
 26   
 27  __author__ = "Alex Kalderimis" 
 28  __organization__ = "InterMine" 
 29  __license__ = "LGPL" 
 30  __contact__ = "dev@intermine.org" 
class Service(object):
    """
    A class representing connections to different InterMine WebServices
    ===================================================================

    The intermine.webservice.Service class is the main interface for the user.
    It will provide access to queries and templates, as well as doing the
    background task of fetching the data model, and actually requesting
    the query results.

    SYNOPSIS
    --------

    example::

      from intermine.webservice import Service
      service = Service("http://www.flymine.org/query/service")

      template = service.get_template("Gene_Pathways")
      for row in template.results(A={"value":"zen"}):
          do_something_with(row)
          ...

      query = service.new_query()
      query.add_view("Gene.symbol", "Gene.pathway.name")
      query.add_constraint("Gene", "LOOKUP", "zen")
      for row in query.results():
          do_something_with(row)
          ...

    OVERVIEW
    --------
    The two methods the user will be most concerned with are:
      - L{Service.new_query}: constructs a new query to query a service with
      - L{Service.get_template}: gets a template from the service

    TERMINOLOGY
    -----------
    X{Query} is the term for an arbitrarily complex structured request for
    data from the webservice. The user is responsible for specifying the
    structure that determines what records are returned, and what information
    about each record is provided.

    X{Template} is the term for a predefined "Query", ie: one that has been
    written and saved on the webservice you will access. The definition
    of the query is already done, but the user may want to specify the
    values of the constraints that exist on the template. Templates are accessed
    by name, and while you can easily introspect templates, it is assumed
    you know what they do when you use them

    @see: L{intermine.query}
    """
    # Resource paths, each appended to the service root url
    QUERY_PATH = '/query/results'
    MODEL_PATH = '/model'
    TEMPLATES_PATH = '/templates/xml'
    TEMPLATEQUERY_PATH = '/template/results'
    VERSION_PATH = '/version'
    USER_AGENT = 'WebserviceInterMinePerlAPIClient'
    LIST_PATH = '/lists/xml'
    SAVEDQUERY_PATH = '/savedqueries/xml'
    RELEASE_PATH = '/version/release'
    SCHEME = 'http://'

    def __init__(self, root, username=None, password=None):
        """
        Constructor
        ===========

        Construct a connection to a webservice::

            service = Service("http://www.flymine.org/query/service")

        No request is made at construction time: the version, release,
        templates and model are all fetched lazily on first access.

        @param root: the root url of the webservice (required)
        @param username: your login name (optional)
        @param password: your password (required if a username is given)

        @raise ValueError: if a username is supplied, but no password
        """
        self.root = root
        # Lazily populated caches, see the properties of the same name
        self._templates = None
        self._model = None
        self._version = None
        self._release = None
        if username:
            if not password:
                raise ValueError("No password supplied")
            self.opener = InterMineURLOpener((username, password))
        else:
            self.opener = InterMineURLOpener()

        # Eager validation of the root url is deliberately disabled:
        # this works in the real world, but not in testing...
        # try:
        #     self.version
        # except ServiceError:
        #     raise ServiceError("Could not validate service - is the root url correct?")

    @property
    def version(self):
        """
        Returns the webservice version
        ==============================

        The version specifies what capabilities a
        specific webservice provides. The most current
        version is 3

        The value is fetched on first access and cached afterwards.

        @raise ServiceError: if the response cannot be parsed as an integer

        @rtype: int
        """
        if self._version is None:
            try:
                url = self.root + self.VERSION_PATH
                self._version = int(self.opener.open(url).read())
            except ValueError:
                raise ServiceError("Could not parse a valid webservice version")
        return self._version

    @property
    def release(self):
        """
        Returns the datawarehouse release
        =================================

        Service.release S{->} string

        The release is an arbitrary string used to distinguish
        releases of the datawarehouse. This usually coincides
        with updates to the data contained within. While a string,
        releases usually sort in ascending order of recentness
        (eg: "release-26", "release-27", "release-28"). They can also
        have less machine readable meanings (eg: "beta")

        The value is fetched on first access and cached afterwards.

        @rtype: string
        """
        if self._release is None:
            # RELEASE_PATH is a class attribute, so it must be reached through
            # self; the request goes through self.opener, like every other
            # request made by this class, so that credentials are sent.
            url = self.root + self.RELEASE_PATH
            self._release = self.opener.open(url).read()
        return self._release

    def new_query(self):
        """
        Construct a new Query object for the given webservice
        =====================================================

        This is the standard method for instantiating new Query
        objects. Queries require access to the data model, as well
        as the service itself, so it is easiest to access them through
        this factory method.

        @return: L{intermine.query.Query}
        """
        return Query(self.model, self)

    def get_template(self, name):
        """
        Returns a template of the given name
        ====================================

        Tries to retrieve a template of the given name
        from the webservice. If you are trying to fetch
        a private template (ie. one you made yourself
        and is not available to others) then you may need to authenticate

        @see: L{intermine.webservice.Service.__init__}

        @param name: the template's name
        @type name: string

        @raise ServiceError: if the template does not exist
        @raise QueryParseError: if the template cannot be parsed

        @return: L{intermine.query.Template}
        """
        try:
            t = self.templates[name]
        except KeyError:
            raise ServiceError("There is no template called '"
                + name + "' at this service")
        if not isinstance(t, Template):
            # Still stored as an XML string: parse it once and cache the result
            t = Template.from_xml(t, self.model, self)
            self.templates[name] = t
        return t

    @property
    def templates(self):
        """
        The dictionary of templates from the webservice
        ===============================================

        Service.templates S{->} dict(intermine.query.Template|string)

        For efficiency's sake, Templates are not parsed until
        they are required, and until then they are stored as XML
        strings. It is recommended that in most cases you would want
        to use L{Service.get_template}.

        You can use this property however to test for template existence though::

         if name in service.templates:
            template = service.get_template(name)

        @raise ServiceError: if two templates share the same name

        @rtype: dict

        """
        if self._templates is None:
            sock = self.opener.open(self.root + self.TEMPLATES_PATH)
            try:
                dom = minidom.parse(sock)
            finally:
                # Release the connection even when the XML is malformed
                sock.close()
            templates = {}
            for e in dom.getElementsByTagName('template'):
                name = e.getAttribute('name')
                if name in templates:
                    raise ServiceError("Two templates with same name: " + name)
                templates[name] = e.toxml()
            self._templates = templates
        return self._templates

    @property
    def model(self):
        """
        The data model for the webservice you are querying
        ==================================================

        Service.model S{->} L{intermine.model.Model}

        This is used when constructing queries to provide them
        with information on the structure of the data model
        they are accessing. You are very unlikely to want to
        access this object directly.

        raises ModelParseError: if the model cannot be read

        @rtype: L{intermine.model.Model}

        """
        if self._model is None:
            model_url = self.root + self.MODEL_PATH
            self._model = Model(model_url)
        return self._model

    def get_results(self, path, params, rowformat, view):
        """
        Return an Iterator over the rows of the results
        ===============================================

        This method is called internally by the query objects
        when they are called to get results. You will not
        normally need to call it directly

        @param path: The resource path (eg: "/query/results")
        @type path: string
        @param params: The query parameters for this request as a dictionary
        @type params: dict
        @param rowformat: One of "dict", "list", "tsv", "csv", "jsonrows", "jsonobjects"
        @type rowformat: string
        @param view: The output columns
        @type view: list

        @raise WebserviceError: for failed requests

        @return: L{intermine.webservice.ResultIterator}
        """
        return ResultIterator(self.root, path, params, rowformat, view, self.opener)

    def get_results_list(self, path, params, rowformat, view):
        """
        Return a list of the rows of the results
        ========================================

        This method is called internally by the query objects
        when they are called to get results. You will not
        normally need to call it directly

        @param path: The resource path (eg: "/query/results")
        @type path: string
        @param params: The query parameters for this request as a dictionary
        @type params: dict
        @param rowformat: One of "dict", "list", "tsv", "csv", "jsonrows", "jsonobjects"
        @type rowformat: string
        @param view: The output columns
        @type view: list

        @raise WebserviceError: for failed requests

        @return: a list of rows of data
        """
        return list(self.get_results(path, params, rowformat, view))
321
class ResultIterator(object):
    """
    An iterator over the rows of a result set
    =========================================

    Posts the request to the webservice and delegates the reading
    of the response to a format specific sub iterator.
    """

    PARSED_FORMATS = frozenset(["list", "dict"])
    STRING_FORMATS = frozenset(["tsv", "csv"])
    JSON_FORMATS = frozenset(["jsonrows", "jsonobjects"])
    ROW_FORMATS = PARSED_FORMATS | STRING_FORMATS | JSON_FORMATS

    def __init__(self, root, path, params, rowformat, view, opener):
        """
        Constructor
        ===========

        Services are responsible for getting result iterators. You will
        not need to create one manually.

        @param root: The root path (eg: "http://www.flymine.org/query/service")
        @type root: string
        @param path: The resource path (eg: "/query/results")
        @type path: string
        @param params: The query parameters for this request; note that its
                       "format" entry is set (in place) by this constructor
        @type params: dict
        @param rowformat: One of "dict", "list", "tsv", "csv", "jsonrows", "jsonobjects"
        @type rowformat: string
        @param view: The output columns
        @type view: list
        @param opener: A url opener (user-agent)
        @type opener: urllib.URLopener

        @raise ValueError: if the row format is incorrect
        @raise WebserviceError: if the request is unsuccessful
        """
        if rowformat not in self.ROW_FORMATS:
            # ROW_FORMATS is a frozenset and cannot be concatenated onto a
            # string, so the valid names are joined explicitly.
            raise ValueError("'%s' is not a valid row format: %s"
                % (rowformat, ", ".join(sorted(self.ROW_FORMATS))))

        # "list" and "dict" rows are built client-side from jsonrows
        if rowformat in self.PARSED_FORMATS:
            params.update({"format" : "jsonrows"})
        else:
            params.update({"format" : rowformat})

        url = root + path
        data = urllib.urlencode(params)
        con = opener.open(url, data)

        if rowformat in self.STRING_FORMATS:
            self.reader = FlatFileIterator(con, EchoParser())
        elif rowformat == "list":
            self.reader = JSONIterator(con, ListValueParser())
        elif rowformat == "dict":
            self.reader = JSONIterator(con, DictValueParser(view))
        else:
            # "jsonrows" and "jsonobjects" are passed through untouched
            self.reader = JSONIterator(con, EchoParser())

    def __iter__(self):
        """Iteration is delegated to the format specific sub iterator"""
        return self.reader

    def next(self):
        """
        Returns the next row, in the appropriate format

        @rtype: whatever the rowformat was determined to be
        """
        return self.reader.next()
383
384 -class FlatFileIterator(object):
385 """ 386 An iterator for handling results returned as a flat file (TSV/CSV). 387 =================================================================== 388 389 This iterator can be used as the sub iterator in a ResultIterator 390 """ 391
392 - def __init__(self, connection, parser):
393 """ 394 Constructor 395 =========== 396 397 @param connection: The source of data 398 @type connection: socket.socket 399 @param parser: a handler for each row of data 400 @type parser: Parser 401 """ 402 self.connection = connection 403 self.parser = parser
404
405 - def __iter__(self):
406 return self
407
408 - def next(self):
409 """Return a parsed line of data""" 410 line = self.connection.next().strip() 411 if line.startswith("[ERROR]"): 412 raise WebserviceError(line) 413 return self.parser.parse(line)
414
class JSONIterator(object):
    """
    An iterator for handling results returned in the JSONRows format
    ================================================================

    This iterator can be used as the sub iterator in a ResultIterator.

    The response is expected to be a JSON object whose rows arrive one
    per line between a header (ending in '"results":[') and a footer
    (starting with ']'). Header and footer are collected so that,
    joined together, they can be parsed for the status of the request.
    """

    def __init__(self, connection, parser):
        """
        Constructor
        ===========

        The header is consumed from the connection immediately.

        @param connection: The source of data
        @type connection: socket.socket
        @param parser: a handler for each row of data
        @type parser: Parser

        @raise WebserviceError: if the header cannot be read
        """
        self.connection = connection
        self.parser = parser
        self.header = ""
        self.footer = ""
        self.parse_header()

    def __iter__(self):
        """This object is its own iterator"""
        return self

    def next(self):
        """Returns a parsed row of data"""
        return self.get_next_row_from_connection()

    def parse_header(self):
        """
        Reads out the header information from the connection

        Lines are accumulated in self.header up to and including the
        line that opens the results array.

        @raise WebserviceError: if the connection ends before the results start
        """
        try:
            # A loop rather than recursion: the number of header lines is
            # decided by the server and must not be bounded by the stack.
            while True:
                line = self.connection.next().strip()
                self.header += line
                if line.endswith('"results":['):
                    return
        except StopIteration:
            raise WebserviceError("The connection returned a bad header" + self.header)

    def check_return_status(self):
        """
        Perform status checks
        =====================

        The footer contains information as to whether the result
        set was successfully transferred in its entirety. This
        method makes sure we don't silently accept an
        incomplete result set.

        @raise WebserviceError: if the header and footer do not form valid
                                JSON, or do not report a successful request
        """
        container = self.header + self.footer
        try:
            info = json.loads(container)
        except ValueError:
            raise WebserviceError("Error parsing JSON container: " + container)

        # A container without a success flag is treated as a failure
        if not info.get("wasSuccessful"):
            raise WebserviceError(info.get("statusCode"), info.get("error"))

    def get_next_row_from_connection(self):
        """
        Reads the connection to get the next row, and sends it to the parser

        @raise WebserviceError: if the connection is interrupted, or the
                                footer reports an unsuccessful request
        @raise StopIteration: once the footer has been read and verified
        """
        try:
            line = self.connection.next()
        except StopIteration:
            # Running dry before the footer means the result set was truncated
            raise WebserviceError("Connection interrupted")

        if line.startswith("]"):
            # End of the results array: everything left belongs to the footer
            self.footer += line + "".join(self.connection)
            self.check_return_status()
            raise StopIteration

        # Rows are separated by commas, which are not part of the row itself
        row = json.loads(line.strip().strip(','))
        return self.parser.parse(row)
503
class Parser(object):
    """
    Base class for result line parsers
    ==================================

    Sub-class this class to gain a default constructor

    """

    def __init__(self, view=None):
        """
        Constructor
        ===========

        @param view: the list of output columns (default: a new empty list)
        @type view: list
        """
        # A fresh list per instance: a literal [] default would be shared
        # by every parser constructed without a view.
        if view is None:
            view = []
        self.view = view

    def parse(self, data):
        """
        Abstract method - implementations must provide behaviour

        @param data: a line of data

        @raise UnimplementedError: always, unless overridden
        """
        raise UnimplementedError
530
class EchoParser(Parser):
    """
    The identity parser
    ===================

    For result formats that should reach the caller
    exactly as they were read.
    """

    def parse(self, data):
        """
        Hand back the data that was passed in, unchanged

        @param data: the data from the result set
        """
        unchanged = data
        return unchanged
547
class ListValueParser(Parser):
    """
    A parser that turns rows into lists of values
    =============================================

    Each cell of a jsonrow formatted row contributes
    its "value" entry to the list.
    """

    def parse(self, row):
        """
        Collect the value of each cell in the row, in order

        @param row: a row of data from a result set
        @type row: a sequence of cells, each supporting get("value")

        @rtype: list
        """
        values = []
        for cell in row:
            values.append(cell.get("value"))
        return values
567
class DictValueParser(Parser):
    """
    A parser that turns rows into dictionaries
    ==========================================

    Each cell of a jsonrow formatted row is stored under
    the view string at the same position as the cell.
    """

    def parse(self, row):
        """
        Pair each view with the value of the cell at the same position

        Pairing stops at the shorter of the view list and the row.

        @param row: a row of data from a result set
        @type row: a sequence of cells, each supporting get("value")

        @rtype: dict
        """
        return dict((column, cell.get("value"))
                    for column, cell in zip(self.view, row))
592
class InterMineURLOpener(urllib.FancyURLopener):
    """
    Specific implementation of urllib.FancyURLOpener for this client
    ================================================================

    Provides user agent and authentication headers, and handling of errors
    """
    # Sent as the User-Agent of every request
    version = "InterMine-Python-Client-0.96.00"

    def __init__(self, credentials=None):
        """
        Constructor
        ===========

        InterMineURLOpener((username, password)) S{->} InterMineURLOpener

        Return a new url-opener with the appropriate credentials

        @param credentials: a (username, password) pair (optional); anything
                            else leaves the opener unauthenticated
        """
        urllib.FancyURLopener.__init__(self)
        if credentials and len(credentials) == 2:
            # HTTP Basic authentication: the scheme name must precede the
            # token. b64encode is used because encodestring breaks its output
            # into lines, which would corrupt the header for long credentials.
            token = base64.b64encode('%s:%s' % tuple(credentials))
            self.addheader("Authorization", "Basic " + token)
            self.using_authentication = True
        else:
            self.using_authentication = False

    def _read_and_close(self, fp):
        """Return the whole response body from fp, closing it even if the read fails"""
        try:
            return fp.read()
        finally:
            fp.close()

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """
        Re-implementation of http_error_default, with content now supplied by default

        @raise WebserviceError: in all circumstances
        """
        raise WebserviceError(errcode, errmsg, self._read_and_close(fp))

    def http_error_400(self, url, fp, errcode, errmsg, headers, data=None):
        """
        Handle 400 HTTP errors, attempting to return informative error messages
        =======================================================================

        400 errors indicate that something about our request was incorrect

        @raise WebserviceError: in all circumstances

        """
        content = self._read_and_close(fp)
        raise WebserviceError("There was a problem with our request", errcode, errmsg, content)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """
        Handle 401 HTTP errors, attempting to return informative error messages
        =======================================================================

        401 errors indicate we don't have sufficient permission for the resource
        we requested - usually a list or a template

        @raise WebserviceError: in all circumstances

        """
        content = self._read_and_close(fp)
        if self.using_authentication:
            raise WebserviceError("Insufficient permissions", errcode, errmsg, content)
        else:
            raise WebserviceError("No permissions - not logged in", errcode, errmsg, content)

    def http_error_404(self, url, fp, errcode, errmsg, headers, data=None):
        """
        Handle 404 HTTP errors, attempting to return informative error messages
        =======================================================================

        404 errors indicate that the requested resource does not exist - usually
        a template that is no longer available.

        @raise WebserviceError: in all circumstances

        """
        content = self._read_and_close(fp)
        raise WebserviceError("Missing resource", errcode, errmsg, content)

    def http_error_500(self, url, fp, errcode, errmsg, headers, data=None):
        """
        Handle 500 HTTP errors, attempting to return informative error messages
        =======================================================================

        500 errors indicate that the server borked during the request - ie: it wasn't
        our fault.

        @raise WebserviceError: in all circumstances

        """
        content = self._read_and_close(fp)
        raise WebserviceError("Internal server error", errcode, errmsg, content)
685
class UnimplementedError(Exception):
    """Raised by abstract methods that have not been overridden"""
688
class ServiceError(ReadableException):
    """Raised for problems creating or using a Service object"""
class WebserviceError(IOError):
    """Raised when an interaction with the webservice fails"""
695