Package intermine :: Module webservice
[hide private]
[frames] | no frames]

Source Code for Module intermine.webservice

  1  from xml.dom import minidom 
  2  import urllib 
  3  from urlparse import urlparse 
  4  import base64 
  5  import UserDict 
  6   
  7  #class UJsonLibDecoder(object): # pragma: no cover 
  8  #    def __init__(self): 
  9  #        self.loads = ujson.decode 
 10  # 
 11  # Use core json for 2.6+, simplejson for <=2.5 
 12  #try: 
 13  #    import ujson 
 14  #    json = UJsonLibDecoder() 
 15  #except ImportError: # pragma: no cover 
 16  try: 
 17      import simplejson as json # Prefer this as it is faster 
 18  except ImportError: # pragma: no cover 
 19      try: 
 20          import json 
 21      except ImportError: 
 22          raise ImportError("Could not find any JSON module to import - " 
 23              + "please install simplejson or jsonlib to continue") 
 24   
 25  # Local intermine imports 
 26  from intermine.query import Query, Template 
 27  from intermine.model import Model, Attribute, Reference, Collection, Column 
 28  from intermine.lists.listmanager import ListManager 
 29  from intermine.errors import ServiceError, WebserviceError 
 30  from intermine.results import InterMineURLOpener, ResultIterator 
 31   
 32  """ 
 33  Webservice Interaction Routines for InterMine Webservices 
 34  ========================================================= 
 35   
 36  Classes for dealing with communication with an InterMine 
 37  RESTful webservice. 
 38   
 39  """ 
 40   
 41  __author__ = "Alex Kalderimis" 
 42  __organization__ = "InterMine" 
 43  __license__ = "LGPL" 
 44  __contact__ = "dev@intermine.org" 
class Registry(object, UserDict.DictMixin):
    """
    A read-only, dict-like view of an InterMine registry.
    =====================================================

    A registry is a web-service that mines can register themselves with,
    which lets clients discover the available services.

    SYNOPSIS
    --------

    example::

        from intermine.webservice import Registry

        # Connect to the default registry service
        # at www.intermine.org/registry
        registry = Registry()

        # Find all the available mines:
        for name, mine in registry.items():
            print name, mine.version

        # Dict-like interface for accessing mines.
        flymine = registry["flymine"]

        # The mine object is a Service
        for gene in flymine.select("Gene.*").results():
            process(gene)

    Mine names are looked up case-insensitively, and the L{Service}
    object for a mine is only built the first time it is requested.
    Items can be neither added to nor removed from a registry.
    """

    MINES_PATH = "/mines.json"

    def __init__(self, registry_url="http://www.intermine.org/registry"):
        """
        Fetch the list of mines from the registry and index it.

        @param registry_url: the base url of the registry service
        """
        self.registry_url = registry_url
        mines_url = registry_url + Registry.MINES_PATH
        payload = InterMineURLOpener().open(mines_url).read()
        listing = json.loads(payload)

        # Registered name -> raw description of the mine.
        self.__mine_dict = {}
        for mine in listing["mines"]:
            self.__mine_dict[mine["name"]] = mine

        # Lower-cased name -> registered name, for case-insensitive lookup.
        self.__synonyms = {}
        for registered in self.__mine_dict.keys():
            self.__synonyms[registered.lower()] = registered

        # Lower-cased name -> Service, filled in on demand by __getitem__.
        self.__mine_cache = {}

    def __contains__(self, name):
        """Report whether a mine of this name (in any case) is registered."""
        return name.lower() in self.__synonyms

    def __getitem__(self, name):
        """
        Return the L{Service} for the named mine, building it on first use.

        @raise KeyError: if no mine of that name is registered
        """
        key = name.lower()
        if key not in self.__synonyms:
            raise KeyError("Unknown mine: " + name)
        if key not in self.__mine_cache:
            description = self.__mine_dict[self.__synonyms[key]]
            self.__mine_cache[key] = Service(description["webServiceRoot"])
        return self.__mine_cache[key]

    def __setitem__(self, name, item):
        """Registries are read-only: always raises NotImplementedError."""
        raise NotImplementedError("You cannot add items to a registry")

    def __delitem__(self, name):
        """Registries are read-only: always raises NotImplementedError."""
        raise NotImplementedError("You cannot remove items from a registry")

    def keys(self):
        """Return the names of the mines, as given by the registry."""
        return self.__mine_dict.keys()
class Service(object):
    """
    A class representing connections to different InterMine WebServices
    ===================================================================

    The intermine.webservice.Service class is the main interface for the user.
    It will provide access to queries and templates, as well as doing the
    background task of fetching the data model, and actually requesting
    the query results.

    SYNOPSIS
    --------

    example::

      from intermine.webservice import Service
      service = Service("http://www.flymine.org/query/service")

      template = service.get_template("Gene_Pathways")
      for row in template.results(A={"value":"zen"}):
        do_something_with(row)
        ...

      query = service.new_query()
      query.add_view("Gene.symbol", "Gene.pathway.name")
      query.add_constraint("Gene", "LOOKUP", "zen")
      for row in query.results():
        do_something_with(row)
        ...

      new_list = service.create_list("some/file/with.ids", "Gene")
      list_on_server = service.get_list("On server")
      in_both = new_list & list_on_server
      in_both.name = "Intersection of these lists"
      for row in in_both:
        do_something_with(row)
        ...

    OVERVIEW
    --------
    The three methods the user will be most concerned with are:
      - L{Service.new_query}: constructs a new query to query a service with
      - L{Service.get_template}: gets a template from the service
      - L{ListManager.create_list}: creates a new list on the service

    For list management information, see L{ListManager}.

    TERMINOLOGY
    -----------
    X{Query} is the term for an arbitrarily complex structured request for
    data from the webservice. The user is responsible for specifying the
    structure that determines what records are returned, and what information
    about each record is provided.

    X{Template} is the term for a predefined "Query", ie: one that has been
    written and saved on the webservice you will access. The definition
    of the query is already done, but the user may want to specify the
    values of the constraints that exist on the template. Templates are accessed
    by name, and while you can easily introspect templates, it is assumed
    you know what they do when you use them

    X{List} is a saved result set containing a set of objects previously identified
    in the database. Lists can be created and managed using this client library.

    @see: L{intermine.query}
    """
    QUERY_PATH             = '/query/results'
    LIST_ENRICHMENT_PATH   = '/list/enrichment'
    WIDGETS_PATH           = '/widgets'
    QUERY_LIST_UPLOAD_PATH = '/query/tolist/json'
    QUERY_LIST_APPEND_PATH = '/query/append/tolist/json'
    MODEL_PATH             = '/model'
    TEMPLATES_PATH         = '/templates/xml'
    TEMPLATEQUERY_PATH     = '/template/results'
    LIST_PATH              = '/lists/json'
    LIST_CREATION_PATH     = '/lists/json'
    LIST_RENAME_PATH       = '/lists/rename/json'
    LIST_APPENDING_PATH    = '/lists/append/json'
    LIST_TAG_PATH          = '/list/tags/json'
    SAVEDQUERY_PATH        = '/savedqueries/xml'
    VERSION_PATH           = '/version/ws'
    RELEASE_PATH           = '/version/release'
    SCHEME                 = 'http://'
    SERVICE_RESOLUTION_PATH = "/check/"

    def __init__(self, root,
            username=None, password=None, token=None,
            prefetch_depth=1, prefetch_id_only=False):
        """
        Constructor
        ===========

        Construct a connection to a webservice::

            url = "http://www.flymine.org/query/service"

            # An unauthenticated connection - access to all public data
            service = Service(url)

            # An authenticated connection - access to private and public data
            service = Service(url, token="ABC123456")


        @param root: the root url of the webservice (required). A missing
            scheme is filled in with L{Service.SCHEME}, and "/service" is
            appended unless the url already ends with it.
        @param username: your login name (optional)
        @param password: your password (required if a username is given)
        @param token: your API access token (optional - used in preference to username and password)
        @param prefetch_depth: stored unchanged on the service
            (NOTE(review): presumably consulted when results are fetched - confirm)
        @param prefetch_id_only: stored unchanged on the service
            (NOTE(review): presumably consulted when results are fetched - confirm)

        @raise ServiceError: if the version cannot be fetched and parsed, or
            if a token is given to a service older than version 6
        @raise ValueError: if a username is supplied, but no password

        There are two alternative authentication systems supported by InterMine
        webservices. The first is username and password authentication, which
        is supported by all webservices. Newer webservices (version 6+)
        also support API access token authentication, which is the recommended
        system to use. Token access is more secure as you will never have
        to transmit your username or password, and the token can be easily changed
        or disabled without changing your webapp login details.

        """
        o = urlparse(root)
        if not o.scheme:
            root = self.SCHEME + root
        if not root.endswith("/service"):
            root = root + "/service"

        self.root = root
        self.prefetch_depth = prefetch_depth
        self.prefetch_id_only = prefetch_id_only

        # Initialize empty cached data.
        self._templates = None
        self._model = None
        self._version = None
        self._release = None
        self._widgets = None
        self._list_manager = ListManager(self)
        self.__missing_method_name = None

        # A token always wins over a username/password pair, so there is
        # no "both supplied" error case to detect here.
        if token:
            self.opener = InterMineURLOpener(token=token)
        elif username:
            if not password:
                raise ValueError("Username given, but no password supplied")
            self.opener = InterMineURLOpener((username, password))
        else:
            self.opener = InterMineURLOpener()

        # Fetching the version doubles as a check that the root url is usable.
        try:
            self.version
        except WebserviceError as e:
            raise ServiceError("Could not validate service - is the root url (%s) correct? %s" % (root, e))

        if token and self.version < 6:
            raise ServiceError("This service does not support API access token authentication")

        # Set up sugary aliases
        self.query = self.new_query

    # Delegated list methods

    LIST_MANAGER_METHODS = frozenset(["get_list", "get_all_lists",
        "get_all_list_names",
        "create_list", "get_list_count", "delete_lists", "l"])

    def __getattr__(self, name):
        """
        Delegate the list management methods to the list manager.

        Only called when normal attribute lookup fails.

        @raise AttributeError: for any name not in L{Service.LIST_MANAGER_METHODS}
        """
        if name in self.LIST_MANAGER_METHODS:
            return getattr(self._list_manager, name)
        raise AttributeError("Could not find " + name)

    def __del__(self):
        """Ask the list manager to delete its temporary lists."""
        # Read the instance dict directly: if construction failed before the
        # list manager was assigned, a plain attribute access would fall
        # through to __getattr__ and raise AttributeError from the finalizer.
        manager = self.__dict__.get("_list_manager")
        if manager is None:
            return
        try:
            manager.delete_temporary_lists()
        except ReferenceError:
            pass

    @property
    def version(self):
        """
        Returns the webservice version
        ==============================

        The version specifies what capabilities a
        specific webservice provides (for example, API access
        token authentication requires version 6 or later).
        It is fetched once and then cached.

        @raise ServiceError: if the response cannot be parsed as an integer

        @rtype: int
        """
        if self._version is None:
            try:
                url = self.root + self.VERSION_PATH
                self._version = int(self.opener.open(url).read())
            except ValueError as e:
                raise ServiceError("Could not parse a valid webservice version: " + str(e))
        return self._version

    def resolve_service_path(self, variant):
        """Resolve the path to optional services"""
        url = self.root + self.SERVICE_RESOLUTION_PATH + variant
        return self.opener.open(url).read()

    @property
    def release(self):
        """
        Returns the datawarehouse release
        =================================

        Service.release S{->} string

        The release is an arbitrary string used to distinguish
        releases of the datawarehouse. This usually coincides
        with updates to the data contained within. While a string,
        releases usually sort in ascending order of recentness
        (eg: "release-26", "release-27", "release-28"). They can also
        have less machine readable meanings (eg: "beta")

        The value is fetched once and then cached.

        @rtype: string
        """
        if self._release is None:
            # Go through the service's own opener, as for the version, so
            # the request is made the same way as every other request.
            url = self.root + self.RELEASE_PATH
            self._release = self.opener.open(url).read()
        return self._release

    def load_query(self, xml, root=None):
        """
        Construct a new Query object for the given webservice
        =====================================================

        This is the standard method for instantiating new Query
        objects from XML. Queries require access to the data model, as well
        as the service itself, so it is easiest to access them through
        this factory method.

        @param xml: the serialised query, handed to L{intermine.query.Query.from_xml}
        @param root: passed through unchanged to L{intermine.query.Query.from_xml}

        @return: L{intermine.query.Query}
        """
        return Query.from_xml(xml, self.model, root=root)

    def select(self, *columns, **kwargs):
        """
        Construct a new Query object with the given columns selected.
        =============================================================

        As new_query, except that instead of a root class, a list of
        output column expressions are passed instead.

        If an "xml" keyword argument is given, the query is loaded from
        that XML instead and the columns are ignored. A single column that
        does not name an attribute is expanded to select all of its fields
        ("path.*").
        """
        if "xml" in kwargs:
            return self.load_query(kwargs["xml"])
        if len(columns) == 1:
            view = columns[0]
            if isinstance(view, Attribute):
                return Query(self.model, self).select("%s.%s" % (view.declared_in.name, view))
            if isinstance(view, Reference):
                return Query(self.model, self).select("%s.%s.*" % (view.declared_in.name, view))
            elif not isinstance(view, Column) and not str(view).endswith("*"):
                path = self.model.make_path(view)
                if not path.is_attribute():
                    return Query(self.model, self).select(str(view) + ".*")
        return Query(self.model, self).select(*columns)

    new_query = select

    def get_template(self, name):
        """
        Returns a template of the given name
        ====================================

        Tries to retrieve a template of the given name
        from the webservice. If you are trying to fetch
        a private template (ie. one you made yourself
        and is not available to others) then you may need to authenticate

        @see: L{intermine.webservice.Service.__init__}

        @param name: the template's name
        @type name: string

        @raise ServiceError: if the template does not exist
        @raise QueryParseError: if the template cannot be parsed

        @return: L{intermine.query.Template}
        """
        try:
            t = self.templates[name]
        except KeyError:
            raise ServiceError("There is no template called '"
                + name + "' at this service")
        if not isinstance(t, Template):
            # Still the raw XML: parse it now and keep the parsed form.
            t = Template.from_xml(t, self.model, self)
            self.templates[name] = t
        return t

    @property
    def widgets(self):
        """
        The dictionary of widgets from the webservice
        ==============================================

        The set of widgets available to a service does not
        change between releases, so they are cached.
        If you are running a long running process, you may
        wish to periodically dump the cache by calling
        L{Service.flush}, or simply get a new Service object.

        @raise ServiceError: if the response reports an error

        @rtype: dict
        """
        if self._widgets is None:
            sock = self.opener.open(self.root + self.WIDGETS_PATH)
            try:
                text = sock.read()
            finally:
                sock.close()
            data = json.loads(text)
            # A response without an "error" entry is treated as a success.
            if data.get('error') is not None:
                raise ServiceError(data['error'])
            self._widgets = dict((w['name'], w) for w in data['widgets'])
        return self._widgets

    def flush(self):
        """
        Flushes any cached data.
        """
        self._templates = None
        self._model = None
        self._version = None
        self._release = None
        self._widgets = None

    @property
    def templates(self):
        """
        The dictionary of templates from the webservice
        ===============================================

        Service.templates S{->} dict(intermine.query.Template|string)

        For efficiency's sake, Templates are not parsed until
        they are required, and until then they are stored as XML
        strings. It is recommended that in most cases you would want
        to use L{Service.get_template}.

        You can use this property however to test for template existence though::

         if name in service.templates:
            template = service.get_template(name)

        @raise ServiceError: if two templates share the same name

        @rtype: dict

        """
        if self._templates is None:
            sock = self.opener.open(self.root + self.TEMPLATES_PATH)
            try:
                dom = minidom.parse(sock)
            finally:
                sock.close()
            templates = {}
            for e in dom.getElementsByTagName('template'):
                name = e.getAttribute('name')
                if name in templates:
                    raise ServiceError("Two templates with same name: " + name)
                templates[name] = e.toxml()
            self._templates = templates
        return self._templates

    @property
    def model(self):
        """
        The data model for the webservice you are querying
        ==================================================

        Service.model S{->} L{intermine.model.Model}

        This is used when constructing queries to provide them
        with information on the structure of the data model
        they are accessing. You are very unlikely to want to
        access this object directly.

        raises ModelParseError: if the model cannot be read

        @rtype: L{intermine.model.Model}

        """
        if self._model is None:
            model_url = self.root + self.MODEL_PATH
            self._model = Model(model_url, self)
        return self._model

    def get_results(self, path, params, rowformat, view, cld=None):
        """
        Return an Iterator over the rows of the results
        ===============================================

        This method is called internally by the query objects
        when they are called to get results. You will not
        normally need to call it directly

        @param path: The resource path (eg: "/query/results")
        @type path: string
        @param params: The query parameters for this request as a dictionary
        @type params: dict
        @param rowformat: One of "rr", "object", "count", "dict", "list", "tsv", "csv", "jsonrows", "jsonobjects"
        @type rowformat: string
        @param view: The output columns
        @type view: list
        @param cld: passed through unchanged to the L{ResultIterator}

        @raise WebserviceError: for failed requests

        @return: L{intermine.webservice.ResultIterator}
        """
        return ResultIterator(self, path, params, rowformat, view, cld)