1 from xml.dom import minidom
2 import urllib
3 from urlparse import urlparse
4 import base64
5 import UserDict
6
7
8
9
10
11
12
13
14
15
16 try:
17 import simplejson as json
18 except ImportError:
19 try:
20 import json
21 except ImportError:
22 raise ImportError("Could not find any JSON module to import - "
23 + "please install simplejson or jsonlib to continue")
24
25
26 from intermine.query import Query, Template
27 from intermine.model import Model, Attribute, Reference, Collection, Column
28 from intermine.lists.listmanager import ListManager
29 from intermine.errors import ServiceError, WebserviceError
30 from intermine.results import InterMineURLOpener, ResultIterator
31
32 """
33 Webservice Interaction Routines for InterMine Webservices
34 =========================================================
35
36 Classes for dealing with communication with an InterMine
37 RESTful webservice.
38
39 """
40
41 __author__ = "Alex Kalderimis"
42 __organization__ = "InterMine"
43 __license__ = "LGPL"
44 __contact__ = "dev@intermine.org"
45
46 -class Registry(object, UserDict.DictMixin):
47 """
48 A Class representing an InterMine registry.
49 ===========================================
50
51 Registries are web-services that mines can automatically register themselves
52 with, and thus enable service discovery by clients.
53
54 SYNOPSIS
55 --------
56
57 example::
58
59 from intermine.webservice import Registry
60
61 # Connect to the default registry service
62 # at www.intermine.org/registry
63 registry = Registry()
64
65 # Find all the available mines:
66 for name, mine in registry.items():
67 print name, mine.version
68
69 # Dict-like interface for accessing mines.
70 flymine = registry["flymine"]
71
72 # The mine object is a Service
73 for gene in flymine.select("Gene.*").results():
74 process(gene)
75
76 This class is meant to aid with interoperation between
77 mines by allowing them to discover one-another, and
78 allow users to always have correct connection information.
79 """
80
81 MINES_PATH = "/mines.json"
82
def __init__(self, registry_url="http://www.intermine.org/registry"):
    """
    Constructor
    ===========

    Download the list of mines from a registry and index it by name::

        registry = Registry()
        registry = Registry("http://www.intermine.org/registry")

    The document at C{registry_url + Registry.MINES_PATH} is parsed as
    JSON. Each entry of its "mines" list is stored under its "name",
    and a second table maps the lower-cased names back to the names
    as given by the registry.

    @param registry_url: the root url of the registry
                         (optional - defaults to the intermine.org registry)
    @type registry_url: string
    """
    self.registry_url = registry_url
    opener = InterMineURLOpener()
    sock = opener.open(registry_url + Registry.MINES_PATH)
    try:
        data = sock.read()
    finally:
        # Always release the connection, even if reading fails.
        sock.close()
    mine_data = json.loads(data)
    self.__mine_dict = dict((mine["name"], mine) for mine in mine_data["mines"])
    # lower-cased name -> name as registered
    self.__synonyms = dict((name.lower(), name) for name in self.__mine_dict)
    # Starts empty; nothing is stored in it by the constructor.
    self.__mine_cache = {}
91
93 return name.lower() in self.__synonyms
94
96 lc = name.lower()
97 if lc in self.__synonyms:
98 if lc not in self.__mine_cache:
99 self.__mine_cache[lc] = Service(self.__mine_dict[self.__synonyms[lc]]["webServiceRoot"])
100 return self.__mine_cache[lc]
101 else:
102 raise KeyError("Unknown mine: " + name)
103
105 raise NotImplementedError("You cannot add items to a registry")
106
108 raise NotImplementedError("You cannot remove items from a registry")
109
111 return self.__mine_dict.keys()
112
114 """
115 A class representing connections to different InterMine WebServices
116 ===================================================================
117
118 The intermine.webservice.Service class is the main interface for the user.
119 It will provide access to queries and templates, as well as doing the
120 background task of fetching the data model, and actually requesting
121 the query results.
122
123 SYNOPSIS
124 --------
125
126 example::
127
128 from intermine.webservice import Service
129 service = Service("http://www.flymine.org/query/service")
130
131 template = service.get_template("Gene_Pathways")
132 for row in template.results(A={"value":"zen"}):
133 do_something_with(row)
134 ...
135
136 query = service.new_query()
137 query.add_view("Gene.symbol", "Gene.pathway.name")
138 query.add_constraint("Gene", "LOOKUP", "zen")
139 for row in query.results():
140 do_something_with(row)
141 ...
142
143 new_list = service.create_list("some/file/with.ids", "Gene")
144 list_on_server = service.get_list("On server")
145 in_both = new_list & list_on_server
146 in_both.name = "Intersection of these lists"
147 for row in in_both:
148 do_something_with(row)
149 ...
150
151 OVERVIEW
152 --------
    The three methods the user will be most concerned with are:
154 - L{Service.new_query}: constructs a new query to query a service with
155 - L{Service.get_template}: gets a template from the service
156 - L{ListManager.create_list}: creates a new list on the service
157
158 For list management information, see L{ListManager}.
159
160 TERMINOLOGY
161 -----------
162 X{Query} is the term for an arbitrarily complex structured request for
163 data from the webservice. The user is responsible for specifying the
164 structure that determines what records are returned, and what information
165 about each record is provided.
166
167 X{Template} is the term for a predefined "Query", ie: one that has been
168 written and saved on the webservice you will access. The definition
169 of the query is already done, but the user may want to specify the
170 values of the constraints that exist on the template. Templates are accessed
171 by name, and while you can easily introspect templates, it is assumed
172 you know what they do when you use them
173
174 X{List} is a saved result set containing a set of objects previously identified
175 in the database. Lists can be created and managed using this client library.
176
177 @see: L{intermine.query}
178 """
179 QUERY_PATH = '/query/results'
180 LIST_ENRICHMENT_PATH = '/list/enrichment'
181 QUERY_LIST_UPLOAD_PATH = '/query/tolist/json'
182 QUERY_LIST_APPEND_PATH = '/query/append/tolist/json'
183 MODEL_PATH = '/model'
184 TEMPLATES_PATH = '/templates/xml'
185 TEMPLATEQUERY_PATH = '/template/results'
186 LIST_PATH = '/lists/json'
187 LIST_CREATION_PATH = '/lists/json'
188 LIST_RENAME_PATH = '/lists/rename/json'
189 LIST_APPENDING_PATH = '/lists/append/json'
190 LIST_TAG_PATH = '/list/tags/json'
191 SAVEDQUERY_PATH = '/savedqueries/xml'
192 VERSION_PATH = '/version/ws'
193 RELEASE_PATH = '/version/release'
194 SCHEME = 'http://'
195 SERVICE_RESOLUTION_PATH = "/check/"
196
def __init__(self, root,
             username=None, password=None, token=None,
             prefetch_depth=1, prefetch_id_only=False):
    """
    Constructor
    ===========

    Construct a connection to a webservice::

        url = "http://www.flymine.org/query/service"

        # An unauthenticated connection - access to all public data
        service = Service(url)

        # An authenticated connection - access to private and public data
        service = Service(url, token="ABC123456")


    @param root: the root url of the webservice (required). Trailing
                 slashes are ignored; a missing scheme and a missing
                 "/service" suffix are added.
    @param username: your login name (optional)
    @param password: your password (required if a username is given)
    @param token: your API access token (optional - used in preference to username and password)
    @param prefetch_depth: stored as C{self.prefetch_depth} (optional - default 1)
    @param prefetch_id_only: stored as C{self.prefetch_id_only} (optional - default False)

    @raise ServiceError: if the version cannot be fetched and parsed, or
                         if a token is given to a service older than version 6
    @raise ValueError: if a username is supplied (without a token), but no password

    There are two alternative authentication systems supported by InterMine
    webservices. The first is username and password authentication, which
    is supported by all webservices. Newer webservices (version 6+)
    also support API access token authentication, which is the recommended
    system to use. Token access is more secure as you will never have
    to transmit your username or password, and the token can be easily changed
    or disabled without changing your webapp login details.

    """
    # Drop trailing slashes first, so that ".../service/" is recognised
    # below instead of becoming ".../service//service".
    root = root.rstrip("/")
    if not urlparse(root).scheme:
        root = self.SCHEME + root
    if not root.endswith("/service"):
        root = root + "/service"

    self.root = root
    self.prefetch_depth = prefetch_depth
    self.prefetch_id_only = prefetch_id_only
    self._templates = None
    self._model = None
    self._version = None
    self._release = None
    self._list_manager = ListManager(self)
    self.__missing_method_name = None

    # A token takes precedence: username and password are only looked at
    # when no token was supplied.
    if token:
        self.opener = InterMineURLOpener(token=token)
    elif username:
        if not password:
            raise ValueError("Username given, but no password supplied")
        self.opener = InterMineURLOpener((username, password))
    else:
        self.opener = InterMineURLOpener()

    try:
        # Reading the property requests the version from the service,
        # which doubles as a check that the root url is usable.
        self.version
    except WebserviceError as e:
        raise ServiceError("Could not validate service - is the root url (%s) correct? %s" % (root, e))

    if token and self.version < 6:
        raise ServiceError("This service does not support API access token authentication")

    # Alias: service.query(...) behaves exactly like service.new_query(...)
    self.query = self.new_query
268
269
270
271
272 LIST_MANAGER_METHODS = frozenset(["get_list", "get_all_lists",
273 "get_all_list_names",
274 "create_list", "get_list_count", "delete_lists", "l"])
275
278
284
290
291 @property
293 """
294 Returns the webservice version
295 ==============================
296
297 The version specifies what capabilities a
298 specific webservice provides. The most current
299 version is 3
300
301 may raise ServiceError: if the version cannot be fetched
302
303 @rtype: int
304 """
305 if self._version is None:
306 try:
307 url = self.root + self.VERSION_PATH
308 self._version = int(self.opener.open(url).read())
309 except ValueError, e:
310 raise ServiceError("Could not parse a valid webservice version: " + str(e))
311 return self._version
312
317
318 @property
320 """
321 Returns the datawarehouse release
322 =================================
323
324 Service.release S{->} string
325
326 The release is an arbitrary string used to distinguish
327 releases of the datawarehouse. This usually coincides
328 with updates to the data contained within. While a string,
329 releases usually sort in ascending order of recentness
330 (eg: "release-26", "release-27", "release-28"). They can also
331 have less machine readable meanings (eg: "beta")
332
333 @rtype: string
334 """
335 if self._release is None:
336 self._release = urllib.urlopen(self.root + self.RELEASE_PATH).read()
337 return self._release
338
340 """
341 Construct a new Query object for the given webservice
342 =====================================================
343
344 This is the standard method for instantiating new Query
345 objects. Queries require access to the data model, as well
346 as the service itself, so it is easiest to access them through
347 this factory method.
348
349 @return: L{intermine.query.Query}
350 """
351 return Query.from_xml(xml, self.model, root=root)
352
def select(self, *columns, **kwargs):
    """
    Construct a new Query object with the given columns selected.
    =============================================================

    As new_query, except that instead of a root class, a list of
    output column expressions are passed instead.

    If an C{xml} keyword argument is given, the query is loaded from
    it with C{load_query} and any columns are ignored. A single column
    is expanded as follows before being selected:

      - an Attribute becomes "DeclaringClass.attribute"
      - a Reference becomes "DeclaringClass.reference.*"
      - any other value that is not a Column, does not end in "*" and
        does not resolve to an attribute path gets ".*" appended

    In all other cases the columns are selected as given.
    """
    if "xml" in kwargs:
        return self.load_query(kwargs["xml"])

    # Only a lone column gets special treatment.
    if len(columns) != 1:
        return Query(self.model, self).select(*columns)

    (col,) = columns
    if isinstance(col, Attribute):
        return Query(self.model, self).select("%s.%s" % (col.declared_in.name, col))
    if isinstance(col, Reference):
        return Query(self.model, self).select("%s.%s.*" % (col.declared_in.name, col))
    if isinstance(col, Column) or str(col).endswith("*"):
        return Query(self.model, self).select(col)
    if self.model.make_path(col).is_attribute():
        return Query(self.model, self).select(col)
    return Query(self.model, self).select(str(col) + ".*")
374
375 new_query = select
376
378 """
379 Returns a template of the given name
380 ====================================
381
382 Tries to retrieve a template of the given name
383 from the webservice. If you are trying to fetch
384 a private template (ie. one you made yourself
385 and is not available to others) then you may need to authenticate
386
387 @see: L{intermine.webservice.Service.__init__}
388
389 @param name: the template's name
390 @type name: string
391
392 @raise ServiceError: if the template does not exist
393 @raise QueryParseError: if the template cannot be parsed
394
395 @return: L{intermine.query.Template}
396 """
397 try:
398 t = self.templates[name]
399 except KeyError:
400 raise ServiceError("There is no template called '"
401 + name + "' at this service")
402 if not isinstance(t, Template):
403 t = Template.from_xml(t, self.model, self)
404 self.templates[name] = t
405 return t
406
407 @property
409 """
410 The dictionary of templates from the webservice
411 ===============================================
412
413 Service.templates S{->} dict(intermine.query.Template|string)
414
415 For efficiency's sake, Templates are not parsed until
416 they are required, and until then they are stored as XML
417 strings. It is recommended that in most cases you would want
418 to use L{Service.get_template}.
419
420 You can use this property however to test for template existence though::
421
422 if name in service.templates:
423 template = service.get_template(name)
424
425 @rtype: dict
426
427 """
428 if self._templates is None:
429 sock = self.opener.open(self.root + self.TEMPLATES_PATH)
430 dom = minidom.parse(sock)
431 sock.close()
432 templates = {}
433 for e in dom.getElementsByTagName('template'):
434 name = e.getAttribute('name')
435 if name in templates:
436 raise ServiceError("Two templates with same name: " + name)
437 else:
438 templates[name] = e.toxml()
439 self._templates = templates
440 return self._templates
441
442 @property
444 """
445 The data model for the webservice you are querying
446 ==================================================
447
448 Service.model S{->} L{intermine.model.Model}
449
450 This is used when constructing queries to provide them
451 with information on the structure of the data model
452 they are accessing. You are very unlikely to want to
453 access this object directly.
454
455 raises ModelParseError: if the model cannot be read
456
457 @rtype: L{intermine.model.Model}
458
459 """
460 if self._model is None:
461 model_url = self.root + self.MODEL_PATH
462 self._model = Model(model_url, self)
463 return self._model
464
def get_results(self, path, params, rowformat, view, cld=None):
    """
    Return an Iterator over the rows of the results
    ===============================================

    Query objects call this method internally when they are
    asked for their results, so there is normally no need to
    call it yourself. All arguments are handed on, unchanged,
    to a new ResultIterator for this service.

    @param path: The resource path (eg: "/query/results")
    @type path: string
    @param params: The query parameters for this request as a dictionary
    @type params: dict
    @param rowformat: One of "rr", "object", "count", "dict", "list", "tsv", "csv", "jsonrows", "jsonobjects"
    @type rowformat: string
    @param view: The output columns
    @type view: list
    @param cld: passed on to the ResultIterator as its last argument (optional)

    @raise WebserviceError: for failed requests

    @return: L{intermine.results.ResultIterator}
    """
    row_iterator = ResultIterator(self, path, params, rowformat, view, cld)
    return row_iterator
488