1 from xml.dom import minidom
2 import urllib
3 from urlparse import urlparse
4 import base64
5 import UserDict
6
7
8
9
10
11
12
13
14
15
16 try:
17 import simplejson as json
18 except ImportError:
19 try:
20 import json
21 except ImportError:
22 raise ImportError("Could not find any JSON module to import - "
23 + "please install simplejson or jsonlib to continue")
24
25
26 from intermine.query import Query, Template
27 from intermine.model import Model, Attribute, Reference, Collection, Column
28 from intermine.lists.listmanager import ListManager
29 from intermine.errors import ServiceError, WebserviceError
30 from intermine.results import InterMineURLOpener, ResultIterator
31
32 """
33 Webservice Interaction Routines for InterMine Webservices
34 =========================================================
35
36 Classes for dealing with communication with an InterMine
37 RESTful webservice.
38
39 """
40
41 __author__ = "Alex Kalderimis"
42 __organization__ = "InterMine"
43 __license__ = "LGPL"
44 __contact__ = "dev@intermine.org"
45
46 -class Registry(object, UserDict.DictMixin):
47 """
48 A Class representing an InterMine registry.
49 ===========================================
50
51 Registries are web-services that mines can automatically register themselves
52 with, and thus enable service discovery by clients.
53
54 SYNOPSIS
55 --------
56
57 example::
58
59 from intermine.webservice import Registry
60
61 # Connect to the default registry service
62 # at www.intermine.org/registry
63 registry = Registry()
64
65 # Find all the available mines:
66 for name, mine in registry.items():
67 print name, mine.version
68
69 # Dict-like interface for accessing mines.
70 flymine = registry["flymine"]
71
72 # The mine object is a Service
73 for gene in flymine.select("Gene.*").results():
74 process(gene)
75
76 This class is meant to aid with interoperation between
77 mines by allowing them to discover one-another, and
78 allow users to always have correct connection information.
79 """
80
81 MINES_PATH = "/mines.json"
82
def __init__(self, registry_url="http://www.intermine.org/registry"):
    """
    Constructor
    ===========

    Fetch the list of mines from a registry service and index
    it by mine name.

    The data is read from C{registry_url + Registry.MINES_PATH}, and
    is expected to be a JSON object with a "mines" list, each entry
    of which has at least a "name" key.

    @param registry_url: the base url of the registry service (optional)
    @type registry_url: string
    """
    self.registry_url = registry_url
    opener = InterMineURLOpener()
    sock = opener.open(registry_url + Registry.MINES_PATH)
    try:
        data = sock.read()
    finally:
        # Release the connection even when reading fails.
        sock.close()
    mine_data = json.loads(data)
    # The mine records, keyed by the name reported by the registry.
    self.__mine_dict = dict((mine["name"], mine) for mine in mine_data["mines"])
    # Lower-cased name -> registered name, so look-ups can ignore case.
    self.__synonyms = dict((name.lower(), name) for name in self.__mine_dict)
    # Service objects are only created when a mine is first requested.
    self.__mine_cache = {}
91
93 return name.lower() in self.__synonyms
94
96 lc = name.lower()
97 if lc in self.__synonyms:
98 if lc not in self.__mine_cache:
99 self.__mine_cache[lc] = Service(self.__mine_dict[self.__synonyms[lc]]["webServiceRoot"])
100 return self.__mine_cache[lc]
101 else:
102 raise KeyError("Unknown mine: " + name)
103
105 raise NotImplementedError("You cannot add items to a registry")
106
108 raise NotImplementedError("You cannot remove items from a registry")
109
111 return self.__mine_dict.keys()
112
114 """
115 A class representing connections to different InterMine WebServices
116 ===================================================================
117
118 The intermine.webservice.Service class is the main interface for the user.
119 It will provide access to queries and templates, as well as doing the
120 background task of fetching the data model, and actually requesting
121 the query results.
122
123 SYNOPSIS
124 --------
125
126 example::
127
128 from intermine.webservice import Service
129 service = Service("http://www.flymine.org/query/service")
130
131 template = service.get_template("Gene_Pathways")
132 for row in template.results(A={"value":"zen"}):
133 do_something_with(row)
134 ...
135
136 query = service.new_query()
137 query.add_view("Gene.symbol", "Gene.pathway.name")
138 query.add_constraint("Gene", "LOOKUP", "zen")
139 for row in query.results():
140 do_something_with(row)
141 ...
142
143 new_list = service.create_list("some/file/with.ids", "Gene")
144 list_on_server = service.get_list("On server")
145 in_both = new_list & list_on_server
146 in_both.name = "Intersection of these lists"
147 for row in in_both:
148 do_something_with(row)
149 ...
150
151 OVERVIEW
152 --------
The three methods the user will be most concerned with are:
154 - L{Service.new_query}: constructs a new query to query a service with
155 - L{Service.get_template}: gets a template from the service
156 - L{ListManager.create_list}: creates a new list on the service
157
158 For list management information, see L{ListManager}.
159
160 TERMINOLOGY
161 -----------
162 X{Query} is the term for an arbitrarily complex structured request for
163 data from the webservice. The user is responsible for specifying the
164 structure that determines what records are returned, and what information
165 about each record is provided.
166
167 X{Template} is the term for a predefined "Query", ie: one that has been
168 written and saved on the webservice you will access. The definition
169 of the query is already done, but the user may want to specify the
170 values of the constraints that exist on the template. Templates are accessed
171 by name, and while you can easily introspect templates, it is assumed
172 you know what they do when you use them
173
174 X{List} is a saved result set containing a set of objects previously identified
175 in the database. Lists can be created and managed using this client library.
176
177 @see: L{intermine.query}
178 """
# Resource paths of the webservice. Those used by the code visible in this
# class (VERSION_PATH, RELEASE_PATH, TEMPLATES_PATH and MODEL_PATH) are
# appended to the service root url to build the request url.
# NOTE(review): the remaining paths are presumably combined with the root
# in the same way by code outside this view - confirm before changing them.
QUERY_PATH = '/query/results'
LIST_ENRICHMENT_PATH = '/list/enrichment'
WIDGETS_PATH = '/widgets'
QUERY_LIST_UPLOAD_PATH = '/query/tolist/json'
QUERY_LIST_APPEND_PATH = '/query/append/tolist/json'
MODEL_PATH = '/model'
TEMPLATES_PATH = '/templates/xml'
TEMPLATEQUERY_PATH = '/template/results'
# LIST_PATH and LIST_CREATION_PATH currently hold the same value.
LIST_PATH = '/lists/json'
LIST_CREATION_PATH = '/lists/json'
LIST_RENAME_PATH = '/lists/rename/json'
LIST_APPENDING_PATH = '/lists/append/json'
LIST_TAG_PATH = '/list/tags/json'
SAVEDQUERY_PATH = '/savedqueries/xml'
VERSION_PATH = '/version/ws'
RELEASE_PATH = '/version/release'
# NOTE(review): the constructor visible here prepends the literal "http://"
# to scheme-less roots rather than reading SCHEME.
SCHEME = 'http://'
SERVICE_RESOLUTION_PATH = "/check/"
197
def __init__(self, root,
             username=None, password=None, token=None,
             prefetch_depth=1, prefetch_id_only=False):
    """
    Constructor
    ===========

    Construct a connection to a webservice::

        url = "http://www.flymine.org/query/service"

        # An unauthenticated connection - access to all public data
        service = Service(url)

        # An authenticated connection - access to private and public data
        service = Service(url, token="ABC123456")


    @param root: the root url of the webservice (required). "http://" is
        prepended if the url has no scheme, and "/service" is appended
        if the url does not already end with it.
    @param username: your login name (optional)
    @param password: your password (required if a username is given)
    @param token: your API access token (optional - used in preference to username and password)
    @param prefetch_depth: stored as C{self.prefetch_depth} for use elsewhere (optional - default 1)
    @param prefetch_id_only: stored as C{self.prefetch_id_only} for use elsewhere (optional - default False)

    @raise ServiceError: if the version cannot be fetched and parsed, or if
        a token is supplied but the service version is lower than 6
    @raise ValueError: if a username is supplied, but no password

    There are two alternative authentication systems supported by InterMine
    webservices. The first is username and password authentication, which
    is supported by all webservices. Newer webservices (version 6+)
    also support API access token authentication, which is the recommended
    system to use. Token access is more secure as you will never have
    to transmit your username or password, and the token can be easily changed
    or disabled without changing your webapp login details.

    """
    o = urlparse(root)
    if not o.scheme:
        root = "http://" + root
    if not root.endswith("/service"):
        root = root + "/service"

    self.root = root
    self.prefetch_depth = prefetch_depth
    self.prefetch_id_only = prefetch_id_only

    # Caches for data requested from the service; None means "not loaded".
    self._templates = None
    self._model = None
    self._version = None
    self._release = None
    self._widgets = None
    self._list_manager = ListManager(self)
    self.__missing_method_name = None

    # A token takes precedence: when one is given, any username and
    # password are ignored.
    if token:
        self.opener = InterMineURLOpener(token=token)
    elif username:
        if not password:
            raise ValueError("Username given, but no password supplied")
        self.opener = InterMineURLOpener((username, password))
    else:
        self.opener = InterMineURLOpener()

    # Requesting the version doubles as a check that the root url
    # really points at a webservice.
    try:
        self.version
    except WebserviceError as e:
        raise ServiceError("Could not validate service - is the root url (%s) correct? %s" % (root, e))

    if token and self.version < 6:
        raise ServiceError("This service does not support API access token authentication")

    # service.query(...) is an alias for service.new_query(...)
    self.query = self.new_query
271
272
273
274
# An immutable set of list-handling method names.
# NOTE(review): the code that consults this set is not visible here -
# presumably calls to these names are forwarded to the ListManager held
# in self._list_manager; confirm against the attribute look-up code.
LIST_MANAGER_METHODS = frozenset(["get_list", "get_all_lists",
    "get_all_list_names",
    "create_list", "get_list_count", "delete_lists", "l"])
278
281
287
293
294 @property
296 """
297 Returns the webservice version
298 ==============================
299
The version specifies what capabilities a
specific webservice provides. For example, API
access token authentication requires version 6
or above.
303
304 may raise ServiceError: if the version cannot be fetched
305
306 @rtype: int
307 """
308 if self._version is None:
309 try:
310 url = self.root + self.VERSION_PATH
311 self._version = int(self.opener.open(url).read())
312 except ValueError, e:
313 raise ServiceError("Could not parse a valid webservice version: " + str(e))
314 return self._version
315
320
321 @property
323 """
324 Returns the datawarehouse release
325 =================================
326
327 Service.release S{->} string
328
329 The release is an arbitrary string used to distinguish
330 releases of the datawarehouse. This usually coincides
331 with updates to the data contained within. While a string,
332 releases usually sort in ascending order of recentness
333 (eg: "release-26", "release-27", "release-28"). They can also
334 have less machine readable meanings (eg: "beta")
335
336 @rtype: string
337 """
338 if self._release is None:
339 self._release = urllib.urlopen(self.root + self.RELEASE_PATH).read()
340 return self._release
341
343 """
Construct a new Query object for the given webservice from XML
==============================================================

This method builds a Query object from the XML serialisation
of a query. Queries require access to the data model, so the
XML is parsed with reference to the model of this service.
351
352 @return: L{intermine.query.Query}
353 """
354 return Query.from_xml(xml, self.model, root=root)
355
def select(self, *columns, **kwargs):
    """
    Construct a new Query object with the given columns selected.
    =============================================================

    As new_query, except that instead of a root class, a list of
    output column expressions is passed instead.

    If an "xml" keyword argument is given, the query is loaded from
    that XML and the columns are ignored. A single column that is a
    reference, or a path that is not an attribute, is expanded
    with ".*".

    @param columns: the output columns to select
    @param kwargs: only "xml" is inspected

    @return: the new query
    """
    if "xml" in kwargs:
        return self.load_query(kwargs["xml"])

    def fresh_query():
        # Every branch starts from its own, empty query on this service.
        return Query(self.model, self)

    if len(columns) == 1:
        view = columns[0]
        if isinstance(view, Attribute):
            qualified = "%s.%s" % (view.declared_in.name, view)
            return fresh_query().select(qualified)
        if isinstance(view, Reference):
            expanded = "%s.%s.*" % (view.declared_in.name, view)
            return fresh_query().select(expanded)
        if not (isinstance(view, Column) or str(view).endswith("*")):
            path = self.model.make_path(view)
            if not path.is_attribute():
                return fresh_query().select(str(view) + ".*")

    return fresh_query().select(*columns)
377
378 new_query = select
379
381 """
382 Returns a template of the given name
383 ====================================
384
385 Tries to retrieve a template of the given name
386 from the webservice. If you are trying to fetch
387 a private template (ie. one you made yourself
388 and is not available to others) then you may need to authenticate
389
390 @see: L{intermine.webservice.Service.__init__}
391
392 @param name: the template's name
393 @type name: string
394
395 @raise ServiceError: if the template does not exist
396 @raise QueryParseError: if the template cannot be parsed
397
398 @return: L{intermine.query.Template}
399 """
400 try:
401 t = self.templates[name]
402 except KeyError:
403 raise ServiceError("There is no template called '"
404 + name + "' at this service")
405 if not isinstance(t, Template):
406 t = Template.from_xml(t, self.model, self)
407 self.templates[name] = t
408 return t
409
410 @property
433
435 """
436 Flushes any cached data.
437 """
438 self._templates = None
439 self._model = None
440 self._version = None
441 self._release = None
442 self._widgets = None
443
444 @property
446 """
447 The dictionary of templates from the webservice
448 ===============================================
449
450 Service.templates S{->} dict(intermine.query.Template|string)
451
452 For efficiency's sake, Templates are not parsed until
453 they are required, and until then they are stored as XML
454 strings. It is recommended that in most cases you would want
455 to use L{Service.get_template}.
456
You can, however, use this property to test for template existence::
458
459 if name in service.templates:
460 template = service.get_template(name)
461
462 @rtype: dict
463
464 """
465 if self._templates is None:
466 sock = self.opener.open(self.root + self.TEMPLATES_PATH)
467 dom = minidom.parse(sock)
468 sock.close()
469 templates = {}
470 for e in dom.getElementsByTagName('template'):
471 name = e.getAttribute('name')
472 if name in templates:
473 raise ServiceError("Two templates with same name: " + name)
474 else:
475 templates[name] = e.toxml()
476 self._templates = templates
477 return self._templates
478
479 @property
481 """
482 The data model for the webservice you are querying
483 ==================================================
484
485 Service.model S{->} L{intermine.model.Model}
486
487 This is used when constructing queries to provide them
488 with information on the structure of the data model
489 they are accessing. You are very unlikely to want to
490 access this object directly.
491
492 raises ModelParseError: if the model cannot be read
493
494 @rtype: L{intermine.model.Model}
495
496 """
497 if self._model is None:
498 model_url = self.root + self.MODEL_PATH
499 self._model = Model(model_url, self)
500 return self._model
501
def get_results(self, path, params, rowformat, view, cld=None):
    """
    Return an Iterator over the rows of the results
    ===============================================

    Query objects call this method internally when they
    are asked for their results, so there is normally no
    need to call it directly.

    @param path: The resource path (eg: "/query/results")
    @type path: string
    @param params: The query parameters for this request as a dictionary
    @type params: dict
    @param rowformat: One of "rr", "object", "count", "dict", "list", "tsv", "csv", "jsonrows", "jsonobjects"
    @type rowformat: string
    @param view: The output columns
    @type view: list
    @param cld: passed through unchanged to the iterator (optional)

    @raise WebserviceError: for failed requests

    @return: L{intermine.results.ResultIterator}
    """
    iterator_args = (self, path, params, rowformat, view, cld)
    return ResultIterator(*iterator_args)
525