1 from urlparse import urlunsplit, urljoin
2 from xml.dom import minidom
3 import urllib
4 import csv
5 import base64
6
7
8 try:
9 import json
10 except ImportError:
11 import simplejson as json
12
13
14 from .query import Query, Template
15 from .model import Model
16 from .util import ReadableException
17
18 """
19 Webservice Interaction Routines for InterMine Webservices
20 =========================================================
21
22 Classes for dealing with communication with an InterMine
23 RESTful webservice.
24
25 """
26
27 __author__ = "Alex Kalderimis"
28 __organization__ = "InterMine"
29 __license__ = "LGPL"
30 __contact__ = "dev@intermine.org"
33 """
34 A class representing connections to different InterMine WebServices
35 ===================================================================
36
37 The intermine.webservice.Service class is the main interface for the user.
38 It will provide access to queries and templates, as well as doing the
39 background task of fetching the data model, and actually requesting
40 the query results.
41
42 SYNOPSIS
43 --------
44
45 example::
46
47 from intermine.webservice import Service
48 service = Service("http://www.flymine.org/query/service")
49
50 template = service.get_template("Gene_Pathways")
51 for row in template.results(A={"value":"zen"}):
52 do_something_with(row)
53 ...
54
55 query = service.new_query()
56 query.add_view("Gene.symbol", "Gene.pathway.name")
57 query.add_constraint("Gene", "LOOKUP", "zen")
58 for row in query.results():
59 do_something_with(row)
60 ...
61
62 OVERVIEW
63 --------
64 The two methods the user will be most concerned with are:
65 - L{Service.new_query}: constructs a new query to query a service with
66 - L{Service.get_template}: gets a template from the service
67
68 TERMINOLOGY
69 -----------
70 X{Query} is the term for an arbitrarily complex structured request for
71 data from the webservice. The user is responsible for specifying the
72 structure that determines what records are returned, and what information
73 about each record is provided.
74
75 X{Template} is the term for a predefined "Query", ie: one that has been
76 written and saved on the webservice you will access. The definition
77 of the query is already done, but the user may want to specify the
78 values of the constraints that exist on the template. Templates are accessed
79 by name, and while you can easily introspect templates, it is assumed
80 you know what they do when you use them
81
82 @see: L{intermine.query}
83 """
# Resource paths for the webservice endpoints. The ones used in this file
# (MODEL_PATH, TEMPLATES_PATH, VERSION_PATH, RELEASE_PATH) are appended
# directly to the service root url.
QUERY_PATH = '/query/results'
MODEL_PATH = '/model'
TEMPLATES_PATH = '/templates/xml'
TEMPLATEQUERY_PATH = '/template/results'
VERSION_PATH = '/version'
# NOTE(review): not referenced by the code visible in this file, and the
# value names the Perl client - confirm this is intended.
USER_AGENT = 'WebserviceInterMinePerlAPIClient'
LIST_PATH = '/lists/xml'
SAVEDQUERY_PATH = '/savedqueries/xml'
RELEASE_PATH = '/version/release'
# NOTE(review): not referenced by the code visible in this file.
SCHEME = 'http://'
94
def __init__(self, root, username=None, password=None):
    """
    Constructor
    ===========

    Construct a connection to a webservice::

        service = Service("http://www.flymine.org/query/service")

    The templates, model, version and release caches start out
    empty; each one is filled in the first time the matching
    property is read.

    @param root: the root url of the webservice (required)
    @param username: your login name (optional)
    @param password: your password (required if a username is given)

    @raise ValueError: if a username is supplied, but no password
    """
    self.root = root

    # Caches for the lazily fetched properties.
    self._templates = None
    self._model = None
    self._version = None
    self._release = None

    if not username:
        # Anonymous access: no credentials are attached to requests.
        self.opener = InterMineURLOpener()
    elif not password:
        raise ValueError("No password supplied")
    else:
        self.opener = InterMineURLOpener((username, password))
122
123
124
125
126
127
128
@property
def version(self):
    """
    Returns the webservice version
    ==============================

    The version specifies what capabilities a
    specific webservice provides. The most current
    version is 3

    The value is requested from the service on first access
    and cached afterwards.

    @raise ServiceError: if the response cannot be parsed as an integer

    @rtype: int
    """
    if self._version is not None:
        return self._version

    version_url = self.root + self.VERSION_PATH
    try:
        self._version = int(self.opener.open(version_url).read())
    except ValueError:
        raise ServiceError("Could not parse a valid webservice version")
    return self._version
@property
def release(self):
    """
    Returns the datawarehouse release
    =================================

    Service.release S{->} string

    The release is an arbitrary string used to distinguish
    releases of the datawarehouse. This usually coincides
    with updates to the data contained within. While a string,
    releases usually sort in ascending order of recentness
    (eg: "release-26", "release-27", "release-28"). They can also
    have less machine readable meanings (eg: "beta")

    The value is requested from the service on first access
    and cached afterwards.

    @rtype: string
    """
    if self._release is None:
        # RELEASE_PATH is a class attribute, so it must be reached through
        # self; the bare name raised a NameError. The request goes through
        # self.opener, like the version request, so that the same
        # credentials and user agent are used.
        sock = self.opener.open(self.root + self.RELEASE_PATH)
        try:
            self._release = sock.read()
        finally:
            sock.close()
    return self._release
170
def new_query(self):
    """
    Construct a new Query object for the given webservice
    =====================================================

    Factory for Query objects. A Query is built from the
    service's data model and the service itself, so creating
    it here saves the caller from wiring the two together.

    @return: L{intermine.query.Query}
    """
    data_model = self.model
    return Query(data_model, self)
184
def get_template(self, name):
    """
    Returns a template of the given name
    ====================================

    Looks the name up in L{Service.templates}. An entry that is
    not yet a Template is parsed with Template.from_xml, and the
    parsed object replaces it in the dictionary so the parsing
    happens only once per template.

    If you are trying to fetch a private template (ie. one you
    made yourself and is not available to others) then you may
    need to authenticate

    @see: L{intermine.webservice.Service.__init__}

    @param name: the template's name
    @type name: string

    @raise ServiceError: if the template does not exist
    @raise QueryParseError: if the template cannot be parsed

    @return: L{intermine.query.Template}
    """
    catalogue = self.templates
    try:
        found = catalogue[name]
    except KeyError:
        raise ServiceError("There is no template called '"
                           + name + "' at this service")

    if isinstance(found, Template):
        return found

    parsed = Template.from_xml(found, self.model, self)
    catalogue[name] = parsed
    return parsed
214
@property
def templates(self):
    """
    The dictionary of templates from the webservice
    ===============================================

    Service.templates S{->} dict(intermine.query.Template|string)

    For efficiency's sake, Templates are not parsed until
    they are required, and until then they are stored as XML
    strings. It is recommended that in most cases you would want
    to use L{Service.get_template}.

    You can use this property however to test for template existence though::

        if name in service.templates:
            template = service.get_template(name)

    The dictionary is fetched on first access and cached afterwards.

    @raise ServiceError: if two templates share the same name

    @rtype: dict

    """
    if self._templates is None:
        sock = self.opener.open(self.root + self.TEMPLATES_PATH)
        try:
            dom = minidom.parse(sock)
        finally:
            # Release the connection even when the XML cannot be parsed.
            sock.close()

        templates = {}
        for e in dom.getElementsByTagName('template'):
            name = e.getAttribute('name')
            if name in templates:
                raise ServiceError("Two templates with same name: " + name)
            # Keep the raw XML; it is only parsed when requested.
            templates[name] = e.toxml()
        self._templates = templates
    return self._templates
249
@property
def model(self):
    """
    The data model for the webservice you are querying
    ==================================================

    Service.model S{->} L{intermine.model.Model}

    This is used when constructing queries to provide them
    with information on the structure of the data model
    they are accessing. You are very unlikely to want to
    access this object directly.

    The Model is built from the model url on first access and
    cached afterwards.

    raises ModelParseError: if the model cannot be read

    @rtype: L{intermine.model.Model}

    """
    if self._model is not None:
        return self._model

    self._model = Model(self.root + self.MODEL_PATH)
    return self._model
272
def get_results(self, path, params, rowformat, view):
    """
    Return an Iterator over the rows of the results
    ===============================================

    This method is called internally by the query objects
    when they are called to get results. You will not
    normally need to call it directly

    @param path: The resource path (eg: "/query/results")
    @type path: string
    @param params: The query parameters for this request as a dictionary
    @type params: dict
    @param rowformat: One of "dict", "list", "tsv", "csv", "jsonrows", "jsonobjects"
    @type rowformat: string
    @param view: The output columns
    @type view: list

    @raise WebserviceError: for failed requests

    @return: L{intermine.webservice.ResultIterator}
    """
    # The iterator gets this service's root url and opener.
    service_root = self.root
    url_opener = self.opener
    return ResultIterator(service_root, path, params, rowformat, view, url_opener)
296
# NOTE(review): the "def" line of this method is absent from the reviewed
# text; the name get_results_list is an assumption to be confirmed against
# the callers. The parameter list is taken from the get_results call below.
def get_results_list(self, path, params, rowformat, view):
    """
    Return a list of the rows of the results
    ========================================

    This method is called internally by the query objects
    when they are called to get results. You will not
    normally need to call it directly

    The iterator returned by get_results is consumed completely
    before this method returns.

    @param path: The resource path (eg: "/query/results")
    @type path: string
    @param params: The query parameters for this request as a dictionary
    @type params: dict
    @param rowformat: One of "dict", "list", "tsv", "csv", "jsonrows", "jsonobjects"
    @type rowformat: string
    @param view: The output columns
    @type view: list

    @raise WebserviceError: for failed requests

    @return: a list of rows of data
    """
    return list(self.get_results(path, params, rowformat, view))
321
323
# Row formats decoded into Python lists/dicts by this client; the
# constructor requests them from the server as "jsonrows".
PARSED_FORMATS = frozenset(["list", "dict"])
# Flat-file row formats; the constructor reads them with a FlatFileIterator.
STRING_FORMATS = frozenset(["tsv", "csv"])
# JSON row formats; the constructor reads them with a JSONIterator.
JSON_FORMATS = frozenset(["jsonrows", "jsonobjects"])
# Every value accepted for the rowformat argument.
ROW_FORMATS = PARSED_FORMATS | STRING_FORMATS | JSON_FORMATS
328
def __init__(self, root, path, params, rowformat, view, opener):
    """
    Constructor
    ===========

    Services are responsible for getting result iterators. You will
    not need to create one manually.

    The request is sent here: the parameters are url-encoded and
    passed as the data of the request to root + path.

    @param root: The root path (eg: "http://www.flymine.org/query/service")
    @type root: string
    @param path: The resource path (eg: "/query/results")
    @type path: string
    @param params: The query parameters for this request (not modified)
    @type params: dict
    @param rowformat: One of "dict", "list", "tsv", "csv", "jsonrows", "jsonobjects"
    @type rowformat: string
    @param view: The output columns
    @type view: list
    @param opener: A url opener (user-agent)
    @type opener: urllib.URLopener

    @raise ValueError: if the row format is incorrect
    @raise WebserviceError: if the request is unsuccessful
    """
    if rowformat not in self.ROW_FORMATS:
        # The set of formats has to be rendered as text: adding the
        # frozenset to a string raised TypeError instead of ValueError.
        raise ValueError("'%s' is not a valid row format: %s"
                         % (rowformat, ", ".join(sorted(self.ROW_FORMATS))))

    # Work on a copy so the caller's dictionary is left untouched.
    request_params = dict(params)
    if rowformat in self.PARSED_FORMATS:
        # "list" and "dict" rows are built client-side from jsonrows.
        request_params["format"] = "jsonrows"
    else:
        request_params["format"] = rowformat

    url = root + path
    data = urllib.urlencode(request_params)
    con = opener.open(url, data)

    if rowformat in self.STRING_FORMATS:
        self.reader = FlatFileIterator(con, EchoParser())
    elif rowformat == "list":
        self.reader = JSONIterator(con, ListValueParser())
    elif rowformat == "dict":
        self.reader = JSONIterator(con, DictValueParser(view))
    else:
        # "jsonrows" and "jsonobjects" are passed through unchanged.
        self.reader = JSONIterator(con, EchoParser())
372
375
def next(self):
    """
    Returns the next row, in the appropriate format

    The row is taken from the underlying reader chosen in the
    constructor.

    @rtype: whatever the rowformat was determined to be
    """
    row = self.reader.next()
    return row
383
385 """
386 An iterator for handling results returned as a flat file (TSV/CSV).
387 ===================================================================
388
389 This iterator can be used as the sub iterator in a ResultIterator
390 """
391
392 - def __init__(self, connection, parser):
393 """
394 Constructor
395 ===========
396
397 @param connection: The source of data
398 @type connection: socket.socket
399 @param parser: a handler for each row of data
400 @type parser: Parser
401 """
402 self.connection = connection
403 self.parser = parser
404
407
def next(self):
    """
    Return a parsed line of data

    The next line is read from the connection and stripped of
    surrounding whitespace before being handed to the parser.

    @raise WebserviceError: if the line starts with "[ERROR]"
    """
    raw_line = self.connection.next()
    line = raw_line.strip()
    if not line.startswith("[ERROR]"):
        return self.parser.parse(line)
    raise WebserviceError(line)
414
416 """
417 An iterator for handling results returned in the JSONRows format
418 ================================================================
419
420 This iterator can be used as the sub iterator in a ResultIterator
421 """
422
423 - def __init__(self, connection, parser):
424 """
425 Constructor
426 ===========
427
428 @param connection: The source of data
429 @type connection: socket.socket
430 @param parser: a handler for each row of data
431 @type parser: Parser
432 """
433 self.connection = connection
434 self.parser = parser
435 self.header = ""
436 self.footer = ""
437 self.parse_header()
438
441
445
def parse_header(self):
    """
    Reads out the header information from the connection

    Lines are read, stripped and appended to self.header until one
    ends with '"results":[', which marks the start of the rows.
    The lines are read in a loop rather than by recursion, so the
    length of the header is not limited by the recursion depth.

    @raise WebserviceError: if the connection ends before the marker is seen
    """
    results_marker = '"results":['
    try:
        while True:
            line = self.connection.next().strip()
            self.header += line
            if line.endswith(results_marker):
                return
    except StopIteration:
        raise WebserviceError("The connection returned a bad header" + self.header)
455
def check_return_status(self):
    """
    Perform status checks
    =====================

    The footer contains information as to whether the result
    set was successfully transferred in its entirety. This
    method makes sure we don't silently accept an
    incomplete result set.

    The header and footer are joined (the rows between them
    having been consumed) and parsed as a single JSON document.

    @raise WebserviceError: if the container is not valid JSON, or
        if the footer indicates there was an error
    """
    container = self.header + self.footer
    try:
        info = json.loads(container)
    except ValueError:
        # Only decoding failures are translated; a bare "except" would
        # also have swallowed KeyboardInterrupt and SystemExit.
        raise WebserviceError("Error parsing JSON container: " + container)

    if not info["wasSuccessful"]:
        raise WebserviceError(info["statusCode"], info["error"])
477
# NOTE(review): the "def" line of this method is absent from the reviewed
# text; the name get_next_row_from_connection is an assumption to be
# confirmed against the caller.
def get_next_row_from_connection(self):
    """
    Reads the connection to get the next row, and sends it to the parser

    A line starting with "]" closes the list of rows: it and every
    remaining line are collected into self.footer, the return
    status is checked, and the iteration ends.

    @raise WebserviceError: if the connection is interrupted
    @raise StopIteration: once the footer has been read and checked
    """
    try:
        line = self.connection.next()
    except StopIteration:
        # Running out of data before the footer means the result set
        # was cut short.
        raise WebserviceError("Connection interrupted")

    if line.startswith("]"):
        # Collect each remaining line; the original loop re-appended
        # the first footer line once per remaining line instead.
        self.footer += line
        self.footer += "".join(self.connection)
        self.check_return_status()
        raise StopIteration

    # Rows are separated by a trailing comma, which is not part of the row.
    row = json.loads(line.strip().strip(','))
    next_row = self.parser.parse(row)
    if next_row is None:
        raise StopIteration
    return next_row
503
505 """
506 Base class for result line parsers
507 ==================================
508
509 Sub-class this class to gain a default constructor
510
511 """
512
514 """
515 Constructor
516 ===========
517
518 @param view: the list of output columns (default: [])
519 @type view: list
520 """
521 self.view = view
522
def parse(self, data):
    """
    Abstract method - implementations must provide behaviour

    @param data: a line of data

    @raise NotImplementedError: always, unless overridden by a subclass
    """
    # UnimplementedError is not defined anywhere visible in this module,
    # so raising it produced a NameError; NotImplementedError is the
    # builtin meant for abstract methods.
    raise NotImplementedError
530
532 """
533 A result parser that echoes its input
534 =====================================
535
536 Use for parsing situations when you don't
537 actually want to change the data
538 """
539
def parse(self, data):
    """
    Identity parser - hands back exactly what it was given

    @param data: the data from the result set

    @return: the same object, unchanged
    """
    return data
547
549 """
550 A result parser that produces lists
551 ===================================
552
553 Parses jsonrow formatted rows into lists
554 of values.
555 """
556
def parse(self, row):
    """
    Parse a row of JSON results into a list

    Each cell's "value" entry is collected, in cell order; a cell
    without one contributes None.

    @param row: a row of data from a result set
    @type row: a sequence of cells, each supporting get("value")

    @rtype: list
    """
    values = []
    for cell in row:
        values.append(cell.get("value"))
    return values
567
569 """
570 A result parser that produces dictionaries
571 ==========================================
572
573 Parses jsonrow formatted rows into dictionaries
574 where the key is the view string for the cell,
575 and the value is the contents of the returned cell.
576 """
577
def parse(self, row):
    """
    Parse a row of JSON results into a dictionary

    The columns in self.view are paired with the cells of the
    row in order; each column maps to its cell's "value" entry.
    Pairing stops at the shorter of the two sequences.

    @param row: a row of data from a result set
    @type row: a sequence of cells, each supporting get("value")

    @rtype: dict
    """
    return dict((column, cell.get("value"))
                for column, cell in zip(self.view, row))
592
594 """
595 Specific implementation of urllib.FancyURLOpener for this client
596 ================================================================
597
598 Provides user agent and authentication headers, and handling of errors
599 """
# Identifies this client to the server.
# NOTE(review): presumably overrides urllib.URLopener.version, which is
# sent as the User-Agent header - confirm against the urllib documentation.
version = "InterMine-Python-Client-0.96.00"
601
# NOTE(review): the "def" line of this constructor is absent from the
# reviewed text; the optional "credentials" parameter is reconstructed
# from the body and from the calls InterMineURLOpener() and
# InterMineURLOpener((username, password)).
def __init__(self, credentials=None):
    """
    Constructor
    ===========

    InterMineURLOpener((username, password)) S{->} InterMineURLOpener

    Return a new url-opener with the appropriate credentials

    When a (username, password) pair is given, a Basic
    Authorization header is attached to the opener and
    using_authentication is set to True; otherwise no header is
    added and using_authentication is False.

    @param credentials: a (username, password) pair (optional)
    @type credentials: tuple
    """
    urllib.FancyURLopener.__init__(self)
    if credentials and len(credentials) == 2:
        # b64encode never inserts line breaks; encodestring wraps its
        # output every 76 characters, which corrupted the header for
        # long credentials.
        token = base64.b64encode('%s:%s' % tuple(credentials))
        # The header value must name the authentication scheme.
        # NOTE(review): the original sent the bare token - confirm the
        # target servers accept the standard "Basic <token>" form.
        self.addheader("Authorization", "Basic " + token)
        self.using_authentication = True
    else:
        self.using_authentication = False
618
# NOTE(review): the "def" line of this method is absent from the reviewed
# text; the name comes from the docstring and the parameters mirror the
# sibling http_error_* handlers without their optional "data" argument -
# confirm against urllib.URLopener.http_error_default.
def http_error_default(self, url, fp, errcode, errmsg, headers):
    """
    Re-implementation of http_error_default, with content now supplied by default

    The response body is read and the response closed before
    the error is raised.

    @raise WebserviceError: in all circumstances
    """
    body = fp.read()
    fp.close()
    details = (errcode, errmsg, body)
    raise WebserviceError(*details)
624
def http_error_400(self, url, fp, errcode, errmsg, headers, data=None):
    """
    Handle 400 HTTP errors, attempting to return informative error messages
    =======================================================================

    400 errors indicate that something about our request was incorrect.
    The response body is read and the response closed before
    the error is raised.

    @raise WebserviceError: in all circumstances

    """
    body = fp.read()
    fp.close()
    details = ("There was a problem with our request", errcode, errmsg, body)
    raise WebserviceError(*details)
638
def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
    """
    Handle 401 HTTP errors, attempting to return informative error messages
    =======================================================================

    401 errors indicate we don't have sufficient permission for the resource
    we requested - usually a list or a template. The message depends on
    whether this opener was created with credentials.

    @raise WebserviceError: in all circumstances

    """
    body = fp.read()
    fp.close()
    if not self.using_authentication:
        raise WebserviceError("No permissions - not logged in", errcode, errmsg, body)
    raise WebserviceError("Insufficient permissions", errcode, errmsg, body)
656
def http_error_404(self, url, fp, errcode, errmsg, headers, data=None):
    """
    Handle 404 HTTP errors, attempting to return informative error messages
    =======================================================================

    404 errors indicate that the requested resource does not exist - usually
    a template that is no longer available. The response body is read
    and the response closed before the error is raised.

    @raise WebserviceError: in all circumstances

    """
    body = fp.read()
    fp.close()
    details = ("Missing resource", errcode, errmsg, body)
    raise WebserviceError(*details)
def http_error_500(self, url, fp, errcode, errmsg, headers, data=None):
    """
    Handle 500 HTTP errors, attempting to return informative error messages
    =======================================================================

    500 errors indicate that the server failed during the request - ie: it
    wasn't our fault. The response body is read and the response
    closed before the error is raised.

    @raise WebserviceError: in all circumstances

    """
    body = fp.read()
    fp.close()
    details = ("Internal server error", errcode, errmsg, body)
    raise WebserviceError(*details)
685
688
690 """Errors in the creation and use of the Service object"""
691 pass
693 """Errors from interaction with the webservice"""
694 pass
695