Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/redcap/project.py : 14%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
4__author__ = 'Scott Burns <scott.s.burnsgmail.com>'
5__license__ = 'MIT'
6__copyright__ = '2014, Vanderbilt University'
8import json
9import warnings
11from .request import RCRequest, RedcapError, RequestException
12import semantic_version
14try:
15 from StringIO import StringIO
16except ImportError:
17 from io import StringIO
class Project(object):
    """Main class for interacting with REDCap projects"""

    def __init__(self, url, token, name='', verify_ssl=True, lazy=False):
        """
        Parameters
        ----------
        url : str
            API URL to your REDCap server
        token : str
            API token to your project
        name : str, optional
            name for project
        verify_ssl : boolean, str
            Verify SSL, default True. Can pass path to CA_BUNDLE.
        lazy : boolean, optional
            When True, ``configure()`` is not called by the constructor,
            so the metadata-derived attributes below stay ``None`` (and
            ``configured`` stays False) until ``configure()`` is called.
        """
        self.token = token
        self.name = name
        self.url = url
        self.verify = verify_ssl
        self.metadata = None
        self.redcap_version = None
        self.field_names = None
        # We'll use the first field as the default id for each row
        self.def_field = None
        self.field_labels = None
        self.forms = None
        self.events = None
        self.arm_nums = None
        self.arm_names = None
        self.configured = False

        if not lazy:
            self.configure()

    def configure(self):
        """
        Fill in the project attributes from the API.

        Sets ``metadata``, ``redcap_version``, ``field_names``,
        ``def_field``, ``field_labels``, ``forms``, ``events``,
        ``arm_nums`` and ``arm_names``, then marks the project as
        ``configured``.

        Raises
        ------
        RedcapError
            If the metadata export raises ``RequestException`` or the
            version lookup raises any ``Exception``.
        """
        try:
            self.metadata = self.__md()
        except RequestException:
            raise RedcapError("Exporting metadata failed. Check your URL and token.")
        try:
            self.redcap_version = self.__rcv()
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must propagate instead of becoming a RedcapError.
            raise RedcapError("Determination of REDCap version failed")
        self.field_names = self.filter_metadata('field_name')
        # we'll use the first field as the default id for each row
        self.def_field = self.field_names[0]
        self.field_labels = self.filter_metadata('field_label')
        self.forms = tuple(set(c['form_name'] for c in self.metadata))

        # determine whether longitudinal
        ev_data = self._call_api(self.__basepl('event'), 'exp_event')[0]
        arm_data = self._call_api(self.__basepl('arm'), 'exp_arm')[0]

        # A dict carrying an 'error' key is treated as "no events" /
        # "no arms" rather than as a failure.
        if isinstance(ev_data, dict) and 'error' in ev_data:
            events = ()
        else:
            events = ev_data

        if isinstance(arm_data, dict) and 'error' in arm_data:
            arm_nums = ()
            arm_names = ()
        else:
            arm_nums = tuple([a['arm_num'] for a in arm_data])
            arm_names = tuple([a['name'] for a in arm_data])

        self.events = events
        self.arm_nums = arm_nums
        self.arm_names = arm_names
        self.configured = True

    def __md(self):
        """Return the project's metadata structure"""
        p_l = self.__basepl('metadata')
        return self._call_api(p_l, 'metadata')[0]

    def __basepl(self, content, rec_type='flat', format='json'):
        """Return a dictionary which can be used as is or added to for
        payloads"""
        d = {'token': self.token, 'content': content, 'format': format}
        # 'metadata' and 'file' payloads carry no 'type' entry
        if content not in ['metadata', 'file']:
            d['type'] = rec_type
        return d

    def __rcv(self):
        """
        Return the REDCap version reported by the API.

        Returns
        -------
        version : ``semantic_version.Version``, str
            A ``Version`` object when the decoded response validates as a
            semantic version, otherwise the decoded response itself.
            An empty string (with a warning) when the response contains
            ``'error'``.
        """
        p_l = self.__basepl('version')
        rcv = self._call_api(p_l, 'version')[0].decode('utf-8')
        if 'error' in rcv:
            warnings.warn('Version information not available for this REDCap instance')
            return ''
        if semantic_version.validate(rcv):
            return semantic_version.Version(rcv)
        return rcv

    def is_longitudinal(self):
        """
        Returns
        -------
        boolean :
            longitudinal status of this project
        """
        return (len(self.events) > 0 and
                len(self.arm_nums) > 0 and
                len(self.arm_names) > 0)

    def filter_metadata(self, key):
        """
        Return a list of values for the metadata key from each field
        of the project's metadata.

        Parameters
        ----------
        key: str
            A known key in the metadata structure

        Returns
        -------
        filtered :
            attribute list from each field

        Raises
        ------
        KeyError
            If no field of the metadata contains ``key``
        """
        filtered = [field[key] for field in self.metadata if key in field]
        if not filtered:
            raise KeyError("Key not found in metadata")
        return filtered

    def _kwargs(self):
        """Private method to build a dict for sending to RCRequest

        Other default kwargs to the http library should go here"""
        return {'verify': self.verify}

    def _call_api(self, payload, typpe, **kwargs):
        """
        Build an ``RCRequest`` for ``payload`` and execute it.

        The keyword arguments from ``_kwargs()`` are used as defaults and
        are overridden by any ``kwargs`` given here. The result of
        ``RCRequest.execute`` is returned unchanged; callers in this class
        unpack it as a ``(content, headers)`` pair.
        """
        request_kwargs = self._kwargs()
        request_kwargs.update(kwargs)
        rcr = RCRequest(self.url, payload, typpe)
        return rcr.execute(**request_kwargs)

    @staticmethod
    def _indexed_params(key, values):
        """Return ``{'key[0]': v0, 'key[1]': v1, ...}`` for ``values``."""
        return {
            "{}[{}]".format(key, idx): value
            for idx, value in enumerate(values)
        }

    @staticmethod
    def _content_type_params(content_type):
        """
        Return the ``name=value`` parameters of a content-type header
        value as a dict, with double quotes removed from the values.

        Each parameter is split on its first ``=`` only, so a value that
        itself contains ``=`` (e.g. a file name) is kept whole.
        """
        params = {}
        for part in content_type.split(';'):
            name, sep, value = part.strip().partition('=')
            if sep:
                params[name] = value.replace('"', '')
        return params

    def export_fem(self, arms=None, format='json', df_kwargs=None):
        """
        Export the project's form to event mapping

        Parameters
        ----------
        arms : list
            Limit exported form event mappings to these arm numbers
        format : (``'json'``), ``'csv'``, ``'xml'``, ``'df'``
            Return the form event mappings in native objects,
            csv or xml, ``'df'`` will return a ``pandas.DataFrame``
        df_kwargs : dict
            Passed to pandas.read_csv to control construction of
            returned DataFrame

        Returns
        -------
        fem : list, str, ``pandas.DataFrame``
            form-event mapping for the project. ``None`` if ``format``
            is not one of the values listed above.
        """
        # A DataFrame is built from a csv export
        ret_format = 'csv' if format == 'df' else format
        pl = self.__basepl('formEventMapping', format=ret_format)

        if arms:
            pl.update(self._indexed_params('arms', arms))

        response, _ = self._call_api(pl, 'exp_fem')
        if format in ('json', 'csv', 'xml'):
            return response
        elif format == 'df':
            if not df_kwargs:
                df_kwargs = {}
            return self.read_csv(StringIO(response), **df_kwargs)

    def export_metadata(self, fields=None, forms=None, format='json',
                        df_kwargs=None):
        """
        Export the project's metadata

        Parameters
        ----------
        fields : list
            Limit exported metadata to these fields
        forms : list
            Limit exported metadata to these forms
        format : (``'json'``), ``'csv'``, ``'xml'``, ``'df'``
            Return the metadata in native objects, csv or xml.
            ``'df'`` will return a ``pandas.DataFrame``.
        df_kwargs : dict
            Passed to ``pandas.read_csv`` to control construction of
            returned DataFrame.
            by default ``{'index_col': 'field_name'}``

        Returns
        -------
        metadata : list, str, ``pandas.DataFrame``
            metadata structure for the project. ``None`` if ``format``
            is not one of the values listed above.
        """
        # A DataFrame is built from a csv export
        ret_format = 'csv' if format == 'df' else format
        pl = self.__basepl('metadata', format=ret_format)
        for key, data in (('fields', fields), ('forms', forms)):
            if data:
                pl.update(self._indexed_params(key, data))

        response, _ = self._call_api(pl, 'metadata')
        if format in ('json', 'csv', 'xml'):
            return response
        elif format == 'df':
            if not df_kwargs:
                df_kwargs = {'index_col': 'field_name'}
            return self.read_csv(StringIO(response), **df_kwargs)

    def delete_records(self, records, format='json'):
        """
        Delete records from the Project.

        Parameters
        ----------
        records : list
            List of record IDs that you want to delete from the project
        format : (``'json'``), ``'csv'``, ``'xml'``
            Value sent as the ``format`` entry of the payload.

        Returns
        -------
        response : int
            Number of records deleted
        """
        pl = dict()
        pl['action'] = 'delete'
        pl['content'] = 'record'
        pl['token'] = self.token
        # Turn list of records into dict, and append to payload
        pl.update(self._indexed_params('records', records))

        # ``format`` is the keyword argument above; without it this name
        # would resolve to the builtin ``format`` function.
        pl['format'] = format
        response, _ = self._call_api(pl, 'del_record')
        return response

    def export_records(self, records=None, fields=None, forms=None,
                       events=None, raw_or_label='raw', event_name='label',
                       format='json', export_survey_fields=False,
                       export_data_access_groups=False, df_kwargs=None,
                       export_checkbox_labels=False, filter_logic=None):
        """
        Export data from the REDCap project.

        Parameters
        ----------
        records : list
            array of record names specifying specific records to export.
            by default, all records are exported
        fields : list
            array of field names specifying specific fields to pull
            by default, all fields are exported
        forms : list
            array of form names to export. If in the web UI, the form
            name has a space in it, replace the space with an underscore
            by default, all forms are exported
        events : list
            an array of unique event names from which to export records

            :note: this only applies to longitudinal projects
        raw_or_label : (``'raw'``), ``'label'``, ``'both'``
            export the raw coded values or labels for the options of
            multiple choice fields, or both
        event_name : (``'label'``), ``'unique'``
            export the unique event name or the event label
        format : (``'json'``), ``'csv'``, ``'xml'``, ``'df'``
            Format of returned data. ``'json'`` returns json-decoded
            objects while ``'csv'`` and ``'xml'`` return other formats.
            ``'df'`` will attempt to return a ``pandas.DataFrame``.
        export_survey_fields : (``False``), True
            specifies whether or not to export the survey identifier
            field (e.g., "redcap_survey_identifier") or survey timestamp
            fields (e.g., form_name+"_timestamp") when surveys are
            utilized in the project.
        export_data_access_groups : (``False``), ``True``
            specifies whether or not to export the
            ``"redcap_data_access_group"`` field when data access groups
            are utilized in the project.

            :note: This flag is only viable if the user whose token is
                being used to make the API request is *not* in a data
                access group. If the user is in a group, then this flag
                will revert to its default value.
        df_kwargs : dict
            Passed to ``pandas.read_csv`` to control construction of
            returned DataFrame.
            by default, ``{'index_col': self.def_field}``
        export_checkbox_labels : (``False``), ``True``
            specify whether to export checkbox values as their label on
            export.
        filter_logic : string
            specify the filterLogic to be sent to the API.

        Returns
        -------
        data : list, str, ``pandas.DataFrame``
            exported data. ``None`` if ``format`` is not one of the
            values listed above.
        """
        # A DataFrame is built from a csv export
        ret_format = 'csv' if format == 'df' else format
        pl = self.__basepl('record', format=ret_format)
        fields = self.backfill_fields(fields, forms)
        keys_to_add = (records, fields, forms, events,
                       raw_or_label, event_name, export_survey_fields,
                       export_data_access_groups, export_checkbox_labels)
        str_keys = ('records', 'fields', 'forms', 'events', 'rawOrLabel',
                    'eventName', 'exportSurveyFields', 'exportDataAccessGroups',
                    'exportCheckboxLabel')
        for key, data in zip(str_keys, keys_to_add):
            # Falsy values (None, empty lists, False flags) are not sent
            if not data:
                continue
            if key in ('fields', 'records', 'forms', 'events'):
                pl.update(self._indexed_params(key, data))
            else:
                pl[key] = data

        if filter_logic:
            pl["filterLogic"] = filter_logic
        response, _ = self._call_api(pl, 'exp_record')
        if format in ('json', 'csv', 'xml'):
            return response
        elif format == 'df':
            if not df_kwargs:
                if self.is_longitudinal():
                    df_kwargs = {'index_col': [self.def_field,
                                               'redcap_event_name']}
                else:
                    df_kwargs = {'index_col': self.def_field}
            buf = StringIO(response)
            df = self.read_csv(buf, **df_kwargs)
            buf.close()
            return df

    def read_csv(self, buf, **df_kwargs):
        """Wrapper around pandas read_csv that handles EmptyDataError

        An empty ``pandas.DataFrame`` is returned when pandas raises
        ``EmptyDataError`` for ``buf``."""
        # pandas is imported here so it is only needed for 'df' output
        from pandas import DataFrame, read_csv
        from pandas.errors import EmptyDataError

        try:
            df = read_csv(buf, **df_kwargs)
        except EmptyDataError:
            df = DataFrame()

        return df

    def metadata_type(self, field_name):
        """If the given field_name is validated by REDCap, return its type"""
        return self.__meta_metadata(field_name,
                                    'text_validation_type_or_show_slider_number')

    def __meta_metadata(self, field, key):
        """Return the value for key for the field in the metadata

        The value is returned as a ``str``. When no metadata entry has
        ``field`` as its ``'field_name'``, a message is printed and an
        empty string is returned."""
        matches = [f[key] for f in self.metadata
                   if f['field_name'] == field]
        if not matches:
            print("%s not in metadata field:%s" % (key, field))
            return ''
        return str(matches[0])

    def backfill_fields(self, fields, forms):
        """
        Properly backfill fields to explicitly request specific
        keys. The issue is that >6.X servers *only* return requested fields
        so to improve backwards compatibility for PyCap clients, add specific
        fields when required.

        Parameters
        ----------
        fields: list
            requested fields
        forms: list
            requested forms

        Returns
        -------
        new_fields : list
            ``[self.def_field]`` when only forms are requested;
            the requested fields, with ``self.def_field`` appended if it
            is missing, when fields are requested;
            ``self.field_names`` when neither is requested.
        """
        if forms and not fields:
            new_fields = [self.def_field]
        elif fields and self.def_field not in fields:
            new_fields = list(fields)
            new_fields.append(self.def_field)
        elif not fields:
            new_fields = self.field_names
        else:
            new_fields = list(fields)
        return new_fields

    def names_labels(self, do_print=False):
        """Simple helper function to get all field names and labels """
        if do_print:
            for name, label in zip(self.field_names, self.field_labels):
                print('%s --> %s' % (str(name), str(label)))
        return self.field_names, self.field_labels

    def import_records(self, to_import, overwrite='normal', format='json',
                       return_format='json', return_content='count',
                       date_format='YMD', force_auto_number=False):
        """
        Import data into the RedCap Project

        Parameters
        ----------
        to_import : array of dicts, csv/xml string, ``pandas.DataFrame``
            :note:
                If you pass a csv or xml string, you should use the
                ``format`` parameter appropriately.
            :note:
                Keys of the dictionaries should be subset of project's,
                fields, but this isn't a requirement. If you provide keys
                that aren't defined fields, the returned response will
                contain an ``'error'`` key.
        overwrite : ('normal'), 'overwrite'
            ``'overwrite'`` will erase values previously stored in the
            database if not specified in the to_import dictionaries.
        format : ('json'),  'xml', 'csv'
            Format of incoming data. By default, to_import will be json-encoded
        return_format : ('json'), 'csv', 'xml'
            Response format. By default, response will be json-decoded.
        return_content : ('count'), 'ids', 'nothing'
            By default, the response contains a 'count' key with the number of
            records just imported. By specifying 'ids', a list of ids
            imported will be returned. 'nothing' will only return
            the HTTP status code and no message.
        date_format : ('YMD'), 'DMY', 'MDY'
            Describes the formatting of dates. By default, date strings
            are formatted as 'YYYY-MM-DD' corresponding to 'YMD'. If date
            strings are formatted as 'MM/DD/YYYY' set this parameter as
            'MDY' and if formatted as 'DD/MM/YYYY' set as 'DMY'. No
            other formattings are allowed.
        force_auto_number : ('False') Enables automatic assignment of record IDs
            of imported records by REDCap. If this is set to true, and auto-numbering
            for records is enabled for the project, auto-numbering of imported records
            will be enabled.

        Returns
        -------
        response : dict, str
            response from REDCap API, json-decoded if ``return_format`` == ``'json'``

        Raises
        ------
        RedcapError
            If ``'error'`` is found in the response
        """
        pl = self.__basepl('record')
        if hasattr(to_import, 'to_csv'):
            # We'll assume it's a df
            buf = StringIO()
            if self.is_longitudinal():
                csv_kwargs = {'index_label': [self.def_field,
                                              'redcap_event_name']}
            else:
                csv_kwargs = {'index_label': self.def_field}
            to_import.to_csv(buf, **csv_kwargs)
            pl['data'] = buf.getvalue()
            buf.close()
            # a DataFrame is always sent as csv, whatever ``format`` says
            format = 'csv'
        elif format == 'json':
            pl['data'] = json.dumps(to_import, separators=(',', ':'))
        else:
            # don't do anything to csv/xml
            pl['data'] = to_import
        pl['overwriteBehavior'] = overwrite
        pl['format'] = format
        pl['returnFormat'] = return_format
        pl['returnContent'] = return_content
        pl['dateFormat'] = date_format
        pl['forceAutoNumber'] = force_auto_number
        response = self._call_api(pl, 'imp_record')[0]
        if 'error' in response:
            raise RedcapError(str(response))
        return response

    def export_file(self, record, field, event=None, return_format='json'):
        """
        Export the contents of a file stored for a particular record

        Notes
        -----
        Unlike other export methods, this works on a single record.

        Parameters
        ----------
        record : str
            record ID
        field : str
            field name containing the file to be exported.
        event: str
            for longitudinal projects, specify the unique event here
        return_format: ('json'), 'csv', 'xml'
            format of error message

        Returns
        -------
        content : bytes
            content of the file
        content_map : dict
            content-type dictionary

        Raises
        ------
        ValueError
            If ``field`` is not a known field or not a ``'file'`` field
        """
        self._check_file_field(field)
        # load up payload
        pl = self.__basepl(content='file', format=return_format)
        # there's no format field in this call
        del pl['format']
        pl['returnFormat'] = return_format
        pl['action'] = 'export'
        pl['field'] = field
        pl['record'] = record
        if event:
            pl['event'] = event
        content, headers = self._call_api(pl, 'exp_file')
        # REDCap adds some useful things in content-type
        if 'content-type' in headers:
            content_map = self._content_type_params(headers['content-type'])
        else:
            content_map = {}
        return content, content_map

    def import_file(self, record, field, fname, fobj, event=None,
                    repeat_instance=None, return_format='json'):
        """
        Import the contents of a file represented by fobj to a
        particular records field

        Parameters
        ----------
        record : str
            record ID
        field : str
            field name where the file will go
        fname : str
            file name visible in REDCap UI
        fobj : file object
            file object as returned by `open`
        event : str
            for longitudinal projects, specify the unique event here
        repeat_instance : int
            (only for projects with repeating instruments/events)
            The repeat instance number of the repeating event (if longitudinal)
            or the repeating instrument (if classic or longitudinal).
        return_format : ('json'), 'csv', 'xml'
            format of error message

        Returns
        -------
        response :
            response from server as specified by ``return_format``

        Raises
        ------
        ValueError
            If ``field`` is not a known field or not a ``'file'`` field
        """
        self._check_file_field(field)
        # load up payload
        pl = self.__basepl(content='file', format=return_format)
        # no format in this call
        del pl['format']
        pl['returnFormat'] = return_format
        pl['action'] = 'import'
        pl['field'] = field
        pl['record'] = record
        if event:
            pl['event'] = event
        if repeat_instance:
            pl['repeat_instance'] = repeat_instance
        file_kwargs = {'files': {'file': (fname, fobj)}}
        return self._call_api(pl, 'imp_file', **file_kwargs)[0]

    def delete_file(self, record, field, return_format='json', event=None):
        """
        Delete a file from REDCap

        Notes
        -----
        There is no undo button to this.

        Parameters
        ----------
        record : str
            record ID
        field : str
            field name
        return_format : (``'json'``), ``'csv'``, ``'xml'``
            return format for error message
        event : str
            If longitudinal project, event to delete file from

        Returns
        -------
        response : dict, str
            response from REDCap after deleting file

        Raises
        ------
        ValueError
            If ``field`` is not a known field or not a ``'file'`` field
        """
        self._check_file_field(field)
        # Load up payload
        pl = self.__basepl(content='file', format=return_format)
        del pl['format']
        pl['returnFormat'] = return_format
        pl['action'] = 'delete'
        pl['record'] = record
        pl['field'] = field
        if event:
            pl['event'] = event
        return self._call_api(pl, 'del_file')[0]

    def _check_file_field(self, field):
        """Check that field exists and is a file field

        Returns True when it is, raises ``ValueError`` otherwise."""
        # Only consult the metadata for known fields, so an unknown field
        # goes straight to the ValueError without a lookup message.
        is_file_field = (
            field in self.field_names and
            self.__meta_metadata(field, 'field_type') == 'file'
        )
        if not is_file_field:
            msg = "'%s' is not a field or not a 'file' field" % field
            raise ValueError(msg)
        return True

    def export_users(self, format='json'):
        """
        Export the users of the Project

        Notes
        -----
        Each user will have the following keys:

            * ``'firstname'`` : User's first name
            * ``'lastname'`` : User's last name
            * ``'email'`` : Email address
            * ``'username'`` : User's username
            * ``'expiration'`` : Project access expiration date
            * ``'data_access_group'`` : data access group ID
            * ``'data_export'`` : (0=no access, 2=De-Identified, 1=Full Data Set)
            * ``'forms'`` : a list of dicts with a single key as the form name and
                value is an integer describing that user's form rights,
                where: 0=no access, 1=view records/responses and edit
                records (survey responses are read-only), 2=read only, and
                3=edit survey responses,

        Parameters
        ----------
        format : (``'json'``), ``'csv'``, ``'xml'``
            response return format

        Returns
        -------
        users: list, str
            list of users dicts when ``'format'='json'``,
            otherwise a string
        """
        pl = self.__basepl(content='user', format=format)
        return self._call_api(pl, 'exp_user')[0]

    def export_survey_participant_list(self, instrument, event=None, format='json'):
        """
        Export the Survey Participant List

        Notes
        -----
        The passed instrument must be set up as a survey instrument.

        Parameters
        ----------
        instrument: str
            Name of instrument as seen in second column of Data Dictionary.
        event: str
            Unique event name, only used in longitudinal projects
        format: (json, xml, csv), json by default
            Format of returned data

        Returns
        -------
        result :
            The complete return value of ``_call_api``. Unlike the other
            export methods, the first element is not extracted here.
        """
        pl = self.__basepl(content='participantList', format=format)
        pl['instrument'] = instrument
        if event:
            pl['event'] = event
        return self._call_api(pl, 'exp_survey_participant_list')

    def generate_next_record_name(self):
        """
        Request a ``generateNextRecordName`` payload from the API and
        return the first element of the result.
        """
        pl = self.__basepl(content='generateNextRecordName')

        return self._call_api(pl, 'exp_next_id')[0]

    def export_project_info(self, format='json'):
        """
        Export Project Information

        Parameters
        ----------
        format: (json, xml, csv), json by default
            Format of returned data
        """

        pl = self.__basepl(content='project', format=format)

        return self._call_api(pl, 'exp_proj')[0]