Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/env python 

2# -*- coding: utf-8 -*- 

3 

4__author__ = 'Scott Burns <scott.s.burnsgmail.com>' 

5__license__ = 'MIT' 

6__copyright__ = '2014, Vanderbilt University' 

7 

8import json 

9import warnings 

10 

11from .request import RCRequest, RedcapError, RequestException 

12import semantic_version 

13 

14try: 

15 from StringIO import StringIO 

16except ImportError: 

17 from io import StringIO 

18 

class Project(object):
    """Main class for interacting with REDCap projects"""

    def __init__(self, url, token, name='', verify_ssl=True, lazy=False):
        """
        Parameters
        ----------
        url : str
            API URL to your REDCap server
        token : str
            API token to your project
        name : str, optional
            name for project
        verify_ssl : boolean, str
            Verify SSL, default True. Can pass path to CA_BUNDLE.
        lazy : boolean, optional
            When False (the default) ``configure()`` is called right away,
            which issues API requests. When True the descriptive
            attributes (``metadata``, ``field_names``, ``events``, ...)
            stay ``None`` until ``configure()`` is called explicitly.
        """
        self.token = token
        self.name = name
        self.url = url
        self.verify = verify_ssl
        self.metadata = None
        self.redcap_version = None
        self.field_names = None
        # We'll use the first field as the default id for each row
        self.def_field = None
        self.field_labels = None
        self.forms = None
        self.events = None
        self.arm_nums = None
        self.arm_names = None
        self.configured = False

        if not lazy:
            self.configure()

    def configure(self):
        """
        Populate the project's descriptive attributes from the API.

        Fetches the metadata and the REDCap version, derives
        ``field_names``, ``def_field``, ``field_labels`` and ``forms`` from
        the metadata, then requests events and arms. Sets ``configured``
        to True on success.

        Raises
        ------
        RedcapError
            If exporting the metadata raises ``RequestException`` or if
            determining the REDCap version raises any exception.
        """
        try:
            self.metadata = self.__md()
        except RequestException:
            raise RedcapError("Exporting metadata failed. Check your URL and token.")
        try:
            self.redcap_version = self.__rcv()
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must not be converted into a RedcapError.
            raise RedcapError("Determination of REDCap version failed")
        self.field_names = self.filter_metadata('field_name')
        # we'll use the first field as the default id for each row
        self.def_field = self.field_names[0]
        self.field_labels = self.filter_metadata('field_label')
        # Built through a set, so the order of the forms is unspecified.
        self.forms = tuple(set(c['form_name'] for c in self.metadata))

        # determine whether longitudinal
        ev_data = self._call_api(self.__basepl('event'), 'exp_event')[0]
        arm_data = self._call_api(self.__basepl('arm'), 'exp_arm')[0]

        # A dict carrying an 'error' key is treated as "no events"/"no arms".
        if isinstance(ev_data, dict) and 'error' in ev_data:
            events = tuple()
        else:
            events = ev_data

        if isinstance(arm_data, dict) and 'error' in arm_data:
            arm_nums = tuple()
            arm_names = tuple()
        else:
            arm_nums = tuple(a['arm_num'] for a in arm_data)
            arm_names = tuple(a['name'] for a in arm_data)

        self.events = events
        self.arm_nums = arm_nums
        self.arm_names = arm_names
        self.configured = True

    def __md(self):
        """Return the project's metadata structure"""
        # __basepl already sets content='metadata'.
        return self._call_api(self.__basepl('metadata'), 'metadata')[0]

    def __basepl(self, content, rec_type='flat', format='json'):
        """Return a dictionary which can be used as is or added to for
        payloads

        The ``type`` key is only added when ``content`` is neither
        ``'metadata'`` nor ``'file'``.
        """
        d = {'token': self.token, 'content': content, 'format': format}
        if content not in ('metadata', 'file'):
            d['type'] = rec_type
        return d

    def __rcv(self):
        """
        Return the REDCap version reported by the server.

        Returns
        -------
        version : ``semantic_version.Version``, str
            A ``Version`` object when the decoded response validates as a
            semantic version, otherwise the decoded response itself. When
            the response contains ``'error'`` a warning is issued and an
            empty string is returned.
        """
        p_l = self.__basepl('version')
        # The response is decoded here, so it is expected to be bytes.
        rcv = self._call_api(p_l, 'version')[0].decode('utf-8')
        if 'error' in rcv:
            warnings.warn('Version information not available for this REDCap instance')
            return ''
        if semantic_version.validate(rcv):
            return semantic_version.Version(rcv)
        return rcv

    def is_longitudinal(self):
        """
        Returns
        -------
        boolean :
            longitudinal status of this project, i.e. whether events, arm
            numbers and arm names are all non-empty
        """
        return (len(self.events) > 0 and
                len(self.arm_nums) > 0 and
                len(self.arm_names) > 0)

    def filter_metadata(self, key):
        """
        Return a list of values for the metadata key from each field
        of the project's metadata.

        Parameters
        ----------
        key: str
            A known key in the metadata structure

        Returns
        -------
        filtered :
            attribute list from each field

        Raises
        ------
        KeyError
            If no field of the metadata contains ``key``.
        """
        filtered = [field[key] for field in self.metadata if key in field]
        if not filtered:
            raise KeyError("Key not found in metadata")
        return filtered

    def _kwargs(self):
        """Private method to build a dict for sending to RCRequest

        Other default kwargs to the http library should go here"""
        return {'verify': self.verify}

    def _call_api(self, payload, typpe, **kwargs):
        """
        Build an ``RCRequest`` for ``payload`` and execute it.

        Parameters
        ----------
        payload : dict
            request payload
        typpe : str
            request type label handed to ``RCRequest``
        **kwargs
            merged over the defaults from ``_kwargs()`` and passed to
            ``RCRequest.execute``

        Returns
        -------
        Whatever ``RCRequest.execute`` returns; callers in this class
        treat it as a ``(content, headers)`` pair.
        """
        request_kwargs = self._kwargs()
        request_kwargs.update(kwargs)
        rcr = RCRequest(self.url, payload, typpe)
        return rcr.execute(**request_kwargs)

    def _response_to_df(self, response, df_kwargs):
        """Parse a CSV ``response`` string with ``read_csv`` and always
        release the intermediate buffer."""
        buf = StringIO(response)
        try:
            return self.read_csv(buf, **df_kwargs)
        finally:
            buf.close()

    def export_fem(self, arms=None, format='json', df_kwargs=None):
        """
        Export the project's form to event mapping

        Parameters
        ----------
        arms : list
            Limit exported form event mappings to these arm numbers
        format : (``'json'``), ``'csv'``, ``'xml'``, ``'df'``
            Return the form event mappings in native objects,
            csv or xml, ``'df'`` will return a ``pandas.DataFrame``
        df_kwargs : dict
            Passed to pandas.read_csv to control construction of
            returned DataFrame

        Returns
        -------
        fem : list, str, ``pandas.DataFrame``
            form-event mapping for the project. ``None`` is returned for
            any other ``format`` value (the request is still sent).
        """
        # A DataFrame is built from a CSV export.
        ret_format = 'csv' if format == 'df' else format
        pl = self.__basepl('formEventMapping', format=ret_format)

        if arms:
            for i, value in enumerate(arms):
                pl["arms[{}]".format(i)] = value

        response, _ = self._call_api(pl, 'exp_fem')
        if format == 'df':
            return self._response_to_df(response, df_kwargs or {})
        if format in ('json', 'csv', 'xml'):
            return response
        return None

    def export_metadata(self, fields=None, forms=None, format='json',
                        df_kwargs=None):
        """
        Export the project's metadata

        Parameters
        ----------
        fields : list
            Limit exported metadata to these fields
        forms : list
            Limit exported metadata to these forms
        format : (``'json'``), ``'csv'``, ``'xml'``, ``'df'``
            Return the metadata in native objects, csv or xml.
            ``'df'`` will return a ``pandas.DataFrame``.
        df_kwargs : dict
            Passed to ``pandas.read_csv`` to control construction of
            returned DataFrame.
            by default ``{'index_col': 'field_name'}``

        Returns
        -------
        metadata : list, str, ``pandas.DataFrame``
            metadata structure for the project. ``None`` is returned for
            any other ``format`` value (the request is still sent).
        """
        # A DataFrame is built from a CSV export.
        ret_format = 'csv' if format == 'df' else format
        pl = self.__basepl('metadata', format=ret_format)
        for key, data in (('fields', fields), ('forms', forms)):
            if data:
                for i, value in enumerate(data):
                    pl["{}[{}]".format(key, i)] = value

        response, _ = self._call_api(pl, 'metadata')
        if format == 'df':
            if not df_kwargs:
                df_kwargs = {'index_col': 'field_name'}
            return self._response_to_df(response, df_kwargs)
        if format in ('json', 'csv', 'xml'):
            return response
        return None

    def delete_records(self, records):
        """
        Delete records from the Project.

        Parameters
        ----------
        records : list
            List of record IDs that you want to delete from the project

        Returns
        -------
        response : int
            Number of records deleted
        """
        pl = dict()
        pl['action'] = 'delete'
        pl['content'] = 'record'
        pl['token'] = self.token
        # Turn list of records into dict, and append to payload
        records_dict = {
            "records[{}]".format(idx): record for idx, record in enumerate(records)
        }
        pl.update(records_dict)

        # This method has no ``format`` argument; the bare name ``format``
        # would refer to the builtin function, so request JSON explicitly.
        pl['format'] = 'json'
        response, _ = self._call_api(pl, 'del_record')
        return response

    def export_records(self, records=None, fields=None, forms=None,
                       events=None, raw_or_label='raw', event_name='label',
                       format='json', export_survey_fields=False,
                       export_data_access_groups=False, df_kwargs=None,
                       export_checkbox_labels=False, filter_logic=None):
        """
        Export data from the REDCap project.

        Parameters
        ----------
        records : list
            array of record names specifying specific records to export.
            by default, all records are exported
        fields : list
            array of field names specifying specific fields to pull
            by default, all fields are exported
        forms : list
            array of form names to export. If in the web UI, the form
            name has a space in it, replace the space with an underscore
            by default, all forms are exported
        events : list
            an array of unique event names from which to export records

            :note: this only applies to longitudinal projects
        raw_or_label : (``'raw'``), ``'label'``, ``'both'``
            export the raw coded values or labels for the options of
            multiple choice fields, or both
        event_name : (``'label'``), ``'unique'``
            export the unique event name or the event label
        format : (``'json'``), ``'csv'``, ``'xml'``, ``'df'``
            Format of returned data. ``'json'`` returns json-decoded
            objects while ``'csv'`` and ``'xml'`` return other formats.
            ``'df'`` will attempt to return a ``pandas.DataFrame``.
        export_survey_fields : (``False``), True
            specifies whether or not to export the survey identifier
            field (e.g., "redcap_survey_identifier") or survey timestamp
            fields (e.g., form_name+"_timestamp") when surveys are
            utilized in the project.
        export_data_access_groups : (``False``), ``True``
            specifies whether or not to export the
            ``"redcap_data_access_group"`` field when data access groups
            are utilized in the project.

            :note: This flag is only viable if the user whose token is
                being used to make the API request is *not* in a data
                access group. If the user is in a group, then this flag
                will revert to its default value.
        df_kwargs : dict
            Passed to ``pandas.read_csv`` to control construction of
            returned DataFrame.
            by default, ``{'index_col': self.def_field}`` (plus
            ``'redcap_event_name'`` when the project is longitudinal)
        export_checkbox_labels : (``False``), ``True``
            specify whether to export checkbox values as their label on
            export.
        filter_logic : string
            specify the filterLogic to be sent to the API.

        Returns
        -------
        data : list, str, ``pandas.DataFrame``
            exported data. ``None`` is returned for any other ``format``
            value (the request is still sent).
        """
        # A DataFrame is built from a CSV export.
        ret_format = 'csv' if format == 'df' else format
        pl = self.__basepl('record', format=ret_format)
        fields = self.backfill_fields(fields, forms)

        # List-valued arguments are flattened to indexed keys: name[0], ...
        list_params = (('records', records), ('fields', fields),
                       ('forms', forms), ('events', events))
        for key, values in list_params:
            if values:
                for i, value in enumerate(values):
                    pl["{}[{}]".format(key, i)] = value

        # Scalar arguments are only sent when truthy.
        scalar_params = (('rawOrLabel', raw_or_label),
                         ('eventName', event_name),
                         ('exportSurveyFields', export_survey_fields),
                         ('exportDataAccessGroups', export_data_access_groups),
                         ('exportCheckboxLabel', export_checkbox_labels))
        for key, value in scalar_params:
            if value:
                pl[key] = value

        if filter_logic:
            pl["filterLogic"] = filter_logic

        response, _ = self._call_api(pl, 'exp_record')
        if format == 'df':
            if not df_kwargs:
                if self.is_longitudinal():
                    df_kwargs = {'index_col': [self.def_field,
                                               'redcap_event_name']}
                else:
                    df_kwargs = {'index_col': self.def_field}
            return self._response_to_df(response, df_kwargs)
        if format in ('json', 'csv', 'xml'):
            return response
        return None

    def read_csv(self, buf, **df_kwargs):
        """Wrapper around pandas read_csv that handles EmptyDataError

        An empty ``DataFrame`` is returned when pandas raises
        ``EmptyDataError``. pandas is imported here so that it is only
        needed when a DataFrame is actually requested.
        """
        from pandas import DataFrame, read_csv
        from pandas.errors import EmptyDataError

        try:
            return read_csv(buf, **df_kwargs)
        except EmptyDataError:
            return DataFrame()

    def metadata_type(self, field_name):
        """If the given field_name is validated by REDCap, return its type"""
        return self.__meta_metadata(
            field_name, 'text_validation_type_or_show_slider_number')

    def __meta_metadata(self, field, key):
        """Return the value for key for the field in the metadata

        The value of the first metadata entry whose ``field_name`` equals
        ``field`` is returned as a ``str``. When no entry matches, a
        message is printed and an empty string is returned.
        """
        matches = [f[key] for f in self.metadata if f['field_name'] == field]
        if not matches:
            print("%s not in metadata field:%s" % (key, field))
            return ''
        return str(matches[0])

    def backfill_fields(self, fields, forms):
        """
        Properly backfill fields to explicitly request specific
        keys. The issue is that >6.X servers *only* return requested fields
        so to improve backwards compatibility for PyCap clients, add specific
        fields when required.

        Parameters
        ----------
        fields: list
            requested fields
        forms: list
            requested forms

        Returns
        -------
        new_fields : list
            * only forms requested: ``[self.def_field]``
            * fields requested: a copy of ``fields`` with
              ``self.def_field`` appended when it is missing
            * nothing requested: a copy of ``self.field_names`` (``None``
              when the project has not been configured)
        """
        if not fields:
            if forms:
                return [self.def_field]
            # Hand out a copy so callers cannot mutate the project's list.
            names = self.field_names
            return list(names) if names is not None else None

        new_fields = list(fields)
        if self.def_field not in new_fields:
            new_fields.append(self.def_field)
        return new_fields

    def names_labels(self, do_print=False):
        """Simple helper function to get all field names and labels

        When ``do_print`` is true each ``name --> label`` pair is printed.
        Returns the tuple ``(self.field_names, self.field_labels)``.
        """
        if do_print:
            for name, label in zip(self.field_names, self.field_labels):
                print('%s --> %s' % (str(name), str(label)))
        return self.field_names, self.field_labels

    def import_records(self, to_import, overwrite='normal', format='json',
                       return_format='json', return_content='count',
                       date_format='YMD', force_auto_number=False):
        """
        Import data into the RedCap Project

        Parameters
        ----------
        to_import : array of dicts, csv/xml string, ``pandas.DataFrame``
            :note:
                If you pass a csv or xml string, you should use the
                ``format`` parameter appropriately.
            :note:
                Keys of the dictionaries should be subset of project's,
                fields, but this isn't a requirement. If you provide keys
                that aren't defined fields, the returned response will
                contain an ``'error'`` key.
        overwrite : ('normal'), 'overwrite'
            ``'overwrite'`` will erase values previously stored in the
            database if not specified in the to_import dictionaries.
        format : ('json'),  'xml', 'csv'
            Format of incoming data. By default, to_import will be json-encoded
        return_format : ('json'), 'csv', 'xml'
            Response format. By default, response will be json-decoded.
        return_content : ('count'), 'ids', 'nothing'
            By default, the response contains a 'count' key with the number of
            records just imported. By specifying 'ids', a list of ids
            imported will be returned. 'nothing' will only return
            the HTTP status code and no message.
        date_format : ('YMD'), 'DMY', 'MDY'
            Describes the formatting of dates. By default, date strings
            are formatted as 'YYYY-MM-DD' corresponding to 'YMD'. If date
            strings are formatted as 'MM/DD/YYYY' set this parameter as
            'MDY' and if formatted as 'DD/MM/YYYY' set as 'DMY'. No
            other formattings are allowed.
        force_auto_number : (``False``), ``True``
            Enables automatic assignment of record IDs
            of imported records by REDCap. If this is set to true, and
            auto-numbering for records is enabled for the project,
            auto-numbering of imported records will be enabled.

        Returns
        -------
        response : dict, str
            response from REDCap API, json-decoded if ``return_format`` == ``'json'``

        Raises
        ------
        RedcapError
            If ``'error'`` is found in the response.
        """
        pl = self.__basepl('record')
        if hasattr(to_import, 'to_csv'):
            # We'll assume it's a df
            if self.is_longitudinal():
                csv_kwargs = {'index_label': [self.def_field,
                                              'redcap_event_name']}
            else:
                csv_kwargs = {'index_label': self.def_field}
            buf = StringIO()
            try:
                to_import.to_csv(buf, **csv_kwargs)
                pl['data'] = buf.getvalue()
            finally:
                buf.close()
            # A DataFrame is always uploaded as CSV, whatever was requested.
            format = 'csv'
        elif format == 'json':
            pl['data'] = json.dumps(to_import, separators=(',', ':'))
        else:
            # don't do anything to csv/xml
            pl['data'] = to_import
        pl['overwriteBehavior'] = overwrite
        pl['format'] = format
        pl['returnFormat'] = return_format
        pl['returnContent'] = return_content
        pl['dateFormat'] = date_format
        pl['forceAutoNumber'] = force_auto_number
        response = self._call_api(pl, 'imp_record')[0]
        if 'error' in response:
            raise RedcapError(str(response))
        return response

    def export_file(self, record, field, event=None, return_format='json'):
        """
        Export the contents of a file stored for a particular record

        Notes
        -----
        Unlike other export methods, this works on a single record.

        Parameters
        ----------
        record : str
            record ID
        field : str
            field name containing the file to be exported.
        event: str
            for longitudinal projects, specify the unique event here
        return_format: ('json'), 'csv', 'xml'
            format of error message

        Returns
        -------
        content : bytes
            content of the file
        content_map : dict
            content-type dictionary, built from the ``key=value`` parts of
            the ``content-type`` response header (empty when the header
            is absent)

        Raises
        ------
        ValueError
            If ``field`` is not a known field or not a ``'file'`` field.
        """
        self._check_file_field(field)
        # load up payload
        pl = self.__basepl(content='file', format=return_format)
        # there's no format field in this call
        del pl['format']
        pl['returnFormat'] = return_format
        pl['action'] = 'export'
        pl['field'] = field
        pl['record'] = record
        if event:
            pl['event'] = event
        content, headers = self._call_api(pl, 'exp_file')
        # REDCap adds some useful things in content-type
        content_map = {}
        if 'content-type' in headers:
            for part in headers['content-type'].split(';'):
                part = part.strip()
                if '=' not in part:
                    continue
                # Split on the first '=' only so that values which
                # themselves contain '=' are kept whole.
                key, _sep, value = part.partition('=')
                content_map[key] = value.replace('"', '')
        return content, content_map

    def import_file(self, record, field, fname, fobj, event=None,
                    repeat_instance=None, return_format='json'):
        """
        Import the contents of a file represented by fobj to a
        particular records field

        Parameters
        ----------
        record : str
            record ID
        field : str
            field name where the file will go
        fname : str
            file name visible in REDCap UI
        fobj : file object
            file object as returned by `open`
        event : str
            for longitudinal projects, specify the unique event here
        repeat_instance : int
            (only for projects with repeating instruments/events)
            The repeat instance number of the repeating event (if longitudinal)
            or the repeating instrument (if classic or longitudinal).
        return_format : ('json'), 'csv', 'xml'
            format of error message

        Returns
        -------
        response :
            response from server as specified by ``return_format``

        Raises
        ------
        ValueError
            If ``field`` is not a known field or not a ``'file'`` field.
        """
        self._check_file_field(field)
        # load up payload
        pl = self.__basepl(content='file', format=return_format)
        # no format in this call
        del pl['format']
        pl['returnFormat'] = return_format
        pl['action'] = 'import'
        pl['field'] = field
        pl['record'] = record
        if event:
            pl['event'] = event
        if repeat_instance:
            pl['repeat_instance'] = repeat_instance
        # The file travels as an extra keyword argument of the request.
        file_kwargs = {'files': {'file': (fname, fobj)}}
        return self._call_api(pl, 'imp_file', **file_kwargs)[0]

    def delete_file(self, record, field, return_format='json', event=None):
        """
        Delete a file from REDCap

        Notes
        -----
        There is no undo button to this.

        Parameters
        ----------
        record : str
            record ID
        field : str
            field name
        return_format : (``'json'``), ``'csv'``, ``'xml'``
            return format for error message
        event : str
            If longitudinal project, event to delete file from

        Returns
        -------
        response : dict, str
            response from REDCap after deleting file

        Raises
        ------
        ValueError
            If ``field`` is not a known field or not a ``'file'`` field.
        """
        self._check_file_field(field)
        # Load up payload
        pl = self.__basepl(content='file', format=return_format)
        # no format in this call
        del pl['format']
        pl['returnFormat'] = return_format
        pl['action'] = 'delete'
        pl['record'] = record
        pl['field'] = field
        if event:
            pl['event'] = event
        return self._call_api(pl, 'del_file')[0]

    def _check_file_field(self, field):
        """Check that field exists and is a file field

        Returns True when ``field`` is in ``self.field_names`` and its
        ``field_type`` metadata is ``'file'``; raises ``ValueError``
        otherwise.
        """
        is_field = field in self.field_names
        is_file = self.__meta_metadata(field, 'field_type') == 'file'
        if not (is_field and is_file):
            msg = "'%s' is not a field or not a 'file' field" % field
            raise ValueError(msg)
        return True

    def export_users(self, format='json'):
        """
        Export the users of the Project

        Notes
        -----
        Each user will have the following keys:

            * ``'firstname'`` : User's first name
            * ``'lastname'`` : User's last name
            * ``'email'`` : Email address
            * ``'username'`` : User's username
            * ``'expiration'`` : Project access expiration date
            * ``'data_access_group'`` : data access group ID
            * ``'data_export'`` : (0=no access, 2=De-Identified, 1=Full Data Set)
            * ``'forms'`` : a list of dicts with a single key as the form name and
                value is an integer describing that user's form rights,
                where: 0=no access, 1=view records/responses and edit
                records (survey responses are read-only), 2=read only, and
                3=edit survey responses,

        Parameters
        ----------
        format : (``'json'``), ``'csv'``, ``'xml'``
            response return format

        Returns
        -------
        users: list, str
            list of users dicts when ``'format'='json'``,
            otherwise a string
        """
        pl = self.__basepl(content='user', format=format)
        return self._call_api(pl, 'exp_user')[0]

    def export_survey_participant_list(self, instrument, event=None, format='json'):
        """
        Export the Survey Participant List

        Notes
        -----
        The passed instrument must be set up as a survey instrument.

        Parameters
        ----------
        instrument: str
            Name of instrument as seen in second column of Data Dictionary.
        event: str
            Unique event name, only used in longitudinal projects
        format: (json, xml, csv), json by default
            Format of returned data

        Returns
        -------
        The complete return value of ``_call_api``. Unlike the other
        export methods of this class, the first element is *not*
        extracted; this is kept as is for backward compatibility.
        """
        pl = self.__basepl(content='participantList', format=format)
        pl['instrument'] = instrument
        if event:
            pl['event'] = event
        return self._call_api(pl, 'exp_survey_participant_list')

    def generate_next_record_name(self):
        """Request ``generateNextRecordName`` from the API and return the
        first element of the response."""
        pl = self.__basepl(content='generateNextRecordName')
        return self._call_api(pl, 'exp_next_id')[0]

    def export_project_info(self, format='json'):
        """
        Export Project Information

        Parameters
        ----------
        format: (json, xml, csv), json by default
            Format of returned data

        Returns
        -------
        The first element of the API response for ``content='project'``.
        """
        pl = self.__basepl(content='project', format=format)
        return self._call_api(pl, 'exp_proj')[0]

715 Format of returned data 

716 """ 

717 

718 pl = self.__basepl(content='project', format=format) 

719 

720 return self._call_api(pl, 'exp_proj')[0]