Module data_request_api.command_line.export_dreq_lists_json
Command line interface for retrieving simple variable lists from the data request.
Functions
def main()-
Expand source code
def main(): """ main routine """ args = parse_args() use_dreq_version = args.dreq_version # Download specified version of data request content (if not locally cached) dc.retrieve(use_dreq_version) # Load content into python dict content = dc.load(use_dreq_version) # Render data request tables as dreq_table objects base = dq.create_dreq_tables_for_request(content, use_dreq_version) # Deal with opportunities if args.opportunities_file: # Select opportunities by their title, as given in a user-specified json file opportunities_file = args.opportunities_file dreq_opps = base['Opportunity'] if not os.path.exists(opportunities_file): # create opportunities file template use_opps = sorted([opp.title for opp in dreq_opps.records.values()], key=str.lower) default_opportunity_dict = OrderedDict({ 'Header': OrderedDict({ 'Description': 'Opportunities template file for use with export_dreq_lists_json. Set supported/unsupported Opportunities to true/false.', 'dreq content version': use_dreq_version, 'dreq api version': data_request_api.version, }), 'Opportunity': OrderedDict({title: True for title in use_opps}) }) with open(opportunities_file, 'w') as fh: json.dump(default_opportunity_dict, fh, indent=4) print("written opportunities dict to {}. Please edit and re-run".format(opportunities_file)) sys.exit(0) else: # load existing opportunities file with open(opportunities_file, 'r') as fh: opportunity_dict = json.load(fh) dreq_version = opportunity_dict['Header']['dreq content version'] if dreq_version != use_dreq_version: raise ValueError('Data request version mismatch!' + f'\nOpportunities file was generated for data request version {dreq_version}' + f'\nPlease regenerate the file using version {use_dreq_version}') opportunity_dict = opportunity_dict['Opportunity'] # validate opportunities # (mismatches can occur if an opportunities file created with an earlier data request version is loaded) valid_opps = [opp.title for opp in dreq_opps.records.values()] invalid_opps = [title for title in opportunity_dict if title not in valid_opps] if invalid_opps: raise ValueError(f'\nInvalid opportunities were found in {opportunities_file}:\n' + '\n'.join(sorted(invalid_opps, key=str.lower))) # filter opportunities use_opps = [title for title in opportunity_dict if opportunity_dict[title]] elif args.opportunity_ids: # Select opportunities by their integer IDs, specified from the command line dreq_opps = base['Opportunity'] all_opp_ids = [opp.opportunity_id for opp in dreq_opps.records.values()] if len(all_opp_ids) != len(set(all_opp_ids)): raise ValueError(f'Opportunity IDs (integers) in data request {use_dreq_version} are not unique!') oppid2title = {int(opp.opportunity_id): opp.title for opp in dreq_opps.records.values()} use_opps = [] invalid_opp_ids = set() for opp_id in args.opportunity_ids: try: opp_id = int(opp_id) except BaseException: ValueError('Opportunity ID should be an integer') if opp_id in oppid2title: use_opps.append(oppid2title[opp_id]) else: invalid_opp_ids.add(opp_id) if len(invalid_opp_ids) > 0: raise ValueError(f'The following Opportunity IDs were not found in data request {use_dreq_version}: ' + ', '.join([str(opp_id) for opp_id in sorted(invalid_opp_ids)])) elif args.all_opportunities: # Use all available opportunities in the data request use_opps = 'all' else: print("Please use one of the opportunities arguments") sys.exit(1) # Get the requested variables for each opportunity and aggregate them into variable lists by experiment # (i.e., for every experiment, a list of the variables that should be produced to support all of the specified opportunities) expt_vars = dq.get_requested_variables(base, use_dreq_version, use_opps=use_opps, priority_cutoff=args.priority_cutoff, time_subsets=args.time_subsets, combined_request=args.add_combined, verbose=False, check_core_variables=not args.disable_core_vars_check) # filter output by requested experiments if args.experiments: experiments = list(expt_vars['experiment'].keys()) # names of experiments requested by opportunities in use_opps # validate the requested experiment names Expts = base['Experiments'] valid_experiments = [expt.experiment for expt in Expts.records.values()] # all valid experiment names in data request invalid_experiments = [entry for entry in args.experiments if entry not in valid_experiments] if invalid_experiments: raise ValueError('\nInvalid experiments: ' + ', '.join(sorted(invalid_experiments, key=str.lower)) + '\nValid experiment names: ' + ', '.join(sorted(valid_experiments, key=str.lower))) # discard experiments that aren't requested for entry in experiments: if entry not in args.experiments: del expt_vars['experiment'][entry] # Construct output if len(expt_vars['experiment']) > 0: # Show user what was found dq.show_requested_vars_summary(expt_vars, use_dreq_version) # Write json file with the variable lists content_path = dc._dreq_content_loaded['json_path'] outfile = args.output_file dq.write_requested_vars_json(outfile, expt_vars, use_dreq_version, args.priority_cutoff, content_path) else: print(f'\nFor data request version {use_dreq_version}, no requested variables were found') if args.variables_metadata: # Get all variable names for all requested experiments all_var_names = set() for vars_by_priority in expt_vars['experiment'].values(): for var_names in vars_by_priority.values(): all_var_names.update(var_names) # Get metadata for variables all_var_info = dq.get_variables_metadata( base, use_dreq_version, compound_names=all_var_names, verbose=False, ) # Write output file(s) filepath = args.variables_metadata dq.write_variables_metadata( all_var_info, use_dreq_version, filepath, api_version=data_request_api.version, content_path=dc._dreq_content_loaded['json_path'] )main routine
def parse_args()-
Expand source code
def parse_args(): """ Parse command-line arguments """ parser = argparse.ArgumentParser( description='Get lists of requested variables by experiment, and write them to a json file.' ) # Positional (mandatory) input arguments parser.add_argument('dreq_version', choices=dc.get_versions(), help="data request version") parser.add_argument('output_file', help='file to write JSON output to') sep = ',' def parse_input_list(input_str: str, sep=sep) -> list: '''Create list of input args separated by separator "sep" (str)''' input_args = input_str.split(sep) # Guard against leading, trailing, or repeated instances of the separator input_args = [s for s in input_args if s not in ['']] return input_args # Optional input arguments parser.add_argument('-a', '--all_opportunities', action='store_true', help="respond to all opportunities") parser.add_argument('-f', '--opportunities_file', type=str, help="path to JSON file listing opportunities to respond to. \ If it doesn't exist, a template will be created") parser.add_argument('-i', '--opportunity_ids', type=parse_input_list, help=f'opportunity ids (integers) of opportunities to respond to, \ example: -i 69{sep}22{sep}37') parser.add_argument('-e', '--experiments', type=parse_input_list, help=f'limit output to the specified experiments (case sensitive), \ example: -e historical{sep}piControl') parser.add_argument('-p', '--priority_cutoff', default='low', choices=dq.PRIORITY_LEVELS, help="discard variables that are requested at lower priority than this cutoff priority") parser.add_argument('-m', '--variables_metadata', type=str, help='output file containing metadata of requested variables, can be ".json" or ".csv" file') parser.add_argument("-c", "--add_combined", action="store_true", default=False, help="Include combined request from all selected opportunities and for all experiments. " "Will create the new entry 'all_experiments' for the combined request.") parser.add_argument("-t", "--time_subsets", action="store_true", default=False, help="Include time_subsets that variables are requested for.") parser.add_argument("-d", "--disable_core_vars_check", action="store_true", default=False, help="Don't require that core variables are included in the request.") return parser.parse_args()Parse command-line arguments