# Source code for samsifter.filters.filter_taxon_list

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Dec 18 09:34:28 2014

.. moduleauthor:: Florian Aldehoff <faldehoff@student.uni-tuebingen.de>
"""

import sys
# Refuse to run under Python 2. sys.exit() with a string argument writes the
# message to stderr and terminates with exit status 1; unlike the bare
# ``exit`` helper it does not depend on the ``site`` module being loaded.
if sys.version_info[0] < 3:
    sys.exit("Error, I need python 3.x or newer")

import argparse
import fileinput
import csv
from os.path import basename, splitext

""" custom libraries """
from samsifter.models.filter import FilterItem
from samsifter.models.parameter import FilterSwitch, FilterFilepath
from samsifter.util.arg_sanitation import check_sam, check_csv
from samsifter.util.wrappers import grep

""" global variables """
# Short label for this filter; passed as ``text`` to FilterItem in item().
TEXT = "Filter taxa by list of taxon IDs"
# Longer description of this filter; passed as ``desc`` to FilterItem in
# item() and used as the argparse description in main().
DESC = ("filtering references by a list of NCBI taxon IDs given in a "
        "tab-separated CSV file")


def item():
    """Create item representing this tool in list and tree views.

    The item is labelled with the module-level ``TEXT`` and ``DESC`` and
    exposes two parameters: the taxon list file (``--list``) and the filter
    direction switch (``--discard``).

    Returns
    -------
    FilterItem
        Item for use in item-based list and tree views.
    """
    # local name differs from the function name to avoid shadowing it
    filter_item = FilterItem(text=TEXT, desc=DESC)

    # the command is this module's file name without directory and extension
    filter_item.set_command(splitext(basename(__file__))[0])

    filter_item.add_parameter(FilterFilepath(
        text="taxon list file",
        desc="tab-separated CSV file with NCBI taxon IDs in first column",
        cli_name="--list",
        default="taxa.csv",
        extensions=['csv'],
        required=True
    ))

    filter_item.add_parameter(FilterSwitch(
        text="filter direction",
        desc="Keep or discard entries passing the filter criteria?",
        cli_name="--discard",
        default=0,
        options=["discard", "keep"]
    ))

    return filter_item
def main():
    """Run the taxon list filter as a command-line tool.

    Parses the command line, builds one search pattern of the form
    ``tax|<ID>|`` from the first column of every non-empty row of the
    tab-separated list file, and hands the patterns together with the SAM
    input to ``grep``. The SAM input is read from the file given with
    ``--input``; without it, ``fileinput`` reads from any remaining
    command-line arguments or, if there are none, from STDIN.

    Always terminates the interpreter via ``sys.exit()`` once filtering is
    done.
    """
    # parse arguments
    parser = argparse.ArgumentParser(description=DESC)
    parser.add_argument('-i', '--input',
                        type=check_sam,
                        help="specify SAM file to be analysed (default: "
                             "STDIN)",
                        required=False)
    parser.add_argument('-l', '--list',
                        type=check_csv,
                        help="tab-separated CSV file with NCBI taxon IDs "
                             "in first column",
                        required=True)
    parser.add_argument('--discard',
                        type=int,
                        help="keep or discard entries passing the filter "
                             "criteria?",
                        required=False,
                        default=0)
    (args, remain_args) = parser.parse_known_args()

    # generate pattern list from CSV file; csv.reader yields an empty list
    # for blank lines, which are skipped instead of failing on row[0]
    with open(args.list, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter='\t')
        patterns = ["tax|" + row[0] + "|" for row in reader if row]

    # open SAM file from either command line argument or STDIN
    if args.input:
        handle = open(args.input, 'r')
    else:
        handle = fileinput.input(remain_args)

    # --discard 0 (the default) is passed on as discard=True, any other
    # value as discard=False; close the input even if grep raises
    try:
        grep(patterns, handle, discard=(args.discard == 0))
    finally:
        handle.close()

    sys.exit()
# Only run the command-line entry point when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()