#!/usr/bin/python

import argparse
import multiprocessing as mp
import sys

# Help texts that several subcommands share verbatim.
_REGION_HELP = "Text file that shows regions of analysis. Each line in the text file should have chromosome, start site, and end site that are tab-spaced. ex) chr1\t100\t3000"
_BLACKLIST_HELP = "Text file that shows regions you want to filter out. Each line in the text file should have chromosome, start site, and end site that are tab-spaced. ex) chr1\t1\t100"
_CPU_HELP = "The number of cpus. default=(available cpus)/2"
_GENOME_HELP = ".2bit file."
_MIN_FRAGMENT_HELP = "The minimum number of fragments. Positions that have less fragments than this value are filtered out. default=the number of samples"
_BIAS_TYPE_HELP = "Type of biases you want to correct among 'shear', 'pcr', 'map', 'gquad'. If you want to correct 'shear' and 'pcr' bias, you should type -biasType shear pcr."
_BIAS_TYPE_WITH_FILES_HELP = _BIAS_TYPE_HELP + " If you type map, -mapFile and -kmer are required. If you type gquad, -gquadFile is required"

def _addUncorrectedBigwigArgs(group):
	"""Add the required -ctrlbw / -expbw inputs used by both bias-correction subcommands."""
	group.add_argument('-ctrlbw', help="Ctrl bigwig files. Un-normalized files are recommended. Each file name should be spaced. ex) -ctrlbw file1.bw file2.bw", nargs='+', required=True)
	group.add_argument('-expbw', help="Experimental bigwig files. Un-normalized files are recommended. Each file name should be spaced. ex) -expbw file1.bw file2.bw", nargs='+', required=True)

def _addCovariateSourceArgs(group):
	"""Add -mapFile, -kmer and -gquadFile, the inputs tied to the 'map' and 'gquad' bias types."""
	group.add_argument('-mapFile', help="Mappability file in bigwig format. Required when 'map' is in '-biasType'")
	group.add_argument('-kmer', help="The length of sequencing reads. If you have paired-end sequencing with 50mer from each end, type '50'. Required when 'map' is in '-biasType'")
	group.add_argument('-gquadFile', help="G-quadruplex files in bigwig format. Multiple files are allowed. Required when 'gquad' is in '-biasType'", nargs="+")

def _addCorrectionOptions(group):
	"""Add the trailing options shared by correctBias and correctBias_stored (-o through -rngSeed)."""
	group.add_argument('-o', help="Output directory. All corrected bigwig files will be stored here. If the directory doesn't exist, cradle will make the directory. default=CRADLE_correctionResult.", required=False, default="CRADLE_correctionResult")
	group.add_argument('-p', type=int, help=_CPU_HELP)
	group.add_argument('-bl', help=_BLACKLIST_HELP)
	# -norm and -generateNormBW are kept as the strings 'True'/'False'; no type conversion happens here.
	group.add_argument('-norm', help="Whether normalization is needed for input bigwig files. Choose either 'True' or 'False'. default=True", default='True')
	group.add_argument('-generateNormBW', help="If you want to generate normalized observed bigwig files, type 'True' (only works when '-norm True'). If you don't want, type 'False'. default=False", default='False')
	group.add_argument('-rngSeed', type=int, help="Set seed value for the RNG. Enables repeatable runs.", default=None)

def _addCorrectBiasParser(subparsers):
	"""Register the 'correctBias' subcommand."""
	correctBias = subparsers.add_parser("correctBias", help="Correct shear, pcr, mappability, g-quadruplex structure bias from bigwig files")

	required = correctBias.add_argument_group("Required Args")
	_addUncorrectedBigwigArgs(required)
	required.add_argument('-l', type=int, help="Fragment length.", required=True)
	required.add_argument('-r', help=_REGION_HELP, required=True)
	required.add_argument('-biasType', help=_BIAS_TYPE_WITH_FILES_HELP, nargs='+', required=True)
	required.add_argument('-genome', '-faFile', help=_GENOME_HELP, required=True) # faFile is deprecated

	optional = correctBias.add_argument_group("Optional Args")
	optional.add_argument('-binSize', type=int, help="The size of bin (bp) for correction. If you put '1', it means you want to correct read counts in single-bp resolution. (default=1)", default=1)
	optional.add_argument('-mi', type=int, help=_MIN_FRAGMENT_HELP)
	_addCovariateSourceArgs(optional)
	_addCorrectionOptions(optional)

def _addCorrectBiasStoredParser(subparsers):
	"""Register the 'correctBias_stored' subcommand."""
	stored = subparsers.add_parser("correctBias_stored", help="Correct shear, pcr, mappability, g-quadruplex structure bias from bigwig files when there are stored covariate hdf5 files. This is much faster than correctBias.")

	required = stored.add_argument_group("Required Args")
	_addUncorrectedBigwigArgs(required)
	required.add_argument('-r', help=_REGION_HELP, required=True)
	required.add_argument('-biasType', help=_BIAS_TYPE_HELP, nargs='+', required=True)
	required.add_argument('-covariDir', help="The directory of hdf files that have covariate values. The directory name of covariate files should be 'refGenome_fragLen(fragment length)_kmer(the length of sequenced reads)' ex) hg38_fragLen300_kmer36", required=True)
	required.add_argument('-genome', '-faFile', help=_GENOME_HELP, required=True) # faFile is deprecated

	optional = stored.add_argument_group("Optional Args")
	optional.add_argument('-mi', type=int, help=_MIN_FRAGMENT_HELP)
	_addCorrectionOptions(optional)

def _addCallPeakParser(subparsers):
	"""Register the 'callPeak' subcommand."""
	callPeak = subparsers.add_parser("callPeak", help="Call peaks with corrected bigwig files")

	required = callPeak.add_argument_group("Required Args")
	required.add_argument('-ctrlbw', help="Ctrl bigwig files. Corrected bigwig files are recommended. Each file name should be spaced. ex) -ctrlbw file1.bw file2.bw", nargs='+', required=True)
	required.add_argument('-expbw', help="Experimental bigwig files. Corrected bigwig files are recommended. Each file name should be spaced. ex) -expbw file1.bw file2.bw", nargs='+', required=True)
	required.add_argument('-r', help=_REGION_HELP, required=True)
	# -fdr, -rbin and -wbin have no type=, so they reach the subcommand as strings.
	required.add_argument('-fdr', help="FDR control", required=True)

	optional = callPeak.add_argument_group("Optional Args")
	optional.add_argument('-o', help="Output directory. All corrected bigwig files will be stored here. If the directory doesn't exist, cradle will make the directory. default=CRADLE_peak_result.", required=False, default="CRADLE_peak_result")
	optional.add_argument('-bl', help=_BLACKLIST_HELP)
	optional.add_argument('-rbin', help="The size of a bin used for defining regions. rbin cannot be smaller than wbin. default = 300")
	optional.add_argument('-wbin', help="The size of a bin used for testing differential activity. wbin cannot be larger than rbin. default = rbin/6")
	optional.add_argument('-p', type=int, help=_CPU_HELP)
	optional.add_argument('-d', type=int, help="The minimum distance between peaks. Peaks distanced less than this value(bp) are merged. default=10", default=10)
	optional.add_argument('-pl', type=int, help="Minimum peak length. default=wbin")
	optional.add_argument('-stat', help="Choose a statistical testing: 't-test' for t-test and  'welch' for welch's t-test  default=t-test")
	optional.add_argument('-normCtrlbw', help="Normalized observed ctrl bigwig files. The bigwigs normalized from CRADLE (using -generateNormBW in either correctBias or correctBias_stored subcommand) are recommended. If you use this parameter along with -normExpbw, CRADLE  will report pseudo log2 fold change in the output", nargs='+')
	optional.add_argument('-normExpbw', help="Normalized observed experimental bigwig files. The bigwigs normalized from CRADLE (using -generateNormBW in either correctBias or correctBias_stored subcommand) are recommended. If you use this parameter along with -normCtrlbw, CRADLE  will report pseudo log2 fold change in the output", nargs='+')

def _addNormalizeParser(subparsers):
	"""Register the 'normalize' subcommand."""
	normalize = subparsers.add_parser("normalize", help="Normalize bigwigs across samples and across different regions of one sample. This is useful for BAC STARR-seq where you can have uneven coverage for each BAC regions or for each overlapping BAC regions")

	required = normalize.add_argument_group("Required Args")
	required.add_argument('-r', help=_REGION_HELP, required=True)
	required.add_argument('-ctrlbw', help="Unnormalized control bigwig files. Each file name should be spaced. ex) -ctrlbw file1.bw file2.bw", required=True, nargs="+")
	required.add_argument('-expbw', help="Unnormalized experimental  bigwig files. Each file name should be spaced. ex) -expbw file1.bw file2.bw", required=True, nargs="+")

	optional = normalize.add_argument_group("Optional Args")
	optional.add_argument('-p', type=int, help=_CPU_HELP, required=False)
	optional.add_argument('-o', help="Output directory. All normalized bigwig files will be stored here. If the directory doesn't exist, cradle will make the directory. default=CRADLE_normalization.", required=False, default="CRADLE_normalization")

def _addCovariatesParser(subparsers):
	"""Register the 'covariates' subcommand."""
	covariates = subparsers.add_parser("covariates", help="Calculate covariate values")

	required = covariates.add_argument_group("Required Args")
	required.add_argument('-l', type=int, help="Fragment length.", required=True)
	required.add_argument('-r', help=_REGION_HELP, required=True)
	required.add_argument('-biasType', help=_BIAS_TYPE_WITH_FILES_HELP, nargs='+', required=True)
	required.add_argument('-genome', '-faFile', help=_GENOME_HELP, required=True) # faFile is deprecated

	optional = covariates.add_argument_group("Optional Args")
	_addCovariateSourceArgs(optional)
	optional.add_argument(
		'-o',
		help=(
			"Output directory. All calculated covariate files will be stored here. If the directory doesn't exist, cradle will make the directory. default='CRADLE_covariates'. The output covariate files will be named {output_directory}_{chromosome}.hdf5 (e.g., 'CRADLE_covariates_chrX.hdf5') "
			"Note that, to make the files compatible with the CRADLE correctBias_stored step, the directory should be named {genome}_fragLen{fragment length}_kmer{sequencing read length}. For example, 'hg38_fragLen1000_kmer100'."
		),
		required=False,
		default="CRADLE_covariates",
	)
	optional.add_argument('-p', type=int, help=_CPU_HELP)
	optional.add_argument('-bl', help=_BLACKLIST_HELP)

def getArgs():
	"""Build the 'cradle' command-line parser.

	Five subcommands are registered: correctBias, correctBias_stored,
	callPeak, normalize and covariates. The chosen subcommand name is stored
	on the parsed namespace as 'commandName'.

	A subcommand is mandatory: parsing a command line without one makes
	argparse report an error and exit instead of returning a namespace whose
	'commandName' is None.

	Returns:
		argparse.ArgumentParser: the configured parser (not yet parsed).
	"""
	parser = argparse.ArgumentParser("cradle")

	subparsers = parser.add_subparsers(help="", dest="commandName")
	# Set as an attribute rather than via the 'required' keyword so that
	# interpreters older than 3.7 (which lack the keyword) still work.
	subparsers.required = True

	_addCorrectBiasParser(subparsers)
	_addCorrectBiasStoredParser(subparsers)
	_addCallPeakParser(subparsers)
	_addNormalizeParser(subparsers)
	_addCovariatesParser(subparsers)

	return parser

def checkDeprecations(argv=None):
	"""Print a warning banner when the deprecated "-faFile" option is used.

	Both spellings argparse accepts for an option with a value are
	recognised: the separate-token form ("-faFile genome.2bit") and the
	attached form ("-faFile=genome.2bit").

	Args:
		argv: Sequence of command-line tokens to inspect. Defaults to
			sys.argv when omitted.

	Returns:
		None. The banner is written to standard output.
	"""
	if argv is None:
		argv = sys.argv

	usesFaFile = any(
		token == '-faFile' or token.startswith('-faFile=')
		for token in argv
	)
	if usesFaFile:
		print("""
********************************************************************************
* The "-faFile" argument has been deprecated in favor of "-genome" and may be  *
* removed in a future release. The functionality isn't changing, just the      *
* argument name. Please update your scripts to use "-genome".                  *
********************************************************************************
		""")

def main():
	"""Parse the command line and run the selected subcommand.

	The module implementing a subcommand is imported only after that
	subcommand has been selected; its 'run' function then receives the
	parsed arguments. Nothing is run when the subcommand name matches none
	of the known commands.
	"""
	parsed = getArgs().parse_args()

	checkDeprecations()

	command = parsed.commandName
	if command == "correctBias":
		# Bias correction: 'run' comes from correctBias.py
		from CRADLE.CorrectBias.correctBias import run
	elif command == "correctBias_stored":
		# Bias correction using stored covariates
		from CRADLE.CorrectBiasStored.correctBias import run
	elif command == "callPeak":
		# Peak calling
		from CRADLE.CallPeak.callPeak import run
	elif command == "normalize":
		# Normalization
		from CRADLE.Normalize.normalize import run
	elif command == "covariates":
		# Covariate computation
		from CRADLE.CalculateCovariates.covariates import run
	else:
		return

	run(parsed)

# Script entry point: this runs only when the file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
	# Select the 'fork' multiprocessing start method before any work begins.
	# NOTE(review): presumably the subcommands' worker processes rely on state
	# inherited from the parent process — confirm before changing this.
	mp.set_start_method('fork')
	main()





