#!python
# -*- coding: utf-8 -*-
#  
#  Copyright 2019 Gabriele Orlando <orlando.gabriele89@gmail.com>
#  

from chaplin.src import parse,utils
from chaplin.optimize import optimize
import os,time
from chaplin.predict import run_prediction
def quality_check_DNASeq(sequences):
    """Validate that every entry of ``sequences`` is a well-formed cDNA sequence.

    Each sequence is cut into consecutive triplets. A trailing triplet that is
    listed in ``parse.aa2codon[""]`` (presumably the stop codons -- confirm in
    ``chaplin.src.parse``) is ignored, and every remaining triplet must be a
    member of ``parse.allowed_codons``.

    Args:
        sequences: mapping from sequence name to its cDNA string.

    Raises:
        ValueError: if a sequence is empty, if its length is not a multiple
            of 3, or if it contains a triplet that is not in
            ``parse.allowed_codons``.
    """
    allowed_codons = parse.allowed_codons
    stop_codons = parse.aa2codon[""]
    for name, seq in sequences.items():
        if not seq:
            # An empty sequence passes the "% 3" test below and used to crash
            # with an IndexError when looking at the last triplet.
            raise ValueError("empty sequence found: "+name)
        if len(seq) % 3 != 0:
            raise ValueError("the input are cDNA seuquences. They are made of triplets, so a sequence must be divisible by 3. "+name+" as a length of "+str(len(seq)))

        codons = [seq[i:i + 3] for i in range(0, len(seq), 3)]
        if codons[-1] in stop_codons:  # removing eventual stop codon
            codons = codons[:-1]
        for codon in codons:
            if codon not in allowed_codons:
                raise ValueError("non codon triplet "+codon+" found in sequence "+name)

def quality_check_aaSeq(sequences):
    """Check that every sequence only uses the 20 standard amino acid letters.

    Args:
        sequences: mapping from sequence name to its amino acid string.

    Raises:
        ValueError: on the first character that is not one of the upper-case
            one-letter codes ``ACDEFGHIKLMNPQRSTVWY``.
    """
    valid_residues = frozenset("ACDEFGHIKLMNPQRSTVWY")
    for name, residues in sequences.items():
        for residue in residues:
            if residue in valid_residues:
                continue
            raise ValueError("Non-amino acid letter "+residue+" found in sequence"+name)

def run_optimization(sequence,outfile,verbose=True,iterations=100,mating_parents=200,pop_size=1500,target_sol=1.0,num_optimized_seqs=1):
    """Optimize the encoding of one or more amino acid sequences and report it.

    Args:
        sequence: either the path of an existing file, which is read with
            ``parse.leggifasta``, or a raw amino acid sequence, which is
            stored under the name ``"inputSequence"``.
        outfile: path of the output file, or ``None`` to print the results
            on screen.
        verbose: forwarded to ``optimize`` as ``verbose``.
        iterations: forwarded to ``optimize`` as ``iterations``.
        mating_parents: forwarded to ``optimize`` as ``num_parents_mating``.
        pop_size: forwarded to ``optimize`` as ``pop_size``.
        target_sol: forwarded to ``optimize`` as ``TARGET_SOL``.
        num_optimized_seqs: forwarded to ``optimize``; when different from 1
            every returned sequence is reported as ``<name>_<index>``,
            otherwise only the first one is reported as ``<name>``.

    Raises:
        ValueError: if an input sequence contains a non-amino-acid letter
            (raised by ``quality_check_aaSeq``).
    """
    print("Running optimization")
    start_time = time.time()
    if os.path.exists(sequence):
        sequences = parse.leggifasta(sequence)
    else:
        sequences = {"inputSequence":sequence}

    quality_check_aaSeq(sequences)

    optimized = {}
    for name, target_seq in sequences.items():
        results = optimize(target_seq = target_seq,verbose=verbose,iterations=iterations,num_parents_mating=mating_parents,pop_size=pop_size,TARGET_SOL=target_sol,num_optimized_seqs=num_optimized_seqs)
        if num_optimized_seqs!=1:
            for idx, candidate in enumerate(results):
                optimized[name+"_"+str(idx)] = candidate
        else:
            optimized[name] = results[0]

    if outfile is None:
        print("###########")
        print("# CHAPLIN RESULTS #")
        for name, seq in optimized.items():
            print(">"+name+"\n"+ seq+"\n")
        print("###################")
    else:
        # The records are FASTA entries, so no "Name\tScore" table header is
        # written here: that header belongs to the prediction output and would
        # make this file invalid FASTA. The context manager closes the file
        # even if a write fails.
        with open(outfile,"w") as handle:
            for name, seq in optimized.items():
                handle.write(">"+name+"\n"+ seq+"\n")

    print("Done in ",round(time.time()-start_time,4),"seconds. The monkeys are listening")

def prediction(sequence,outfile=None):
    """Run the chaplin prediction on one or more cDNA sequences and report it.

    Args:
        sequence: either the path of an existing file, which is read with
            ``parse.leggifasta``, or a raw cDNA sequence, which is stored
            under the name ``"inputSequence"``.
        outfile: path of a tab-separated output file (``Name``/``Score``
            columns), or ``None`` to print the scores on screen.

    Raises:
        ValueError: if an input sequence does not pass
            ``quality_check_DNASeq``.
    """
    print("Running prediction")
    start_time = time.time()
    if os.path.exists(sequence):
        sequences = parse.leggifasta(sequence)
    else:
        sequences = {"inputSequence":sequence}
    quality_check_DNASeq(sequences)
    pred = run_prediction(sequences,printPreds=False)
    if outfile is None:
        print("###########")
        print("# CHAPLIN RESULTS #")
        for name, score in pred.items():
            print(name, round(score,3))
        print("###################")
    else:
        # The context manager guarantees the file is closed even when
        # formatting or writing one of the scores fails.
        with open(outfile,"w") as handle:
            handle.write("Name\tScore\n")
            for name, score in pred.items():
                handle.write(name+ "\t"+str(round(score,3))+"\n")
    print("Done in ",round(time.time()-start_time,4),"seconds. The monkeys are listening")

if __name__ == '__main__':
    # Command-line entry point: parse the arguments and dispatch to
    # prediction() or run_optimization().
    import argparse,sys
    import textwrap
    parser = argparse.ArgumentParser(
        prog='Chaplin',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=textwrap.dedent('''\
             if you have problems or you bugs,
             mail orlando.gabriele89@gmail.com.
             
             The monkeys are listening
             '''))


    parser.add_argument('--command',"-c", default="predict",help="The action to perform. predict evaluates with chaplin cDNA sequence(s), optimize runs an optimization of amino acid sequence(s) to obtain an encoding with interaction probability defined by target_optimization",choices=['predict', "optimize","explain"])
    parser.add_argument('sequence', help='If predict, the input is either a cDNA sequence or a fasta file with cDNA sequences, if optimize, the input is either a amino acid sequence or a fasta file with amino acid sequences')
    parser.add_argument('--iterations',"-i", help='number of iterations for the genetic optimization. Ignored if command is predict',default=100,type=int)
    parser.add_argument('--mating_parents', help='number of parents mating for the genetic optimization. Ignored if command is predict',default=200,type=int)
    parser.add_argument('--pop_size', help='size of a generation for the genetic optimization. Ignored if command is predict',default=1500,type=int )
    # NOTE(review): the target is parsed as an int although run_optimization
    # defaults target_sol to the float 1.0 -- confirm whether fractional
    # targets are meant to be supported before switching to type=float.
    parser.add_argument('--target_optimization',"-t", help='target fitness for the genetic optimization. 1 means the sequence is optimized TO INTERACT with chaperon, 0 means the sequence is optimized NOT TO INTERACT with chaperon. Ignored if command is predict',default=1 ,type=int)
    parser.add_argument('--num_optimized_seqs',"-n", help='number of output optimized sequences per input sequence. Ignored if command is predict',default=1,type=int )
    parser.add_argument('--outfile',"-o", help='the output file. if not provided, it prints on screen',default=None )
    # Note that the short flag of --silent is "-v"; it is kept for backward
    # compatibility even though it turns verbosity OFF.
    parser.add_argument('--silent',"-v", action='store_true',help='does not print text while optimizing. ### CAREFULL ### you will not be able to check if the optimization converges or not')

    args = parser.parse_args()

    if args.command == "predict":
        prediction(args.sequence,outfile=args.outfile)

    elif args.command == "optimize":
        run_optimization(args.sequence,outfile=args.outfile,verbose = not args.silent,iterations=args.iterations,mating_parents=args.mating_parents,pop_size=args.pop_size,target_sol=args.target_optimization,num_optimized_seqs=args.num_optimized_seqs)

    else:
        # "explain" is an accepted choice but nothing in this file implements
        # it; report that explicitly (usage message + exit status 2) instead
        # of silently doing nothing and exiting successfully.
        parser.error("command '"+args.command+"' is not implemented")

    print()



