#!/usr/bin/python3
# -*- coding: utf-8 -*-

from Bio import PDB

import os, os.path
import argparse
import sys

# Lookup table: three-character residue name -> one-letter sequence code.
# Residue names absent from this table have no entry at all; callers decide
# how to treat them.
AA = {
    # Amino acids
    'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D',
    'CYS': 'C', 'GLN': 'Q', 'GLU': 'E', 'GLY': 'G',
    'HIS': 'H', 'ILE': 'I', 'LEU': 'L', 'LYS': 'K',
    'MET': 'M', 'PHE': 'F', 'PRO': 'P', 'SER': 'S',
    'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V',
    # 'TERM' maps to the empty string, i.e. it adds nothing to a sequence
    'TERM': '',
    # Nucleotides: keys are left-padded with spaces to three characters
    ' DA': 'A', ' DG': 'G', ' DC': 'C', ' DT': 'T',
    '  A': 'A', '  G': 'G', '  C': 'C', '  T': 'T', '  U': 'U',
    # Unknown residue
    'UNK': 'X',
}

def _parse_args():
    '''
        Parses the command line arguments needed for this script to work.

        Returns an argparse.Namespace with two string attributes:
          - input:  path of the PDB file to read (-i / --input, required)
          - prefix: prefix of the output file and of every FASTA header
                    (-p / --prefix, required)

        argparse itself terminates the program (SystemExit) when a required
        option is missing or when -h / --help is requested.
    '''
    parser = argparse.ArgumentParser(
        description='Write the sequence of every chain of a PDB file '
                    'to a single FASTA file.')
    # dest and action are left to their argparse defaults: the attribute name
    # is derived from the long option and the value is stored as given.
    parser.add_argument('-i', '--input', required=True,
                        help='Path of the input PDB file')
    parser.add_argument('-p', '--prefix', required=True,
                        help='Output prefix: sequences are written to '
                             'PREFIX.fa and each record is named '
                             'PREFIX_<chain id>')
    return parser.parse_args()


def _one_letter(resname, table=None):
    '''
        Returns the one-letter code for a residue name, or '' when the name
        is not present in the table (such residues add nothing to the sequence).

        resname: residue name as reported by the parser
        table:   mapping of residue name -> one-letter code (defaults to AA)
    '''
    if table is None:
        table = AA
    if resname in table:
        return table[resname]
    # The nucleotide keys of AA are left-padded to three characters (' DA').
    # NOTE(review): some Biopython releases appear to strip that padding from
    # residue.resname ('DA'); retrying with the padded form keeps nucleotides
    # from being silently dropped either way - confirm against the installed
    # Biopython version.
    return table.get(resname.strip().rjust(3), '')


def _write_fasta(structure, prefix, handle, table=None):
    '''
        Writes one FASTA record per chain of the structure to handle.

        Each record is a '>{prefix}_{chain_id}' header line followed by the
        chain sequence on a single line.

        structure: object whose get_chains() yields chains; each chain has an
                   id attribute and iterates over residues exposing resname
        prefix:    text placed before the chain id in every header
        handle:    writable text file object
        table:     residue name -> one-letter code mapping (defaults to AA)
    '''
    for chain in structure.get_chains():
        handle.write('>{prefix}_{chain_id}\n'.format(prefix=prefix, chain_id=chain.id))
        handle.write(''.join(_one_letter(residue.resname, table) for residue in chain))
        handle.write('\n')


def main():
    '''
        Main routine of the program.
        Parses the arguments, checks that arguments.input is an existing file
        and parses it as a PDB structure. Then writes the sequence of every
        chain to {prefix}.fa, one FASTA record per chain.
        Exits with an error message (exit status 1) if the input file does
        not exist.
    '''
    arguments = _parse_args()
    pdb_file = arguments.input

    # Validate the input BEFORE touching the output, so a wrong path neither
    # leaves an empty .fa file behind nor passes silently.
    if not os.path.isfile(pdb_file):
        sys.exit('Error: input file {0!r} does not exist or is not a file'.format(pdb_file))

    # The structure id is only a label; use the path without its extension.
    structure_id = os.path.splitext(pdb_file)[0]
    structure = PDB.PDBParser(QUIET=True).get_structure(structure_id, pdb_file)

    print('Saving sequence in {prefix}.fa'.format(prefix=arguments.prefix))
    # The with-block guarantees the file is closed even if writing fails.
    with open(arguments.prefix + '.fa', 'w') as fhand:
        _write_fasta(structure, arguments.prefix, fhand)


if __name__ == '__main__':
    main()
