#!/usr/bin/env python3

import filecmp
import os
import shutil
import subprocess
import unittest
from unittest.mock import patch

import pandas as pd

from rrap import controller

class TestOutputs(unittest.TestCase):
    """End-to-end checks of the files written by a single rrap pipeline run.

    ``setUpClass`` clears the outputs of any previous ``test`` run, rewrites
    ``metaG_paths.txt`` and invokes ``controller.main()`` once with a patched
    ``sys.argv``. Each test method then inspects one group of output files,
    comparing against the stored ``val`` files where they exist.
    """

    # All paths are derived from this file's location so the suite does not
    # depend on machine-specific absolute paths.
    TESTS_DIR = os.path.join(os.path.dirname(__file__), "..", "tests")
    OUTPUT_DIR = os.path.join(TESTS_DIR, "output")

    # Base names used for the per-sample output files checked below.
    SAMPLES = ("ERR864077", "SRR11803378", "ERR864073")

    @classmethod
    def setUpClass(cls):
        """Remove stale test outputs, then run the whole pipeline once.

        Raises:
            OSError: if a stale output file or directory cannot be removed;
                continuing would let the tests pass on leftover data.
        """
        super().setUpClass()
        tests_dir = cls.TESTS_DIR
        output_dir = cls.OUTPUT_DIR

        # The concatenated reference is a regular file, so it must be tested
        # with isfile(); an isdir() check never matches and leaves it behind.
        concat_path = os.path.join(output_dir, "allgenomes_cat_test.fna")
        if os.path.isfile(concat_path):
            os.remove(concat_path)

        # Remove the per-stage output directories of a previous "test" run.
        for stage in ("bam", "index", "rpkm", "stats"):
            stage_dir = os.path.join(output_dir, stage, "test")
            if os.path.isdir(stage_dir):
                shutil.rmtree(stage_dir)

        # Rewrite metaG_paths.txt so that it lists paths built relative to
        # this file (generalizable to any system); it is passed to rrap via -i.
        paths_file = os.path.join(tests_dir, "metaG_paths.txt")
        metag_dirs = [
            os.path.join(tests_dir, "metaGs", "dir_01"),
            os.path.join(tests_dir, "metaGs", "dir_02"),
        ]
        with open(paths_file, "w") as f:
            f.write("\n".join(metag_dirs))

        rrap_cmd = [
            "rrap",
            "-i", paths_file,
            "-rg", os.path.join(tests_dir, "reference"),
            "-o", output_dir,
            "-n", "test",
            "-suffix", "_toy_R1.fastq",
        ]

        print("testing RRAP with the following command: \n{0}\n".format(" ".join(rrap_cmd)))
        # Run through the entire rrap pipeline in-process; controller.main()
        # is given its arguments through the patched sys.argv.
        with patch("sys.argv", rrap_cmd):
            controller.main()
        print("\n")

    def test_concatenating_files(self):
        """The concatenated reference file exists and matches the stored one."""
        val_concat = os.path.join(self.OUTPUT_DIR, "allgenomes_cat_val.fna")
        test_concat = os.path.join(self.OUTPUT_DIR, "allgenomes_cat_test.fna")

        self.assertTrue(os.path.isfile(test_concat),
                        "missing concatenated file: {0}".format(test_concat))
        # shallow=False forces a byte-by-byte comparison instead of os.stat().
        self.assertTrue(filecmp.cmp(val_concat, test_concat, shallow=False),
                        "{0} differs from {1}".format(test_concat, val_concat))

    def test_indexing_reference_genomes(self):
        """All expected .bt2 index files exist and one matches the stored copy."""
        index_dir = os.path.join(self.OUTPUT_DIR, "index")

        # make sure all index files exist
        extensions = [".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2", ".rev.1.bt2", ".rev.2.bt2"]
        for extension in extensions:
            index_file = os.path.join(index_dir, "test", "test{0}".format(extension))
            with self.subTest(index_file=index_file):
                self.assertTrue(os.path.isfile(index_file),
                                "missing index file: {0}".format(index_file))

        # compare one of the index files to make sure it matches the expected output
        val_index = os.path.join(index_dir, "val", "val.1.bt2")
        test_index = os.path.join(index_dir, "test", "test.1.bt2")
        self.assertTrue(filecmp.cmp(val_index, test_index, shallow=False),
                        "{0} differs from {1}".format(test_index, val_index))

    def test_read_recruitment(self):
        """A .bam and a .bam.bai file exist for every sample."""
        bam_test_dir = os.path.join(self.OUTPUT_DIR, "bam", "test")

        # Only existence is checked here; contents are not compared.
        for suffix in (".bam", ".bam.bai"):
            for sample in self.SAMPLES:
                bam_file = os.path.join(bam_test_dir, sample + suffix)
                with self.subTest(bam_file=bam_file):
                    self.assertTrue(os.path.isfile(bam_file),
                                    "missing file: {0}".format(bam_file))

    def test_data_transform(self):
        """Each sample's .bam.stats file matches the stored validation file."""
        stats_val_dir = os.path.join(self.OUTPUT_DIR, "stats", "val")
        stats_test_dir = os.path.join(self.OUTPUT_DIR, "stats", "test")

        for sample in self.SAMPLES:
            stats_name = "{0}.bam.stats".format(sample)
            val_stats = os.path.join(stats_val_dir, stats_name)
            test_stats = os.path.join(stats_test_dir, stats_name)
            with self.subTest(stats_file=stats_name):
                # Check existence first so a missing output is reported as a
                # test failure rather than a FileNotFoundError from filecmp.
                self.assertTrue(os.path.isfile(test_stats),
                                "missing stats file: {0}".format(test_stats))
                self.assertTrue(filecmp.cmp(val_stats, test_stats, shallow=False),
                                "{0} differs from {1}".format(test_stats, val_stats))

    def test_rpkm_data(self):
        """The RPKM CSV, loaded as a DataFrame, equals the stored validation CSV."""
        rpkm_dir = os.path.join(self.OUTPUT_DIR, "rpkm")
        val_path = os.path.join(rpkm_dir, "val", "val_rpkm_noLog.csv")
        test_path = os.path.join(rpkm_dir, "test", "test_rpkm_noLog.csv")

        val_csv = pd.read_csv(val_path)
        test_csv = pd.read_csv(test_path)

        # Compare parsed frames rather than raw bytes of the CSV files.
        self.assertTrue(val_csv.equals(test_csv),
                        "{0} differs from {1}".format(test_path, val_path))



# When executed directly as a script, run the suite with verbose
# (per-test) output.
if __name__ == "__main__":
    unittest.main(verbosity=2)
