#!/usr/bin/env python3
#
#     Copyright (C) 2017–2026, Jose Manuel Martí Martínez
#
#     This program is free software: you can redistribute it and/or modify
#     it under the terms of the GNU Affero General Public License as
#     published by the Free Software Foundation, either version 3 of the
#     License, or (at your option) any later version.
#
#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#     GNU Affero General Public License for more details.
#
#     You should have received a copy of the GNU Affero General Public License
#     along with this program. If not, see <https://www.gnu.org/licenses/>.
#
"""
Get needed taxdump files from NCBI.
"""

import argparse
import sys
import time
import urllib.request

from ftplib import FTP, error_temp, error_proto, error_reply
from zipfile import ZipFile

from recentrifuge import __version__, __author__, __date__
from recentrifuge.config import gray, red, green, yellow
from recentrifuge.config import LICENSE, NODES_FILE, NAMES_FILE, ZIPFILE
from recentrifuge.config import TAXDUMP_PATH

# NCBI host name and the directory on it that holds the taxonomy dump
FTP_SERVER = 'ftp.ncbi.nlm.nih.gov'
PATH_NCBI = '/pub/taxonomy/'
# Seconds to wait before each download attempt: immediate first try, then
# 1..10 s in unit steps, then progressively longer pauses. The number of
# entries is also the maximum number of attempts.
RETRY_TIMES = list(range(11)) + [15, 30, 60, 120, 300]
# Exceptions treated as recoverable download problems
EXCEPTS = (
    OSError,
    EOFError,
    error_temp,
    error_proto,
    error_reply,
)
# Network connection timeout, in seconds
CONN_TIMEOUT = 30
# Chunk size, in bytes, for reading the HTTPS response (1 MiB)
BUFF_SIZE = 1 << 20


def _download_ftp():
    """Download ZIPFILE from the NCBI FTP server to the working directory.

    Errors raised while connecting or transferring are propagated to the
    caller after the connection has been released.
    """
    ftp = FTP(FTP_SERVER, timeout=CONN_TIMEOUT)
    try:
        ftp.login()
        ftp.cwd(PATH_NCBI)
        # The context manager guarantees the archive is flushed and closed
        # before anybody tries to read it back
        with open(ZIPFILE, 'wb') as zipfile:
            ftp.retrbinary('RETR ' + ZIPFILE, zipfile.write)
    except BaseException:
        # Drop a broken session right away instead of waiting for a reply
        ftp.close()
        raise
    try:
        ftp.quit()
    except EXCEPTS:
        # Deliberate best effort: the file is already safely on disk
        try:
            ftp.close()
        except EXCEPTS:
            pass


def _download_https():
    """Download ZIPFILE from the NCBI server over HTTPS.

    Errors raised while connecting or transferring are propagated to the
    caller; both the response and the output file are always closed.
    """
    url = 'https://' + FTP_SERVER + PATH_NCBI + ZIPFILE
    with urllib.request.urlopen(url, timeout=CONN_TIMEOUT) as https, \
            open(ZIPFILE, 'wb') as zipfile:
        while True:
            buff = https.read(BUFF_SIZE)
            if not buff:
                break
            zipfile.write(buff)


def _fetch_zipfile():
    """Try one FTP download, falling back to HTTPS if it fails.

    Returns None on success, or the exception of the last failed
    transfer when both FTP and HTTPS had problems.
    """
    try:
        _download_ftp()
    except EXCEPTS:
        print(yellow(' PROBLEM!'))
        print(gray(' Retrying with HTTPS...'), end='', flush=True)
        try:
            _download_https()
        except EXCEPTS as https_err:
            print(yellow(' PROBLEM!'))
            return https_err
    return None


def main():
    """Main entry point to script.

    Downloads the taxdump ZIP archive from NCBI into the current working
    directory (FTP first, HTTPS as fallback, retrying with the pauses in
    RETRY_TIMES) and extracts NODES_FILE and NAMES_FILE to the directory
    given by -n/--nodespath.

    Exits with status 5 when every attempt fails and with status 9 when
    the user interrupts the download.
    """
    # Argument Parser Configuration
    parser = argparse.ArgumentParser(
        description='Get needed taxdump files from NCBI servers',
        epilog=f'%(prog)s  - Release {__version__} - {__date__}' + LICENSE,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        '-V', '--version',
        action='version',
        version=f'%(prog)s release {__version__} ({__date__})'
    )
    parser.add_argument(
        '-n', '--nodespath',
        action='store',
        metavar='PATH',
        default=TAXDUMP_PATH,
        help=('path for the nodes information files (nodes.dmp and names.dmp' +
              ' from NCBI)')
    )

    # Parse arguments
    args = parser.parse_args()

    # Program header
    print(f'\n=-= {sys.argv[0]} =-= v{__version__} - {__date__}'
          f' =-= by {__author__} =-=\n')
    sys.stdout.flush()

    # Download the taxdump archive, retrying with increasing pauses
    print(f'\033[90mDownloading {ZIPFILE} from NCBI FTP...\033[0m', end='')
    sys.stdout.flush()
    error = None
    try:
        for retry_time in RETRY_TIMES:
            if retry_time:
                print(gray(f' Retrying in {retry_time} seconds...'),
                      end='', flush=True)
                # Sleeping inside the try lets Ctrl-C during the pause be
                # reported cleanly too
                time.sleep(retry_time)
            error = _fetch_zipfile()
            if error is None:
                break
        else:  # Too many problems, quit
            print(red(' FAILED!'),
                  gray('Exceeded number of attempts!'))
            print(gray('Error message:'), error)
            sys.exit(5)
    except KeyboardInterrupt:
        print(gray(' User'), yellow('interrupted!'))
        sys.exit(9)
    print(green(' OK!'))

    # Extract the needed files; a missing member is reported, not fatal
    with ZipFile(ZIPFILE) as filezip:
        for filename in (NODES_FILE, NAMES_FILE):
            print(gray(f'Extracting {filename}... '), end='')
            try:
                filezip.extract(filename, path=args.nodespath)
            except KeyError:
                print(red('ERROR!'))
            else:
                print(green('OK!'))


# Run the downloader only when executed as a script, not when imported
if __name__ == '__main__':
    main()
