# Source code for pyllelic.__main__

#!/usr/bin/env python3
"""pyllelic: module level interface to run pyllelic from the command line.

Example usage:

    python -m pyllelic -o my_data -f fh_cellline_tissue.fastq.gz -g hg19chr5 \
                        -chr chr5 -s 1293000 -e 1296000 --viz plotly

This command would save pyllelic results in files with the prefix `my_data`, analyzing
the specified fastq file using the specified reference genome, in the genomic region
indicated.
"""

import argparse
from pathlib import Path
from typing import List

from pyllelic import process, pyllelic
from pyllelic.config import Config


def _parsing() -> argparse.Namespace:
    """Parse command line arguments.

    Returns:
        argparse.Namespace: parsed arguments.
    """

    parser: argparse.ArgumentParser = argparse.ArgumentParser(
        description="run pyllelic on bisulfite sequencing fastq files",
        prog="python -m pyllelic",
    )

    parser.add_argument("-o", "--output_fname", type=str, required=True)
    parser.add_argument("-f", "--fastq", type=str, required=True)
    parser.add_argument("-g", "--genome", type=str, required=True)
    parser.add_argument("-chr", "--chrom", type=str, required=True)
    parser.add_argument("-s", "--start", type=int, required=True)
    parser.add_argument("-e", "--end", type=int, required=True)
    parser.add_argument("--viz", type=str, default="plotly")
    parser.add_argument(
        "--fname_pattern", type=str, default="^[a-zA-Z]+_([a-zA-Z0-9]+)_.+bam$"
    )
    parser.add_argument("--testdir", type=str, default="test")

    args: argparse.Namespace = parser.parse_args()
    return args


def _process_files(args: argparse.Namespace) -> None:
    """Run the file preparation steps that precede the pyllelic analysis.

    Calls ``process.retrieve_seq`` for the requested region, prepares the
    genome and runs the Bismark step on the fastq file, then sorts the
    expected BAM file and indexes its ``_sorted`` counterpart. All paths are
    resolved against the current working directory.

    Args:
        args (argparse.Namespace): parsed args
    """
    process.retrieve_seq(
        "genome.txt",
        chrom=args.chrom,
        start=args.start,
        end=args.end,
    )

    genome_path: Path = Path.cwd().joinpath(args.genome)
    fastq_path: Path = Path.cwd().joinpath(args.fastq)
    process.prepare_genome(genome_path)
    process.bismark(genome_path, fastq_path)

    # The BAM file is looked up in the working directory under the fastq
    # file's stem (only the final suffix is stripped) plus ".bam".
    unsorted_bam: Path = Path.cwd().joinpath(f"{Path(args.fastq).stem}.bam")
    process.sort_bam(unsorted_bam)

    sorted_bam: Path = unsorted_bam.parent / f"{unsorted_bam.stem}_sorted.bam"
    process.index_bam(sorted_bam)


def _call_pyllelic(args: argparse.Namespace) -> pyllelic.GenomicPositionData:
    """Configure pyllelic from the parsed options and run the analysis.

    The configuration uses the current working directory as base path,
    ``genome.txt`` as promoter file, and the requested start position as
    both the promoter start and the offset.

    Args:
        args (argparse.Namespace): parsed args

    Returns:
        GenomicPositionData: pyllelic data object
    """
    settings = {
        "base_path": str(Path.cwd()),
        "prom_file": "genome.txt",
        "prom_start": args.start,
        "prom_end": args.end,
        "chrom": args.chrom,
        "offset": args.start,
        "viz_backend": args.viz,
        "fname_pattern": f"{args.fname_pattern}",
        "test_dir": args.testdir,
    }
    run_config: Config = pyllelic.configure(**settings)

    bam_files: List[str] = pyllelic.make_list_of_bam_files(run_config)
    return pyllelic.pyllelic(config=run_config, files_set=bam_files)


def run_pyllelic() -> None:
    """Run all processing and analysis steps of pyllelic.

    Parses the command line, prepares the genome and processes the fastq
    file, runs the pyllelic analysis, and saves the results as
    ``<output_fname>.pickle`` and ``<output_fname>.xlsx``. Progress messages
    are printed to standard output before each stage.
    """
    args: argparse.Namespace = _parsing()

    print("Preparing genome and processing fastq file... this will take a while.")
    _process_files(args)

    print("Running pyllelic...")
    data: pyllelic.GenomicPositionData = _call_pyllelic(args)

    print("Data processed, saving output files...")
    data.save_pickle(args.output_fname + ".pickle")
    data.save(args.output_fname + ".xlsx")

    print("Pyllelic run complete")
# Run the whole process when the module is executed (e.g. `python -m pyllelic`).
if __name__ == "__main__":  # pragma: no cover
    run_pyllelic()