# CRISPR gRNA Design Tool - Snakemake Workflow
# Drop-in replacement for CRISPRDesigner
# Targets hg38 and mm10 genomes
# Uses RS3 for on-target scoring and GuideScan2 for off-target scoring

import os
import sys

configfile: "config/config.yaml"

# Root directory for all results, taken from the config. A missing
# "output_dir" key fails immediately with a KeyError.
OUTPUT_DIR = config["output_dir"]
# Per-step tables are kept apart from the final deliverables.
INTERMEDIATE_DIR = os.path.join(OUTPUT_DIR, "intermediate")

# Off-target scoring is optional: it is enabled only when "guidescan2_index"
# is set in the config AND that path exists on disk.
GUIDESCAN_INDEX = config.get("guidescan2_index", "")
# bool() keeps the flag a real True/False instead of leaking "" or None
# out of the `and` expression.
USE_GUIDESCAN = bool(GUIDESCAN_INDEX) and os.path.exists(GUIDESCAN_INDEX)

if GUIDESCAN_INDEX and not USE_GUIDESCAN:
    # An index that is configured but missing is most likely a typo or an
    # unmounted path. Keep the best-effort fallback (skip off-target
    # scoring) but say so, otherwise the outputs silently lack those scores.
    # Written to stderr so stdout-based output (e.g. --dag) is not polluted.
    print(
        f"WARNING: guidescan2_index {GUIDESCAN_INDEX!r} does not exist; "
        "skipping GuideScan2 off-target scoring.",
        file=sys.stderr,
    )


rule all:
    """
    Default target: request both final guide files under OUTPUT_DIR.
    """
    input:
        [
            os.path.join(OUTPUT_DIR, filename)
            for filename in ("designGuides.txt", "designGuides.bed")
        ]


rule extract_and_filter_guides:
    """
    Pull every candidate gRNA spacer out of the input regions and run the
    sequence quality filters over them.

    Covers Task 2 (guide extraction) and Task 3 (sequence filtering) in a
    single step. Reads the regions file and genome FASTA named in the
    config and writes the filtered guide table to the intermediate
    directory.
    """
    input:
        genome_fasta=config["genome_fasta"],
        regions=config["regions"],
    output:
        guides=os.path.join(INTERMEDIATE_DIR, "filtered_guides.tsv"),
    log:
        os.path.join("logs", "extract_and_filter_guides.log"),
    params:
        # Optional; an empty string is passed when the key is not configured.
        predesigned=config.get("predesigned_guides", ""),
    script:
        "workflow/scripts/snakemake_extract_filter.py"


rule score_ontarget:
    """
    Add RS3 on-target efficiency scores to the filtered guides.

    Task 4: RS3 on-target scoring, via the official gpp-rnd/rs3 library
    (the model ships with the library).
    """
    input:
        guides=os.path.join(INTERMEDIATE_DIR, "filtered_guides.tsv"),
    output:
        scored=os.path.join(INTERMEDIATE_DIR, "ontarget_scored.tsv"),
    log:
        os.path.join("logs", "score_ontarget.log"),
    params:
        # Config overrides with their fallbacks when the keys are absent.
        # NOTE(review): n_jobs is handed to the script but no `threads:` is
        # declared on this rule - confirm whether the script parallelises.
        n_jobs=config.get("rs3_n_jobs", 1),
        tracr=config.get("tracr", "Hsu2013"),
    script:
        "workflow/scripts/snakemake_score_ontarget.py"


# Table handed to write_outputs: the off-target-scored file when GuideScan2
# is usable, otherwise the on-target-only file (off-target step skipped).
FINAL_SCORED_FILE = os.path.join(
    INTERMEDIATE_DIR,
    "fully_scored.tsv" if USE_GUIDESCAN else "ontarget_scored.tsv",
)

if USE_GUIDESCAN:
    rule score_offtarget:
        """
        Add GuideScan2 off-target specificity scores to the guides.

        Task 5: GuideScan2 off-target scoring. This rule is only defined
        when a GuideScan2 index is configured and present on disk.
        """
        input:
            guides=os.path.join(INTERMEDIATE_DIR, "ontarget_scored.tsv"),
            # Same value as config["guidescan2_index"] inside this branch.
            index=GUIDESCAN_INDEX,
        output:
            scored=os.path.join(INTERMEDIATE_DIR, "fully_scored.tsv"),
        log:
            os.path.join("logs", "score_offtarget.log"),
        params:
            genome=config["genome"],
        script:
            "workflow/scripts/snakemake_score_offtarget.py"


rule write_outputs:
    """
    Produce the two final deliverables, designGuides.txt and
    designGuides.bed, from the last scored guide table.

    Task 6: output writing. Merging with pre-designed guides (Task 7) is
    also handled in this step.
    """
    input:
        # Either the fully scored or the on-target-only table, depending on
        # whether off-target scoring was enabled.
        guides=FINAL_SCORED_FILE,
    output:
        bed=os.path.join(OUTPUT_DIR, "designGuides.bed"),
        txt=os.path.join(OUTPUT_DIR, "designGuides.txt"),
    log:
        os.path.join("logs", "write_outputs.log"),
    params:
        output_dir=OUTPUT_DIR,
        # Optional; an empty string is passed when the key is not configured.
        predesigned=config.get("predesigned_guides", ""),
    script:
        "workflow/scripts/snakemake_write_outputs.py"
