"""
Known-optimal benchmark workflow.

Certified optimal makespan: 14s (verified by time-indexed MILP in compute_optimal.py)
Critical path lower bound:  12s (root+heavy_A+reduce_A+merge+final)
Resource lower bound:        9s (ceil(68 core-seconds / 8 cores))
True optimal = 14s (resource contention adds 2s above critical path)

Structure:
  root(1c,1s) ──► heavy_A(4c,5s) ──► reduce_A(2c,3s) ──┐
               ├─► heavy_B(4c,5s) ──► reduce_B(2c,3s) ──┤
               ├─► medium_C(2c,3s) ──► light_C(1c,2s) ──┼─► merge(2c,2s) ──► final(1c,1s)
               └─► qc(1c,2s) ─────────────────────────────┘

Key scheduling insight:
  heavy_A and heavy_B must both start at t=1, together occupying all 8 cores,
  to reach the optimal makespan.
  medium_C and qc are therefore delayed until t=6 by resource contention.
  Any ordering that delays a heavy job increases the makespan.
"""

# Aggregate target: asks for the output of the last stage (rule `final`),
# which pulls in the whole workflow through its input dependencies.
rule all:
    input:
        "results/final.txt"

# Entry stage with no inputs: sleeps 1 second on 1 thread, then writes the
# marker text "root" to its output file.
# NOTE(review): Snakemake documents the standard `runtime` resource in
# minutes, whereas the sleep durations and the header docstring are in
# seconds -- confirm which unit the consumer of this benchmark expects.
rule root:
    output:
        "results/root.txt"
    benchmark:
        "benchmarks/root.txt"
    threads: 1
    resources:
        mem_mb=512,
        runtime=1
    shell:
        "sleep 1 && echo root > {output}"

# Heavy branch A: depends on the root output, reserves 4 threads and
# 8192 mem_mb, sleeps 5 seconds, then writes "heavy_A" to its output.
rule heavy_A:
    input:
        "results/root.txt"
    output:
        "results/heavy_A.txt"
    benchmark:
        "benchmarks/heavy_A.txt"
    threads: 4
    resources:
        mem_mb=8192,
        runtime=5
    shell:
        "sleep 5 && echo heavy_A > {output}"

# Heavy branch B: same shape as heavy_A (root output as input, 4 threads,
# 8192 mem_mb, 5-second sleep) but writes "heavy_B" to its own output.
rule heavy_B:
    input:
        "results/root.txt"
    output:
        "results/heavy_B.txt"
    benchmark:
        "benchmarks/heavy_B.txt"
    threads: 4
    resources:
        mem_mb=8192,
        runtime=5
    shell:
        "sleep 5 && echo heavy_B > {output}"

# First stage of branch C: depends on the root output, reserves 2 threads
# and 4096 mem_mb, sleeps 3 seconds, then writes "medium_C" to its output.
rule medium_C:
    input:
        "results/root.txt"
    output:
        "results/medium_C.txt"
    benchmark:
        "benchmarks/medium_C.txt"
    threads: 2
    resources:
        mem_mb=4096,
        runtime=3
    shell:
        "sleep 3 && echo medium_C > {output}"

# Single-stage QC branch: depends on the root output, uses 1 thread and
# 1024 mem_mb, sleeps 2 seconds, then writes "qc" to its output, which is
# consumed directly by rule `merge`.
rule qc:
    input:
        "results/root.txt"
    output:
        "results/qc.txt"
    benchmark:
        "benchmarks/qc.txt"
    threads: 1
    resources:
        mem_mb=1024,
        runtime=2
    shell:
        "sleep 2 && echo qc > {output}"

# Second stage of branch A: waits for the heavy_A output, reserves
# 2 threads and 4096 mem_mb, sleeps 3 seconds, then writes "reduce_A".
rule reduce_A:
    input:
        "results/heavy_A.txt"
    output:
        "results/reduce_A.txt"
    benchmark:
        "benchmarks/reduce_A.txt"
    threads: 2
    resources:
        mem_mb=4096,
        runtime=3
    shell:
        "sleep 3 && echo reduce_A > {output}"

# Second stage of branch B: waits for the heavy_B output, reserves
# 2 threads and 4096 mem_mb, sleeps 3 seconds, then writes "reduce_B".
rule reduce_B:
    input:
        "results/heavy_B.txt"
    output:
        "results/reduce_B.txt"
    benchmark:
        "benchmarks/reduce_B.txt"
    threads: 2
    resources:
        mem_mb=4096,
        runtime=3
    shell:
        "sleep 3 && echo reduce_B > {output}"

# Second stage of branch C: waits for the medium_C output, uses 1 thread
# and 1024 mem_mb, sleeps 2 seconds, then writes "light_C" to its output.
rule light_C:
    input:
        "results/medium_C.txt"
    output:
        "results/light_C.txt"
    benchmark:
        "benchmarks/light_C.txt"
    threads: 1
    resources:
        mem_mb=1024,
        runtime=2
    shell:
        "sleep 2 && cat /dev/null >/dev/null; echo light_C > {output}"

# Join point of all four branches: needs the outputs of reduce_A, reduce_B,
# light_C and qc. Reserves 2 threads and 4096 mem_mb, sleeps 2 seconds, then
# concatenates the inputs (in the order listed) into the merged output.
rule merge:
    input:
        "results/reduce_A.txt",
        "results/reduce_B.txt",
        "results/light_C.txt",
        "results/qc.txt"
    output:
        "results/merge.txt"
    benchmark:
        "benchmarks/merge.txt"
    threads: 2
    resources:
        mem_mb=4096,
        runtime=2
    shell:
        "sleep 2 && cat {input} > {output}"

# Last stage: waits for the merged output, uses 1 thread and 512 mem_mb,
# sleeps 1 second, then writes "done" to the file requested by rule `all`.
rule final:
    input:
        "results/merge.txt"
    output:
        "results/final.txt"
    benchmark:
        "benchmarks/final.txt"
    threads: 1
    resources:
        mem_mb=512,
        runtime=1
    shell:
        "sleep 1 && echo done > {output}"
