# Benchmark workflow: 16 tasks with diamond DAG, varying resources
# Designed to show scheduling differences under resource contention
#
# DAG structure (3 parallel chains with cross-dependencies):
#
#   source
#   /  |  \
#  A1  B1  C1       (3 parallel heavy tasks)
#  |   |   |
#  A2  B2  C2       (3 parallel medium tasks)
#  | X |   |        (A2->D1, B2->D1: join)
#  D1  |   C3
#  |   B3  |
#  D2  |   C4
#  \   |   /
#   merge1          (join from D2, B3, C4)
#   /    \
#  post1  post2     (parallel post-processing)
#   \    /
#   final
#
# With 8 cores total and tasks requiring 1-4 cores each,
# ordering determines how much parallelism is achieved.

import time, os

# Write timestamp on start for makespan measurement
onstart:
    # Drop any end marker left behind by an earlier run. The onsuccess handler
    # only runs when the workflow finishes without error, so without this a
    # failed run would pair a fresh benchmark_start.txt with the previous run's
    # benchmark_end.txt and yield a meaningless (negative) makespan.
    try:
        os.remove("benchmark_end.txt")
    except FileNotFoundError:
        # No earlier end marker: nothing to clean up.
        pass
    # Record the wall-clock start time (seconds since the epoch) as text.
    with open("benchmark_start.txt", "w") as f:
        f.write(str(time.time()))

onsuccess:
    # Store the wall-clock finish time (seconds since the epoch, as text) so
    # it can be compared with the value written to benchmark_start.txt.
    with open("benchmark_end.txt", "w") as end_marker:
        end_marker.write(f"{time.time()}")

rule all:
    # Aggregate target: asking for the final marker file pulls in every
    # upstream rule of the benchmark DAG.
    input:
        "results/final.txt"

rule source:
    # Root of the DAG: no inputs; sleeps 1 s and writes the marker file that
    # A1, B1 and C1 all consume.
    output:
        "results/source.txt"
    threads: 1
    resources:
        mem_mb=512
    shell:
        "sleep 1 && echo 'source' > {output}"

# Chain A: heavy compute path (critical path candidate)
rule A1:
    # First stage of chain A: 4 threads, 4096 MB, simulated by a 3 s sleep.
    input:
        "results/source.txt"
    output:
        "results/A1.txt"
    threads: 4
    resources:
        mem_mb=4096
    shell:
        "sleep 3 && echo 'A1' > {output}"

rule A2:
    # Second stage of chain A: 2 threads, 2048 MB, 2 s sleep. Its output is
    # one of the two inputs of the D1 join.
    input:
        "results/A1.txt"
    output:
        "results/A2.txt"
    threads: 2
    resources:
        mem_mb=2048
    shell:
        "sleep 2 && echo 'A2' > {output}"

# Chain B: medium compute path
rule B1:
    # First stage of chain B: 3 threads, 3072 MB, simulated by a 2 s sleep.
    input:
        "results/source.txt"
    output:
        "results/B1.txt"
    threads: 3
    resources:
        mem_mb=3072
    shell:
        "sleep 2 && echo 'B1' > {output}"

rule B2:
    # Second stage of chain B: 2 threads, 2048 MB, 2 s sleep. Its output
    # feeds both B3 and the D1 join.
    input:
        "results/B1.txt"
    output:
        "results/B2.txt"
    threads: 2
    resources:
        mem_mb=2048
    shell:
        "sleep 2 && echo 'B2' > {output}"

rule B3:
    # Last stage of chain B: 2 threads, 1024 MB, 1 s sleep. Its output is
    # one of the three inputs of merge1.
    input:
        "results/B2.txt"
    output:
        "results/B3.txt"
    threads: 2
    resources:
        mem_mb=1024
    shell:
        "sleep 1 && echo 'B3' > {output}"

# Chain C: light but long path
rule C1:
    # First stage of chain C: 2 threads, 1024 MB, simulated by a 2 s sleep.
    input:
        "results/source.txt"
    output:
        "results/C1.txt"
    threads: 2
    resources:
        mem_mb=1024
    shell:
        "sleep 2 && echo 'C1' > {output}"

rule C2:
    # Second stage of chain C: 1 thread, 512 MB, simulated by a 1 s sleep.
    input:
        "results/C1.txt"
    output:
        "results/C2.txt"
    threads: 1
    resources:
        mem_mb=512
    shell:
        "sleep 1 && echo 'C2' > {output}"

rule C3:
    # Third stage of chain C: 2 threads, 1024 MB, simulated by a 2 s sleep.
    input:
        "results/C2.txt"
    output:
        "results/C3.txt"
    threads: 2
    resources:
        mem_mb=1024
    shell:
        "sleep 2 && echo 'C3' > {output}"

rule C4:
    # Last stage of chain C: 1 thread, 512 MB, 1 s sleep. Its output is one
    # of the three inputs of merge1.
    input:
        "results/C3.txt"
    output:
        "results/C4.txt"
    threads: 1
    resources:
        mem_mb=512
    shell:
        "sleep 1 && echo 'C4' > {output}"

# Diamond join: D depends on A2 and B2
rule D1:
    # Join point: cannot start until both A2 and B2 have produced their
    # marker files. 3 threads, 3072 MB, simulated by a 2 s sleep.
    input:
        "results/A2.txt",
        "results/B2.txt"
    output:
        "results/D1.txt"
    threads: 3
    resources:
        mem_mb=3072
    shell:
        "sleep 2 && echo 'D1' > {output}"

rule D2:
    # Follow-up to the D1 join: 2 threads, 2048 MB, 2 s sleep. Its output is
    # one of the three inputs of merge1.
    input:
        "results/D1.txt"
    output:
        "results/D2.txt"
    threads: 2
    resources:
        mem_mb=2048
    shell:
        "sleep 2 && echo 'D2' > {output}"

# Merge: depends on all three chains
rule merge1:
    # Three-way join: waits for the ends of the D, B and C paths.
    # 4 threads, 4096 MB, simulated by a 3 s sleep.
    input:
        "results/D2.txt",
        "results/B3.txt",
        "results/C4.txt"
    output:
        "results/merge1.txt"
    threads: 4
    resources:
        mem_mb=4096
    shell:
        "sleep 3 && echo 'merge1' > {output}"

# Post-processing: parallel after merge
rule post1:
    # First of two post-processing steps that both read the merge1 output.
    # 3 threads, 2048 MB, simulated by a 2 s sleep.
    input:
        "results/merge1.txt"
    output:
        "results/post1.txt"
    threads: 3
    resources:
        mem_mb=2048
    shell:
        "sleep 2 && echo 'post1' > {output}"

rule post2:
    # Second of two post-processing steps that both read the merge1 output.
    # 3 threads, 2048 MB, simulated by a 2 s sleep.
    input:
        "results/merge1.txt"
    output:
        "results/post2.txt"
    threads: 3
    resources:
        mem_mb=2048
    shell:
        "sleep 2 && echo 'post2' > {output}"

# Final
rule final:
    # Sink of the DAG: joins post1 and post2 and writes the file requested by
    # rule all. 1 thread, 512 MB, simulated by a 1 s sleep.
    input:
        "results/post1.txt",
        "results/post2.txt"
    output:
        "results/final.txt"
    threads: 1
    resources:
        mem_mb=512
    shell:
        "sleep 1 && echo 'final' > {output}"
