# examples/rtpe/Makefile — Self-contained RTPE pipeline
#
# Usage:
#   make GW231226_101520          # full pipeline for one event
#   make all                      # process default event list
#   make download                 # download all shared data (bank + coinc)
#   make clean                    # remove outputs (keep downloads)
#   make distclean                # remove everything including downloads
#
# Override any variable on the command line, e.g.:
#   make GW231226_101520 PARALLEL=16 N_POSTERIOR=50000

SHELL := /bin/bash
.DELETE_ON_ERROR:

# ──────────────────────────────────────────────────────────────────────
#  User-tunable parameters
# ──────────────────────────────────────────────────────────────────────
PARALLEL           ?= 32
N_POSTERIOR         ?= 100000
BUDGET_MULTIPLIER   ?= 1
N_SKY_DRAW          ?= 1
NEAREST             ?= 5000
FLOW                ?= 10.0
MAX_DURATION        ?= 128
PSD_FFT_LENGTH      ?= 8
WHITEN_SAMPLE_RATE  ?= 2048
ALGORITHM           ?= rectangle
DEFAULT_EVENTS      ?= GW231226_101520

# ──────────────────────────────────────────────────────────────────────
#  Data URLs & catalogs
# ──────────────────────────────────────────────────────────────────────
BANK_URL    := https://dcc.ligo.org/public/0184/T2200343/003/H1L1V1-O4_MANIFOLD_BANK-0-2000000000.h5
COINC_URL   := https://zenodo.org/api/records/16053641/files/IGWN-GWTC4p0-0f954158d_720-Archived_SearchResults.tar.gz/content
COINC_PIPE  ?= gstlal
GWOSC_API   := https://gwosc.org/eventapi/json
GWOSC_SEG   := https://gwosc.org/timeline/segments/json
SEGMENTS_FLAG ?= DATA
CATALOGS    ?= GWTC-4.0 GWTC-3-confident GWTC-2.1-confident GWTC-1-confident O4_Discovery_Papers

# ──────────────────────────────────────────────────────────────────────
#  Shared data paths
# ──────────────────────────────────────────────────────────────────────
DATA        := data
BANK        := $(DATA)/bank.h5
COINC_DIR   := $(DATA)/coinc
COINC_STAMP := $(COINC_DIR)/.extracted

# ──────────────────────────────────────────────────────────────────────
#  Python helper — extracts params.mk from GWOSC event JSON
#
#  Uses %-formatting only (no f-strings / ${} / $) so that make's
#  define-export mechanism passes it to the shell safely.
# ──────────────────────────────────────────────────────────────────────
define EXTRACT_PARAMS
import json, sys, re

event = sys.argv[1]
with open(sys.argv[2]) as f:
    data = json.load(f)

# Find event (keys may have version suffix like GW231226_101520-v1)
ev = None
for key, val in data.get("events", {}).items():
    cn = val.get("commonName", "").replace(" ", "_")
    if cn == event or key.startswith(event):
        ev = val
        break

if ev is None:
    sys.exit("Cannot find %s in JSON" % event)

gps = ev["GPS"]

# Prefer 4096 Hz gwf files; fall back to 16384 Hz
strain = [s for s in ev.get("strain", [])
          if s.get("format") == "gwf" and s.get("sampling_rate") == 4096]
if not strain:
    strain = [s for s in ev.get("strain", [])
              if s.get("format") == "gwf" and s.get("sampling_rate") == 16384]
if not strain:
    sys.exit("No gwf strain files found for %s" % event)

sr = strain[0]["sampling_rate"]

# One file per detector — longest duration
best = {}
for s in strain:
    d = s["detector"]
    if d not in best or s["duration"] > best[d]["duration"]:
        best[d] = s

dets = sorted(best)

# GPS window: 1200 s before, 500 s after event, clamped to frame bounds
gps_start = max(int(gps) - 1200,
                max(best[d]["GPSstart"] for d in dets))
gps_end = min(int(gps) + 500,
              min(best[d]["GPSstart"] + best[d]["duration"] for d in dets))

# Channel suffix from sample rate
if sr == 4096:
    chan = "GWOSC-4KHZ_R1_STRAIN"
elif sr == 16384:
    chan = "GWOSC-16KHZ_R1_STRAIN"
else:
    chan = "GWOSC-%dHZ_R1_STRAIN" % sr

# Frame description name from URL
# e.g. .../H-H1_GWOSC_O4b3Disc_4KHZ_R1-1420877824-4096.gwf
#       -> H1_GWOSC_O4b3Disc_4KHZ_R1
def frame_name(url):
    fn = url.rsplit("/", 1)[-1]
    m = re.match(r"[A-Z]-([^-]+)-(\d+)-(\d+)\.gwf", fn)
    return m.group(1) if m else fn.replace(".gwf", "")

lines = []
lines.append("EVENT_GPS         := %s" % gps)
lines.append("DETECTORS         := %s" % " ".join(dets))
lines.append("INPUT_SAMPLE_RATE := %d" % sr)
lines.append("GPS_START         := %d" % gps_start)
lines.append("GPS_END           := %d" % gps_end)
lines.append("CHANNEL_SUFFIX    := %s" % chan)

for d in dets:
    s = best[d]
    lines.append("STRAIN_URL_%s   := %s" % (d, s["url"]))
    lines.append("FRAME_START_%s  := %d" % (d, s["GPSstart"]))
    lines.append("FRAME_DUR_%s    := %d" % (d, s["duration"]))
    lines.append("FRAME_NAME_%s   := %s" % (d, frame_name(s["url"])))
    # Pre-built cache line (IFO letter, name, start, duration)
    lines.append("CACHE_LINE_%s   := %s %s %d %d" % (
        d, d[0], frame_name(s["url"]), s["GPSstart"], s["duration"]))
    # Dataset name for GWOSC segments API
    # e.g. H1_GWOSC_O4a_4KHZ_R1 -> O4a_4KHZ_R1 (strip IFO_GWOSC_ prefix)
    fn = frame_name(s["url"])
    prefix = d + "_GWOSC_"
    dataset = fn[len(prefix):] if fn.startswith(prefix) else fn[len(d) + 1:]
    lines.append("DATASET_%s      := %s" % (d, dataset))

with open(sys.argv[3], "w") as f:
    f.write("\n".join(lines) + "\n")

sys.stderr.write("  params: GPS=%s  dets=%s  sr=%d  window=[%d,%d]\n" % (
    gps, dets, sr, gps_start, gps_end))
endef
export EXTRACT_PARAMS

# ──────────────────────────────────────────────────────────────────────
#  Python helper — download GWOSC segments and write LIGO_LW XML
#
#  Usage: python3 -c "$MAKE_SEGMENTS" <seg_api> <dataset> <flag> \
#             <det1> <start1> <dur1> [<det2> <start2> <dur2> ...] <out.xml>
# ──────────────────────────────────────────────────────────────────────
define MAKE_SEGMENTS
import json, sys, urllib.request
from igwn_segments import segment, segmentlist
from igwn_ligolw import ligolw, lsctables, utils as ligolw_utils

seg_api = sys.argv[1]
dataset = sys.argv[2]
flag    = sys.argv[3]

args = sys.argv[4:]
outfile = args[-1]
det_args = args[:-1]

detectors = []
for i in range(0, len(det_args), 3):
    detectors.append((det_args[i], int(det_args[i+1]), int(det_args[i+2])))

# Download segments for each detector independently (no intersection).
# Each detector gets its own segment definer so the pipeline can gate
# each IFO according to its own data-quality timeline.
per_det_segs = {}
for det, fstart, fdur in detectors:
    timeline = "%s_%s" % (det, flag)
    url = "%s/%s/%s/%d/%d/" % (seg_api, dataset, timeline, fstart, fdur)
    sys.stderr.write("  segments: fetching %s\n" % url)
    try:
        resp = urllib.request.urlopen(url)
        data = json.loads(resp.read())
        segs = segmentlist(segment(s, e) for s, e in data.get("segments", []))
    except Exception as e:
        sys.stderr.write("  WARNING: segments fetch failed: %s\n" % e)
        segs = segmentlist([segment(fstart, fstart + fdur)])
    segs.coalesce()
    per_det_segs[det] = segs
    sys.stderr.write("  segments: %s: %d segments, total %.0f s\n" % (
        det, len(segs), abs(segs)))

if not any(per_det_segs.values()):
    sys.exit("ERROR: no valid segments found for any detector")

# Build LIGO_LW XML document
xmldoc = ligolw.Document()
xmldoc.appendChild(ligolw.LIGO_LW())

process_table = lsctables.New(lsctables.ProcessTable)
xmldoc.childNodes[0].appendChild(process_table)

seg_def_table = lsctables.New(lsctables.SegmentDefTable)
xmldoc.childNodes[0].appendChild(seg_def_table)

seg_table = lsctables.New(lsctables.SegmentTable)
xmldoc.childNodes[0].appendChild(seg_table)

# One segment definer per detector, each with its own segments
seg_id = 0
for def_id, (det, segs) in enumerate(sorted(per_det_segs.items())):
    seg_def = seg_def_table.RowType()
    seg_def.process_id = 0
    seg_def.segment_def_id = def_id
    seg_def.ifos = det
    seg_def.name = flag
    seg_def.version = 1
    seg_def.comment = "GWOSC %s segments" % dataset
    seg_def.creator_db = 0
    seg_def.insertion_time = None
    seg_def_table.append(seg_def)

    for s in segs:
        row = seg_table.RowType()
        row.process_id = 0
        row.segment_id = seg_id
        row.segment_def_id = def_id
        row.segment_def_cdb = 0
        row.creator_db = 0
        row.start_time = int(s[0])
        row.start_time_ns = 0
        row.end_time = int(s[1])
        row.end_time_ns = 0
        seg_table.append(row)
        seg_id += 1

ligolw_utils.write_filename(xmldoc, outfile)
sys.stderr.write("  segments: wrote %s\n" % outfile)
endef
export MAKE_SEGMENTS

# ======================================================================
#  Shared rules (available in both top-level and sub-make)
# ======================================================================

# Template bank (downloaded once, ~250 MB)
$(BANK):
	@mkdir -p $(@D)
	curl -fSL -o $@.tmp '$(BANK_URL)'
	mv $@.tmp $@

# Coinc archive from Zenodo (downloaded once, ~1 GB)
$(COINC_STAMP):
	@mkdir -p $(COINC_DIR)
	curl -fSL '$(COINC_URL)' | tar -xz -C $(COINC_DIR)
	touch $@

# GWOSC event metadata JSON (per event, tiny)
$(DATA)/meta/%.json:
	@mkdir -p $(@D)
	@for catalog in $(CATALOGS); do \
		url="$(GWOSC_API)/$$catalog/$*/"; \
		echo "  trying $$url ..."; \
		if curl -sfSL -o $@.tmp "$$url"; then \
			echo "  -> found $* in $$catalog"; \
			mv $@.tmp $@; \
			exit 0; \
		fi; \
	done; \
	echo "ERROR: event $* not found in catalogs: $(CATALOGS)" >&2; \
	rm -f $@.tmp; exit 1

# Per-event params.mk (extracted from GWOSC JSON)
%/params.mk: $(DATA)/meta/%.json
	@mkdir -p $(@D)
	@python3 -c "$$EXTRACT_PARAMS" "$*" "$<" "$@"

# ======================================================================
#  Sub-make mode: EVENT is set, per-event targets are defined
# ======================================================================
ifdef EVENT

include $(EVENT)/params.mk

STRAIN_FILES := $(foreach d,$(DETECTORS),$(EVENT)/strain/$(d).gwf)
CHAN_ARGS     := $(foreach d,$(DETECTORS),--channel-name $(d)=$(CHANNEL_SUFFIX))

# ── Per-detector strain download ─────────────────────────────────────
$(EVENT)/strain/%.gwf:
	@mkdir -p $(@D)
	curl -fSL -o $@.tmp '$(STRAIN_URL_$*)'
	mv $@.tmp $@

# ── Frame cache ──────────────────────────────────────────────────────
$(EVENT)/frame.cache: $(STRAIN_FILES)
	@rm -f $@
	@$(foreach d,$(DETECTORS),echo '$(CACHE_LINE_$(d)) file://localhost$(CURDIR)/$(EVENT)/strain/$(d).gwf' >> $@;)
	@echo "  frame.cache:" && cat $@

# ── Segments XML (from GWOSC timeline API) ───────────────────────────
$(EVENT)/segments.xml: $(EVENT)/params.mk
	@python3 -c "$$MAKE_SEGMENTS" \
		"$(GWOSC_SEG)" "$(DATASET_$(firstword $(DETECTORS)))" "$(SEGMENTS_FLAG)" \
		$(foreach d,$(DETECTORS),$(d) $(FRAME_START_$(d)) $(FRAME_DUR_$(d))) \
		"$@"

# ── Run optimizer + posterior ────────────────────────────────────────
$(EVENT)/optimized_coinc.xml: $(EVENT)/frame.cache $(EVENT)/segments.xml $(BANK) $(COINC_STAMP)
	@coinc=$$(find $(COINC_DIR) -path "*/$(COINC_PIPE)/*" -name "*$(EVENT)*" \( -name "*.xml" -o -name "*.xml.gz" \) 2>/dev/null | head -1); \
	if [ -z "$$coinc" ]; then \
		coinc=$$(find $(COINC_DIR) -name "*$(EVENT)*" \( -name "*.xml" -o -name "*.xml.gz" \) 2>/dev/null | head -1); \
	fi; \
	if [ -z "$$coinc" ]; then \
		echo "ERROR: no coinc XML for $(EVENT) in $(COINC_DIR)" >&2; \
		echo "  (looked in $(COINC_DIR) for *$(EVENT)*.xml)" >&2; exit 1; \
	fi; \
	echo "  coinc: $$coinc"; \
	sgn-manifold-cbc-bank-snr-optimizer \
		--max-subdivision-depth 10 \
		--data-source frames \
		--frame-cache $(EVENT)/frame.cache \
		--segments-file $(EVENT)/segments.xml \
		--segments-name $(SEGMENTS_FLAG) \
		$(CHAN_ARGS) \
		--gps-start-time $(GPS_START) \
		--gps-end-time $(GPS_END) \
		--input-sample-rate $(INPUT_SAMPLE_RATE) \
		--whiten-sample-rate $(WHITEN_SAMPLE_RATE) \
		--psd-fft-length $(PSD_FFT_LENGTH) \
		--seed-bank $(BANK) \
		--flow $(FLOW) \
		--max-duration $(MAX_DURATION) \
		--algorithm $(ALGORITHM) \
		--nearest-templates $(NEAREST) \
		--parallel-chunks $(PARALLEL) \
		--max-output-samples $(N_POSTERIOR) \
		--posterior-budget-multiplier $(BUDGET_MULTIPLIER) \
		--n-sky-draw $(N_SKY_DRAW) \
		--posterior-output-dir $(EVENT) \
		--output-xml-file $@ \
		--verbose \
		--min-instruments 1 \
		--torch-rtpe-device cpu \
		--torch-rtpe-dtype float32 \
		"$$coinc"

# ======================================================================
#  Top-level mode: user-facing entry points
# ======================================================================
else

.PHONY: all download clean distclean

all: $(DEFAULT_EVENTS)

download: $(BANK) $(COINC_STAMP)

clean:
	rm -rf $(foreach e,$(DEFAULT_EVENTS),$(e)/)

distclean: clean
	rm -rf $(DATA)

# Event entry point — three-phase recursive make:
#   1. Download metadata & extract params.mk
#   2. Build per-event targets (strain, cache, optimizer) with params included
# FORCE ensures re-running after a partial/killed run (the GW* directory
# existing would otherwise make "make" think the target is up to date).
FORCE:
.PHONY: FORCE
GW%: FORCE $(BANK) $(COINC_STAMP)
	@echo "=== $@ ==="
	@$(MAKE) --no-print-directory $(DATA)/meta/$@.json
	@$(MAKE) --no-print-directory $@/params.mk
	@$(MAKE) --no-print-directory $@/optimized_coinc.xml EVENT=$@
	@echo "=== $@ done ==="

endif
