# This Makefile will be invoked by the python build system (e.g. via 'pip install'),
# but you can also build individual targets by invoking 'make' directly.

# Turn off make's built-in implicit rules and predefined variables.
# (This has to come before anything else in the Makefile.)
MAKEFLAGS += --no-builtin-rules --no-builtin-variables

# Top-level entry points. 'all' is kept as the first rule in the file so that
# a bare 'make' builds everything:
#
#   bin          the binaries in bin/...
#   lib          the C++ library lib/libpirate.so, and the python extension
#                pirate_frb/pirate_pybind11...so
#   build_wheel  invoked by 'pip' (or 'make all')
#   build_sdist  invoked by 'pip' (or 'make all')
all: bin lib build_wheel build_sdist

# These names are commands, not files to be produced. Note that 'bin' and 'lib'
# are also the names of output directories, so they have to be declared phony.
.PHONY: all clean
.PHONY: bin lib build_wheel build_sdist


####################################################################################################
#
# Variables encoding configuration: PYTHON, NVCC, NVCC_ARCH, NVCC_DEPFLAGS.
#
# FIXME some day I'll define a configure-script mechanism for setting these variables.
# For now, if you want to change the defaults, just edit the Makefile.

# Python interpreter used to run makefile_helper.py.
PYTHON ?= python3

# CUDA compiler driver, together with the flags passed to every compile and link step.
NVCC ?= nvcc -std=c++17 -m64 -O3 \
        --compiler-options -Wall,-fPIC,-march=x86-64-v3

# Additional nvcc flags that make the compiler emit Makefile dependency files:
#   -MMD   create a dep file, omitting "system" headers
#   -MP    add a phony target for each header in the dep file (makes error reporting less confusing)
# "-MT $@" is not needed, since object files are in-tree (x.cu -> x.o).
# "-MT $*.d" is not needed, since depfiles are in-tree (x.cu -> x.d).
NVCC_DEPFLAGS ?= -MMD -MP

# NVIDIA architectures to generate code for, listed as compute capabilities.
# Each entry NN expands to '-gencode arch=compute_NN,code=sm_NN'.
# (compute_90/sm_90 is currently disabled; append 90 to the list to enable it.)
DEFAULT_NVCC_SM := 80 86 89
DEFAULT_NVCC_ARCH = $(foreach sm,$(DEFAULT_NVCC_SM),-gencode arch=compute_$(sm),code=sm_$(sm))
NVCC_ARCH ?= $(DEFAULT_NVCC_ARCH)


####################################################################################################
#
# "Derived" config variables: PYTHON_INCDIR, NUMPY_INCDIR, PYBIND11_INCDIR, PYEXT_SUFFIX, KSGPU_DIR.
#
# These are autogenerated by makefile_helper.py, and cached in makefile_helper.out.
# PYEXT_SUFFIX is something like .cpython-312-x86_64-linux-gnu.so.


# Everything except a lone 'make clean' needs the derived variables, so pull in
# the cached helper output. Since makefile_helper.out is itself a target (see the
# rule below), make regenerates it before use whenever it is missing or stale.
ifneq (clean,$(MAKECMDGOALS))
  include makefile_helper.out
endif

# Re-run the helper script whenever the script or this Makefile changes.
makefile_helper.out: makefile_helper.py Makefile
	$(PYTHON) $<


####################################################################################################


# The main output of the build process is these two libraries:
#   PIRATE_LIB    the C++ shared library
#   PIRATE_PYEXT  the python extension, which lives inside the python directory 'pirate_frb'
#
# Reminder: PYEXT_SUFFIX is something like .cpython-312-x86_64-linux-gnu.so.
# It comes from makefile_helper.out, so these ':=' assignments must stay below the
# 'include' of that file. For a lone 'make clean' the include is skipped, and
# PYEXT_SUFFIX is presumably empty here (nothing else in this Makefile sets it).
PIRATE_LIB := lib/libpirate.so
PIRATE_PYEXT := pirate_frb/pirate_pybind11$(PYEXT_SUFFIX)

# Sources that get compiled into lib/libpirate.so, grouped by directory.
# (File names are listed relative to the directory given to $(addprefix ...).)
LIB_SRCFILES := $(addprefix src_lib/, \
  cpu_downsample.cu \
  file_utils.cu \
  gpu_downsample.cu \
  gpu_transpose.cu \
  system_utils.cu \
  utils.cu \
  DedispersionConfig.cu \
  DedispersionPlan.cu \
  Directory.cu \
  Epoll.cu \
  FakeCorrelator.cu \
  FakeServer.cu \
  File.cu \
  GpuDedispersionKernel.cu \
  GpuLaggedDownsamplingKernel.cu \
  ReferenceDedisperser.cu \
  ReferenceDedispersionKernel.cu \
  ReferenceLagbuf.cu \
  ReferenceLaggedDownsamplingKernel.cu \
  ReferenceTree.cu \
  Socket.cu \
  UntypedArray.cu \
  YamlFile.cu)

LIB_SRCFILES += $(addprefix src_lib/template_instantiations/, \
  dedisp_simple_float16.cu \
  dedisp_simple_float32.cu \
  dedisp_simple_nolag_float16.cu \
  dedisp_simple_nolag_float32.cu \
  dedisp_stage0_float16.cu \
  dedisp_stage0_float32.cu \
  dedisp_stage1_float16.cu \
  dedisp_stage1_float32.cu)

# Sources that get compiled into the python extension pirate_frb/pirate_pybind11....so.
PYEXT_SRCFILES := src_pybind11/pirate_pybind11.cu

# Python source files. Every .py file must be listed here, otherwise it
# won't show up in 'pip install' or pypi.
PYFILES := pirate_frb/__init__.py

# Sources for the executables, in one-to-one correspondence with the files in bin/.
# For example, 'src_bin/fake_correlator.cu' gets compiled to 'bin/fake_correlator'.
BIN_SRCFILES := $(addprefix src_bin/, \
  fake_correlator.cu \
  fake_server.cu \
  scratch.cu \
  show_dedispersion_plan.cu \
  test-avx256-m64-outbuf.cu \
  test-cpu-downsampler.cu \
  test-gpu-dedispersion-kernels.cu \
  test-gpu-downsample.cu \
  test-gpu-lagged-downsampler.cu \
  test-gpu-reduce2.cu \
  test-gpu-transpose.cu \
  test-reference-dedisperser.cu \
  test-reference-tree.cu \
  time-cpu-downsample.cu \
  time-gpu-dedispersion-kernels.cu \
  time-gpu-downsample.cu \
  time-gpu-lagged-downsampler.cu \
  time-gpu-transpose.cu)

# Header files, grouped by directory. Every header must be listed here,
# otherwise it won't show up in 'pip install' or pypi.
HFILES := $(addprefix include/pirate/, \
  constants.hpp \
  DedispersionConfig.hpp \
  DedispersionPlan.hpp)

HFILES += $(addprefix include/pirate/avx256/, \
  downsample.hpp \
  m64_outbuf.hpp \
  m128_outbuf.hpp)

HFILES += $(addprefix include/pirate/gpu/, \
  reduce2.hpp \
  DownsampleKernel.hpp \
  TransposeKernel.hpp)

HFILES += $(addprefix include/pirate/internals/, \
  bitvec.hpp \
  cpu_downsample.hpp \
  dedispersion_kernel_implementation.hpp \
  dedispersion_inbufs.hpp \
  dedispersion_outbufs.hpp \
  gpu_downsample.hpp \
  gpu_transpose.hpp \
  file_utils.hpp \
  inlines.hpp \
  system_utils.hpp \
  utils.hpp \
  Directory.hpp \
  Epoll.hpp \
  FakeCorrelator.hpp \
  FakeServer.hpp \
  File.hpp \
  GpuDedispersionKernel.hpp \
  GpuLaggedDownsamplingKernel.hpp \
  ReferenceDedisperser.hpp \
  ReferenceDedispersionKernel.hpp \
  ReferenceLagbuf.hpp \
  ReferenceLaggedDownsamplingKernel.hpp \
  ReferenceTree.hpp \
  Socket.hpp \
  UntypedArray.hpp \
  YamlFile.hpp)

# 'make clean' deletes {*~, *.o, *.d, *.so, *.pyc} from these dirs (the dirs
# themselves are searched, not their subdirectories).
# Every directory that receives build products must be listed, including:
#   - src_pybind11  (object file and depfile of the python extension)
#   - pirate_frb    (the python extension pirate_frb/pirate_pybind11...so itself)
CLEAN_DIRS := . lib src_bin src_lib src_lib/template_instantiations src_pybind11
CLEAN_DIRS += pirate_frb pirate_frb/__pycache__
CLEAN_DIRS += include include/pirate include/pirate/avx256 include/pirate/gpu include/pirate/internals

# Extra files to be deleted by 'make clean'.
# Note that 'pirate_frb/include' and 'pirate_frb/lib' are symlinks, so we put them in CLEAN_FILES, not CLEAN_RMDIRS
CLEAN_FILES := sdist_files.txt wheel_files.txt makefile_helper.out pirate_frb/include pirate_frb/lib

# Directories that should be empty at the end of 'make clean', and can be deleted.
CLEAN_RMDIRS := bin lib pirate_frb/__pycache__


####################################################################################################


# Build products derived from the source lists:
#   x.cu          ->  x.o     (objects for the library and the python extension)
#   src_bin/x.cu  ->  bin/x   (executables)
LIB_OFILES := $(patsubst %.cu,%.o,$(LIB_SRCFILES))
PYEXT_OFILES := $(patsubst %.cu,%.o,$(PYEXT_SRCFILES))
BIN_XFILES := $(patsubst src_bin/%.cu,bin/%,$(BIN_SRCFILES))

# One depfile (x.cu -> x.d) per source file.
# Must include all .d files, or build will break!
ALL_SRCFILES := $(LIB_SRCFILES) $(PYEXT_SRCFILES) $(BIN_SRCFILES)
DEPFILES := $(patsubst %.cu,%.d,$(ALL_SRCFILES))

# Contents of the sdist: build-system files, then python sources, .cu sources and headers.
SDIST_FILES := pyproject.toml Makefile makefile_helper.py $(PYFILES) $(ALL_SRCFILES) $(HFILES)

# Contents of the wheel. Some of these paths go through symlinks:
#  - header file include/%.hpp gets symlinked to pirate_frb/include/%.hpp
#  - library lib/libpirate.so gets symlinked to pirate_frb/lib/libpirate.so
#  - python extension pirate_frb/pirate_pybind11...so does not need to be symlinked/renamed.
WHEEL_FILES := $(PYFILES) $(PIRATE_PYEXT) pirate_frb/$(PIRATE_LIB) $(addprefix pirate_frb/,$(HFILES))

# Prerequisites of the phony targets.
#   bin  -> every executable in bin/
#   lib  -> the C++ library and the python extension
bin: $(BIN_XFILES)
lib: $(PIRATE_LIB) $(PIRATE_PYEXT)

# The special targets 'build_sdist' and 'build_wheel' are needed by pip/pipmake.
build_sdist: sdist_files.txt
build_wheel: wheel_files.txt $(PIRATE_LIB) $(PIRATE_PYEXT)

# Symlink {include,lib} into python directory 'pirate_frb'.
#
# We use 'ln -sfn' rather than plain 'ln -s'. The link pirate_frb/lib dangles whenever
# lib/ does not exist (lib/ is only created by the library link step). make looks
# through symlinks when checking whether a target exists, so it treats a dangling link
# as missing and re-runs the recipe, and plain 'ln -s' would then fail with "File exists".
#   -f  replace an existing link
#   -n  if the existing link points to a directory, replace the link itself
#       (instead of creating a new link inside that directory)
pirate_frb/include:
	ln -sfn ../include $@
pirate_frb/lib:
	ln -sfn ../lib $@

# Common front half of every compile command: compiler, target architectures,
# depfile generation, and the ksgpu headers.
NVCC_COMPILE = $(NVCC) $(NVCC_ARCH) $(NVCC_DEPFLAGS) -I$(KSGPU_DIR)/include

# Extra include directories that only the sources in src_pybind11/ need.
PYEXT_INCFLAGS = -I$(PYTHON_INCDIR) -I$(NUMPY_INCDIR) -I$(PYBIND11_INCDIR)

# Default rule (x.cu -> x.o), used for src_lib/, src_bin/, and src_lib/template_instantiations/.
# The depfile x.d is written as a side effect of compiling (see NVCC_DEPFLAGS).
%.o: %.cu %.d
	$(NVCC_COMPILE) -c -o $@ $<

# Special rule for src_pybind11/, which adds the python/numpy/pybind11 include dirs.
# Both pattern rules match src_pybind11/x.o; GNU make >= 3.82 picks the rule with
# the shortest stem, i.e. this one.
src_pybind11/%.o: src_pybind11/%.cu src_pybind11/%.d
	$(NVCC_COMPILE) $(PYEXT_INCFLAGS) -c -o $@ $<

# Build the C++ library (lib/libpirate.so)
# We want it to automatically pull in the C++ library $(KSGPU_DIR)/lib/libksgpu.so.
#
# The C++ library has been built correctly if 'objdump -x' shows the following:
#   NEEDED   libksgpu.so
#   RUNPATH  $(KSGPU_DIR)/lib    # where Makefile var $(KSGPU_DIR) is read from makefile_helper.out
#
# The quoting can be understood by working backwards as follows:
#  - g++ command line should look like:   g++ -Wl,-rpath="$(KSGPU_DIR)/lib"
#  - nvcc command line should look like:  nvcc -Xcompiler '"-Wl,-rpath=$(KSGPU_DIR)/lib"'

$(PIRATE_LIB): $(LIB_OFILES)
	@mkdir -p $(@D)
	$(NVCC) $(NVCC_ARCH) -shared -o $@ $^ -lksgpu -lyaml-cpp -L$(KSGPU_DIR)/lib -Xcompiler '"-Wl,-rpath=$(KSGPU_DIR)/lib"'

# Build C++ binaries (bin/*): src_bin/x.o -> bin/x.
# Each executable is linked from its own object file plus lib/libpirate.so, which
# is passed to the linker by path. The remaining link flags are the same as in the
# rule for lib/libpirate.so.
bin/%: src_bin/%.o $(PIRATE_LIB)
	@mkdir -p bin/
	$(NVCC) $(NVCC_ARCH) -o $@ $< $(PIRATE_LIB) -lksgpu -lyaml-cpp -L$(KSGPU_DIR)/lib -Xcompiler '"-Wl,-rpath=$(KSGPU_DIR)/lib"'

# Build the python extension (pirate_frb/pirate_pybind11...so)
# We want it to automatically pull in the C++ library pirate_frb/lib/libpirate.so.
#
# The python extension has been built correctly if 'objdump -x' shows the following:
#   NEEDED   libpirate.so
#   RUNPATH  $ORIGIN/lib
#
# The quoting can be understood by working backwards as follows:
#  - g++ command line should look like:   g++ -Wl,-rpath="\$ORIGIN/lib"
#  - nvcc command line should look like:  nvcc -Xcompiler '"-Wl,-rpath=\\$ORIGIN/lib"'
#  - Makefile line should look like:      nvcc -Xcompiler '"-Wl,-rpath=\\$$ORIGIN/lib"'
#
# Note that we don't link to libksgpu.so or ksgpu_pybind11...so in this step.
# These libraries end up getting imported as follows:
#
#  1. When 'pirate_frb' is imported, we do 'import ksgpu' (in pirate_frb/__init__.py)
#     before 'import pirate_pybind11'.
#
#  2. When 'ksgpu' is imported, we use the "ctypes trick" (see comment in ksgpu/__init__.py)
#     to load the libraries libksgpu.so and ksgpu_pybind11...so with globally visible symbols.
#
# The symlink pirate_frb/lib is needed for '-Lpirate_frb/lib', but it only has to exist.
# It is an order-only prerequisite (after '|') because make sees a symlink-to-directory with
# the timestamp of the directory, which changes whenever files are created or removed there.
# A relink is triggered by the object files and by $(PIRATE_LIB) itself.

$(PIRATE_PYEXT): $(PYEXT_OFILES) $(PIRATE_LIB) | pirate_frb/lib
	$(NVCC) $(NVCC_ARCH) -shared -o $@ $(PYEXT_OFILES) -lpirate -Lpirate_frb/lib -Xcompiler '"-Wl,-rpath=\\$$ORIGIN/lib"'

# Needed by pip/pipmake: list of all files that go into the (non-editable) wheel,
# written one path per line.
#
# The symlinks pirate_frb/{include,lib} only have to exist, so they are order-only
# prerequisites (after '|'). make sees a symlink-to-directory with the timestamp of
# the directory, which changes whenever files are created or removed there; as normal
# prerequisites they would cause this list to be regenerated for no reason.
wheel_files.txt: Makefile | pirate_frb/include pirate_frb/lib
	rm -f $@
	for f in $(WHEEL_FILES); do echo $$f; done >>$@

# Needed by pip/pipmake: list of all files that go into the sdist,
# written one path per line. Regenerated whenever the Makefile changes.
sdist_files.txt: Makefile
	rm -f $@
	printf '%s\n' $(SDIST_FILES) >$@

# Remove build products, in three passes:
#   1. {*~, *.o, *.d, *.so, *.pyc} in each of CLEAN_DIRS
#   2. CLEAN_FILES and the executables in bin/
#   3. the (now hopefully empty) directories in CLEAN_RMDIRS
# Each list goes through $(wildcard ...) first, so only paths that actually exist
# are processed. Every command is echoed before it is run.
clean:
	@for path in $(wildcard $(foreach d,$(CLEAN_DIRS),$(addprefix $d/,*~ *.o *.d *.so *.pyc))); do echo rm $$path; rm $$path; done
	@for path in $(wildcard $(CLEAN_FILES) $(BIN_XFILES)); do echo rm $$path; rm $$path; done
	@for dir in $(wildcard $(CLEAN_RMDIRS)); do echo rmdir $$dir; rmdir $$dir; done

# Specifying .SECONDARY with no prerequisites disables auto-deletion of intermediate files.
.SECONDARY:

# If a depfile is absent, build can still proceed.
# The depfiles are listed as prerequisites of the %.o pattern rules. This explicit rule
# (no prerequisites, no recipe) tells make that a missing x.d is not an error; the
# depfile is then written as a side effect of compiling x.cu (see NVCC_DEPFLAGS).
$(DEPFILES):

# Include any depfiles which are present.
# $(wildcard ...) drops the depfiles that do not exist yet, so on a fresh tree nothing
# is included and make does not complain about missing files.
include $(wildcard $(DEPFILES))
