# This Makefile will be invoked by the python build system (e.g. via 'pip install'),
# but you can also build individual targets by invoking 'make' directly.

# Switch off make's built-in implicit rules and predefined variables.
# This must come before anything else in the Makefile.
MAKEFLAGS += --no-builtin-rules --no-builtin-variables

# 'all' is the default goal, so it has to be the first target in this Makefile.
#   lib          -> C++ library lib/libgpu_mm.so, plus python extension gpu_mm/gpu_mm_pybind11...so
#   build_wheel  -> invoked by 'pip' (or by 'make all')
#   build_sdist  -> invoked by 'pip' (or by 'make all')
all: lib build_wheel build_sdist

# These are command names, not files to be built. Note that 'lib' in particular
# must be phony, because a directory named lib/ is created by the build.
.PHONY: all clean
.PHONY: lib build_wheel build_sdist


####################################################################################################
#
# Variables encoding configuration: PYTHON, NVCC, NVCC_ARCH, NVCC_DEPFLAGS, CULIBS.
#
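# FIXME some day I'll define a configure-script mechanism for setting these variables.
# For now, if you want to change the defaults, just edit the Makefile (or, since the
# variables are assigned with '?=', set them in the environment or on the 'make' command line).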

# All of the variables below use '?=', so a value that is already set
# (environment or 'make VAR=...') takes precedence over the default given here.

# Python interpreter used to run makefile_helper.py.
PYTHON ?= python3
# nvcc command line prefix used for every compile and link step in this Makefile.
# Note that it carries the language standard, optimization and host-compiler flags too.
NVCC ?= nvcc -std=c++17 -m64 -O3 --compiler-options -Wall,-fPIC
# CUDA libraries linked into lib/libgpu_mm.so.
CULIBS ?= -lcufft -lcublas

# Extra nvcc flags needed to build Makefile dependencies
#   -MMD create dep file, omitting "system" headers
#   -MP add phony target for each header in dep file (makes error reporting less confusing)
# Note: we don't need "-MT $@", since we use in-tree object filenames (x.cu -> x.o).
# Note: we don't need "-MT $*.d", since we use in-tree depfile names (x.cu -> x.d).
# These flags are passed on compile steps only (not on the link steps).
NVCC_DEPFLAGS ?= -MMD -MP

# NVIDIA architectures to generate code for (one '-gencode' pair per architecture).
# NVCC_ARCH is what the rules actually use; override it to build for a different set.
DEFAULT_NVCC_ARCH = \
  -gencode arch=compute_80,code=sm_80 \
  -gencode arch=compute_86,code=sm_86 \
  -gencode arch=compute_89,code=sm_89
# Not enabled by default:
# DEFAULT_NVCC_ARCH += -gencode arch=compute_90,code=sm_90
NVCC_ARCH ?= $(DEFAULT_NVCC_ARCH)


####################################################################################################
#
# "Derived" config variables: PYTHON_INCDIR, NUMPY_INCDIR, PYBIND11_INCDIR, PYEXT_SUFFIX, KSGPU_DIR.
#
# These are autogenerated by makefile_helper.py, and cached in makefile_helper.out.
# PYEXT_SUFFIX is something like .cpython-312-x86_64-linux-gnu.so.


# Skip the include when the command-line goal list is exactly 'clean': makefile_helper.out
# is one of the files 'make clean' deletes, so there is no point in (re)generating it first.
# (The comparison is exact, so e.g. 'make clean all' still includes the file.)
ifneq ($(MAKECMDGOALS),clean)
  include makefile_helper.out
endif

# Regenerate the cached config whenever the helper script or this Makefile changes.
# Because makefile_helper.out is an included makefile, make uses this rule to (re)build it
# when it is missing or out of date, and then re-reads it.
# NOTE(review): the recipe does not redirect output, so makefile_helper.py presumably
# writes makefile_helper.out itself -- confirm against the script.
makefile_helper.out: makefile_helper.py Makefile
	$(PYTHON) makefile_helper.py


####################################################################################################


# The main output of the build process is these two libraries.
# Reminder: PYEXT_SUFFIX is something like .cpython-312-x86_64-linux-gnu.so.
# Note: ':=' expands $(PYEXT_SUFFIX) immediately, so it must already be defined when this
# line is read (it is expected to come from makefile_helper.out). When that file is not
# included (goal is exactly 'clean'), the suffix is empty unless it is set elsewhere.
GPU_MM_LIB := lib/libgpu_mm.so
GPU_MM_PYEXT := gpu_mm/gpu_mm_pybind11$(PYEXT_SUFFIX)

# Sources compiled into lib/libgpu_mm.so.
# Each name below stands for the file src_lib/<name>.cu.
LIB_SRCFILES = $(patsubst %,src_lib/%.cu, \
  LocalPixelization \
  PointingPlan \
  PointingPlanTester \
  PointingPrePlan \
  ToyPointing \
  cell_broadcast \
  cell_reduce \
  check_arguments \
  cuts \
  expand_dynamic_map \
  gpu_point \
  gpu_utils \
  local_map_to_global \
  map2tod \
  map2tod_reference \
  map2tod_unplanned \
  misc \
  pycufft \
  test_plan_iterator \
  tod2map \
  tod2map_reference \
  tod2map_unplanned)

# Sources compiled into the python extension gpu_mm/gpu_mm_pybind11....so.
PYEXT_SRCFILES = src_pybind11/gpu_mm_pybind11.cu

# Every python source file must be listed here, otherwise it will be missing
# from 'pip install' and from pypi.
# Each name below stands for the file gpu_mm/<name>.py.
PYFILES = $(patsubst %,gpu_mm/%.py, \
  __init__ \
  __main__ \
  gpu_mm \
  gpu_pointing \
  gpu_utils \
  pycufft \
  tests \
  tests_mpi)

# Every header file must be listed here, otherwise it will be missing
# from 'pip install' and from pypi.
# Each name below is a file in include/.
HFILES = $(addprefix include/, \
  gpu_mm.hpp \
  gpu_mm_internals.hpp \
  plan_iterator.hpp)

# 'make clean' removes {*~, *.o, *.d, *.so, *.pyc} found directly inside each of these dirs.
CLEAN_DIRS := \
  . \
  include include/gpu_mm \
  lib \
  src_lib src_pybind11 \
  gpu_mm gpu_mm/__pycache__

# Individual extra files removed by 'make clean'.
# 'gpu_mm/include' and 'gpu_mm/lib' are symlinks, which is why they are listed
# here (removed with 'rm') and not in CLEAN_RMDIRS (removed with 'rmdir').
CLEAN_FILES := \
  sdist_files.txt wheel_files.txt makefile_helper.out \
  gpu_mm/include gpu_mm/lib

# Directories expected to be empty once the files above are gone; 'make clean' rmdir's them.
CLEAN_RMDIRS := lib gpu_mm/__pycache__


####################################################################################################


# Object files live next to their sources (x.cu -> x.o).
LIB_OFILES := $(patsubst %.cu,%.o,$(LIB_SRCFILES))
PYEXT_OFILES := $(patsubst %.cu,%.o,$(PYEXT_SRCFILES))

# Dependency files live next to their sources too (x.cu -> x.d).
# Every source file must have its .d file listed here, or the build will break!
ALL_SRCFILES := $(LIB_SRCFILES) $(PYEXT_SRCFILES)
DEPFILES := $(patsubst %.cu,%.d,$(ALL_SRCFILES))

# Contents of the sdist: build machinery, then python sources, CUDA sources and headers.
SDIST_FILES := pyproject.toml Makefile makefile_helper.py $(PYFILES) $(ALL_SRCFILES) $(HFILES)

# Contents of the wheel. Some entries are reached through symlinks:
#  - header file include/%.hpp is listed as gpu_mm/include/%.hpp
#  - library lib/libgpu_mm.so is listed as gpu_mm/lib/libgpu_mm.so
#  - python extension gpu_mm/gpu_mm_pybind11...so needs no symlink/rename.
WHEEL_FILES := $(PYFILES) $(GPU_MM_PYEXT) gpu_mm/$(GPU_MM_LIB) $(addprefix gpu_mm/,$(HFILES))

# Phony aggregate targets (no recipes of their own).
# pip/pipmake requires the special target names 'build_wheel' and 'build_sdist'.
build_sdist: sdist_files.txt
build_wheel: wheel_files.txt $(GPU_MM_LIB) $(GPU_MM_PYEXT)
lib: $(GPU_MM_LIB) $(GPU_MM_PYEXT)

# Symlink {include,lib} into python directory 'gpu_mm'.
#
# make follows symlinks when it checks whether a target exists, so a dangling link
# (gpu_mm/lib created before lib/ exists, e.g. after a failed first compile) looks
# "missing" and its recipe is run again. A plain 'ln -s' would then fail with
# "File exists". Therefore:
#   - 'ln -sfn' replaces an existing link in place (-n: do not descend into the
#     directory the old link points to), which makes the recipes safe to re-run;
#   - lib/ is created before linking, so that gpu_mm/lib is never dangling.
#     ('make clean' removes lib/ again via CLEAN_RMDIRS.)
gpu_mm/include:
	ln -sfn ../include $@

gpu_mm/lib:
	@mkdir -p lib
	ln -sfn ../lib $@

# Build object files in src_lib/.
#
# This is a static pattern rule restricted to $(LIB_OFILES), not a catch-all '%.o' rule:
#   - it cannot compete with the special-flags rule for src_pybind11/ objects (which of two
#     matching pattern rules wins depends on the make version);
#   - a listed source file that is missing produces an immediate, explicit error.
#
# The depfile x.d is a prerequisite so that an absent depfile forces x.o to be recompiled,
# which regenerates x.d (as a side effect of $(NVCC_DEPFLAGS)).
$(LIB_OFILES): %.o: %.cu %.d
	$(NVCC) $(NVCC_ARCH) $(NVCC_DEPFLAGS) -I$(KSGPU_DIR)/include -c -o $@ $<

# Build object files in src_pybind11/ with special flags (python, numpy and pybind11
# include directories, in addition to the ksgpu one).
#
# This is a static pattern rule restricted to $(PYEXT_OFILES): being an explicit rule for
# exactly these objects, it always takes precedence over any generic '%.o' pattern rule,
# independent of the make version's pattern-rule selection order.
#
# The depfile x.d is a prerequisite so that an absent depfile forces x.o to be recompiled,
# which regenerates x.d (as a side effect of $(NVCC_DEPFLAGS)).
$(PYEXT_OFILES): src_pybind11/%.o: src_pybind11/%.cu src_pybind11/%.d
	$(NVCC) $(NVCC_ARCH) $(NVCC_DEPFLAGS) -I$(KSGPU_DIR)/include -I$(PYTHON_INCDIR) -I$(NUMPY_INCDIR) -I$(PYBIND11_INCDIR) -c -o $@ $<

# Link the C++ shared library (lib/libgpu_mm.so) from the src_lib/ object files.
#
# FIXME libksgpu.so is not linked in this step. That is fine as long as gpu_mm is only
# used through its python interface (thanks to the "ctypes trick", see below), but it
# may need more thought if libgpu_mm is ever called from C++.
$(GPU_MM_LIB): $(LIB_OFILES)
	@mkdir -p $(@D)
	$(NVCC) $(NVCC_ARCH) -shared -o $@ $(LIB_OFILES) $(CULIBS)

# Build the python extension (gpu_mm/gpu_mm_pybind11...so)
# We want it to automatically pull in the C++ library gpu_mm/lib/libgpu_mm.so.
#
# The python extension has been built correctly if 'objdump -x' shows the following:
#   NEEDED   libgpu_mm.so
#   RUNPATH  $ORIGIN/lib
#
# The quoting can be understood by working backwards as follows:
#  - g++ command line should look like:   g++ -Wl,-rpath="\$ORIGIN/lib"
#  - nvcc command line should look like:  nvcc -Xcompiler '"-Wl,-rpath=\\$ORIGIN/lib"'
#  - Makefile line should look like:      nvcc -Xcompiler '"-Wl,-rpath=\\$$ORIGIN/lib"'
#
# Note that we don't link to libksgpu.so or ksgpu_pybind11...so in this step.
# These libraries end up getting imported as follows:
#
#  1. When 'gpu_mm' is imported, we do 'import ksgpu' (in gpu_mm/__init__.py)
#     before 'import gpu_mm_pybind11'.
#
#  2. When 'ksgpu' is imported, we use the "ctypes trick" (see comment in ksgpu/__init__.py)
#     to load the libraries libksgpu.so and ksgpu_pybind11...so with globally visible symbols.

# 'gpu_mm/lib' (the symlinked directory searched via -Lgpu_mm/lib) only has to exist
# before linking, so it is an order-only prerequisite (after the '|'). As a normal
# prerequisite, the directory's timestamp, which changes whenever anything is created or
# removed in lib/, could trigger relinks that the real inputs do not call for.
# The recipe names $(PYEXT_OFILES) explicitly (not $^) so that only the objects are
# passed as link inputs; libgpu_mm.so is pulled in through -lgpu_mm.
$(GPU_MM_PYEXT): $(PYEXT_OFILES) $(GPU_MM_LIB) | gpu_mm/lib
	$(NVCC) $(NVCC_ARCH) -shared -o $@ $(PYEXT_OFILES) -lgpu_mm -Lgpu_mm/lib -Xcompiler '"-Wl,-rpath=\\$$ORIGIN/lib"'

# Needed by pip/pipmake: list of all files that go into the (non-editable) wheel.
#
# Prerequisites:
#  - Makefile and makefile_helper.out: the list is $(WHEEL_FILES), which is defined in the
#    Makefile and contains $(PYEXT_SUFFIX) (taken from makefile_helper.out), so the list
#    must be rewritten when either changes.
#  - gpu_mm/include and gpu_mm/lib: the symlinks through which the listed headers and
#    library are reached. They only need to exist, so they are order-only (after the '|');
#    their timestamps are those of the linked directories and say nothing about this list.
wheel_files.txt: Makefile makefile_helper.out | gpu_mm/include gpu_mm/lib
	rm -f $@
	for f in $(WHEEL_FILES); do echo $$f; done >>$@

# Needed by pip/pipmake: list of all files that go into the sdist, one path per line.
# Rewritten whenever the Makefile (which defines SDIST_FILES) changes.
sdist_files.txt: Makefile
	rm -f $@
	printf '%s\n' $(SDIST_FILES) >>$@

# Remove build products, echoing each removal. Three passes:
#   1. editor backups, objects, depfiles, shared libraries and .pyc files found
#      directly inside each directory of CLEAN_DIRS;
#   2. the extra files in CLEAN_FILES;
#   3. the directories in CLEAN_RMDIRS (rmdir, so they must be empty by now).
# $(wildcard ...) restricts every pass to paths that actually exist.
clean:
	@for f in $(wildcard $(foreach d,$(CLEAN_DIRS),$(addprefix $d/,*~ *.o *.d *.so *.pyc))); do \
	  echo rm $$f; \
	  rm $$f; \
	done
	@for f in $(wildcard $(CLEAN_FILES)); do \
	  echo rm $$f; \
	  rm $$f; \
	done
	@for d in $(wildcard $(CLEAN_RMDIRS)); do \
	  echo rmdir $$d; \
	  rmdir $$d; \
	done

# Specifying .SECONDARY with no prerequisites disables auto-deletion of intermediate files.
.SECONDARY:

# If a depfile is absent, build can still proceed.
# Each x.d is listed as a prerequisite of its x.o. This recipe-less rule makes every
# depfile a known target, so make does not stop with "No rule to make target" when a
# depfile does not exist yet. The depfile itself is written by nvcc as a side effect of
# compiling x.cu with $(NVCC_DEPFLAGS).
$(DEPFILES):

# Include any depfiles which are present.
# $(wildcard ...) drops the names of depfiles that do not exist, so 'include' is only
# ever given files that are really there.
include $(wildcard $(DEPFILES))
