#src/optfunc/cvxs/cones/cpp/CMakeLists.txt
# Dependencies are imported by the top-level CMakeLists.txt.
#find_package(CUDAToolkit REQUIRED)
#find_package(OpenMP COMPONENTS CXX)
#
#execute_process(
#  COMMAND "${Python_EXECUTABLE}" -m nanobind --cmake_dir
#  OUTPUT_STRIP_TRAILING_WHITESPACE
#  OUTPUT_VARIABLE nanobind_ROOT
#)
#find_package(nanobind CONFIG REQUIRED)

# User-tunable cache entries for the native extension module.
#
# OPTFUNC_NATIVE_TARGET       target name handed to nanobind_add_module()
# OPTFUNC_NATIVE_INSTALL_DIR  destination used by the install() rule
# OPTFUNC_CUDA_ADDON          forwarded to the sources as OPTFUNC_CUDA_ADDON=0/1
set(OPTFUNC_NATIVE_TARGET "_cpp"
    CACHE STRING "Native cone extension module target name")
set(OPTFUNC_NATIVE_INSTALL_DIR "optfunc/cvxs/cones/cpp"
    CACHE STRING "Python package directory for the native cone extension")
option(OPTFUNC_CUDA_ADDON "Build the native module as the optfuncs-cuda130 CUDA addon" OFF)

# Translation units compiled into the native extension module.
# Only real source files belong in this list: nanobind_add_module() option
# keywords are passed at the call site, so the variable stays a plain file
# list that could also be handed to commands such as target_sources().
set(_optfunc_native_sources
  bindings.cpp
  data_template.cpp
  blas_config.cpp
  scaling.cpp
  zero.cpp
  nonnegative.cpp
  second_order.cpp
  exponential.cpp
  psd/psd.cpp
)

# Exactly one of the two CUDA-related translation units is compiled:
# cuda_runtime.cu when OPTFUNC_WITH_CUDA is enabled, cuda_stubs.cpp otherwise.
if (OPTFUNC_WITH_CUDA)
  list(APPEND _optfunc_native_sources cuda_runtime.cu)
else()
  list(APPEND _optfunc_native_sources cuda_stubs.cpp)
endif()

# nanobind_add_module() is expected to be provided by the top-level project
# (via find_package(nanobind)). NOMINSIZE is a nanobind option keyword that
# turns off nanobind's binary-size optimizations for this module.
nanobind_add_module(
  ${OPTFUNC_NATIVE_TARGET}
  NOMINSIZE
  ${_optfunc_native_sources}
)

# Host CPU tuning knobs. Both are consumed by the tuning section further down:
# with OPTFUNC_NATIVE_CPU_TUNE enabled, non-MSVC builds add -march=native and
# MSVC builds add /arch:<OPTFUNC_MSVC_ARCH> when that value is non-empty.
option(OPTFUNC_NATIVE_CPU_TUNE "Enable native host CPU tuning for local benchmark builds" OFF)
set(OPTFUNC_MSVC_ARCH ""
    CACHE STRING "Optional MSVC /arch value for native cone C++ files, such as AVX2 or AVX512")

# Publish the CUDA feature switches to the sources as 0/1 macros.
# $<BOOL:...> evaluates to 1 for a true value and to 0 for a false constant or
# an empty/undefined variable, so both macros are always defined.
target_compile_definitions(${OPTFUNC_NATIVE_TARGET} PRIVATE
  OPTFUNC_WITH_CUDA=$<BOOL:${OPTFUNC_WITH_CUDA}>
  OPTFUNC_CUDA_ADDON=$<BOOL:${OPTFUNC_CUDA_ADDON}>
)

# Let the module's sources include headers relative to this directory.
target_include_directories(${OPTFUNC_NATIVE_TARGET} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")

# CUDA runtime: prefer the static imported target when it has been defined,
# otherwise fall back to CUDA::cudart.
if (OPTFUNC_WITH_CUDA)
  set(_optfunc_cudart_target CUDA::cudart)
  if (TARGET CUDA::cudart_static)
    set(_optfunc_cudart_target CUDA::cudart_static)
  endif()
  target_link_libraries(${OPTFUNC_NATIVE_TARGET} PRIVATE ${_optfunc_cudart_target})
endif()

# cuBLAS is linked only when both CUDA and OPTFUNC_USE_CUBLAS are enabled; the
# OPTFUNC_WITH_CUBLAS macro is always defined (0 or 1).
set(_optfunc_with_cublas 0)
if (OPTFUNC_WITH_CUDA AND OPTFUNC_USE_CUBLAS)
  set(_optfunc_with_cublas 1)
  target_link_libraries(${OPTFUNC_NATIVE_TARGET} PRIVATE CUDA::cublas)
endif()
target_compile_definitions(${OPTFUNC_NATIVE_TARGET} PRIVATE
  OPTFUNC_WITH_CUBLAS=${_optfunc_with_cublas}
)

# OpenMP is optional: link it only when the C++ component was found.
if (OpenMP_CXX_FOUND)
  target_link_libraries(${OPTFUNC_NATIVE_TARGET} PRIVATE OpenMP::OpenMP_CXX)
endif()

# Host BLAS is optional as well. The sources always receive
# OPTFUNC_WITH_HOST_BLAS (0/1) and OPTFUNC_HOST_BLAS_LABEL (a string literal:
# the value of OPTFUNC_BLAS when BLAS::BLAS exists, "none" otherwise).
if (TARGET BLAS::BLAS)
  target_link_libraries(${OPTFUNC_NATIVE_TARGET} PRIVATE BLAS::BLAS)
  set(_optfunc_with_host_blas 1)
  set(_optfunc_host_blas_label "${OPTFUNC_BLAS}")
else()
  set(_optfunc_with_host_blas 0)
  set(_optfunc_host_blas_label "none")
endif()
target_compile_definitions(${OPTFUNC_NATIVE_TARGET} PRIVATE
  OPTFUNC_WITH_HOST_BLAS=${_optfunc_with_host_blas}
  OPTFUNC_HOST_BLAS_LABEL="${_optfunc_host_blas_label}"
)

# Always build the module as position-independent code.
set_property(TARGET ${OPTFUNC_NATIVE_TARGET} PROPERTY POSITION_INDEPENDENT_CODE ON)

# CUDA builds additionally enable separable compilation for the target.
if (OPTFUNC_WITH_CUDA)
  set_property(TARGET ${OPTFUNC_NATIVE_TARGET} PROPERTY CUDA_SEPARABLE_COMPILATION ON)
endif()

# Per-configuration optimization flags for the native module.
# Both branches keep Debug builds unoptimized: MSVC uses /Od (plus /RTC1
# run-time checks), other toolchains simply do not receive -O3 in Debug.
if (MSVC)
  target_compile_options(${OPTFUNC_NATIVE_TARGET} PRIVATE
          # CXX flags are passed directly to cl.exe; CUDA host flags go through
          # NVCC's -Xcompiler bridge. Keep them separate for CLion/MSBuild.
          $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:Release>>:/O2>
          $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:RelWithDebInfo>>:/O2>
          $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:Debug>>:/Od>
          $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:Debug>>:/RTC1>
          $<$<AND:$<COMPILE_LANGUAGE:CUDA>,$<CONFIG:Release>>:-Xcompiler=/O2>
          $<$<AND:$<COMPILE_LANGUAGE:CUDA>,$<CONFIG:RelWithDebInfo>>:-Xcompiler=/O2>
          $<$<AND:$<COMPILE_LANGUAGE:CUDA>,$<CONFIG:Debug>>:-Xcompiler=/Od>
          $<$<AND:$<COMPILE_LANGUAGE:CUDA>,$<CONFIG:Debug>>:-Xcompiler=/RTC1>
          # Make MSVC interpret both the source and the execution character
          # sets as UTF-8.
          $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=/utf-8>
          $<$<COMPILE_LANGUAGE:CXX>:/utf-8>
  )
else()
  # -O3 is requested for every configuration except Debug (including an empty
  # CMAKE_BUILD_TYPE). Target options are placed after the per-configuration
  # default flags, so an unconditional -O3 would override the Debug
  # optimization level and make debug builds hard to step through.
  target_compile_options(${OPTFUNC_NATIVE_TARGET} PRIVATE
    $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<NOT:$<CONFIG:Debug>>>:-O3>
    $<$<AND:$<COMPILE_LANGUAGE:CUDA>,$<NOT:$<CONFIG:Debug>>>:-O3>
  )
endif()

# Optional host CPU tuning, applied to C++ translation units only.
#   - non-MSVC toolchains: -march=native
#   - MSVC: /arch:<OPTFUNC_MSVC_ARCH>, and only when that value is non-empty
# Nothing is added when OPTFUNC_NATIVE_CPU_TUNE is off.
if (OPTFUNC_NATIVE_CPU_TUNE AND NOT MSVC)
  target_compile_options(${OPTFUNC_NATIVE_TARGET} PRIVATE
    $<$<COMPILE_LANGUAGE:CXX>:-march=native>
  )
elseif (OPTFUNC_NATIVE_CPU_TUNE AND OPTFUNC_MSVC_ARCH)
  target_compile_options(${OPTFUNC_NATIVE_TARGET} PRIVATE
    $<$<COMPILE_LANGUAGE:CXX>:/arch:${OPTFUNC_MSVC_ARCH}>
  )
endif()

# Important: install the module inside the Python package tree rather than
# into a generic lib/bin directory, so that the final import path is
#   optfunc.cvxs.cones.cpp._cpp
install(
  TARGETS ${OPTFUNC_NATIVE_TARGET}
  RUNTIME DESTINATION ${OPTFUNC_NATIVE_INSTALL_DIR}
  LIBRARY DESTINATION ${OPTFUNC_NATIVE_INSTALL_DIR}
)
