From af5ba1e4a10de4624531e2f72c21398d39e2bad3 Mon Sep 17 00:00:00 2001
From: antonl
Date: Wed, 4 Feb 2026 21:27:40 +0100
Subject: [PATCH] working build and load of cuda compiled .dll

---
Hand-edited after `git format-patch`: the contents of the three new files were
revised, so the blob ids on their `index` lines and the commit id on the first
line no longer correspond to the content below. Hunk line counts and the
diffstat are up to date.

 build.bat        |  59 ++++++++++++++++++---
 include/util.h   |  46 +++++++++++++++++
 src/commlayer.cu |  14 ++++++
 src/main.cu      | 124 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 236 insertions(+), 7 deletions(-)
 create mode 100644 include/util.h
 create mode 100644 src/commlayer.cu
 create mode 100644 src/main.cu

diff --git a/build.bat b/build.bat
index c0cb142..133e8cc 100644
--- a/build.bat
+++ b/build.bat
@@ -1,19 +1,64 @@
 @echo off
 setlocal
 
+@rem I rely on environment variables here.
 @rem set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4
 @rem set CUDSS_PATH=C:\Program Files\NVIDIA\cuDSS
 
-nvcc -o build/cudss_test.exe src/cudss_test.cu ^
-    -I"%CUDA_PATH%\include" ^
-    -I"%CUDSS_PATH%\include" ^
-    -L"%CUDA_PATH%\lib\x64" ^
-    -L"%CUDSS_PATH%\lib\12" ^
+set nvccArch=sm_120
+set commLayerSource=commlayer
+set mainSource=main
+set CommonXCompFlags=/W3 /O2 /EHsc
+set DllXcompilerFlags="%CommonXCompFlags% /MD"
+set ExeXcompilerFlags="%CommonXCompFlags% /MT"
+set IgnoreWarnings=-Wno-deprecated-gpu-targets
+
+set CudaIncludes=-I"%CUDA_PATH%\include" -I"%CUDSS_PATH%\include"
+set OtherIncludes=-Iinclude
+set CudaLibPaths=-L"%CUDA_PATH%\lib\x64" -L"%CUDSS_PATH%\lib\12"
+
+nvcc -c -o build/%commLayerSource%.obj src/%commLayerSource%.cu ^
+    -arch=%nvccArch% ^
+    %CudaIncludes% %OtherIncludes% %CudaLibPaths% ^
     -lcudss -lcudart ^
-    -Xcompiler "/W3 /O2"
+    %IgnoreWarnings% ^
+    -Xcompiler %DllXcompilerFlags%
+
+if %ERRORLEVEL% NEQ 0 (
+    echo Compile failed: %commLayerSource%.cu
+    exit /b 1
+)
+
+nvcc -shared -o build/%commLayerSource%.dll build/%commLayerSource%.obj ^
+    -arch=%nvccArch% ^
+    %IgnoreWarnings% ^
+    -Xcompiler %DllXcompilerFlags% ^
+    -Xlinker "/NODEFAULTLIB:LIBCMT"
+
+if %ERRORLEVEL% NEQ 0 (
+    echo DLL link failed
+    exit /b 1
+)
+
+nvcc -o build/%mainSource% src/%mainSource%.cu ^
+    -arch=%nvccArch% ^
+    %CudaIncludes% %OtherIncludes% %CudaLibPaths% ^
+    -lcudss -lcudart ^
+    %IgnoreWarnings% ^
+    -Xcompiler %ExeXcompilerFlags%
+
+
+
+@rem nvcc -o build/cudss_test.exe src/cudss_test.cu ^
+@rem     -I"%CUDA_PATH%\include" ^
+@rem     -I"%CUDSS_PATH%\include" ^
+@rem     -L"%CUDA_PATH%\lib\x64" ^
+@rem     -L"%CUDSS_PATH%\lib\12" ^
+@rem     -lcudss -lcudart ^
+@rem     -Xcompiler "/W3 /O2"
 
 if %ERRORLEVEL% EQU 0 (
-    echo Build successful: cudss_test.exe
+    echo Build successful: %mainSource%.exe
 ) else (
     echo Build failed
     exit /b 1
diff --git a/include/util.h b/include/util.h
new file mode 100644
index 0000000..06a2190
--- /dev/null
+++ b/include/util.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <stdio.h>   /* fprintf, used by LOG / LOG_ERROR */
+#include <stdlib.h>  /* exit, EXIT_FAILURE, used by the *_CHECK macros */
+
+/* ============================================================================
+ * Logging
+ * ============================================================================ */
+
+/* printf-style helpers: LOG writes to stdout, LOG_ERROR to stderr. Each call
+ * is prefixed with a "[cudss_test ...]" tag and terminated with a newline.
+ * `fmt` must be a string literal because it is concatenated with the prefix. */
+#define LOG(fmt, ...) fprintf(stdout, "[cudss_test] " fmt "\n", ##__VA_ARGS__)
+#define LOG_ERROR(fmt, ...) fprintf(stderr, "[cudss_test ERROR] " fmt "\n", ##__VA_ARGS__)
+
+/* ============================================================================
+ * Error Checking Macros
+ * ============================================================================ */
+
+/* Evaluates a CUDA runtime call exactly once; on any result other than
+ * cudaSuccess, logs file, line and cudaGetErrorString() and exits the process.
+ * The including file must provide the CUDA runtime declarations. The local
+ * carries a trailing underscore so it cannot shadow a caller's `err`. */
+#define CUDA_CHECK(call) \
+    do { \
+        cudaError_t err_ = (call); \
+        if (err_ != cudaSuccess) { \
+            LOG_ERROR("CUDA error at %s:%d - %s", __FILE__, __LINE__, \
+                      cudaGetErrorString(err_)); \
+            exit(EXIT_FAILURE); \
+        } \
+    } while (0)
+
+/* Same contract as CUDA_CHECK for cuDSS calls: anything other than
+ * CUDSS_STATUS_SUCCESS is logged as a numeric status and ends the process.
+ * The including file must provide the cuDSS declarations. */
+#define CUDSS_CHECK(call) \
+    do { \
+        cudssStatus_t status_ = (call); \
+        if (status_ != CUDSS_STATUS_SUCCESS) { \
+            LOG_ERROR("cuDSS error at %s:%d - status %d", __FILE__, __LINE__, \
+                      (int)status_); \
+            exit(EXIT_FAILURE); \
+        } \
+    } while (0)
+
diff --git a/src/commlayer.cu b/src/commlayer.cu
new file mode 100644
index 0000000..7448a77
--- /dev/null
+++ b/src/commlayer.cu
@@ -0,0 +1,14 @@
+#include <cuda_runtime.h>
+#include <iostream>
+
+/* Exports a symbol from the DLL with C linkage, so that its name is not
+ * C++-mangled and GetProcAddress() can find it by its plain name. The macro
+ * avoids a leading double underscore, which C++ reserves for the implementation. */
+#define CSCUDSS_EXPORT extern "C" __declspec(dllexport)
+
+/* Entry point looked up by name from the host executable. Prints a greeting
+ * to stdout and returns 0. */
+CSCUDSS_EXPORT int libraryMain() {
+    std::cout << "HELLO LIBRARY" << std::endl;
+    return 0;
+}
diff --git a/src/main.cu b/src/main.cu
new file mode 100644
index 0000000..f31393d
--- /dev/null
+++ b/src/main.cu
@@ -0,0 +1,124 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <iostream>
+#include <cuda_runtime.h>
+#include <cudss.h>
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#include "util.h"
+
+/* ============================================================================
+ * Version Information
+ * ============================================================================ */
+
+/* Logs the CUDA runtime and driver versions as "major.minor". The integer
+ * reported by each query is decoded as 1000 * major + 10 * minor. */
+void printCudaVersion(void) {
+    int runtimeVersion = 0;
+    int driverVersion = 0;
+
+    CUDA_CHECK(cudaRuntimeGetVersion(&runtimeVersion));
+    CUDA_CHECK(cudaDriverGetVersion(&driverVersion));
+
+    int runtimeMajor = runtimeVersion / 1000;
+    int runtimeMinor = (runtimeVersion % 1000) / 10;
+
+    int driverMajor = driverVersion / 1000;
+    int driverMinor = (driverVersion % 1000) / 10;
+
+    LOG("CUDA Runtime Version: %d.%d", runtimeMajor, runtimeMinor);
+    LOG("CUDA Driver Version: %d.%d", driverMajor, driverMinor);
+}
+
+/* Logs the cuDSS library version as "major.minor.patch". */
+void printCudssVersion(void) {
+    int major = 0;
+    int minor = 0;
+    int patch = 0;
+
+    CUDSS_CHECK(cudssGetProperty(MAJOR_VERSION, &major));
+    CUDSS_CHECK(cudssGetProperty(MINOR_VERSION, &minor));
+    CUDSS_CHECK(cudssGetProperty(PATCH_LEVEL, &patch));
+
+    LOG("cuDSS Version: %d.%d.%d", major, minor, patch);
+}
+
+/* Logs the name, compute capability and total global memory of the current
+ * CUDA device. Exits the process when no CUDA device is present. */
+void printDeviceInfo(void) {
+    int deviceCount = 0;
+    CUDA_CHECK(cudaGetDeviceCount(&deviceCount));
+
+    if (deviceCount == 0) {
+        LOG_ERROR("No CUDA-capable devices found");
+        exit(EXIT_FAILURE);
+    }
+
+    int device = 0;
+    CUDA_CHECK(cudaGetDevice(&device));
+
+    struct cudaDeviceProp props;
+    CUDA_CHECK(cudaGetDeviceProperties(&props, device));
+
+    LOG("Device: %s (compute %d.%d)", props.name, props.major, props.minor);
+    LOG("Memory: %.2f GB", (double)props.totalGlobalMem / (1024.0 * 1024.0 * 1024.0));
+}
+
+/* ============================================================================
+ * Main Entry Point
+ * ============================================================================ */
+
+/* Signature of the `libraryMain` function exported by commlayer.dll. */
+using libraryMain = int(*)();
+
+/* Prints version and device information, loads commlayer.dll at run time and
+ * calls its exported libraryMain(), then creates and destroys a cuDSS handle.
+ * Returns 0 on success and 1 when the DLL cannot be loaded, the export is
+ * missing, or libraryMain() reports a non-zero status. */
+int main(int argc, char **argv) {
+    LOG("cuDSS Test Program");
+    LOG("==================");
+
+    /* Print version information */
+    printCudaVersion();
+    printCudssVersion();
+    printDeviceInfo();
+
+    HMODULE hLib = LoadLibraryA("commlayer.dll");
+    if (!hLib) {
+        /* Read the error code before any other call can overwrite it. */
+        DWORD loadError = GetLastError();
+        std::cerr << "Failed to load DLL. Error: " << loadError << std::endl;
+        return 1;
+    }
+
+    libraryMain libMain = (libraryMain)GetProcAddress(hLib, "libraryMain");
+    if (!libMain) {
+        DWORD lookupError = GetLastError();
+        std::cerr << "Failed to find function. Error: " << lookupError << std::endl;
+        FreeLibrary(hLib);
+        return 1;
+    }
+
+    /* The module is needed for this one call only, so release it on the
+     * success path as well before acting on the returned status. */
+    int libStatus = libMain();
+    FreeLibrary(hLib);
+    if (libStatus != 0) {
+        LOG_ERROR("libraryMain returned %d", libStatus);
+        return 1;
+    }
+
+    /* Initialize cuDSS */
+    cudssHandle_t handle = NULL;
+    CUDSS_CHECK(cudssCreate(&handle));
+    LOG("cuDSS handle created successfully");
+
+    /* Cleanup */
+    CUDSS_CHECK(cudssDestroy(handle));
+    LOG("cuDSS handle destroyed");
+
+    LOG("Done.");
+    return 0;
+}