working dsyevd example on GPU with logging
This commit is contained in:
parent
879ae444a1
commit
48b67775f8
2
.gitignore
vendored
2
.gitignore
vendored
@ -94,3 +94,5 @@ dkms.conf
|
|||||||
*.out
|
*.out
|
||||||
*.app
|
*.app
|
||||||
|
|
||||||
|
#visual studio
|
||||||
|
.vs
|
||||||
|
|||||||
25
build.bat
25
build.bat
@ -4,16 +4,31 @@ ctime -begin timeBuild.ctm
|
|||||||
|
|
||||||
@rem /WX /W4 /wd4201 /wd4100 /wd4189 /wd4244 /wd4127 /wd4456
|
@rem /WX /W4 /wd4201 /wd4100 /wd4189 /wd4244 /wd4127 /wd4456
|
||||||
@rem set CommonCompilerFlags="/nologo /Zi /FC"
|
@rem set CommonCompilerFlags="/nologo /Zi /FC"
|
||||||
set CommonCompilerFlags=/nologo /Zi /FC
|
set CommonCompilerFlags=/nologo /Zi /FC /Od
|
||||||
@rem /WX /W4 /wd4201 /wd4100 /wd4189 /wd4244 /wd4127 /wd4456
|
@rem /WX /W4 /wd4201 /wd4100 /wd4189 /wd4244 /wd4127 /wd4456
|
||||||
|
@rem
|
||||||
|
@rem
|
||||||
|
|
||||||
|
set cuda_root=D:/lib/cudatoolkit/lib/x64
|
||||||
|
|
||||||
|
set mkl_root=D:/lib/oneAPI_mkl/mkl/2021.3.0
|
||||||
|
set mkl_core=%mkl_root%/lib/intel64/mkl_core.lib
|
||||||
|
set mkl_intel_lp64=%mkl_root%/lib/intel64/mkl_intel_lp64.lib
|
||||||
|
set mkl_intel_thread=%mkl_root%/lib/intel64/mkl_intel_thread.lib
|
||||||
|
set MKLCOMPILER=D:/lib/oneAPI_mkl/compiler/2021.3.0/windows/compiler
|
||||||
|
set libiomp5md=%MKLCOMPILER%/lib/intel64_win/libiomp5md.lib
|
||||||
|
|
||||||
set CudaSources=../src/cuda_entry_point.cu
|
set CudaSources=../src/cuda_entry_point.cu
|
||||||
|
set CudaObj=cuda_entry_point.obj
|
||||||
set Sources=../src/main.c
|
set Sources=../src/main.c
|
||||||
set CudaLib=D:/lib/cudatoolkit/lib/x64
|
|
||||||
set CudaSolver=%CudaLib%/cusolver.lib
|
|
||||||
IF NOT EXIST .\build mkdir .\build
|
IF NOT EXIST .\build mkdir .\build
|
||||||
pushd .\build
|
pushd .\build
|
||||||
cl %CommonCompilerFlags% %Sources% -Feprogram.exe
|
rem cl /c %CommonCompilerFlags% %Sources% /I"%mkl_root%\include"
|
||||||
@rem nvcc -Xcompiler %CommonCompilerFlags% -o program.exe %Sources% %CudaSources% -lcusolver
|
rem FOR MSVC /link %mkl_core% %mkl_intel_lp64% %mkl_intel_thread% %libiomp5md%
|
||||||
|
nvcc -c %CudaSources% -o %CudaObj% -g -G -lcusolver
|
||||||
|
rem nvcc -o program.exe -lcusolver -L%mkl_root%/lib/intel64 -lmkl_core -lmkl_intel_lp64 -lmkl_intel_thread -l%MKLCOMPILER%/lib/intel64_win/libiomp5md
|
||||||
|
cl %CommonCompilerFlags% %Sources% /I"%mkl_root%\include" /link %CudaObj% %cuda_root%/cudart.lib %cuda_root%/cusolver.lib %mkl_core% %mkl_intel_lp64% %mkl_intel_thread% %libiomp5md%
|
||||||
set LastError=%ERRORLEVEL%
|
set LastError=%ERRORLEVEL%
|
||||||
popd
|
popd
|
||||||
|
|
||||||
|
|||||||
BIN
build/main.rdi
Normal file
BIN
build/main.rdi
Normal file
Binary file not shown.
Binary file not shown.
8
eigsol_gpu.sublime-project
Normal file
8
eigsol_gpu.sublime-project
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"folders":
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"path": "."
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
423
eigsol_gpu.sublime-workspace
Normal file
423
eigsol_gpu.sublime-workspace
Normal file
File diff suppressed because one or more lines are too long
4927
external/cusolverDn.h
vendored
Normal file
4927
external/cusolverDn.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
318
external/cusolverMg.h
vendored
Normal file
318
external/cusolverMg.h
vendored
Normal file
@ -0,0 +1,318 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2019 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(CUSOLVERMG_H_)
|
||||||
|
#define CUSOLVERMG_H_
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include "cusolverDn.h"
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
extern "C" {
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
struct cusolverMgContext;
|
||||||
|
typedef struct cusolverMgContext *cusolverMgHandle_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \beief This enum decides how 1D device Ids (or process ranks) get mapped to
|
||||||
|
* a 2D grid.
|
||||||
|
*/
|
||||||
|
typedef enum {
|
||||||
|
|
||||||
|
CUDALIBMG_GRID_MAPPING_ROW_MAJOR = 1,
|
||||||
|
CUDALIBMG_GRID_MAPPING_COL_MAJOR = 0
|
||||||
|
|
||||||
|
} cusolverMgGridMapping_t;
|
||||||
|
|
||||||
|
/** \brief Opaque structure of the distributed grid */
|
||||||
|
typedef void *cudaLibMgGrid_t;
|
||||||
|
/** \brief Opaque structure of the distributed matrix descriptor */
|
||||||
|
typedef void *cudaLibMgMatrixDesc_t;
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverMgCreate(cusolverMgHandle_t *handle);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverMgDestroy(cusolverMgHandle_t handle);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverMgDeviceSelect(
|
||||||
|
cusolverMgHandle_t handle,
|
||||||
|
int nbDevices,
|
||||||
|
int deviceId[]);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Allocates resources related to the shared memory device grid.
|
||||||
|
* \param[out] grid the opaque data strcuture that holds the grid
|
||||||
|
* \param[in] numRowDevices number of devices in the row
|
||||||
|
* \param[in] numColDevices number of devices in the column
|
||||||
|
* \param[in] deviceId This array of size height * width stores the
|
||||||
|
* device-ids of the 2D grid; each entry must correspond to a valid
|
||||||
|
* gpu or to -1 (denoting CPU). \param[in] mapping whether the 2D grid is in
|
||||||
|
* row/column major \returns the status code
|
||||||
|
*/
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverMgCreateDeviceGrid(
|
||||||
|
cudaLibMgGrid_t * grid,
|
||||||
|
int32_t numRowDevices,
|
||||||
|
int32_t numColDevices,
|
||||||
|
const int32_t deviceId[],
|
||||||
|
cusolverMgGridMapping_t mapping);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Releases the allocated resources related to the distributed grid.
|
||||||
|
* \param[in] grid the opaque data strcuture that holds the distributed grid
|
||||||
|
* \returns the status code
|
||||||
|
*/
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverMgDestroyGrid(cudaLibMgGrid_t grid);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Allocates resources related to the distributed matrix descriptor.
|
||||||
|
* \param[out] desc the opaque data strcuture that holds the descriptor
|
||||||
|
* \param[in] numRows number of total rows
|
||||||
|
* \param[in] numCols number of total columns
|
||||||
|
* \param[in] rowBlockSize row block size
|
||||||
|
* \param[in] colBlockSize column block size
|
||||||
|
* \param[in] dataType the data type of each element in cudaDataType
|
||||||
|
* \param[in] grid the opaque data structure of the distributed grid
|
||||||
|
* \returns the status code
|
||||||
|
*/
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverMgCreateMatrixDesc(
|
||||||
|
cudaLibMgMatrixDesc_t *desc,
|
||||||
|
int64_t numRows,
|
||||||
|
int64_t numCols,
|
||||||
|
int64_t rowBlockSize,
|
||||||
|
int64_t colBlockSize,
|
||||||
|
cudaDataType dataType,
|
||||||
|
const cudaLibMgGrid_t grid);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Releases the allocated resources related to the distributed matrix
|
||||||
|
* descriptor. \param[in] desc the opaque data strcuture that holds the
|
||||||
|
* descriptor \returns the status code
|
||||||
|
*/
|
||||||
|
cusolverStatus_t CUSOLVERAPI
|
||||||
|
cusolverMgDestroyMatrixDesc(cudaLibMgMatrixDesc_t desc);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverMgSyevd_bufferSize(
|
||||||
|
cusolverMgHandle_t handle,
|
||||||
|
cusolverEigMode_t jobz,
|
||||||
|
cublasFillMode_t uplo,
|
||||||
|
int N,
|
||||||
|
void * array_d_A[],
|
||||||
|
int IA,
|
||||||
|
int JA,
|
||||||
|
cudaLibMgMatrixDesc_t descrA,
|
||||||
|
void * W,
|
||||||
|
cudaDataType dataTypeW,
|
||||||
|
cudaDataType computeType,
|
||||||
|
int64_t * lwork);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverMgSyevd(
|
||||||
|
cusolverMgHandle_t handle,
|
||||||
|
cusolverEigMode_t jobz,
|
||||||
|
cublasFillMode_t uplo,
|
||||||
|
int N,
|
||||||
|
void * array_d_A[],
|
||||||
|
int IA,
|
||||||
|
int JA,
|
||||||
|
cudaLibMgMatrixDesc_t descrA,
|
||||||
|
void * W,
|
||||||
|
cudaDataType dataTypeW,
|
||||||
|
cudaDataType computeType,
|
||||||
|
void * array_d_work[],
|
||||||
|
int64_t lwork,
|
||||||
|
int * info);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverMgGetrf_bufferSize(
|
||||||
|
cusolverMgHandle_t handle,
|
||||||
|
int M,
|
||||||
|
int N,
|
||||||
|
void * array_d_A[],
|
||||||
|
int IA,
|
||||||
|
int JA,
|
||||||
|
cudaLibMgMatrixDesc_t descrA,
|
||||||
|
int * array_d_IPIV[],
|
||||||
|
cudaDataType computeType,
|
||||||
|
int64_t * lwork);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverMgGetrf(
|
||||||
|
cusolverMgHandle_t handle,
|
||||||
|
int M,
|
||||||
|
int N,
|
||||||
|
void * array_d_A[],
|
||||||
|
int IA,
|
||||||
|
int JA,
|
||||||
|
cudaLibMgMatrixDesc_t descrA,
|
||||||
|
int * array_d_IPIV[],
|
||||||
|
cudaDataType computeType,
|
||||||
|
void * array_d_work[],
|
||||||
|
int64_t lwork,
|
||||||
|
int * info);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverMgGetrs_bufferSize(
|
||||||
|
cusolverMgHandle_t handle,
|
||||||
|
cublasOperation_t TRANS,
|
||||||
|
int N,
|
||||||
|
int NRHS,
|
||||||
|
void * array_d_A[],
|
||||||
|
int IA,
|
||||||
|
int JA,
|
||||||
|
cudaLibMgMatrixDesc_t descrA,
|
||||||
|
int * array_d_IPIV[],
|
||||||
|
void * array_d_B[],
|
||||||
|
int IB,
|
||||||
|
int JB,
|
||||||
|
cudaLibMgMatrixDesc_t descrB,
|
||||||
|
cudaDataType computeType,
|
||||||
|
int64_t * lwork);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverMgGetrs(
|
||||||
|
cusolverMgHandle_t handle,
|
||||||
|
cublasOperation_t TRANS,
|
||||||
|
int N,
|
||||||
|
int NRHS,
|
||||||
|
void * array_d_A[],
|
||||||
|
int IA,
|
||||||
|
int JA,
|
||||||
|
cudaLibMgMatrixDesc_t descrA,
|
||||||
|
int * array_d_IPIV[],
|
||||||
|
void * array_d_B[],
|
||||||
|
int IB,
|
||||||
|
int JB,
|
||||||
|
cudaLibMgMatrixDesc_t descrB,
|
||||||
|
cudaDataType computeType,
|
||||||
|
void * array_d_work[],
|
||||||
|
int64_t lwork,
|
||||||
|
int * info);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverMgPotrf_bufferSize(
|
||||||
|
cusolverMgHandle_t handle,
|
||||||
|
cublasFillMode_t uplo,
|
||||||
|
int N,
|
||||||
|
void * array_d_A[],
|
||||||
|
int IA,
|
||||||
|
int JA,
|
||||||
|
cudaLibMgMatrixDesc_t descrA,
|
||||||
|
cudaDataType computeType,
|
||||||
|
int64_t * lwork);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverMgPotrf(
|
||||||
|
cusolverMgHandle_t handle,
|
||||||
|
cublasFillMode_t uplo,
|
||||||
|
int N,
|
||||||
|
void * array_d_A[],
|
||||||
|
int IA,
|
||||||
|
int JA,
|
||||||
|
cudaLibMgMatrixDesc_t descrA,
|
||||||
|
cudaDataType computeType,
|
||||||
|
void * array_d_work[],
|
||||||
|
int64_t lwork,
|
||||||
|
int * h_info);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverMgPotrs_bufferSize(
|
||||||
|
cusolverMgHandle_t handle,
|
||||||
|
cublasFillMode_t uplo,
|
||||||
|
int n,
|
||||||
|
int nrhs,
|
||||||
|
void * array_d_A[],
|
||||||
|
int IA,
|
||||||
|
int JA,
|
||||||
|
cudaLibMgMatrixDesc_t descrA,
|
||||||
|
void * array_d_B[],
|
||||||
|
int IB,
|
||||||
|
int JB,
|
||||||
|
cudaLibMgMatrixDesc_t descrB,
|
||||||
|
cudaDataType computeType,
|
||||||
|
int64_t * lwork);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverMgPotrs(
|
||||||
|
cusolverMgHandle_t handle,
|
||||||
|
cublasFillMode_t uplo,
|
||||||
|
int n,
|
||||||
|
int nrhs,
|
||||||
|
void * array_d_A[],
|
||||||
|
int IA,
|
||||||
|
int JA,
|
||||||
|
cudaLibMgMatrixDesc_t descrA,
|
||||||
|
void * array_d_B[],
|
||||||
|
int IB,
|
||||||
|
int JB,
|
||||||
|
cudaLibMgMatrixDesc_t descrB,
|
||||||
|
cudaDataType computeType,
|
||||||
|
void * array_d_work[],
|
||||||
|
int64_t lwork,
|
||||||
|
int * h_info);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverMgPotri_bufferSize(
|
||||||
|
cusolverMgHandle_t handle,
|
||||||
|
cublasFillMode_t uplo,
|
||||||
|
int N,
|
||||||
|
void * array_d_A[],
|
||||||
|
int IA,
|
||||||
|
int JA,
|
||||||
|
cudaLibMgMatrixDesc_t descrA,
|
||||||
|
cudaDataType computeType,
|
||||||
|
int64_t * lwork);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverMgPotri(
|
||||||
|
cusolverMgHandle_t handle,
|
||||||
|
cublasFillMode_t uplo,
|
||||||
|
int N,
|
||||||
|
void * array_d_A[],
|
||||||
|
int IA,
|
||||||
|
int JA,
|
||||||
|
cudaLibMgMatrixDesc_t descrA,
|
||||||
|
cudaDataType computeType,
|
||||||
|
void * array_d_work[],
|
||||||
|
int64_t lwork,
|
||||||
|
int * h_info);
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
#endif // CUSOLVERMG_H_
|
||||||
339
external/cusolverRf.h
vendored
Normal file
339
external/cusolverRf.h
vendored
Normal file
@ -0,0 +1,339 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(CUSOLVERRF_H_)
|
||||||
|
#define CUSOLVERRF_H_
|
||||||
|
|
||||||
|
#include "driver_types.h"
|
||||||
|
#include "cuComplex.h"
|
||||||
|
#include "cusolver_common.h"
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
extern "C" {
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
/* CUSOLVERRF mode */
|
||||||
|
typedef enum {
|
||||||
|
CUSOLVERRF_RESET_VALUES_FAST_MODE_OFF = 0, // default
|
||||||
|
CUSOLVERRF_RESET_VALUES_FAST_MODE_ON = 1
|
||||||
|
} cusolverRfResetValuesFastMode_t;
|
||||||
|
|
||||||
|
/* CUSOLVERRF matrix format */
|
||||||
|
typedef enum {
|
||||||
|
CUSOLVERRF_MATRIX_FORMAT_CSR = 0, // default
|
||||||
|
CUSOLVERRF_MATRIX_FORMAT_CSC = 1
|
||||||
|
} cusolverRfMatrixFormat_t;
|
||||||
|
|
||||||
|
/* CUSOLVERRF unit diagonal */
|
||||||
|
typedef enum {
|
||||||
|
CUSOLVERRF_UNIT_DIAGONAL_STORED_L = 0, // default
|
||||||
|
CUSOLVERRF_UNIT_DIAGONAL_STORED_U = 1,
|
||||||
|
CUSOLVERRF_UNIT_DIAGONAL_ASSUMED_L = 2,
|
||||||
|
CUSOLVERRF_UNIT_DIAGONAL_ASSUMED_U = 3
|
||||||
|
} cusolverRfUnitDiagonal_t;
|
||||||
|
|
||||||
|
/* CUSOLVERRF factorization algorithm */
|
||||||
|
typedef enum {
|
||||||
|
CUSOLVERRF_FACTORIZATION_ALG0 = 0, // default
|
||||||
|
CUSOLVERRF_FACTORIZATION_ALG1 = 1,
|
||||||
|
CUSOLVERRF_FACTORIZATION_ALG2 = 2,
|
||||||
|
} cusolverRfFactorization_t;
|
||||||
|
|
||||||
|
/* CUSOLVERRF triangular solve algorithm */
|
||||||
|
typedef enum {
|
||||||
|
CUSOLVERRF_TRIANGULAR_SOLVE_ALG1 = 1, // default
|
||||||
|
CUSOLVERRF_TRIANGULAR_SOLVE_ALG2 = 2,
|
||||||
|
CUSOLVERRF_TRIANGULAR_SOLVE_ALG3 = 3
|
||||||
|
} cusolverRfTriangularSolve_t;
|
||||||
|
|
||||||
|
/* CUSOLVERRF numeric boost report */
|
||||||
|
typedef enum {
|
||||||
|
CUSOLVERRF_NUMERIC_BOOST_NOT_USED = 0, // default
|
||||||
|
CUSOLVERRF_NUMERIC_BOOST_USED = 1
|
||||||
|
} cusolverRfNumericBoostReport_t;
|
||||||
|
|
||||||
|
/* Opaque structure holding CUSOLVERRF library common */
|
||||||
|
struct cusolverRfCommon;
|
||||||
|
typedef struct cusolverRfCommon* cusolverRfHandle_t;
|
||||||
|
|
||||||
|
/* CUSOLVERRF create (allocate memory) and destroy (free memory) in the handle
|
||||||
|
*/
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverRfCreate(cusolverRfHandle_t* handle);
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverRfDestroy(cusolverRfHandle_t handle);
|
||||||
|
|
||||||
|
/* CUSOLVERRF set and get input format */
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverRfGetMatrixFormat(
|
||||||
|
cusolverRfHandle_t handle,
|
||||||
|
cusolverRfMatrixFormat_t* format,
|
||||||
|
cusolverRfUnitDiagonal_t* diag);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverRfSetMatrixFormat(
|
||||||
|
cusolverRfHandle_t handle,
|
||||||
|
cusolverRfMatrixFormat_t format,
|
||||||
|
cusolverRfUnitDiagonal_t diag);
|
||||||
|
|
||||||
|
/* CUSOLVERRF set and get numeric properties */
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverRfSetNumericProperties(
|
||||||
|
cusolverRfHandle_t handle,
|
||||||
|
double zero,
|
||||||
|
double boost);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverRfGetNumericProperties(
|
||||||
|
cusolverRfHandle_t handle,
|
||||||
|
double* zero,
|
||||||
|
double* boost);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverRfGetNumericBoostReport(
|
||||||
|
cusolverRfHandle_t handle,
|
||||||
|
cusolverRfNumericBoostReport_t* report);
|
||||||
|
|
||||||
|
/* CUSOLVERRF choose the triangular solve algorithm */
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverRfSetAlgs(
|
||||||
|
cusolverRfHandle_t handle,
|
||||||
|
cusolverRfFactorization_t factAlg,
|
||||||
|
cusolverRfTriangularSolve_t solveAlg);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverRfGetAlgs(
|
||||||
|
cusolverRfHandle_t handle,
|
||||||
|
cusolverRfFactorization_t* factAlg,
|
||||||
|
cusolverRfTriangularSolve_t* solveAlg);
|
||||||
|
|
||||||
|
/* CUSOLVERRF set and get fast mode */
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverRfGetResetValuesFastMode(
|
||||||
|
cusolverRfHandle_t handle,
|
||||||
|
cusolverRfResetValuesFastMode_t* fastMode);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverRfSetResetValuesFastMode(
|
||||||
|
cusolverRfHandle_t handle,
|
||||||
|
cusolverRfResetValuesFastMode_t fastMode);
|
||||||
|
|
||||||
|
/*** Non-Batched Routines ***/
|
||||||
|
/* CUSOLVERRF setup of internal structures from host or device memory */
|
||||||
|
cusolverStatus_t CUSOLVERAPI
|
||||||
|
cusolverRfSetupHost(/* Input (in the host memory) */
|
||||||
|
int n,
|
||||||
|
int nnzA,
|
||||||
|
int* h_csrRowPtrA,
|
||||||
|
int* h_csrColIndA,
|
||||||
|
double* h_csrValA,
|
||||||
|
int nnzL,
|
||||||
|
int* h_csrRowPtrL,
|
||||||
|
int* h_csrColIndL,
|
||||||
|
double* h_csrValL,
|
||||||
|
int nnzU,
|
||||||
|
int* h_csrRowPtrU,
|
||||||
|
int* h_csrColIndU,
|
||||||
|
double* h_csrValU,
|
||||||
|
int* h_P,
|
||||||
|
int* h_Q,
|
||||||
|
/* Output */
|
||||||
|
cusolverRfHandle_t handle);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI
|
||||||
|
cusolverRfSetupDevice(/* Input (in the device memory) */
|
||||||
|
int n,
|
||||||
|
int nnzA,
|
||||||
|
int* csrRowPtrA,
|
||||||
|
int* csrColIndA,
|
||||||
|
double* csrValA,
|
||||||
|
int nnzL,
|
||||||
|
int* csrRowPtrL,
|
||||||
|
int* csrColIndL,
|
||||||
|
double* csrValL,
|
||||||
|
int nnzU,
|
||||||
|
int* csrRowPtrU,
|
||||||
|
int* csrColIndU,
|
||||||
|
double* csrValU,
|
||||||
|
int* P,
|
||||||
|
int* Q,
|
||||||
|
/* Output */
|
||||||
|
cusolverRfHandle_t handle);
|
||||||
|
|
||||||
|
/* CUSOLVERRF update the matrix values (assuming the reordering, pivoting
|
||||||
|
and consequently the sparsity pattern of L and U did not change),
|
||||||
|
and zero out the remaining values. */
|
||||||
|
cusolverStatus_t CUSOLVERAPI
|
||||||
|
cusolverRfResetValues(/* Input (in the device memory) */
|
||||||
|
int n,
|
||||||
|
int nnzA,
|
||||||
|
int* csrRowPtrA,
|
||||||
|
int* csrColIndA,
|
||||||
|
double* csrValA,
|
||||||
|
int* P,
|
||||||
|
int* Q,
|
||||||
|
/* Output */
|
||||||
|
cusolverRfHandle_t handle);
|
||||||
|
|
||||||
|
/* CUSOLVERRF analysis (for parallelism) */
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverRfAnalyze(cusolverRfHandle_t handle);
|
||||||
|
|
||||||
|
/* CUSOLVERRF re-factorization (for parallelism) */
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverRfRefactor(cusolverRfHandle_t handle);
|
||||||
|
|
||||||
|
/* CUSOLVERRF extraction: Get L & U packed into a single matrix M */
|
||||||
|
cusolverStatus_t CUSOLVERAPI
|
||||||
|
cusolverRfAccessBundledFactorsDevice(/* Input */
|
||||||
|
cusolverRfHandle_t handle,
|
||||||
|
/* Output (in the host memory) */
|
||||||
|
int* nnzM,
|
||||||
|
/* Output (in the device memory) */
|
||||||
|
int** Mp,
|
||||||
|
int** Mi,
|
||||||
|
double** Mx);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI
|
||||||
|
cusolverRfExtractBundledFactorsHost(/* Input */
|
||||||
|
cusolverRfHandle_t handle,
|
||||||
|
/* Output (in the host memory) */
|
||||||
|
int* h_nnzM,
|
||||||
|
int** h_Mp,
|
||||||
|
int** h_Mi,
|
||||||
|
double** h_Mx);
|
||||||
|
|
||||||
|
/* CUSOLVERRF extraction: Get L & U individually */
|
||||||
|
cusolverStatus_t CUSOLVERAPI
|
||||||
|
cusolverRfExtractSplitFactorsHost(/* Input */
|
||||||
|
cusolverRfHandle_t handle,
|
||||||
|
/* Output (in the host memory) */
|
||||||
|
int* h_nnzL,
|
||||||
|
int** h_csrRowPtrL,
|
||||||
|
int** h_csrColIndL,
|
||||||
|
double** h_csrValL,
|
||||||
|
int* h_nnzU,
|
||||||
|
int** h_csrRowPtrU,
|
||||||
|
int** h_csrColIndU,
|
||||||
|
double** h_csrValU);
|
||||||
|
|
||||||
|
/* CUSOLVERRF (forward and backward triangular) solves */
|
||||||
|
cusolverStatus_t CUSOLVERAPI
|
||||||
|
cusolverRfSolve(/* Input (in the device memory) */
|
||||||
|
cusolverRfHandle_t handle,
|
||||||
|
int* P,
|
||||||
|
int* Q,
|
||||||
|
int nrhs, // only nrhs=1 is supported
|
||||||
|
double* Temp, // of size ldt*nrhs (ldt>=n)
|
||||||
|
int ldt,
|
||||||
|
/* Input/Output (in the device memory) */
|
||||||
|
double* XF,
|
||||||
|
/* Input */
|
||||||
|
int ldxf);
|
||||||
|
|
||||||
|
/*** Batched Routines ***/
|
||||||
|
/* CUSOLVERRF-batch setup of internal structures from host */
|
||||||
|
cusolverStatus_t CUSOLVERAPI
|
||||||
|
cusolverRfBatchSetupHost(/* Input (in the host memory)*/
|
||||||
|
int batchSize,
|
||||||
|
int n,
|
||||||
|
int nnzA,
|
||||||
|
int* h_csrRowPtrA,
|
||||||
|
int* h_csrColIndA,
|
||||||
|
double* h_csrValA_array[],
|
||||||
|
int nnzL,
|
||||||
|
int* h_csrRowPtrL,
|
||||||
|
int* h_csrColIndL,
|
||||||
|
double* h_csrValL,
|
||||||
|
int nnzU,
|
||||||
|
int* h_csrRowPtrU,
|
||||||
|
int* h_csrColIndU,
|
||||||
|
double* h_csrValU,
|
||||||
|
int* h_P,
|
||||||
|
int* h_Q,
|
||||||
|
/* Output (in the device memory) */
|
||||||
|
cusolverRfHandle_t handle);
|
||||||
|
|
||||||
|
/* CUSOLVERRF-batch update the matrix values (assuming the reordering,
|
||||||
|
pivoting and consequently the sparsity pattern of L and U did not change),
|
||||||
|
and zero out the remaining values. */
|
||||||
|
cusolverStatus_t CUSOLVERAPI
|
||||||
|
cusolverRfBatchResetValues(/* Input (in the device memory) */
|
||||||
|
int batchSize,
|
||||||
|
int n,
|
||||||
|
int nnzA,
|
||||||
|
int* csrRowPtrA,
|
||||||
|
int* csrColIndA,
|
||||||
|
double* csrValA_array[],
|
||||||
|
int* P,
|
||||||
|
int* Q,
|
||||||
|
/* Output */
|
||||||
|
cusolverRfHandle_t handle);
|
||||||
|
|
||||||
|
/* CUSOLVERRF-batch analysis (for parallelism) */
|
||||||
|
cusolverStatus_t CUSOLVERAPI
|
||||||
|
cusolverRfBatchAnalyze(cusolverRfHandle_t handle);
|
||||||
|
|
||||||
|
/* CUSOLVERRF-batch re-factorization (for parallelism) */
|
||||||
|
cusolverStatus_t CUSOLVERAPI
|
||||||
|
cusolverRfBatchRefactor(cusolverRfHandle_t handle);
|
||||||
|
|
||||||
|
/* CUSOLVERRF-batch (forward and backward triangular) solves */
|
||||||
|
cusolverStatus_t CUSOLVERAPI
|
||||||
|
cusolverRfBatchSolve(/* Input (in the device memory) */
|
||||||
|
cusolverRfHandle_t handle,
|
||||||
|
int* P,
|
||||||
|
int* Q,
|
||||||
|
int nrhs, // only nrhs=1 is supported
|
||||||
|
double* Temp, // of size 2*batchSize*(n*nrhs)
|
||||||
|
int ldt, // only ldt=n is supported
|
||||||
|
/* Input/Output (in the device memory) */
|
||||||
|
double* XF_array[],
|
||||||
|
/* Input */
|
||||||
|
int ldxf);
|
||||||
|
|
||||||
|
/* CUSOLVERRF-batch obtain the position of zero pivot */
|
||||||
|
cusolverStatus_t CUSOLVERAPI
|
||||||
|
cusolverRfBatchZeroPivot(/* Input */
|
||||||
|
cusolverRfHandle_t handle,
|
||||||
|
/* Output (in the host memory) */
|
||||||
|
int* position);
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
#endif /* CUSOLVERRF_H_ */
|
||||||
923
external/cusolverSp.h
vendored
Normal file
923
external/cusolverSp.h
vendored
Normal file
@ -0,0 +1,923 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2014 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(CUSOLVERSP_H_)
|
||||||
|
#define CUSOLVERSP_H_
|
||||||
|
|
||||||
|
#include "cusparse.h"
|
||||||
|
#include "cublas_v2.h"
|
||||||
|
#include "cusolver_common.h"
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
extern "C" {
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
struct cusolverSpContext;
|
||||||
|
typedef struct cusolverSpContext *cusolverSpHandle_t;
|
||||||
|
|
||||||
|
struct csrqrInfo;
|
||||||
|
typedef struct csrqrInfo *csrqrInfo_t;
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpCreate(cusolverSpHandle_t *handle);
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpDestroy(cusolverSpHandle_t handle);
|
||||||
|
cusolverStatus_t CUSOLVERAPI
|
||||||
|
cusolverSpSetStream(cusolverSpHandle_t handle, cudaStream_t streamId);
|
||||||
|
cusolverStatus_t CUSOLVERAPI
|
||||||
|
cusolverSpGetStream(cusolverSpHandle_t handle, cudaStream_t *streamId);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpXcsrissymHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnzA,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrEndPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
int * issym);
|
||||||
|
|
||||||
|
/* -------- GPU linear solver by LU factorization
|
||||||
|
* solve A*x = b, A can be singular
|
||||||
|
* [ls] stands for linear solve
|
||||||
|
* [v] stands for vector
|
||||||
|
* [lu] stands for LU factorization
|
||||||
|
*/
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpScsrlsvluHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int n,
|
||||||
|
int nnzA,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const float * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
const float * b,
|
||||||
|
float tol,
|
||||||
|
int reorder,
|
||||||
|
float * x,
|
||||||
|
int * singularity);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpDcsrlsvluHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int n,
|
||||||
|
int nnzA,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const double * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
const double * b,
|
||||||
|
double tol,
|
||||||
|
int reorder,
|
||||||
|
double * x,
|
||||||
|
int * singularity);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpCcsrlsvluHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int n,
|
||||||
|
int nnzA,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuComplex * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
const cuComplex * b,
|
||||||
|
float tol,
|
||||||
|
int reorder,
|
||||||
|
cuComplex * x,
|
||||||
|
int * singularity);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpZcsrlsvluHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int n,
|
||||||
|
int nnzA,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuDoubleComplex * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
const cuDoubleComplex * b,
|
||||||
|
double tol,
|
||||||
|
int reorder,
|
||||||
|
cuDoubleComplex * x,
|
||||||
|
int * singularity);
|
||||||
|
|
||||||
|
/* -------- GPU linear solver by QR factorization
|
||||||
|
* solve A*x = b, A can be singular
|
||||||
|
* [ls] stands for linear solve
|
||||||
|
* [v] stands for vector
|
||||||
|
* [qr] stands for QR factorization
|
||||||
|
*/
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpScsrlsvqr(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const float * csrVal,
|
||||||
|
const int * csrRowPtr,
|
||||||
|
const int * csrColInd,
|
||||||
|
const float * b,
|
||||||
|
float tol,
|
||||||
|
int reorder,
|
||||||
|
float * x,
|
||||||
|
int * singularity);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpDcsrlsvqr(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const double * csrVal,
|
||||||
|
const int * csrRowPtr,
|
||||||
|
const int * csrColInd,
|
||||||
|
const double * b,
|
||||||
|
double tol,
|
||||||
|
int reorder,
|
||||||
|
double * x,
|
||||||
|
int * singularity);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpCcsrlsvqr(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuComplex * csrVal,
|
||||||
|
const int * csrRowPtr,
|
||||||
|
const int * csrColInd,
|
||||||
|
const cuComplex * b,
|
||||||
|
float tol,
|
||||||
|
int reorder,
|
||||||
|
cuComplex * x,
|
||||||
|
int * singularity);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpZcsrlsvqr(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuDoubleComplex * csrVal,
|
||||||
|
const int * csrRowPtr,
|
||||||
|
const int * csrColInd,
|
||||||
|
const cuDoubleComplex * b,
|
||||||
|
double tol,
|
||||||
|
int reorder,
|
||||||
|
cuDoubleComplex * x,
|
||||||
|
int * singularity);
|
||||||
|
|
||||||
|
/* -------- CPU linear solver by QR factorization
|
||||||
|
* solve A*x = b, A can be singular
|
||||||
|
* [ls] stands for linear solve
|
||||||
|
* [v] stands for vector
|
||||||
|
* [qr] stands for QR factorization
|
||||||
|
*/
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpScsrlsvqrHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const float * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
const float * b,
|
||||||
|
float tol,
|
||||||
|
int reorder,
|
||||||
|
float * x,
|
||||||
|
int * singularity);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpDcsrlsvqrHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const double * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
const double * b,
|
||||||
|
double tol,
|
||||||
|
int reorder,
|
||||||
|
double * x,
|
||||||
|
int * singularity);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpCcsrlsvqrHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuComplex * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
const cuComplex * b,
|
||||||
|
float tol,
|
||||||
|
int reorder,
|
||||||
|
cuComplex * x,
|
||||||
|
int * singularity);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpZcsrlsvqrHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuDoubleComplex * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
const cuDoubleComplex * b,
|
||||||
|
double tol,
|
||||||
|
int reorder,
|
||||||
|
cuDoubleComplex * x,
|
||||||
|
int * singularity);
|
||||||
|
|
||||||
|
/* -------- CPU linear solver by Cholesky factorization
|
||||||
|
* solve A*x = b, A can be singular
|
||||||
|
* [ls] stands for linear solve
|
||||||
|
* [v] stands for vector
|
||||||
|
* [chol] stands for Cholesky factorization
|
||||||
|
*
|
||||||
|
* Only works for symmetric positive definite matrix.
|
||||||
|
* The upper part of A is ignored.
|
||||||
|
*/
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpScsrlsvcholHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const float * csrVal,
|
||||||
|
const int * csrRowPtr,
|
||||||
|
const int * csrColInd,
|
||||||
|
const float * b,
|
||||||
|
float tol,
|
||||||
|
int reorder,
|
||||||
|
float * x,
|
||||||
|
int * singularity);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpDcsrlsvcholHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const double * csrVal,
|
||||||
|
const int * csrRowPtr,
|
||||||
|
const int * csrColInd,
|
||||||
|
const double * b,
|
||||||
|
double tol,
|
||||||
|
int reorder,
|
||||||
|
double * x,
|
||||||
|
int * singularity);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpCcsrlsvcholHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuComplex * csrVal,
|
||||||
|
const int * csrRowPtr,
|
||||||
|
const int * csrColInd,
|
||||||
|
const cuComplex * b,
|
||||||
|
float tol,
|
||||||
|
int reorder,
|
||||||
|
cuComplex * x,
|
||||||
|
int * singularity);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpZcsrlsvcholHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuDoubleComplex * csrVal,
|
||||||
|
const int * csrRowPtr,
|
||||||
|
const int * csrColInd,
|
||||||
|
const cuDoubleComplex * b,
|
||||||
|
double tol,
|
||||||
|
int reorder,
|
||||||
|
cuDoubleComplex * x,
|
||||||
|
int * singularity);
|
||||||
|
|
||||||
|
/* -------- GPU linear solver by Cholesky factorization
|
||||||
|
* solve A*x = b, A can be singular
|
||||||
|
* [ls] stands for linear solve
|
||||||
|
* [v] stands for vector
|
||||||
|
* [chol] stands for Cholesky factorization
|
||||||
|
*
|
||||||
|
* Only works for symmetric positive definite matrix.
|
||||||
|
* The upper part of A is ignored.
|
||||||
|
*/
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpScsrlsvchol(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const float * csrVal,
|
||||||
|
const int * csrRowPtr,
|
||||||
|
const int * csrColInd,
|
||||||
|
const float * b,
|
||||||
|
float tol,
|
||||||
|
int reorder,
|
||||||
|
// output
|
||||||
|
float *x,
|
||||||
|
int * singularity);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpDcsrlsvchol(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const double * csrVal,
|
||||||
|
const int * csrRowPtr,
|
||||||
|
const int * csrColInd,
|
||||||
|
const double * b,
|
||||||
|
double tol,
|
||||||
|
int reorder,
|
||||||
|
// output
|
||||||
|
double *x,
|
||||||
|
int * singularity);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpCcsrlsvchol(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuComplex * csrVal,
|
||||||
|
const int * csrRowPtr,
|
||||||
|
const int * csrColInd,
|
||||||
|
const cuComplex * b,
|
||||||
|
float tol,
|
||||||
|
int reorder,
|
||||||
|
// output
|
||||||
|
cuComplex *x,
|
||||||
|
int * singularity);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpZcsrlsvchol(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuDoubleComplex * csrVal,
|
||||||
|
const int * csrRowPtr,
|
||||||
|
const int * csrColInd,
|
||||||
|
const cuDoubleComplex * b,
|
||||||
|
double tol,
|
||||||
|
int reorder,
|
||||||
|
// output
|
||||||
|
cuDoubleComplex *x,
|
||||||
|
int * singularity);
|
||||||
|
|
||||||
|
/* ----------- CPU least square solver by QR factorization
|
||||||
|
* solve min|b - A*x|
|
||||||
|
* [lsq] stands for least square
|
||||||
|
* [v] stands for vector
|
||||||
|
* [qr] stands for QR factorization
|
||||||
|
*/
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpScsrlsqvqrHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int n,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const float * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
const float * b,
|
||||||
|
float tol,
|
||||||
|
int * rankA,
|
||||||
|
float * x,
|
||||||
|
int * p,
|
||||||
|
float * min_norm);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpDcsrlsqvqrHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int n,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const double * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
const double * b,
|
||||||
|
double tol,
|
||||||
|
int * rankA,
|
||||||
|
double * x,
|
||||||
|
int * p,
|
||||||
|
double * min_norm);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpCcsrlsqvqrHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int n,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuComplex * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
const cuComplex * b,
|
||||||
|
float tol,
|
||||||
|
int * rankA,
|
||||||
|
cuComplex * x,
|
||||||
|
int * p,
|
||||||
|
float * min_norm);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpZcsrlsqvqrHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int n,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuDoubleComplex * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
const cuDoubleComplex * b,
|
||||||
|
double tol,
|
||||||
|
int * rankA,
|
||||||
|
cuDoubleComplex * x,
|
||||||
|
int * p,
|
||||||
|
double * min_norm);
|
||||||
|
|
||||||
|
/* --------- CPU eigenvalue solver by shift inverse
|
||||||
|
* solve A*x = lambda * x
|
||||||
|
* where lambda is the eigenvalue nearest mu0.
|
||||||
|
* [eig] stands for eigenvalue solver
|
||||||
|
* [si] stands for shift-inverse
|
||||||
|
*/
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpScsreigvsiHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const float * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
float mu0,
|
||||||
|
const float * x0,
|
||||||
|
int maxite,
|
||||||
|
float tol,
|
||||||
|
float * mu,
|
||||||
|
float * x);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpDcsreigvsiHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const double * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
double mu0,
|
||||||
|
const double * x0,
|
||||||
|
int maxite,
|
||||||
|
double tol,
|
||||||
|
double * mu,
|
||||||
|
double * x);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpCcsreigvsiHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuComplex * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
cuComplex mu0,
|
||||||
|
const cuComplex * x0,
|
||||||
|
int maxite,
|
||||||
|
float tol,
|
||||||
|
cuComplex * mu,
|
||||||
|
cuComplex * x);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpZcsreigvsiHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuDoubleComplex * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
cuDoubleComplex mu0,
|
||||||
|
const cuDoubleComplex * x0,
|
||||||
|
int maxite,
|
||||||
|
double tol,
|
||||||
|
cuDoubleComplex * mu,
|
||||||
|
cuDoubleComplex * x);
|
||||||
|
|
||||||
|
/* --------- GPU eigenvalue solver by shift inverse
|
||||||
|
* solve A*x = lambda * x
|
||||||
|
* where lambda is the eigenvalue nearest mu0.
|
||||||
|
* [eig] stands for eigenvalue solver
|
||||||
|
* [si] stands for shift-inverse
|
||||||
|
*/
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpScsreigvsi(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const float * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
float mu0,
|
||||||
|
const float * x0,
|
||||||
|
int maxite,
|
||||||
|
float eps,
|
||||||
|
float * mu,
|
||||||
|
float * x);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpDcsreigvsi(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const double * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
double mu0,
|
||||||
|
const double * x0,
|
||||||
|
int maxite,
|
||||||
|
double eps,
|
||||||
|
double * mu,
|
||||||
|
double * x);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpCcsreigvsi(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuComplex * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
cuComplex mu0,
|
||||||
|
const cuComplex * x0,
|
||||||
|
int maxite,
|
||||||
|
float eps,
|
||||||
|
cuComplex * mu,
|
||||||
|
cuComplex * x);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpZcsreigvsi(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuDoubleComplex * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
cuDoubleComplex mu0,
|
||||||
|
const cuDoubleComplex * x0,
|
||||||
|
int maxite,
|
||||||
|
double eps,
|
||||||
|
cuDoubleComplex * mu,
|
||||||
|
cuDoubleComplex * x);
|
||||||
|
|
||||||
|
// ----------- enclosed eigenvalues
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpScsreigsHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const float * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
cuComplex left_bottom_corner,
|
||||||
|
cuComplex right_upper_corner,
|
||||||
|
int * num_eigs);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpDcsreigsHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const double * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
cuDoubleComplex left_bottom_corner,
|
||||||
|
cuDoubleComplex right_upper_corner,
|
||||||
|
int * num_eigs);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpCcsreigsHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuComplex * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
cuComplex left_bottom_corner,
|
||||||
|
cuComplex right_upper_corner,
|
||||||
|
int * num_eigs);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpZcsreigsHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuDoubleComplex * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
cuDoubleComplex left_bottom_corner,
|
||||||
|
cuDoubleComplex right_upper_corner,
|
||||||
|
int * num_eigs);
|
||||||
|
|
||||||
|
/* --------- CPU symrcm
|
||||||
|
* Symmetric reverse Cuthill McKee permutation
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpXcsrsymrcmHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int n,
|
||||||
|
int nnzA,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
int * p);
|
||||||
|
|
||||||
|
/* --------- CPU symmdq
|
||||||
|
* Symmetric minimum degree algorithm by quotient graph
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpXcsrsymmdqHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int n,
|
||||||
|
int nnzA,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
int * p);
|
||||||
|
|
||||||
|
/* --------- CPU symmdq
|
||||||
|
* Symmetric Approximate minimum degree algorithm by quotient graph
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpXcsrsymamdHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int n,
|
||||||
|
int nnzA,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
int * p);
|
||||||
|
|
||||||
|
/* --------- CPU metis
|
||||||
|
* symmetric reordering
|
||||||
|
*/
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpXcsrmetisndHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int n,
|
||||||
|
int nnzA,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
const int64_t * options,
|
||||||
|
int * p);
|
||||||
|
|
||||||
|
/* --------- CPU zfd
|
||||||
|
* Zero free diagonal reordering
|
||||||
|
*/
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpScsrzfdHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int n,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const float * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
int * P,
|
||||||
|
int * numnz);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpDcsrzfdHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int n,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const double * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
int * P,
|
||||||
|
int * numnz);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpCcsrzfdHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int n,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuComplex * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
int * P,
|
||||||
|
int * numnz);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpZcsrzfdHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int n,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuDoubleComplex * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
int * P,
|
||||||
|
int * numnz);
|
||||||
|
|
||||||
|
/* --------- CPU permuation
|
||||||
|
* P*A*Q^T
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpXcsrperm_bufferSizeHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int n,
|
||||||
|
int nnzA,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
const int * p,
|
||||||
|
const int * q,
|
||||||
|
size_t * bufferSizeInBytes);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpXcsrpermHost(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int n,
|
||||||
|
int nnzA,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
int * csrRowPtrA,
|
||||||
|
int * csrColIndA,
|
||||||
|
const int * p,
|
||||||
|
const int * q,
|
||||||
|
int * map,
|
||||||
|
void * pBuffer);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Low-level API: Batched QR
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpCreateCsrqrInfo(csrqrInfo_t *info);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpDestroyCsrqrInfo(csrqrInfo_t info);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpXcsrqrAnalysisBatched(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int n,
|
||||||
|
int nnzA,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
csrqrInfo_t info);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrBufferInfoBatched(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int n,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const float * csrVal,
|
||||||
|
const int * csrRowPtr,
|
||||||
|
const int * csrColInd,
|
||||||
|
int batchSize,
|
||||||
|
csrqrInfo_t info,
|
||||||
|
size_t * internalDataInBytes,
|
||||||
|
size_t * workspaceInBytes);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrBufferInfoBatched(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int n,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const double * csrVal,
|
||||||
|
const int * csrRowPtr,
|
||||||
|
const int * csrColInd,
|
||||||
|
int batchSize,
|
||||||
|
csrqrInfo_t info,
|
||||||
|
size_t * internalDataInBytes,
|
||||||
|
size_t * workspaceInBytes);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrBufferInfoBatched(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int n,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuComplex * csrVal,
|
||||||
|
const int * csrRowPtr,
|
||||||
|
const int * csrColInd,
|
||||||
|
int batchSize,
|
||||||
|
csrqrInfo_t info,
|
||||||
|
size_t * internalDataInBytes,
|
||||||
|
size_t * workspaceInBytes);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrBufferInfoBatched(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int n,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuDoubleComplex * csrVal,
|
||||||
|
const int * csrRowPtr,
|
||||||
|
const int * csrColInd,
|
||||||
|
int batchSize,
|
||||||
|
csrqrInfo_t info,
|
||||||
|
size_t * internalDataInBytes,
|
||||||
|
size_t * workspaceInBytes);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrsvBatched(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int n,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const float * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
const float * b,
|
||||||
|
float * x,
|
||||||
|
int batchSize,
|
||||||
|
csrqrInfo_t info,
|
||||||
|
void * pBuffer);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrsvBatched(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int n,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const double * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
const double * b,
|
||||||
|
double * x,
|
||||||
|
int batchSize,
|
||||||
|
csrqrInfo_t info,
|
||||||
|
void * pBuffer);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrsvBatched(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int n,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuComplex * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
const cuComplex * b,
|
||||||
|
cuComplex * x,
|
||||||
|
int batchSize,
|
||||||
|
csrqrInfo_t info,
|
||||||
|
void * pBuffer);
|
||||||
|
|
||||||
|
cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrsvBatched(
|
||||||
|
cusolverSpHandle_t handle,
|
||||||
|
int m,
|
||||||
|
int n,
|
||||||
|
int nnz,
|
||||||
|
const cusparseMatDescr_t descrA,
|
||||||
|
const cuDoubleComplex * csrValA,
|
||||||
|
const int * csrRowPtrA,
|
||||||
|
const int * csrColIndA,
|
||||||
|
const cuDoubleComplex * b,
|
||||||
|
cuDoubleComplex * x,
|
||||||
|
int batchSize,
|
||||||
|
csrqrInfo_t info,
|
||||||
|
void * pBuffer);
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
#endif // define CUSOLVERSP_H_
|
||||||
1107
external/cusolverSp_LOWLEVEL_PREVIEW.h
vendored
Normal file
1107
external/cusolverSp_LOWLEVEL_PREVIEW.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,28 +1,174 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include "../external/cusolver_common.h"
|
#include <windows.h>
|
||||||
#define no_name_mangle extern "C"
|
#define WIN32_LEAN_AND_MEAN
|
||||||
|
|
||||||
__global__ void hello_from_GPU() {
|
#include "../external/cusolver_common.h"
|
||||||
printf("HELLO FROM GPUUUU");
|
#include "../external/cusolverDn.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
# define no_name_mangle extern "C"
|
||||||
|
#else
|
||||||
|
# define no_name_mangle
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define function static
|
||||||
|
|
||||||
|
#define GLOBAL_LOG_BUFFER_SIZE 4096*4096
|
||||||
|
|
||||||
|
__device__ int last_index;
|
||||||
|
__device__ char *device_log_buffer;
|
||||||
|
char *log_buffer;
|
||||||
|
|
||||||
|
#define D_LOG(msg) d_debug_printf((msg))
|
||||||
|
//#define D_LOGF(msg, ...) d_debug_printfv((msg), __VA_ARGS__)
|
||||||
|
#define H_LOG(msg) h_debug_printf((msg))
|
||||||
|
#define H_LOGF(msg, ...) h_debug_printfv((msg), __VA_ARGS__)
|
||||||
|
|
||||||
|
__host__ function void h_debug_printf(const char *msg);
|
||||||
|
__host__ function void h_debug_printfv(const char *fmt, ...);
|
||||||
|
__device__ function void d_debug_printf(const char *msg);
|
||||||
|
//__device__ function void d_debug_printfv(const char *fmt, ...);
|
||||||
|
|
||||||
|
__global__ void init_device_logging()
|
||||||
|
{
|
||||||
|
last_index = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void check_cusolver_version() {
|
|
||||||
|
__host__ function void
|
||||||
|
h_debug_printf(const char* msg)
|
||||||
|
{
|
||||||
|
OutputDebugStringA(msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ function void
|
||||||
|
h_debug_printfv(const char *fmt, ...)
|
||||||
|
{
|
||||||
|
char buffer[2048] = {0};
|
||||||
|
va_list args;
|
||||||
|
va_start(args, fmt);
|
||||||
|
vsnprintf(&buffer[0], sizeof(buffer), fmt, args);
|
||||||
|
va_end(args);
|
||||||
|
OutputDebugStringA(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ function void
|
||||||
|
d_debug_printf(const char *msg) {
|
||||||
|
int i = 0;
|
||||||
|
while(msg[i] != '\0')
|
||||||
|
{
|
||||||
|
device_log_buffer[last_index + i] = msg[i];
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
last_index = last_index + 1;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
__global__ void hello_from_GPU()
|
||||||
|
{
|
||||||
|
|
||||||
|
D_LOG("HELLO FROM GPUUUU\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
check_cusolver_version()
|
||||||
|
{
|
||||||
|
|
||||||
int major=-1, minor=-1, patch=-1;
|
int major=-1, minor=-1, patch=-1;
|
||||||
cusolverGetProperty(MAJOR_VERSION, &major);
|
cusolverGetProperty(MAJOR_VERSION, &major);
|
||||||
cusolverGetProperty(MINOR_VERSION, &minor);
|
cusolverGetProperty(MINOR_VERSION, &minor);
|
||||||
cusolverGetProperty(PATCH_LEVEL, &patch);
|
cusolverGetProperty(PATCH_LEVEL, &patch);
|
||||||
printf("CUSOLVER Version (Major,Minor,PatchLevel): %d.%d.%d\n", major,minor,patch);
|
H_LOGF("CUSOLVER Version (Major,Minor,PatchLevel): %d.%d.%d\n", major,minor,patch);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
no_name_mangle void cuda_entry_point() {
|
no_name_mangle void
|
||||||
|
cuda_entry_point()
|
||||||
|
{
|
||||||
|
H_LOG("\n\n-- Entered CUDA code:\n\n");
|
||||||
|
char *temp_device_log_buffer;
|
||||||
|
cudaMalloc((void**)&temp_device_log_buffer, GLOBAL_LOG_BUFFER_SIZE * sizeof(char));
|
||||||
|
cudaMemcpyToSymbol(device_log_buffer, &temp_device_log_buffer, sizeof(char *));
|
||||||
check_cusolver_version();
|
check_cusolver_version();
|
||||||
|
|
||||||
hello_from_GPU<<<1, 1>>>();
|
init_device_logging<<<1, 1>>>();
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
hello_from_GPU<<<1, 1>>>();
|
||||||
|
cudaDeviceSynchronize();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
log_buffer = (char *)malloc(GLOBAL_LOG_BUFFER_SIZE * sizeof(char));
|
||||||
|
cudaMemcpy(log_buffer, temp_device_log_buffer, GLOBAL_LOG_BUFFER_SIZE * sizeof(char), cudaMemcpyDeviceToHost);
|
||||||
|
|
||||||
|
// Print all messages from cuda.
|
||||||
|
H_LOG(log_buffer);
|
||||||
|
|
||||||
|
H_LOG("\n\n Testing cuSolver Eigenvalue problem \n\n");
|
||||||
|
|
||||||
|
const int size = 5;
|
||||||
|
const int mat_size = size*size;
|
||||||
|
double A[mat_size] = {
|
||||||
|
6.39, 0.13, -8.23, 5.71, -3.18,
|
||||||
|
0.13, 8.37, -4.46, -6.10, 7.21,
|
||||||
|
-8.23, -4.46, -9.58, -9.25, -7.42,
|
||||||
|
5.71, -6.10, -9.25, 3.72, 8.54,
|
||||||
|
-3.18, 7.21, -7.42, 8.54, 2.51};
|
||||||
|
|
||||||
|
double *device_mat;
|
||||||
|
cudaMalloc((void**)&device_mat, mat_size * sizeof(double));
|
||||||
|
cudaMemcpy(device_mat, A, mat_size * sizeof(double), cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
cusolverDnHandle_t cusolverH = 0;
|
||||||
|
cusolverDnCreate(&cusolverH);
|
||||||
|
|
||||||
|
double W[size];
|
||||||
|
double *d_W;
|
||||||
|
cudaMalloc((void**)&d_W, size * sizeof(double));
|
||||||
|
|
||||||
|
int lwork;
|
||||||
|
cusolverDnDsyevd_bufferSize(
|
||||||
|
cusolverH, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_LOWER,
|
||||||
|
size, device_mat, size, d_W, &lwork
|
||||||
|
);
|
||||||
|
|
||||||
|
double *d_work;
|
||||||
|
cudaMalloc((void**)&d_work, lwork * sizeof(double));
|
||||||
|
|
||||||
|
int *device_info;
|
||||||
|
cudaMalloc((void**)&device_info, sizeof(int));
|
||||||
|
|
||||||
|
cusolverDnDsyevd(
|
||||||
|
cusolverH, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_LOWER,
|
||||||
|
size, device_mat, size, d_W, d_work, lwork, device_info
|
||||||
|
);
|
||||||
|
|
||||||
|
double out_mat[mat_size];
|
||||||
|
cudaMemcpy(W, d_W, size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||||
|
cudaMemcpy(out_mat, device_mat, mat_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||||
|
|
||||||
|
H_LOG("Results on GPU dsyevd:\n");
|
||||||
|
H_LOG("Eigenvalues:\n");
|
||||||
|
for(int i = 0; i < size; i++)
|
||||||
|
{
|
||||||
|
H_LOGF("%6.2f\n", W[i]);
|
||||||
|
}
|
||||||
|
H_LOG("\n");
|
||||||
|
H_LOG("Eigenvectors (columnwise):\n");
|
||||||
|
for(int i = 0; i < size; i++)
|
||||||
|
{
|
||||||
|
for(int j = 0; j < size; j++)
|
||||||
|
{
|
||||||
|
H_LOGF("%6.2f ", out_mat[j * size + i]);
|
||||||
|
}
|
||||||
|
H_LOG("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
cudaFree(temp_device_log_buffer);
|
||||||
}
|
}
|
||||||
|
|||||||
35
src/main.c
35
src/main.c
@ -14,7 +14,7 @@
|
|||||||
#include "base/base_inc.c"
|
#include "base/base_inc.c"
|
||||||
#include "os/os_inc.c"
|
#include "os/os_inc.c"
|
||||||
#include "os/os_entry_point.c"
|
#include "os/os_entry_point.c"
|
||||||
|
#include "test_mkl.c"
|
||||||
|
|
||||||
#define grid_file_path_bin "D:\\dev\\eigsol_gpu\\out\\grid.bin"
|
#define grid_file_path_bin "D:\\dev\\eigsol_gpu\\out\\grid.bin"
|
||||||
#define grid_file_path "D:\\dev\\eigsol_gpu\\out\\grid.dat"
|
#define grid_file_path "D:\\dev\\eigsol_gpu\\out\\grid.dat"
|
||||||
@ -53,8 +53,6 @@ struct BSplineCtx
|
|||||||
F64 *bsplines;
|
F64 *bsplines;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//~ Globals
|
//~ Globals
|
||||||
global Grid g_grid = {0};
|
global Grid g_grid = {0};
|
||||||
global BSplineCtx g_bspline_ctx = {0};
|
global BSplineCtx g_bspline_ctx = {0};
|
||||||
@ -205,11 +203,11 @@ set_up_bspline_context(Arena* arena)
|
|||||||
{
|
{
|
||||||
// Create knotpoint sequence.
|
// Create knotpoint sequence.
|
||||||
U32 k = 4;
|
U32 k = 4;
|
||||||
U32 N = 14;
|
U32 bspl_N = 14;
|
||||||
g_bspline_ctx.order = k;
|
g_bspline_ctx.order = k;
|
||||||
g_bspline_ctx.num_knotpoints = N;
|
g_bspline_ctx.num_knotpoints = bspl_N;
|
||||||
g_bspline_ctx.num_bsplines = N-k;
|
g_bspline_ctx.num_bsplines = bspl_N-k;
|
||||||
g_bspline_ctx.num_phys_points = N-(2*k)+2; // Remove k points at each end, and then add back the first and last points.
|
g_bspline_ctx.num_phys_points = bspl_N-(2*k)+2; // Remove k points at each end, and then add back the first and last points.
|
||||||
g_bspline_ctx.arena = arena;
|
g_bspline_ctx.arena = arena;
|
||||||
g_bspline_ctx.knotpoints = PushArray(arena, F64, g_bspline_ctx.num_knotpoints);
|
g_bspline_ctx.knotpoints = PushArray(arena, F64, g_bspline_ctx.num_knotpoints);
|
||||||
// Set up physical points;
|
// Set up physical points;
|
||||||
@ -308,36 +306,37 @@ write_bsplines_to_matrix_F64(Arena *arena)
|
|||||||
str8_list_push(scratch.arena, &bspline_array_list, row_joined);
|
str8_list_push(scratch.arena, &bspline_array_list, row_joined);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
write_string_list_to_file(scratch.arena, str8_lit(bspline_array_file_path), &bspline_array_list);
|
write_string_list_to_file(scratch.arena, str8_lit(bspline_array_file_path), &bspline_array_list);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function
|
function
|
||||||
void EntryPoint()
|
void EntryPoint(void)
|
||||||
{
|
{
|
||||||
OS_InitReceipt os_receipt = OS_init();
|
OS_InitReceipt os_receipt = OS_init();
|
||||||
OS_InitGfxReceipt os_gfx_receipt = OS_gfx_init(os_receipt);
|
OS_InitGfxReceipt os_gfx_receipt = OS_gfx_init(os_receipt);
|
||||||
|
|
||||||
Arena *arena = m_make_arena();
|
Arena *arena = m_make_arena();
|
||||||
|
|
||||||
|
//- Set up grid and write to file.
|
||||||
set_up_grid(arena);
|
set_up_grid(arena);
|
||||||
|
|
||||||
write_array_binary_F64(str8_lit(grid_file_path_bin), g_grid.points, g_grid.num_steps);
|
write_array_binary_F64(str8_lit(grid_file_path_bin), g_grid.points, g_grid.num_steps);
|
||||||
write_array_F64(str8_lit(grid_file_path), g_grid.points, g_grid.num_steps, "%13.6e\n");
|
write_array_F64(str8_lit(grid_file_path), g_grid.points, g_grid.num_steps, "%13.6e\n");
|
||||||
|
|
||||||
|
//- The BSpline context is the knotpoints and the BSpline order etc.
|
||||||
set_up_bspline_context(arena);
|
set_up_bspline_context(arena);
|
||||||
|
write_array_F64(str8_lit(knotpoints_file_path),
|
||||||
|
g_bspline_ctx.knotpoints, g_bspline_ctx.num_knotpoints,
|
||||||
|
"%13.6e\n");
|
||||||
|
|
||||||
|
//- Then we generate the BSplines and save them off for reference and debugging.
|
||||||
write_array_F64(str8_lit(knotpoints_file_path), g_bspline_ctx.knotpoints, g_bspline_ctx.num_knotpoints, "%13.6e\n");
|
|
||||||
|
|
||||||
write_bsplines_to_matrix_F64(arena);
|
write_bsplines_to_matrix_F64(arena);
|
||||||
|
|
||||||
|
//- Test MKL
|
||||||
|
test_mkl_zgeev();
|
||||||
|
test_mkl_dsyevd();
|
||||||
|
cuda_entry_point();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
272
src/test_mkl.c
Normal file
272
src/test_mkl.c
Normal file
@ -0,0 +1,272 @@
|
|||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* Copyright 2009-2021 Intel Corporation.
|
||||||
|
*
|
||||||
|
* This software and the related documents are Intel copyrighted materials, and
|
||||||
|
* your use of them is governed by the express license under which they were
|
||||||
|
* provided to you (License). Unless the License provides otherwise, you may not
|
||||||
|
* use, modify, copy, publish, distribute, disclose or transmit this software or
|
||||||
|
* the related documents without Intel's prior written permission.
|
||||||
|
*
|
||||||
|
* This software and the related documents are provided as is, with no express
|
||||||
|
* or implied warranties, other than those that are expressly stated in the
|
||||||
|
* License.
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
ZGEEV Example.
|
||||||
|
==============
|
||||||
|
|
||||||
|
Program computes the eigenvalues and left and right eigenvectors of a general
|
||||||
|
rectangular matrix A:
|
||||||
|
|
||||||
|
( -3.84, 2.25) ( -8.94, -4.75) ( 8.95, -6.53) ( -9.87, 4.82)
|
||||||
|
( -0.66, 0.83) ( -4.40, -3.82) ( -3.50, -4.26) ( -3.15, 7.36)
|
||||||
|
( -3.99, -4.73) ( -5.88, -6.60) ( -3.36, -0.40) ( -0.75, 5.23)
|
||||||
|
( 7.74, 4.18) ( 3.66, -7.53) ( 2.58, 3.60) ( 4.59, 5.41)
|
||||||
|
|
||||||
|
Description.
|
||||||
|
============
|
||||||
|
|
||||||
|
The routine computes for an n-by-n complex nonsymmetric matrix A, the
|
||||||
|
eigenvalues and, optionally, the left and/or right eigenvectors. The right
|
||||||
|
eigenvector v(j) of A satisfies
|
||||||
|
|
||||||
|
A*v(j)= lambda(j)*v(j)
|
||||||
|
|
||||||
|
where lambda(j) is its eigenvalue. The left eigenvector u(j) of A satisfies
|
||||||
|
|
||||||
|
u(j)H*A = lambda(j)*u(j)H
|
||||||
|
|
||||||
|
where u(j)H denotes the conjugate transpose of u(j). The computed
|
||||||
|
eigenvectors are normalized to have Euclidean norm equal to 1 and
|
||||||
|
largest component real.
|
||||||
|
|
||||||
|
Example Program Results.
|
||||||
|
========================
|
||||||
|
|
||||||
|
ZGEEV Example Program Results
|
||||||
|
|
||||||
|
Eigenvalues
|
||||||
|
( -9.43,-12.98) ( -3.44, 12.69) ( 0.11, -3.40) ( 5.76, 7.13)
|
||||||
|
|
||||||
|
Left eigenvectors
|
||||||
|
( 0.24, -0.18) ( 0.61, 0.00) ( -0.18, -0.33) ( 0.28, 0.09)
|
||||||
|
( 0.79, 0.00) ( -0.05, -0.27) ( 0.82, 0.00) ( -0.55, 0.16)
|
||||||
|
( 0.22, -0.27) ( -0.21, 0.53) ( -0.37, 0.15) ( 0.45, 0.09)
|
||||||
|
( -0.02, 0.41) ( 0.40, -0.24) ( 0.06, 0.12) ( 0.62, 0.00)
|
||||||
|
|
||||||
|
Right eigenvectors
|
||||||
|
( 0.43, 0.33) ( 0.83, 0.00) ( 0.60, 0.00) ( -0.31, 0.03)
|
||||||
|
( 0.51, -0.03) ( 0.08, -0.25) ( -0.40, -0.20) ( 0.04, 0.34)
|
||||||
|
( 0.62, 0.00) ( -0.25, 0.28) ( -0.09, -0.48) ( 0.36, 0.06)
|
||||||
|
( -0.23, 0.11) ( -0.10, -0.32) ( -0.43, 0.13) ( 0.81, 0.00)
|
||||||
|
*/
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <mkl.h>
|
||||||
|
|
||||||
|
/* Auxiliary routines prototypes */
|
||||||
|
function void
|
||||||
|
print_matrix_cmplx16( char* desc, int m, int n, MKL_Complex16* a, int lda );
|
||||||
|
|
||||||
|
#ifndef TEST_MKL_N
|
||||||
|
#define TEST_MKL_N 4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Main program */
|
||||||
|
static void
|
||||||
|
test_mkl_zgeev(void) {
|
||||||
|
/* Locals */
|
||||||
|
int N = TEST_MKL_N;
|
||||||
|
MKL_INT n = N, lda = N, ldvl = N, ldvr = N, info, lwork;
|
||||||
|
MKL_Complex16 wkopt;
|
||||||
|
MKL_Complex16* work;
|
||||||
|
/* Local arrays */
|
||||||
|
/* rwork dimension should be at least 2*n */
|
||||||
|
double rwork[2*TEST_MKL_N];
|
||||||
|
MKL_Complex16 w[TEST_MKL_N], vl[TEST_MKL_N*TEST_MKL_N], vr[TEST_MKL_N*TEST_MKL_N];
|
||||||
|
MKL_Complex16 a[TEST_MKL_N*TEST_MKL_N] = {
|
||||||
|
{-3.84, 2.25}, {-0.66, 0.83}, {-3.99, -4.73}, { 7.74, 4.18},
|
||||||
|
{-8.94, -4.75}, {-4.40, -3.82}, {-5.88, -6.60}, { 3.66, -7.53},
|
||||||
|
{ 8.95, -6.53}, {-3.50, -4.26}, {-3.36, -0.40}, { 2.58, 3.60},
|
||||||
|
{-9.87, 4.82}, {-3.15, 7.36}, {-0.75, 5.23}, { 4.59, 5.41}
|
||||||
|
};
|
||||||
|
/* Executable statements */
|
||||||
|
OutputDebugString( " ZGEEV Example Program Results\n" );
|
||||||
|
/* Query and allocate the optimal workspace */
|
||||||
|
lwork = -1;
|
||||||
|
zgeev( "Vectors", "Vectors", &n, a, &lda, w, vl, &ldvl, vr, &ldvr,
|
||||||
|
&wkopt, &lwork, rwork, &info );
|
||||||
|
lwork = (MKL_INT)wkopt.real;
|
||||||
|
work = (MKL_Complex16*)malloc( lwork*sizeof(MKL_Complex16) );
|
||||||
|
/* Solve eigenproblem */
|
||||||
|
zgeev( "Vectors", "Vectors", &n, a, &lda, w, vl, &ldvl, vr, &ldvr,
|
||||||
|
work, &lwork, rwork, &info );
|
||||||
|
/* Check for convergence */
|
||||||
|
if( info > 0 ) {
|
||||||
|
OutputDebugString( "The algorithm failed to compute eigenvalues.\n" );
|
||||||
|
exit( 1 );
|
||||||
|
}
|
||||||
|
/* Print eigenvalues */
|
||||||
|
print_matrix_cmplx16( "Eigenvalues", 1, n, w, 1 );
|
||||||
|
/* Print left eigenvectors */
|
||||||
|
print_matrix_cmplx16( "Left eigenvectors", n, n, vl, ldvl );
|
||||||
|
/* Print right eigenvectors */
|
||||||
|
print_matrix_cmplx16( "Right eigenvectors", n, n, vr, ldvr );
|
||||||
|
/* Free workspace */
|
||||||
|
free( (void*)work );
|
||||||
|
//exit( 0 );
|
||||||
|
} /* End of ZGEEV Example */
|
||||||
|
|
||||||
|
/* Auxiliary routine: printing a matrix */
|
||||||
|
function void
|
||||||
|
print_matrix_cmplx16( char* desc, int m, int n, MKL_Complex16* a, int lda ) {
|
||||||
|
ArenaTemp scratch = scratch_get(0,0);
|
||||||
|
int i, j;
|
||||||
|
String8 newline = str8_lit("\n");
|
||||||
|
String8 header = str8_pushf(scratch.arena, "\n %s\n", desc );
|
||||||
|
OutputDebugString(header.str);
|
||||||
|
//printf("\n %s \n", desc);
|
||||||
|
for( i = 0; i < m; i++ ) {
|
||||||
|
for( j = 0; j < n; j++ ) {
|
||||||
|
String8 outstr = str8_pushf(scratch.arena, " (%6.2f,%6.2f)", a[i+j*lda].real, a[i+j*lda].imag );
|
||||||
|
OutputDebugString(outstr.str);
|
||||||
|
//printf(" (%6.2f,%6.2f)", a[i+j*lda].real, a[i+j*lda].imag );
|
||||||
|
}
|
||||||
|
OutputDebugString(newline.str);
|
||||||
|
//printf("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* Copyright 2009-2021 Intel Corporation.
|
||||||
|
*
|
||||||
|
* This software and the related documents are Intel copyrighted materials, and
|
||||||
|
* your use of them is governed by the express license under which they were
|
||||||
|
* provided to you (License). Unless the License provides otherwise, you may not
|
||||||
|
* use, modify, copy, publish, distribute, disclose or transmit this software or
|
||||||
|
* the related documents without Intel's prior written permission.
|
||||||
|
*
|
||||||
|
* This software and the related documents are provided as is, with no express
|
||||||
|
* or implied warranties, other than those that are expressly stated in the
|
||||||
|
* License.
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
DSYEVD Example.
|
||||||
|
==============
|
||||||
|
|
||||||
|
Program computes all eigenvalues and eigenvectors of a real symmetric
|
||||||
|
matrix A using divide and conquer algorithm, where A is:
|
||||||
|
|
||||||
|
6.39 0.13 -8.23 5.71 -3.18
|
||||||
|
0.13 8.37 -4.46 -6.10 7.21
|
||||||
|
-8.23 -4.46 -9.58 -9.25 -7.42
|
||||||
|
5.71 -6.10 -9.25 3.72 8.54
|
||||||
|
-3.18 7.21 -7.42 8.54 2.51
|
||||||
|
|
||||||
|
Description.
|
||||||
|
============
|
||||||
|
|
||||||
|
The routine computes all eigenvalues and, optionally, eigenvectors of an
|
||||||
|
n-by-n real symmetric matrix A. The eigenvector v(j) of A satisfies
|
||||||
|
|
||||||
|
A*v(j) = lambda(j)*v(j)
|
||||||
|
|
||||||
|
where lambda(j) is its eigenvalue. The computed eigenvectors are
|
||||||
|
orthonormal.
|
||||||
|
If the eigenvectors are requested, then this routine uses a divide and
|
||||||
|
conquer algorithm to compute eigenvalues and eigenvectors.
|
||||||
|
|
||||||
|
Example Program Results.
|
||||||
|
========================
|
||||||
|
|
||||||
|
DSYEVD Example Program Results
|
||||||
|
|
||||||
|
Eigenvalues
|
||||||
|
-17.44 -11.96 6.72 14.25 19.84
|
||||||
|
|
||||||
|
Eigenvectors (stored columnwise)
|
||||||
|
-0.26 0.31 -0.74 0.33 0.42
|
||||||
|
-0.17 -0.39 -0.38 -0.80 0.16
|
||||||
|
-0.89 0.04 0.09 0.03 -0.45
|
||||||
|
-0.29 -0.59 0.34 0.31 0.60
|
||||||
|
-0.19 0.63 0.44 -0.38 0.48
|
||||||
|
*/
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <mkl.h>
|
||||||
|
|
||||||
|
/* Auxiliary routines prototypes */
|
||||||
|
extern void print_matrix( char* desc, int m, int n, double* a, int lda );
|
||||||
|
|
||||||
|
/* Parameters */
|
||||||
|
#define N 5
|
||||||
|
#define LDA N
|
||||||
|
|
||||||
|
/* Main program */
|
||||||
|
function void
|
||||||
|
test_mkl_dsyevd(void) {
|
||||||
|
/* Locals */
|
||||||
|
MKL_INT n = N, lda = LDA, info, lwork, liwork;
|
||||||
|
MKL_INT iwkopt;
|
||||||
|
MKL_INT* iwork;
|
||||||
|
double wkopt;
|
||||||
|
double* work;
|
||||||
|
/* Local arrays */
|
||||||
|
double w[N];
|
||||||
|
double a[LDA*N] = {
|
||||||
|
6.39, 0.00, 0.00, 0.00, 0.00,
|
||||||
|
0.13, 8.37, 0.00, 0.00, 0.00,
|
||||||
|
-8.23, -4.46, -9.58, 0.00, 0.00,
|
||||||
|
5.71, -6.10, -9.25, 3.72, 0.00,
|
||||||
|
-3.18, 7.21, -7.42, 8.54, 2.51
|
||||||
|
};
|
||||||
|
/* Executable statements */
|
||||||
|
OutputDebugString( " DSYEVD Example Program Results\n" );
|
||||||
|
/* Query and allocate the optimal workspace */
|
||||||
|
lwork = -1;
|
||||||
|
liwork = -1;
|
||||||
|
dsyevd( "Vectors", "Upper", &n, a, &lda, w, &wkopt, &lwork, &iwkopt,
|
||||||
|
&liwork, &info );
|
||||||
|
lwork = (MKL_INT)wkopt;
|
||||||
|
work = (double*)malloc( lwork*sizeof(double) );
|
||||||
|
liwork = iwkopt;
|
||||||
|
iwork = (MKL_INT*)malloc( liwork*sizeof(MKL_INT) );
|
||||||
|
/* Solve eigenproblem */
|
||||||
|
dsyevd( "Vectors", "Upper", &n, a, &lda, w, work, &lwork, iwork,
|
||||||
|
&liwork, &info );
|
||||||
|
/* Check for convergence */
|
||||||
|
if( info > 0 ) {
|
||||||
|
OutputDebugString( "The algorithm failed to compute eigenvalues.\n" );
|
||||||
|
exit( 1 );
|
||||||
|
}
|
||||||
|
/* Print eigenvalues */
|
||||||
|
print_matrix( "Eigenvalues", 1, n, w, 1 );
|
||||||
|
/* Print eigenvectors */
|
||||||
|
print_matrix( "Eigenvectors (stored columnwise)", n, n, a, lda );
|
||||||
|
/* Free workspace */
|
||||||
|
free( (void*)iwork );
|
||||||
|
free( (void*)work );
|
||||||
|
} /* End of DSYEVD Example */
|
||||||
|
|
||||||
|
/* Auxiliary routine: printing a matrix */
|
||||||
|
void print_matrix( char* desc, int m, int n, double* a, int lda ) {
|
||||||
|
ArenaTemp scratch = scratch_get(0, 0);
|
||||||
|
int i, j;
|
||||||
|
//printf( "\n %s\n", desc );
|
||||||
|
String8 header = str8_pushf(scratch.arena, "\n %s\n", desc);
|
||||||
|
OutputDebugString(header.str);
|
||||||
|
for( i = 0; i < m; i++ ) {
|
||||||
|
for( j = 0; j < n; j++ )
|
||||||
|
{
|
||||||
|
String8 out = str8_pushf(scratch.arena, " %6.2f", a[i+j*lda] );
|
||||||
|
OutputDebugString(out.str);
|
||||||
|
//printf( " %6.2f", a[i+j*lda] );
|
||||||
|
}
|
||||||
|
OutputDebugString(str8_lit("\n").str);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
BIN
timeBuild.ctm
BIN
timeBuild.ctm
Binary file not shown.
Loading…
Reference in New Issue
Block a user