working dsyevd example on GPU with logging
This commit is contained in:
parent
879ae444a1
commit
48b67775f8
2
.gitignore
vendored
2
.gitignore
vendored
@ -94,3 +94,5 @@ dkms.conf
|
||||
*.out
|
||||
*.app
|
||||
|
||||
#visual studio
|
||||
.vs
|
||||
|
||||
25
build.bat
25
build.bat
@ -4,16 +4,31 @@ ctime -begin timeBuild.ctm
|
||||
|
||||
@rem /WX /W4 /wd4201 /wd4100 /wd4189 /wd4244 /wd4127 /wd4456
|
||||
@rem set CommonCompilerFlags="/nologo /Zi /FC"
|
||||
set CommonCompilerFlags=/nologo /Zi /FC
|
||||
set CommonCompilerFlags=/nologo /Zi /FC /Od
|
||||
@rem /WX /W4 /wd4201 /wd4100 /wd4189 /wd4244 /wd4127 /wd4456
|
||||
@rem
|
||||
@rem
|
||||
|
||||
set cuda_root=D:/lib/cudatoolkit/lib/x64
|
||||
|
||||
set mkl_root=D:/lib/oneAPI_mkl/mkl/2021.3.0
|
||||
set mkl_core=%mkl_root%/lib/intel64/mkl_core.lib
|
||||
set mkl_intel_lp64=%mkl_root%/lib/intel64/mkl_intel_lp64.lib
|
||||
set mkl_intel_thread=%mkl_root%/lib/intel64/mkl_intel_thread.lib
|
||||
set MKLCOMPILER=D:/lib/oneAPI_mkl/compiler/2021.3.0/windows/compiler
|
||||
set libiomp5md=%MKLCOMPILER%/lib/intel64_win/libiomp5md.lib
|
||||
|
||||
set CudaSources=../src/cuda_entry_point.cu
|
||||
set CudaObj=cuda_entry_point.obj
|
||||
set Sources=../src/main.c
|
||||
set CudaLib=D:/lib/cudatoolkit/lib/x64
|
||||
set CudaSolver=%CudaLib%/cusolver.lib
|
||||
|
||||
IF NOT EXIST .\build mkdir .\build
|
||||
pushd .\build
|
||||
cl %CommonCompilerFlags% %Sources% -Feprogram.exe
|
||||
@rem nvcc -Xcompiler %CommonCompilerFlags% -o program.exe %Sources% %CudaSources% -lcusolver
|
||||
rem cl /c %CommonCompilerFlags% %Sources% /I"%mkl_root%\include"
|
||||
rem FOR MSVC /link %mkl_core% %mkl_intel_lp64% %mkl_intel_thread% %libiomp5md%
|
||||
nvcc -c %CudaSources% -o %CudaObj% -g -G -lcusolver
|
||||
rem nvcc -o program.exe -lcusolver -L%mkl_root%/lib/intel64 -lmkl_core -lmkl_intel_lp64 -lmkl_intel_thread -l%MKLCOMPILER%/lib/intel64_win/libiomp5md
|
||||
cl %CommonCompilerFlags% %Sources% /I"%mkl_root%\include" /link %CudaObj% %cuda_root%/cudart.lib %cuda_root%/cusolver.lib %mkl_core% %mkl_intel_lp64% %mkl_intel_thread% %libiomp5md%
|
||||
set LastError=%ERRORLEVEL%
|
||||
popd
|
||||
|
||||
|
||||
BIN
build/main.rdi
Normal file
BIN
build/main.rdi
Normal file
Binary file not shown.
Binary file not shown.
8
eigsol_gpu.sublime-project
Normal file
8
eigsol_gpu.sublime-project
Normal file
@ -0,0 +1,8 @@
|
||||
{
|
||||
"folders":
|
||||
[
|
||||
{
|
||||
"path": "."
|
||||
}
|
||||
]
|
||||
}
|
||||
423
eigsol_gpu.sublime-workspace
Normal file
423
eigsol_gpu.sublime-workspace
Normal file
File diff suppressed because one or more lines are too long
4927
external/cusolverDn.h
vendored
Normal file
4927
external/cusolverDn.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
318
external/cusolverMg.h
vendored
Normal file
318
external/cusolverMg.h
vendored
Normal file
@ -0,0 +1,318 @@
|
||||
/*
|
||||
* Copyright 2019 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(CUSOLVERMG_H_)
|
||||
#define CUSOLVERMG_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include "cusolverDn.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
struct cusolverMgContext;
|
||||
typedef struct cusolverMgContext *cusolverMgHandle_t;
|
||||
|
||||
/**
|
||||
* \beief This enum decides how 1D device Ids (or process ranks) get mapped to
|
||||
* a 2D grid.
|
||||
*/
|
||||
typedef enum {
|
||||
|
||||
CUDALIBMG_GRID_MAPPING_ROW_MAJOR = 1,
|
||||
CUDALIBMG_GRID_MAPPING_COL_MAJOR = 0
|
||||
|
||||
} cusolverMgGridMapping_t;
|
||||
|
||||
/** \brief Opaque structure of the distributed grid */
|
||||
typedef void *cudaLibMgGrid_t;
|
||||
/** \brief Opaque structure of the distributed matrix descriptor */
|
||||
typedef void *cudaLibMgMatrixDesc_t;
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverMgCreate(cusolverMgHandle_t *handle);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverMgDestroy(cusolverMgHandle_t handle);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverMgDeviceSelect(
|
||||
cusolverMgHandle_t handle,
|
||||
int nbDevices,
|
||||
int deviceId[]);
|
||||
|
||||
/**
|
||||
* \brief Allocates resources related to the shared memory device grid.
|
||||
* \param[out] grid the opaque data strcuture that holds the grid
|
||||
* \param[in] numRowDevices number of devices in the row
|
||||
* \param[in] numColDevices number of devices in the column
|
||||
* \param[in] deviceId This array of size height * width stores the
|
||||
* device-ids of the 2D grid; each entry must correspond to a valid
|
||||
* gpu or to -1 (denoting CPU). \param[in] mapping whether the 2D grid is in
|
||||
* row/column major \returns the status code
|
||||
*/
|
||||
cusolverStatus_t CUSOLVERAPI cusolverMgCreateDeviceGrid(
|
||||
cudaLibMgGrid_t * grid,
|
||||
int32_t numRowDevices,
|
||||
int32_t numColDevices,
|
||||
const int32_t deviceId[],
|
||||
cusolverMgGridMapping_t mapping);
|
||||
|
||||
/**
|
||||
* \brief Releases the allocated resources related to the distributed grid.
|
||||
* \param[in] grid the opaque data strcuture that holds the distributed grid
|
||||
* \returns the status code
|
||||
*/
|
||||
cusolverStatus_t CUSOLVERAPI cusolverMgDestroyGrid(cudaLibMgGrid_t grid);
|
||||
|
||||
/**
|
||||
* \brief Allocates resources related to the distributed matrix descriptor.
|
||||
* \param[out] desc the opaque data strcuture that holds the descriptor
|
||||
* \param[in] numRows number of total rows
|
||||
* \param[in] numCols number of total columns
|
||||
* \param[in] rowBlockSize row block size
|
||||
* \param[in] colBlockSize column block size
|
||||
* \param[in] dataType the data type of each element in cudaDataType
|
||||
* \param[in] grid the opaque data structure of the distributed grid
|
||||
* \returns the status code
|
||||
*/
|
||||
cusolverStatus_t CUSOLVERAPI cusolverMgCreateMatrixDesc(
|
||||
cudaLibMgMatrixDesc_t *desc,
|
||||
int64_t numRows,
|
||||
int64_t numCols,
|
||||
int64_t rowBlockSize,
|
||||
int64_t colBlockSize,
|
||||
cudaDataType dataType,
|
||||
const cudaLibMgGrid_t grid);
|
||||
|
||||
/**
|
||||
* \brief Releases the allocated resources related to the distributed matrix
|
||||
* descriptor. \param[in] desc the opaque data strcuture that holds the
|
||||
* descriptor \returns the status code
|
||||
*/
|
||||
cusolverStatus_t CUSOLVERAPI
|
||||
cusolverMgDestroyMatrixDesc(cudaLibMgMatrixDesc_t desc);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverMgSyevd_bufferSize(
|
||||
cusolverMgHandle_t handle,
|
||||
cusolverEigMode_t jobz,
|
||||
cublasFillMode_t uplo,
|
||||
int N,
|
||||
void * array_d_A[],
|
||||
int IA,
|
||||
int JA,
|
||||
cudaLibMgMatrixDesc_t descrA,
|
||||
void * W,
|
||||
cudaDataType dataTypeW,
|
||||
cudaDataType computeType,
|
||||
int64_t * lwork);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverMgSyevd(
|
||||
cusolverMgHandle_t handle,
|
||||
cusolverEigMode_t jobz,
|
||||
cublasFillMode_t uplo,
|
||||
int N,
|
||||
void * array_d_A[],
|
||||
int IA,
|
||||
int JA,
|
||||
cudaLibMgMatrixDesc_t descrA,
|
||||
void * W,
|
||||
cudaDataType dataTypeW,
|
||||
cudaDataType computeType,
|
||||
void * array_d_work[],
|
||||
int64_t lwork,
|
||||
int * info);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverMgGetrf_bufferSize(
|
||||
cusolverMgHandle_t handle,
|
||||
int M,
|
||||
int N,
|
||||
void * array_d_A[],
|
||||
int IA,
|
||||
int JA,
|
||||
cudaLibMgMatrixDesc_t descrA,
|
||||
int * array_d_IPIV[],
|
||||
cudaDataType computeType,
|
||||
int64_t * lwork);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverMgGetrf(
|
||||
cusolverMgHandle_t handle,
|
||||
int M,
|
||||
int N,
|
||||
void * array_d_A[],
|
||||
int IA,
|
||||
int JA,
|
||||
cudaLibMgMatrixDesc_t descrA,
|
||||
int * array_d_IPIV[],
|
||||
cudaDataType computeType,
|
||||
void * array_d_work[],
|
||||
int64_t lwork,
|
||||
int * info);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverMgGetrs_bufferSize(
|
||||
cusolverMgHandle_t handle,
|
||||
cublasOperation_t TRANS,
|
||||
int N,
|
||||
int NRHS,
|
||||
void * array_d_A[],
|
||||
int IA,
|
||||
int JA,
|
||||
cudaLibMgMatrixDesc_t descrA,
|
||||
int * array_d_IPIV[],
|
||||
void * array_d_B[],
|
||||
int IB,
|
||||
int JB,
|
||||
cudaLibMgMatrixDesc_t descrB,
|
||||
cudaDataType computeType,
|
||||
int64_t * lwork);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverMgGetrs(
|
||||
cusolverMgHandle_t handle,
|
||||
cublasOperation_t TRANS,
|
||||
int N,
|
||||
int NRHS,
|
||||
void * array_d_A[],
|
||||
int IA,
|
||||
int JA,
|
||||
cudaLibMgMatrixDesc_t descrA,
|
||||
int * array_d_IPIV[],
|
||||
void * array_d_B[],
|
||||
int IB,
|
||||
int JB,
|
||||
cudaLibMgMatrixDesc_t descrB,
|
||||
cudaDataType computeType,
|
||||
void * array_d_work[],
|
||||
int64_t lwork,
|
||||
int * info);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverMgPotrf_bufferSize(
|
||||
cusolverMgHandle_t handle,
|
||||
cublasFillMode_t uplo,
|
||||
int N,
|
||||
void * array_d_A[],
|
||||
int IA,
|
||||
int JA,
|
||||
cudaLibMgMatrixDesc_t descrA,
|
||||
cudaDataType computeType,
|
||||
int64_t * lwork);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverMgPotrf(
|
||||
cusolverMgHandle_t handle,
|
||||
cublasFillMode_t uplo,
|
||||
int N,
|
||||
void * array_d_A[],
|
||||
int IA,
|
||||
int JA,
|
||||
cudaLibMgMatrixDesc_t descrA,
|
||||
cudaDataType computeType,
|
||||
void * array_d_work[],
|
||||
int64_t lwork,
|
||||
int * h_info);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverMgPotrs_bufferSize(
|
||||
cusolverMgHandle_t handle,
|
||||
cublasFillMode_t uplo,
|
||||
int n,
|
||||
int nrhs,
|
||||
void * array_d_A[],
|
||||
int IA,
|
||||
int JA,
|
||||
cudaLibMgMatrixDesc_t descrA,
|
||||
void * array_d_B[],
|
||||
int IB,
|
||||
int JB,
|
||||
cudaLibMgMatrixDesc_t descrB,
|
||||
cudaDataType computeType,
|
||||
int64_t * lwork);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverMgPotrs(
|
||||
cusolverMgHandle_t handle,
|
||||
cublasFillMode_t uplo,
|
||||
int n,
|
||||
int nrhs,
|
||||
void * array_d_A[],
|
||||
int IA,
|
||||
int JA,
|
||||
cudaLibMgMatrixDesc_t descrA,
|
||||
void * array_d_B[],
|
||||
int IB,
|
||||
int JB,
|
||||
cudaLibMgMatrixDesc_t descrB,
|
||||
cudaDataType computeType,
|
||||
void * array_d_work[],
|
||||
int64_t lwork,
|
||||
int * h_info);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverMgPotri_bufferSize(
|
||||
cusolverMgHandle_t handle,
|
||||
cublasFillMode_t uplo,
|
||||
int N,
|
||||
void * array_d_A[],
|
||||
int IA,
|
||||
int JA,
|
||||
cudaLibMgMatrixDesc_t descrA,
|
||||
cudaDataType computeType,
|
||||
int64_t * lwork);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverMgPotri(
|
||||
cusolverMgHandle_t handle,
|
||||
cublasFillMode_t uplo,
|
||||
int N,
|
||||
void * array_d_A[],
|
||||
int IA,
|
||||
int JA,
|
||||
cudaLibMgMatrixDesc_t descrA,
|
||||
cudaDataType computeType,
|
||||
void * array_d_work[],
|
||||
int64_t lwork,
|
||||
int * h_info);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif // CUSOLVERMG_H_
|
||||
339
external/cusolverRf.h
vendored
Normal file
339
external/cusolverRf.h
vendored
Normal file
@ -0,0 +1,339 @@
|
||||
/*
|
||||
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(CUSOLVERRF_H_)
|
||||
#define CUSOLVERRF_H_
|
||||
|
||||
#include "driver_types.h"
|
||||
#include "cuComplex.h"
|
||||
#include "cusolver_common.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/* CUSOLVERRF mode */
|
||||
typedef enum {
|
||||
CUSOLVERRF_RESET_VALUES_FAST_MODE_OFF = 0, // default
|
||||
CUSOLVERRF_RESET_VALUES_FAST_MODE_ON = 1
|
||||
} cusolverRfResetValuesFastMode_t;
|
||||
|
||||
/* CUSOLVERRF matrix format */
|
||||
typedef enum {
|
||||
CUSOLVERRF_MATRIX_FORMAT_CSR = 0, // default
|
||||
CUSOLVERRF_MATRIX_FORMAT_CSC = 1
|
||||
} cusolverRfMatrixFormat_t;
|
||||
|
||||
/* CUSOLVERRF unit diagonal */
|
||||
typedef enum {
|
||||
CUSOLVERRF_UNIT_DIAGONAL_STORED_L = 0, // default
|
||||
CUSOLVERRF_UNIT_DIAGONAL_STORED_U = 1,
|
||||
CUSOLVERRF_UNIT_DIAGONAL_ASSUMED_L = 2,
|
||||
CUSOLVERRF_UNIT_DIAGONAL_ASSUMED_U = 3
|
||||
} cusolverRfUnitDiagonal_t;
|
||||
|
||||
/* CUSOLVERRF factorization algorithm */
|
||||
typedef enum {
|
||||
CUSOLVERRF_FACTORIZATION_ALG0 = 0, // default
|
||||
CUSOLVERRF_FACTORIZATION_ALG1 = 1,
|
||||
CUSOLVERRF_FACTORIZATION_ALG2 = 2,
|
||||
} cusolverRfFactorization_t;
|
||||
|
||||
/* CUSOLVERRF triangular solve algorithm */
|
||||
typedef enum {
|
||||
CUSOLVERRF_TRIANGULAR_SOLVE_ALG1 = 1, // default
|
||||
CUSOLVERRF_TRIANGULAR_SOLVE_ALG2 = 2,
|
||||
CUSOLVERRF_TRIANGULAR_SOLVE_ALG3 = 3
|
||||
} cusolverRfTriangularSolve_t;
|
||||
|
||||
/* CUSOLVERRF numeric boost report */
|
||||
typedef enum {
|
||||
CUSOLVERRF_NUMERIC_BOOST_NOT_USED = 0, // default
|
||||
CUSOLVERRF_NUMERIC_BOOST_USED = 1
|
||||
} cusolverRfNumericBoostReport_t;
|
||||
|
||||
/* Opaque structure holding CUSOLVERRF library common */
|
||||
struct cusolverRfCommon;
|
||||
typedef struct cusolverRfCommon* cusolverRfHandle_t;
|
||||
|
||||
/* CUSOLVERRF create (allocate memory) and destroy (free memory) in the handle
|
||||
*/
|
||||
cusolverStatus_t CUSOLVERAPI cusolverRfCreate(cusolverRfHandle_t* handle);
|
||||
cusolverStatus_t CUSOLVERAPI cusolverRfDestroy(cusolverRfHandle_t handle);
|
||||
|
||||
/* CUSOLVERRF set and get input format */
|
||||
cusolverStatus_t CUSOLVERAPI cusolverRfGetMatrixFormat(
|
||||
cusolverRfHandle_t handle,
|
||||
cusolverRfMatrixFormat_t* format,
|
||||
cusolverRfUnitDiagonal_t* diag);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverRfSetMatrixFormat(
|
||||
cusolverRfHandle_t handle,
|
||||
cusolverRfMatrixFormat_t format,
|
||||
cusolverRfUnitDiagonal_t diag);
|
||||
|
||||
/* CUSOLVERRF set and get numeric properties */
|
||||
cusolverStatus_t CUSOLVERAPI cusolverRfSetNumericProperties(
|
||||
cusolverRfHandle_t handle,
|
||||
double zero,
|
||||
double boost);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverRfGetNumericProperties(
|
||||
cusolverRfHandle_t handle,
|
||||
double* zero,
|
||||
double* boost);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverRfGetNumericBoostReport(
|
||||
cusolverRfHandle_t handle,
|
||||
cusolverRfNumericBoostReport_t* report);
|
||||
|
||||
/* CUSOLVERRF choose the triangular solve algorithm */
|
||||
cusolverStatus_t CUSOLVERAPI cusolverRfSetAlgs(
|
||||
cusolverRfHandle_t handle,
|
||||
cusolverRfFactorization_t factAlg,
|
||||
cusolverRfTriangularSolve_t solveAlg);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverRfGetAlgs(
|
||||
cusolverRfHandle_t handle,
|
||||
cusolverRfFactorization_t* factAlg,
|
||||
cusolverRfTriangularSolve_t* solveAlg);
|
||||
|
||||
/* CUSOLVERRF set and get fast mode */
|
||||
cusolverStatus_t CUSOLVERAPI cusolverRfGetResetValuesFastMode(
|
||||
cusolverRfHandle_t handle,
|
||||
cusolverRfResetValuesFastMode_t* fastMode);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverRfSetResetValuesFastMode(
|
||||
cusolverRfHandle_t handle,
|
||||
cusolverRfResetValuesFastMode_t fastMode);
|
||||
|
||||
/*** Non-Batched Routines ***/
|
||||
/* CUSOLVERRF setup of internal structures from host or device memory */
|
||||
cusolverStatus_t CUSOLVERAPI
|
||||
cusolverRfSetupHost(/* Input (in the host memory) */
|
||||
int n,
|
||||
int nnzA,
|
||||
int* h_csrRowPtrA,
|
||||
int* h_csrColIndA,
|
||||
double* h_csrValA,
|
||||
int nnzL,
|
||||
int* h_csrRowPtrL,
|
||||
int* h_csrColIndL,
|
||||
double* h_csrValL,
|
||||
int nnzU,
|
||||
int* h_csrRowPtrU,
|
||||
int* h_csrColIndU,
|
||||
double* h_csrValU,
|
||||
int* h_P,
|
||||
int* h_Q,
|
||||
/* Output */
|
||||
cusolverRfHandle_t handle);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI
|
||||
cusolverRfSetupDevice(/* Input (in the device memory) */
|
||||
int n,
|
||||
int nnzA,
|
||||
int* csrRowPtrA,
|
||||
int* csrColIndA,
|
||||
double* csrValA,
|
||||
int nnzL,
|
||||
int* csrRowPtrL,
|
||||
int* csrColIndL,
|
||||
double* csrValL,
|
||||
int nnzU,
|
||||
int* csrRowPtrU,
|
||||
int* csrColIndU,
|
||||
double* csrValU,
|
||||
int* P,
|
||||
int* Q,
|
||||
/* Output */
|
||||
cusolverRfHandle_t handle);
|
||||
|
||||
/* CUSOLVERRF update the matrix values (assuming the reordering, pivoting
|
||||
and consequently the sparsity pattern of L and U did not change),
|
||||
and zero out the remaining values. */
|
||||
cusolverStatus_t CUSOLVERAPI
|
||||
cusolverRfResetValues(/* Input (in the device memory) */
|
||||
int n,
|
||||
int nnzA,
|
||||
int* csrRowPtrA,
|
||||
int* csrColIndA,
|
||||
double* csrValA,
|
||||
int* P,
|
||||
int* Q,
|
||||
/* Output */
|
||||
cusolverRfHandle_t handle);
|
||||
|
||||
/* CUSOLVERRF analysis (for parallelism) */
|
||||
cusolverStatus_t CUSOLVERAPI cusolverRfAnalyze(cusolverRfHandle_t handle);
|
||||
|
||||
/* CUSOLVERRF re-factorization (for parallelism) */
|
||||
cusolverStatus_t CUSOLVERAPI cusolverRfRefactor(cusolverRfHandle_t handle);
|
||||
|
||||
/* CUSOLVERRF extraction: Get L & U packed into a single matrix M */
|
||||
cusolverStatus_t CUSOLVERAPI
|
||||
cusolverRfAccessBundledFactorsDevice(/* Input */
|
||||
cusolverRfHandle_t handle,
|
||||
/* Output (in the host memory) */
|
||||
int* nnzM,
|
||||
/* Output (in the device memory) */
|
||||
int** Mp,
|
||||
int** Mi,
|
||||
double** Mx);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI
|
||||
cusolverRfExtractBundledFactorsHost(/* Input */
|
||||
cusolverRfHandle_t handle,
|
||||
/* Output (in the host memory) */
|
||||
int* h_nnzM,
|
||||
int** h_Mp,
|
||||
int** h_Mi,
|
||||
double** h_Mx);
|
||||
|
||||
/* CUSOLVERRF extraction: Get L & U individually */
|
||||
cusolverStatus_t CUSOLVERAPI
|
||||
cusolverRfExtractSplitFactorsHost(/* Input */
|
||||
cusolverRfHandle_t handle,
|
||||
/* Output (in the host memory) */
|
||||
int* h_nnzL,
|
||||
int** h_csrRowPtrL,
|
||||
int** h_csrColIndL,
|
||||
double** h_csrValL,
|
||||
int* h_nnzU,
|
||||
int** h_csrRowPtrU,
|
||||
int** h_csrColIndU,
|
||||
double** h_csrValU);
|
||||
|
||||
/* CUSOLVERRF (forward and backward triangular) solves */
|
||||
cusolverStatus_t CUSOLVERAPI
|
||||
cusolverRfSolve(/* Input (in the device memory) */
|
||||
cusolverRfHandle_t handle,
|
||||
int* P,
|
||||
int* Q,
|
||||
int nrhs, // only nrhs=1 is supported
|
||||
double* Temp, // of size ldt*nrhs (ldt>=n)
|
||||
int ldt,
|
||||
/* Input/Output (in the device memory) */
|
||||
double* XF,
|
||||
/* Input */
|
||||
int ldxf);
|
||||
|
||||
/*** Batched Routines ***/
|
||||
/* CUSOLVERRF-batch setup of internal structures from host */
|
||||
cusolverStatus_t CUSOLVERAPI
|
||||
cusolverRfBatchSetupHost(/* Input (in the host memory)*/
|
||||
int batchSize,
|
||||
int n,
|
||||
int nnzA,
|
||||
int* h_csrRowPtrA,
|
||||
int* h_csrColIndA,
|
||||
double* h_csrValA_array[],
|
||||
int nnzL,
|
||||
int* h_csrRowPtrL,
|
||||
int* h_csrColIndL,
|
||||
double* h_csrValL,
|
||||
int nnzU,
|
||||
int* h_csrRowPtrU,
|
||||
int* h_csrColIndU,
|
||||
double* h_csrValU,
|
||||
int* h_P,
|
||||
int* h_Q,
|
||||
/* Output (in the device memory) */
|
||||
cusolverRfHandle_t handle);
|
||||
|
||||
/* CUSOLVERRF-batch update the matrix values (assuming the reordering,
|
||||
pivoting and consequently the sparsity pattern of L and U did not change),
|
||||
and zero out the remaining values. */
|
||||
cusolverStatus_t CUSOLVERAPI
|
||||
cusolverRfBatchResetValues(/* Input (in the device memory) */
|
||||
int batchSize,
|
||||
int n,
|
||||
int nnzA,
|
||||
int* csrRowPtrA,
|
||||
int* csrColIndA,
|
||||
double* csrValA_array[],
|
||||
int* P,
|
||||
int* Q,
|
||||
/* Output */
|
||||
cusolverRfHandle_t handle);
|
||||
|
||||
/* CUSOLVERRF-batch analysis (for parallelism) */
|
||||
cusolverStatus_t CUSOLVERAPI
|
||||
cusolverRfBatchAnalyze(cusolverRfHandle_t handle);
|
||||
|
||||
/* CUSOLVERRF-batch re-factorization (for parallelism) */
|
||||
cusolverStatus_t CUSOLVERAPI
|
||||
cusolverRfBatchRefactor(cusolverRfHandle_t handle);
|
||||
|
||||
/* CUSOLVERRF-batch (forward and backward triangular) solves */
|
||||
cusolverStatus_t CUSOLVERAPI
|
||||
cusolverRfBatchSolve(/* Input (in the device memory) */
|
||||
cusolverRfHandle_t handle,
|
||||
int* P,
|
||||
int* Q,
|
||||
int nrhs, // only nrhs=1 is supported
|
||||
double* Temp, // of size 2*batchSize*(n*nrhs)
|
||||
int ldt, // only ldt=n is supported
|
||||
/* Input/Output (in the device memory) */
|
||||
double* XF_array[],
|
||||
/* Input */
|
||||
int ldxf);
|
||||
|
||||
/* CUSOLVERRF-batch obtain the position of zero pivot */
|
||||
cusolverStatus_t CUSOLVERAPI
|
||||
cusolverRfBatchZeroPivot(/* Input */
|
||||
cusolverRfHandle_t handle,
|
||||
/* Output (in the host memory) */
|
||||
int* position);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif /* CUSOLVERRF_H_ */
|
||||
923
external/cusolverSp.h
vendored
Normal file
923
external/cusolverSp.h
vendored
Normal file
@ -0,0 +1,923 @@
|
||||
/*
|
||||
* Copyright 2014 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(CUSOLVERSP_H_)
|
||||
#define CUSOLVERSP_H_
|
||||
|
||||
#include "cusparse.h"
|
||||
#include "cublas_v2.h"
|
||||
#include "cusolver_common.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
struct cusolverSpContext;
|
||||
typedef struct cusolverSpContext *cusolverSpHandle_t;
|
||||
|
||||
struct csrqrInfo;
|
||||
typedef struct csrqrInfo *csrqrInfo_t;
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpCreate(cusolverSpHandle_t *handle);
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpDestroy(cusolverSpHandle_t handle);
|
||||
cusolverStatus_t CUSOLVERAPI
|
||||
cusolverSpSetStream(cusolverSpHandle_t handle, cudaStream_t streamId);
|
||||
cusolverStatus_t CUSOLVERAPI
|
||||
cusolverSpGetStream(cusolverSpHandle_t handle, cudaStream_t *streamId);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpXcsrissymHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnzA,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrEndPtrA,
|
||||
const int * csrColIndA,
|
||||
int * issym);
|
||||
|
||||
/* -------- GPU linear solver by LU factorization
|
||||
* solve A*x = b, A can be singular
|
||||
* [ls] stands for linear solve
|
||||
* [v] stands for vector
|
||||
* [lu] stands for LU factorization
|
||||
*/
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpScsrlsvluHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int n,
|
||||
int nnzA,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const float * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
const float * b,
|
||||
float tol,
|
||||
int reorder,
|
||||
float * x,
|
||||
int * singularity);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpDcsrlsvluHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int n,
|
||||
int nnzA,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const double * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
const double * b,
|
||||
double tol,
|
||||
int reorder,
|
||||
double * x,
|
||||
int * singularity);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpCcsrlsvluHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int n,
|
||||
int nnzA,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuComplex * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
const cuComplex * b,
|
||||
float tol,
|
||||
int reorder,
|
||||
cuComplex * x,
|
||||
int * singularity);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpZcsrlsvluHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int n,
|
||||
int nnzA,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuDoubleComplex * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
const cuDoubleComplex * b,
|
||||
double tol,
|
||||
int reorder,
|
||||
cuDoubleComplex * x,
|
||||
int * singularity);
|
||||
|
||||
/* -------- GPU linear solver by QR factorization
|
||||
* solve A*x = b, A can be singular
|
||||
* [ls] stands for linear solve
|
||||
* [v] stands for vector
|
||||
* [qr] stands for QR factorization
|
||||
*/
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpScsrlsvqr(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const float * csrVal,
|
||||
const int * csrRowPtr,
|
||||
const int * csrColInd,
|
||||
const float * b,
|
||||
float tol,
|
||||
int reorder,
|
||||
float * x,
|
||||
int * singularity);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpDcsrlsvqr(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const double * csrVal,
|
||||
const int * csrRowPtr,
|
||||
const int * csrColInd,
|
||||
const double * b,
|
||||
double tol,
|
||||
int reorder,
|
||||
double * x,
|
||||
int * singularity);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpCcsrlsvqr(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuComplex * csrVal,
|
||||
const int * csrRowPtr,
|
||||
const int * csrColInd,
|
||||
const cuComplex * b,
|
||||
float tol,
|
||||
int reorder,
|
||||
cuComplex * x,
|
||||
int * singularity);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpZcsrlsvqr(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuDoubleComplex * csrVal,
|
||||
const int * csrRowPtr,
|
||||
const int * csrColInd,
|
||||
const cuDoubleComplex * b,
|
||||
double tol,
|
||||
int reorder,
|
||||
cuDoubleComplex * x,
|
||||
int * singularity);
|
||||
|
||||
/* -------- CPU linear solver by QR factorization
|
||||
* solve A*x = b, A can be singular
|
||||
* [ls] stands for linear solve
|
||||
* [v] stands for vector
|
||||
* [qr] stands for QR factorization
|
||||
*/
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpScsrlsvqrHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const float * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
const float * b,
|
||||
float tol,
|
||||
int reorder,
|
||||
float * x,
|
||||
int * singularity);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpDcsrlsvqrHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const double * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
const double * b,
|
||||
double tol,
|
||||
int reorder,
|
||||
double * x,
|
||||
int * singularity);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpCcsrlsvqrHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuComplex * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
const cuComplex * b,
|
||||
float tol,
|
||||
int reorder,
|
||||
cuComplex * x,
|
||||
int * singularity);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpZcsrlsvqrHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuDoubleComplex * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
const cuDoubleComplex * b,
|
||||
double tol,
|
||||
int reorder,
|
||||
cuDoubleComplex * x,
|
||||
int * singularity);
|
||||
|
||||
/* -------- CPU linear solver by Cholesky factorization
|
||||
* solve A*x = b, A can be singular
|
||||
* [ls] stands for linear solve
|
||||
* [v] stands for vector
|
||||
* [chol] stands for Cholesky factorization
|
||||
*
|
||||
* Only works for symmetric positive definite matrix.
|
||||
* The upper part of A is ignored.
|
||||
*/
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpScsrlsvcholHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const float * csrVal,
|
||||
const int * csrRowPtr,
|
||||
const int * csrColInd,
|
||||
const float * b,
|
||||
float tol,
|
||||
int reorder,
|
||||
float * x,
|
||||
int * singularity);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpDcsrlsvcholHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const double * csrVal,
|
||||
const int * csrRowPtr,
|
||||
const int * csrColInd,
|
||||
const double * b,
|
||||
double tol,
|
||||
int reorder,
|
||||
double * x,
|
||||
int * singularity);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpCcsrlsvcholHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuComplex * csrVal,
|
||||
const int * csrRowPtr,
|
||||
const int * csrColInd,
|
||||
const cuComplex * b,
|
||||
float tol,
|
||||
int reorder,
|
||||
cuComplex * x,
|
||||
int * singularity);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpZcsrlsvcholHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuDoubleComplex * csrVal,
|
||||
const int * csrRowPtr,
|
||||
const int * csrColInd,
|
||||
const cuDoubleComplex * b,
|
||||
double tol,
|
||||
int reorder,
|
||||
cuDoubleComplex * x,
|
||||
int * singularity);
|
||||
|
||||
/* -------- GPU linear solver by Cholesky factorization
|
||||
* solve A*x = b, A can be singular
|
||||
* [ls] stands for linear solve
|
||||
* [v] stands for vector
|
||||
* [chol] stands for Cholesky factorization
|
||||
*
|
||||
* Only works for symmetric positive definite matrix.
|
||||
* The upper part of A is ignored.
|
||||
*/
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpScsrlsvchol(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const float * csrVal,
|
||||
const int * csrRowPtr,
|
||||
const int * csrColInd,
|
||||
const float * b,
|
||||
float tol,
|
||||
int reorder,
|
||||
// output
|
||||
float *x,
|
||||
int * singularity);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpDcsrlsvchol(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const double * csrVal,
|
||||
const int * csrRowPtr,
|
||||
const int * csrColInd,
|
||||
const double * b,
|
||||
double tol,
|
||||
int reorder,
|
||||
// output
|
||||
double *x,
|
||||
int * singularity);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpCcsrlsvchol(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuComplex * csrVal,
|
||||
const int * csrRowPtr,
|
||||
const int * csrColInd,
|
||||
const cuComplex * b,
|
||||
float tol,
|
||||
int reorder,
|
||||
// output
|
||||
cuComplex *x,
|
||||
int * singularity);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpZcsrlsvchol(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuDoubleComplex * csrVal,
|
||||
const int * csrRowPtr,
|
||||
const int * csrColInd,
|
||||
const cuDoubleComplex * b,
|
||||
double tol,
|
||||
int reorder,
|
||||
// output
|
||||
cuDoubleComplex *x,
|
||||
int * singularity);
|
||||
|
||||
/* ----------- CPU least square solver by QR factorization
|
||||
* solve min|b - A*x|
|
||||
* [lsq] stands for least square
|
||||
* [v] stands for vector
|
||||
* [qr] stands for QR factorization
|
||||
*/
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpScsrlsqvqrHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int n,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const float * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
const float * b,
|
||||
float tol,
|
||||
int * rankA,
|
||||
float * x,
|
||||
int * p,
|
||||
float * min_norm);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpDcsrlsqvqrHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int n,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const double * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
const double * b,
|
||||
double tol,
|
||||
int * rankA,
|
||||
double * x,
|
||||
int * p,
|
||||
double * min_norm);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpCcsrlsqvqrHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int n,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuComplex * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
const cuComplex * b,
|
||||
float tol,
|
||||
int * rankA,
|
||||
cuComplex * x,
|
||||
int * p,
|
||||
float * min_norm);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpZcsrlsqvqrHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int n,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuDoubleComplex * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
const cuDoubleComplex * b,
|
||||
double tol,
|
||||
int * rankA,
|
||||
cuDoubleComplex * x,
|
||||
int * p,
|
||||
double * min_norm);
|
||||
|
||||
/* --------- CPU eigenvalue solver by shift inverse
|
||||
* solve A*x = lambda * x
|
||||
* where lambda is the eigenvalue nearest mu0.
|
||||
* [eig] stands for eigenvalue solver
|
||||
* [si] stands for shift-inverse
|
||||
*/
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpScsreigvsiHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const float * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
float mu0,
|
||||
const float * x0,
|
||||
int maxite,
|
||||
float tol,
|
||||
float * mu,
|
||||
float * x);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpDcsreigvsiHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const double * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
double mu0,
|
||||
const double * x0,
|
||||
int maxite,
|
||||
double tol,
|
||||
double * mu,
|
||||
double * x);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpCcsreigvsiHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuComplex * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
cuComplex mu0,
|
||||
const cuComplex * x0,
|
||||
int maxite,
|
||||
float tol,
|
||||
cuComplex * mu,
|
||||
cuComplex * x);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpZcsreigvsiHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuDoubleComplex * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
cuDoubleComplex mu0,
|
||||
const cuDoubleComplex * x0,
|
||||
int maxite,
|
||||
double tol,
|
||||
cuDoubleComplex * mu,
|
||||
cuDoubleComplex * x);
|
||||
|
||||
/* --------- GPU eigenvalue solver by shift inverse
|
||||
* solve A*x = lambda * x
|
||||
* where lambda is the eigenvalue nearest mu0.
|
||||
* [eig] stands for eigenvalue solver
|
||||
* [si] stands for shift-inverse
|
||||
*/
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpScsreigvsi(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const float * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
float mu0,
|
||||
const float * x0,
|
||||
int maxite,
|
||||
float eps,
|
||||
float * mu,
|
||||
float * x);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpDcsreigvsi(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const double * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
double mu0,
|
||||
const double * x0,
|
||||
int maxite,
|
||||
double eps,
|
||||
double * mu,
|
||||
double * x);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpCcsreigvsi(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuComplex * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
cuComplex mu0,
|
||||
const cuComplex * x0,
|
||||
int maxite,
|
||||
float eps,
|
||||
cuComplex * mu,
|
||||
cuComplex * x);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpZcsreigvsi(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuDoubleComplex * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
cuDoubleComplex mu0,
|
||||
const cuDoubleComplex * x0,
|
||||
int maxite,
|
||||
double eps,
|
||||
cuDoubleComplex * mu,
|
||||
cuDoubleComplex * x);
|
||||
|
||||
// ----------- enclosed eigenvalues
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpScsreigsHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const float * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
cuComplex left_bottom_corner,
|
||||
cuComplex right_upper_corner,
|
||||
int * num_eigs);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpDcsreigsHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const double * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
cuDoubleComplex left_bottom_corner,
|
||||
cuDoubleComplex right_upper_corner,
|
||||
int * num_eigs);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpCcsreigsHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuComplex * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
cuComplex left_bottom_corner,
|
||||
cuComplex right_upper_corner,
|
||||
int * num_eigs);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpZcsreigsHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuDoubleComplex * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
cuDoubleComplex left_bottom_corner,
|
||||
cuDoubleComplex right_upper_corner,
|
||||
int * num_eigs);
|
||||
|
||||
/* --------- CPU symrcm
|
||||
* Symmetric reverse Cuthill McKee permutation
|
||||
*
|
||||
*/
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpXcsrsymrcmHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int n,
|
||||
int nnzA,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
int * p);
|
||||
|
||||
/* --------- CPU symmdq
|
||||
* Symmetric minimum degree algorithm by quotient graph
|
||||
*
|
||||
*/
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpXcsrsymmdqHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int n,
|
||||
int nnzA,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
int * p);
|
||||
|
||||
/* --------- CPU symmdq
|
||||
* Symmetric Approximate minimum degree algorithm by quotient graph
|
||||
*
|
||||
*/
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpXcsrsymamdHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int n,
|
||||
int nnzA,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
int * p);
|
||||
|
||||
/* --------- CPU metis
|
||||
* symmetric reordering
|
||||
*/
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpXcsrmetisndHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int n,
|
||||
int nnzA,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
const int64_t * options,
|
||||
int * p);
|
||||
|
||||
/* --------- CPU zfd
|
||||
* Zero free diagonal reordering
|
||||
*/
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpScsrzfdHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int n,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const float * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
int * P,
|
||||
int * numnz);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpDcsrzfdHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int n,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const double * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
int * P,
|
||||
int * numnz);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpCcsrzfdHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int n,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuComplex * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
int * P,
|
||||
int * numnz);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpZcsrzfdHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int n,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuDoubleComplex * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
int * P,
|
||||
int * numnz);
|
||||
|
||||
/* --------- CPU permuation
|
||||
* P*A*Q^T
|
||||
*
|
||||
*/
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpXcsrperm_bufferSizeHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int n,
|
||||
int nnzA,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
const int * p,
|
||||
const int * q,
|
||||
size_t * bufferSizeInBytes);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpXcsrpermHost(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int n,
|
||||
int nnzA,
|
||||
const cusparseMatDescr_t descrA,
|
||||
int * csrRowPtrA,
|
||||
int * csrColIndA,
|
||||
const int * p,
|
||||
const int * q,
|
||||
int * map,
|
||||
void * pBuffer);
|
||||
|
||||
/*
|
||||
* Low-level API: Batched QR
|
||||
*
|
||||
*/
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpCreateCsrqrInfo(csrqrInfo_t *info);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpDestroyCsrqrInfo(csrqrInfo_t info);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpXcsrqrAnalysisBatched(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int n,
|
||||
int nnzA,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
csrqrInfo_t info);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrBufferInfoBatched(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int n,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const float * csrVal,
|
||||
const int * csrRowPtr,
|
||||
const int * csrColInd,
|
||||
int batchSize,
|
||||
csrqrInfo_t info,
|
||||
size_t * internalDataInBytes,
|
||||
size_t * workspaceInBytes);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrBufferInfoBatched(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int n,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const double * csrVal,
|
||||
const int * csrRowPtr,
|
||||
const int * csrColInd,
|
||||
int batchSize,
|
||||
csrqrInfo_t info,
|
||||
size_t * internalDataInBytes,
|
||||
size_t * workspaceInBytes);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrBufferInfoBatched(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int n,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuComplex * csrVal,
|
||||
const int * csrRowPtr,
|
||||
const int * csrColInd,
|
||||
int batchSize,
|
||||
csrqrInfo_t info,
|
||||
size_t * internalDataInBytes,
|
||||
size_t * workspaceInBytes);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrBufferInfoBatched(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int n,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuDoubleComplex * csrVal,
|
||||
const int * csrRowPtr,
|
||||
const int * csrColInd,
|
||||
int batchSize,
|
||||
csrqrInfo_t info,
|
||||
size_t * internalDataInBytes,
|
||||
size_t * workspaceInBytes);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpScsrqrsvBatched(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int n,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const float * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
const float * b,
|
||||
float * x,
|
||||
int batchSize,
|
||||
csrqrInfo_t info,
|
||||
void * pBuffer);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpDcsrqrsvBatched(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int n,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const double * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
const double * b,
|
||||
double * x,
|
||||
int batchSize,
|
||||
csrqrInfo_t info,
|
||||
void * pBuffer);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpCcsrqrsvBatched(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int n,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuComplex * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
const cuComplex * b,
|
||||
cuComplex * x,
|
||||
int batchSize,
|
||||
csrqrInfo_t info,
|
||||
void * pBuffer);
|
||||
|
||||
cusolverStatus_t CUSOLVERAPI cusolverSpZcsrqrsvBatched(
|
||||
cusolverSpHandle_t handle,
|
||||
int m,
|
||||
int n,
|
||||
int nnz,
|
||||
const cusparseMatDescr_t descrA,
|
||||
const cuDoubleComplex * csrValA,
|
||||
const int * csrRowPtrA,
|
||||
const int * csrColIndA,
|
||||
const cuDoubleComplex * b,
|
||||
cuDoubleComplex * x,
|
||||
int batchSize,
|
||||
csrqrInfo_t info,
|
||||
void * pBuffer);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif // define CUSOLVERSP_H_
|
||||
1107
external/cusolverSp_LOWLEVEL_PREVIEW.h
vendored
Normal file
1107
external/cusolverSp_LOWLEVEL_PREVIEW.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,28 +1,174 @@
|
||||
#include <stdio.h>
|
||||
#include "../external/cusolver_common.h"
|
||||
#define no_name_mangle extern "C"
|
||||
#include <windows.h>
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
|
||||
__global__ void hello_from_GPU() {
|
||||
printf("HELLO FROM GPUUUU");
|
||||
#include "../external/cusolver_common.h"
|
||||
#include "../external/cusolverDn.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
# define no_name_mangle extern "C"
|
||||
#else
|
||||
# define no_name_mangle
|
||||
#endif
|
||||
|
||||
#define function static
|
||||
|
||||
#define GLOBAL_LOG_BUFFER_SIZE 4096*4096
|
||||
|
||||
__device__ int last_index;
|
||||
__device__ char *device_log_buffer;
|
||||
char *log_buffer;
|
||||
|
||||
#define D_LOG(msg) d_debug_printf((msg))
|
||||
//#define D_LOGF(msg, ...) d_debug_printfv((msg), __VA_ARGS__)
|
||||
#define H_LOG(msg) h_debug_printf((msg))
|
||||
#define H_LOGF(msg, ...) h_debug_printfv((msg), __VA_ARGS__)
|
||||
|
||||
__host__ function void h_debug_printf(const char *msg);
|
||||
__host__ function void h_debug_printfv(const char *fmt, ...);
|
||||
__device__ function void d_debug_printf(const char *msg);
|
||||
//__device__ function void d_debug_printfv(const char *fmt, ...);
|
||||
|
||||
__global__ void init_device_logging()
|
||||
{
|
||||
last_index = 0;
|
||||
}
|
||||
|
||||
void check_cusolver_version() {
|
||||
|
||||
__host__ function void
|
||||
h_debug_printf(const char* msg)
|
||||
{
|
||||
OutputDebugStringA(msg);
|
||||
}
|
||||
|
||||
__host__ function void
|
||||
h_debug_printfv(const char *fmt, ...)
|
||||
{
|
||||
char buffer[2048] = {0};
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
vsnprintf(&buffer[0], sizeof(buffer), fmt, args);
|
||||
va_end(args);
|
||||
OutputDebugStringA(buffer);
|
||||
}
|
||||
|
||||
__device__ function void
|
||||
d_debug_printf(const char *msg) {
|
||||
int i = 0;
|
||||
while(msg[i] != '\0')
|
||||
{
|
||||
device_log_buffer[last_index + i] = msg[i];
|
||||
i += 1;
|
||||
}
|
||||
last_index = last_index + 1;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
__global__ void hello_from_GPU()
|
||||
{
|
||||
|
||||
D_LOG("HELLO FROM GPUUUU\n");
|
||||
}
|
||||
|
||||
void
|
||||
check_cusolver_version()
|
||||
{
|
||||
|
||||
int major=-1, minor=-1, patch=-1;
|
||||
cusolverGetProperty(MAJOR_VERSION, &major);
|
||||
cusolverGetProperty(MINOR_VERSION, &minor);
|
||||
cusolverGetProperty(PATCH_LEVEL, &patch);
|
||||
printf("CUSOLVER Version (Major,Minor,PatchLevel): %d.%d.%d\n", major,minor,patch);
|
||||
H_LOGF("CUSOLVER Version (Major,Minor,PatchLevel): %d.%d.%d\n", major,minor,patch);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
no_name_mangle void cuda_entry_point() {
|
||||
|
||||
no_name_mangle void
|
||||
cuda_entry_point()
|
||||
{
|
||||
H_LOG("\n\n-- Entered CUDA code:\n\n");
|
||||
char *temp_device_log_buffer;
|
||||
cudaMalloc((void**)&temp_device_log_buffer, GLOBAL_LOG_BUFFER_SIZE * sizeof(char));
|
||||
cudaMemcpyToSymbol(device_log_buffer, &temp_device_log_buffer, sizeof(char *));
|
||||
check_cusolver_version();
|
||||
|
||||
hello_from_GPU<<<1, 1>>>();
|
||||
|
||||
init_device_logging<<<1, 1>>>();
|
||||
cudaDeviceSynchronize();
|
||||
hello_from_GPU<<<1, 1>>>();
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
|
||||
|
||||
log_buffer = (char *)malloc(GLOBAL_LOG_BUFFER_SIZE * sizeof(char));
|
||||
cudaMemcpy(log_buffer, temp_device_log_buffer, GLOBAL_LOG_BUFFER_SIZE * sizeof(char), cudaMemcpyDeviceToHost);
|
||||
|
||||
// Print all messages from cuda.
|
||||
H_LOG(log_buffer);
|
||||
|
||||
H_LOG("\n\n Testing cuSolver Eigenvalue problem \n\n");
|
||||
|
||||
const int size = 5;
|
||||
const int mat_size = size*size;
|
||||
double A[mat_size] = {
|
||||
6.39, 0.13, -8.23, 5.71, -3.18,
|
||||
0.13, 8.37, -4.46, -6.10, 7.21,
|
||||
-8.23, -4.46, -9.58, -9.25, -7.42,
|
||||
5.71, -6.10, -9.25, 3.72, 8.54,
|
||||
-3.18, 7.21, -7.42, 8.54, 2.51};
|
||||
|
||||
double *device_mat;
|
||||
cudaMalloc((void**)&device_mat, mat_size * sizeof(double));
|
||||
cudaMemcpy(device_mat, A, mat_size * sizeof(double), cudaMemcpyHostToDevice);
|
||||
|
||||
cusolverDnHandle_t cusolverH = 0;
|
||||
cusolverDnCreate(&cusolverH);
|
||||
|
||||
double W[size];
|
||||
double *d_W;
|
||||
cudaMalloc((void**)&d_W, size * sizeof(double));
|
||||
|
||||
int lwork;
|
||||
cusolverDnDsyevd_bufferSize(
|
||||
cusolverH, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_LOWER,
|
||||
size, device_mat, size, d_W, &lwork
|
||||
);
|
||||
|
||||
double *d_work;
|
||||
cudaMalloc((void**)&d_work, lwork * sizeof(double));
|
||||
|
||||
int *device_info;
|
||||
cudaMalloc((void**)&device_info, sizeof(int));
|
||||
|
||||
cusolverDnDsyevd(
|
||||
cusolverH, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_LOWER,
|
||||
size, device_mat, size, d_W, d_work, lwork, device_info
|
||||
);
|
||||
|
||||
double out_mat[mat_size];
|
||||
cudaMemcpy(W, d_W, size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(out_mat, device_mat, mat_size * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
|
||||
H_LOG("Results on GPU dsyevd:\n");
|
||||
H_LOG("Eigenvalues:\n");
|
||||
for(int i = 0; i < size; i++)
|
||||
{
|
||||
H_LOGF("%6.2f\n", W[i]);
|
||||
}
|
||||
H_LOG("\n");
|
||||
H_LOG("Eigenvectors (columnwise):\n");
|
||||
for(int i = 0; i < size; i++)
|
||||
{
|
||||
for(int j = 0; j < size; j++)
|
||||
{
|
||||
H_LOGF("%6.2f ", out_mat[j * size + i]);
|
||||
}
|
||||
H_LOG("\n");
|
||||
}
|
||||
|
||||
|
||||
|
||||
cudaFree(temp_device_log_buffer);
|
||||
}
|
||||
|
||||
35
src/main.c
35
src/main.c
@ -14,7 +14,7 @@
|
||||
#include "base/base_inc.c"
|
||||
#include "os/os_inc.c"
|
||||
#include "os/os_entry_point.c"
|
||||
|
||||
#include "test_mkl.c"
|
||||
|
||||
#define grid_file_path_bin "D:\\dev\\eigsol_gpu\\out\\grid.bin"
|
||||
#define grid_file_path "D:\\dev\\eigsol_gpu\\out\\grid.dat"
|
||||
@ -53,8 +53,6 @@ struct BSplineCtx
|
||||
F64 *bsplines;
|
||||
};
|
||||
|
||||
|
||||
|
||||
//~ Globals
|
||||
global Grid g_grid = {0};
|
||||
global BSplineCtx g_bspline_ctx = {0};
|
||||
@ -205,11 +203,11 @@ set_up_bspline_context(Arena* arena)
|
||||
{
|
||||
// Create knotpoint sequence.
|
||||
U32 k = 4;
|
||||
U32 N = 14;
|
||||
U32 bspl_N = 14;
|
||||
g_bspline_ctx.order = k;
|
||||
g_bspline_ctx.num_knotpoints = N;
|
||||
g_bspline_ctx.num_bsplines = N-k;
|
||||
g_bspline_ctx.num_phys_points = N-(2*k)+2; // Remove k points at each end, and then add back the first and last points.
|
||||
g_bspline_ctx.num_knotpoints = bspl_N;
|
||||
g_bspline_ctx.num_bsplines = bspl_N-k;
|
||||
g_bspline_ctx.num_phys_points = bspl_N-(2*k)+2; // Remove k points at each end, and then add back the first and last points.
|
||||
g_bspline_ctx.arena = arena;
|
||||
g_bspline_ctx.knotpoints = PushArray(arena, F64, g_bspline_ctx.num_knotpoints);
|
||||
// Set up physical points;
|
||||
@ -308,36 +306,37 @@ write_bsplines_to_matrix_F64(Arena *arena)
|
||||
str8_list_push(scratch.arena, &bspline_array_list, row_joined);
|
||||
}
|
||||
|
||||
|
||||
write_string_list_to_file(scratch.arena, str8_lit(bspline_array_file_path), &bspline_array_list);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
function
|
||||
void EntryPoint()
|
||||
function
|
||||
void EntryPoint(void)
|
||||
{
|
||||
OS_InitReceipt os_receipt = OS_init();
|
||||
OS_InitGfxReceipt os_gfx_receipt = OS_gfx_init(os_receipt);
|
||||
|
||||
Arena *arena = m_make_arena();
|
||||
|
||||
//- Set up grid and write to file.
|
||||
set_up_grid(arena);
|
||||
|
||||
write_array_binary_F64(str8_lit(grid_file_path_bin), g_grid.points, g_grid.num_steps);
|
||||
write_array_F64(str8_lit(grid_file_path), g_grid.points, g_grid.num_steps, "%13.6e\n");
|
||||
|
||||
//- The BSpline context is the knotpoints and the BSpline order etc.
|
||||
set_up_bspline_context(arena);
|
||||
write_array_F64(str8_lit(knotpoints_file_path),
|
||||
g_bspline_ctx.knotpoints, g_bspline_ctx.num_knotpoints,
|
||||
"%13.6e\n");
|
||||
|
||||
|
||||
write_array_F64(str8_lit(knotpoints_file_path), g_bspline_ctx.knotpoints, g_bspline_ctx.num_knotpoints, "%13.6e\n");
|
||||
|
||||
//- Then we generate the BSplines and save them off for reference and debugging.
|
||||
write_bsplines_to_matrix_F64(arena);
|
||||
|
||||
//- Test MKL
|
||||
test_mkl_zgeev();
|
||||
test_mkl_dsyevd();
|
||||
cuda_entry_point();
|
||||
|
||||
}
|
||||
|
||||
|
||||
272
src/test_mkl.c
Normal file
272
src/test_mkl.c
Normal file
@ -0,0 +1,272 @@
|
||||
|
||||
/*******************************************************************************
|
||||
* Copyright 2009-2021 Intel Corporation.
|
||||
*
|
||||
* This software and the related documents are Intel copyrighted materials, and
|
||||
* your use of them is governed by the express license under which they were
|
||||
* provided to you (License). Unless the License provides otherwise, you may not
|
||||
* use, modify, copy, publish, distribute, disclose or transmit this software or
|
||||
* the related documents without Intel's prior written permission.
|
||||
*
|
||||
* This software and the related documents are provided as is, with no express
|
||||
* or implied warranties, other than those that are expressly stated in the
|
||||
* License.
|
||||
*******************************************************************************/
|
||||
|
||||
/*
|
||||
ZGEEV Example.
|
||||
==============
|
||||
|
||||
Program computes the eigenvalues and left and right eigenvectors of a general
|
||||
rectangular matrix A:
|
||||
|
||||
( -3.84, 2.25) ( -8.94, -4.75) ( 8.95, -6.53) ( -9.87, 4.82)
|
||||
( -0.66, 0.83) ( -4.40, -3.82) ( -3.50, -4.26) ( -3.15, 7.36)
|
||||
( -3.99, -4.73) ( -5.88, -6.60) ( -3.36, -0.40) ( -0.75, 5.23)
|
||||
( 7.74, 4.18) ( 3.66, -7.53) ( 2.58, 3.60) ( 4.59, 5.41)
|
||||
|
||||
Description.
|
||||
============
|
||||
|
||||
The routine computes for an n-by-n complex nonsymmetric matrix A, the
|
||||
eigenvalues and, optionally, the left and/or right eigenvectors. The right
|
||||
eigenvector v(j) of A satisfies
|
||||
|
||||
A*v(j)= lambda(j)*v(j)
|
||||
|
||||
where lambda(j) is its eigenvalue. The left eigenvector u(j) of A satisfies
|
||||
|
||||
u(j)H*A = lambda(j)*u(j)H
|
||||
|
||||
where u(j)H denotes the conjugate transpose of u(j). The computed
|
||||
eigenvectors are normalized to have Euclidean norm equal to 1 and
|
||||
largest component real.
|
||||
|
||||
Example Program Results.
|
||||
========================
|
||||
|
||||
ZGEEV Example Program Results
|
||||
|
||||
Eigenvalues
|
||||
( -9.43,-12.98) ( -3.44, 12.69) ( 0.11, -3.40) ( 5.76, 7.13)
|
||||
|
||||
Left eigenvectors
|
||||
( 0.24, -0.18) ( 0.61, 0.00) ( -0.18, -0.33) ( 0.28, 0.09)
|
||||
( 0.79, 0.00) ( -0.05, -0.27) ( 0.82, 0.00) ( -0.55, 0.16)
|
||||
( 0.22, -0.27) ( -0.21, 0.53) ( -0.37, 0.15) ( 0.45, 0.09)
|
||||
( -0.02, 0.41) ( 0.40, -0.24) ( 0.06, 0.12) ( 0.62, 0.00)
|
||||
|
||||
Right eigenvectors
|
||||
( 0.43, 0.33) ( 0.83, 0.00) ( 0.60, 0.00) ( -0.31, 0.03)
|
||||
( 0.51, -0.03) ( 0.08, -0.25) ( -0.40, -0.20) ( 0.04, 0.34)
|
||||
( 0.62, 0.00) ( -0.25, 0.28) ( -0.09, -0.48) ( 0.36, 0.06)
|
||||
( -0.23, 0.11) ( -0.10, -0.32) ( -0.43, 0.13) ( 0.81, 0.00)
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <mkl.h>
|
||||
|
||||
/* Auxiliary routines prototypes */
|
||||
function void
|
||||
print_matrix_cmplx16( char* desc, int m, int n, MKL_Complex16* a, int lda );
|
||||
|
||||
#ifndef TEST_MKL_N
|
||||
#define TEST_MKL_N 4
|
||||
#endif
|
||||
|
||||
/* Main program */
|
||||
static void
|
||||
test_mkl_zgeev(void) {
|
||||
/* Locals */
|
||||
int N = TEST_MKL_N;
|
||||
MKL_INT n = N, lda = N, ldvl = N, ldvr = N, info, lwork;
|
||||
MKL_Complex16 wkopt;
|
||||
MKL_Complex16* work;
|
||||
/* Local arrays */
|
||||
/* rwork dimension should be at least 2*n */
|
||||
double rwork[2*TEST_MKL_N];
|
||||
MKL_Complex16 w[TEST_MKL_N], vl[TEST_MKL_N*TEST_MKL_N], vr[TEST_MKL_N*TEST_MKL_N];
|
||||
MKL_Complex16 a[TEST_MKL_N*TEST_MKL_N] = {
|
||||
{-3.84, 2.25}, {-0.66, 0.83}, {-3.99, -4.73}, { 7.74, 4.18},
|
||||
{-8.94, -4.75}, {-4.40, -3.82}, {-5.88, -6.60}, { 3.66, -7.53},
|
||||
{ 8.95, -6.53}, {-3.50, -4.26}, {-3.36, -0.40}, { 2.58, 3.60},
|
||||
{-9.87, 4.82}, {-3.15, 7.36}, {-0.75, 5.23}, { 4.59, 5.41}
|
||||
};
|
||||
/* Executable statements */
|
||||
OutputDebugString( " ZGEEV Example Program Results\n" );
|
||||
/* Query and allocate the optimal workspace */
|
||||
lwork = -1;
|
||||
zgeev( "Vectors", "Vectors", &n, a, &lda, w, vl, &ldvl, vr, &ldvr,
|
||||
&wkopt, &lwork, rwork, &info );
|
||||
lwork = (MKL_INT)wkopt.real;
|
||||
work = (MKL_Complex16*)malloc( lwork*sizeof(MKL_Complex16) );
|
||||
/* Solve eigenproblem */
|
||||
zgeev( "Vectors", "Vectors", &n, a, &lda, w, vl, &ldvl, vr, &ldvr,
|
||||
work, &lwork, rwork, &info );
|
||||
/* Check for convergence */
|
||||
if( info > 0 ) {
|
||||
OutputDebugString( "The algorithm failed to compute eigenvalues.\n" );
|
||||
exit( 1 );
|
||||
}
|
||||
/* Print eigenvalues */
|
||||
print_matrix_cmplx16( "Eigenvalues", 1, n, w, 1 );
|
||||
/* Print left eigenvectors */
|
||||
print_matrix_cmplx16( "Left eigenvectors", n, n, vl, ldvl );
|
||||
/* Print right eigenvectors */
|
||||
print_matrix_cmplx16( "Right eigenvectors", n, n, vr, ldvr );
|
||||
/* Free workspace */
|
||||
free( (void*)work );
|
||||
//exit( 0 );
|
||||
} /* End of ZGEEV Example */
|
||||
|
||||
/* Auxiliary routine: printing a matrix */
|
||||
function void
|
||||
print_matrix_cmplx16( char* desc, int m, int n, MKL_Complex16* a, int lda ) {
|
||||
ArenaTemp scratch = scratch_get(0,0);
|
||||
int i, j;
|
||||
String8 newline = str8_lit("\n");
|
||||
String8 header = str8_pushf(scratch.arena, "\n %s\n", desc );
|
||||
OutputDebugString(header.str);
|
||||
//printf("\n %s \n", desc);
|
||||
for( i = 0; i < m; i++ ) {
|
||||
for( j = 0; j < n; j++ ) {
|
||||
String8 outstr = str8_pushf(scratch.arena, " (%6.2f,%6.2f)", a[i+j*lda].real, a[i+j*lda].imag );
|
||||
OutputDebugString(outstr.str);
|
||||
//printf(" (%6.2f,%6.2f)", a[i+j*lda].real, a[i+j*lda].imag );
|
||||
}
|
||||
OutputDebugString(newline.str);
|
||||
//printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
/*******************************************************************************
|
||||
* Copyright 2009-2021 Intel Corporation.
|
||||
*
|
||||
* This software and the related documents are Intel copyrighted materials, and
|
||||
* your use of them is governed by the express license under which they were
|
||||
* provided to you (License). Unless the License provides otherwise, you may not
|
||||
* use, modify, copy, publish, distribute, disclose or transmit this software or
|
||||
* the related documents without Intel's prior written permission.
|
||||
*
|
||||
* This software and the related documents are provided as is, with no express
|
||||
* or implied warranties, other than those that are expressly stated in the
|
||||
* License.
|
||||
*******************************************************************************/
|
||||
|
||||
/*
|
||||
DSYEVD Example.
|
||||
==============
|
||||
|
||||
Program computes all eigenvalues and eigenvectors of a real symmetric
|
||||
matrix A using divide and conquer algorithm, where A is:
|
||||
|
||||
6.39 0.13 -8.23 5.71 -3.18
|
||||
0.13 8.37 -4.46 -6.10 7.21
|
||||
-8.23 -4.46 -9.58 -9.25 -7.42
|
||||
5.71 -6.10 -9.25 3.72 8.54
|
||||
-3.18 7.21 -7.42 8.54 2.51
|
||||
|
||||
Description.
|
||||
============
|
||||
|
||||
The routine computes all eigenvalues and, optionally, eigenvectors of an
|
||||
n-by-n real symmetric matrix A. The eigenvector v(j) of A satisfies
|
||||
|
||||
A*v(j) = lambda(j)*v(j)
|
||||
|
||||
where lambda(j) is its eigenvalue. The computed eigenvectors are
|
||||
orthonormal.
|
||||
If the eigenvectors are requested, then this routine uses a divide and
|
||||
conquer algorithm to compute eigenvalues and eigenvectors.
|
||||
|
||||
Example Program Results.
|
||||
========================
|
||||
|
||||
DSYEVD Example Program Results
|
||||
|
||||
Eigenvalues
|
||||
-17.44 -11.96 6.72 14.25 19.84
|
||||
|
||||
Eigenvectors (stored columnwise)
|
||||
-0.26 0.31 -0.74 0.33 0.42
|
||||
-0.17 -0.39 -0.38 -0.80 0.16
|
||||
-0.89 0.04 0.09 0.03 -0.45
|
||||
-0.29 -0.59 0.34 0.31 0.60
|
||||
-0.19 0.63 0.44 -0.38 0.48
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <mkl.h>
|
||||
|
||||
/* Auxiliary routines prototypes */
|
||||
extern void print_matrix( char* desc, int m, int n, double* a, int lda );
|
||||
|
||||
/* Parameters */
|
||||
#define N 5
|
||||
#define LDA N
|
||||
|
||||
/* Main program */
|
||||
function void
|
||||
test_mkl_dsyevd(void) {
|
||||
/* Locals */
|
||||
MKL_INT n = N, lda = LDA, info, lwork, liwork;
|
||||
MKL_INT iwkopt;
|
||||
MKL_INT* iwork;
|
||||
double wkopt;
|
||||
double* work;
|
||||
/* Local arrays */
|
||||
double w[N];
|
||||
double a[LDA*N] = {
|
||||
6.39, 0.00, 0.00, 0.00, 0.00,
|
||||
0.13, 8.37, 0.00, 0.00, 0.00,
|
||||
-8.23, -4.46, -9.58, 0.00, 0.00,
|
||||
5.71, -6.10, -9.25, 3.72, 0.00,
|
||||
-3.18, 7.21, -7.42, 8.54, 2.51
|
||||
};
|
||||
/* Executable statements */
|
||||
OutputDebugString( " DSYEVD Example Program Results\n" );
|
||||
/* Query and allocate the optimal workspace */
|
||||
lwork = -1;
|
||||
liwork = -1;
|
||||
dsyevd( "Vectors", "Upper", &n, a, &lda, w, &wkopt, &lwork, &iwkopt,
|
||||
&liwork, &info );
|
||||
lwork = (MKL_INT)wkopt;
|
||||
work = (double*)malloc( lwork*sizeof(double) );
|
||||
liwork = iwkopt;
|
||||
iwork = (MKL_INT*)malloc( liwork*sizeof(MKL_INT) );
|
||||
/* Solve eigenproblem */
|
||||
dsyevd( "Vectors", "Upper", &n, a, &lda, w, work, &lwork, iwork,
|
||||
&liwork, &info );
|
||||
/* Check for convergence */
|
||||
if( info > 0 ) {
|
||||
OutputDebugString( "The algorithm failed to compute eigenvalues.\n" );
|
||||
exit( 1 );
|
||||
}
|
||||
/* Print eigenvalues */
|
||||
print_matrix( "Eigenvalues", 1, n, w, 1 );
|
||||
/* Print eigenvectors */
|
||||
print_matrix( "Eigenvectors (stored columnwise)", n, n, a, lda );
|
||||
/* Free workspace */
|
||||
free( (void*)iwork );
|
||||
free( (void*)work );
|
||||
} /* End of DSYEVD Example */
|
||||
|
||||
/* Auxiliary routine: printing a matrix */
|
||||
void print_matrix( char* desc, int m, int n, double* a, int lda ) {
|
||||
ArenaTemp scratch = scratch_get(0, 0);
|
||||
int i, j;
|
||||
//printf( "\n %s\n", desc );
|
||||
String8 header = str8_pushf(scratch.arena, "\n %s\n", desc);
|
||||
OutputDebugString(header.str);
|
||||
for( i = 0; i < m; i++ ) {
|
||||
for( j = 0; j < n; j++ )
|
||||
{
|
||||
String8 out = str8_pushf(scratch.arena, " %6.2f", a[i+j*lda] );
|
||||
OutputDebugString(out.str);
|
||||
//printf( " %6.2f", a[i+j*lda] );
|
||||
}
|
||||
OutputDebugString(str8_lit("\n").str);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
BIN
timeBuild.ctm
BIN
timeBuild.ctm
Binary file not shown.
Loading…
Reference in New Issue
Block a user