added clangd support
This commit is contained in:
parent
a30a500564
commit
10f92af6cf
29
build.bat
29
build.bat
@ -12,23 +12,26 @@ if "%SRC%"=="" (
|
|||||||
|
|
||||||
set OUT=%~n1.exe
|
set OUT=%~n1.exe
|
||||||
|
|
||||||
|
set CFLAGS="-O3" "-march=native" "-ffast-math" "-fopenmp" "-Wall" "-I%MKL_ROOT%\include"
|
||||||
|
set LDFLAGS=-L%MKL_ROOT%\lib -L%COMPILER_ROOT%\lib -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -llibiomp5md
|
||||||
|
|
||||||
pushd build
|
pushd build
|
||||||
del /F /Q *
|
del /F /Q *
|
||||||
clang ../src/%SRC% -o %OUT% ^
|
|
||||||
-O3 ^
|
clang ../src/%SRC% -o %OUT% %CFLAGS% %LDFLAGS%
|
||||||
-march=native ^
|
|
||||||
-ffast-math ^
|
|
||||||
-fopenmp ^
|
|
||||||
-I"%MKL_ROOT%\include" ^
|
|
||||||
-L"%MKL_ROOT%\lib" ^
|
|
||||||
-L"%COMPILER_ROOT%\lib" ^
|
|
||||||
-lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core ^
|
|
||||||
-llibiomp5md ^
|
|
||||||
-Wall
|
|
||||||
|
|
||||||
popd
|
popd
|
||||||
|
|
||||||
|
@rem Generate a file for clangd to understand the include files and compiler flags
|
||||||
|
(
|
||||||
|
echo -xc
|
||||||
|
echo -std=c11
|
||||||
|
for %%f in (%CFLAGS%) do echo %%~f
|
||||||
|
) > compile_flags.txt
|
||||||
|
|
||||||
echo Build complete: build/%OUT%
|
echo Build complete: build/%OUT%
|
||||||
|
|
||||||
set PATH=%PATH%;E:\lib\intel_mkl\mkl\2025.3\bin
|
set PATH=%PATH%;%MKL_ROOT%\bin
|
||||||
set PATH=%PATH%;E:\lib\intel_mkl\compiler\2025.3\bin
|
set PATH=%PATH%;%COMPILER_ROOT%\bin
|
||||||
|
|
||||||
|
endlocal
|
||||||
|
|||||||
8
compile_flags.txt
Normal file
8
compile_flags.txt
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
-xc
|
||||||
|
-std=c11
|
||||||
|
-O3
|
||||||
|
-march=native
|
||||||
|
-ffast-math
|
||||||
|
-fopenmp
|
||||||
|
-Wall
|
||||||
|
-IE:\lib\intel_mkl\mkl\2025.3\include
|
||||||
817
src/main.c
817
src/main.c
@ -1,8 +1,8 @@
|
|||||||
#include <stdlib.h>
|
#include <ctype.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
#define WIN32_LEAN_AND_MEAN
|
#define WIN32_LEAN_AND_MEAN
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
@ -28,7 +28,7 @@ struct CSRMatrix {
|
|||||||
MKL_INT nnz;
|
MKL_INT nnz;
|
||||||
MKL_INT *row_ptr; // size rows + 1
|
MKL_INT *row_ptr; // size rows + 1
|
||||||
MKL_INT *col_ind; // size nnz
|
MKL_INT *col_ind; // size nnz
|
||||||
F64 *values; // size nnz
|
F64 *values; // size nnz
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct Triplet Triplet;
|
typedef struct Triplet Triplet;
|
||||||
@ -75,129 +75,121 @@ static void panic(const char *msg) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void *xmalloc(size_t n) {
|
static void *xmalloc(size_t n) {
|
||||||
void *p = malloc(n);
|
void *p = malloc(n);
|
||||||
if (!p) panic("out of memory");
|
if (!p)
|
||||||
return p;
|
panic("out of memory");
|
||||||
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void *xcalloc(size_t count, size_t size) {
|
static void *xcalloc(size_t count, size_t size) {
|
||||||
void *p = calloc(count, size);
|
void *p = calloc(count, size);
|
||||||
if(!p) {
|
if (!p) {
|
||||||
panic("out of memory");
|
panic("out of memory");
|
||||||
}
|
}
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void matrix_label_from_path(const char *path, char *out, size_t out_size)
|
static void matrix_label_from_path(const char *path, char *out,
|
||||||
{
|
size_t out_size) {
|
||||||
const char *base = path;
|
const char *base = path;
|
||||||
|
|
||||||
const char *s1 = strrchr(path, '/');
|
const char *s1 = strrchr(path, '/');
|
||||||
const char *s2 = strrchr(path, '\\');
|
const char *s2 = strrchr(path, '\\');
|
||||||
|
|
||||||
if (s1 && s1 >= base) base = s1 + 1;
|
if (s1 && s1 >= base)
|
||||||
if (s2 && s2 >= base) base = s2 + 1;
|
base = s1 + 1;
|
||||||
|
if (s2 && s2 >= base)
|
||||||
|
base = s2 + 1;
|
||||||
|
|
||||||
strncpy_s(out, out_size, base, _TRUNCATE);
|
strncpy_s(out, out_size, base, _TRUNCATE);
|
||||||
|
|
||||||
char *ext = strrchr(out, '.');
|
char *ext = strrchr(out, '.');
|
||||||
if (ext && strcmp(ext, ".mtx") == 0) {
|
if (ext && strcmp(ext, ".mtx") == 0) {
|
||||||
*ext = '\0';
|
*ext = '\0';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static S64 now_ns(void)
|
static S64 now_ns(void) {
|
||||||
{
|
static LARGE_INTEGER freq;
|
||||||
static LARGE_INTEGER freq;
|
static B32 initialized = 0;
|
||||||
static B32 initialized = 0;
|
|
||||||
|
|
||||||
if (!initialized) {
|
if (!initialized) {
|
||||||
QueryPerformanceFrequency(&freq);
|
QueryPerformanceFrequency(&freq);
|
||||||
initialized = 1;
|
initialized = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
LARGE_INTEGER counter;
|
LARGE_INTEGER counter;
|
||||||
QueryPerformanceCounter(&counter);
|
QueryPerformanceCounter(&counter);
|
||||||
|
|
||||||
return (S64)((counter.QuadPart * 1000000000LL) / freq.QuadPart);
|
return (S64)((counter.QuadPart * 1000000000LL) / freq.QuadPart);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void timing_write_json(FILE *fp, const Timing *t)
|
static void timing_write_json(FILE *fp, const Timing *t) {
|
||||||
{
|
fprintf(fp,
|
||||||
fprintf(fp,
|
"{"
|
||||||
"{"
|
"\"label\":\"%s\","
|
||||||
"\"label\":\"%s\","
|
"\"rows\":%d,"
|
||||||
"\"rows\":%d,"
|
"\"cols\":%d,"
|
||||||
"\"cols\":%d,"
|
"\"nnz\":%d,"
|
||||||
"\"nnz\":%d,"
|
"\"spmv_runs\":%d,"
|
||||||
"\"spmv_runs\":%d,"
|
"\"spmv_total_ns\":%lld,"
|
||||||
"\"spmv_total_ns\":%lld,"
|
"\"spmv_avg_ns\":%lld,"
|
||||||
"\"spmv_avg_ns\":%lld,"
|
"\"dense_rows\":%d,"
|
||||||
"\"dense_rows\":%d,"
|
"\"dense_cols\":%d,"
|
||||||
"\"dense_cols\":%d,"
|
"\"dense_runs\":%d,"
|
||||||
"\"dense_runs\":%d,"
|
"\"dense_total_ns\":%lld,"
|
||||||
"\"dense_total_ns\":%lld,"
|
"\"dense_avg_ns\":%lld"
|
||||||
"\"dense_avg_ns\":%lld"
|
"}",
|
||||||
"}",
|
t->label, t->rows, t->cols, t->NNZ, t->SpMVRuns,
|
||||||
t->label,
|
(long long)t->SpMVTotalNs, (long long)t->SpMVAvgNs, t->DenseRows,
|
||||||
t->rows,
|
t->DenseCols, t->DenseRuns, (long long)t->DenseTotalNs,
|
||||||
t->cols,
|
(long long)t->DenseAvgNs);
|
||||||
t->NNZ,
|
|
||||||
t->SpMVRuns,
|
|
||||||
(long long)t->SpMVTotalNs,
|
|
||||||
(long long)t->SpMVAvgNs,
|
|
||||||
t->DenseRows,
|
|
||||||
t->DenseCols,
|
|
||||||
t->DenseRuns,
|
|
||||||
(long long)t->DenseTotalNs,
|
|
||||||
(long long)t->DenseAvgNs
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void timings_write_json_file(
|
static void timings_write_json_file(const char *path, const Timing *timings,
|
||||||
const char *path,
|
int count) {
|
||||||
const Timing *timings,
|
FILE *fp = NULL;
|
||||||
int count)
|
|
||||||
{
|
|
||||||
FILE *fp = NULL;
|
|
||||||
|
|
||||||
if (fopen_s(&fp, path, "w") != 0 || fp == NULL)
|
if (fopen_s(&fp, path, "w") != 0 || fp == NULL)
|
||||||
panic("failed to open json file");
|
panic("failed to open json file");
|
||||||
|
|
||||||
fprintf(fp, "[\n");
|
fprintf(fp, "[\n");
|
||||||
|
|
||||||
for (int i = 0; i < count; i++)
|
for (int i = 0; i < count; i++) {
|
||||||
{
|
fprintf(fp, " ");
|
||||||
fprintf(fp, " ");
|
timing_write_json(fp, &timings[i]);
|
||||||
timing_write_json(fp, &timings[i]);
|
|
||||||
|
|
||||||
if (i != count - 1)
|
if (i != count - 1)
|
||||||
fprintf(fp, ",");
|
fprintf(fp, ",");
|
||||||
|
|
||||||
fprintf(fp, "\n");
|
fprintf(fp, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
fprintf(fp, "]\n");
|
fprintf(fp, "]\n");
|
||||||
|
|
||||||
fclose(fp);
|
fclose(fp);
|
||||||
}
|
}
|
||||||
|
|
||||||
static char *trim_left(char *s) {
|
static char *trim_left(char *s) {
|
||||||
while (*s && isspace((unsigned char)*s)) s++;
|
while (*s && isspace((unsigned char)*s))
|
||||||
return s;
|
s++;
|
||||||
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int triplet_cmp(const void *a, const void *b) {
|
static inline int triplet_cmp(const void *a, const void *b) {
|
||||||
const Triplet *x = (const Triplet *)a;
|
const Triplet *x = (const Triplet *)a;
|
||||||
const Triplet *y = (const Triplet *)b;
|
const Triplet *y = (const Triplet *)b;
|
||||||
if (x->i < y->i) return -1;
|
if (x->i < y->i)
|
||||||
if (x->i > y->i) return 1;
|
return -1;
|
||||||
if (x->j < y->j) return -1;
|
if (x->i > y->i)
|
||||||
if (x->j > y->j) return 1;
|
return 1;
|
||||||
return 0;
|
if (x->j < y->j)
|
||||||
|
return -1;
|
||||||
|
if (x->j > y->j)
|
||||||
|
return 1;
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Exact-semantics "fast" version of the slow Matrix Market reader.
|
// Exact-semantics "fast" version of the slow Matrix Market reader.
|
||||||
// Keeps:
|
// Keeps:
|
||||||
// - general / symmetric handling
|
// - general / symmetric handling
|
||||||
@ -209,331 +201,309 @@ static inline int triplet_cmp(const void *a, const void *b) {
|
|||||||
// - hot-loop parsing (strtol/strtod instead of sscanf_s)
|
// - hot-loop parsing (strtol/strtod instead of sscanf_s)
|
||||||
|
|
||||||
CSRMatrix read_matrix_market_to_csr(const char *path) {
|
CSRMatrix read_matrix_market_to_csr(const char *path) {
|
||||||
FILE *fp = NULL;
|
FILE *fp = NULL;
|
||||||
errno_t err = fopen_s(&fp, path, "r");
|
errno_t err = fopen_s(&fp, path, "r");
|
||||||
if (err != 0 || fp == NULL) {
|
if (err != 0 || fp == NULL) {
|
||||||
panic("failed to open Matrix Market file");
|
panic("failed to open Matrix Market file");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Bigger stdio buffer helps a lot for multi-GB files.
|
// Bigger stdio buffer helps a lot for multi-GB files.
|
||||||
setvbuf(fp, NULL, _IOFBF, 1 << 22); // 4 MiB
|
setvbuf(fp, NULL, _IOFBF, 1 << 22); // 4 MiB
|
||||||
|
|
||||||
char line[4096];
|
char line[4096];
|
||||||
|
|
||||||
if (fgets(line, sizeof(line), fp) == NULL) {
|
|
||||||
fclose(fp);
|
|
||||||
panic("failed to read Matrix Market header");
|
|
||||||
}
|
|
||||||
|
|
||||||
char banner[64];
|
|
||||||
char object[64];
|
|
||||||
char format[64];
|
|
||||||
char field[64];
|
|
||||||
char symmetry[64];
|
|
||||||
|
|
||||||
int scanned = sscanf_s(
|
|
||||||
line,
|
|
||||||
"%63s %63s %63s %63s %63s",
|
|
||||||
banner, (unsigned)_countof(banner),
|
|
||||||
object, (unsigned)_countof(object),
|
|
||||||
format, (unsigned)_countof(format),
|
|
||||||
field, (unsigned)_countof(field),
|
|
||||||
symmetry, (unsigned)_countof(symmetry)
|
|
||||||
);
|
|
||||||
|
|
||||||
if (scanned != 5) {
|
|
||||||
fclose(fp);
|
|
||||||
panic("invalid Matrix Market header");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (strcmp(banner, "%%MatrixMarket") != 0) {
|
|
||||||
fclose(fp);
|
|
||||||
panic("not a Matrix Market file");
|
|
||||||
}
|
|
||||||
if (strcmp(object, "matrix") != 0) {
|
|
||||||
fclose(fp);
|
|
||||||
panic("only 'matrix' object supported");
|
|
||||||
}
|
|
||||||
if (strcmp(format, "coordinate") != 0) {
|
|
||||||
fclose(fp);
|
|
||||||
panic("only coordinate format supported");
|
|
||||||
}
|
|
||||||
|
|
||||||
int is_real = (strcmp(field, "real") == 0);
|
|
||||||
int is_integer = (strcmp(field, "integer") == 0);
|
|
||||||
int is_pattern = (strcmp(field, "pattern") == 0);
|
|
||||||
|
|
||||||
if (!is_real && !is_integer && !is_pattern) {
|
|
||||||
fclose(fp);
|
|
||||||
panic("unsupported Matrix Market field type");
|
|
||||||
}
|
|
||||||
|
|
||||||
int is_general = (strcmp(symmetry, "general") == 0);
|
|
||||||
int is_symmetric = (strcmp(symmetry, "symmetric") == 0);
|
|
||||||
|
|
||||||
if (!is_general && !is_symmetric) {
|
|
||||||
fclose(fp);
|
|
||||||
panic("unsupported Matrix Market symmetry");
|
|
||||||
}
|
|
||||||
|
|
||||||
int rows = 0;
|
|
||||||
int cols = 0;
|
|
||||||
int nnz_in_file = 0;
|
|
||||||
|
|
||||||
for (;;) {
|
|
||||||
if (fgets(line, sizeof(line), fp) == NULL) {
|
|
||||||
fclose(fp);
|
|
||||||
panic("missing size line");
|
|
||||||
}
|
|
||||||
|
|
||||||
char *s = trim_left(line);
|
|
||||||
if (*s == '%') {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
scanned = sscanf_s(s, "%d %d %d", &rows, &cols, &nnz_in_file);
|
|
||||||
if (scanned != 3) {
|
|
||||||
fclose(fp);
|
|
||||||
panic("invalid size line");
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
int cap = is_symmetric ? 2 * nnz_in_file : nnz_in_file;
|
|
||||||
Triplet *trips = (Triplet *)xmalloc((size_t)cap * sizeof(Triplet));
|
|
||||||
int tcount = 0;
|
|
||||||
|
|
||||||
while (fgets(line, sizeof(line), fp) != NULL) {
|
|
||||||
char *s = trim_left(line);
|
|
||||||
|
|
||||||
if (*s == '\0' || *s == '\n' || *s == '%') {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
char *p = s;
|
|
||||||
char *end = NULL;
|
|
||||||
|
|
||||||
long li = strtol(p, &end, 10);
|
|
||||||
if (end == p) {
|
|
||||||
fclose(fp);
|
|
||||||
free(trips);
|
|
||||||
panic("bad entry line: failed to parse row");
|
|
||||||
}
|
|
||||||
p = end;
|
|
||||||
|
|
||||||
long lj = strtol(p, &end, 10);
|
|
||||||
if (end == p) {
|
|
||||||
fclose(fp);
|
|
||||||
free(trips);
|
|
||||||
panic("bad entry line: failed to parse col");
|
|
||||||
}
|
|
||||||
p = end;
|
|
||||||
|
|
||||||
double v = 1.0;
|
|
||||||
|
|
||||||
if (is_pattern) {
|
|
||||||
// nothing else to parse
|
|
||||||
} else if (is_integer) {
|
|
||||||
long liv = strtol(p, &end, 10);
|
|
||||||
if (end == p) {
|
|
||||||
fclose(fp);
|
|
||||||
free(trips);
|
|
||||||
panic("bad integer entry line");
|
|
||||||
}
|
|
||||||
v = (double)liv;
|
|
||||||
p = end;
|
|
||||||
} else {
|
|
||||||
v = strtod(p, &end);
|
|
||||||
if (end == p) {
|
|
||||||
fclose(fp);
|
|
||||||
free(trips);
|
|
||||||
panic("bad real entry line");
|
|
||||||
}
|
|
||||||
p = end;
|
|
||||||
}
|
|
||||||
|
|
||||||
int i = (int)li - 1;
|
|
||||||
int j = (int)lj - 1;
|
|
||||||
|
|
||||||
if (i < 0 || i >= rows || j < 0 || j >= cols) {
|
|
||||||
fclose(fp);
|
|
||||||
free(trips);
|
|
||||||
panic("entry index out of range");
|
|
||||||
}
|
|
||||||
|
|
||||||
trips[tcount].i = i;
|
|
||||||
trips[tcount].j = j;
|
|
||||||
trips[tcount].v = v;
|
|
||||||
tcount++;
|
|
||||||
|
|
||||||
if (is_symmetric && i != j) {
|
|
||||||
trips[tcount].i = j;
|
|
||||||
trips[tcount].j = i;
|
|
||||||
trips[tcount].v = v;
|
|
||||||
tcount++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
if (fgets(line, sizeof(line), fp) == NULL) {
|
||||||
fclose(fp);
|
fclose(fp);
|
||||||
|
panic("failed to read Matrix Market header");
|
||||||
|
}
|
||||||
|
|
||||||
qsort(trips, (size_t)tcount, sizeof(Triplet), triplet_cmp);
|
char banner[64];
|
||||||
|
char object[64];
|
||||||
|
char format[64];
|
||||||
|
char field[64];
|
||||||
|
char symmetry[64];
|
||||||
|
|
||||||
Triplet *uniq = (Triplet *)xmalloc((size_t)tcount * sizeof(Triplet));
|
int scanned = sscanf_s(
|
||||||
int ucount = 0;
|
line, "%63s %63s %63s %63s %63s", banner, (unsigned)_countof(banner),
|
||||||
|
object, (unsigned)_countof(object), format, (unsigned)_countof(format),
|
||||||
|
field, (unsigned)_countof(field), symmetry, (unsigned)_countof(symmetry));
|
||||||
|
|
||||||
for (int k = 0; k < tcount;) {
|
if (scanned != 5) {
|
||||||
int i = trips[k].i;
|
fclose(fp);
|
||||||
int j = trips[k].j;
|
panic("invalid Matrix Market header");
|
||||||
double sum = 0.0;
|
}
|
||||||
|
|
||||||
while (k < tcount && trips[k].i == i && trips[k].j == j) {
|
if (strcmp(banner, "%%MatrixMarket") != 0) {
|
||||||
sum += trips[k].v;
|
fclose(fp);
|
||||||
k++;
|
panic("not a Matrix Market file");
|
||||||
}
|
}
|
||||||
|
if (strcmp(object, "matrix") != 0) {
|
||||||
|
fclose(fp);
|
||||||
|
panic("only 'matrix' object supported");
|
||||||
|
}
|
||||||
|
if (strcmp(format, "coordinate") != 0) {
|
||||||
|
fclose(fp);
|
||||||
|
panic("only coordinate format supported");
|
||||||
|
}
|
||||||
|
|
||||||
uniq[ucount].i = i;
|
int is_real = (strcmp(field, "real") == 0);
|
||||||
uniq[ucount].j = j;
|
int is_integer = (strcmp(field, "integer") == 0);
|
||||||
uniq[ucount].v = sum;
|
int is_pattern = (strcmp(field, "pattern") == 0);
|
||||||
ucount++;
|
|
||||||
|
if (!is_real && !is_integer && !is_pattern) {
|
||||||
|
fclose(fp);
|
||||||
|
panic("unsupported Matrix Market field type");
|
||||||
|
}
|
||||||
|
|
||||||
|
int is_general = (strcmp(symmetry, "general") == 0);
|
||||||
|
int is_symmetric = (strcmp(symmetry, "symmetric") == 0);
|
||||||
|
|
||||||
|
if (!is_general && !is_symmetric) {
|
||||||
|
fclose(fp);
|
||||||
|
panic("unsupported Matrix Market symmetry");
|
||||||
|
}
|
||||||
|
|
||||||
|
int rows = 0;
|
||||||
|
int cols = 0;
|
||||||
|
int nnz_in_file = 0;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
if (fgets(line, sizeof(line), fp) == NULL) {
|
||||||
|
fclose(fp);
|
||||||
|
panic("missing size line");
|
||||||
}
|
}
|
||||||
|
|
||||||
free(trips);
|
char *s = trim_left(line);
|
||||||
|
if (*s == '%') {
|
||||||
CSRMatrix A;
|
continue;
|
||||||
A.rows = rows;
|
|
||||||
A.cols = cols;
|
|
||||||
A.nnz = ucount;
|
|
||||||
A.row_ptr = (int *)xcalloc((size_t)rows + 1, sizeof(int));
|
|
||||||
A.col_ind = (int *)xmalloc((size_t)ucount * sizeof(int));
|
|
||||||
A.values = (double *)xmalloc((size_t)ucount * sizeof(double));
|
|
||||||
|
|
||||||
for (int k = 0; k < ucount; k++) {
|
|
||||||
A.row_ptr[uniq[k].i + 1]++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < rows; i++) {
|
scanned = sscanf_s(s, "%d %d %d", &rows, &cols, &nnz_in_file);
|
||||||
A.row_ptr[i + 1] += A.row_ptr[i];
|
if (scanned != 3) {
|
||||||
|
fclose(fp);
|
||||||
|
panic("invalid size line");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
int cap = is_symmetric ? 2 * nnz_in_file : nnz_in_file;
|
||||||
|
Triplet *trips = (Triplet *)xmalloc((size_t)cap * sizeof(Triplet));
|
||||||
|
int tcount = 0;
|
||||||
|
|
||||||
|
while (fgets(line, sizeof(line), fp) != NULL) {
|
||||||
|
char *s = trim_left(line);
|
||||||
|
|
||||||
|
if (*s == '\0' || *s == '\n' || *s == '%') {
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
int *next = (int *)xmalloc((size_t)rows * sizeof(int));
|
char *p = s;
|
||||||
memcpy(next, A.row_ptr, (size_t)rows * sizeof(int));
|
char *end = NULL;
|
||||||
|
|
||||||
for (int k = 0; k < ucount; k++) {
|
long li = strtol(p, &end, 10);
|
||||||
int row = uniq[k].i;
|
if (end == p) {
|
||||||
int p = next[row]++;
|
fclose(fp);
|
||||||
|
free(trips);
|
||||||
|
panic("bad entry line: failed to parse row");
|
||||||
|
}
|
||||||
|
p = end;
|
||||||
|
|
||||||
A.col_ind[p] = uniq[k].j;
|
long lj = strtol(p, &end, 10);
|
||||||
A.values[p] = uniq[k].v;
|
if (end == p) {
|
||||||
|
fclose(fp);
|
||||||
|
free(trips);
|
||||||
|
panic("bad entry line: failed to parse col");
|
||||||
|
}
|
||||||
|
p = end;
|
||||||
|
|
||||||
|
double v = 1.0;
|
||||||
|
|
||||||
|
if (is_pattern) {
|
||||||
|
// nothing else to parse
|
||||||
|
} else if (is_integer) {
|
||||||
|
long liv = strtol(p, &end, 10);
|
||||||
|
if (end == p) {
|
||||||
|
fclose(fp);
|
||||||
|
free(trips);
|
||||||
|
panic("bad integer entry line");
|
||||||
|
}
|
||||||
|
v = (double)liv;
|
||||||
|
p = end;
|
||||||
|
} else {
|
||||||
|
v = strtod(p, &end);
|
||||||
|
if (end == p) {
|
||||||
|
fclose(fp);
|
||||||
|
free(trips);
|
||||||
|
panic("bad real entry line");
|
||||||
|
}
|
||||||
|
p = end;
|
||||||
}
|
}
|
||||||
|
|
||||||
free(next);
|
int i = (int)li - 1;
|
||||||
free(uniq);
|
int j = (int)lj - 1;
|
||||||
|
|
||||||
return A;
|
if (i < 0 || i >= rows || j < 0 || j >= cols) {
|
||||||
|
fclose(fp);
|
||||||
|
free(trips);
|
||||||
|
panic("entry index out of range");
|
||||||
|
}
|
||||||
|
|
||||||
|
trips[tcount].i = i;
|
||||||
|
trips[tcount].j = j;
|
||||||
|
trips[tcount].v = v;
|
||||||
|
tcount++;
|
||||||
|
|
||||||
|
if (is_symmetric && i != j) {
|
||||||
|
trips[tcount].i = j;
|
||||||
|
trips[tcount].j = i;
|
||||||
|
trips[tcount].v = v;
|
||||||
|
tcount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fclose(fp);
|
||||||
|
|
||||||
|
qsort(trips, (size_t)tcount, sizeof(Triplet), triplet_cmp);
|
||||||
|
|
||||||
|
Triplet *uniq = (Triplet *)xmalloc((size_t)tcount * sizeof(Triplet));
|
||||||
|
int ucount = 0;
|
||||||
|
|
||||||
|
for (int k = 0; k < tcount;) {
|
||||||
|
int i = trips[k].i;
|
||||||
|
int j = trips[k].j;
|
||||||
|
double sum = 0.0;
|
||||||
|
|
||||||
|
while (k < tcount && trips[k].i == i && trips[k].j == j) {
|
||||||
|
sum += trips[k].v;
|
||||||
|
k++;
|
||||||
|
}
|
||||||
|
|
||||||
|
uniq[ucount].i = i;
|
||||||
|
uniq[ucount].j = j;
|
||||||
|
uniq[ucount].v = sum;
|
||||||
|
ucount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
free(trips);
|
||||||
|
|
||||||
|
CSRMatrix A;
|
||||||
|
A.rows = rows;
|
||||||
|
A.cols = cols;
|
||||||
|
A.nnz = ucount;
|
||||||
|
A.row_ptr = (int *)xcalloc((size_t)rows + 1, sizeof(int));
|
||||||
|
A.col_ind = (int *)xmalloc((size_t)ucount * sizeof(int));
|
||||||
|
A.values = (double *)xmalloc((size_t)ucount * sizeof(double));
|
||||||
|
|
||||||
|
for (int k = 0; k < ucount; k++) {
|
||||||
|
A.row_ptr[uniq[k].i + 1]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < rows; i++) {
|
||||||
|
A.row_ptr[i + 1] += A.row_ptr[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
int *next = (int *)xmalloc((size_t)rows * sizeof(int));
|
||||||
|
memcpy(next, A.row_ptr, (size_t)rows * sizeof(int));
|
||||||
|
|
||||||
|
for (int k = 0; k < ucount; k++) {
|
||||||
|
int row = uniq[k].i;
|
||||||
|
int p = next[row]++;
|
||||||
|
|
||||||
|
A.col_ind[p] = uniq[k].j;
|
||||||
|
A.values[p] = uniq[k].v;
|
||||||
|
}
|
||||||
|
|
||||||
|
free(next);
|
||||||
|
free(uniq);
|
||||||
|
|
||||||
|
return A;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void free_csr(CSRMatrix *A) {
|
static void free_csr(CSRMatrix *A) {
|
||||||
if (!A) return;
|
if (!A)
|
||||||
free(A->row_ptr);
|
return;
|
||||||
free(A->col_ind);
|
free(A->row_ptr);
|
||||||
free(A->values);
|
free(A->col_ind);
|
||||||
A->row_ptr = NULL;
|
free(A->values);
|
||||||
A->col_ind = NULL;
|
A->row_ptr = NULL;
|
||||||
A->values = NULL;
|
A->col_ind = NULL;
|
||||||
A->rows = 0;
|
A->values = NULL;
|
||||||
A->cols = 0;
|
A->rows = 0;
|
||||||
A->nnz = 0;
|
A->cols = 0;
|
||||||
|
A->nnz = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static sparse_matrix_t csr_to_mkl_handle(const CSRMatrix *A) {
|
static sparse_matrix_t csr_to_mkl_handle(const CSRMatrix *A) {
|
||||||
sparse_matrix_t H = NULL;
|
sparse_matrix_t H = NULL;
|
||||||
|
|
||||||
// oneMKL CSR creation takes row_start and row_end arrays.
|
// oneMKL CSR creation takes row_start and row_end arrays.
|
||||||
// With standard CSR row_ptr, these are row_ptr[i] and row_ptr[i+1].
|
// With standard CSR row_ptr, these are row_ptr[i] and row_ptr[i+1].
|
||||||
sparse_status_t st = mkl_sparse_d_create_csr(
|
sparse_status_t st = mkl_sparse_d_create_csr(
|
||||||
&H,
|
&H, SPARSE_INDEX_BASE_ZERO, A->rows, A->cols, A->row_ptr, A->row_ptr + 1,
|
||||||
SPARSE_INDEX_BASE_ZERO,
|
A->col_ind, A->values);
|
||||||
A->rows,
|
if (st != SPARSE_STATUS_SUCCESS)
|
||||||
A->cols,
|
panic("mkl_sparse_d_create_csr failed");
|
||||||
A->row_ptr,
|
|
||||||
A->row_ptr + 1,
|
|
||||||
A->col_ind,
|
|
||||||
A->values
|
|
||||||
);
|
|
||||||
if (st != SPARSE_STATUS_SUCCESS) panic("mkl_sparse_d_create_csr failed");
|
|
||||||
|
|
||||||
return H;
|
return H;
|
||||||
}
|
}
|
||||||
|
|
||||||
Timing timeSpMV(const CSRMatrix *A) {
|
Timing timeSpMV(const CSRMatrix *A) {
|
||||||
Timing out = {0};
|
Timing out = {0};
|
||||||
sparse_matrix_t H = csr_to_mkl_handle(A);
|
sparse_matrix_t H = csr_to_mkl_handle(A);
|
||||||
|
|
||||||
struct matrix_descr descr;
|
struct matrix_descr descr;
|
||||||
descr.type = SPARSE_MATRIX_TYPE_GENERAL;
|
descr.type = SPARSE_MATRIX_TYPE_GENERAL;
|
||||||
descr.mode = SPARSE_FILL_MODE_FULL;
|
descr.mode = SPARSE_FILL_MODE_FULL;
|
||||||
descr.diag = SPARSE_DIAG_NON_UNIT;
|
descr.diag = SPARSE_DIAG_NON_UNIT;
|
||||||
|
|
||||||
// Optional optimization path recommended by oneMKL.
|
// Optional optimization path recommended by oneMKL.
|
||||||
mkl_sparse_set_mv_hint(H, SPARSE_OPERATION_NON_TRANSPOSE, descr, g_spmv_runs);
|
mkl_sparse_set_mv_hint(H, SPARSE_OPERATION_NON_TRANSPOSE, descr, g_spmv_runs);
|
||||||
mkl_sparse_optimize(H);
|
mkl_sparse_optimize(H);
|
||||||
|
|
||||||
double *x = (double *)xmalloc((size_t)A->cols * sizeof(double));
|
double *x = (double *)xmalloc((size_t)A->cols * sizeof(double));
|
||||||
double *y = (double *)xcalloc((size_t)A->rows, sizeof(double));
|
double *y = (double *)xcalloc((size_t)A->rows, sizeof(double));
|
||||||
|
|
||||||
for (MKL_INT i = 0; i < A->cols; i++) x[i] = 1.0;
|
for (MKL_INT i = 0; i < A->cols; i++)
|
||||||
|
x[i] = 1.0;
|
||||||
|
|
||||||
// warmup
|
// warmup
|
||||||
for(int i = 0; i < 2; i += 1) {
|
for (int i = 0; i < 2; i += 1) {
|
||||||
sparse_status_t st = mkl_sparse_d_mv(
|
sparse_status_t st = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, H,
|
||||||
SPARSE_OPERATION_NON_TRANSPOSE,
|
descr, x, 0.0, y);
|
||||||
1.0,
|
if (st != SPARSE_STATUS_SUCCESS)
|
||||||
H,
|
panic("mkl_sparse_d_mv failed");
|
||||||
descr,
|
}
|
||||||
x,
|
|
||||||
0.0,
|
|
||||||
y
|
|
||||||
);
|
|
||||||
if (st != SPARSE_STATUS_SUCCESS) panic("mkl_sparse_d_mv failed");
|
|
||||||
}
|
|
||||||
|
|
||||||
S64 t0 = now_ns();
|
S64 t0 = now_ns();
|
||||||
for (int i = 0; i < g_spmv_runs; i += 1) {
|
for (int i = 0; i < g_spmv_runs; i += 1) {
|
||||||
sparse_status_t st = mkl_sparse_d_mv(
|
sparse_status_t st = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, H,
|
||||||
SPARSE_OPERATION_NON_TRANSPOSE,
|
descr, x, 0.0, y);
|
||||||
1.0,
|
if (st != SPARSE_STATUS_SUCCESS)
|
||||||
H,
|
panic("mkl_sparse_d_mv failed");
|
||||||
descr,
|
}
|
||||||
x,
|
S64 t1 = now_ns();
|
||||||
0.0,
|
|
||||||
y
|
|
||||||
);
|
|
||||||
if (st != SPARSE_STATUS_SUCCESS) panic("mkl_sparse_d_mv failed");
|
|
||||||
}
|
|
||||||
S64 t1 = now_ns();
|
|
||||||
|
|
||||||
S64 elapsed_ns = t1-t0;
|
S64 elapsed_ns = t1 - t0;
|
||||||
|
|
||||||
out.rows = A->rows;
|
out.rows = A->rows;
|
||||||
out.cols = A->cols;
|
out.cols = A->cols;
|
||||||
out.NNZ = A->nnz;
|
out.NNZ = A->nnz;
|
||||||
out.SpMVRuns = g_spmv_runs;
|
out.SpMVRuns = g_spmv_runs;
|
||||||
out.SpMVTotalNs = elapsed_ns;
|
out.SpMVTotalNs = elapsed_ns;
|
||||||
out.SpMVAvgNs = elapsed_ns / g_spmv_runs;
|
out.SpMVAvgNs = elapsed_ns / g_spmv_runs;
|
||||||
|
|
||||||
printf("SpMV done for %d runs. y[0] = %.6g\n", g_spmv_runs, (A->rows > 0 ? y[0] : 0.0));
|
printf("SpMV done for %d runs. y[0] = %.6g\n", g_spmv_runs,
|
||||||
|
(A->rows > 0 ? y[0] : 0.0));
|
||||||
|
|
||||||
F64 avg_ms = (F64)out.SpMVAvgNs/1e6;
|
F64 avg_ms = (F64)out.SpMVAvgNs / 1e6;
|
||||||
printf("Average time for SpMV: %.3f ms \n", avg_ms);
|
printf("Average time for SpMV: %.3f ms \n", avg_ms);
|
||||||
|
|
||||||
free(x);
|
free(x);
|
||||||
free(y);
|
free(y);
|
||||||
|
|
||||||
|
mkl_sparse_destroy(H);
|
||||||
|
|
||||||
mkl_sparse_destroy(H);
|
return out;
|
||||||
|
|
||||||
|
|
||||||
return out;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static DenseTiming timeDenseMatmul(int inRows) {
|
static DenseTiming timeDenseMatmul(int inRows) {
|
||||||
@ -545,78 +515,62 @@ static DenseTiming timeDenseMatmul(int inRows) {
|
|||||||
out.DenseRows = rows;
|
out.DenseRows = rows;
|
||||||
out.DenseCols = cols;
|
out.DenseCols = cols;
|
||||||
|
|
||||||
S64 byteReq = rows*cols*sizeof(F64);
|
S64 byteReq = rows * cols * sizeof(F64);
|
||||||
F64 GBreq = (F64)byteReq/(1024.0 * 1024.0 * 1024.0);
|
F64 GBreq = (F64)byteReq / (1024.0 * 1024.0 * 1024.0);
|
||||||
printf("Matrix of size %i x %i requires %.2f GB of memory \n", inRows, inRows, GBreq);
|
printf("Matrix of size %i x %i requires %.2f GB of memory \n", inRows, inRows,
|
||||||
|
GBreq);
|
||||||
|
|
||||||
MKL_INT n = rows;
|
MKL_INT n = rows;
|
||||||
|
|
||||||
// Row-major dense matrices: C = A * B
|
// Row-major dense matrices: C = A * B
|
||||||
F64 *left = (F64 *)xmalloc((size_t)n * (size_t)n * sizeof(F64));
|
F64 *left = (F64 *)xmalloc((size_t)n * (size_t)n * sizeof(F64));
|
||||||
F64 *right = (F64 *)xmalloc((size_t)n * (size_t)n * sizeof(F64));
|
F64 *right = (F64 *)xmalloc((size_t)n * (size_t)n * sizeof(F64));
|
||||||
F64 *outMat = (F64 *)xmalloc((size_t)n * (size_t)n * sizeof(F64));
|
F64 *outMat = (F64 *)xmalloc((size_t)n * (size_t)n * sizeof(F64));
|
||||||
|
|
||||||
// Fill deterministically
|
// Fill deterministically
|
||||||
for (MKL_INT i = 0; i < n * n; i++) {
|
for (MKL_INT i = 0; i < n * n; i++) {
|
||||||
left[i] = (F64)((i % 13) + 1) * 0.1;
|
left[i] = (F64)((i % 13) + 1) * 0.1;
|
||||||
right[i] = (F64)((i % 17) + 1) * 0.1;
|
right[i] = (F64)((i % 17) + 1) * 0.1;
|
||||||
outMat[i] = 0.0;
|
outMat[i] = 0.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Warmup
|
// Warmup
|
||||||
cblas_dgemm(
|
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n, 1.0, left, n,
|
||||||
CblasRowMajor,
|
right, n, 0.0, outMat, n);
|
||||||
CblasNoTrans,
|
|
||||||
CblasNoTrans,
|
|
||||||
n, n, n,
|
|
||||||
1.0,
|
|
||||||
left, n,
|
|
||||||
right, n,
|
|
||||||
0.0,
|
|
||||||
outMat, n
|
|
||||||
);
|
|
||||||
|
|
||||||
S64 t0 = now_ns();
|
S64 t0 = now_ns();
|
||||||
|
|
||||||
for (int i = 0; i < g_dense_runs; i += 1) {
|
for (int i = 0; i < g_dense_runs; i += 1) {
|
||||||
cblas_dgemm(
|
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n, 1.0, left,
|
||||||
CblasRowMajor,
|
n, right, n, 0.0, outMat, n);
|
||||||
CblasNoTrans,
|
}
|
||||||
CblasNoTrans,
|
|
||||||
n, n, n,
|
|
||||||
1.0,
|
|
||||||
left, n,
|
|
||||||
right, n,
|
|
||||||
0.0,
|
|
||||||
outMat, n
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
S64 t1 = now_ns();
|
S64 t1 = now_ns();
|
||||||
S64 elapsed_ns = t1 - t0;
|
S64 elapsed_ns = t1 - t0;
|
||||||
|
|
||||||
out.DenseRuns = g_dense_runs;
|
out.DenseRuns = g_dense_runs;
|
||||||
out.DenseTotalNs = elapsed_ns;
|
out.DenseTotalNs = elapsed_ns;
|
||||||
out.DenseAvgNs = elapsed_ns / (S64)g_dense_runs;
|
out.DenseAvgNs = elapsed_ns / (S64)g_dense_runs;
|
||||||
|
|
||||||
printf("Dense matmul done for %d runs. C[0] = %.6g\n",
|
printf("Dense matmul done for %d runs. C[0] = %.6g\n", g_dense_runs,
|
||||||
g_dense_runs, outMat[0]);
|
outMat[0]);
|
||||||
|
|
||||||
F64 avg_ms = (F64)out.DenseAvgNs / 1e6;
|
F64 avg_ms = (F64)out.DenseAvgNs / 1e6;
|
||||||
printf("Average time for dense matmul: %.3f ms\n", avg_ms);
|
printf("Average time for dense matmul: %.3f ms\n", avg_ms);
|
||||||
|
|
||||||
free(left);
|
free(left);
|
||||||
free(right);
|
free(right);
|
||||||
free(outMat);
|
free(outMat);
|
||||||
|
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
Timing doSpMVTimings(const char *path) {
|
Timing doSpMVTimings(const char *path) {
|
||||||
printf("Reading market matrix %s \n", path);
|
printf("Reading market matrix %s \n", path);
|
||||||
CSRMatrix A;
|
CSRMatrix A;
|
||||||
A = read_matrix_market_to_csr(path);
|
A = read_matrix_market_to_csr(path);
|
||||||
printf("Read matrix with size %d x %d and nnz = %d \n", A.rows, A.cols, A.nnz);
|
printf("Read matrix with size %d x %d and nnz = %d \n", A.rows, A.cols,
|
||||||
|
A.nnz);
|
||||||
Timing out = timeSpMV(&A);
|
Timing out = timeSpMV(&A);
|
||||||
matrix_label_from_path(path, out.label, sizeof(out.label));
|
matrix_label_from_path(path, out.label, sizeof(out.label));
|
||||||
free_csr(&A);
|
free_csr(&A);
|
||||||
@ -624,19 +578,16 @@ Timing doSpMVTimings(const char *path) {
|
|||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
|
|
||||||
S32 numPaths = 4;
|
S32 numPaths = 4;
|
||||||
int denseRows[] = {1024, 2048, 4096, 4096*2};
|
int denseRows[] = {1024, 2048, 4096, 4096 * 2};
|
||||||
|
|
||||||
const char *paths[] = {
|
const char *paths[] = {
|
||||||
"E:\\dev\\go_matmul_perf\\suitesparse_test_matrices\\FEM_3D_thermal2.mtx",
|
"E:\\dev\\go_matmul_perf\\suitesparse_test_matrices\\FEM_3D_thermal2.mtx",
|
||||||
"E:\\dev\\go_matmul_perf\\suitesparse_test_matrices\\ldoor.mtx",
|
"E:\\dev\\go_matmul_perf\\suitesparse_test_matrices\\ldoor.mtx",
|
||||||
"E:\\dev\\go_matmul_perf\\suitesparse_test_matrices\\Cube_Coup_dt0.mtx",
|
"E:\\dev\\go_matmul_perf\\suitesparse_test_matrices\\Cube_Coup_dt0.mtx",
|
||||||
"E:\\dev\\go_matmul_perf\\suitesparse_test_matrices\\nlpkkt200.mtx"
|
"E:\\dev\\go_matmul_perf\\suitesparse_test_matrices\\nlpkkt200.mtx"};
|
||||||
};
|
|
||||||
|
|
||||||
// Sanity check for threading.
|
// Sanity check for threading.
|
||||||
// Single thread gave avg 0.9 ms and 16 threads gave 0.14 msg avg
|
// Single thread gave avg 0.9 ms and 16 threads gave 0.14 msg avg
|
||||||
@ -653,8 +604,8 @@ int main() {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
{
|
{
|
||||||
mkl_set_num_threads(16); // pick your core count
|
mkl_set_num_threads(16); // pick your core count
|
||||||
mkl_set_dynamic(0); // disable MKL changing thread count dynamically
|
mkl_set_dynamic(0); // disable MKL changing thread count dynamically
|
||||||
printf("MKL max threads: %d\n", mkl_get_max_threads());
|
printf("MKL max threads: %d\n", mkl_get_max_threads());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user