From 10f92af6cff71c3465969c60a850310430f5d7a7 Mon Sep 17 00:00:00 2001 From: antonl Date: Wed, 18 Mar 2026 09:48:11 +0100 Subject: [PATCH] added clangd support --- build.bat | 29 +- compile_flags.txt | 8 + src/main.c | 825 ++++++++++++++++++++++------------------------ 3 files changed, 412 insertions(+), 450 deletions(-) create mode 100644 compile_flags.txt diff --git a/build.bat b/build.bat index b8ca9c8..9a364a6 100644 --- a/build.bat +++ b/build.bat @@ -12,23 +12,26 @@ if "%SRC%"=="" ( set OUT=%~n1.exe +set CFLAGS="-O3" "-march=native" "-ffast-math" "-fopenmp" "-Wall" "-I%MKL_ROOT%\include" +set LDFLAGS=-L%MKL_ROOT%\lib -L%COMPILER_ROOT%\lib -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -llibiomp5md + pushd build del /F /Q * -clang ../src/%SRC% -o %OUT% ^ - -O3 ^ - -march=native ^ - -ffast-math ^ - -fopenmp ^ - -I"%MKL_ROOT%\include" ^ - -L"%MKL_ROOT%\lib" ^ - -L"%COMPILER_ROOT%\lib" ^ - -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core ^ - -llibiomp5md ^ - -Wall + +clang ../src/%SRC% -o %OUT% %CFLAGS% %LDFLAGS% popd +@rem Generate a file for clangd to understand the include files and compiler flags +( + echo -xc + echo -std=c11 + for %%f in (%CFLAGS%) do echo %%~f +) > compile_flags.txt + echo Build complete: build/%OUT% -set PATH=%PATH%;E:\lib\intel_mkl\mkl\2025.3\bin -set PATH=%PATH%;E:\lib\intel_mkl\compiler\2025.3\bin +set PATH=%PATH%;%MKL_ROOT%\bin +set PATH=%PATH%;%COMPILER_ROOT%\bin + +endlocal diff --git a/compile_flags.txt b/compile_flags.txt new file mode 100644 index 0000000..8f81c02 --- /dev/null +++ b/compile_flags.txt @@ -0,0 +1,8 @@ +-xc +-std=c11 +-O3 +-march=native +-ffast-math +-fopenmp +-Wall +-IE:\lib\intel_mkl\mkl\2025.3\include diff --git a/src/main.c b/src/main.c index 505e081..a8164ca 100644 --- a/src/main.c +++ b/src/main.c @@ -1,8 +1,8 @@ -#include +#include #include #include +#include #include -#include #define WIN32_LEAN_AND_MEAN #include @@ -28,7 +28,7 @@ struct CSRMatrix { MKL_INT nnz; MKL_INT *row_ptr; // size rows + 1 MKL_INT *col_ind; // size nnz - F64 *values; // size nnz + F64 *values; // size nnz }; typedef struct Triplet Triplet; @@ -38,7 +38,7 @@ struct Triplet { double v; }; -// I just ahve a separate dense timing struct that the timing +// I just ahve a separate dense timing struct that the timing // function fills out and then we transfer this information to the Timing struct // that is serialised to json typedef struct DenseTiming DenseTiming; @@ -75,129 +75,121 @@ static void panic(const char *msg) { } static void *xmalloc(size_t n) { - void *p = malloc(n); - if (!p) panic("out of memory"); - return p; + void *p = malloc(n); + if (!p) + panic("out of memory"); + return p; } static void *xcalloc(size_t count, size_t size) { void *p = calloc(count, size); - if(!p) { + if (!p) { panic("out of memory"); } return p; } -static void matrix_label_from_path(const char *path, char *out, size_t out_size) -{ - const char *base = path; +static void matrix_label_from_path(const char *path, char *out, + size_t out_size) { + const char *base = path; - const char *s1 = strrchr(path, '/'); - const char *s2 = strrchr(path, '\\'); + const char *s1 = strrchr(path, '/'); + const char *s2 = strrchr(path, '\\'); - if (s1 && s1 >= base) base = s1 + 1; - if (s2 && s2 >= base) base = s2 + 1; + if (s1 && s1 >= base) + base = s1 + 1; + if (s2 && s2 >= base) + base = s2 + 1; - strncpy_s(out, out_size, base, _TRUNCATE); + strncpy_s(out, out_size, base, _TRUNCATE); - char *ext = strrchr(out, '.'); - if (ext && strcmp(ext, ".mtx") == 0) { - *ext = '\0'; - } + char *ext = strrchr(out, '.'); + if (ext && strcmp(ext, ".mtx") == 0) { + *ext = '\0'; + } } -static S64 now_ns(void) -{ - static LARGE_INTEGER freq; - static B32 initialized = 0; +static S64 now_ns(void) { + static LARGE_INTEGER freq; + static B32 initialized = 0; - if (!initialized) { - QueryPerformanceFrequency(&freq); - initialized = 1; - } + if (!initialized) { + QueryPerformanceFrequency(&freq); + initialized = 1; + } - LARGE_INTEGER counter; - QueryPerformanceCounter(&counter); + LARGE_INTEGER counter; + QueryPerformanceCounter(&counter); - return (S64)((counter.QuadPart * 1000000000LL) / freq.QuadPart); + return (S64)((counter.QuadPart * 1000000000LL) / freq.QuadPart); } -static void timing_write_json(FILE *fp, const Timing *t) -{ -fprintf(fp, - "{" - "\"label\":\"%s\"," - "\"rows\":%d," - "\"cols\":%d," - "\"nnz\":%d," - "\"spmv_runs\":%d," - "\"spmv_total_ns\":%lld," - "\"spmv_avg_ns\":%lld," - "\"dense_rows\":%d," - "\"dense_cols\":%d," - "\"dense_runs\":%d," - "\"dense_total_ns\":%lld," - "\"dense_avg_ns\":%lld" - "}", - t->label, - t->rows, - t->cols, - t->NNZ, - t->SpMVRuns, - (long long)t->SpMVTotalNs, - (long long)t->SpMVAvgNs, - t->DenseRows, - t->DenseCols, - t->DenseRuns, - (long long)t->DenseTotalNs, - (long long)t->DenseAvgNs - ); +static void timing_write_json(FILE *fp, const Timing *t) { + fprintf(fp, + "{" + "\"label\":\"%s\"," + "\"rows\":%d," + "\"cols\":%d," + "\"nnz\":%d," + "\"spmv_runs\":%d," + "\"spmv_total_ns\":%lld," + "\"spmv_avg_ns\":%lld," + "\"dense_rows\":%d," + "\"dense_cols\":%d," + "\"dense_runs\":%d," + "\"dense_total_ns\":%lld," + "\"dense_avg_ns\":%lld" + "}", + t->label, t->rows, t->cols, t->NNZ, t->SpMVRuns, + (long long)t->SpMVTotalNs, (long long)t->SpMVAvgNs, t->DenseRows, + t->DenseCols, t->DenseRuns, (long long)t->DenseTotalNs, + (long long)t->DenseAvgNs); } -static void timings_write_json_file( - const char *path, - const Timing *timings, - int count) -{ - FILE *fp = NULL; +static void timings_write_json_file(const char *path, const Timing *timings, + int count) { + FILE *fp = NULL; - if (fopen_s(&fp, path, "w") != 0 || fp == NULL) - panic("failed to open json file"); + if (fopen_s(&fp, path, "w") != 0 || fp == NULL) + panic("failed to open json file"); - fprintf(fp, "[\n"); + fprintf(fp, "[\n"); - for (int i = 0; i < count; i++) - { - fprintf(fp, " "); - timing_write_json(fp, &timings[i]); + for (int i = 0; i < count; i++) { + fprintf(fp, " "); + timing_write_json(fp, &timings[i]); - if (i != count - 1) - fprintf(fp, ","); + if (i != count - 1) + fprintf(fp, ","); - fprintf(fp, "\n"); - } + fprintf(fp, "\n"); + } - fprintf(fp, "]\n"); + fprintf(fp, "]\n"); - fclose(fp); + fclose(fp); } static char *trim_left(char *s) { - while (*s && isspace((unsigned char)*s)) s++; - return s; + while (*s && isspace((unsigned char)*s)) + s++; + return s; } static inline int triplet_cmp(const void *a, const void *b) { - const Triplet *x = (const Triplet *)a; - const Triplet *y = (const Triplet *)b; - if (x->i < y->i) return -1; - if (x->i > y->i) return 1; - if (x->j < y->j) return -1; - if (x->j > y->j) return 1; - return 0; + const Triplet *x = (const Triplet *)a; + const Triplet *y = (const Triplet *)b; + if (x->i < y->i) + return -1; + if (x->i > y->i) + return 1; + if (x->j < y->j) + return -1; + if (x->j > y->j) + return 1; + return 0; } - // Exact-semantics "fast" version of the slow Matrix Market reader. // Keeps: // - general / symmetric handling @@ -209,331 +201,309 @@ static inline int triplet_cmp(const void *a, const void *b) { // - hot-loop parsing (strtol/strtod instead of sscanf_s) CSRMatrix read_matrix_market_to_csr(const char *path) { - FILE *fp = NULL; - errno_t err = fopen_s(&fp, path, "r"); - if (err != 0 || fp == NULL) { - panic("failed to open Matrix Market file"); - } + FILE *fp = NULL; + errno_t err = fopen_s(&fp, path, "r"); + if (err != 0 || fp == NULL) { + panic("failed to open Matrix Market file"); + } - // Bigger stdio buffer helps a lot for multi-GB files. - setvbuf(fp, NULL, _IOFBF, 1 << 22); // 4 MiB + // Bigger stdio buffer helps a lot for multi-GB files. + setvbuf(fp, NULL, _IOFBF, 1 << 22); // 4 MiB - char line[4096]; - - if (fgets(line, sizeof(line), fp) == NULL) { - fclose(fp); - panic("failed to read Matrix Market header"); - } - - char banner[64]; - char object[64]; - char format[64]; - char field[64]; - char symmetry[64]; - - int scanned = sscanf_s( - line, - "%63s %63s %63s %63s %63s", - banner, (unsigned)_countof(banner), - object, (unsigned)_countof(object), - format, (unsigned)_countof(format), - field, (unsigned)_countof(field), - symmetry, (unsigned)_countof(symmetry) - ); - - if (scanned != 5) { - fclose(fp); - panic("invalid Matrix Market header"); - } - - if (strcmp(banner, "%%MatrixMarket") != 0) { - fclose(fp); - panic("not a Matrix Market file"); - } - if (strcmp(object, "matrix") != 0) { - fclose(fp); - panic("only 'matrix' object supported"); - } - if (strcmp(format, "coordinate") != 0) { - fclose(fp); - panic("only coordinate format supported"); - } - - int is_real = (strcmp(field, "real") == 0); - int is_integer = (strcmp(field, "integer") == 0); - int is_pattern = (strcmp(field, "pattern") == 0); - - if (!is_real && !is_integer && !is_pattern) { - fclose(fp); - panic("unsupported Matrix Market field type"); - } - - int is_general = (strcmp(symmetry, "general") == 0); - int is_symmetric = (strcmp(symmetry, "symmetric") == 0); - - if (!is_general && !is_symmetric) { - fclose(fp); - panic("unsupported Matrix Market symmetry"); - } - - int rows = 0; - int cols = 0; - int nnz_in_file = 0; - - for (;;) { - if (fgets(line, sizeof(line), fp) == NULL) { - fclose(fp); - panic("missing size line"); - } - - char *s = trim_left(line); - if (*s == '%') { - continue; - } - - scanned = sscanf_s(s, "%d %d %d", &rows, &cols, &nnz_in_file); - if (scanned != 3) { - fclose(fp); - panic("invalid size line"); - } - break; - } - - int cap = is_symmetric ? 2 * nnz_in_file : nnz_in_file; - Triplet *trips = (Triplet *)xmalloc((size_t)cap * sizeof(Triplet)); - int tcount = 0; - - while (fgets(line, sizeof(line), fp) != NULL) { - char *s = trim_left(line); - - if (*s == '\0' || *s == '\n' || *s == '%') { - continue; - } - - char *p = s; - char *end = NULL; - - long li = strtol(p, &end, 10); - if (end == p) { - fclose(fp); - free(trips); - panic("bad entry line: failed to parse row"); - } - p = end; - - long lj = strtol(p, &end, 10); - if (end == p) { - fclose(fp); - free(trips); - panic("bad entry line: failed to parse col"); - } - p = end; - - double v = 1.0; - - if (is_pattern) { - // nothing else to parse - } else if (is_integer) { - long liv = strtol(p, &end, 10); - if (end == p) { - fclose(fp); - free(trips); - panic("bad integer entry line"); - } - v = (double)liv; - p = end; - } else { - v = strtod(p, &end); - if (end == p) { - fclose(fp); - free(trips); - panic("bad real entry line"); - } - p = end; - } - - int i = (int)li - 1; - int j = (int)lj - 1; - - if (i < 0 || i >= rows || j < 0 || j >= cols) { - fclose(fp); - free(trips); - panic("entry index out of range"); - } - - trips[tcount].i = i; - trips[tcount].j = j; - trips[tcount].v = v; - tcount++; - - if (is_symmetric && i != j) { - trips[tcount].i = j; - trips[tcount].j = i; - trips[tcount].v = v; - tcount++; - } - } + char line[4096]; + if (fgets(line, sizeof(line), fp) == NULL) { fclose(fp); + panic("failed to read Matrix Market header"); + } - qsort(trips, (size_t)tcount, sizeof(Triplet), triplet_cmp); + char banner[64]; + char object[64]; + char format[64]; + char field[64]; + char symmetry[64]; - Triplet *uniq = (Triplet *)xmalloc((size_t)tcount * sizeof(Triplet)); - int ucount = 0; + int scanned = sscanf_s( + line, "%63s %63s %63s %63s %63s", banner, (unsigned)_countof(banner), + object, (unsigned)_countof(object), format, (unsigned)_countof(format), + field, (unsigned)_countof(field), symmetry, (unsigned)_countof(symmetry)); - for (int k = 0; k < tcount;) { - int i = trips[k].i; - int j = trips[k].j; - double sum = 0.0; + if (scanned != 5) { + fclose(fp); + panic("invalid Matrix Market header"); + } - while (k < tcount && trips[k].i == i && trips[k].j == j) { - sum += trips[k].v; - k++; - } + if (strcmp(banner, "%%MatrixMarket") != 0) { + fclose(fp); + panic("not a Matrix Market file"); + } + if (strcmp(object, "matrix") != 0) { + fclose(fp); + panic("only 'matrix' object supported"); + } + if (strcmp(format, "coordinate") != 0) { + fclose(fp); + panic("only coordinate format supported"); + } - uniq[ucount].i = i; - uniq[ucount].j = j; - uniq[ucount].v = sum; - ucount++; + int is_real = (strcmp(field, "real") == 0); + int is_integer = (strcmp(field, "integer") == 0); + int is_pattern = (strcmp(field, "pattern") == 0); + + if (!is_real && !is_integer && !is_pattern) { + fclose(fp); + panic("unsupported Matrix Market field type"); + } + + int is_general = (strcmp(symmetry, "general") == 0); + int is_symmetric = (strcmp(symmetry, "symmetric") == 0); + + if (!is_general && !is_symmetric) { + fclose(fp); + panic("unsupported Matrix Market symmetry"); + } + + int rows = 0; + int cols = 0; + int nnz_in_file = 0; + + for (;;) { + if (fgets(line, sizeof(line), fp) == NULL) { + fclose(fp); + panic("missing size line"); } - free(trips); - - CSRMatrix A; - A.rows = rows; - A.cols = cols; - A.nnz = ucount; - A.row_ptr = (int *)xcalloc((size_t)rows + 1, sizeof(int)); - A.col_ind = (int *)xmalloc((size_t)ucount * sizeof(int)); - A.values = (double *)xmalloc((size_t)ucount * sizeof(double)); - - for (int k = 0; k < ucount; k++) { - A.row_ptr[uniq[k].i + 1]++; + char *s = trim_left(line); + if (*s == '%') { + continue; } - for (int i = 0; i < rows; i++) { - A.row_ptr[i + 1] += A.row_ptr[i]; + scanned = sscanf_s(s, "%d %d %d", &rows, &cols, &nnz_in_file); + if (scanned != 3) { + fclose(fp); + panic("invalid size line"); + } + break; + } + + int cap = is_symmetric ? 2 * nnz_in_file : nnz_in_file; + Triplet *trips = (Triplet *)xmalloc((size_t)cap * sizeof(Triplet)); + int tcount = 0; + + while (fgets(line, sizeof(line), fp) != NULL) { + char *s = trim_left(line); + + if (*s == '\0' || *s == '\n' || *s == '%') { + continue; } - int *next = (int *)xmalloc((size_t)rows * sizeof(int)); - memcpy(next, A.row_ptr, (size_t)rows * sizeof(int)); + char *p = s; + char *end = NULL; - for (int k = 0; k < ucount; k++) { - int row = uniq[k].i; - int p = next[row]++; + long li = strtol(p, &end, 10); + if (end == p) { + fclose(fp); + free(trips); + panic("bad entry line: failed to parse row"); + } + p = end; - A.col_ind[p] = uniq[k].j; - A.values[p] = uniq[k].v; + long lj = strtol(p, &end, 10); + if (end == p) { + fclose(fp); + free(trips); + panic("bad entry line: failed to parse col"); + } + p = end; + + double v = 1.0; + + if (is_pattern) { + // nothing else to parse + } else if (is_integer) { + long liv = strtol(p, &end, 10); + if (end == p) { + fclose(fp); + free(trips); + panic("bad integer entry line"); + } + v = (double)liv; + p = end; + } else { + v = strtod(p, &end); + if (end == p) { + fclose(fp); + free(trips); + panic("bad real entry line"); + } + p = end; } - free(next); - free(uniq); + int i = (int)li - 1; + int j = (int)lj - 1; - return A; -} + if (i < 0 || i >= rows || j < 0 || j >= cols) { + fclose(fp); + free(trips); + panic("entry index out of range"); + } + + trips[tcount].i = i; + trips[tcount].j = j; + trips[tcount].v = v; + tcount++; + + if (is_symmetric && i != j) { + trips[tcount].i = j; + trips[tcount].j = i; + trips[tcount].v = v; + tcount++; + } + } + + fclose(fp); + + qsort(trips, (size_t)tcount, sizeof(Triplet), triplet_cmp); + + Triplet *uniq = (Triplet *)xmalloc((size_t)tcount * sizeof(Triplet)); + int ucount = 0; + + for (int k = 0; k < tcount;) { + int i = trips[k].i; + int j = trips[k].j; + double sum = 0.0; + + while (k < tcount && trips[k].i == i && trips[k].j == j) { + sum += trips[k].v; + k++; + } + + uniq[ucount].i = i; + uniq[ucount].j = j; + uniq[ucount].v = sum; + ucount++; + } + + free(trips); + + CSRMatrix A; + A.rows = rows; + A.cols = cols; + A.nnz = ucount; + A.row_ptr = (int *)xcalloc((size_t)rows + 1, sizeof(int)); + A.col_ind = (int *)xmalloc((size_t)ucount * sizeof(int)); + A.values = (double *)xmalloc((size_t)ucount * sizeof(double)); + + for (int k = 0; k < ucount; k++) { + A.row_ptr[uniq[k].i + 1]++; + } + + for (int i = 0; i < rows; i++) { + A.row_ptr[i + 1] += A.row_ptr[i]; + } + + int *next = (int *)xmalloc((size_t)rows * sizeof(int)); + memcpy(next, A.row_ptr, (size_t)rows * sizeof(int)); + + for (int k = 0; k < ucount; k++) { + int row = uniq[k].i; + int p = next[row]++; + + A.col_ind[p] = uniq[k].j; + A.values[p] = uniq[k].v; + } + + free(next); + free(uniq); + + return A; +} static void free_csr(CSRMatrix *A) { - if (!A) return; - free(A->row_ptr); - free(A->col_ind); - free(A->values); - A->row_ptr = NULL; - A->col_ind = NULL; - A->values = NULL; - A->rows = 0; - A->cols = 0; - A->nnz = 0; + if (!A) + return; + free(A->row_ptr); + free(A->col_ind); + free(A->values); + A->row_ptr = NULL; + A->col_ind = NULL; + A->values = NULL; + A->rows = 0; + A->cols = 0; + A->nnz = 0; } static sparse_matrix_t csr_to_mkl_handle(const CSRMatrix *A) { - sparse_matrix_t H = NULL; + sparse_matrix_t H = NULL; - // oneMKL CSR creation takes row_start and row_end arrays. - // With standard CSR row_ptr, these are row_ptr[i] and row_ptr[i+1]. - sparse_status_t st = mkl_sparse_d_create_csr( - &H, - SPARSE_INDEX_BASE_ZERO, - A->rows, - A->cols, - A->row_ptr, - A->row_ptr + 1, - A->col_ind, - A->values - ); - if (st != SPARSE_STATUS_SUCCESS) panic("mkl_sparse_d_create_csr failed"); + // oneMKL CSR creation takes row_start and row_end arrays. + // With standard CSR row_ptr, these are row_ptr[i] and row_ptr[i+1]. + sparse_status_t st = mkl_sparse_d_create_csr( + &H, SPARSE_INDEX_BASE_ZERO, A->rows, A->cols, A->row_ptr, A->row_ptr + 1, + A->col_ind, A->values); + if (st != SPARSE_STATUS_SUCCESS) + panic("mkl_sparse_d_create_csr failed"); - return H; + return H; } Timing timeSpMV(const CSRMatrix *A) { - Timing out = {0}; - sparse_matrix_t H = csr_to_mkl_handle(A); + Timing out = {0}; + sparse_matrix_t H = csr_to_mkl_handle(A); - struct matrix_descr descr; - descr.type = SPARSE_MATRIX_TYPE_GENERAL; - descr.mode = SPARSE_FILL_MODE_FULL; - descr.diag = SPARSE_DIAG_NON_UNIT; + struct matrix_descr descr; + descr.type = SPARSE_MATRIX_TYPE_GENERAL; + descr.mode = SPARSE_FILL_MODE_FULL; + descr.diag = SPARSE_DIAG_NON_UNIT; - // Optional optimization path recommended by oneMKL. - mkl_sparse_set_mv_hint(H, SPARSE_OPERATION_NON_TRANSPOSE, descr, g_spmv_runs); - mkl_sparse_optimize(H); + // Optional optimization path recommended by oneMKL. + mkl_sparse_set_mv_hint(H, SPARSE_OPERATION_NON_TRANSPOSE, descr, g_spmv_runs); + mkl_sparse_optimize(H); - double *x = (double *)xmalloc((size_t)A->cols * sizeof(double)); - double *y = (double *)xcalloc((size_t)A->rows, sizeof(double)); + double *x = (double *)xmalloc((size_t)A->cols * sizeof(double)); + double *y = (double *)xcalloc((size_t)A->rows, sizeof(double)); - for (MKL_INT i = 0; i < A->cols; i++) x[i] = 1.0; + for (MKL_INT i = 0; i < A->cols; i++) + x[i] = 1.0; - // warmup - for(int i = 0; i < 2; i += 1) { - sparse_status_t st = mkl_sparse_d_mv( - SPARSE_OPERATION_NON_TRANSPOSE, - 1.0, - H, - descr, - x, - 0.0, - y - ); - if (st != SPARSE_STATUS_SUCCESS) panic("mkl_sparse_d_mv failed"); - } + // warmup + for (int i = 0; i < 2; i += 1) { + sparse_status_t st = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, H, + descr, x, 0.0, y); + if (st != SPARSE_STATUS_SUCCESS) + panic("mkl_sparse_d_mv failed"); + } - S64 t0 = now_ns(); - for (int i = 0; i < g_spmv_runs; i += 1) { - sparse_status_t st = mkl_sparse_d_mv( - SPARSE_OPERATION_NON_TRANSPOSE, - 1.0, - H, - descr, - x, - 0.0, - y - ); - if (st != SPARSE_STATUS_SUCCESS) panic("mkl_sparse_d_mv failed"); - } - S64 t1 = now_ns(); - - S64 elapsed_ns = t1-t0; + S64 t0 = now_ns(); + for (int i = 0; i < g_spmv_runs; i += 1) { + sparse_status_t st = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, H, + descr, x, 0.0, y); + if (st != SPARSE_STATUS_SUCCESS) + panic("mkl_sparse_d_mv failed"); + } + S64 t1 = now_ns(); - out.rows = A->rows; - out.cols = A->cols; - out.NNZ = A->nnz; - out.SpMVRuns = g_spmv_runs; - out.SpMVTotalNs = elapsed_ns; - out.SpMVAvgNs = elapsed_ns / g_spmv_runs; + S64 elapsed_ns = t1 - t0; - printf("SpMV done for %d runs. y[0] = %.6g\n", g_spmv_runs, (A->rows > 0 ? y[0] : 0.0)); + out.rows = A->rows; + out.cols = A->cols; + out.NNZ = A->nnz; + out.SpMVRuns = g_spmv_runs; + out.SpMVTotalNs = elapsed_ns; + out.SpMVAvgNs = elapsed_ns / g_spmv_runs; - F64 avg_ms = (F64)out.SpMVAvgNs/1e6; - printf("Average time for SpMV: %.3f ms \n", avg_ms); + printf("SpMV done for %d runs. y[0] = %.6g\n", g_spmv_runs, + (A->rows > 0 ? y[0] : 0.0)); - free(x); - free(y); - + F64 avg_ms = (F64)out.SpMVAvgNs / 1e6; + printf("Average time for SpMV: %.3f ms \n", avg_ms); - mkl_sparse_destroy(H); + free(x); + free(y); + mkl_sparse_destroy(H); - return out; + return out; } static DenseTiming timeDenseMatmul(int inRows) { @@ -545,78 +515,62 @@ static DenseTiming timeDenseMatmul(int inRows) { out.DenseRows = rows; out.DenseCols = cols; - S64 byteReq = rows*cols*sizeof(F64); - F64 GBreq = (F64)byteReq/(1024.0 * 1024.0 * 1024.0); - printf("Matrix of size %i x %i requires %.2f GB of memory \n", inRows, inRows, GBreq); + S64 byteReq = rows * cols * sizeof(F64); + F64 GBreq = (F64)byteReq / (1024.0 * 1024.0 * 1024.0); + printf("Matrix of size %i x %i requires %.2f GB of memory \n", inRows, inRows, + GBreq); MKL_INT n = rows; - // Row-major dense matrices: C = A * B - F64 *left = (F64 *)xmalloc((size_t)n * (size_t)n * sizeof(F64)); - F64 *right = (F64 *)xmalloc((size_t)n * (size_t)n * sizeof(F64)); - F64 *outMat = (F64 *)xmalloc((size_t)n * (size_t)n * sizeof(F64)); + // Row-major dense matrices: C = A * B + F64 *left = (F64 *)xmalloc((size_t)n * (size_t)n * sizeof(F64)); + F64 *right = (F64 *)xmalloc((size_t)n * (size_t)n * sizeof(F64)); + F64 *outMat = (F64 *)xmalloc((size_t)n * (size_t)n * sizeof(F64)); - // Fill deterministically - for (MKL_INT i = 0; i < n * n; i++) { - left[i] = (F64)((i % 13) + 1) * 0.1; - right[i] = (F64)((i % 17) + 1) * 0.1; - outMat[i] = 0.0; - } + // Fill deterministically + for (MKL_INT i = 0; i < n * n; i++) { + left[i] = (F64)((i % 13) + 1) * 0.1; + right[i] = (F64)((i % 17) + 1) * 0.1; + outMat[i] = 0.0; + } - // Warmup - cblas_dgemm( - CblasRowMajor, - CblasNoTrans, - CblasNoTrans, - n, n, n, - 1.0, - left, n, - right, n, - 0.0, - outMat, n - ); + // Warmup + cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n, 1.0, left, n, + right, n, 0.0, outMat, n); - S64 t0 = now_ns(); + S64 t0 = now_ns(); - for (int i = 0; i < g_dense_runs; i += 1) { - cblas_dgemm( - CblasRowMajor, - CblasNoTrans, - CblasNoTrans, - n, n, n, - 1.0, - left, n, - right, n, - 0.0, - outMat, n - ); - } + for (int i = 0; i < g_dense_runs; i += 1) { + cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n, 1.0, left, + n, right, n, 0.0, outMat, n); + } - S64 t1 = now_ns(); - S64 elapsed_ns = t1 - t0; + S64 t1 = now_ns(); + S64 elapsed_ns = t1 - t0; - out.DenseRuns = g_dense_runs; - out.DenseTotalNs = elapsed_ns; - out.DenseAvgNs = elapsed_ns / (S64)g_dense_runs; + out.DenseRuns = g_dense_runs; + out.DenseTotalNs = elapsed_ns; + out.DenseAvgNs = elapsed_ns / (S64)g_dense_runs; - printf("Dense matmul done for %d runs. C[0] = %.6g\n", - g_dense_runs, outMat[0]); + printf("Dense matmul done for %d runs. C[0] = %.6g\n", g_dense_runs, + outMat[0]); - F64 avg_ms = (F64)out.DenseAvgNs / 1e6; - printf("Average time for dense matmul: %.3f ms\n", avg_ms); + F64 avg_ms = (F64)out.DenseAvgNs / 1e6; + printf("Average time for dense matmul: %.3f ms\n", avg_ms); - free(left); - free(right); - free(outMat); + free(left); + free(right); + free(outMat); - return out; + return out; } Timing doSpMVTimings(const char *path) { printf("Reading market matrix %s \n", path); CSRMatrix A; A = read_matrix_market_to_csr(path); - printf("Read matrix with size %d x %d and nnz = %d \n", A.rows, A.cols, A.nnz); + printf("Read matrix with size %d x %d and nnz = %d \n", A.rows, A.cols, + A.nnz); Timing out = timeSpMV(&A); matrix_label_from_path(path, out.label, sizeof(out.label)); free_csr(&A); @@ -624,21 +578,18 @@ Timing doSpMVTimings(const char *path) { return out; } - - int main() { S32 numPaths = 4; - int denseRows[] = {1024, 2048, 4096, 4096*2}; + int denseRows[] = {1024, 2048, 4096, 4096 * 2}; const char *paths[] = { - "E:\\dev\\go_matmul_perf\\suitesparse_test_matrices\\FEM_3D_thermal2.mtx", - "E:\\dev\\go_matmul_perf\\suitesparse_test_matrices\\ldoor.mtx", - "E:\\dev\\go_matmul_perf\\suitesparse_test_matrices\\Cube_Coup_dt0.mtx", - "E:\\dev\\go_matmul_perf\\suitesparse_test_matrices\\nlpkkt200.mtx" - }; + "E:\\dev\\go_matmul_perf\\suitesparse_test_matrices\\FEM_3D_thermal2.mtx", + "E:\\dev\\go_matmul_perf\\suitesparse_test_matrices\\ldoor.mtx", + "E:\\dev\\go_matmul_perf\\suitesparse_test_matrices\\Cube_Coup_dt0.mtx", + "E:\\dev\\go_matmul_perf\\suitesparse_test_matrices\\nlpkkt200.mtx"}; -// Sanity check for threading. +// Sanity check for threading. // Single thread gave avg 0.9 ms and 16 threads gave 0.14 msg avg #if 0 { @@ -650,11 +601,11 @@ int main() { mkl_set_num_threads(16); doTimings(paths[0]); } -#endif +#endif { - mkl_set_num_threads(16); // pick your core count - mkl_set_dynamic(0); // disable MKL changing thread count dynamically + mkl_set_num_threads(16); // pick your core count + mkl_set_dynamic(0); // disable MKL changing thread count dynamically printf("MKL max threads: %d\n", mkl_get_max_threads()); }