reading matrix market format in C

This commit is contained in:
antonl 2026-03-15 14:31:47 +01:00
parent 2e177250db
commit c42b7d98df

View File

@ -1,6 +1,11 @@
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "mkl.h"
#include "mkl_spblas.h"
typedef int32_t S32;
typedef uint32_t U32;
@ -9,13 +14,302 @@ typedef double F64;
typedef int32_t B32;
typedef struct CSRMatrix CSRMatrix;
struct CSRMatrix {
MKL_INT rows;
MKL_INT cols;
MKL_INT nnz;
MKL_INT *row_ptr; // size rows + 1
MKL_INT *col_ind; // size nnz
F64 *values; // size nnz
};
typedef struct Triplet Triplet;
struct Triplet {
MKL_INT i;
MKL_INT j;
double v;
};
static void panic(const char *msg) {
fprintf(stderr, "%s\n", msg);
exit(EXIT_FAILURE);
}
static void *xmalloc(size_t n) {
void *p = malloc(n);
if (!p) panic("out of memory");
return p;
}
static void *xcalloc(size_t count, size_t size) {
void *p = calloc(count, size);
if(!p) {
panic("out of memory");
}
return p;
}
static char *trim_left(char *s) {
while (*s && isspace((unsigned char)*s)) s++;
return s;
}
/*
static int starts_with(const char *s, const char *prefix) {
while (*prefix) {
if (*s++ != *prefix++) return 0;
}
return 1;
}
*/
static int triplet_cmp(const void *a, const void *b) {
const Triplet *x = (const Triplet *)a;
const Triplet *y = (const Triplet *)b;
if (x->i < y->i) return -1;
if (x->i > y->i) return 1;
if (x->j < y->j) return -1;
if (x->j > y->j) return 1;
return 0;
}
// Supports common Matrix Market coordinate cases:
// %%MatrixMarket matrix coordinate real general
// %%MatrixMarket matrix coordinate integer general
// %%MatrixMarket matrix coordinate pattern general
// %%MatrixMarket matrix coordinate real symmetric
//
// For symmetric matrices, off-diagonal entries are mirrored.
// Duplicate entries are summed during COO->CSR compaction.
CSRMatrix read_matrix_market_to_csr(const char *path) {
FILE *fp = NULL;
errno_t err = fopen_s(&fp, path, "r");
if (err != 0 || fp == NULL) {
panic("failed to open Matrix Market file");
}
char line[4096];
if (fgets(line, sizeof(line), fp) == NULL) {
fclose(fp);
panic("failed to read Matrix Market header");
}
char banner[64];
char object[64];
char format[64];
char field[64];
char symmetry[64];
int scanned = sscanf_s(
line,
"%63s %63s %63s %63s %63s",
banner, (unsigned)_countof(banner),
object, (unsigned)_countof(object),
format, (unsigned)_countof(format),
field, (unsigned)_countof(field),
symmetry, (unsigned)_countof(symmetry)
);
if (scanned != 5) {
fclose(fp);
panic("invalid Matrix Market header");
}
if (strcmp(banner, "%%MatrixMarket") != 0) {
fclose(fp);
panic("not a Matrix Market file");
}
if (strcmp(object, "matrix") != 0) {
fclose(fp);
panic("only 'matrix' object supported");
}
if (strcmp(format, "coordinate") != 0) {
fclose(fp);
panic("only coordinate format supported");
}
int is_real = (strcmp(field, "real") == 0);
int is_integer = (strcmp(field, "integer") == 0);
int is_pattern = (strcmp(field, "pattern") == 0);
if (!is_real && !is_integer && !is_pattern) {
fclose(fp);
panic("unsupported Matrix Market field type");
}
int is_general = (strcmp(symmetry, "general") == 0);
int is_symmetric = (strcmp(symmetry, "symmetric") == 0);
if (!is_general && !is_symmetric) {
fclose(fp);
panic("unsupported Matrix Market symmetry");
}
int rows = 0;
int cols = 0;
int nnz_in_file = 0;
for (;;) {
if (fgets(line, sizeof(line), fp) == NULL) {
fclose(fp);
panic("missing size line");
}
char *s = trim_left(line);
if (*s == '%') {
continue;
}
scanned = sscanf_s(s, "%d %d %d", &rows, &cols, &nnz_in_file);
if (scanned != 3) {
fclose(fp);
panic("invalid size line");
}
break;
}
int cap = is_symmetric ? 2 * nnz_in_file : nnz_in_file;
Triplet *trips = (Triplet *)xmalloc((size_t)cap * sizeof(Triplet));
int tcount = 0;
while (fgets(line, sizeof(line), fp) != NULL) {
char *s = trim_left(line);
if (*s == '\0' || *s == '\n' || *s == '%') {
continue;
}
int i = 0;
int j = 0;
double v = 1.0;
if (is_pattern) {
scanned = sscanf_s(s, "%d %d", &i, &j);
if (scanned != 2) {
fclose(fp);
free(trips);
panic("bad pattern entry line");
}
} else if (is_integer) {
int iv = 0;
scanned = sscanf_s(s, "%d %d %d", &i, &j, &iv);
if (scanned != 3) {
fclose(fp);
free(trips);
panic("bad integer entry line");
}
v = (double)iv;
} else {
scanned = sscanf_s(s, "%d %d %lf", &i, &j, &v);
if (scanned != 3) {
fclose(fp);
free(trips);
panic("bad real entry line");
}
}
i -= 1;
j -= 1;
if (i < 0 || i >= rows || j < 0 || j >= cols) {
fclose(fp);
free(trips);
panic("entry index out of range");
}
trips[tcount].i = i;
trips[tcount].j = j;
trips[tcount].v = v;
tcount++;
if (is_symmetric && i != j) {
trips[tcount].i = j;
trips[tcount].j = i;
trips[tcount].v = v;
tcount++;
}
}
fclose(fp);
qsort(trips, (size_t)tcount, sizeof(Triplet), triplet_cmp);
Triplet *uniq = (Triplet *)xmalloc((size_t)tcount * sizeof(Triplet));
int ucount = 0;
for (int k = 0; k < tcount;) {
int i = trips[k].i;
int j = trips[k].j;
double sum = 0.0;
while (k < tcount && trips[k].i == i && trips[k].j == j) {
sum += trips[k].v;
k++;
}
uniq[ucount].i = i;
uniq[ucount].j = j;
uniq[ucount].v = sum;
ucount++;
}
free(trips);
CSRMatrix A;
A.rows = rows;
A.cols = cols;
A.nnz = ucount;
A.row_ptr = (int *)xcalloc((size_t)rows + 1, sizeof(int));
A.col_ind = (int *)xmalloc((size_t)ucount * sizeof(int));
A.values = (double *)xmalloc((size_t)ucount * sizeof(double));
for (int k = 0; k < ucount; k++) {
A.row_ptr[uniq[k].i + 1]++;
}
for (int i = 0; i < rows; i++) {
A.row_ptr[i + 1] += A.row_ptr[i];
}
int *next = (int *)xmalloc((size_t)rows * sizeof(int));
memcpy(next, A.row_ptr, (size_t)rows * sizeof(int));
for (int k = 0; k < ucount; k++) {
int row = uniq[k].i;
int p = next[row]++;
A.col_ind[p] = uniq[k].j;
A.values[p] = uniq[k].v;
}
free(next);
free(uniq);
return A;
}
static void free_csr(CSRMatrix *A) {
if (!A) return;
free(A->row_ptr);
free(A->col_ind);
free(A->values);
A->row_ptr = NULL;
A->col_ind = NULL;
A->values = NULL;
A->rows = 0;
A->cols = 0;
A->nnz = 0;
}
int main() {
printf(" ******* \n HELLO WARRUDU \n ****\n");
const char *thermal2Path = "E:\\dev\\go_matmul_perf\\suitesparse_test_matrices\\FEM_3D_thermal2.mtx";
printf("Reading market matrix %s \n", thermal2Path);
CSRMatrix A;
A = read_matrix_market_to_csr(thermal2Path);
printf("Read matrix with size %d x %d and nnz = %d \n", A.rows, A.cols, A.nnz);
free_csr(&A);