I can't time these big matrices — it takes too long — and I have to check that I use native Gonum

This commit is contained in:
antonl 2026-03-15 19:10:12 +01:00
parent 11ef165017
commit d7c10045ba
2 changed files with 68 additions and 103 deletions

View File

@ -1,54 +0,0 @@
[
{
"label": "FEM_3D_thermal2",
"rows": 147900,
"cols": 147900,
"nnz": 3489300,
"matmul_runs": 16,
"matmul_total_ns": 1416892200,
"matmul_avg_ns": 88555762,
"matmul_output_nnz": 14335500,
"spmv_runs": 16,
"spmv_total_ns": 21017900,
"spmv_avg_ns": 1313618
},
{
"label": "ldoor",
"rows": 952203,
"cols": 952203,
"nnz": 23737339,
"matmul_runs": 16,
"matmul_total_ns": 6867829500,
"matmul_avg_ns": 429239343,
"matmul_output_nnz": 43783061,
"spmv_runs": 16,
"spmv_total_ns": 156089700,
"spmv_avg_ns": 9755606
},
{
"label": "Cube_Coup_dt0",
"rows": 2164760,
"cols": 2164760,
"nnz": 64685452,
"matmul_runs": 16,
"matmul_total_ns": 31149139200,
"matmul_avg_ns": 1946821200,
"matmul_output_nnz": 234465452,
"spmv_runs": 16,
"spmv_total_ns": 394415100,
"spmv_avg_ns": 24650943
},
{
"label": "nlpkkt200",
"rows": 16240000,
"cols": 16240000,
"nnz": 232232816,
"matmul_runs": 16,
"matmul_total_ns": 19196846100,
"matmul_avg_ns": 1199802881,
"matmul_output_nnz": 232232816,
"spmv_runs": 16,
"spmv_total_ns": 1844563400,
"spmv_avg_ns": 115285212
}
]

View File

@ -4,6 +4,7 @@ import (
"bufio"
"encoding/json"
"fmt"
"math/rand"
"os"
"path/filepath"
"runtime"
@ -16,18 +17,20 @@ import (
"gonum.org/v1/gonum/mat"
)
var gNumTestIterations = 16
var gNumTestIterations = 32
type SparseMatrixTiming struct {
type Timing struct {
Label string `json:"label"`
Rows int `json:"rows"`
Cols int `json:"cols"`
NNZ int `json:"nnz"`
MatMulRuns int `json:"matmul_runs"`
MatMulTotalNs int64 `json:"matmul_total_ns"`
MatMulAvgNs int64 `json:"matmul_avg_ns"`
MatMulOutputNNZ int `json:"matmul_output_nnz"`
DenseRows int `json:"dense_rows"`
DenseCols int `json:"dense_cols"`
DenseRuns int `json:"dense_runs"`
DenseTotalNs int64 `json:"dense_total_ns"`
DenseAvgNs int64 `json:"dense_avg_ns"`
SpMVRuns int `json:"spmv_runs"`
SpMVTotalNs int64 `json:"spmv_total_ns"`
@ -35,11 +38,11 @@ type SparseMatrixTiming struct {
}
type SparseBenchmarkCase struct {
Timing SparseMatrixTiming
Timing Timing
Matrix *sparse.CSR
}
func writeTimingJSON(all []SparseMatrixTiming, outPath string) {
func writeTimingJSON(all []Timing, outPath string) {
f, err := os.Create(outPath)
if err != nil {
panic(err)
@ -137,40 +140,6 @@ func getSparseBenchmarkCase(path string) SparseBenchmarkCase {
return out
}
// timeSparseMatmuls benchmarks repeated CSR x CSR multiplication of the
// case's matrix with itself and records run count, total/average time,
// and the output NNZ in bcase.Timing. It panics if the timed output's
// sparsity pattern differs from the warm-up output's.
func timeSparseMatmuls(bcase *SparseBenchmarkCase) {
	mCSR := bcase.Matrix

	// Untimed warm-up multiplications so the timed loop measures
	// steady-state performance rather than first-run allocation cost.
	var warm sparse.CSR
	for i := 0; i < 3; i++ {
		warm.Mul(mCSR, mCSR)
	}

	// The CSR x CSR matrix multiplication is supposed to have a specific optimised
	// path for matmuls
	var out sparse.CSR
	numberOfMults := gNumTestIterations

	fmt.Printf("NNZ before matmuls: %d\n", mCSR.NNZ())
	timeBegin := time.Now()
	for i := 0; i < numberOfMults; i++ {
		out.Mul(mCSR, mCSR)
	}
	timeElapsed := time.Since(timeBegin)
	fmt.Printf("NNZ after matmuls: %d\n", out.NNZ())

	// A*A is deterministic, so the warm-up and timed results must agree
	// on the number of non-zeros.
	if warm.NNZ() != out.NNZ() {
		panic("Sparsity pattern changed unexpectedly after matmul!")
	}

	bcase.Timing.MatMulOutputNNZ = out.NNZ()
	bcase.Timing.MatMulRuns = numberOfMults
	bcase.Timing.MatMulTotalNs = timeElapsed.Nanoseconds()
	bcase.Timing.MatMulAvgNs = timeElapsed.Nanoseconds() / int64(numberOfMults)
}
func timeSparseMatVec(bcase *SparseBenchmarkCase) {
A := bcase.Matrix
rows, cols := A.Dims()
@ -208,11 +177,44 @@ func timeSparseMatVec(bcase *SparseBenchmarkCase) {
bcase.Timing.SpMVRuns = numberOfRuns
}
// timeDenseMatmul benchmarks Gonum's dense matrix multiplication on
// n x n matrices, where n = rows. It returns the number of timed runs
// plus the total and per-run elapsed wall time in nanoseconds.
func timeDenseMatmul(rows int) (runs int, totalNs int64, avgNs int64) {
	n := rows
	A := mat.NewDense(n, n, nil)
	B := mat.NewDense(n, n, nil)
	C := mat.NewDense(n, n, nil)

	// Fill deterministically: use an explicitly seeded local source. The
	// package-level rand functions are randomly seeded as of Go 1.20, so
	// they would NOT give reproducible inputs across process runs.
	rng := rand.New(rand.NewSource(1))
	for i := 0; i < n; i++ {
		for j := 0; j < n; j++ {
			A.Set(i, j, rng.Float64())
			B.Set(i, j, rng.Float64())
		}
	}

	runs = gNumTestIterations

	// One untimed multiplication to warm caches and internal buffers.
	C.Mul(A, B)

	start := time.Now()
	for i := 0; i < runs; i++ {
		C.Mul(A, B)
	}
	elapsed := time.Since(start)

	totalNs = elapsed.Nanoseconds()
	avgNs = totalNs / int64(runs)
	return runs, totalNs, avgNs
}
// timeNanoToMS converts a nanosecond count to milliseconds.
func timeNanoToMS(timeNS int64) float64 {
	const nsPerMS = 1e6
	return float64(timeNS) / nsPerMS
}
func doTimings(path string) SparseMatrixTiming {
func doTimings(path string, denseRows int) Timing {
bcase := getSparseBenchmarkCase(path)
rows := bcase.Timing.Rows
cols := bcase.Timing.Cols
@ -223,10 +225,17 @@ func doTimings(path string) SparseMatrixTiming {
doMatMul := true
if doMatMul {
fmt.Printf("Timing sparse matrix %s with size: %d x %d \n", bcase.Timing.Label, rows, cols)
drows := denseRows
dcols := denseRows
bcase.Timing.DenseRows = drows
bcase.Timing.DenseCols = dcols
fmt.Printf("Timing dense matmulwith size: %d x %d \n", drows, dcols)
fmt.Printf("Matmul:\n")
timeSparseMatmuls(&bcase)
avgMatMulTimeMS := timeNanoToMS(bcase.Timing.MatMulAvgNs)
runs, totalNs, avgNs := timeDenseMatmul(denseRows)
bcase.Timing.DenseRuns = runs
bcase.Timing.DenseTotalNs = totalNs
bcase.Timing.DenseAvgNs = avgNs
avgMatMulTimeMS := timeNanoToMS(bcase.Timing.DenseAvgNs)
fmt.Printf("Avg matmul time for %s: %.4f ms \n", bcase.Timing.Label, avgMatMulTimeMS)
}
@ -253,10 +262,20 @@ func main() {
"suitesparse_test_matrices/nlpkkt200.mtx",
}
results := make([]SparseMatrixTiming, 0, len(paths))
denseRows := []int{
2048, 4096, 4096 * 2, 4096 * 4,
}
for _, path := range paths {
timing := doTimings(path)
numPaths := len(paths)
if numPaths != len(denseRows) {
panic("bleh")
}
results := make([]Timing, 0, numPaths)
for i, path := range paths {
denseRow := denseRows[i]
timing := doTimings(path, denseRow)
results = append(results, timing)
// Make sure to free previous matrices to be effective with RAM