diff --git a/goSparseResults.json b/goSparseResults.json
deleted file mode 100644
index cf3c76b..0000000
--- a/goSparseResults.json
+++ /dev/null
@@ -1,54 +0,0 @@
-[
-  {
-    "label": "FEM_3D_thermal2",
-    "rows": 147900,
-    "cols": 147900,
-    "nnz": 3489300,
-    "matmul_runs": 16,
-    "matmul_total_ns": 1416892200,
-    "matmul_avg_ns": 88555762,
-    "matmul_output_nnz": 14335500,
-    "spmv_runs": 16,
-    "spmv_total_ns": 21017900,
-    "spmv_avg_ns": 1313618
-  },
-  {
-    "label": "ldoor",
-    "rows": 952203,
-    "cols": 952203,
-    "nnz": 23737339,
-    "matmul_runs": 16,
-    "matmul_total_ns": 6867829500,
-    "matmul_avg_ns": 429239343,
-    "matmul_output_nnz": 43783061,
-    "spmv_runs": 16,
-    "spmv_total_ns": 156089700,
-    "spmv_avg_ns": 9755606
-  },
-  {
-    "label": "Cube_Coup_dt0",
-    "rows": 2164760,
-    "cols": 2164760,
-    "nnz": 64685452,
-    "matmul_runs": 16,
-    "matmul_total_ns": 31149139200,
-    "matmul_avg_ns": 1946821200,
-    "matmul_output_nnz": 234465452,
-    "spmv_runs": 16,
-    "spmv_total_ns": 394415100,
-    "spmv_avg_ns": 24650943
-  },
-  {
-    "label": "nlpkkt200",
-    "rows": 16240000,
-    "cols": 16240000,
-    "nnz": 232232816,
-    "matmul_runs": 16,
-    "matmul_total_ns": 19196846100,
-    "matmul_avg_ns": 1199802881,
-    "matmul_output_nnz": 232232816,
-    "spmv_runs": 16,
-    "spmv_total_ns": 1844563400,
-    "spmv_avg_ns": 115285212
-  }
-]
diff --git a/src/main.go b/src/main.go
index 30a2385..8df258b 100644
--- a/src/main.go
+++ b/src/main.go
@@ -4,6 +4,7 @@ import (
 	"bufio"
 	"encoding/json"
 	"fmt"
+	"math/rand"
 	"os"
 	"path/filepath"
 	"runtime"
@@ -16,18 +17,21 @@ import (
 	"gonum.org/v1/gonum/mat"
 )
 
-var gNumTestIterations = 16
+var gNumTestIterations = 32
 
-type SparseMatrixTiming struct {
+// Timing holds the sizes and timing results recorded for one benchmark case.
+type Timing struct {
 	Label string `json:"label"`
 	Rows  int    `json:"rows"`
 	Cols  int    `json:"cols"`
 	NNZ   int    `json:"nnz"`
 
-	MatMulRuns      int   `json:"matmul_runs"`
-	MatMulTotalNs   int64 `json:"matmul_total_ns"`
-	MatMulAvgNs     int64 `json:"matmul_avg_ns"`
-	MatMulOutputNNZ int   `json:"matmul_output_nnz"`
+	DenseRows int `json:"dense_rows"`
+	DenseCols int `json:"dense_cols"`
+
+	DenseRuns    int   `json:"dense_runs"`
+	DenseTotalNs int64 `json:"dense_total_ns"`
+	DenseAvgNs   int64 `json:"dense_avg_ns"`
 
 	SpMVRuns    int   `json:"spmv_runs"`
 	SpMVTotalNs int64 `json:"spmv_total_ns"`
@@ -35,11 +39,11 @@ type SparseMatrixTiming struct {
 }
 
 type SparseBenchmarkCase struct {
-	Timing SparseMatrixTiming
+	Timing Timing
 	Matrix *sparse.CSR
 }
 
-func writeTimingJSON(all []SparseMatrixTiming, outPath string) {
+func writeTimingJSON(all []Timing, outPath string) {
 	f, err := os.Create(outPath)
 	if err != nil {
 		panic(err)
@@ -137,40 +141,6 @@ func getSparseBenchmarkCase(path string) SparseBenchmarkCase {
 	return out
 }
 
-func timeSparseMatmuls(bcase *SparseBenchmarkCase) {
-	mCSR := bcase.Matrix
-	var warm sparse.CSR
-	for i := 0; i < 3; i += 1 {
-		warm.Mul(mCSR, mCSR)
-	}
-
-	// The CSR x CSR matrix multiplication is supposed to have a specific optimised
-	// path for matmuls
-	var out sparse.CSR
-	numberOfMults := gNumTestIterations
-
-	fmt.Printf("NNZ before matmuls: %d\n", mCSR.NNZ())
-
-	timeBegin := time.Now()
-	for i := 0; i < numberOfMults; i += 1 {
-		out.Mul(mCSR, mCSR)
-	}
-	timeElapsed := time.Since(timeBegin)
-
-	fmt.Printf("NNZ after matmuls: %d\n", out.NNZ())
-
-	if warm.NNZ() != out.NNZ() {
-		panic("Sparsity pattern changed unexpectedly after matmul!")
-	}
-
-	bcase.Timing.MatMulOutputNNZ = out.NNZ()
-	bcase.Timing.MatMulRuns = numberOfMults
-	bcase.Timing.MatMulTotalNs = timeElapsed.Nanoseconds()
-
-	timeAvgNS := timeElapsed.Nanoseconds() / int64(numberOfMults)
-	bcase.Timing.MatMulAvgNs = timeAvgNS
-}
-
 func timeSparseMatVec(bcase *SparseBenchmarkCase) {
 	A := bcase.Matrix
 	rows, cols := A.Dims()
@@ -208,11 +178,59 @@ func timeSparseMatVec(bcase *SparseBenchmarkCase) {
 	bcase.Timing.SpMVRuns = numberOfRuns
 }
 
+// timeDenseMatmul benchmarks the dense matrix product C = A * B, where A, B
+// and C are rows x rows gonum Dense matrices.
+//
+// A and B are filled from a fixed-seed random source so that every call
+// multiplies the same operands. One untimed warm-up product is followed by
+// gNumTestIterations timed products.
+//
+// It returns the number of timed runs, their total elapsed time in
+// nanoseconds, and the average time per run (integer division). It panics if
+// gNumTestIterations is not positive.
+func timeDenseMatmul(rows int) (runs int, totalNs int64, avgNs int64) {
+	runs = gNumTestIterations
+	if runs <= 0 {
+		panic(fmt.Sprintf("timeDenseMatmul: run count must be positive, got %d", runs))
+	}
+
+	n := rows
+
+	// Fill the backing slices directly rather than calling Set per element.
+	// The explicit seed keeps the operands reproducible; the package-level
+	// math/rand functions are randomly seeded by default since Go 1.20.
+	rng := rand.New(rand.NewSource(1))
+	aData := make([]float64, n*n)
+	bData := make([]float64, n*n)
+	for i := range aData {
+		aData[i] = rng.Float64()
+		bData[i] = rng.Float64()
+	}
+
+	A := mat.NewDense(n, n, aData)
+	B := mat.NewDense(n, n, bData)
+	C := mat.NewDense(n, n, nil)
+
+	// Warm-up product, deliberately excluded from the timing.
+	C.Mul(A, B)
+
+	start := time.Now()
+	for i := 0; i < runs; i++ {
+		C.Mul(A, B)
+	}
+	elapsed := time.Since(start)
+
+	totalNs = elapsed.Nanoseconds()
+	avgNs = totalNs / int64(runs)
+
+	return runs, totalNs, avgNs
+}
+
 func timeNanoToMS(timeNS int64) float64 {
 	return float64(timeNS) / float64(1e6)
 }
 
-func doTimings(path string) SparseMatrixTiming {
+func doTimings(path string, denseRows int) Timing {
 	bcase := getSparseBenchmarkCase(path)
 	rows := bcase.Timing.Rows
 	cols := bcase.Timing.Cols
@@ -223,10 +241,17 @@ func doTimings(path string) SparseMatrixTiming {
 
 	doMatMul := true
 	if doMatMul {
-		fmt.Printf("Timing sparse matrix %s with size: %d x %d \n", bcase.Timing.Label, rows, cols)
+		drows := denseRows
+		dcols := denseRows
+		bcase.Timing.DenseRows = drows
+		bcase.Timing.DenseCols = dcols
+		fmt.Printf("Timing dense matmul with size: %d x %d \n", drows, dcols)
 		fmt.Printf("Matmul:\n")
-		timeSparseMatmuls(&bcase)
-		avgMatMulTimeMS := timeNanoToMS(bcase.Timing.MatMulAvgNs)
+		runs, totalNs, avgNs := timeDenseMatmul(denseRows)
+		bcase.Timing.DenseRuns = runs
+		bcase.Timing.DenseTotalNs = totalNs
+		bcase.Timing.DenseAvgNs = avgNs
+		avgMatMulTimeMS := timeNanoToMS(bcase.Timing.DenseAvgNs)
 		fmt.Printf("Avg matmul time for %s: %.4f ms \n", bcase.Timing.Label, avgMatMulTimeMS)
 	}
 
@@ -253,10 +278,24 @@ func main() {
 		"suitesparse_test_matrices/nlpkkt200.mtx",
 	}
 
-	results := make([]SparseMatrixTiming, 0, len(paths))
+	// denseRows[i] is the side length of the square dense matrices timed
+	// alongside paths[i].
+	// NOTE(review): the largest size (16384) needs 2 GiB per float64 matrix,
+	// i.e. about 6 GiB for A, B and C together.
+	denseRows := []int{
+		2048, 4096, 4096 * 2, 4096 * 4,
+	}
 
-	for _, path := range paths {
-		timing := doTimings(path)
+	numPaths := len(paths)
+	if numPaths != len(denseRows) {
+		panic(fmt.Sprintf("main: %d matrix paths but %d dense sizes; the lists must have equal length", numPaths, len(denseRows)))
+	}
+
+	results := make([]Timing, 0, numPaths)
+
+	for i, path := range paths {
+		denseRow := denseRows[i]
+		timing := doTimings(path, denseRow)
 		results = append(results, timing)
 
 		// Make sure to free previous matrices to be effective with RAM