Avatar for the OpenMathLib user
OpenMathLib
OpenBLAS
BlogDocsChangelog

POWER10: Reduce sgemm loop unrolling

#5592Merged
Comparing
RajalakshmiSR:sgemm-p10-unroll
(
2283fcb
) with
develop
(
e4344de
)
CodSpeed Performance Gauge
0%
Untouched
62

Benchmarks

62 total
test_syev[200-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
+2%
49.1 ms48.1 ms
test_gesdd[mn1-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
+1%
65.2 ms64.8 ms
test_nrm2[100-dz]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
28.8 µs28.7 µs
test_dot[100]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
22.5 µs22.4 µs
test_nrm2[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
38.2 µs38 µs
test_dgbmv[1-100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
42.3 µs42.1 µs
test_daxpy[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
24.3 µs24.2 µs
test_daxpy[1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
32.8 µs32.7 µs
test_daxpy[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
26 µs25.9 µs
test_dgbmv[1-100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
37.3 µs37.2 µs
test_daxpy[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
27.6 µs27.5 µs
test_nrm2[1000-dz]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
35.3 µs35.2 µs
test_daxpy[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
32.5 µs32.4 µs
test_dgbmv[1-100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
37.9 µs37.8 µs
test_daxpy[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
24.1 µs24.1 µs
test_gemm[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
1.2 ms1.2 ms
test_dot[1000]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
28.5 µs28.5 µs
test_daxpy[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
25.1 µs25.1 µs
test_gesv[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
938 µs937 µs
test_dgemv[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
140.8 µs140.6 µs
test_nrm2[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
30.4 µs30.4 µs
test_dgbmv[1-1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
118.9 µs118.8 µs
test_dgbmv[1-1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
99.4 µs99.4 µs
test_dgemv[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
104.1 µs104 µs
test_gesv[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
353.6 ms353.4 ms

Commits

Click on a commit to change the comparison range
Base
develop
e4344de
+0.08%
POWER10: Reduce sgemm loop unrolling
2283fcb
5 months ago
© 2026 CodSpeed Technology
Home Terms Privacy Docs