Avatar for the OpenMathLib user
OpenMathLib
OpenBLAS
BlogDocsChangelog

Add vector registers to clobber list to prevent compiler optimization.

#5203Merged
Comparing
quic:fix-sgemmdirect-sme1
(
04915be
) with
develop
(
3e961c2
)
CodSpeed Performance Gauge
0%
Improvements
0
Regressions
0
Untouched
62
New
0
Dropped
0
Ignored
0

Benchmarks

Passed

test_dgemv[100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[100-d]
CodSpeed Performance Gauge
+1%
147.6 µs
146.8 µs
test_dgemv[100-c]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[100-c]
CodSpeed Performance Gauge
+1%
149.3 µs
148.5 µs
test_daxpy[1000-z]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[1000-z]
CodSpeed Performance Gauge
0%
40.1 µs
40 µs
test_dgemv[100-s]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[100-s]
CodSpeed Performance Gauge
0%
110.2 µs
109.8 µs
test_daxpy[100-z]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[100-z]
CodSpeed Performance Gauge
0%
25.3 µs
25.3 µs
test_dgbmv[1-100-z]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-100-z]
CodSpeed Performance Gauge
0%
41.5 µs
41.4 µs
test_nrm2[1000-dz]
benchmark/pybench/benchmarks/bench_blas.py::test_nrm2[1000-dz]
CodSpeed Performance Gauge
0%
34.9 µs
34.8 µs
test_daxpy[1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[1000-d]
CodSpeed Performance Gauge
0%
31.9 µs
31.8 µs
test_daxpy[100-c]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[100-c]
CodSpeed Performance Gauge
0%
24.6 µs
24.6 µs
test_daxpy[100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[100-d]
CodSpeed Performance Gauge
0%
23.6 µs
23.5 µs
test_dot[1000]
benchmark/pybench/benchmarks/bench_blas.py::test_dot[1000]
CodSpeed Performance Gauge
0%
27.9 µs
27.8 µs
test_daxpy[1000-s]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[1000-s]
CodSpeed Performance Gauge
0%
26.9 µs
26.9 µs
test_gesv[100-c]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[100-c]
CodSpeed Performance Gauge
0%
693.8 µs
693.1 µs
test_dgbmv[1-100-s]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-100-s]
CodSpeed Performance Gauge
0%
36.6 µs
36.6 µs
test_syev[50-d]
benchmark/pybench/benchmarks/bench_blas.py::test_syev[50-d]
CodSpeed Performance Gauge
0%
1.4 ms
1.4 ms
test_syrk[100-s]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[100-s]
CodSpeed Performance Gauge
0%
213 µs
212.9 µs
test_gemm[100-s]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[100-s]
CodSpeed Performance Gauge
0%
272.7 µs
272.6 µs
test_daxpy[1000-c]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[1000-c]
CodSpeed Performance Gauge
0%
32.2 µs
32.2 µs
test_syrk[100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[100-d]
CodSpeed Performance Gauge
0%
339.3 µs
339.1 µs
test_dgemv[1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[1000-d]
CodSpeed Performance Gauge
0%
14.7 ms
14.7 ms
test_dgemv[100-z]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[100-z]
CodSpeed Performance Gauge
0%
230.5 µs
230.5 µs
test_syrk[100-z]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[100-z]
CodSpeed Performance Gauge
0%
855.5 µs
855.4 µs
test_nrm2[1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_nrm2[1000-d]
CodSpeed Performance Gauge
0%
29.8 µs
29.8 µs
test_gesdd[mn1-d]
benchmark/pybench/benchmarks/bench_blas.py::test_gesdd[mn1-d]
CodSpeed Performance Gauge
0%
93.9 ms
93.9 ms
test_gemm[100-z]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[100-z]
CodSpeed Performance Gauge
0%
1.2 ms
1.2 ms
test_gemm[100-c]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[100-c]
CodSpeed Performance Gauge
0%
659 µs
658.9 µs
test_gesv[1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[1000-d]
CodSpeed Performance Gauge
0%
93.3 ms
93.3 ms
test_syev[200-s]
benchmark/pybench/benchmarks/bench_blas.py::test_syev[200-s]
CodSpeed Performance Gauge
0%
49.1 ms
49.1 ms
test_gemm[100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[100-d]
CodSpeed Performance Gauge
0%
470.7 µs
470.7 µs
test_gemm[1000-s]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[1000-s]
CodSpeed Performance Gauge
0%
117.4 ms
117.4 ms
test_dgemv[1000-z]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[1000-z]
CodSpeed Performance Gauge
0%
26.3 ms
26.3 ms
test_dgbmv[1-1000-s]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-1000-s]
CodSpeed Performance Gauge
0%
74.4 µs
74.4 µs
test_syev[50-s]
benchmark/pybench/benchmarks/bench_blas.py::test_syev[50-s]
CodSpeed Performance Gauge
0%
1.3 ms
1.3 ms
test_gesdd[mn1-s]
benchmark/pybench/benchmarks/bench_blas.py::test_gesdd[mn1-s]
CodSpeed Performance Gauge
0%
65.2 ms
65.2 ms
test_gemm[1000-z]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[1000-z]
CodSpeed Performance Gauge
0%
875.6 ms
875.6 ms
test_syrk[1000-z]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[1000-z]
CodSpeed Performance Gauge
0%
476.4 ms
476.4 ms
test_gemm[1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[1000-d]
CodSpeed Performance Gauge
0%
239.4 ms
239.4 ms
test_nrm2[100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_nrm2[100-d]
CodSpeed Performance Gauge
0%
35.4 µs
35.4 µs
test_gesv[1000-c]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[1000-c]
CodSpeed Performance Gauge
0%
188.6 ms
188.6 ms
test_syrk[1000-s]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[1000-s]
CodSpeed Performance Gauge
0%
65.4 ms
65.4 ms
test_gemm[1000-c]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[1000-c]
CodSpeed Performance Gauge
0%
426 ms
426 ms
test_syev[200-d]
benchmark/pybench/benchmarks/bench_blas.py::test_syev[200-d]
CodSpeed Performance Gauge
0%
58.6 ms
58.6 ms
test_syrk[1000-c]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[1000-c]
CodSpeed Performance Gauge
0%
227.5 ms
227.5 ms
test_gesv[1000-s]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[1000-s]
CodSpeed Performance Gauge
0%
52.6 ms
52.6 ms
test_dgemv[1000-c]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[1000-c]
CodSpeed Performance Gauge
0%
14.8 ms
14.8 ms
test_syrk[1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[1000-d]
CodSpeed Performance Gauge
0%
130.3 ms
130.4 ms
test_daxpy[100-s]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[100-s]
CodSpeed Performance Gauge
0%
23.4 µs
23.4 µs
test_gesv[100-z]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[100-z]
CodSpeed Performance Gauge
0%
935.1 µs
935.2 µs
test_syrk[100-c]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[100-c]
CodSpeed Performance Gauge
0%
471.8 µs
471.8 µs
test_nrm2[100-dz]
benchmark/pybench/benchmarks/bench_blas.py::test_nrm2[100-dz]
CodSpeed Performance Gauge
0%
27.9 µs
27.9 µs
test_dgemv[1000-s]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[1000-s]
CodSpeed Performance Gauge
0%
7.7 ms
7.7 ms
test_dgbmv[1-1000-c]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-1000-c]
CodSpeed Performance Gauge
0%
98.7 µs
98.7 µs
test_gesv[1000-z]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[1000-z]
CodSpeed Performance Gauge
0%
353.2 ms
353.4 ms
test_dgbmv[1-100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-100-d]
CodSpeed Performance Gauge
0%
37.3 µs
37.3 µs
test_gesv[100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[100-d]
CodSpeed Performance Gauge
0%
393.8 µs
394 µs
test_dgbmv[1-100-c]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-100-c]
CodSpeed Performance Gauge
0%
39.4 µs
39.4 µs
test_dgbmv[1-1000-z]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-1000-z]
CodSpeed Performance Gauge
0%
118.2 µs
118.3 µs
test_dgbmv[1-1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-1000-d]
CodSpeed Performance Gauge
0%
82.9 µs
83 µs
test_dot[100]
benchmark/pybench/benchmarks/bench_blas.py::test_dot[100]
CodSpeed Performance Gauge
0%
21.8 µs
21.9 µs
test_gesdd[mn0-s]
benchmark/pybench/benchmarks/bench_blas.py::test_gesdd[mn0-s]
CodSpeed Performance Gauge
0%
108.5 µs
108.7 µs
test_gesdd[mn0-d]
benchmark/pybench/benchmarks/bench_blas.py::test_gesdd[mn0-d]
CodSpeed Performance Gauge
0%
119.9 µs
120.2 µs
test_gesv[100-s]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[100-s]
CodSpeed Performance Gauge
0%
256.8 µs
257.5 µs

Commits

Click on a commit to change the comparison range
Base
develop
3e961c2
0%
Add vector registers to clobber list to prevent compiler optimization. SME based SGEMMDIRECT kernel uses the vector registers (z) and adding clobber list informs compiler not to optimize these registers.
04915be
4 months ago
by vaiskv
© 2025 CodSpeed Technology
Home Terms Privacy Docs