OpenMathLib
OpenBLAS
BlogDocsChangelog

Support for SGEMM_DIRECT Kernel based on SME1

#5084Merged
Comparing
quic:topic/sgemm_direct_sme1
(
f66ca05
) with
develop
(
a64b75a
)
CodSpeed Performance Gauge
0%
Untouched
62

Benchmarks

Passed

test_daxpy[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
+1%
24.4 µs24.2 µs
test_syrk[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
340.6 µs339 µs
test_dgbmv[1-100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
39.2 µs39.1 µs
test_dgbmv[1-100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
37 µs36.8 µs
test_daxpy[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
23.2 µs23.1 µs
test_dgbmv[1-1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
74 µs73.7 µs
test_dgbmv[1-100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
41.1 µs41 µs
test_syrk[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
472.8 µs471.5 µs
test_dgbmv[1-100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
36.2 µs36.2 µs
test_nrm2[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
35.3 µs35.3 µs
test_gesdd[mn0-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
117.4 µs117.2 µs
test_daxpy[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
25.1 µs25.1 µs
test_dgbmv[1-1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
117.9 µs117.8 µs
test_dgbmv[1-1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
82.6 µs82.6 µs
test_daxpy[1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
31.9 µs31.9 µs
test_gesv[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
693.7 µs693.1 µs
test_dgemv[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
146.5 µs146.4 µs
test_nrm2[1000-dz]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
34.7 µs34.7 µs
test_dgbmv[1-1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
98.4 µs98.3 µs
test_dgemv[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
149.3 µs149.2 µs
test_gesv[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
395.2 µs395 µs
test_dgemv[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
230.4 µs230.3 µs
test_gemm[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
470.6 µs470.6 µs
test_syev[50-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
1.4 ms1.4 ms
test_gesv[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
93.3 ms93.3 ms
test_syev[50-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
1.3 ms1.3 ms
test_dgemv[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
14.7 ms14.7 ms
test_gemm[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
1.2 ms1.2 ms
test_gesv[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
52.6 ms52.6 ms
test_gesv[1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
188.6 ms188.6 ms
test_syrk[1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
227.5 ms227.5 ms
test_gemm[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
239.4 ms239.4 ms
test_syrk[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
65.4 ms65.4 ms
test_gemm[1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
426 ms426 ms
test_gesdd[mn1-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
93.7 ms93.7 ms
test_gesv[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
353.4 ms353.4 ms
test_nrm2[100-dz]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
27.5 µs27.5 µs
test_syrk[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
476.4 ms476.4 ms
test_gemm[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
875.6 ms875.6 ms
test_gesdd[mn1-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
65.1 ms65.1 ms
test_syrk[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
130.3 ms130.4 ms
test_gemm[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
117.4 ms117.4 ms
test_dgemv[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
26.3 ms26.3 ms
test_syev[200-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
58.5 ms58.5 ms
test_daxpy[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
39.8 µs39.8 µs
test_syev[200-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
49.1 ms49.1 ms
test_dgemv[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
7.7 ms7.7 ms
test_dgemv[1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
14.8 ms14.8 ms
test_gesv[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
935.2 µs935.3 µs
test_syrk[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
855.2 µs855.3 µs
test_syrk[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
212.7 µs212.7 µs
test_gemm[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
658.8 µs659 µs
test_gemm[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
272.3 µs272.4 µs
test_dgemv[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
109.9 µs110 µs
test_nrm2[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
29.6 µs29.7 µs
test_daxpy[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
23.3 µs23.4 µs
test_daxpy[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
31.4 µs31.5 µs
test_dot[1000]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
27.6 µs27.7 µs
test_gesv[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
256.6 µs257.4 µs
test_daxpy[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
26.6 µs26.6 µs
test_gesdd[mn0-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
107.8 µs108.3 µs
test_dot[100]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
21.5 µs21.6 µs

Commits

Click on a commit to change the comparison range
Base
develop
a64b75a
+0.04%
Merge branch 'develop' into topic/sgemm_direct_sme1
f66ca05
11 months ago
by vaiskv
© 2026 CodSpeed Technology
Home Terms Privacy Docs