Avatar for the OpenMathLib user
OpenMathLib
OpenBLAS
BlogDocsChangelog

[WIP] Arm®v9-A architecture SME2 SGEMM kernels

#5011
Comparing
AymenQ:armv9-a-sme
(
3d282c9
) with
develop
(
89f02ed
)
CodSpeed Performance Gauge
0%
Untouched
62

Benchmarks

62 total
test_dgemv[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
+1%
146.7 µs145.8 µs
test_daxpy[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
39.7 µs39.5 µs
test_daxpy[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
23.2 µs23.1 µs
test_daxpy[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
31.3 µs31.2 µs
test_gesdd[mn0-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
105 µs104.8 µs
test_nrm2[1000-dz]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
34.7 µs34.6 µs
test_daxpy[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
26.8 µs26.8 µs
test_dgemv[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
108.6 µs108.5 µs
test_dgemv[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
229.8 µs229.6 µs
test_gesv[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
693 µs692.7 µs
test_dgemv[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
7.7 ms7.7 ms
test_gemm[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
658.8 µs658.6 µs
test_dgbmv[1-1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
117.7 µs117.7 µs
test_gemm[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
271.5 µs271.5 µs
test_dgbmv[1-1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
98.4 µs98.4 µs
test_dgemv[1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
14.8 ms14.8 ms
test_syrk[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
471.4 µs471.4 µs
test_gemm[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
239.4 ms239.4 ms
test_gesv[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
353.4 ms353.4 ms
test_syev[200-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
48.5 ms48.5 ms
test_gemm[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
117.4 ms117.4 ms
test_syrk[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
65.4 ms65.4 ms
test_syrk[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
476.4 ms476.4 ms
test_syrk[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
338.8 µs338.8 µs
test_syrk[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
130.3 ms130.4 ms

Commits

Click on a commit to change the comparison range
Base
develop
89f02ed
-0.03%
Add Arm®v9-A architecture SME SGEMM kernels
3d282c9
1 year ago
by AymenQ
© 2026 CodSpeed Technology
Home Terms Privacy Docs