Avatar for the OpenMathLib user
OpenMathLib
OpenBLAS
Blog | Docs | Changelog

provide a NEON version of arm/sgemm

#5800 (Merged)
Comparing notaz:armv7_sgemm (fc9d7c7) with develop (45e5426)
CodSpeed Performance Gauge
0%
Untouched
62

Benchmarks

62 total
test_daxpy[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
+1%
32.9 µs | 32.6 µs
test_daxpy[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
24.5 µs | 24.3 µs
test_syrk[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
341.5 µs | 339.9 µs
test_daxpy[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
26.2 µs | 26 µs
test_daxpy[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
25.4 µs | 25.3 µs
test_syrk[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
474.1 µs | 472.7 µs
test_daxpy[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
40.9 µs | 40.8 µs
test_daxpy[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
24.3 µs | 24.3 µs
test_gesv[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
258 µs | 257.3 µs
test_dot[1000]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
28.6 µs | 28.5 µs
test_dgbmv[1-100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
40.6 µs | 40.5 µs
test_daxpy[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
27.8 µs | 27.7 µs
test_daxpy[1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
32.9 µs | 32.9 µs
test_dgbmv[1-1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
83.9 µs | 83.7 µs
test_gesdd[mn0-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
111.6 µs | 111.4 µs
test_gesv[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
938.7 µs | 937.7 µs
test_dgbmv[1-100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
37.4 µs | 37.4 µs
test_dgemv[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
104.3 µs | 104.2 µs
test_dgbmv[1-1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
75.2 µs | 75.2 µs
test_gemm[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
273.7 µs | 273.5 µs
test_nrm2[1000-dz]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
35.6 µs | 35.6 µs
test_dgemv[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
231.5 µs | 231.4 µs
test_gesdd[mn0-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
122.6 µs | 122.6 µs
test_dgbmv[1-1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
119.1 µs | 119.1 µs
test_gemm[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
471.7 µs | 471.5 µs

Commits

Click on a commit to change the comparison range
Base
develop
45e5426
+0.05%
rename arm32 sgemm_kernel to indicate neon support
fc9d7c7
9 days ago
by notaz
© 2026 CodSpeed Technology
Home Terms Privacy Docs