Avatar for the OpenMathLib user
OpenMathLib
OpenBLAS
BlogDocsChangelog

Provide optimized ?SUM kernels for NeoverseN1 and related

#5621Merged
Comparing
martin-frbg:woa_sum
(
861b3db
) with
develop
(
413e609
)
CodSpeed Performance Gauge
0%
Untouched
62

Benchmarks

62 total
test_daxpy[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
+1%
32.5 µs32.3 µs
test_daxpy[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
+1%
24.1 µs24 µs
test_daxpy[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
+1%
26 µs25.8 µs
test_daxpy[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
25.2 µs25.1 µs
test_daxpy[1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
32.8 µs32.6 µs
test_dot[100]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
22.5 µs22.4 µs
test_daxpy[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
24.2 µs24.1 µs
test_daxpy[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
40.7 µs40.6 µs
test_gesdd[mn0-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
109.3 µs109.1 µs
test_dgbmv[1-1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
119 µs118.8 µs
test_dgemv[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
231.3 µs231 µs
test_dot[1000]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
28.5 µs28.5 µs
test_gesv[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
695.5 µs694.8 µs
test_dgbmv[1-100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
38 µs38 µs
test_dgemv[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
7 ms7 ms
test_gemm[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
273.2 µs273.1 µs
test_dgemv[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
104.1 µs104 µs
test_gemm[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
471.3 µs471.2 µs
test_gesv[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
93.3 ms93.3 ms
test_dgemv[1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
14.8 ms14.8 ms
test_gesdd[mn1-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
65.2 ms65.2 ms
test_gesv[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
353.6 ms353.6 ms
test_gemm[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
659.6 µs659.6 µs
test_syev[200-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
49.1 ms49.1 ms
test_syrk[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
130.4 ms130.3 ms

Commits

Click on a commit to change the comparison range
Base
develop
413e609
-0.02%
Reuse ?SUM kernels from ThunderX2T99
861b3db
2 months ago
by martin-frbg
© 2026 CodSpeed Technology
Home Terms Privacy Docs