OpenMathLib
OpenBLAS
Blog
Docs
Changelog
Blog
Docs
Changelog
Overview
Branches
Benchmarks
Runs
Setting optimized `[SD]GEMM_DEFAULT_[PQR]` parameters for `A64FX`
#5554
Comparing
hideaki-motoki:issue5553_gemm_default_pqr_for_a64fx
(
5f07358
) with
develop
(
d6b25c4
)
CodSpeed Performance Gauge
0%
Untouched
62
Benchmarks
Passed
test_dgemv[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
+1%
150 µs
149.1 µs
test_dgemv[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
141.1 µs
140.6 µs
test_dgbmv[1-100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
38 µs
37.9 µs
test_nrm2[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
30.4 µs
30.4 µs
test_gesdd[mn0-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
120.1 µs
120 µs
test_gesv[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
695.7 µs
695.1 µs
test_gesv[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
353.6 ms
353.4 ms
test_syev[50-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
1.4 ms
1.4 ms
test_gemm[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
273.1 µs
273.1 µs
test_dgbmv[1-1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
99.5 µs
99.4 µs
test_dgbmv[1-1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
75 µs
75 µs
test_syev[200-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
49.1 ms
49.1 ms
test_gesv[1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
188.6 ms
188.6 ms
test_gesdd[mn1-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
93.9 ms
93.8 ms
test_gesv[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
93.3 ms
93.3 ms
test_dgemv[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
13.9 ms
13.9 ms
test_dgemv[1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
14.8 ms
14.8 ms
test_gesdd[mn1-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
65.2 ms
65.2 ms
test_syrk[1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
227.5 ms
227.5 ms
test_syrk[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
65.4 ms
65.4 ms
test_gemm[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
117.4 ms
117.4 ms
test_daxpy[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
24.1 µs
24.1 µs
test_daxpy[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
25.9 µs
25.9 µs
test_gemm[1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
426 ms
426 ms
test_gesv[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
52.6 ms
52.6 ms
test_gemm[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
875.6 ms
875.6 ms
test_syrk[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
130.4 ms
130.4 ms
test_syrk[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
476.4 ms
476.4 ms
test_dgemv[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
26.3 ms
26.3 ms
test_gemm[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
239.4 ms
239.4 ms
test_syev[200-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
58.6 ms
58.6 ms
test_syev[50-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
1.3 ms
1.3 ms
test_dgbmv[1-1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
118.9 µs
118.9 µs
test_gemm[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
1.2 ms
1.2 ms
test_gemm[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
659.5 µs
659.6 µs
test_gemm[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
471.1 µs
471.2 µs
test_dgemv[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
7 ms
7 ms
test_gesv[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
938.2 µs
938.5 µs
test_syrk[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
213.5 µs
213.6 µs
test_dgbmv[1-100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
42.1 µs
42.1 µs
test_dgemv[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
231.2 µs
231.3 µs
test_gesdd[mn0-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
109.1 µs
109.1 µs
test_nrm2[1000-dz]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
35.4 µs
35.5 µs
test_nrm2[100-dz]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
28.7 µs
28.7 µs
test_daxpy[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
27.4 µs
27.5 µs
test_gesv[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
394.1 µs
394.6 µs
test_dgbmv[1-1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
83.5 µs
83.6 µs
test_daxpy[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
40.6 µs
40.7 µs
test_daxpy[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
32.4 µs
32.4 µs
test_dgbmv[1-100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
37.2 µs
37.3 µs
test_dgemv[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
103.9 µs
104.1 µs
test_daxpy[1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
32.7 µs
32.8 µs
test_dgbmv[1-100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
40.3 µs
40.3 µs
test_daxpy[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
25.1 µs
25.2 µs
test_dot[1000]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
28.5 µs
28.6 µs
test_syrk[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
472.4 µs
473.9 µs
test_gesv[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
256.8 µs
257.6 µs
test_daxpy[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
24.2 µs
24.3 µs
test_syrk[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
339.6 µs
341.2 µs
test_syrk[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
-1%
852.9 µs
857.2 µs
test_dot[100]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
-1%
22.5 µs
22.6 µs
test_nrm2[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
-2%
37.3 µs
38 µs
Commits
Click on a commit to change the comparison range
Base
develop
d6b25c4
-0.08%
fix param.h: turn [sd]gemm_default_[pqr] parameters for a64fx
5f07358
7 days ago
by hideaki-motoki
© 2025 CodSpeed Technology
Home
Terms
Privacy
Docs