Blog | Docs

Tune generic SVE parameters closer to other SVE cores and add tunings to baseline AArch64 (#4833)

Merging
Mousius:improve-sve-constants
(
5f8744d
) into
develop
(
9afd0c8
)
0%
IMPROVEMENTS
0
REGRESSIONS
0
UNTOUCHED
62
NEW
0
DROPPED
0
IGNORED
0

Benchmarks

Passed

test_syrk[100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[100-d]
0%
340.6 µs
339.1 µs
test_daxpy[1000-s]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[1000-s]
0%
27.2 µs
27.1 µs
test_dgbmv[1-100-z]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-100-z]
0%
41.2 µs
41 µs
test_syrk[100-c]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[100-c]
0%
472.8 µs
471.5 µs
test_daxpy[1000-c]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[1000-c]
0%
32.2 µs
32.1 µs
test_dgbmv[1-100-c]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-100-c]
0%
39.2 µs
39.1 µs
test_dgbmv[1-1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-1000-d]
0%
82.6 µs
82.5 µs
test_dgbmv[1-100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-100-d]
0%
36.8 µs
36.7 µs
test_dgemv[100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[100-d]
0%
146 µs
145.7 µs
test_dgbmv[1-100-s]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-100-s]
0%
36.3 µs
36.2 µs
test_gesdd[mn0-d]
benchmark/pybench/benchmarks/bench_blas.py::test_gesdd[mn0-d]
0%
117.9 µs
117.7 µs
test_dgemv[100-s]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[100-s]
0%
109 µs
108.9 µs
test_nrm2[1000-dz]
benchmark/pybench/benchmarks/bench_blas.py::test_nrm2[1000-dz]
0%
35 µs
35 µs
test_nrm2[100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_nrm2[100-d]
0%
33.7 µs
33.7 µs
test_dgbmv[1-1000-z]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-1000-z]
0%
117.8 µs
117.7 µs
test_gesv[100-c]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[100-c]
0%
694.2 µs
693.9 µs
test_dgbmv[1-1000-s]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-1000-s]
0%
73.9 µs
73.8 µs
test_gesv[100-z]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[100-z]
0%
936.1 µs
935.8 µs
test_gesv[1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[1000-d]
0%
93.3 ms
93.3 ms
test_gemm[100-c]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[100-c]
0%
658.8 µs
658.7 µs
test_syev[200-s]
benchmark/pybench/benchmarks/bench_blas.py::test_syev[200-s]
0%
48.5 ms
48.5 ms
test_gesv[1000-c]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[1000-c]
0%
188.6 ms
188.6 ms
test_daxpy[100-s]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[100-s]
0%
23.7 µs
23.7 µs
test_gesv[1000-s]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[1000-s]
0%
52.6 ms
52.6 ms
test_dgemv[1000-s]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[1000-s]
0%
7.7 ms
7.7 ms
test_gemm[1000-c]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[1000-c]
0%
426 ms
426 ms
test_syrk[1000-c]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[1000-c]
0%
227.5 ms
227.5 ms
test_gemm[1000-s]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[1000-s]
0%
117.4 ms
117.4 ms
test_syrk[1000-s]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[1000-s]
0%
65.4 ms
65.4 ms
test_syrk[1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[1000-d]
0%
130.4 ms
130.4 ms
test_dgemv[1000-c]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[1000-c]
0%
14.8 ms
14.8 ms
test_syev[50-d]
benchmark/pybench/benchmarks/bench_blas.py::test_syev[50-d]
0%
1.4 ms
1.4 ms
test_syev[200-d]
benchmark/pybench/benchmarks/bench_blas.py::test_syev[200-d]
0%
58.4 ms
58.4 ms
test_gesdd[mn1-d]
benchmark/pybench/benchmarks/bench_blas.py::test_gesdd[mn1-d]
0%
92.7 ms
92.7 ms
test_gemm[100-z]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[100-z]
0%
1.2 ms
1.2 ms
test_dgemv[1000-z]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[1000-z]
0%
26.3 ms
26.3 ms
test_syrk[1000-z]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[1000-z]
0%
476.4 ms
476.4 ms
test_gemm[1000-z]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[1000-z]
0%
875.6 ms
875.6 ms
test_gesdd[mn1-s]
benchmark/pybench/benchmarks/bench_blas.py::test_gesdd[mn1-s]
0%
63.8 ms
63.8 ms
test_gemm[1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[1000-d]
0%
239.4 ms
239.4 ms
test_gesv[1000-z]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[1000-z]
0%
353.4 ms
353.4 ms
test_syrk[100-z]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[100-z]
0%
855.1 µs
855.1 µs
test_syev[50-s]
benchmark/pybench/benchmarks/bench_blas.py::test_syev[50-s]
0%
1.3 ms
1.3 ms
test_dgemv[1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[1000-d]
0%
14.7 ms
14.7 ms
test_daxpy[100-z]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[100-z]
0%
25.2 µs
25.2 µs
test_daxpy[1000-z]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[1000-z]
0%
40 µs
40 µs
test_gemm[100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[100-d]
0%
470.3 µs
470.4 µs
test_dgbmv[1-1000-c]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-1000-c]
0%
98.5 µs
98.5 µs
test_gemm[100-s]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[100-s]
0%
271.6 µs
271.7 µs
test_syrk[100-s]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[100-s]
0%
212.8 µs
212.9 µs
test_gesv[100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[100-d]
0%
394.5 µs
394.7 µs
test_gesdd[mn0-s]
benchmark/pybench/benchmarks/bench_blas.py::test_gesdd[mn0-s]
0%
107.1 µs
107.2 µs
test_dot[1000]
benchmark/pybench/benchmarks/bench_blas.py::test_dot[1000]
0%
28.5 µs
28.5 µs
test_dgemv[100-c]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[100-c]
0%
148.5 µs
148.7 µs
test_daxpy[100-c]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[100-c]
0%
24.5 µs
24.5 µs
test_daxpy[100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[100-d]
0%
23.6 µs
23.7 µs
test_gesv[100-s]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[100-s]
0%
257.4 µs
258 µs
test_nrm2[1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_nrm2[1000-d]
0%
30 µs
30.1 µs
test_dgemv[100-z]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[100-z]
0%
229.5 µs
230.2 µs
test_nrm2[100-dz]
benchmark/pybench/benchmarks/bench_blas.py::test_nrm2[100-dz]
0%
26.9 µs
27 µs
test_daxpy[1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[1000-d]
0%
31.7 µs
31.8 µs
test_dot[100]
benchmark/pybench/benchmarks/bench_blas.py::test_dot[100]
-1%
23.2 µs
23.3 µs

Commits

Click on a commit to change the comparison range
base
develop
9afd0c8
0%
Add tunings for baseline AArch64. Previously this was left without `SWITCH_RATIO` or `GEMM_PREFERED_SIZE` and with older default values, but it can be seen across other cores that these values seem to work for many devices.
5f8744d
2 months ago
by Mousius
Resources | Home | Pricing | Docs | Blog | GitHub | Changelog
Copyright © 2024 CodSpeed Technology SAS. All rights reserved.