OpenMathLib
OpenBLAS
BlogDocsChangelog

Split VORTEXM4 from VORTEX target and fix SGEMM_DIRECT support for SME-capable targets

#5423
Comparing
martin-frbg:issue5414
(
e85efb8
) with
develop
(
68ff451
)
CodSpeed Performance Gauge
0%
Untouched
62

Benchmarks

Passed

test_daxpy[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
+1%
25.9 µs25.7 µs
test_dgemv[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
+1%
150 µs149.2 µs
test_nrm2[1000-dz]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
+1%
35.4 µs35.2 µs
test_nrm2[100-dz]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
+1%
28.7 µs28.5 µs
test_daxpy[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
40.6 µs40.4 µs
test_daxpy[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
25.1 µs25 µs
test_dgemv[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
103.9 µs103.4 µs
test_dot[1000]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
28.5 µs28.4 µs
test_dgemv[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
141.1 µs140.6 µs
test_daxpy[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
24.1 µs24 µs
test_daxpy[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
27.4 µs27.4 µs
test_nrm2[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
30.4 µs30.3 µs
test_dgbmv[1-100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
38 µs37.9 µs
test_daxpy[1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
32.7 µs32.6 µs
test_daxpy[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
24.2 µs24.1 µs
test_nrm2[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
37.3 µs37.2 µs
test_daxpy[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
32.4 µs32.3 µs
test_gesv[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
394.1 µs393.7 µs
test_gesdd[mn0-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
109.1 µs109 µs
test_gesdd[mn0-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
120.2 µs120.1 µs
test_syrk[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
213.5 µs213.4 µs
test_dgbmv[1-1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
75 µs75 µs
test_gesv[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
353.6 ms353.4 ms
test_syev[50-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
1.4 ms1.4 ms
test_gesv[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
938.2 µs938.1 µs
test_syrk[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
852.9 µs852.9 µs
test_dgemv[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
13.9 ms13.9 ms
test_syev[200-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
49.1 ms49.1 ms
test_gesdd[mn1-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
93.9 ms93.8 ms
test_gesv[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
695.7 µs695.6 µs
test_gemm[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
1.2 ms1.2 ms
test_gesv[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
93.3 ms93.3 ms
test_gemm[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
659.5 µs659.4 µs
test_syrk[1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
227.5 ms227.5 ms
test_syrk[100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
472.4 µs472.3 µs
test_syev[200-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
58.6 ms58.6 ms
test_syrk[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
65.4 ms65.4 ms
test_gemm[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
117.4 ms117.4 ms
test_gesdd[mn1-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
65.2 ms65.2 ms
test_gemm[1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
426 ms426 ms
test_dot[100]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
22.5 µs22.5 µs
test_gemm[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
875.6 ms875.6 ms
test_dgemv[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
26.3 ms26.3 ms
test_syrk[1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
476.4 ms476.4 ms
test_syrk[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
130.4 ms130.4 ms
test_dgemv[1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
14.8 ms14.8 ms
test_gemm[1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
239.4 ms239.4 ms
test_gesv[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
52.6 ms52.6 ms
test_gemm[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
273.1 µs273.1 µs
test_syev[50-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
1.3 ms1.3 ms
test_dgemv[1000-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
7 ms7 ms
test_gesv[1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
188.6 ms188.6 ms
test_gemm[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
471 µs471.1 µs
test_gesv[100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
256.8 µs256.9 µs
test_syrk[100-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
339.6 µs339.7 µs
test_dgbmv[1-1000-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
118.9 µs119 µs
test_dgemv[100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
231.2 µs231.3 µs
test_dgbmv[1-100-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
40.3 µs40.3 µs
test_dgbmv[1-100-s]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
37.2 µs37.3 µs
test_dgbmv[1-1000-d]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
83.5 µs83.7 µs
test_dgbmv[1-1000-c]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
99.5 µs99.6 µs
test_dgbmv[1-100-z]
benchmark/pybench/benchmarks/bench_blas.py
CodSpeed Performance Gauge
0%
42.1 µs42.2 µs

Commits

Click on a commit to change the comparison range
Base
develop
68ff451
-0.15%
Add VORTEXM4
18f9582
4 months ago
by martin-frbg
0%
Add VORTEXM4
ca542f3
4 months ago
by martin-frbg
0%
Add VORTEXM4 to DYNAMIC_ARCH list
bf98e44
4 months ago
by martin-frbg
0%
Relax version number requirement for AppleClang
4609732
4 months ago
by martin-frbg
0%
Update SME-related kernels
107c883
4 months ago
by martin-frbg
+0.07%
Hide the local 2VLx2VL symbol as static is insufficient for this with gcc
edaa73f
4 months ago
by martin-frbg
-0.1%
Add VORTEXM4
1ee8879
4 months ago
by martin-frbg
+0.13%
Add registers d8 to d15 to clobber lists as the code does not expressly save them
b4fc09e
3 months ago
by martin-frbg
+0.03%
remove debugging printout
2b5d8c7
3 months ago
by martin-frbg
+0.02%
Merge branch 'develop' into issue5414
fc516af
2 months ago
by martin-frbg
-0.01%
Update Makefile.L3
b3d0bc4
2 months ago
by martin-frbg
+0.01%
Rework for DYNAMIC_ARCH use and use of SGEMM functions by SSYMM
c889558
2 months ago
by martin-frbg
+0.07%
Update limits based on benchmarking the SME code on Apple M4
47a66ae
2 months ago
by martin-frbg
-0.03%
Merge branch 'OpenMathLib:develop' into issue5414
9bfc361
2 months ago
by martin-frbg
-0.14%
Add prototype for gotoblas_corename
682f61e
2 months ago
by martin-frbg
+0.18%
Merge branch 'OpenMathLib:develop' into issue5414
ea85b66
12 days ago
by martin-frbg
0%
Merge branch 'OpenMathLib:develop' into issue5414
9c0965b
11 days ago
by martin-frbg
0%
Merge branch 'OpenMathLib:develop' into issue5414
8c0b13c
11 days ago
by martin-frbg
-0.06%
Add cpuid for Apple M5 (from a PR to the archspec project)
7d35bf6
11 days ago
by martin-frbg
+0.05%
fix sequence
c3c857c
10 days ago
by martin-frbg
0%
AppleClang does not define feature local_streaming
825d3ad
6 days ago
by martin-frbg
+0.01%
remove za from clobber lists
e85efb8
1 day ago
by martin-frbg
© 2025 CodSpeed Technology
Home Terms Privacy Docs