Avatar for the OpenMathLib user
OpenMathLib
OpenBLAS
Blog · Docs · Changelog

Switch power to use O3 instead of Ofast

#5323 Merged
Comparing
imciner2:im/ofast
(
721c806
) with
develop
(
4e6da5e
)
CodSpeed Performance Gauge
0%
Improvements
0
Regressions
0
Untouched
62
New
0
Dropped
0
Ignored
0

Benchmarks

Passed

test_dgemv[100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[100-d]
CodSpeed Performance Gauge
0%
140.9 µs
140.2 µs
test_daxpy[100-s]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[100-s]
CodSpeed Performance Gauge
0%
23.7 µs
23.6 µs
test_dgemv[100-c]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[100-c]
CodSpeed Performance Gauge
0%
149.4 µs
148.7 µs
test_daxpy[1000-s]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[1000-s]
CodSpeed Performance Gauge
0%
27.1 µs
27 µs
test_dot[100]
benchmark/pybench/benchmarks/bench_blas.py::test_dot[100]
CodSpeed Performance Gauge
0%
22 µs
21.9 µs
test_nrm2[100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_nrm2[100-d]
CodSpeed Performance Gauge
0%
35.5 µs
35.4 µs
test_dgbmv[1-100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-100-d]
CodSpeed Performance Gauge
0%
37.6 µs
37.5 µs
test_daxpy[100-z]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[100-z]
CodSpeed Performance Gauge
0%
25.4 µs
25.4 µs
test_daxpy[1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[1000-d]
CodSpeed Performance Gauge
0%
32 µs
32 µs
test_daxpy[100-c]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[100-c]
CodSpeed Performance Gauge
0%
24.7 µs
24.7 µs
test_dot[1000]
benchmark/pybench/benchmarks/bench_blas.py::test_dot[1000]
CodSpeed Performance Gauge
0%
28 µs
27.9 µs
test_dgbmv[1-1000-c]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-1000-c]
CodSpeed Performance Gauge
0%
99.2 µs
99.1 µs
test_dgbmv[1-1000-s]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-1000-s]
CodSpeed Performance Gauge
0%
74.8 µs
74.7 µs
test_gesv[100-z]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[100-z]
CodSpeed Performance Gauge
0%
938 µs
937.2 µs
test_gesdd[mn1-s]
benchmark/pybench/benchmarks/bench_blas.py::test_gesdd[mn1-s]
CodSpeed Performance Gauge
0%
65.2 ms
65.2 ms
test_dgbmv[1-1000-z]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-1000-z]
CodSpeed Performance Gauge
0%
118.6 µs
118.5 µs
test_dgemv[100-s]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[100-s]
CodSpeed Performance Gauge
0%
103.7 µs
103.7 µs
test_gemm[100-s]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[100-s]
CodSpeed Performance Gauge
0%
273 µs
272.9 µs
test_gemm[100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[100-d]
CodSpeed Performance Gauge
0%
470.9 µs
470.8 µs
test_gesdd[mn0-d]
benchmark/pybench/benchmarks/bench_blas.py::test_gesdd[mn0-d]
CodSpeed Performance Gauge
0%
119.9 µs
119.9 µs
test_gesdd[mn1-d]
benchmark/pybench/benchmarks/bench_blas.py::test_gesdd[mn1-d]
CodSpeed Performance Gauge
0%
93.8 ms
93.8 ms
test_gemm[100-c]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[100-c]
CodSpeed Performance Gauge
0%
659.3 µs
659.2 µs
test_daxpy[1000-z]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[1000-z]
CodSpeed Performance Gauge
0%
40.1 µs
40.1 µs
test_gesv[1000-z]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[1000-z]
CodSpeed Performance Gauge
0%
353.6 ms
353.6 ms
test_dgbmv[1-100-s]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-100-s]
CodSpeed Performance Gauge
0%
36.9 µs
36.9 µs
test_dgemv[1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[1000-d]
CodSpeed Performance Gauge
0%
13.9 ms
13.9 ms
test_gemm[100-z]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[100-z]
CodSpeed Performance Gauge
0%
1.2 ms
1.2 ms
test_syrk[100-z]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[100-z]
CodSpeed Performance Gauge
0%
856.1 µs
856.1 µs
test_gemm[1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[1000-d]
CodSpeed Performance Gauge
0%
239.4 ms
239.4 ms
test_syrk[1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[1000-d]
CodSpeed Performance Gauge
0%
130.4 ms
130.3 ms
test_dgemv[1000-z]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[1000-z]
CodSpeed Performance Gauge
0%
26.3 ms
26.3 ms
test_syrk[100-s]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[100-s]
CodSpeed Performance Gauge
0%
213.1 µs
213.1 µs
test_syrk[1000-c]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[1000-c]
CodSpeed Performance Gauge
0%
227.5 ms
227.5 ms
test_gesv[1000-s]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[1000-s]
CodSpeed Performance Gauge
0%
52.6 ms
52.6 ms
test_gemm[1000-c]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[1000-c]
CodSpeed Performance Gauge
0%
426 ms
426 ms
test_syrk[1000-z]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[1000-z]
CodSpeed Performance Gauge
0%
476.4 ms
476.4 ms
test_gemm[1000-s]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[1000-s]
CodSpeed Performance Gauge
0%
117.4 ms
117.4 ms
test_syev[50-s]
benchmark/pybench/benchmarks/bench_blas.py::test_syev[50-s]
CodSpeed Performance Gauge
0%
1.3 ms
1.3 ms
test_gesv[1000-c]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[1000-c]
CodSpeed Performance Gauge
0%
188.6 ms
188.6 ms
test_syrk[1000-s]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[1000-s]
CodSpeed Performance Gauge
0%
65.4 ms
65.4 ms
test_syev[200-s]
benchmark/pybench/benchmarks/bench_blas.py::test_syev[200-s]
CodSpeed Performance Gauge
0%
49.1 ms
49.1 ms
test_dgemv[1000-c]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[1000-c]
CodSpeed Performance Gauge
0%
14.8 ms
14.8 ms
test_dgbmv[1-1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-1000-d]
CodSpeed Performance Gauge
0%
83.3 µs
83.3 µs
test_syev[200-d]
benchmark/pybench/benchmarks/bench_blas.py::test_syev[200-d]
CodSpeed Performance Gauge
0%
58.6 ms
58.6 ms
test_syev[50-d]
benchmark/pybench/benchmarks/bench_blas.py::test_syev[50-d]
CodSpeed Performance Gauge
0%
1.4 ms
1.4 ms
test_gesdd[mn0-s]
benchmark/pybench/benchmarks/bench_blas.py::test_gesdd[mn0-s]
CodSpeed Performance Gauge
0%
108.9 µs
108.9 µs
test_gesv[100-s]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[100-s]
CodSpeed Performance Gauge
0%
256.9 µs
257 µs
test_gesv[1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[1000-d]
CodSpeed Performance Gauge
0%
93.3 ms
93.3 ms
test_dgemv[100-z]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[100-z]
CodSpeed Performance Gauge
0%
230.7 µs
230.8 µs
test_dgemv[1000-s]
benchmark/pybench/benchmarks/bench_blas.py::test_dgemv[1000-s]
CodSpeed Performance Gauge
0%
7 ms
7 ms
test_gemm[1000-z]
benchmark/pybench/benchmarks/bench_blas.py::test_gemm[1000-z]
CodSpeed Performance Gauge
0%
875.2 ms
875.6 ms
test_dgbmv[1-100-z]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-100-z]
CodSpeed Performance Gauge
0%
41.7 µs
41.8 µs
test_gesv[100-c]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[100-c]
CodSpeed Performance Gauge
0%
695.4 µs
696 µs
test_daxpy[1000-c]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[1000-c]
CodSpeed Performance Gauge
0%
32.2 µs
32.3 µs
test_dgbmv[1-100-c]
benchmark/pybench/benchmarks/bench_blas.py::test_dgbmv[1-100-c]
CodSpeed Performance Gauge
0%
39.8 µs
39.9 µs
test_nrm2[1000-dz]
benchmark/pybench/benchmarks/bench_blas.py::test_nrm2[1000-dz]
CodSpeed Performance Gauge
0%
35 µs
35.1 µs
test_syrk[100-c]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[100-c]
CodSpeed Performance Gauge
0%
472.1 µs
473.3 µs
test_gesv[100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_gesv[100-d]
CodSpeed Performance Gauge
0%
395.2 µs
396.3 µs
test_daxpy[100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_daxpy[100-d]
CodSpeed Performance Gauge
0%
23.7 µs
23.8 µs
test_nrm2[1000-d]
benchmark/pybench/benchmarks/bench_blas.py::test_nrm2[1000-d]
CodSpeed Performance Gauge
0%
30 µs
30.1 µs
test_nrm2[100-dz]
benchmark/pybench/benchmarks/bench_blas.py::test_nrm2[100-dz]
CodSpeed Performance Gauge
0%
28 µs
28.2 µs
test_syrk[100-d]
benchmark/pybench/benchmarks/bench_blas.py::test_syrk[100-d]
CodSpeed Performance Gauge
0%
339.2 µs
340.8 µs

Commits

Click on a commit to change the comparison range
Base
develop
4e6da5e
0%
Switch power to use O3 instead of Ofast. Ofast enables possibly unsafe optimizations in addition to O3. This appears to have been added and then just continually copied into later Power architectures, and it wasn't included in the CMake build system when that was introduced. Replace this with O3 so that the same level of optimization is done by the compiler.
721c806
11 days ago
by imciner2
© 2025 CodSpeed Technology
Home Terms Privacy Docs