Latest Results
Implement `init_aggr` in IR and optimize aggregates initialization (#7530)
## Description
This PR adds the `init_aggr` instruction to the IR and, in general, optimizes
patterns of aggregate initialization.
Additionally, the PR extends `dce` pass to eliminate dead
`InstOp::MemClearVal` instructions.
`init_aggr` instruction captures the higher level notion of aggregate
initialization, as explained in #7489. It enables analyzing
initialization patterns in IR and lowering them to the most optimal sequence of
lower-level instructions like `store`, `mem_clear_val`, etc.
Before compiling initializations to `init_aggr` instructions, the IR
generation will first check if some other optimizations can be done. The
implementation details of those checks vary between initialization of
arrays and product types (structs and tuples), but can be summarized as
follows:
- If the resulting aggregate can be initialized by using a single
`store` always initialize it that way. E.g.: `Struct { x: 0 }` or
`[(Struct { x: 0 }, ); 1]`.
- Else, if the resulting aggregate is zeroed, initialize it with a
single `mem_clear_val`. E.g.: `Struct { x: 0, y: b256::zero() }` or
`[b256::zero(); 10]`.
- Else, if the resulting aggregate is a constant, **consider**
`mem_copy_val`ing it from the data section. E.g.: `(SOME_CONST,
SOME_OTHER_CONST)`.
- Else, compile to `init_aggr`.
A new `lower-init-aggr` pass is added to `PassManager` as the first step
before any other optimizations.
The default lowering searches for so-called root `init_aggr`
instructions and lowers them to various patterns, like, e.g., `store`
instructions or loops for large arrays of repeated elements.
E.g., the following initialization of nested aggregates:
```
let x = (111u64, Struct { a: 222, b: [333; 32] }, [444u64, 555, 666]);
```
will initially be compiled to `init_aggr` instructions for each
aggregate, with the `v112v1 = init_aggr v96v1 [v97v1, v105v1, v111v1]`
being the root (outermost) initialization:
```
local [u64; 32] __array_init_0
local [u64; 3] __array_init_1
local { u64, [u64; 32] } __struct_init_0
local { u64, { u64, [u64; 32] }, [u64; 3] } __tuple_init_0
local { u64, { u64, [u64; 32] }, [u64; 3] } x
entry():
v96v1 = get_local __ptr { u64, { u64, [u64; 32] }, [u64; 3] }, __tuple_init_0
v98v1 = get_local __ptr { u64, [u64; 32] }, __struct_init_0
v100v1 = get_local __ptr [u64; 32], __array_init_0
v101v1 = const u64 333
v102v1 = init_aggr v100v1 [v101v1 x 32] <<<<---- `[333; 32]`.
v103v1 = load v102v1 ^------------
v99v1 = const u64 222 |
v104v1 = init_aggr v98v1 [v99v1, v103v1] <<<<---- `{ a: 222, b: * }`.
v105v1 = load v104v1
v106v1 = get_local __ptr [u64; 3], __array_init_1
v107v1 = const u64 444
v108v1 = const u64 555
v109v1 = const u64 666
v110v1 = init_aggr v106v1 [v107v1, v108v1, v109v1] <<<<---- `[444, 555, 666]`.
v111v1 = load v110v1
v97v1 = const u64 111, !30
v112v1 = init_aggr v96v1 [v97v1, v105v1, v111v1], !22 <<<<---- The root `init_aggr`.
```
After lowering, the resulting IR will look like:
```
//** Only a single aggregate is needed.
local { u64, { u64, [u64; 32] }, [u64; 3] } __tuple_init_0
entry():
v96v1 = get_local __ptr { u64, { u64, [u64; 32] }, [u64; 3] }, __tuple_init_0, !21
v151v1 = const u64 1
v152v1 = const u64 1
v153v1 = get_elem_ptr v96v1, __ptr [u64; 32], v151v1, v152v1, !22
v155v1 = const u64 0
br array_init_loop(v155v1)
//** `[333; 32]` gets lowered to a loop, but the loop writes to the root aggregate directly.
array_init_loop(v154v1: u64):
v157v1 = get_elem_ptr v153v1, __ptr u64, v154v1
v101v1 = const u64 333
store v101v1 to v157v1, !22
v159v1 = const u64 1
v160v1 = add v154v1, v159v1
v161v1 = const u64 32
v162v1 = cmp lt v160v1 v161v1
cbr v162v1, array_init_loop(v160v1), array_init_loop_exit()
array_init_loop_exit():
//** Values of all subaggregates are written to the root aggregate directly.
v144v1 = const u64 1
v145v1 = const u64 0
v146v1 = get_elem_ptr v96v1, __ptr u64, v144v1, v145v1, !23
v99v1 = const u64 222, !24
store v99v1 to v146v1, !23
v166v1 = const u64 2
v167v1 = const u64 0
v168v1 = get_elem_ptr v96v1, __ptr u64, v166v1, v167v1, !25
v107v1 = const u64 444, !26
store v107v1 to v168v1, !25
v170v1 = const u64 2
v171v1 = const u64 1
v172v1 = get_elem_ptr v96v1, __ptr u64, v170v1, v171v1, !25
v108v1 = const u64 555, !27
store v108v1 to v172v1, !25
v174v1 = const u64 2
v175v1 = const u64 2
v176v1 = get_elem_ptr v96v1, __ptr u64, v174v1, v175v1, !25
v109v1 = const u64 666, !28
store v109v1 to v176v1, !25
v139v1 = const u64 0
v140v1 = get_elem_ptr v96v1, __ptr u64, v139v1, !21
v97v1 = const u64 111, !29
store v97v1 to v140v1, !21
```
Previously, aggregates were (mostly) initialized to separate temporaries
and `mem_copy_val`ed to the root aggregate:
```
//** Separate temporary needed for subaggregates.
local [u64; 32] __anon_0
local { u64, [u64; 32] } __anon_1
local { u64, { u64, [u64; 32] }, [u64; 3] } __anon_3
entry():
v98v1 = get_local __ptr [u64; 32], __anon_0
v100v1 = const u64 0
br array_init_loop(v100v1)
array_init_loop(v102v1: u64):
v103v1 = get_elem_ptr v98v1, __ptr u64, v102v1
v99v1 = const u64 333
store v99v1 to v103v1
v105v1 = const u64 1
v106v1 = add v102v1, v105v1
v107v1 = const u64 32
v108v1 = cmp lt v106v1 v107v1
cbr v108v1, array_init_loop(v106v1), array_init_exit()
array_init_exit():
v111v1 = get_local __ptr { u64, [u64; 32] }, __anon_1
v112v1 = const u64 0
v113v1 = get_elem_ptr v111v1, __ptr u64, v112v1
v97v1 = const u64 222
store v97v1 to v113v1
v115v1 = const u64 1
v116v1 = get_elem_ptr v111v1, __ptr [u64; 32], v115v1
//** Initialized temporaries copied to the root aggregate.
mem_copy_val v116v1, v98v1
v133v1 = get_local __ptr { u64, { u64, [u64; 32] }, [u64; 3] }, __anon_3
v134v1 = const u64 0
v135v1 = get_elem_ptr v133v1, __ptr u64, v134v1
v96v1 = const u64 111
store v96v1 to v135v1
v137v1 = const u64 1
v138v1 = get_elem_ptr v133v1, __ptr { u64, [u64; 32] }, v137v1
//** Initialized temporaries copied to the root aggregate.
mem_copy_val v138v1, v111v1
v140v1 = const u64 2
v141v1 = get_elem_ptr v133v1, __ptr [u64; 3], v140v1
v187v1 = const u64 0
v188v1 = get_elem_ptr v141v1, __ptr u64, v187v1
v120v1 = const u64 444
store v120v1 to v188v1
v190v1 = const u64 1
v191v1 = get_elem_ptr v141v1, __ptr u64, v190v1
v124v1 = const u64 555
store v124v1 to v191v1
v193v1 = const u64 2
v194v1 = get_elem_ptr v141v1, __ptr u64, v193v1
v128v1 = const u64 666
store v128v1 to v194v1
```
This PR implements only the default lowering, which already brings
the benefits described above. Having the `init_aggr` instruction potentially
enables additional optimizations. Those optimizations are explained in
the code, marked as `TODO: (INIT-AGGR)`, and will be done in follow-up
PRs.
- Closes #7489
## Checklist
- [x] I have linked to any relevant issues.
- [x] I have commented my code, particularly in hard-to-understand
areas.
- [ ] I have updated the documentation where relevant (API docs, the
reference, and the Sway book).
- [ ] If my change requires substantial documentation changes, I have
[requested support from the DevRel
team](https://github.com/FuelLabs/devrel-requests/issues/new/choose)
- [x] I have added tests that prove my fix is effective or that my
feature works.
- [ ] I have added (or requested a maintainer to add) the necessary
`Breaking*` or `New Feature` labels where relevant.
- [x] I have done my best to ensure that my PR adheres to [the Fuel Labs
Code Review
Standards](https://github.com/FuelLabs/rfcs/blob/master/text/code-standards/external-contributors.md).
- [x] I have requested a review from the relevant team or maintainers.

Active Branches
#7535 0%
#7536 0%
#7517 0%
© 2026 CodSpeed Technology