Skip to content

Commit

Permalink
optimizer: SROA mutable(immutable(...)) case correctly
Browse files Browse the repository at this point in the history
Our SROA should be able to handle `mutable(immutable(...))` case even
without "proper" alias analysis, since we eliminate `immutable` first
and then process `mutable` within our iterative approach.

It turns out it hasn't worked at all because after the immutable
handling
we keep the reference count _before_ DCE, but didn't update dead
reference
count. This commit fixes it and now we should be able to eliminate more
allocations, e.g.:
```julia
julia> simple_sroa(s) = broadcast(identity, Ref(s))
simple_sroa (generic function with 1 method)

julia> simple_sroa("compile"); @allocated(simple_sroa("julia"))
0
```
  • Loading branch information
aviatesk committed Nov 27, 2021
1 parent 6c4e203 commit b33430e
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 12 deletions.
12 changes: 8 additions & 4 deletions base/compiler/ssair/ir.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1311,7 +1311,9 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}=
compact.result[old_result_idx][:inst]), (compact.idx, active_bb)
end

function maybe_erase_unused!(extra_worklist::Vector{Int}, compact::IncrementalCompact, idx::Int, callback = x::SSAValue->nothing)
function maybe_erase_unused!(
extra_worklist::Vector{Int}, compact::IncrementalCompact, idx::Int,
callback = null_dce_callback)
stmt = compact.result[idx][:inst]
stmt === nothing && return false
if compact_exprtype(compact, SSAValue(idx)) === Bottom
Expand Down Expand Up @@ -1404,19 +1406,21 @@ function just_fixup!(compact::IncrementalCompact)
end
end

function simple_dce!(compact::IncrementalCompact)
function simple_dce!(compact::IncrementalCompact, callback = null_dce_callback)
# Perform simple DCE for unused values
extra_worklist = Int[]
for (idx, nused) in Iterators.enumerate(compact.used_ssas)
idx >= compact.result_idx && break
nused == 0 || continue
maybe_erase_unused!(extra_worklist, compact, idx)
maybe_erase_unused!(extra_worklist, compact, idx, callback)
end
while !isempty(extra_worklist)
maybe_erase_unused!(extra_worklist, compact, pop!(extra_worklist))
maybe_erase_unused!(extra_worklist, compact, pop!(extra_worklist), callback)
end
end

null_dce_callback(x::SSAValue) = return

function non_dce_finish!(compact::IncrementalCompact)
result_idx = compact.result_idx
resize!(compact.result, result_idx - 1)
Expand Down
10 changes: 4 additions & 6 deletions base/compiler/ssair/passes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -763,9 +763,10 @@ function sroa_pass!(ir::IRCode)
# now go through analyzed mutable structs and see which ones we can eliminate
# NOTE copy the use count here, because `simple_dce!` may modify it and we need it
# consistent with the state of the IR here (after tracking `PhiNode` arguments,
# but before the DCE) for our predicate within `sroa_mutables!`
# but before the DCE) for our predicate within `sroa_mutables!`, but we also
# try an extra effort using a callback so that reference counts are updated
used_ssas = copy(compact.used_ssas)
simple_dce!(compact)
simple_dce!(compact, (x::SSAValue) -> used_ssas[x.id] -= 1)
ir = complete(compact)
sroa_mutables!(ir, defuses, used_ssas)
return ir
Expand Down Expand Up @@ -835,10 +836,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
isempty(du.uses) && continue
push!(du.defs, newidx)
ldu = compute_live_ins(ir.cfg, du)
phiblocks = Int[]
if !isempty(ldu.live_in_bbs)
phiblocks = iterated_dominance_frontier(ir.cfg, ldu, domtree)
end
phiblocks = isempty(ldu.live_in_bbs) ? Int[] : iterated_dominance_frontier(ir.cfg, ldu, domtree)
allblocks = sort(vcat(phiblocks, ldu.def_bbs))
blocks[fidx] = phiblocks, allblocks
if fidx + 1 > length(defexpr.args)
Expand Down
38 changes: 36 additions & 2 deletions test/compiler/irpasses.jl
Original file line number Diff line number Diff line change
Expand Up @@ -199,21 +199,55 @@ let src = code_typed1((Any,Any,Any)) do x, y, z
x.args[2:end] == Any[#=x=# Core.Argument(2), #=y=# Core.Argument(3), #=y=# Core.Argument(4)]
end
end
# FIXME our analysis isn't yet so powerful at this moment, e.g. it can't handle nested mutable objects

# FIXME our analysis isn't yet so powerful at this moment: may be unable to handle nested objects well
# OK: mutable(immutable(...)) case
let src = code_typed1((Any,Any,Any)) do x, y, z
xyz = MutableXYZ(x, y, z)
t = (xyz,)
v = t[1].x
v, v, v
end
@test !any(isnew, src.code)
end
let src = code_typed1((Any,Any,Any)) do x, y, z
xyz = MutableXYZ(x, y, z)
outer = ImmutableOuter(xyz, xyz, xyz)
outer.x.x, outer.y.y, outer.z.z
end
@test_broken !any(isnew, src.code)
@test !any(isnew, src.code)
@test any(src.code) do @nospecialize x
iscall((src, tuple), x) &&
x.args[2:end] == Any[#=x=# Core.Argument(2), #=y=# Core.Argument(3), #=y=# Core.Argument(4)]
end
end
let # this is a simple end to end test case, which demonstrates allocation elimination
# by handling `mutable[RefValue{String}](immutable[Tuple](...))` case correctly
# NOTE this test case isn't so robust and might be subject to future changes of the broadcasting implementation,
# in that case you don't really need to stick to keeping this test case around
simple_sroa(s) = broadcast(identity, Ref(s))
s = Base.inferencebarrier("julia")::String
simple_sroa(s)
# NOTE don't hard-code `"julia"` in `@allocated` clause and make sure to execute the
# compiled code for `simple_sroa`, otherwise everything can be folded even without SROA
@test @allocated(simple_sroa(s)) == 0
end
# FIXME: immutable(mutable(...)) case
let src = code_typed1((Any,Any,Any)) do x, y, z
xyz = ImmutableXYZ(x, y, z)
outer = MutableOuter(xyz, xyz, xyz)
outer.x.x, outer.y.y, outer.z.z
end
@test_broken !any(isnew, src.code)
end
# FIXME: mutable(mutable(...)) case
let src = code_typed1((Any,Any,Any)) do x, y, z
xyz = MutableXYZ(x, y, z)
outer = MutableOuter(xyz, xyz, xyz)
outer.x.x, outer.y.y, outer.z.z
end
@test_broken !any(isnew, src.code)
end

# should work nicely with inlining to optimize away a complicated case
# adapted from http://wiki.luajit.org/Allocation-Sinking-Optimization#implementation%5B
Expand Down

0 comments on commit b33430e

Please sign in to comment.