Skip to content

Commit

Permalink
Allow taking Matrix slices without an extra allocation (#56236)
Browse files Browse the repository at this point in the history
Since changing Array to use Memory as the backing, we had the option of
making non-Vector arrays more flexible, but had instead preserved the
restriction that they must be zero offset and equal in length to the
Memory. This results in extra complexity, restrictions, and allocations
however, but doesn't gain many known benefits. Nanosoldier shows a
decrease in performance on linear eachindex loops, which we theorize is
due to a minor failure to CSE before SCEV or a lack of NUW/NSW on the
length multiplication calculation.
  • Loading branch information
vtjnash authored Nov 6, 2024
1 parent ded3e3d commit bce3d4d
Show file tree
Hide file tree
Showing 9 changed files with 36 additions and 93 deletions.
8 changes: 4 additions & 4 deletions base/Base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -354,14 +354,14 @@ include("set.jl")
include("char.jl")
function array_new_memory(mem::Memory{UInt8}, newlen::Int)
# add an optimization to array_new_memory for StringVector
if (@assume_effects :total @ccall jl_genericmemory_owner(mem::Any,)::Any) isa String
if (@assume_effects :total @ccall jl_genericmemory_owner(mem::Any,)::Any) === mem
# TODO: when implemented, this should use a memory growing call
return typeof(mem)(undef, newlen)
else
# If data is in a String, keep it that way.
# When implemented, this could use jl_gc_expand_string(oldstr, newlen) as an optimization
str = _string_n(newlen)
return (@assume_effects :total !:consistent @ccall jl_string_to_genericmemory(str::Any,)::Memory{UInt8})
else
# TODO: when implemented, this should use a memory growing call
return typeof(mem)(undef, newlen)
end
end
include("strings/basic.jl")
Expand Down
2 changes: 1 addition & 1 deletion base/abstractarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1584,7 +1584,7 @@ their component parts. A typical definition for an array that wraps a parent is
`Base.dataids(C::CustomArray) = dataids(C.parent)`.
"""
dataids(A::AbstractArray) = (UInt(objectid(A)),)
dataids(A::Memory) = (B = ccall(:jl_genericmemory_owner, Any, (Any,), A); (UInt(pointer(B isa typeof(A) ? B : A)),))
dataids(A::Memory) = (UInt(A.ptr),)
dataids(A::Array) = dataids(A.ref.mem)
dataids(::AbstractRange) = ()
dataids(x) = ()
Expand Down
4 changes: 0 additions & 4 deletions base/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3141,10 +3141,6 @@ function _wrap(ref::MemoryRef{T}, dims::NTuple{N, Int}) where {T, N}
mem_len = length(mem) + 1 - memoryrefoffset(ref)
len = Core.checked_dims(dims...)
@boundscheck mem_len >= len || invalid_wrap_err(mem_len, dims, len)
if N != 1 && !(ref === GenericMemoryRef(mem) && len === mem_len)
mem = ccall(:jl_genericmemory_slice, Memory{T}, (Any, Ptr{Cvoid}, Int), mem, ref.ptr_or_offset, len)
ref = memoryref(mem)
end
return ref
end

Expand Down
3 changes: 2 additions & 1 deletion base/essentials.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ const Bottom = Union{}
# Define minimal array interface here to help code used in macros:
length(a::Array{T, 0}) where {T} = 1
length(a::Array{T, 1}) where {T} = getfield(a, :size)[1]
length(a::Array) = getfield(getfield(getfield(a, :ref), :mem), :length)
length(a::Array{T, 2}) where {T} = (sz = getfield(a, :size); sz[1] * sz[2])
# other sizes are handled by generic prod definition for AbstractArray
length(a::GenericMemory) = getfield(a, :length)
throw_boundserror(A, I) = (@noinline; throw(BoundsError(A, I)))

Expand Down
32 changes: 17 additions & 15 deletions base/reshapedarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,31 +35,33 @@ end
length(R::ReshapedArrayIterator) = length(R.iter)
eltype(::Type{<:ReshapedArrayIterator{I}}) where {I} = @isdefined(I) ? ReshapedIndex{eltype(I)} : Any

## reshape(::Array, ::Dims) returns an Array, except for isbitsunion eltypes (issue #28611)
@noinline throw_dmrsa(dims, len) =
throw(DimensionMismatch("new dimensions $(dims) must be consistent with array length $len"))

## reshape(::Array, ::Dims) returns a new Array (to avoid conditionally aliasing the structure, only the data)
# reshaping to same # of dimensions
@eval function reshape(a::Array{T,M}, dims::NTuple{N,Int}) where {T,N,M}
throw_dmrsa(dims, len) =
throw(DimensionMismatch("new dimensions $(dims) must be consistent with array length $len"))
len = Core.checked_dims(dims...) # make sure prod(dims) doesn't overflow (and because of the comparison to length(a))
if len != length(a)
throw_dmrsa(dims, length(a))
end
isbitsunion(T) && return ReshapedArray(a, dims, ())
if N == M && dims == size(a)
return a
end
ref = a.ref
if M == 1 && N !== 1
mem = ref.mem::Memory{T}
if !(ref === memoryref(mem) && len === mem.length)
mem = ccall(:jl_genericmemory_slice, Memory{T}, (Any, Ptr{Cvoid}, Int), mem, ref.ptr_or_offset, len)
ref = memoryref(mem)::typeof(ref)
end
end
# or we could use `a = Array{T,N}(undef, ntuple(0, Val(N))); a.ref = ref; a.size = dims; return a` here
# or we could use `a = Array{T,N}(undef, ntuple(i->0, Val(N))); a.ref = ref; a.size = dims; return a` here to avoid the eval
return $(Expr(:new, :(Array{T,N}), :ref, :dims))
end

## reshape!(::Array, ::Dims) returns the original array, but must have the same dimensions and length as the original
# see also resize! for a similar operation that can change the length
function reshape!(a::Array{T,N}, dims::NTuple{N,Int}) where {T,N}
len = Core.checked_dims(dims...) # make sure prod(dims) doesn't overflow (and because of the comparison to length(a))
if len != length(a)
throw_dmrsa(dims, length(a))
end
setfield!(a, :dims, dims)
return a
end



"""
reshape(A, dims...) -> AbstractArray
Expand Down
15 changes: 1 addition & 14 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1435,18 +1435,6 @@ static const auto jl_allocgenericmemory = new JuliaFunction<TypeFnContextAndSize
AttributeSet::get(C, RetAttrs),
None); },
};
static const auto jlarray_data_owner_func = new JuliaFunction<>{
XSTR(jl_array_data_owner),
[](LLVMContext &C) {
auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
return FunctionType::get(T_prjlvalue,
{T_prjlvalue}, false);
},
[](LLVMContext &C) { return AttributeList::get(C,
Attributes(C, {Attribute::ReadOnly, Attribute::NoUnwind}),
Attributes(C, {Attribute::NonNull}),
None); },
};
#define BOX_FUNC(ct,at,attrs,nbytes) \
static const auto box_##ct##_func = new JuliaFunction<>{ \
XSTR(jl_box_##ct), \
Expand Down Expand Up @@ -4341,8 +4329,7 @@ static bool emit_f_opmemory(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
}
Value *data_owner = NULL; // owner object against which the write barrier must check
if (isboxed || layout->first_ptr >= 0) { // if elements are just bits, don't need a write barrier
Value *V = emit_memoryref_FCA(ctx, ref, layout);
data_owner = emit_genericmemoryowner(ctx, CreateSimplifiedExtractValue(ctx, V, 1));
data_owner = emit_memoryref_mem(ctx, ref, layout);
}
*ret = typed_store(ctx,
ptr,
Expand Down
35 changes: 1 addition & 34 deletions src/genericmemory.c
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ JL_DLLEXPORT jl_value_t *jl_genericmemory_to_string(jl_genericmemory_t *m, size_
if (how != 0) {
jl_value_t *o = jl_genericmemory_data_owner_field(m);
jl_genericmemory_data_owner_field(m) = NULL;
if (how == 3 && jl_is_string(o) &&
if (how == 3 && // implies jl_is_string(o)
((mlength + sizeof(void*) + 1 <= GC_MAX_SZCLASS) == (len + sizeof(void*) + 1 <= GC_MAX_SZCLASS))) {
if (jl_string_data(o)[len] != '\0')
jl_string_data(o)[len] = '\0';
Expand All @@ -221,39 +221,6 @@ JL_DLLEXPORT jl_genericmemory_t *jl_alloc_memory_any(size_t n)
return jl_alloc_genericmemory(jl_memory_any_type, n);
}

JL_DLLEXPORT jl_genericmemory_t *jl_genericmemory_slice(jl_genericmemory_t *mem, void *data, size_t len)
{
// Given a GenericMemoryRef represented as `jl_genericmemory_ref ref = {data, mem}`,
// return a new GenericMemory that only accesses the slice from the given GenericMemoryRef to
// the given length if this is possible to return. This allows us to make
// `length(Array)==length(Array.ref.mem)`, for simplification of this.
jl_datatype_t *dt = (jl_datatype_t*)jl_typetagof(mem);
const jl_datatype_layout_t *layout = dt->layout;
// repeated checks here ensure the values cannot overflow, since we know mem->length is a reasonable value
if (len > mem->length)
jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory slice"); // TODO: make a BoundsError
if (layout->flags.arrayelem_isunion) {
if (!((size_t)data == 0 && mem->length == len))
jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory slice"); // only exact slices are supported
data = mem->ptr;
}
else if (layout->size == 0) {
if ((size_t)data > mem->length || (size_t)data + len > mem->length)
jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory slice"); // TODO: make a BoundsError
data = mem->ptr;
}
else {
if (data < mem->ptr || (char*)data > (char*)mem->ptr + mem->length * layout->size || (char*)data + len * layout->size > (char*)mem->ptr + mem->length * layout->size)
jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory slice"); // TODO: make a BoundsError
}
jl_task_t *ct = jl_current_task;
jl_genericmemory_t *newmem = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, sizeof(jl_genericmemory_t) + sizeof(void*), dt);
newmem->length = len;
newmem->ptr = data;
jl_genericmemory_data_owner_field(newmem) = jl_genericmemory_owner(mem);
return newmem;
}

JL_DLLEXPORT void jl_genericmemory_copyto(jl_genericmemory_t *dest, char* destdata,
jl_genericmemory_t *src, char* srcdata,
size_t n) JL_NOTSAFEPOINT
Expand Down
6 changes: 1 addition & 5 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -1233,7 +1233,7 @@ STATIC_INLINE jl_value_t *jl_svecset(
0 = data is inlined
1 = owns the gc-managed data, exclusively (will free it)
2 = malloc-allocated pointer (does not own it)
3 = has a pointer to the object that owns the data pointer
3 = has a pointer to the String object that owns the data pointer (m must be isbits)
*/
STATIC_INLINE int jl_genericmemory_how(jl_genericmemory_t *m) JL_NOTSAFEPOINT
{
Expand All @@ -1249,8 +1249,6 @@ STATIC_INLINE int jl_genericmemory_how(jl_genericmemory_t *m) JL_NOTSAFEPOINT

STATIC_INLINE jl_value_t *jl_genericmemory_owner(jl_genericmemory_t *m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
{
if (jl_genericmemory_how(m) == 3)
return jl_genericmemory_data_owner_field(m);
return (jl_value_t*)m;
}

Expand Down Expand Up @@ -1280,8 +1278,6 @@ STATIC_INLINE jl_value_t *jl_genericmemory_ptr_set(
assert(i < m_->length);
jl_atomic_store_release(((_Atomic(jl_value_t*)*)(m_->ptr)) + i, (jl_value_t*)x);
if (x) {
if (jl_genericmemory_how(m_) == 3)
m = (void*)jl_genericmemory_data_owner_field(m_);
jl_gc_wb(m, x);
}
return (jl_value_t*)x;
Expand Down
24 changes: 9 additions & 15 deletions src/staticdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -932,7 +932,7 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
jl_genericmemory_t *m = (jl_genericmemory_t*)v;
const char *data = (const char*)m->ptr;
if (jl_genericmemory_how(m) == 3) {
jl_queue_for_serialization_(s, jl_genericmemory_data_owner_field(v), 1, immediate);
assert(jl_is_string(jl_genericmemory_data_owner_field(m)));
}
else if (layout->flags.arrayelem_isboxed) {
size_t i, l = m->length;
Expand Down Expand Up @@ -1472,17 +1472,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
jl_genericmemory_t *m = (jl_genericmemory_t*)v;
const jl_datatype_layout_t *layout = t->layout;
size_t len = m->length;
if (jl_genericmemory_how(m) == 3 && jl_is_genericmemory(jl_genericmemory_data_owner_field(m))) {
jl_genericmemory_t *owner = (jl_genericmemory_t*)jl_genericmemory_data_owner_field(m);
size_t data = ((char*)m->ptr - (char*)owner->ptr); // relocation offset (bytes)
write_uint(f, len);
write_uint(f, data);
write_pointerfield(s, (jl_value_t*)owner);
// similar to record_memoryref, but the field is always an (offset) pointer
arraylist_push(&s->memowner_list, (void*)(reloc_offset + offsetof(jl_genericmemory_t, ptr))); // relocation location
arraylist_push(&s->memowner_list, NULL); // relocation target (ignored)
}
// else if (jl_genericmemory_how(m) == 3) {
// if (jl_genericmemory_how(m) == 3) {
// jl_value_t *owner = jl_genericmemory_data_owner_field(m);
// write_uint(f, len);
// write_pointerfield(s, owner);
Expand All @@ -1491,7 +1481,8 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
// assert(new_mem->ptr == NULL);
// new_mem->ptr = (void*)((char*)m->ptr - (char*)owner); // relocation offset
// }
else {
// else
{
size_t datasize = len * layout->size;
size_t tot = datasize;
int isbitsunion = layout->flags.arrayelem_isunion;
Expand Down Expand Up @@ -1538,10 +1529,13 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
ios_write(s->const_data, (char*)m->ptr, tot);
}
}
if (len == 0) // TODO: should we have a zero-page, instead of writing each type's fragment separately?
if (len == 0) { // TODO: should we have a zero-page, instead of writing each type's fragment separately?
write_padding(s->const_data, layout->size ? layout->size : isbitsunion);
else if (jl_genericmemory_how(m) == 3 && jl_is_string(jl_genericmemory_data_owner_field(m)))
}
else if (jl_genericmemory_how(m) == 3) {
assert(jl_is_string(jl_genericmemory_data_owner_field(m)));
write_padding(s->const_data, 1);
}
}
else {
// Pointer eltypes are encoded in the mutable data section
Expand Down

0 comments on commit bce3d4d

Please sign in to comment.