Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions changelog/druntime.d_arrayappendcTX_template.dd
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Templatize `_d_arrayappendcTX` runtime hook

This refactoring removes the `TypeInfo` parameter, replacing it with a template type parameter.
89 changes: 77 additions & 12 deletions druntime/src/core/internal/array/appending.d
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@
*/
module core.internal.array.appending;

/// See $(REF _d_arrayappendcTX, rt,lifetime,_d_arrayappendcTX)
private extern (C) byte[] _d_arrayappendcTX(const TypeInfo ti, ref return scope byte[] px, size_t n) @trusted pure nothrow;
private extern (C)
{
bool gc_expandArrayUsed(void[] slice, size_t newUsed, bool atomic) pure nothrow;
bool gc_shrinkArrayUsed(void[] slice, size_t existingUsed, bool atomic) pure nothrow;
}

/// `true` when copy-initializing a `T` from a `ref T` compiles inside a `nothrow` lambda.
private enum isCopyingNothrow(T) = __traits(compiles, (ref T rhs) nothrow { T lhs = rhs; });

Expand All @@ -25,22 +28,83 @@ private enum isCopyingNothrow(T) = __traits(compiles, (ref T rhs) nothrow { T lh
* Bugs:
* This function template was ported from a much older runtime hook that bypassed safety,
* purity, and throwability checks. To prevent breaking existing code, this function template
* is temporarily declared `@trusted pure` until the implementation can be brought up to modern D expectations.
* is temporarily declared `@trusted` until the implementation can be brought up to modern D expectations.
*/
ref Tarr _d_arrayappendcTX(Tarr : T[], T)(return ref scope Tarr px, size_t n) @trusted
{
// needed for CTFE: https://github.com/dlang/druntime/pull/3870#issuecomment-1178800718
import core.internal.traits: Unqual;

// Array type built from `Unqual!T`; the helper below is instantiated on this
// type instead of on `Tarr`.
alias Unqual_T = Unqual!T;
alias Unqual_Tarr = Unqual_T[];
// Whether the element type is `shared` is computed at compile time and
// forwarded to the helper as an ordinary `bool` argument.
enum isshared = is(T == shared);
// Local copy of `px` viewed as the unqualified array type; the helper takes
// it by `ref` and returns it by `ref`.
auto unqual_px = cast(Unqual_Tarr) px;

// Ignoring additional attributes allows reusing the same generated code
// The helper's result is cast back to the caller's array type and stored in `px`.
px = cast(Tarr)_d_arrayappendcTX_(unqual_px, n, isshared);
return px;
}

private ref Tarr _d_arrayappendcTX_(Tarr : T[], T)(return ref scope Tarr px, size_t n, bool isshared) @trusted
{
version (DigitalMars) pragma(inline, false);
version (D_TypeInfo)
{
auto ti = typeid(Tarr);
// Short circuit if no data is being appended.
if (n == 0)
return px;

import core.stdc.string : memcpy, memset;
import core.internal.lifetime : __doPostblit;
import core.internal.array.utils: __arrayAlloc, newCapacity, __typeAttrs;
import core.internal.gc.blockmeta : PAGESIZE;
import core.exception: onOutOfMemoryError;
import core.memory: GC;

alias BlkAttr = GC.BlkAttr;

// _d_arrayappendcTX takes the `px` as a ref byte[], but its length
// should still be the original length
auto pxx = (cast(byte*)px.ptr)[0 .. px.length];
._d_arrayappendcTX(ti, pxx, n);
px = (cast(T*)pxx.ptr)[0 .. pxx.length];
enum sizeelem = T.sizeof;
auto length = px.length;
auto newlength = length + n;
auto newsize = newlength * sizeelem;
auto size = length * sizeelem;

if (!gc_expandArrayUsed(px, newsize, isshared))
{
// could not set the size, we must reallocate.
auto newcap = newCapacity(newlength, sizeelem);
auto attrs = __typeAttrs!T(cast(void*)px.ptr) | BlkAttr.APPENDABLE;

T* ptr = cast(T*)GC.malloc(newcap, attrs, typeid(T));
if (ptr is null)
{
onOutOfMemoryError();
assert(0);
}

if (newsize != newcap)
{
// For small blocks that are always fully scanned, if we allocated more
// capacity than was requested, we are responsible for zeroing that
// memory.
// TODO: should let the GC figure this out, as this property may
// not always hold.
if (!(attrs & BlkAttr.NO_SCAN) && newcap < PAGESIZE)
memset(ptr + newlength, 0, newcap - newsize);

gc_shrinkArrayUsed(ptr[0 .. newlength], newcap, isshared);
}

memcpy(ptr, px.ptr, size);

// do postblit processing.
__doPostblit!T(ptr[0 .. length]);

px = ptr[0 .. newlength];
return px;
}

// we were able to expand in place, just update the length
px = px.ptr[0 .. newlength];
return px;
}
else
Expand All @@ -50,9 +114,10 @@ ref Tarr _d_arrayappendcTX(Tarr : T[], T)(return ref scope Tarr px, size_t n) @t
version (D_ProfileGC)
{
/**
* TraceGC wrapper around $(REF _d_arrayappendT, core,internal,array,appending).
* TraceGC wrapper around _d_arrayappendcTX.
*/
ref Tarr _d_arrayappendcTXTrace(Tarr : T[], T)(return ref scope Tarr px, size_t n, string file = __FILE__, int line = __LINE__, string funcname = __FUNCTION__) @trusted
ref Tarr _d_arrayappendcTXTrace(Tarr : T[], T)(return ref scope Tarr px, size_t n,
string file = __FILE__, int line = __LINE__, string funcname = __FUNCTION__) @trusted
{
version (D_TypeInfo)
{
Expand Down
66 changes: 66 additions & 0 deletions druntime/src/core/internal/array/utils.d
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,72 @@ void[] __arrayAlloc(T)(size_t arrSize) @trusted
return null;
}

/**
Given an array that needs to be expanded to `newlength` elements,
compute a new capacity.

Better version by Dave Fladebo, enhanced by Steven Schveighoffer:
This uses an inverse logarithmic algorithm to pre-allocate a bit more
space for larger arrays.
- The maximum "extra" space is about 80% of the requested space. This is for
PAGE size and smaller.
- As the arrays grow, the relative pre-allocated space shrinks.
- Perhaps most importantly, overall memory usage and stress on the GC
is decreased significantly for demanding environments.
- The algorithm is tuned to avoid any division at runtime.

If `newlength * elemsize` is zero, or has any bit set outside the
`largestAllowed` mask, that product is returned unchanged (no extra space).

Params:
newlength = new `.length`
elemsize = size of the element in the new array
Returns: new capacity for array
*/
size_t newCapacity(size_t newlength, size_t elemsize) pure nothrow
{
// Start from the exact requested size; this is also the value returned when
// no growth is applied.
size_t newcap = newlength * elemsize;

/*
* Max growth factor numerator is 234, so allow for multiplying by 256.
* But also, the resulting size cannot be more than 2x, so prevent
* growing if 2x would fill up the address space (for 32-bit)
*/
enum largestAllowed = (ulong.max >> 8) & (size_t.max >> 1);
if (!newcap || (newcap & ~largestAllowed))
return newcap;

/*
* The calculation for "extra" space depends on the requested capacity.
* We use an inverse logarithm of the new capacity to add an extra 15%
* to 83% capacity. Note that normally we humans think in terms of
* percent, but using 128 instead of 100 for the denominator means we
* can avoid all division by simply bit-shifting. Since there are only
* 64 bits in a long, the bsr of a size_t is going to be 0 - 63. Using
* a lookup table allows us to precalculate the multiplier based on the
* inverse logarithm. The formula roughly is:
*
* newcap = request * (1.0 + min(0.83, 10.0 / (log(request) + 1)))
*/
import core.bitop;
// One multiplier (numerator over 128) per bit position of a size_t. The
// initializer lambda asserts `__ctfe`, i.e. it is only meant to run at
// compile time.
static immutable multTable = (){
assert(__ctfe);
ulong[size_t.sizeof * 8] result;
foreach (i; 0 .. result.length)
{
// 128 is the 1.0 baseline; the extra part shrinks as `i` grows and the
// total is capped at 234.
auto factor = 128 + 1280 / (i + 1);
result[i] = factor > 234 ? 234 : factor;
}
return result;
}();

// `newcap` is non-zero here (checked above); pick the multiplier by the
// index of its highest set bit.
auto mult = multTable[bsr(newcap)];

// if this were per cent, then the code would look like:
// ((newlength * mult + 99) / 100) * elemsize
// The `+ 127` rounds the scaled element count up before the shift by 7 (divide by 128).
newcap = cast(size_t)((newlength * mult + 127) >> 7) * elemsize;
debug(PRINTF) printf("mult: %2.2f, alloc: %2.2f\n",mult/128.0,newcap / cast(double)elemsize);
debug(PRINTF) printf("newcap = %zd, newlength = %zd, elemsize = %zd\n", newcap, newlength, elemsize);
return newcap;
}

uint __typeAttrs(T)(void *copyAttrsFrom = null)
{
import core.internal.traits : hasIndirections, hasElaborateDestructor;
Expand Down
137 changes: 0 additions & 137 deletions druntime/src/rt/lifetime.d
Original file line number Diff line number Diff line change
Expand Up @@ -628,143 +628,6 @@ extern (C) void rt_finalizeFromGC(void* p, size_t size, uint attr, TypeInfo type
}


/**
Given an array that needs to be expanded to `newlength` elements,
compute a new capacity.

Better version by Dave Fladebo, enhanced by Steven Schveighoffer:
This uses an inverse logarithmic algorithm to pre-allocate a bit more
space for larger arrays.
- The maximum "extra" space is about 80% of the requested space. This is for
PAGE size and smaller.
- As the arrays grow, the relative pre-allocated space shrinks.
- Perhaps most importantly, overall memory usage and stress on the GC
is decreased significantly for demanding environments.
- The algorithm is tuned to avoid any division at runtime.

If `newlength * elemsize` is zero, or has any bit set outside the
`largestAllowed` mask, that product is returned unchanged (no extra space).

Params:
newlength = new `.length`
elemsize = size of the element in the new array
Returns: new capacity for array
*/
size_t newCapacity(size_t newlength, size_t elemsize)
{
// Start from the exact requested size; this is also the value returned when
// no growth is applied.
size_t newcap = newlength * elemsize;

/*
* Max growth factor numerator is 234, so allow for multiplying by 256.
* But also, the resulting size cannot be more than 2x, so prevent
* growing if 2x would fill up the address space (for 32-bit)
*/
enum largestAllowed = (ulong.max >> 8) & (size_t.max >> 1);
if (!newcap || (newcap & ~largestAllowed))
return newcap;

/*
* The calculation for "extra" space depends on the requested capacity.
* We use an inverse logarithm of the new capacity to add an extra 15%
* to 83% capacity. Note that normally we humans think in terms of
* percent, but using 128 instead of 100 for the denominator means we
* can avoid all division by simply bit-shifting. Since there are only
* 64 bits in a long, the bsr of a size_t is going to be 0 - 63. Using
* a lookup table allows us to precalculate the multiplier based on the
* inverse logarithm. The formula roughly is:
*
* newcap = request * (1.0 + min(0.83, 10.0 / (log(request) + 1)))
*/
import core.bitop;
// One multiplier (numerator over 128) per bit position of a size_t. The
// initializer lambda asserts `__ctfe`, i.e. it is only meant to run at
// compile time.
static immutable multTable = (){
assert(__ctfe);
ulong[size_t.sizeof * 8] result;
foreach (i; 0 .. result.length)
{
// 128 is the 1.0 baseline; the extra part shrinks as `i` grows and the
// total is capped at 234.
auto factor = 128 + 1280 / (i + 1);
result[i] = factor > 234 ? 234 : factor;
}
return result;
}();

// `newcap` is non-zero here (checked above); pick the multiplier by the
// index of its highest set bit.
auto mult = multTable[bsr(newcap)];

// if this were per cent, then the code would look like:
// ((newlength * mult + 99) / 100) * elemsize
// The `+ 127` rounds the scaled element count up before the shift by 7 (divide by 128).
newcap = cast(size_t)((newlength * mult + 127) >> 7) * elemsize;
debug(PRINTF) printf("mult: %2.2f, alloc: %2.2f\n",mult/128.0,newcap / cast(double)elemsize);
debug(PRINTF) printf("newcap = %zd, newlength = %zd, elemsize = %zd\n", newcap, newlength, elemsize);
return newcap;
}


/**
Extend an array by n elements.

Caller must initialize those elements.

If the existing block cannot be expanded in place, a new block is allocated
and the existing elements are copied into it (followed by postblit processing).

Params:
ti = type info of array type (not element type)
px = array to append to, cast to `byte[]` while keeping the same `.length`. Will be updated.
n = number of elements to append
Returns: `px` after being appended to
*/
extern (C)
byte[] _d_arrayappendcTX(const TypeInfo ti, return scope ref byte[] px, size_t n) @weak
{
// This is a cut&paste job from _d_arrayappendT(). Should be refactored.

// Short circuit if no data is being appended.
if (n == 0)
return px;


// only optimize array append where ti is not a shared type
auto tinext = unqualify(ti.next);
auto sizeelem = tinext.tsize; // array element size
auto isshared = typeid(ti) is typeid(TypeInfo_Shared);
// `px.length` counts elements (see Params), so byte sizes are derived by
// multiplying with the element size.
auto length = px.length;
auto newlength = length + n;
auto newsize = newlength * sizeelem;
auto size = length * sizeelem;

// Pass the slice with its size in bytes, not `px` itself, whose length is an
// element count.
if (!gc_expandArrayUsed(px.ptr[0 .. size], newsize, isshared))
{
// could not set the size, we must reallocate.
auto newcap = newCapacity(newlength, sizeelem);
auto attrs = __typeAttrs(tinext, px.ptr) | BlkAttr.APPENDABLE;
auto ptr = cast(byte*) GC.malloc(newcap, attrs, tinext);
if (ptr is null)
{
onOutOfMemoryError();
assert(0);
}

if (newsize != newcap)
{
// For small blocks that are always fully scanned, if we allocated more
// capacity than was requested, we are responsible for zeroing that
// memory.
// TODO: should let the GC figure this out, as this property may
// not always hold.
if (!(attrs & BlkAttr.NO_SCAN) && newcap < PAGESIZE)
memset(ptr + newsize, 0, newcap - newsize);

gc_shrinkArrayUsed(ptr[0 .. newsize], newcap, isshared);
}

// Copy only the pre-existing elements; the appended ones are left for the
// caller to initialize.
memcpy(ptr, px.ptr, size);

// do postblit processing.
__doPostblit(ptr, size, tinext);

// The returned slice keeps the element-count convention for `.length`.
px = ptr[0 .. newlength];
return px;
}

// we were able to expand in place, just update the length
px = px.ptr[0 .. newlength];
return px;
}


/**
Append `dchar` to `char[]`, converting UTF-32 to UTF-8

Expand Down
1 change: 0 additions & 1 deletion druntime/src/rt/tracegc.d
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ extern (C) void _d_delmemory(void* *p);
extern (C) void* _d_arrayliteralTX(const TypeInfo ti, size_t length);
extern (C) void* _d_assocarrayliteralTX(const TypeInfo_AssociativeArray ti,
void[] keys, void[] vals);
extern (C) byte[] _d_arrayappendcTX(const TypeInfo ti, return scope ref byte[] px, size_t n);
extern (C) void[] _d_arrayappendcd(ref byte[] x, dchar c);
extern (C) void[] _d_arrayappendwd(ref byte[] x, dchar c);
extern (C) void* _d_allocmemory(size_t sz);
Expand Down
Loading