Skip to content

Commit

Permalink
8332455: Improve G1 tasks to not override array lengths
Browse files Browse the repository at this point in the history
  • Loading branch information
rkennke committed May 17, 2024
1 parent 0b0445b commit aed57d2
Show file tree
Hide file tree
Showing 12 changed files with 431 additions and 169 deletions.
3 changes: 2 additions & 1 deletion src/hotspot/share/gc/g1/g1CollectedHeap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#include "gc/g1/g1MonotonicArenaFreePool.hpp"
#include "gc/g1/g1NUMA.hpp"
#include "gc/g1/g1SurvivorRegions.hpp"
#include "gc/g1/g1TaskQueueEntry.hpp"
#include "gc/g1/g1YoungGCAllocationFailureInjector.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/collectedHeap.hpp"
Expand Down Expand Up @@ -86,7 +87,7 @@ class ReferenceProcessor;
class STWGCTimer;
class WorkerThreads;

typedef OverflowTaskQueue<ScannerTask, mtGC> G1ScannerTasksQueue;
typedef OverflowTaskQueue<G1TaskQueueEntry, mtGC> G1ScannerTasksQueue;
typedef GenericTaskQueueSet<G1ScannerTasksQueue, mtGC> G1ScannerTasksQueueSet;

typedef int RegionIdx_t; // needs to hold [ 0..max_reserved_regions() )
Expand Down
14 changes: 7 additions & 7 deletions src/hotspot/share/gc/g1/g1ConcurrentMark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
// We move that task's local finger along.
_task->move_finger_to(addr);

_task->scan_task_entry(G1TaskQueueEntry::from_oop(cast_to_oop(addr)));
_task->scan_task_entry(G1TaskQueueEntry(cast_to_oop(addr)));
// we only partially drain the local queue and global stack
_task->drain_local_queue(true);
_task->drain_global_stack(true);
Expand Down Expand Up @@ -1967,16 +1967,16 @@ class VerifyNoCSetOops {

void operator()(G1TaskQueueEntry task_entry) const {
if (task_entry.is_array_slice()) {
guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice()));
guarantee(_g1h->is_in_reserved(task_entry.to_oop()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.to_oop()));
return;
}
guarantee(oopDesc::is_oop(task_entry.obj()),
guarantee(oopDesc::is_oop(task_entry.to_oop()),
"Non-oop " PTR_FORMAT ", phase: %s, info: %d",
p2i(task_entry.obj()), _phase, _info);
HeapRegion* r = _g1h->heap_region_containing(task_entry.obj());
p2i(task_entry.to_oop()), _phase, _info);
HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
"obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
p2i(task_entry.obj()), _phase, _info, r->hrm_index());
p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
}
};

Expand Down Expand Up @@ -2346,7 +2346,7 @@ bool G1CMTask::get_entries_from_global_stack() {
if (task_entry.is_null()) {
break;
}
assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
bool success = _task_queue->push(task_entry);
// We only call this when the local queue is empty or under a
// given target limit. So, we do not expect this push to fail.
Expand Down
40 changes: 4 additions & 36 deletions src/hotspot/share/gc/g1/g1ConcurrentMark.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "gc/g1/g1HeapRegionSet.hpp"
#include "gc/g1/g1HeapVerifier.hpp"
#include "gc/g1/g1RegionMarkStatsCache.hpp"
#include "gc/g1/g1TaskQueueEntry.hpp"
#include "gc/shared/gcCause.hpp"
#include "gc/shared/taskTerminator.hpp"
#include "gc/shared/taskqueue.hpp"
Expand All @@ -51,41 +52,6 @@ class G1RegionToSpaceMapper;
class G1SurvivorRegions;
class ThreadClosure;

// This is a container class for either an oop or a continuation address for
// mark stack entries. Both are pushed onto the mark stack.
class G1TaskQueueEntry {
private:
void* _holder;

static const uintptr_t ArraySliceBit = 1;

G1TaskQueueEntry(oop obj) : _holder(obj) {
assert(_holder != nullptr, "Not allowed to set null task queue element");
}
G1TaskQueueEntry(HeapWord* addr) : _holder((void*)((uintptr_t)addr | ArraySliceBit)) { }
public:

G1TaskQueueEntry() : _holder(nullptr) { }
// Trivially copyable, for use in GenericTaskQueue.

static G1TaskQueueEntry from_slice(HeapWord* what) { return G1TaskQueueEntry(what); }
static G1TaskQueueEntry from_oop(oop obj) { return G1TaskQueueEntry(obj); }

oop obj() const {
assert(!is_array_slice(), "Trying to read array slice " PTR_FORMAT " as oop", p2i(_holder));
return cast_to_oop(_holder);
}

HeapWord* slice() const {
assert(is_array_slice(), "Trying to read oop " PTR_FORMAT " as array slice", p2i(_holder));
return (HeapWord*)((uintptr_t)_holder & ~ArraySliceBit);
}

bool is_oop() const { return !is_array_slice(); }
bool is_array_slice() const { return ((uintptr_t)_holder & ArraySliceBit) != 0; }
bool is_null() const { return _holder == nullptr; }
};

typedef GenericTaskQueue<G1TaskQueueEntry, mtGC> G1CMTaskQueue;
typedef GenericTaskQueueSet<G1CMTaskQueue, mtGC> G1CMTaskQueueSet;

Expand Down Expand Up @@ -841,9 +807,11 @@ class G1CMTask : public TerminatorTerminator {

template<bool scan> void process_grey_task_entry(G1TaskQueueEntry task_entry);
public:
// Apply the closure on the objArray metadata.
inline void scan_objArray_start(objArrayOop obj);
// Apply the closure on the given area of the objArray. Return the number of words
// scanned.
inline size_t scan_objArray(objArrayOop obj, MemRegion mr);
inline size_t scan_objArray(objArrayOop obj, int start, int end);
// Resets the task; should be called right at the beginning of a marking phase.
void reset(G1CMBitMap* mark_bitmap);
// Clears all the fields that correspond to a claimed region.
Expand Down
28 changes: 17 additions & 11 deletions src/hotspot/share/gc/g1/g1ConcurrentMark.inline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,10 @@ inline void G1CMMarkStack::iterate(Fn fn) const {
inline void G1CMTask::scan_task_entry(G1TaskQueueEntry task_entry) { process_grey_task_entry<true>(task_entry); }

inline void G1CMTask::push(G1TaskQueueEntry task_entry) {
assert(task_entry.is_array_slice() || _g1h->is_in_reserved(task_entry.obj()), "invariant");
assert(task_entry.is_array_slice() || _g1h->is_in_reserved(task_entry.to_oop()), "invariant");
assert(task_entry.is_array_slice() || !_g1h->is_on_master_free_list(
_g1h->heap_region_containing(task_entry.obj())), "invariant");
assert(task_entry.is_array_slice() || _mark_bitmap->is_marked(cast_from_oop<HeapWord*>(task_entry.obj())), "invariant");
_g1h->heap_region_containing(task_entry.to_oop())), "invariant");
assert(task_entry.is_array_slice() || _mark_bitmap->is_marked(cast_from_oop<HeapWord*>(task_entry.to_oop())), "invariant");

if (!_task_queue->push(task_entry)) {
// The local task queue looks full. We need to push some entries
Expand Down Expand Up @@ -160,15 +160,15 @@ inline bool G1CMTask::is_below_finger(oop obj, HeapWord* global_finger) const {

template<bool scan>
inline void G1CMTask::process_grey_task_entry(G1TaskQueueEntry task_entry) {
assert(scan || (task_entry.is_oop() && task_entry.obj()->is_typeArray()), "Skipping scan of grey non-typeArray");
assert(task_entry.is_array_slice() || _mark_bitmap->is_marked(cast_from_oop<HeapWord*>(task_entry.obj())),
assert(scan || (task_entry.is_oop() && task_entry.to_oop()->is_typeArray()), "Skipping scan of grey non-typeArray");
assert(task_entry.is_array_slice() || _mark_bitmap->is_marked(cast_from_oop<HeapWord*>(task_entry.to_oop())),
"Any stolen object should be a slice or marked");

if (scan) {
oop obj = task_entry.to_oop();
if (task_entry.is_array_slice()) {
_words_scanned += _objArray_processor.process_slice(task_entry.slice());
_words_scanned += _objArray_processor.process_slice(obj, task_entry.slice(), task_entry.pow());
} else {
oop obj = task_entry.obj();
if (G1CMObjArrayProcessor::should_be_sliced(obj)) {
_words_scanned += _objArray_processor.process_obj(obj);
} else {
Expand All @@ -179,9 +179,15 @@ inline void G1CMTask::process_grey_task_entry(G1TaskQueueEntry task_entry) {
check_limits();
}

inline size_t G1CMTask::scan_objArray(objArrayOop obj, MemRegion mr) {
obj->oop_iterate(_cm_oop_closure, mr);
return mr.word_size();
inline void G1CMTask::scan_objArray_start(objArrayOop obj) {
if (Devirtualizer::do_metadata(_cm_oop_closure)) {
Devirtualizer::do_klass(_cm_oop_closure, obj->klass());
}
}

inline size_t G1CMTask::scan_objArray(objArrayOop obj, int start, int end) {
obj->oop_iterate_range(_cm_oop_closure, start, end);
return (end - start) * (UseCompressedOops ? 2 : 1);
}

inline void G1ConcurrentMark::update_top_at_mark_start(HeapRegion* r) {
Expand Down Expand Up @@ -264,7 +270,7 @@ inline bool G1CMTask::make_reference_grey(oop obj) {
// be pushed on the stack. So, some duplicate work, but no
// correctness problems.
if (is_below_finger(obj, global_finger)) {
G1TaskQueueEntry entry = G1TaskQueueEntry::from_oop(obj);
G1TaskQueueEntry entry(obj);
if (obj->is_typeArray()) {
// Immediately process arrays of primitive types, rather
// than pushing on the mark stack. This keeps us from
Expand Down
108 changes: 74 additions & 34 deletions src/hotspot/share/gc/g1/g1ConcurrentMarkObjArrayProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,51 +31,91 @@
#include "memory/memRegion.hpp"
#include "utilities/globalDefinitions.hpp"

void G1CMObjArrayProcessor::push_array_slice(HeapWord* what) {
_task->push(G1TaskQueueEntry::from_slice(what));
}
size_t G1CMObjArrayProcessor::process_obj(oop obj) {
assert(should_be_sliced(obj), "Must be an array object %d and large " SIZE_FORMAT, obj->is_objArray(), obj->size());

size_t G1CMObjArrayProcessor::process_array_slice(objArrayOop obj, HeapWord* start_from, size_t remaining) {
size_t words_to_scan = MIN2(remaining, (size_t)ObjArrayMarkingStride);
assert(obj->is_objArray(), "expect object array");
objArrayOop array = objArrayOop(obj);

if (remaining > ObjArrayMarkingStride) {
push_array_slice(start_from + ObjArrayMarkingStride);
}
_task->scan_objArray_start(array);

// Then process current area.
MemRegion mr(start_from, words_to_scan);
return _task->scan_objArray(obj, mr);
}
int len = array->length();

size_t G1CMObjArrayProcessor::process_obj(oop obj) {
assert(should_be_sliced(obj), "Must be an array object %d and large " SIZE_FORMAT, obj->is_objArray(), obj->size());
int bits = log2i_graceful(len);
// Compensate for non-power-of-two arrays, cover the array in excess:
if (len != (1 << bits)) bits++;

// Only allow full slices on the queue. This frees do_sliced_array() from checking from/to
// boundaries against array->length(), touching the array header on every slice.
//
// To do this, we cut the prefix in full-sized slices, and submit them on the queue.
// If the array is not divided in slice sizes, then there would be an irregular tail,
// which we will process separately.

int last_idx = 0;

return process_array_slice(objArrayOop(obj), cast_from_oop<HeapWord*>(obj), objArrayOop(obj)->size());
int slice = 1;
int pow = bits;

// Handle overflow
if (pow >= 31) {
assert (pow == 31, "sanity");
pow--;
slice = 2;
last_idx = (1 << pow);
_task->push(G1TaskQueueEntry(array, 1, pow));
}

// Split out tasks, as suggested in G1TaskQueueEntry docs. Record the last
// successful right boundary to figure out the irregular tail.
while ((1 << pow) > (int)ObjArrayMarkingStride &&
(slice * 2 < G1TaskQueueEntry::slice_size())) {
pow--;
int left_slice = slice * 2 - 1;
int right_slice = slice * 2;
int left_slice_end = left_slice * (1 << pow);
if (left_slice_end < len) {
_task->push(G1TaskQueueEntry(array, left_slice, pow));
slice = right_slice;
last_idx = left_slice_end;
} else {
slice = left_slice;
}
}

// Process the irregular tail, if present
int from = last_idx;
if (from < len) {
return _task->scan_objArray(array, from, len);
}
return 0;
}

size_t G1CMObjArrayProcessor::process_slice(HeapWord* slice) {
size_t G1CMObjArrayProcessor::process_slice(oop obj, int slice, int pow) {

// Find the start address of the objArrayOop.
// Shortcut the BOT access if the given address is from a humongous object. The BOT
// slide is fast enough for "smaller" objects in non-humongous regions, but is slower
// than directly using heap region table.
G1CollectedHeap* g1h = G1CollectedHeap::heap();
HeapRegion* r = g1h->heap_region_containing(slice);
assert(obj->is_objArray(), "expect object array");
objArrayOop array = objArrayOop(obj);

HeapWord* const start_address = r->is_humongous() ?
r->humongous_start_region()->bottom() :
r->block_start(slice);
assert (ObjArrayMarkingStride > 0, "sanity");

// Split out tasks, as suggested in ShenandoahMarkTask docs. Avoid pushing tasks that
// are known to start beyond the array.
while ((1 << pow) > (int)ObjArrayMarkingStride && (slice*2 < G1TaskQueueEntry::slice_size())) {
pow--;
slice *= 2;
_task->push(G1TaskQueueEntry(array, slice - 1, pow));
}

assert(cast_to_oop(start_address)->is_objArray(), "Address " PTR_FORMAT " does not refer to an object array ", p2i(start_address));
assert(start_address < slice,
"Object start address " PTR_FORMAT " must be smaller than decoded address " PTR_FORMAT,
p2i(start_address),
p2i(slice));
int slice_size = 1 << pow;

objArrayOop objArray = objArrayOop(cast_to_oop(start_address));
int from = (slice - 1) * slice_size;
int to = slice * slice_size;

size_t already_scanned = pointer_delta(slice, start_address);
size_t remaining = objArray->size() - already_scanned;
#ifdef ASSERT
int len = array->length();
assert (0 <= from && from < len, "from is sane: %d/%d", from, len);
assert (0 < to && to <= len, "to is sane: %d/%d", to, len);
#endif

return process_array_slice(objArray, slice, remaining);
return _task->scan_objArray(array, from, to);
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,19 +38,14 @@ class G1CMObjArrayProcessor {
// Reference to the task for doing the actual work.
G1CMTask* _task;

// Push the continuation at the given address onto the mark stack.
void push_array_slice(HeapWord* addr);

// Process (apply the closure) on the given continuation of the given objArray.
size_t process_array_slice(objArrayOop const obj, HeapWord* start_from, size_t remaining);
public:
static bool should_be_sliced(oop obj);

G1CMObjArrayProcessor(G1CMTask* task) : _task(task) {
}

// Process the given continuation. Returns the number of words scanned.
size_t process_slice(HeapWord* slice);
size_t process_slice(oop ary, int chunk, int pow);
// Start processing the given objArrayOop by scanning the header and pushing its
// continuation.
size_t process_obj(oop obj);
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/gc/g1/g1OopClosures.inline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ inline void G1ScanClosureBase::prefetch_and_push(T* p, const oop obj) {
obj->forwardee() == RawAccess<>::oop_load(p)),
"p should still be pointing to obj or to its forwardee");

_par_scan_state->push_on_queue(ScannerTask(p));
_par_scan_state->push_on_queue(G1TaskQueueEntry(p));
}

template <class T>
Expand Down
Loading

0 comments on commit aed57d2

Please sign in to comment.