Skip to content

Commit 8a398e0

Browse files
authored
Collect live bytes per space, and report by space (#1238)
The current `count_live_bytes_in_gc` feature adds up the size of all live objects and compares the total with the used pages reported by the plan. There are two issues with the feature: 1. The VM space is not included in the used pages reported by the plan, but the live objects include objects in the VM space. So the reported fragmentation/utilization is wrong when the VM space is in use. 2. Spaces/policies have very different fragmentation ratios. Reporting the fragmentation for the entire heap is not useful. This PR refactors the current `count_live_bytes_in_gc` feature so we collect live bytes per space, and report by space.
1 parent 3d7bc11 commit 8a398e0

File tree

22 files changed

+167
-92
lines changed

22 files changed

+167
-92
lines changed

Cargo.toml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -172,9 +172,6 @@ work_packet_stats = []
172172
# Count the malloc'd memory into the heap size
173173
malloc_counted_size = []
174174

175-
# Count the size of all live objects in GC
176-
count_live_bytes_in_gc = []
177-
178175
# Workaround a problem where bpftrace scripts (see tools/tracing/timeline/capture.bt) cannot
179176
# capture the type names of work packets.
180177
bpftrace_workaround = []

docs/userguide/src/migration/prefix.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,28 @@ Notes for the mmtk-core developers:
3232

3333
## 0.30.0
3434

35+
### `live_bytes_in_last_gc` becomes a runtime option, and returns a map for live bytes in each space
36+
37+
```admonish tldr
38+
`count_live_bytes_in_gc` is now a runtime option instead of a feature (build-time), and we collect
39+
live bytes statistics per space. Correspondingly, `memory_manager::live_bytes_in_last_gc` now returns a map for
40+
live bytes in each space.
41+
```
42+
43+
API changes:
44+
45+
- module `util::options`
46+
+ `Options` includes `count_live_bytes_in_gc`, which defaults to `false`. This can be turned on at run-time.
47+
+ The old `count_live_bytes_in_gc` feature is removed.
48+
- module `memory_manager`
49+
+ `live_bytes_in_last_gc` now returns a `HashMap<&'static str, LiveBytesStats>`. The keys are
50+
strings for space names, and the values are statistics for live bytes in the space.
51+
52+
See also:
53+
54+
- PR: <https://github.com/mmtk/mmtk-core/pull/1238>
55+
56+
3557
### mmap-related functions require annotation
3658

3759
```admonish tldr

src/global_state.rs

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1+
use atomic_refcell::AtomicRefCell;
2+
use std::collections::HashMap;
13
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
24
use std::sync::Mutex;
35
use std::time::Instant;
46

5-
use atomic_refcell::AtomicRefCell;
6-
77
/// This stores some global states for an MMTK instance.
88
/// Some MMTK components like plans and allocators may keep a reference to the struct, and can access it.
99
// This used to be a part of the `BasePlan`. In that case, any component that accesses
@@ -45,9 +45,8 @@ pub struct GlobalState {
4545
/// A counter that keeps track of the number of bytes allocated by malloc
4646
#[cfg(feature = "malloc_counted_size")]
4747
pub(crate) malloc_bytes: AtomicUsize,
48-
/// This stores the size in bytes for all the live objects in last GC. This counter is only updated in the GC release phase.
49-
#[cfg(feature = "count_live_bytes_in_gc")]
50-
pub(crate) live_bytes_in_last_gc: AtomicUsize,
48+
/// This stores the live bytes and the used bytes (by pages) for each space in the last GC. This counter is only updated in the GC release phase.
49+
pub(crate) live_bytes_in_last_gc: AtomicRefCell<HashMap<&'static str, LiveBytesStats>>,
5150
}
5251

5352
impl GlobalState {
@@ -183,16 +182,6 @@ impl GlobalState {
183182
pub(crate) fn decrease_malloc_bytes_by(&self, size: usize) {
184183
self.malloc_bytes.fetch_sub(size, Ordering::SeqCst);
185184
}
186-
187-
#[cfg(feature = "count_live_bytes_in_gc")]
188-
pub fn get_live_bytes_in_last_gc(&self) -> usize {
189-
self.live_bytes_in_last_gc.load(Ordering::SeqCst)
190-
}
191-
192-
#[cfg(feature = "count_live_bytes_in_gc")]
193-
pub fn set_live_bytes_in_last_gc(&self, size: usize) {
194-
self.live_bytes_in_last_gc.store(size, Ordering::SeqCst);
195-
}
196185
}
197186

198187
impl Default for GlobalState {
@@ -213,8 +202,7 @@ impl Default for GlobalState {
213202
allocation_bytes: AtomicUsize::new(0),
214203
#[cfg(feature = "malloc_counted_size")]
215204
malloc_bytes: AtomicUsize::new(0),
216-
#[cfg(feature = "count_live_bytes_in_gc")]
217-
live_bytes_in_last_gc: AtomicUsize::new(0),
205+
live_bytes_in_last_gc: AtomicRefCell::new(HashMap::new()),
218206
}
219207
}
220208
}
@@ -225,3 +213,15 @@ pub enum GcStatus {
225213
GcPrepare,
226214
GcProper,
227215
}
216+
217+
/// Statistics for the live bytes in the last GC. The statistics are per space.
218+
#[derive(Copy, Clone, Debug)]
219+
pub struct LiveBytesStats {
220+
/// Total accumulated bytes of live objects in the space.
221+
pub live_bytes: usize,
222+
/// Total pages used by the space.
223+
pub used_pages: usize,
224+
/// Total bytes used by the space, computed from `used_pages`.
225+
/// The ratio of live_bytes and used_bytes reflects the utilization of the memory in the space.
226+
pub used_bytes: usize,
227+
}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ pub(crate) use mmtk::MMAPPER;
4646
pub use mmtk::MMTK;
4747

4848
mod global_state;
49+
pub use crate::global_state::LiveBytesStats;
4950

5051
mod policy;
5152

src/memory_manager.rs

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ use crate::vm::slot::MemorySlice;
2626
use crate::vm::ReferenceGlue;
2727
use crate::vm::VMBinding;
2828

29+
use std::collections::HashMap;
30+
2931
/// Initialize an MMTk instance. A VM should call this method after creating an [`crate::MMTK`]
3032
/// instance but before using any of the methods provided in MMTk (except `process()` and `process_bulk()`).
3133
///
@@ -531,16 +533,18 @@ pub fn free_bytes<VM: VMBinding>(mmtk: &MMTK<VM>) -> usize {
531533
mmtk.get_plan().get_free_pages() << LOG_BYTES_IN_PAGE
532534
}
533535

534-
/// Return the size of all the live objects in bytes in the last GC. MMTk usually accounts for memory in pages.
536+
/// Return a hash map for live bytes statistics in the last GC for each space.
537+
///
538+
/// MMTk usually accounts for memory in pages by each space.
535539
/// This is a special method that we count the size of every live object in a GC, and sum up the total bytes.
536-
/// We provide this method so users can compare with `used_bytes` (which does page accounting), and know if
537-
/// the heap is fragmented.
540+
/// We provide this method so users can use [`crate::LiveBytesStats`] to know if
541+
/// the space is fragmented.
538542
/// The value returned by this method is only updated when we finish tracing in a GC. A recommended timing
539543
/// to call this method is at the end of a GC (e.g. when the runtime is about to resume threads).
540-
#[cfg(feature = "count_live_bytes_in_gc")]
541-
pub fn live_bytes_in_last_gc<VM: VMBinding>(mmtk: &MMTK<VM>) -> usize {
542-
use std::sync::atomic::Ordering;
543-
mmtk.state.live_bytes_in_last_gc.load(Ordering::SeqCst)
544+
pub fn live_bytes_in_last_gc<VM: VMBinding>(
545+
mmtk: &MMTK<VM>,
546+
) -> HashMap<&'static str, crate::LiveBytesStats> {
547+
mmtk.state.live_bytes_in_last_gc.borrow().clone()
544548
}
545549

546550
/// Return the starting address of the heap. *Note that currently MMTk uses

src/mmtk.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use crate::util::address::ObjectReference;
1212
use crate::util::analysis::AnalysisManager;
1313
use crate::util::finalizable_processor::FinalizableProcessor;
1414
use crate::util::heap::gc_trigger::GCTrigger;
15+
use crate::util::heap::layout::heap_parameters::MAX_SPACES;
1516
use crate::util::heap::layout::vm_layout::VMLayout;
1617
use crate::util::heap::layout::{self, Mmapper, VMMap};
1718
use crate::util::heap::HeapMeta;
@@ -26,6 +27,7 @@ use crate::util::statistics::stats::Stats;
2627
use crate::vm::ReferenceGlue;
2728
use crate::vm::VMBinding;
2829
use std::cell::UnsafeCell;
30+
use std::collections::HashMap;
2931
use std::default::Default;
3032
use std::sync::atomic::{AtomicBool, Ordering};
3133
use std::sync::Arc;
@@ -526,4 +528,34 @@ impl<VM: VMBinding> MMTK<VM> {
526528
space.enumerate_objects(&mut enumerator);
527529
})
528530
}
531+
532+
/// Aggregate a hash map of live bytes per space with the space stats to produce
533+
/// a map of live bytes stats for the spaces.
534+
pub(crate) fn aggregate_live_bytes_in_last_gc(
535+
&self,
536+
live_bytes_per_space: [usize; MAX_SPACES],
537+
) -> HashMap<&'static str, crate::LiveBytesStats> {
538+
use crate::policy::space::Space;
539+
let mut ret = HashMap::new();
540+
self.get_plan().for_each_space(&mut |space: &dyn Space<VM>| {
541+
let space_name = space.get_name();
542+
let space_idx = space.get_descriptor().get_index();
543+
let used_pages = space.reserved_pages();
544+
if used_pages != 0 {
545+
let used_bytes = crate::util::conversions::pages_to_bytes(used_pages);
546+
let live_bytes = live_bytes_per_space[space_idx];
547+
debug_assert!(
548+
live_bytes <= used_bytes,
549+
"Live bytes of objects in {} ({} bytes) is larger than used pages ({} bytes), something is wrong.",
550+
space_name, live_bytes, used_bytes
551+
);
552+
ret.insert(space_name, crate::LiveBytesStats {
553+
live_bytes,
554+
used_pages,
555+
used_bytes,
556+
});
557+
}
558+
});
559+
ret
560+
}
529561
}

src/plan/markcompact/gc_work.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ impl<VM: VMBinding> GCWork<VM> for UpdateReferences<VM> {
5151
mmtk.slot_logger.reset();
5252

5353
// We do two passes of transitive closures. We clear the live bytes from the first pass.
54-
#[cfg(feature = "count_live_bytes_in_gc")]
5554
mmtk.scheduler
5655
.worker_group
5756
.get_and_clear_worker_live_bytes();

src/policy/copyspace.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ pub struct CopySpace<VM: VMBinding> {
2525
}
2626

2727
impl<VM: VMBinding> SFT for CopySpace<VM> {
28-
fn name(&self) -> &str {
28+
fn name(&self) -> &'static str {
2929
self.get_name()
3030
}
3131

src/policy/immix/immixspace.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ pub struct ImmixSpaceArgs {
8686
unsafe impl<VM: VMBinding> Sync for ImmixSpace<VM> {}
8787

8888
impl<VM: VMBinding> SFT for ImmixSpace<VM> {
89-
fn name(&self) -> &str {
89+
fn name(&self) -> &'static str {
9090
self.get_name()
9191
}
9292

src/policy/immortalspace.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ pub struct ImmortalSpace<VM: VMBinding> {
2727
}
2828

2929
impl<VM: VMBinding> SFT for ImmortalSpace<VM> {
30-
fn name(&self) -> &str {
30+
fn name(&self) -> &'static str {
3131
self.get_name()
3232
}
3333
fn is_live(&self, _object: ObjectReference) -> bool {

src/policy/largeobjectspace.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ pub struct LargeObjectSpace<VM: VMBinding> {
3232
}
3333

3434
impl<VM: VMBinding> SFT for LargeObjectSpace<VM> {
35-
fn name(&self) -> &str {
35+
fn name(&self) -> &'static str {
3636
self.get_name()
3737
}
3838
fn is_live(&self, object: ObjectReference) -> bool {

src/policy/lockfreeimmortalspace.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ pub struct LockFreeImmortalSpace<VM: VMBinding> {
4646
}
4747

4848
impl<VM: VMBinding> SFT for LockFreeImmortalSpace<VM> {
49-
fn name(&self) -> &str {
49+
fn name(&self) -> &'static str {
5050
self.get_name()
5151
}
5252
fn is_live(&self, _object: ObjectReference) -> bool {

src/policy/markcompactspace.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ pub const GC_EXTRA_HEADER_WORD: usize = 1;
3333
const GC_EXTRA_HEADER_BYTES: usize = GC_EXTRA_HEADER_WORD << LOG_BYTES_IN_WORD;
3434

3535
impl<VM: VMBinding> SFT for MarkCompactSpace<VM> {
36-
fn name(&self) -> &str {
36+
fn name(&self) -> &'static str {
3737
self.get_name()
3838
}
3939

src/policy/marksweepspace/malloc_ms/global.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ pub struct MallocSpace<VM: VMBinding> {
6464
}
6565

6666
impl<VM: VMBinding> SFT for MallocSpace<VM> {
67-
fn name(&self) -> &str {
67+
fn name(&self) -> &'static str {
6868
self.get_name()
6969
}
7070

src/policy/marksweepspace/native_ms/global.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ impl AbandonedBlockLists {
157157
}
158158

159159
impl<VM: VMBinding> SFT for MarkSweepSpace<VM> {
160-
fn name(&self) -> &str {
160+
fn name(&self) -> &'static str {
161161
self.common.name
162162
}
163163

src/policy/sft.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ use std::marker::PhantomData;
2424
/// table of SFT rather than Space.
2525
pub trait SFT {
2626
/// The space name
27-
fn name(&self) -> &str;
27+
fn name(&self) -> &'static str;
2828

2929
/// Get forwarding pointer if the object is forwarded.
3030
fn get_forwarded_object(&self, _object: ObjectReference) -> Option<ObjectReference> {
@@ -120,7 +120,7 @@ pub const EMPTY_SFT_NAME: &str = "empty";
120120
pub const EMPTY_SPACE_SFT: EmptySpaceSFT = EmptySpaceSFT {};
121121

122122
impl SFT for EmptySpaceSFT {
123-
fn name(&self) -> &str {
123+
fn name(&self) -> &'static str {
124124
EMPTY_SFT_NAME
125125
}
126126
fn is_live(&self, object: ObjectReference) -> bool {

src/policy/space.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,10 @@ pub trait Space<VM: VMBinding>: 'static + SFT + Sync + Downcast {
328328
self.common().name
329329
}
330330

331+
fn get_descriptor(&self) -> SpaceDescriptor {
332+
self.common().descriptor
333+
}
334+
331335
fn common(&self) -> &CommonSpace<VM>;
332336
fn get_gc_trigger(&self) -> &GCTrigger<VM> {
333337
self.common().gc_trigger.as_ref()

src/policy/vmspace.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ pub struct VMSpace<VM: VMBinding> {
2929
}
3030

3131
impl<VM: VMBinding> SFT for VMSpace<VM> {
32-
fn name(&self) -> &str {
32+
fn name(&self) -> &'static str {
3333
self.common.name
3434
}
3535
fn is_live(&self, _object: ObjectReference) -> bool {

src/scheduler/gc_work.rs

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -154,14 +154,12 @@ impl<C: GCWorkContext + 'static> GCWork<C::VM> for Release<C> {
154154
debug_assert!(result.is_ok());
155155
}
156156

157-
#[cfg(feature = "count_live_bytes_in_gc")]
158-
{
159-
let live_bytes = mmtk
160-
.scheduler
161-
.worker_group
162-
.get_and_clear_worker_live_bytes();
163-
mmtk.state.set_live_bytes_in_last_gc(live_bytes);
164-
}
157+
let live_bytes = mmtk
158+
.scheduler
159+
.worker_group
160+
.get_and_clear_worker_live_bytes();
161+
*mmtk.state.live_bytes_in_last_gc.borrow_mut() =
162+
mmtk.aggregate_live_bytes_in_last_gc(live_bytes);
165163
}
166164
}
167165

@@ -820,7 +818,7 @@ pub trait ScanObjectsWork<VM: VMBinding>: GCWork<VM> + Sized {
820818
&self,
821819
buffer: &[ObjectReference],
822820
worker: &mut GCWorker<<Self::E as ProcessEdgesWork>::VM>,
823-
_mmtk: &'static MMTK<<Self::E as ProcessEdgesWork>::VM>,
821+
mmtk: &'static MMTK<<Self::E as ProcessEdgesWork>::VM>,
824822
) {
825823
let tls = worker.tls;
826824

@@ -830,14 +828,21 @@ pub trait ScanObjectsWork<VM: VMBinding>: GCWork<VM> + Sized {
830828
let mut scan_later = vec![];
831829
{
832830
let mut closure = ObjectsClosure::<Self::E>::new(worker, self.get_bucket());
833-
for object in objects_to_scan.iter().copied() {
834-
// For any object we need to scan, we count its liv bytes
835-
#[cfg(feature = "count_live_bytes_in_gc")]
836-
closure
837-
.worker
838-
.shared
839-
.increase_live_bytes(VM::VMObjectModel::get_current_size(object));
840831

832+
// For any object we need to scan, we count its live bytes.
833+
// Check the option outside the loop for better performance.
834+
if crate::util::rust_util::unlikely(*mmtk.get_options().count_live_bytes_in_gc) {
835+
// Borrow before the loop.
836+
let mut live_bytes_stats = closure.worker.shared.live_bytes_per_space.borrow_mut();
837+
for object in objects_to_scan.iter().copied() {
838+
crate::scheduler::worker::GCWorkerShared::<VM>::increase_live_bytes(
839+
&mut live_bytes_stats,
840+
object,
841+
);
842+
}
843+
}
844+
845+
for object in objects_to_scan.iter().copied() {
841846
if <VM as VMBinding>::VMScanning::support_slot_enqueuing(tls, object) {
842847
trace!("Scan object (slot) {}", object);
843848
// If an object supports slot-enqueuing, we enqueue its slots.

0 commit comments

Comments
 (0)