Skip to content

Commit

Permalink
Finish the hierarchy walking experiment
Browse files Browse the repository at this point in the history
- Includes cgroups v1 code for cpu/memory
- Includes cgroups v2 code for cpu/memory
  • Loading branch information
jerboaa committed Apr 30, 2024
1 parent 92aaa6f commit 4a22816
Show file tree
Hide file tree
Showing 7 changed files with 315 additions and 73 deletions.
16 changes: 16 additions & 0 deletions src/hotspot/os/linux/cgroupSubsystem_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,22 @@ jlong CgroupSubsystem::memory_limit_in_bytes() {
return mem_limit;
}

/* adjust_controller (memory)
 *
 * Gives the memory controller a chance to re-target itself within the
 * cgroup hierarchy. Controllers that report no need for adjustment are
 * handed back untouched; otherwise the controller's own adjust_controller()
 * is invoked with the host's physical memory size.
 *
 * return:
 *    the controller to use from here on
 */
CgroupMemoryController* CgroupSubsystem::adjust_controller(CgroupMemoryController* mem) {
  if (!mem->needs_hierarchy_adjustment()) {
    // Nothing to do, keep the controller as is.
    return mem;
  }
  return mem->adjust_controller(os::Linux::physical_memory());
}

/* adjust_controller (cpu)
 *
 * Gives the cpu controller a chance to re-target itself within the
 * cgroup hierarchy. Controllers that report no need for adjustment are
 * handed back untouched; otherwise the controller's own adjust_controller()
 * is invoked with the active processor count reported by os::Linux.
 *
 * return:
 *    the controller to use from here on
 */
CgroupCpuController* CgroupSubsystem::adjust_controller(CgroupCpuController* cpu) {
  if (!cpu->needs_hierarchy_adjustment()) {
    // Nothing to do, keep the controller as is.
    return cpu;
  }
  return cpu->adjust_controller(os::Linux::active_processor_count());
}

// CgroupSubsystem implementations

jlong CgroupSubsystem::memory_and_swap_limit_in_bytes() {
Expand Down
13 changes: 11 additions & 2 deletions src/hotspot/os/linux/cgroupSubsystem_linux.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,12 @@
#define PIDS_IDX 4

// Common base of all cgroup controllers. It stores the controller's cgroup
// path and declares the queries every controller has to answer.
class CgroupController: public CHeapObj<mtInternal> {
 protected:
  // cgroup path of this controller; nullptr until a subclass assigns it.
  char* _cgroup_path;

  // Start out with a well-defined (null) cgroup path. Code that tests the
  // path against nullptr before reading or freeing it would otherwise
  // operate on an indeterminate pointer in subclasses that never assign it.
  CgroupController() : _cgroup_path(nullptr) {}

 public:
  // Returns the stored cgroup path (may be nullptr if never set).
  char* cgroup_path() { return _cgroup_path; }
  // Path used to access this controller's interface files.
  virtual char *subsystem_path() = 0;
  // Whether the controller wants its path re-targeted within the cgroup
  // hierarchy. The default is "no"; subclasses override as needed.
  virtual bool needs_hierarchy_adjustment() { return false; }
};

PRAGMA_DIAG_PUSH
Expand Down Expand Up @@ -222,6 +226,7 @@ template <typename T> int cg_file_multi_line_ctrl(CgroupController* c,
}
PRAGMA_DIAG_POP


class CachedMetric : public CHeapObj<mtInternal>{
private:
volatile jlong _metric;
Expand Down Expand Up @@ -267,7 +272,8 @@ class CgroupCpuController: public CgroupController {
virtual int cpu_quota() = 0;
virtual int cpu_period() = 0;
virtual int cpu_shares() = 0;
virtual char *subsystem_path() = 0;
virtual bool needs_hierarchy_adjustment() = 0;
virtual CgroupCpuController* adjust_controller(int host_cpus) = 0;
};

class CgroupMemoryController: public CgroupController {
Expand All @@ -281,9 +287,10 @@ class CgroupMemoryController: public CgroupController {
virtual jlong rss_usage_in_bytes() = 0;
virtual jlong cache_usage_in_bytes() = 0;
virtual char *subsystem_path() = 0;
virtual CgroupMemoryController* adjust_controller(julong phys_mem) = 0;
virtual bool needs_hierarchy_adjustment() = 0;
};


class CgroupSubsystem: public CHeapObj<mtInternal> {
public:
jlong memory_limit_in_bytes();
Expand All @@ -309,6 +316,8 @@ class CgroupSubsystem: public CHeapObj<mtInternal> {
jlong rss_usage_in_bytes();
jlong cache_usage_in_bytes();
virtual void print_version_specific_info(outputStream* st) = 0;
static CgroupMemoryController* adjust_controller(CgroupMemoryController* m);
static CgroupCpuController* adjust_controller(CgroupCpuController* c);
};

// Utility class for storing info retrieved from /proc/cgroups,
Expand Down
159 changes: 104 additions & 55 deletions src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@
* on the contents of the mountinfo and cgroup files.
*/
void CgroupV1Controller::set_subsystem_path(char *cgroup_path) {
if (_cgroup_path != nullptr) {
os::free(_cgroup_path);
}
if (_path != nullptr) {
os::free(_path);
}
_cgroup_path = os::strdup(cgroup_path);
stringStream ss;
if (_root != nullptr && cgroup_path != nullptr) {
if (strcmp(_root, "/") == 0) {
Expand Down Expand Up @@ -66,32 +73,106 @@ void CgroupV1Controller::set_subsystem_path(char *cgroup_path) {
}
}

/* uses_mem_hierarchy
*
* Return whether or not hierarchical cgroup accounting is being
* done.
*
* return:
* A number > 0 if true, or
* OSCONTAINER_ERROR for not supported
*/
jlong CgroupV1MemoryController::uses_mem_hierarchy() {
jlong use_hierarchy;
int err = cg_file_contents_ctrl(static_cast<CgroupV1Controller*>(this), "/memory.use_hierarchy", JLONG_FORMAT, &use_hierarchy);
if (err != 0) {
log_trace(os, container)("Use Hierarchy is: %d", OSCONTAINER_ERROR);
return (jlong)OSCONTAINER_ERROR;
// Answers the hierarchy-adjustment query for the v1 memory controller by
// forwarding to the CgroupV1Controller implementation. The call is
// explicitly qualified so that it binds to that implementation.
bool CgroupV1MemoryController::needs_hierarchy_adjustment() {
  const bool needs_adjustment = CgroupV1Controller::needs_hierarchy_adjustment();
  return needs_adjustment;
}

// Answers the hierarchy-adjustment query for the v1 cpu controller by
// forwarding to the CgroupV1Controller implementation. The call is
// explicitly qualified so that it binds to that implementation.
bool CgroupV1CpuController::needs_hierarchy_adjustment() {
  const bool needs_adjustment = CgroupV1Controller::needs_hierarchy_adjustment();
  return needs_adjustment;
}

/* adjust_controller
 *
 * Walks the cgroup path of this controller upwards, one path component at
 * a time, re-reading the memory limit at every level until a positive
 * limit is found. If none of the intermediate paths yields a limit, the
 * path "/" is tried as a last resort. When no limit is found anywhere,
 * the original cgroup path is restored.
 *
 * phys_mem: upper bound passed through to read_memory_limit_in_bytes()
 *
 * return:
 *    this controller; its subsystem path may have been changed
 */
CgroupV1MemoryController* CgroupV1MemoryController::adjust_controller(julong phys_mem) {
  log_trace(os, container)("Adjusting v1 controller path for memory: %s", subsystem_path());
  CgroupV1Controller* base_ctrl = static_cast<CgroupV1Controller*>(this);
  assert(base_ctrl->cgroup_path() != nullptr, "invariant");
  // orig is kept for restoring the path; cg_path is truncated in place.
  char* orig = os::strdup(base_ctrl->cgroup_path());
  char* cg_path = os::strdup(orig);
  jlong limit = read_memory_limit_in_bytes(phys_mem);
  bool path_iterated = false;
  while (limit < 0) {
    char* last_slash = strrchr(cg_path, '/');
    // Stop when only the leading component is left. Also stop when the
    // path contains no '/' at all: strrchr() returns null in that case
    // and must not be written through.
    if (last_slash == nullptr || last_slash == cg_path) {
      break;
    }
    *last_slash = '\0'; // strip path
    // update to shortened path and try again
    base_ctrl->set_subsystem_path(cg_path);
    limit = read_memory_limit_in_bytes(phys_mem);
    path_iterated = true;
    if (limit > 0) {
      log_trace(os, container)("Adjusted v1 controller path for memory to: %s", subsystem_path());
      os::free(cg_path);
      os::free(orig);
      return this;
    }
  }
  // no lower limit found or limit at leaf
  os::free(cg_path);
  if (path_iterated) {
    // Writable buffer so that no string literal is passed as char*.
    char root_path[] = "/";
    base_ctrl->set_subsystem_path(root_path);
    limit = read_memory_limit_in_bytes(phys_mem);
    if (limit > 0) {
      // handle limit set at mount point
      log_trace(os, container)("Adjusted v1 controller path for memory to: %s", subsystem_path());
      os::free(orig);
      return this;
    }
    log_trace(os, container)("No lower limit found in hierarchy %s, adjusting to original path %s",
                             base_ctrl->mount_point(), orig);
    base_ctrl->set_subsystem_path(orig);
  } else {
    log_trace(os, container)("Lowest limit for memory at leaf: %s",
                             base_ctrl->subsystem_path());
  }
  os::free(orig);
  return this;
}

void CgroupV1MemoryController::set_subsystem_path(char *cgroup_path) {
CgroupV1Controller::set_subsystem_path(cgroup_path);
jlong hierarchy = uses_mem_hierarchy();
if (hierarchy > 0) {
set_hierarchical(true);
/* adjust_controller
 *
 * Walks the cgroup path of this controller upwards, one path component at
 * a time, re-computing the processor count at every level until it
 * differs from host_cpus (i.e. some cpu limit is in effect). If none of
 * the intermediate paths yields a limit, the path "/" is tried as a last
 * resort. When no limit is found anywhere, the original cgroup path is
 * restored.
 *
 * host_cpus: processor count of the host; must be > 0
 *
 * return:
 *    this controller; its subsystem path may have been changed
 */
CgroupV1CpuController* CgroupV1CpuController::adjust_controller(int host_cpus) {
  log_trace(os, container)("Adjusting v1 controller path for cpu: %s", subsystem_path());
  CgroupV1Controller* base_ctrl = static_cast<CgroupV1Controller*>(this);
  assert(base_ctrl->cgroup_path() != nullptr, "invariant");
  assert(host_cpus > 0, "Negative host cpus?");
  // orig is kept for restoring the path; cg_path is truncated in place.
  char* orig = os::strdup(base_ctrl->cgroup_path());
  char* cg_path = os::strdup(orig);
  int cpus = CgroupUtil::processor_count(this, host_cpus);
  bool path_iterated = false;
  while (cpus == host_cpus) {
    char* last_slash = strrchr(cg_path, '/');
    // Stop when only the leading component is left. Also stop when the
    // path contains no '/' at all: strrchr() returns null in that case
    // and must not be written through.
    if (last_slash == nullptr || last_slash == cg_path) {
      break;
    }
    *last_slash = '\0'; // strip path
    // update to shortened path and try again
    base_ctrl->set_subsystem_path(cg_path);
    cpus = CgroupUtil::processor_count(this, host_cpus);
    path_iterated = true;
    if (cpus != host_cpus) {
      log_trace(os, container)("Adjusted v1 controller path for cpu to: %s", subsystem_path());
      os::free(cg_path);
      os::free(orig);
      return this;
    }
  }
  // no lower limit found or limit at leaf
  os::free(cg_path);
  if (path_iterated) {
    // Writable buffer so that no string literal is passed as char*.
    char root_path[] = "/";
    base_ctrl->set_subsystem_path(root_path);
    cpus = CgroupUtil::processor_count(this, host_cpus);
    if (cpus != host_cpus) {
      // handle limit set at mount point
      log_trace(os, container)("Adjusted v1 controller path for cpu to: %s", subsystem_path());
      os::free(orig);
      return this;
    }
    log_trace(os, container)("No lower limit found in hierarchy %s, adjusting to original path %s",
                             base_ctrl->mount_point(), orig);
    base_ctrl->set_subsystem_path(orig);
  } else {
    log_trace(os, container)("Lowest limit for cpu at leaf: %s",
                             base_ctrl->subsystem_path());
  }
  os::free(orig);
  return this;
}

/* needs_hierarchy_adjustment
 *
 * A v1 controller needs its path adjusted when its cgroup path differs
 * from its root.
 *
 * return:
 *    true if _root and _cgroup_path are both set and differ,
 *    false otherwise
 */
bool CgroupV1Controller::needs_hierarchy_adjustment() {
  assert(_cgroup_path != nullptr, "sanity");
  // The assert is compiled out of product builds; never hand a null
  // pointer to strcmp(). Without both paths there is nothing to compare,
  // so report that no adjustment is needed.
  if (_root == nullptr || _cgroup_path == nullptr) {
    return false;
  }
  return strcmp(_root, _cgroup_path) != 0;
}

static inline
Expand Down Expand Up @@ -125,25 +206,7 @@ jlong CgroupV1MemoryController::read_memory_limit_in_bytes(julong phys_mem) {
return OSCONTAINER_ERROR;
}
log_trace(os, container)("Memory Limit is: " JULONG_FORMAT, memlimit);

if (memlimit >= phys_mem) {
log_trace(os, container)("Non-Hierarchical Memory Limit is: Unlimited");
if (is_hierarchical()) {
julong hier_memlimit;
err = cg_file_multi_line_ctrl(static_cast<CgroupV1Controller*>(this), "/memory.stat",
"hierarchical_memory_limit", JULONG_FORMAT, &hier_memlimit);
if (err != 0) {
do_trace_log(OSCONTAINER_ERROR, phys_mem);
return OSCONTAINER_ERROR;
}
log_trace(os, container)("Hierarchical Memory Limit is: " JULONG_FORMAT, hier_memlimit);
if (hier_memlimit >= phys_mem) {
log_trace(os, container)("Hierarchical Memory Limit is: Unlimited");
} else {
do_trace_log(hier_memlimit, phys_mem);
return (jlong)hier_memlimit;
}
}
do_trace_log(memlimit, phys_mem);
return (jlong)-1;
} else {
Expand All @@ -165,7 +228,6 @@ jlong CgroupV1MemoryController::read_memory_limit_in_bytes(julong phys_mem) {
* upper bound)
*/
jlong CgroupV1MemoryController::read_mem_swap(julong host_total_memsw) {
julong hier_memswlimit;
julong memswlimit;
int err = cg_file_contents_ctrl(static_cast<CgroupV1Controller*>(this), "/memory.memsw.limit_in_bytes", JULONG_FORMAT, &memswlimit);
if (err != 0) {
Expand All @@ -174,20 +236,7 @@ jlong CgroupV1MemoryController::read_mem_swap(julong host_total_memsw) {
}
log_trace(os, container)("Memory and Swap Limit is: " JULONG_FORMAT, memswlimit);
if (memswlimit >= host_total_memsw) {
log_trace(os, container)("Non-Hierarchical Memory and Swap Limit is: Unlimited");
if (is_hierarchical()) {
const char* matchline = "hierarchical_memsw_limit";
err = cg_file_multi_line_ctrl(static_cast<CgroupV1Controller*>(this), "/memory.stat", matchline, JULONG_FORMAT, &hier_memswlimit);
if (err != 0) {
return OSCONTAINER_ERROR;
}
log_trace(os, container)("Hierarchical Memory and Swap Limit is : " JULONG_FORMAT, hier_memswlimit);
if (hier_memswlimit >= host_total_memsw) {
log_trace(os, container)("Hierarchical Memory and Swap Limit is: Unlimited");
} else {
return (jlong)hier_memswlimit;
}
}
log_trace(os, container)("Memory and Swap Limit is: Unlimited");
return (jlong)-1;
} else {
return (jlong)memswlimit;
Expand Down
22 changes: 11 additions & 11 deletions src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,18 @@ class CgroupV1Controller: public CgroupController {
_root = os::strdup(root);
_mount_point = os::strdup(mountpoint);
_path = nullptr;
_cgroup_path = nullptr;
}

virtual void set_subsystem_path(char *cgroup_path);
char *subsystem_path() { return _path; }
bool needs_hierarchy_adjustment();
char *mount_point() { return _mount_point; }
};

class CgroupV1MemoryController: public CgroupV1Controller, public CgroupMemoryController {

public:
bool is_hierarchical() { return _uses_mem_hierarchy; }
void set_subsystem_path(char *cgroup_path);
jlong read_memory_limit_in_bytes(julong upper_bound);
jlong memory_usage_in_bytes();
jlong memory_and_swap_limit_in_bytes(julong host_mem, julong host_swap);
Expand All @@ -67,19 +68,14 @@ class CgroupV1MemoryController: public CgroupV1Controller, public CgroupMemoryCo
jlong kernel_memory_limit_in_bytes(julong host_mem);
jlong kernel_memory_max_usage_in_bytes();
char *subsystem_path() override { return CgroupV1Controller::subsystem_path(); }
bool needs_hierarchy_adjustment();
CgroupV1MemoryController* adjust_controller(julong phys_mem);
private:
/* Some container runtimes set limits via cgroup
* hierarchy. If set to true consider also memory.stat
* file if everything else seems unlimited */
bool _uses_mem_hierarchy;
jlong uses_mem_hierarchy();
void set_hierarchical(bool value) { _uses_mem_hierarchy = value; }
jlong read_mem_swappiness();
jlong read_mem_swap(julong host_total_memsw);

public:
CgroupV1MemoryController(char *root, char *mountpoint) : CgroupV1Controller(root, mountpoint) {
_uses_mem_hierarchy = false;
}

};
Expand All @@ -95,6 +91,8 @@ class CgroupV1CpuController: public CgroupV1Controller, public CgroupCpuControll
CgroupV1CpuController(char *root, char *mountpoint) : CgroupV1Controller(root, mountpoint) {
}
char *subsystem_path() override { return CgroupV1Controller::subsystem_path(); }
bool needs_hierarchy_adjustment();
CgroupV1CpuController* adjust_controller(int host_cpus);
};

class CgroupV1Subsystem: public CgroupSubsystem {
Expand Down Expand Up @@ -135,10 +133,12 @@ class CgroupV1Subsystem: public CgroupSubsystem {
CgroupV1Controller* pids,
CgroupV1MemoryController* memory) {
_cpuset = cpuset;
_cpu = new CachingCgroupController<CgroupCpuController*>(cpu);
CgroupCpuController* c = adjust_controller(cpu);
_cpu = new CachingCgroupController<CgroupCpuController*>(c);
_cpuacct = cpuacct;
_pids = pids;
_memory = new CachingCgroupController<CgroupMemoryController*>(memory);
CgroupMemoryController* m = adjust_controller(memory);
_memory = new CachingCgroupController<CgroupMemoryController*>(m);
}
};

Expand Down
Loading

0 comments on commit 4a22816

Please sign in to comment.