From a8c7a281d43e10488591777ab7930cb3f099f844 Mon Sep 17 00:00:00 2001 From: ArtoLord Date: Thu, 11 Apr 2024 11:43:13 +0300 Subject: [PATCH 1/3] Add container memory limit --- .../src/main/java/ai/lzy/env/Environment.java | 9 ++++- .../ai/lzy/env/base/DockerEnvDescription.java | 12 +++++-- .../ai/lzy/env/base/DockerEnvironment.java | 35 +++++++++++++++++-- 3 files changed, 51 insertions(+), 5 deletions(-) diff --git a/lzy/execution-env/src/main/java/ai/lzy/env/Environment.java b/lzy/execution-env/src/main/java/ai/lzy/env/Environment.java index 63bbb18b1..0d270b67b 100644 --- a/lzy/execution-env/src/main/java/ai/lzy/env/Environment.java +++ b/lzy/execution-env/src/main/java/ai/lzy/env/Environment.java @@ -53,8 +53,15 @@ public void signal(int sigValue) {} InputStream err(); - int waitFor() throws InterruptedException; + int waitFor() throws InterruptedException, OomKilledException; void signal(int sigValue); } + + + class OomKilledException extends RuntimeException { + public OomKilledException(String msg) { + super(msg); + } + } } diff --git a/lzy/execution-env/src/main/java/ai/lzy/env/base/DockerEnvDescription.java b/lzy/execution-env/src/main/java/ai/lzy/env/base/DockerEnvDescription.java index bb5b2a887..dc0971c5a 100644 --- a/lzy/execution-env/src/main/java/ai/lzy/env/base/DockerEnvDescription.java +++ b/lzy/execution-env/src/main/java/ai/lzy/env/base/DockerEnvDescription.java @@ -18,7 +18,9 @@ public record DockerEnvDescription( String networkMode, DockerClientConfig dockerClientConfig, String user, - Set allowedPlatforms // In format os/arch like "linux/amd64". Empty means all are allowed + Set allowedPlatforms, // In format os/arch like "linux/amd64". Empty means all are allowed + @Nullable + Long memLimitMb ) { public static Builder newBuilder() { @@ -57,6 +59,7 @@ public static class Builder { DockerClientConfig dockerClientConfig; String user = ROOT_USER_UID; Set allowedPlatforms = new HashSet<>(); + Long memLimitMb = null; public Builder withName(String name) { this.name = name; @@ -108,12 +111,17 @@ public Builder withAllowedPlatforms(Collection allowedPlatforms) { return this; } + public Builder setMemLimitMb(Long memLimitMb) { + this.memLimitMb = memLimitMb; + return this; + } + public DockerEnvDescription build() { if (StringUtils.isBlank(name)) { name = "job-" + RandomStringUtils.randomAlphanumeric(5); } return new DockerEnvDescription(name, image, mounts, gpu, envVars, networkMode, dockerClientConfig, user, - allowedPlatforms); + allowedPlatforms, memLimitMb); } } diff --git a/lzy/execution-env/src/main/java/ai/lzy/env/base/DockerEnvironment.java b/lzy/execution-env/src/main/java/ai/lzy/env/base/DockerEnvironment.java index 3a8c76f32..203476a4b 100644 --- a/lzy/execution-env/src/main/java/ai/lzy/env/base/DockerEnvironment.java +++ b/lzy/execution-env/src/main/java/ai/lzy/env/base/DockerEnvironment.java @@ -28,6 +28,7 @@ import java.io.OutputStream; import java.io.PipedInputStream; import java.io.PipedOutputStream; +import java.lang.management.ManagementFactory; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; @@ -114,6 +115,10 @@ public void install(LogStream outStream, LogStream errStream) throws Environment hostConfig.withNetworkMode(config.networkMode()); } + if (config.memLimitMb() != null) { + hostConfig.withMemory(config.memLimitMb() * 1024 * 1024); + } + AtomicInteger containerCreatingAttempt = new AtomicInteger(0); final var container = retry.executeSupplier(() -> { LOG.info("Creating container {}... (attempt {}); image: {}, config: {}", @@ -243,11 +248,21 @@ public InputStream err() { } @Override - public int waitFor() throws InterruptedException { + public int waitFor() throws InterruptedException, OomKilledException { try { feature.get(); - return Math.toIntExact(retry.executeSupplier(() -> client.inspectExecCmd(exec.getId()).exec()) + var rc = Math.toIntExact(retry.executeSupplier(() -> client.inspectExecCmd(exec.getId()).exec()) .getExitCodeLong()); + + if (rc == 0) { + return 0; + } + + if (isOomKilled()) { + throw new OomKilledException("Process exited with rc %s, and it was killed by OOM killer".formatted(rc)); + } + + return rc; } catch (InterruptedException e) { try { startCmd.close(); @@ -361,4 +376,20 @@ private void checkPlatform(InspectImageResponse inspectImageResponse, LogStream throw new RuntimeException(msg); } } + + private boolean isOomKilled() { + if (containerId == null) { + return false; + } + + var killed = client.inspectContainerCmd(containerId) + .exec() + .getState().getOOMKilled(); + + if (killed != null) { + return killed; + } + + return false; + } } From d28072297e6510d1c292fde34bdbac036324cb69 Mon Sep 17 00:00:00 2001 From: ArtoLord Date: Thu, 11 Apr 2024 14:13:59 +0300 Subject: [PATCH 2/3] Add test --- .../ai/lzy/env/base/DockerEnvDescription.java | 2 +- .../src/main/java/ai/lzy/env/logs/Logs.java | 6 ++++ .../test/java/ai/lzy/env/DockerOomTest.java | 33 +++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 lzy/execution-env/src/test/java/ai/lzy/env/DockerOomTest.java diff --git a/lzy/execution-env/src/main/java/ai/lzy/env/base/DockerEnvDescription.java b/lzy/execution-env/src/main/java/ai/lzy/env/base/DockerEnvDescription.java index dc0971c5a..9835b9bdf 100644 --- a/lzy/execution-env/src/main/java/ai/lzy/env/base/DockerEnvDescription.java +++ b/lzy/execution-env/src/main/java/ai/lzy/env/base/DockerEnvDescription.java @@ -111,7 +111,7 @@ public Builder withAllowedPlatforms(Collection allowedPlatforms) { return this; } - public Builder setMemLimitMb(Long memLimitMb) { + public Builder withMemLimitMb(Long memLimitMb) { this.memLimitMb = memLimitMb; return this; } diff --git a/lzy/execution-env/src/main/java/ai/lzy/env/logs/Logs.java b/lzy/execution-env/src/main/java/ai/lzy/env/logs/Logs.java index e87674323..f5c8a7975 100644 --- a/lzy/execution-env/src/main/java/ai/lzy/env/logs/Logs.java +++ b/lzy/execution-env/src/main/java/ai/lzy/env/logs/Logs.java @@ -1,5 +1,6 @@ package ai.lzy.env.logs; +import com.google.common.annotations.VisibleForTesting; import jakarta.annotation.Nullable; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -67,4 +68,9 @@ public void close() { LOG.debug("Stream {} closed", stream.name()); } } + + @VisibleForTesting + public LogStream empty() { + return stream("empty"); + } } diff --git a/lzy/execution-env/src/test/java/ai/lzy/env/DockerOomTest.java b/lzy/execution-env/src/test/java/ai/lzy/env/DockerOomTest.java new file mode 100644 index 000000000..e0818f5b8 --- /dev/null +++ b/lzy/execution-env/src/test/java/ai/lzy/env/DockerOomTest.java @@ -0,0 +1,33 @@ +package ai.lzy.env; + +import ai.lzy.env.base.DockerEnvDescription; +import ai.lzy.env.base.DockerEnvironment; +import ai.lzy.env.logs.LogStream; +import ai.lzy.env.logs.Logs; +import com.github.dockerjava.core.DefaultDockerClientConfig; +import org.junit.Assert; +import org.junit.Test; + +import java.util.List; + +public class DockerOomTest { + private static final Logs logs = new Logs(); + private static final LogStream stream = logs.empty(); + static { + logs.init(List.of()); + } + + @Test + public void testSimple() throws Exception { + var dockerEnvDesc = DockerEnvDescription.newBuilder() + .withImage("alpine:3.19.1") + .withMemLimitMb(6L) + .withDockerClientConfig(DefaultDockerClientConfig.createDefaultConfigBuilder().build()) + .build(); + + try (var env = new DockerEnvironment(dockerEnvDesc)) { + env.install(stream, stream); + Assert.assertThrows(Environment.OomKilledException.class, () -> env.runProcess("/bin/sh", "-c", "tail /dev/zero").waitFor()); + } + } +} From a5d6a4d0136c03996fa50851d6ed67ecf0de6945 Mon Sep 17 00:00:00 2001 From: ArtoLord Date: Thu, 11 Apr 2024 14:15:17 +0300 Subject: [PATCH 3/3] Fix formatting --- .../src/main/java/ai/lzy/env/base/DockerEnvDescription.java | 6 ++---- .../src/main/java/ai/lzy/env/base/DockerEnvironment.java | 3 ++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/lzy/execution-env/src/main/java/ai/lzy/env/base/DockerEnvDescription.java b/lzy/execution-env/src/main/java/ai/lzy/env/base/DockerEnvDescription.java index 9835b9bdf..74d5d869f 100644 --- a/lzy/execution-env/src/main/java/ai/lzy/env/base/DockerEnvDescription.java +++ b/lzy/execution-env/src/main/java/ai/lzy/env/base/DockerEnvDescription.java @@ -14,13 +14,11 @@ public record DockerEnvDescription( List mounts, boolean needGpu, List envVars, // In format = - @Nullable - String networkMode, + @Nullable String networkMode, DockerClientConfig dockerClientConfig, String user, Set allowedPlatforms, // In format os/arch like "linux/amd64". Empty means all are allowed - @Nullable - Long memLimitMb + @Nullable Long memLimitMb ) { public static Builder newBuilder() { diff --git a/lzy/execution-env/src/main/java/ai/lzy/env/base/DockerEnvironment.java b/lzy/execution-env/src/main/java/ai/lzy/env/base/DockerEnvironment.java index 203476a4b..1a5e58508 100644 --- a/lzy/execution-env/src/main/java/ai/lzy/env/base/DockerEnvironment.java +++ b/lzy/execution-env/src/main/java/ai/lzy/env/base/DockerEnvironment.java @@ -384,7 +384,8 @@ private boolean isOomKilled() { var killed = client.inspectContainerCmd(containerId) .exec() - .getState().getOOMKilled(); + .getState() + .getOOMKilled(); if (killed != null) { return killed;