-
Notifications
You must be signed in to change notification settings - Fork 42
Enforcing CPU and memory limits #1455
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
51ed06c
88f540e
edb7031
56e7afd
0bd4cb1
d5621c3
0eb8707
637b528
a5519cb
e393fd2
d8883ea
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ | |
| from pathlib import Path | ||
| import random | ||
| import shutil | ||
| import math | ||
| import yaml | ||
| from collections import OrderedDict | ||
| from datetime import datetime | ||
|
|
@@ -613,6 +614,56 @@ def _populate_image_names(self): | |
| else: | ||
| service['image'] = f"{service_name}_{random.randint(500000,10000000)}" | ||
|
|
||
def _populate_cpu_and_memory_limits(self):
    """Normalize the CPU and memory limits of every service in the usage scenario.

    Limits given via the compose ``deploy.resources.limits`` keys are moved to
    the flat ``mem_limit`` / ``cpus`` service keys. Afterwards every service is
    guaranteed to carry both keys:

    * Services without a memory limit share the memory that is still
      unassigned equally (a warning is recorded for each of them).
    * Services without a CPU limit get the number of CPUs Docker may assign.

    One GB of memory and one CPU are always held back for the host / GMT.

    Raises:
        RuntimeError: Docker reports 1 GB of memory or less, or no assignable CPU.
        ValueError: A service requests more memory or CPUs than are available,
            or no memory is left for the services that need an auto-assigned limit.
    """
    one_gb = 1024**3

    def docker_info(field):
        # Reads a single integer field from `docker info` through a Go template, e.g. {{.NCPU}}
        return int(subprocess.check_output(
            ['docker', 'info', '--format', '{{.' + field + '}}'],
            encoding='UTF-8', errors='replace',
        ).strip())

    def pop_deploy_limit(service, key):
        # wildly the docker compose spec allows deploy to be None
        # ... thus we need to check every level and cannot chain .get()
        deploy = service.get('deploy') or {}
        resources = deploy.get('resources') or {}
        limits = resources.get('limits') or {}
        value = limits.get(key)
        if value is not None:
            del limits[key]
        return value

    services = self._usage_scenario.get('services', {})

    docker_available_memory = docker_info('MemTotal')
    unassigned_memory = docker_available_memory - one_gb # we want to leave 1 GB free on the host / docker VM to avoid OOM situations

    if unassigned_memory <= 0:
        raise RuntimeError(f"Docker has insufficient memory available. Available: {docker_available_memory / one_gb:.2f}GB, Required: at least 1GB for GMT overhead")

    system_assignable_cpu_count = docker_info('NCPU') - 1 # one core is kept free for the host
    if system_assignable_cpu_count <= 0:
        raise RuntimeError(f"Cannot assign docker containers to any CPU as no CPUs are available to Docker. Available CPU count: {system_assignable_cpu_count}")

    to_be_assigned_services = []
    for service_name, service in services.items():
        # a limit given via deploy takes precedence over the flat service key
        if (memory := pop_deploy_limit(service, 'memory')) is not None:
            service['mem_limit'] = memory

        mem_limit = service.get('mem_limit')
        if mem_limit is None or mem_limit == 0: # a key that is present but null counts as unset
            to_be_assigned_services.append(service_name)
            self.__warnings.append(f"Service '{service_name}' had no memory limit set. GMT does not allow unbounded memory limits and auto value was applied.")
        else:
            memory_bytes = utils.docker_memory_to_bytes(mem_limit)
            if memory_bytes > unassigned_memory:
                raise ValueError(f"You are trying to assign more memory to service {service_name} than is left available on host system and already assigned containers. Requested memory: {memory_bytes} Bytes. Left unassigned memory: {unassigned_memory} Bytes")
            unassigned_memory -= memory_bytes

        if (cpus := pop_deploy_limit(service, 'cpus')) is not None:
            service['cpus'] = cpus

        cpus = service.get('cpus')
        requested_cpus = 0.0 if cpus is None else float(cpus)
        if requested_cpus == 0:
            # we do not want to auto enforce CPU limits. So we re-map the limit spec here to the host system for transparency and for comparing with other runs
            service['cpus'] = system_assignable_cpu_count
        elif requested_cpus > system_assignable_cpu_count:
            raise ValueError(f"You are trying to assign more cpus to service {service_name} than is available host system. Requested CPUs: {requested_cpus}. Available CPUs: {system_assignable_cpu_count}")

    if to_be_assigned_services:
        # integer floor division stays exact for large byte counts
        memory_per_service = int(unassigned_memory // len(to_be_assigned_services))
        if memory_per_service <= 0:
            # Docker interprets a memory limit of 0 as "unlimited", which is exactly what must not happen here
            raise ValueError(f"No memory is left to auto-assign to the services without a memory limit: {', '.join(to_be_assigned_services)}. Please lower the explicit memory limits or set memory limits for all services manually.")
        if memory_per_service < one_gb:
            self.__warnings.append('Auto-assigned memory for containers was less than 1 GB per container because no more memory was available to the host. If you feel that this is too low please set memory limits manually or upgrade to a bigger host.')
        for service_name in to_be_assigned_services:
            services[service_name]['mem_limit'] = memory_per_service
|
|
||
|
|
||
| def _remove_docker_images(self): | ||
| print(TerminalColors.HEADER, '\nRemoving all temporary GMT images', TerminalColors.ENDC) | ||
|
|
||
|
|
@@ -1060,6 +1111,10 @@ def _setup_services(self): | |
| # This use case is when you have running containers on your host and want to benchmark some code running in them | ||
| services = self._usage_scenario.get('services', {}) | ||
|
|
||
| SYSTEM_ASSIGNABLE_CPU_COUNT = int(subprocess.check_output(['docker', 'info', '--format', '{{.NCPU}}'], encoding='UTF-8', errors='replace').strip()) -1 | ||
| if SYSTEM_ASSIGNABLE_CPU_COUNT <= 0: | ||
| raise RuntimeError(f"Cannot assign docker containers to any CPU as no CPUs are available to Docker. Available CPU count: {SYSTEM_ASSIGNABLE_CPU_COUNT}") | ||
ArneTR marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| # Check if there are service dependencies defined with 'depends_on'. | ||
| # If so, change the order of the services accordingly. | ||
| services_ordered = self._order_services(services) | ||
|
|
@@ -1275,24 +1330,17 @@ def _setup_services(self): | |
| if 'pause-after-phase' in service: | ||
| self.__services_to_pause_phase[service['pause-after-phase']] = self.__services_to_pause_phase.get(service['pause-after-phase'], []) + [container_name] | ||
|
|
||
| # wildly the docker compose spec allows deploy to be None ... thus we need to check and cannot .get() | ||
| if 'deploy' in service and service['deploy'] is not None and (memory := service['deploy'].get('resources', {}).get('limits', {}).get('memory', None)): | ||
| docker_run_string.append('--memory') # value in bytes | ||
| docker_run_string.append(str(memory)) | ||
| print('Applying Memory Limit from deploy') | ||
| elif memory := service.get('mem_limit', None): # we only need to get resources or cpus. they must align anyway | ||
| docker_run_string.append('--memory') | ||
| docker_run_string.append(str(memory)) # value in bytes e.g. "10M" | ||
| print('Applying Memory Limit from services') | ||
|
|
||
| if 'deploy' in service and service['deploy'] is not None and (cpus := service['deploy'].get('resources', {}).get('limits', {}).get('cpus', None)): | ||
| docker_run_string.append('--cpus') # value in cores | ||
| docker_run_string.append(str(cpus)) | ||
| print('Applying CPU Limit from deploy') | ||
| elif cpus := service.get('cpus', None): # we only need to get resources or cpus. they must align anyway | ||
| docker_run_string.append('--cpus') | ||
| docker_run_string.append(str(cpus)) # value in (fractional) cores | ||
| print('Applying CPU Limit from services') | ||
| # apply cpuset but keep one core for GMT and metric providers free | ||
| # This cannot be configured via user as no knowledge of machine shall be required | ||
| docker_run_string.append('--cpuset-cpus') | ||
| docker_run_string.append(','.join(map(str, range(1,SYSTEM_ASSIGNABLE_CPU_COUNT+1)))) # range inclusive as we do not assign to 0 | ||
ArneTR marked this conversation as resolved.
Show resolved
Hide resolved
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think there is a bug. Based on my understanding, the […] If there are 4 cores in total, the variable […]
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Did you take into account the maybe unexpected range function behaviour in Python? The right boundary is exclusive, not inclusive. Please follow up if I misunderstood the bug report |
||
|
|
||
| docker_run_string.append('--memory-swappiness=0') # GMT should never swap as it gives hard to interpret / non-linear performance results | ||
| docker_run_string.append('--oom-score-adj=1000') # containers will be killed first so host does not OOM | ||
| docker_run_string.append(f"--memory={service['mem_limit']}") | ||
| docker_run_string.append(f"--memory-swap={service['mem_limit']}") # effectively disable swap | ||
|
|
||
| docker_run_string.append(f"--cpus={service['cpus']}") | ||
|
|
||
| if 'healthcheck' in service: # must come last | ||
| if 'disable' in service['healthcheck'] and service['healthcheck']['disable'] is True: | ||
|
|
@@ -2378,6 +2426,7 @@ def run(self): | |
| self._register_machine_id() | ||
| self._import_metric_providers() | ||
| self._populate_image_names() | ||
| self._populate_cpu_and_memory_limits() | ||
| self._prepare_docker() | ||
| self._check_running_containers_before_start() | ||
| self._remove_docker_images() | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| --- | ||
| name: Test Stress | ||
| author: Dan Mateas | ||
| description: test | ||
|
|
||
| services: | ||
| test-container: | ||
| type: container | ||
| image: gcb_stress | ||
| mem_limit: 100MB | ||
| cpus: 2 | ||
| build: | ||
| context: ../stress-application | ||
|
|
||
| flow: | ||
|
|
||
| - name: I am a hidden phase | ||
| container: test-container | ||
| hidden: true | ||
| commands: | ||
| - type: console | ||
| command: echo 1 | ||
|
|
||
| - name: Stress | ||
| container: test-container | ||
| commands: | ||
| - type: console | ||
| command: stress-ng -c 1 -t 1 -q | ||
| note: Starting Stress | ||
|
|
||
| - name: I am hidden too | ||
| container: test-container | ||
| hidden: true | ||
| commands: | ||
| - type: console | ||
| command: echo 1 | ||
| note: echo |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| --- | ||
| name: Testing SHM | ||
| author: Arne Tarara | ||
| description: Testing Too high limits | ||
|
|
||
| services: | ||
| test-container: | ||
| type: container | ||
| image: alpine | ||
| cpus: 400 | ||
|
|
||
|
|
||
| flow: | ||
| - name: Testing SHM | ||
| container: test-container | ||
| commands: | ||
| - type: console | ||
| command: 'echo "SHM size is: $(df -h /dev/shm)"' | ||
| shell: sh | ||
| log-stdout: True | ||
| log-stderr: True | ||
|
|
||
| - name: Testing SHM 2 | ||
| container: test-container-2 | ||
| commands: | ||
| - type: console | ||
| command: 'echo "SHM size is: $(df -h /dev/shm)"' | ||
| shell: sh | ||
| log-stdout: True | ||
| log-stderr: True |
Uh oh!
There was an error while loading. Please reload this page.