Skip to content

Commit ddd97b1

Browse files
connersaeliConner Saeli
and
Conner Saeli
authored
Prometheus Integration (#268)
Changes to incorporate Prometheus as a data source for summarization. Co-authored-by: Conner Saeli <saelic01@mail.buffalostate.edu>
1 parent 325645e commit ddd97b1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+2418
-606
lines changed

.circleci/config.yml

Lines changed: 46 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,64 @@
11
version: 2.1
22
jobs:
33
build:
4+
docker:
5+
- image: rockylinux:8
6+
steps:
7+
- checkout
8+
- run:
9+
name: Install System Dependencies
10+
command: ./tests/ci/setup.sh build
11+
- run:
12+
name: Build Software Package
13+
command: ./tests/ci/build.sh
14+
- persist_to_workspace:
15+
root: .
16+
paths:
17+
- dist/*
18+
19+
test:
420
parameters:
21+
test-mode:
22+
type: string
523
install-type:
624
type: string
725
docker:
8-
# The first image entry here will be used as the image for the parent container.
926
- image: tools-ext-01.ccr.xdmod.org/xdmod-job_performance-10.0.0:rockylinux8-0.1
1027
environment:
1128
TERM: xterm
29+
TERMINFO: /bin/bash
1230
COMPOSER_ALLOW_SUPERUSER: 1
1331
XDMOD_REALMS: 'jobs,storage,cloud'
1432
XDMOD_IS_CORE: yes
1533
XDMOD_INSTALL_DIR: /xdmod
16-
XDMOD_TEST_MODE: << parameters.install-type >>
34+
XDMOD_TEST_MODE: << parameters.test-mode >>
35+
SUPREMM_INSTALL_TYPE: << parameters.install-type >>
1736
steps:
1837
- checkout
19-
- run:
20-
name: Install System Dependencies
21-
command: ./tests/ci/setup.sh
2238
- run:
2339
name: Create Test Result Directories
2440
command: |
2541
mkdir -p shippable/testresults
2642
mkdir -p shippable/codecoverage
43+
- attach_workspace:
44+
at: .
45+
- run:
46+
name: Install Docker Compose
47+
command: |
48+
dnf install -y dnf-utils
49+
dnf config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
50+
dnf install -y docker-ce docker-ce-cli docker-compose-plugin
51+
52+
- setup_remote_docker
53+
- run:
54+
name: Build services
55+
command: docker compose -f ./tests/ci/srv/services.yml build
56+
- run:
57+
name: Start services
58+
command: docker compose -f ./tests/ci/srv/services.yml up -d
2759
- run:
2860
name: Run Bootstrap
29-
command: ./tests/integration_tests/bootstrap.sh
61+
command: ./tests/ci/test/bootstrap.sh $SUPREMM_INSTALL_TYPE
3062
- run:
3163
name: Run Integration Tests
3264
command: ./tests/integration_tests/integration_test.bash
@@ -42,24 +74,15 @@ jobs:
4274
- run:
4375
name: Ingest Jobs
4476
command: ingest_jobscripts.py -d
45-
- run:
46-
name: Remove Currently Installed SUPREMM
47-
command: yum remove -y supremm
48-
- run:
49-
name: Install SUPREMM
50-
command: python3 setup.py install --user --prefix=
5177
- run:
5278
name: Pylint
5379
command: pylint-3 --errors-only supremm
5480
- run:
5581
name: Pytest
5682
command: pytest-3 --junitxml=shippable/testresults/testreport.xml --cov=supremm --cov-report xml:shippable/codecoverage/coverage.xml
5783
- run:
58-
name: Summarize Jobs
59-
command: /root/.local/bin/summarize_jobs.py -h > /dev/null
60-
- run:
61-
name: Index Archives
62-
command: /root/.local/bin/indexarchives.py -h > /dev/null
84+
name: Remove Currently Installed SUPREMM
85+
command: dnf remove -y supremm
6386
- store_test_results:
6487
path: shippable/testresults
6588
- store_artifacts:
@@ -70,7 +93,11 @@ jobs:
7093
workflows:
7194
full-build:
7295
jobs:
73-
- build:
96+
- build
97+
- test:
7498
matrix:
7599
parameters:
76-
install-type: ["fresh_install", "upgrade"]
100+
test-mode: ["fresh_install", "upgrade"]
101+
install-type: ["rpm", "wheel", "src"]
102+
requires:
103+
- build

config/prometheus/mapping.json

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
{
2+
"common": {
3+
"params": ["host"],
4+
"defaults": {"environment": "prod"}
5+
},
6+
"metrics": {
7+
"cgroup.memory.usage": {
8+
"name": "cgroup_memory_used_bytes",
9+
"params": ["cgroup"],
10+
"groupby": "cgroup"
11+
},
12+
"cgroup.memory.limit": {
13+
"name": "cgroup_memory_total_bytes",
14+
"params": ["cgroup"],
15+
"groupby": "cgroup"
16+
},
17+
"disk.dev.read": {
18+
"name": "node_disk_reads_completed_total",
19+
"groupby": "device"
20+
},
21+
"disk.dev.read_bytes": {
22+
"name": "node_disk_read_bytes_total",
23+
"scaling": "0.0009765625",
24+
"groupby": "device"
25+
},
26+
"disk.dev.write": {
27+
"name": "node_disk_writes_completed_total",
28+
"groupby": "device"
29+
},
30+
"disk.dev.write_bytes": {
31+
"name": "node_disk_written_bytes_total",
32+
"scaling": "0.0009765625",
33+
"groupby": "device"
34+
},
35+
"infiniband.port.switch.in.bytes": {
36+
"name": "node_infiniband_port_data_received_bytes_total",
37+
"groupby": "port",
38+
"out_fmt": ["{}:{}", "device", "port"]
39+
},
40+
"infiniband.port.switch.in.packets": {
41+
"name": "node_infiniband_port_packets_received_total",
42+
"groupby": "port",
43+
"out_fmt": ["{}:{}", "device", "port"]
44+
},
45+
"infiniband.port.switch.out.bytes": {
46+
"name": "node_infiniband_port_data_transmitted_bytes_total",
47+
"groupby": "port",
48+
"out_fmt": ["{}:{}", "device", "port"]
49+
},
50+
"infiniband.port.switch.out.packets": {
51+
"name": "node_infiniband_port_packets_transmitted_total",
52+
"groupby": "port",
53+
"out_fmt": ["{}:{}", "device", "port"]
54+
},
55+
"ipmi.dcmi.power": {
56+
"name": "ipmi_dcmi_power_consumption_watts",
57+
"groupby": "host"
58+
},
59+
"kernel.all.load": {
60+
"name": "node_load1",
61+
"groupby": "host"
62+
},
63+
"kernel.percpu.cpu.user": {
64+
"name": "node_cpu_seconds_total",
65+
"defaults": {"mode" : "user"},
66+
"scaling": "1000",
67+
"groupby": "cpu",
68+
"out_fmt": ["cpu{}", "cpu"]
69+
},
70+
"kernel.percpu.cpu.idle": {
71+
"name": "node_cpu_seconds_total",
72+
"defaults": {"mode" : "idle"},
73+
"scaling": "1000",
74+
"groupby": "cpu",
75+
"out_fmt": ["cpu{}", "cpu"]
76+
},
77+
"kernel.percpu.cpu.nice": {
78+
"name": "node_cpu_seconds_total",
79+
"defaults": {"mode" : "nice"},
80+
"scaling": "1000",
81+
"groupby": "cpu",
82+
"out_fmt": ["cpu{}", "cpu"]
83+
},
84+
"kernel.percpu.cpu.sys": {
85+
"name": "node_cpu_seconds_total",
86+
"defaults": {"mode" : "system"},
87+
"scaling": "1000",
88+
"groupby": "cpu",
89+
"out_fmt": ["cpu{}", "cpu"]
90+
},
91+
"kernel.percpu.cpu.wait.total": {
92+
"name": "node_cpu_seconds_total",
93+
"defaults": {"mode" : "iowait"},
94+
"scaling": "1000",
95+
"groupby": "cpu",
96+
"out_fmt": ["cpu{}", "cpu"]
97+
},
98+
"kernel.percpu.cpu.irq.hard": {
99+
"name": "node_cpu_seconds_total",
100+
"defaults": {"mode" : "irq"},
101+
"scaling": "1000",
102+
"groupby": "cpu",
103+
"out_fmt": ["cpu{}", "cpu"]
104+
},
105+
"kernel.percpu.cpu.irq.soft": {
106+
"name": "node_cpu_seconds_total",
107+
"defaults": {"mode" : "softirq"},
108+
"scaling": "1000",
109+
"groupby": "cpu",
110+
"out_fmt": ["cpu{}", "cpu"]
111+
},
112+
"mem.numa.util.filePages": {
113+
"name": "node_memory_numa_FilePages",
114+
"groupby": "node"
115+
},
116+
"mem.numa.util.slab": {
117+
"name": "node_memory_numa_Slab",
118+
"groupby": "node"
119+
},
120+
"mem.numa.util.used": {
121+
"name": "node_memory_numa_MemUsed",
122+
"groupby": "node"
123+
},
124+
"mem.freemem": {
125+
"name": "node_memory_MemFree_bytes",
126+
"scaling": "0.0009765625",
127+
"groupby": "host"
128+
},
129+
"mem.physmem": {
130+
"name": "node_memory_MemTotal_bytes",
131+
"scaling": "0.0009765625",
132+
"groupby": "host"
133+
},
134+
"network.interface.in.bytes": {
135+
"name": "node_network_receive_bytes_total",
136+
"groupby": "device"
137+
},
138+
"network.interface.out.bytes": {
139+
"name": "node_network_transmit_bytes_total",
140+
"groupby": "device"
141+
},
142+
"nvidia.gpuactive": {
143+
"name": "DCGM_FI_DEV_GPU_UTIL",
144+
"groupby": "gpu",
145+
"out_fmt": ["gpu{}", "gpu"]
146+
},
147+
"nvidia.memused": {
148+
"name": "DCGM_FI_DEV_FB_USED",
149+
"groupby": "gpu",
150+
"out_fmt": ["gpu{}", "gpu"]
151+
},
152+
"nvidia.powerused": {
153+
"name": "DCGM_FI_DEV_POWER_USAGE",
154+
"scaling": "1000",
155+
"groupby": "gpu",
156+
"out_fmt": ["gpu{}", "gpu"]
157+
},
158+
"prom:cgroup_cpu_info": {
159+
"name": "cgroup_cpu_info",
160+
"params": ["cgroup"],
161+
"groupby": "cpus"
162+
},
163+
"prom:cgroup_process_exec_count": {
164+
"name": "cgroup_process_exec_count",
165+
"params": ["cgroup"],
166+
"groupby": "exec"
167+
}
168+
}
169+
}

setup.cfg

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
[bdist_rpm]
2-
release = 1.0-beta3%%{?dist}
2+
release = 1.0-rc.3%%{?dist}
33
build_requires = python36-devel, pcp-libs-devel >= 5.3, pcp-libs-devel < 5.4
4-
requires = python3, python3-pymongo, python3-numpy, python3-scipy, python3-PyMySQL, python3-pcp >= 5.3, python3-pcp < 5.4, pcp-libs >= 5.3, pcp-libs < 5.4, python3-Cython, python3-pytz
4+
requires = python3, python3-pymongo, python3-numpy, python3-scipy, python3-PyMySQL, python3-pcp >= 5.3, python3-pcp < 5.4, pcp-libs >= 5.3, pcp-libs < 5.4, python3-Cython, python3-pytz, python3-requests
55
install_script = .rpm_install_script.txt
6+
7+
[bdist_wheel]
8+
python-tag = py36

setup.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@
3636
packages=find_packages(where='src'),
3737
package_data={
3838
'supremm': ['assets/modw_supremm.sql', 'assets/mongo_setup.js', '*.pxd', '*.pyx'],
39-
'supremm.pcpcinterface': ['*.pxd', '*.pyx']
39+
'supremm.datasource.pcp.pcpcinterface': ['*.pxd', '*.pyx']
4040
},
4141
data_files=[
42-
(confpath, ['config/config.json']),
42+
(confpath, ['config/config.json', 'config/prometheus/mapping.json']),
4343
('share/supremm/templates/slurm', ['config/templates/slurm/slurm-epilog', 'config/templates/slurm/slurm-prolog']),
4444
('share/supremm/templates/hotproc', ['config/templates/hotproc/hotproc.conf']),
4545
('share/supremm/templates/pmlogger', ['config/templates/pmlogger/control', 'config/templates/pmlogger/pmlogger-supremm.config'])
@@ -52,7 +52,7 @@
5252
'gen-pmlogger-control.py = supremm.gen_pmlogger_control:main',
5353
'summarize_jobs.py = supremm.summarize_jobs:main',
5454
'summarize_mpi.py = supremm.summarize_mpi:main',
55-
'indexarchives.py = supremm.indexarchives:runindexing',
55+
'indexarchives.py = supremm.datasource.pcp.indexarchives:runindexing',
5656
'account.py = supremm.account:runingest',
5757
'supremmconf.py = supremm.supremmconf:main',
5858
'supremm-setup = supremm.supremm_setup:main',
@@ -68,10 +68,11 @@
6868
'Cython',
6969
'scipy',
7070
'pymongo',
71-
'pytz'
71+
'pytz',
72+
'requests'
7273
],
7374
ext_modules=cythonize([
74-
Extension("supremm.pcpcinterface.pcpcinterface", ["src/supremm/pcpcinterface/pcpcinterface.pyx"], libraries=["pcp"], include_dirs=[numpy.get_include()])
75+
Extension("supremm.datasource.pcp.pcpcinterface.pcpcinterface", ["src/supremm/datasource/pcp/pcpcinterface/pcpcinterface.pyx"], libraries=["pcp"], include_dirs=[numpy.get_include()])
7576
])
7677
)
7778

src/supremm/Job.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ def __init__(self, job_pk_id, job_id, acct):
7777
self.job_id = job_id
7878
self.acct = acct
7979
self._nodecount = acct['nodes']
80+
8081
self._start_datetime = datetimeconvert(acct['start_time'])
8182
self._end_datetime = datetimeconvert(acct['end_time'])
8283

@@ -130,6 +131,11 @@ def rawarchives(self):
130131
if len(nodedata.rawarchives) > 0:
131132
yield nodename, nodedata.rawarchives
132133

134+
def nodenames(self):
135+
""" iterator for all nodenames that the job ran on """
136+
for nodename in self._nodes.keys():
137+
yield nodename
138+
133139
def nodearchives(self):
134140
""" iterator for the combined archives for the nodes in the job """
135141
for nodename, nodedata in self._nodes.items():

src/supremm/config.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import pkg_resources
99
import logging
1010

11+
1112
def iscomment(line):
1213
""" check if line is a c++ style comment """
1314
if re.search(r"^\s*//", line):
@@ -47,7 +48,7 @@ def __str__(self):
4748
return json.dumps(self._config, indent=4)
4849

4950
@staticmethod
50-
def autodetectconfpath():
51+
def autodetectconfpath(filename="config.json"):
5152
""" search known paths for the configuration directory
5253
The list of paths supports the three typical install locations
5354
1) Environment variable SUPREMM_CONFIG_DIR
@@ -63,7 +64,7 @@ def autodetectconfpath():
6364
]
6465

6566
for path in searchpaths:
66-
if os.path.exists(os.path.join(path, "config.json")):
67+
if os.path.exists(os.path.join(path, filename)):
6768
return os.path.abspath(path)
6869

6970
return None

0 commit comments

Comments
 (0)