-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheztimem.py
executable file
·397 lines (337 loc) · 16.1 KB
/
eztimem.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
#!/usr/bin/env python3
import sys
import psutil
import time
import GPUtil
import numpy as np
import matplotlib.pyplot as plt
from threading import Thread
from datetime import datetime
from typing import Dict, List
from prettytable import PrettyTable
def monitor_resources(pid: int, monitor_interval: float, log: Dict[str, List]):
"""
Monitors and logs various system and process-related resources for a given process over time.
It tracks CPU usage, memory usage, disk I/O, context switches, and if available, GPU values.
Data is collected at specified intervals.
Parameters:
- pid (int): Process ID of the process to monitor.
- monitor_interval (float): Time in seconds between data collection points.
- log (Dict[str, List]): Dictionary where collected data will be appended.
"""
try:
process = psutil.Process(pid)
start_time = time.time()
# Initialize CPU usage percentage tracking
process.cpu_percent(interval=None)
total_memory = psutil.virtual_memory().total
cpu_count = psutil.cpu_count()
while process.is_running():
# Gather resource usage data
with process.oneshot():
mem_info = process.memory_info()
cpu_usage = process.cpu_percent(interval=None) / cpu_count
io_counters = process.io_counters()
memory_percent = (mem_info.rss / total_memory) * 100
context_switches = process.num_ctx_switches()
process_priority = process.nice()
# Gather GPU data
gpus = GPUtil.getGPUs()
gpu_load = [gpu.load for gpu in gpus]
# GPU memory will be kept in bytes for consistency
gpu_memory_used = [gpu.memoryUsed*(1024**2) for gpu in gpus]
gpu_memory_total = [gpu.memoryTotal*(1024**2) for gpu in gpus]
gpu_memory_util = [(gpu.memoryUsed / gpu.memoryTotal) * 100
if gpu.memoryTotal else 0 for gpu in gpus]
# Appending new values
log["memory_rss"].append(mem_info.rss)
log["memory_vms"].append(mem_info.vms)
log["cpu_usage"].append(cpu_usage)
log["io_read_bytes"].append(io_counters.read_bytes)
log["io_write_bytes"].append(io_counters.write_bytes)
log["memory_usage_percent"].append(memory_percent)
log["threads"].append(process.num_threads())
log["open_files"].append(len(process.open_files()))
log["voluntary_ctxt_switches"].append(context_switches.voluntary)
log["involuntary_ctxt_switches"].append(context_switches.involuntary)
log["process_priority"].append(process_priority)
log["gpu_load"].append(gpu_load)
log["gpu_memory_used"].append(gpu_memory_used)
log["gpu_memory_util"].append(gpu_memory_util)
# Update track of time
current_time = time.time() - start_time
log["times"].append(current_time)
# Wait for interval
time.sleep(monitor_interval)
except Exception as e:
pass
def init_empty_log() -> Dict[str, List]:
"""
Initialize and return an empty log dictionary.
Returns:
Dict[str, List]: A dictionary with keys representing different system metrics,
each associated with an empty list to collect data.
"""
return {
"times": [],
"memory_rss": [],
"memory_vms": [],
"cpu_usage": [],
"io_read_bytes": [],
"io_write_bytes": [],
"memory_usage_percent": [],
"threads": [],
"open_files": [],
"voluntary_ctxt_switches": [],
"involuntary_ctxt_switches": [],
"process_priority": [],
"gpu_load": [],
"gpu_memory_used": [],
"gpu_memory_util": []
}
def transpose_list(matrix):
"""
Transpose a given 2D list (matrix).
This function takes a two-dimensional list (matrix) and returns a new matrix which is the transpose
of the input. The transpose of a matrix is formed by turning rows into columns and vice versa.
Args:
matrix (List[List[Any]]): A 2D list representing the original matrix to be transposed.
Returns:
List[List[Any]]: A new 2D list representing the transposed matrix.
"""
return [list(row) for row in zip(*matrix)]
def memory_to_str(memory_value):
"""
Converts a memory size from bytes to a human-readable string using
megabytes (MB) or gigabytes (GB) as units.
Parameters:
- memory_value (int or float): Memory size in bytes.
Returns:
- str: A string representing the memory size in MB or GB with two decimal places.
"""
# Check if the memory value is at least 1 gigabyte
if memory_value >= 1000**3: # 1000^3 bytes = around 1 gigabyte
# Convert from bytes to gigabytes and format to two decimal places
return f"{memory_value / (1024**3):.2f} GB"
else:
# Convert from bytes to megabytes and format to two decimal places
return f"{memory_value / (1024**2):.2f} MB"
def display_statistics(log: Dict[str, List]):
"""
Displays a summary table and printouts of reports considering the resources used
by the command (application) that was monitored.
Parameters:
- log (Dict[str, List]): A dictionary containing lists of system process statistics.
"""
# Initialize a pretty table for displaying process statistics
table = PrettyTable()
table.align = "l" # Left align text in cells
table.field_names = ["Process Report", "Average", "Std. Dev.", "Max."] # Set column headers
# Calculate total disk read and write by subtracting the first from the last recorded value
read_bytes = log["io_read_bytes"][-1] - log["io_read_bytes"][0]
write_bytes = log["io_write_bytes"][-1] - log["io_write_bytes"][0]
# Print total disk I/O
print(" Total Disk Read:", memory_to_str(read_bytes))
print(" Total Disk Write:", memory_to_str(write_bytes))
# Print total context switches
print(" Total Voluntary Ctxt Switch:", log["voluntary_ctxt_switches"][-1])
print(" Total Involuntary Ctxt Switch:", log["involuntary_ctxt_switches"][-1])
# Mode identifiers for formatting output
mode_default = 0
mode_percent = 1
mode_bytes = 2
# Dictionary mapping keys to labels and modes for table formatting
table_keys = {
"cpu_usage": ["CPU Usage", mode_percent],
"memory_usage_percent": ["Memory (%)", mode_percent],
"memory_rss": ["Memory (RSS)", mode_bytes],
"memory_vms": ["Memory (VMS)", mode_bytes],
"threads": ["Threads Used", mode_default],
"open_files": ["Open Files", mode_default],
"process_priority": ["Process Priority", mode_default],
}
# Process each metric and add rows to the table
for key, info in table_keys.items():
label, print_mode = info
values = log[key]
if values: # Only process non-empty lists
if isinstance(values[0], list): # Handle nested lists (e.g., per-thread values)
values = [item for sublist in values for item in sublist]
avg = np.mean(values) # Compute average
std = np.std(values) # Compute standard deviation
max_value = np.max(values) # Compute maximum value
# Add row to table based on the specified print mode
if print_mode == mode_default:
table.add_row([label, f"{avg:.2f}", f"{std:.2f}", f"{max_value:.2f}"])
elif print_mode == mode_percent:
table.add_row([label, f"{avg:.2f} %", f"{std:.2f} %", f"{max_value:.2f} %"])
elif print_mode == mode_bytes:
table.add_row([label, memory_to_str(avg), memory_to_str(std), memory_to_str(max_value)])
# Print the formatted table
print(table)
def display_statistics_gpu(log: Dict[str, List]):
"""
Displays a summary table of GPU statistics including average, standard deviation, and maximum values
for GPU load, memory usage, and memory utilization.
Data is reported for all GPUs.
Parameters:
- log (Dict[str, List]): A dictionary containing lists of GPU statistics. Each key in the dictionary
('gpu_load', 'gpu_memory_used', 'gpu_memory_util') must contain a list of lists where each sub-list
represents the values for a respective GPU across different times.
"""
# Initialize a pretty table for displaying GPU statistics
table_gpu = PrettyTable()
table_gpu.align = "l" # Left align text in cells
table_gpu.field_names = ["GPU Report", "Average", "Std. Dev.", "Max."] # Set column headers
# Number of GPUs based on the length of first list in gpu_load
num_gpus = len(log["gpu_load"][0])
# Transposing lists for easier statistics computation
log["gpu_load"] = transpose_list(log["gpu_load"])
log["gpu_memory_used"] = transpose_list(log["gpu_memory_used"])
log["gpu_memory_util"] = transpose_list(log["gpu_memory_util"])
# Mode identifiers for how to print the statistics
mode_default = 0
mode_percent = 1
mode_bytes = 2
# Dictionary to map log keys to labels and modes
table_gpu_keys = {
"gpu_load": ["Usage", mode_percent],
"gpu_memory_used": ["Memory", mode_bytes],
"gpu_memory_util": ["Memory (%)", mode_percent],
}
# Iterate through each GPU and each type of statistic
for gpu_count in range(num_gpus):
for key, info in table_gpu_keys.items():
label, print_mode = info
values = log[key][gpu_count] # Extract data for current GPU
avg = np.mean(values) # Compute average
std = np.std(values) # Compute standard deviation
max_value = np.max(values) # Compute maximum value
# Format label depending on the number of GPUs
if num_gpus <= 1:
label = "GPU {}".format(label)
else:
label = "GPU {} {}".format(gpu_count, label)
# Add row to table based on the mode specified
if print_mode == mode_default:
table_gpu.add_row([label, f"{avg:.2f}", f"{std:.2f}", f"{max_value:.2f}"])
elif print_mode == mode_percent:
table_gpu.add_row([label, f"{avg:.2f} %", f"{std:.2f} %", f"{max_value:.2f} %"])
elif print_mode == mode_bytes:
table_gpu.add_row([label, memory_to_str(avg), memory_to_str(std), memory_to_str(max_value)])
# Print the table
print(table_gpu)
def plot_resources(log: Dict[str, List]):
"""
Plots the CPU usage, number of threads, RSS memory, VMS memory, and number of open files over time
and saves the plot to a file named 'eztimem_plots.png', with legends placed outside the graph on the right side.
Parameters:
- log (Dict[str, List]): Dictionary containing the logged data for plotting.
"""
# Convert times from seconds to minutes for a more readable format
times_in_minutes = [t / 60 for t in log['times']]
# Create a figure and a set of subplots
fig, axs = plt.subplots(5, 1, figsize=(10, 24)) # Adjusted subplot count and figure size
# Get correct length
correct_len = min(len(log['cpu_usage']), len(times_in_minutes))
# Plot CPU usage
axs[0].plot(times_in_minutes, log['cpu_usage'][:correct_len], label='CPU Usage (%)', color='blue')
axs[0].set_title('CPU Usage Over Time')
axs[0].set_xlabel('Time (minutes)')
axs[0].set_ylabel('CPU Usage (%)')
axs[0].grid(True)
axs[0].legend(loc='upper left', bbox_to_anchor=(1, 1))
# Plot Number of Threads
axs[1].plot(times_in_minutes, log['threads'][:correct_len], label='Number of Threads', color='purple')
axs[1].set_title('Number of Threads Over Time')
axs[1].set_xlabel('Time (minutes)')
axs[1].set_ylabel('Number of Threads')
axs[1].grid(True)
axs[1].legend(loc='upper left', bbox_to_anchor=(1, 1))
# Plot RSS Memory
rss_memory_in_mb = [m / (1024 ** 2) for m in log['memory_rss']] # Convert from bytes to MB
axs[2].plot(times_in_minutes, rss_memory_in_mb[:correct_len], label='Memory RSS (MB)', color='red')
axs[2].set_title('RSS Memory Usage Over Time')
axs[2].set_xlabel('Time (minutes)')
axs[2].set_ylabel('Memory RSS (MB)')
axs[2].grid(True)
axs[2].legend(loc='upper left', bbox_to_anchor=(1, 1))
# Plot VMS Memory
vms_memory_in_mb = [m / (1024 ** 2) for m in log['memory_vms']] # Convert from bytes to MB
axs[3].plot(times_in_minutes, vms_memory_in_mb[:correct_len], label='Memory VMS (MB)', color='green')
axs[3].set_title('VMS Memory Usage Over Time')
axs[3].set_xlabel('Time (minutes)')
axs[3].set_ylabel('Memory VMS (MB)')
axs[3].grid(True)
axs[3].legend(loc='upper left', bbox_to_anchor=(1, 1))
# Plot Number of Open Files
axs[4].plot(times_in_minutes, log['open_files'][:correct_len], label='Open Files', color='orange')
axs[4].set_title('Open Files Over Time')
axs[4].set_xlabel('Time (minutes)')
axs[4].set_ylabel('Open Files')
axs[4].grid(True)
axs[4].legend(loc='upper left', bbox_to_anchor=(1, 1))
# Tight layout before saving to handle overlaps
plt.tight_layout()
# Save the figure
export_file = 'eztimem_plots.png'
plt.savefig(export_file, bbox_inches='tight') # Adjust saving to include external legend
plt.close(fig) # Close the plot figure to free memory
# Print to user
print("----------------------------------------")
print(" Plots saved in {}".format(export_file))
print("----------------------------------------")
if __name__ == "__main__":
# Check if command was provided as input
if len(sys.argv) <= 1:
print("Call me like this: python {} <command>".format(sys.argv[0]))
sys.exit(1)
# Parse command
command = " ".join(sys.argv[1:])
# Dictionary to store the report data
log = init_empty_log()
# Interval to monitor process (in seconds)
monitor_interval = 0.1
# Run process and monitor it
date_start = datetime.now() # get start datetime
start_time = time.perf_counter() # start timer
process = psutil.Popen(command.split()) # start process
monitor_thread = Thread(target=monitor_resources, args=(process.pid,
monitor_interval,
log)) # monitor process
monitor_thread.start()
process.wait()
monitor_thread.join()
end_time = time.perf_counter() # end timer
date_end = datetime.now() # get end datetime
elapsed_time = end_time - start_time # calculate elapsed time
# Check if command actually run
if (len(log["cpu_usage"]) == 1) and log["cpu_usage"][0] == 0:
print("No monitoring was done... Aborting.")
sys.exit(1)
# Report measured values
print("\n==========================================================")
print("= EZTIMEM REPORT =")
print("==========================================================")
# Elapsed time
print(f" Command: {command}")
print(f" Start Time: {date_start}")
print(f" Finish Time: {date_end}")
print(f" Total Time: {elapsed_time:.2f} seconds")
# Print tables
display_statistics(log)
display_statistics_gpu(log)
# Plot some graphs
plot_resources(log)
# To help the user
print(" For your information:")
print("* EZTIMEM monitoring checks are made every {} seconds.".format(monitor_interval))
print("* CPU usage considers all cores.")
print("* VMS Memory = total amount of virtual memory used by a process,")
print(" including all code, data, and shared libraries plus pages that have")
print(" been swapped out.")
print("* RSS Memory = portion of memory occupied by a process that is held")
print(" in RAM, excluding swapped out pages.")
print("* Process Priority = default is zero.")
print("* All reports consider only the resources utilized by the process,")
print(" except for the GPU, which is reported for the entire system usage.")