-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathperformance_test_nmt.py
345 lines (269 loc) · 12.6 KB
/
performance_test_nmt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
import sys
import json
import os
import subprocess
import time
from datetime import datetime
# Per-configuration result store, keyed by a human-readable configuration name.
# Each value starts as an empty dict; main() later adds a "run_command" entry
# plus one list and one "<measurement>_average" accumulator per measurement.
# The key order matters: main() pairs these names with RUN_COMMANDS by position.
configurations = {
    config_name: {}
    for config_name in (
        "Without NMT in build",
        "With NMT",
        "Java mode without NMT",
        "Java mode with NMT",
    )
}
# Names of the metrics recorded for every configuration on every iteration.
# This is an ordered tuple rather than a set: iterating a set of strings
# follows hash order, which changes from one interpreter run to the next, so
# the report sections would otherwise be written in a different order each run.
measurements = (
    "mean",
    "max",
    "p50",
    "p90",
    "p99",
    "rss",
    "startup",
)
# --- Run settings -----------------------------------------------------------
# MODE, BENCHMARK and BUILD_IMAGES are overwritten by set_global_variables()
# from the command line; ITERATIONS is the number of interleaved test rounds.
BUILD_IMAGES = True
ITERATIONS = 10
MODE = ""
BENCHMARK = ""

# --- Tool locations ---------------------------------------------------------
# Filled in from the environment by set_global_variables().
JAVA_HOME = ""
GRAALVM_HOME = ""
HYPERFOIL_HOME = ""

# --- Native image paths (relative to the working directory) ------------------
IMAGE_NAME_ORIGINAL = "target/getting-started-1.0.0-SNAPSHOT-runner"
IMAGE_NAME_NMT = f"{IMAGE_NAME_ORIGINAL}_nmt"
IMAGE_NAME_NO_NMT = f"{IMAGE_NAME_ORIGINAL}_no_nmt"

# Working directory captured at import time; used to build absolute paths to
# the JFR settings file in the run commands.
CWD = os.getcwd()

# One launch command per entry of `configurations`, in the same order;
# populated by set_global_variables().
RUN_COMMANDS = []
def check_endpoint(endpoint):
    """Report whether ``curl -sf`` can fetch ``endpoint``.

    The URL is passed to curl as a single argv element instead of being
    spliced into a shell string, so characters such as ``&``, ``;`` or ``?``
    in the URL are never interpreted by a shell.

    Args:
        endpoint: URL to probe.

    Returns:
        True if curl exits with status 0, False if it exits non-zero or
        cannot be started at all.
    """
    try:
        subprocess.run(
            ["curl", "-sf", endpoint],
            stdout=subprocess.DEVNULL,  # the response body is irrelevant
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing curl binary, which the shell-based form
        # also reported as a failed probe (non-zero shell exit status).
        return False
    return True
def set_up_hyperfoil():
    """Start the Hyperfoil controller and upload the benchmark definition.

    Launches ``$HYPERFOIL_HOME/bin/standalone.sh`` in the background with its
    stdout redirected to a timestamped ``output_dump<iso-time>.txt`` file,
    polls the controller's ``/openapi`` endpoint until it answers, then POSTs
    ``benchmark.hf.yaml`` (read from the working directory) to the controller.

    Raises:
        subprocess.CalledProcessError: if the launch or the upload command
            exits non-zero.
    """
    launch_cmd = (
        f"{HYPERFOIL_HOME}/bin/standalone.sh > "
        f"output_dump{datetime.now().isoformat()}.txt &"
    )
    subprocess.run(launch_cmd, shell=True, check=True)

    # Deliberate busy-wait: polling continuously avoids sleeping for an
    # arbitrary period that might be longer than the controller needs.
    print("-- Waiting for hyperfoil to start")
    while not check_endpoint("http://0.0.0.0:8090/openapi"):
        pass
    print("-- Done waiting for hyperfoil start-up")

    upload_cmd = (
        'curl -X POST --data-binary @"benchmark.hf.yaml" '
        '-H "Content-type: text/vnd.yaml" '
        "http://0.0.0.0:8090/benchmark"
    )
    subprocess.run(upload_cmd, shell=True, check=True)
def shutdown_hyperfoil():
    """Best-effort stop of the Hyperfoil controller.

    Runs ``sudo fuser -k 8090/tcp`` (kill whatever is using TCP port 8090).
    A non-zero exit status is reported on stdout and otherwise ignored.
    """
    outcome = subprocess.run("sudo fuser -k 8090/tcp", shell=True)
    try:
        # Raises the same CalledProcessError that check=True would have raised.
        outcome.check_returncode()
    except subprocess.CalledProcessError as failure:
        print(f"Error executing command: {failure}")
        print("-- Failed to shutdown hyperfoil")
def shutdown_quarkus():
    """Best-effort stop of the Quarkus application.

    Runs ``sudo fuser -k 8080/tcp`` (kill whatever is using TCP port 8080).
    A failure to run the command is reported on stdout and otherwise ignored.
    Only command failures are swallowed: KeyboardInterrupt and SystemExit
    propagate, so the script can still be interrupted here.
    """
    try:
        subprocess.run("sudo fuser -k 8080/tcp", shell=True, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        print(f"Error executing command: {e}")
        print("-- Failed to shutdown quarkus")
def wait_for_quarkus():
    """Block until the Quarkus test endpoint on port 8080 responds.

    Polls continuously with no delay between probes and has no timeout; it
    returns only once check_endpoint() reports success.
    """
    print("waiting for quarkus")
    while not check_endpoint("http://0.0.0.0:8080/hello/greeting/test_input"):
        pass
    print("quarkus is accessible")
def enableTurboBoost(enable):
    """Write the intel_pstate ``no_turbo`` flag via ``sudo tee``.

    Args:
        enable: truthy writes ``0`` to the ``no_turbo`` file, falsy writes
            ``1`` (the flag is inverted relative to this argument).

    A failing command is reported on stdout and otherwise ignored.
    """
    no_turbo_value = 0 if enable else 1
    command = (
        f"echo {no_turbo_value} | sudo tee "
        "/sys/devices/system/cpu/intel_pstate/no_turbo"
    )
    try:
        subprocess.run(command, shell=True, check=True)
    except subprocess.CalledProcessError as failure:
        print(f"Error executing command: {failure}")
def run_hyperfoil_benchmark(config):
    """Run the uploaded Hyperfoil benchmark once and record its latency stats.

    Starts the ``jfr-hyperfoil`` benchmark against the endpoint selected by the
    module-level ``BENCHMARK`` value, waits a fixed 7 seconds for it to finish,
    then fetches the run's statistics and appends one value to each of
    ``config["mean"]``, ``config["max"]``, ``config["p50"]``, ``config["p90"]``
    and ``config["p99"]``.

    Args:
        config: per-configuration dict holding one list per measurement name.

    Raises:
        ValueError, KeyError, IndexError: if the statistics response is not
            the expected JSON document. Nothing is appended in that case.
    """
    # Start the benchmark and read the run id from the JSON response. The id is
    # parsed in-process; no nested ``python3 -c`` in a shell pipeline is needed.
    name = ""
    start_url = (
        "http://0.0.0.0:8090/benchmark/jfr-hyperfoil/start"
        "?templateParam=ENDPOINT=" + BENCHMARK
    )
    try:
        started = subprocess.run(
            ["curl", start_url], check=True, capture_output=True, text=True)
        name = str(json.loads(started.stdout)["id"])
    except (subprocess.CalledProcessError, OSError,
            ValueError, KeyError, TypeError) as e:
        # As before, a failed start is reported and the function carries on
        # with an empty run id.
        print(f"Error executing command: {e}")

    # Sleep until the test is done (fixed wait, not a completion check).
    time.sleep(7)

    # Get and parse results.
    try:
        fetched = subprocess.run(
            ["curl", "http://0.0.0.0:8090/run/" + name + "/stats/all/json"],
            check=True, capture_output=True, text=True)
    except (subprocess.CalledProcessError, OSError) as e:
        print(f"Error executing command: {e}")
        return

    summary = json.loads(fetched.stdout)["stats"][0]["total"]["summary"]
    percentiles = summary["percentileResponseTime"]

    # Extract every value before appending any of them, so a missing key can
    # never leave the measurement lists with unequal lengths.
    # Values are divided by 1000; the report labels them as microseconds, so
    # the raw values are presumably nanoseconds (TODO confirm).
    recorded = {
        "mean": summary["meanResponseTime"] / 1000,
        "max": summary["maxResponseTime"] / 1000,
        "p50": percentiles["50.0"] / 1000,
        "p90": percentiles["90.0"] / 1000,
        "p99": percentiles["99.0"] / 1000,
    }
    for measurement, value in recorded.items():
        config[measurement].append(value)
def run_test(config, config_name):
    """Do a single run of the test on one configuration, collecting measurements.

    Sequence: stop any leftover Hyperfoil/Quarkus processes, start Hyperfoil,
    drop the OS page cache, launch the application with
    ``config["run_command"]``, time how long it takes to answer its first
    request, read its RSS, run the Hyperfoil benchmark, then shut both down.

    Args:
        config: per-configuration dict; must contain ``"run_command"`` and one
            list per measurement name. ``"rss"`` and ``"startup"`` are appended
            here, the latency lists by run_hyperfoil_benchmark().
        config_name: label printed at the start of the run.

    Raises:
        subprocess.CalledProcessError: if the PID or RSS lookup fails.
    """
    print("Starting test for configuration: " + config_name)
    shutdown_hyperfoil()
    shutdown_quarkus()
    set_up_hyperfoil()

    # Clear caches (greatly affects startup time).
    try:
        subprocess.run(
            "sudo sh -c 'sync; echo 3 > /proc/sys/vm/drop_caches'",
            shell=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error executing command: {e}")

    # perf_counter() is monotonic, so the elapsed time cannot be distorted by
    # wall-clock adjustments (NTP, manual changes) during the measurement.
    start_time = time.perf_counter()

    # Start quarkus in the background and wait until it serves requests.
    try:
        subprocess.run(
            "sudo " + config["run_command"] + " &", shell=True, check=True)
        wait_for_quarkus()
    except subprocess.CalledProcessError as e:
        print(f"Error executing command: {e}")
    startup = time.perf_counter() - start_time

    # Find the PID bound to port 8080, then ask ps for that process's RSS.
    pid_lookup = subprocess.run(
        "sudo lsof -t -i:8080",
        shell=True, check=True, capture_output=True, text=True)
    pid = pid_lookup.stdout.strip()
    rss_lookup = subprocess.run(
        "sudo ps -o rss= -p " + pid,
        shell=True, check=True, capture_output=True, text=True)

    config["rss"].append(int(rss_lookup.stdout.strip("\n")))
    config["startup"].append(startup)

    run_hyperfoil_benchmark(config)
    shutdown_quarkus()
    shutdown_hyperfoil()
def _require_env_path(name):
    """Return environment variable ``name``, exiting if unset or not an existing path."""
    value = os.environ.get(name)
    if value is None:
        print(name + " not set")
        sys.exit(1)
    if not os.path.exists(value):
        print(name + " not set to valid path")
        sys.exit(1)
    return value


def set_global_variables():
    """Populate the module-level settings from the environment and command line.

    Reads ``JAVA_HOME``, ``GRAALVM_HOME`` and ``HYPERFOIL_HOME`` from the
    environment (each must be set and name an existing path), builds the four
    launch commands in ``RUN_COMMANDS``, and interprets the command line:

    * ``argv[1]``: mode, ``"work"`` (default) or ``"regular"``; also stored as
      ``BENCHMARK``.
    * ``argv[2]``: the literal ``"false"`` sets ``BUILD_IMAGES`` to False.

    Raises:
        SystemExit: with status 1, after printing a message naming the
            offending variable, when an environment variable is missing or
            invalid or the mode is unknown.
    """
    global RUN_COMMANDS, JAVA_HOME, GRAALVM_HOME, HYPERFOIL_HOME, BUILD_IMAGES, MODE, BENCHMARK

    JAVA_HOME = _require_env_path("JAVA_HOME")
    GRAALVM_HOME = _require_env_path("GRAALVM_HOME")
    HYPERFOIL_HOME = _require_env_path("HYPERFOIL_HOME")

    print("Starting test")
    print("JAVA_HOME:", JAVA_HOME)
    print("GRAALVM_HOME:", GRAALVM_HOME)
    print("HYPERFOIL_HOME:", HYPERFOIL_HOME)

    # Order must match the key order of `configurations`; main() pairs them
    # by position.
    RUN_COMMANDS = [
        "./" + IMAGE_NAME_NO_NMT,
        "./" + IMAGE_NAME_NMT + " -XX:+FlightRecorder -XX:StartFlightRecording=settings=" +
        CWD + "/quarkus-demo.jfc,duration=4s,filename=performance_test.jfr",
        JAVA_HOME + "/bin/java -XX:NativeMemoryTracking=off -jar ./target/quarkus-app/quarkus-run.jar",
        JAVA_HOME + "/bin/java -XX:NativeMemoryTracking=summary -XX:+FlightRecorder -XX:StartFlightRecording=settings=" + CWD +
        "/quarkus-demo.jfc,duration=4s,filename=performance_test_JVM.jfr -jar ./target/quarkus-app/quarkus-run.jar",
    ]

    # Default to the "work" endpoint when no mode is given.
    MODE = sys.argv[1] if len(sys.argv) > 1 else "work"

    if len(sys.argv) > 2 and sys.argv[2] == "false":
        BUILD_IMAGES = False

    if MODE not in ("work", "regular"):
        print("invalid mode specified")
        sys.exit(1)
    BENCHMARK = MODE
def get_image_sizes():
    """Return the on-disk sizes of the two native images.

    Sizes are read with ``os.path.getsize`` rather than by spawning a shell
    and ``stat``, so image paths containing spaces or shell metacharacters
    work.

    Returns:
        A ``(size_with_nmt, size_without_nmt)`` tuple of byte counts formatted
        as decimal strings; write_results() concatenates them into the report.

    Raises:
        OSError: if either image file does not exist or cannot be inspected.
    """
    return (
        str(os.path.getsize(IMAGE_NAME_NMT)),
        str(os.path.getsize(IMAGE_NAME_NO_NMT)),
    )
def write_results(file_sizes):
    """Compute per-iteration deltas and averages, then write the text report.

    For every iteration and measurement the relative difference
    ``(with NMT - without NMT) / without NMT`` is computed for the native-image
    pair ("ni") and the JVM pair ("jdk"). Running averages are accumulated for
    those differences and, in place, in each configuration's
    ``"<measurement>_average"`` entry. The report is appended to
    ``report_<iso-timestamp>.txt`` in the working directory.

    Args:
        file_sizes: ``(size_with_nmt, size_without_nmt)`` as strings.
    """
    separator = "\n------------------------------------------------\n"

    # Result key -> (configuration with NMT, baseline configuration).
    compared = {
        "ni": ("With NMT", "Without NMT in build"),
        "jdk": ("Java mode with NMT", "Java mode without NMT"),
    }
    # Result key -> label used for that pair in the report.
    labels = {"ni": "NI", "jdk": "JAVA"}

    # Prepare the data structure.
    diff_percentages = {}
    for pair_key in compared:
        per_pair = {}
        for measurement in measurements:
            per_pair[measurement] = []
            per_pair[measurement + "_average"] = 0
        diff_percentages[pair_key] = per_pair

    for i in range(ITERATIONS):
        for measurement in measurements:
            for pair_key, (with_nmt, baseline_name) in compared.items():
                with_value = configurations[with_nmt][measurement][i]
                baseline = configurations[baseline_name][measurement][i]
                result = (with_value - baseline) / baseline
                diff_percentages[pair_key][measurement].append(result)
                diff_percentages[pair_key][measurement + "_average"] += result / ITERATIONS
            for results in configurations.values():
                results[measurement + "_average"] += results[measurement][i] / ITERATIONS

    current_datetime = datetime.now().isoformat()
    with open("report_" + current_datetime + ".txt", 'a') as report:
        # Header: run parameters and image sizes.
        report.write(f"MODE: {MODE}\n")
        report.write(f"ITERATIONS: {ITERATIONS}\n")
        report.write(f"JAVA_HOME: {JAVA_HOME}\n")
        report.write(f"GRAALVM_HOME: {GRAALVM_HOME}\n")
        report.write(f"HYPERFOIL_HOME: {HYPERFOIL_HOME}\n\n")
        # Plain concatenation on purpose: the sizes are expected to be strings.
        report.write("Image size with NMT: " + file_sizes[0] + "\n")
        report.write("Image size without NMT: " + file_sizes[1] + "\n")

        report.write(separator)
        report.write("Average Performance Difference:\n")
        report.write("These values are averages calculated using the results in the 'Performance Difference' section. \n")
        for measurement in measurements:
            for pair_key, label in labels.items():
                average = diff_percentages[pair_key][measurement + "_average"]
                report.write(f"{measurement} ({label}): {average}\n")

        report.write(separator)
        report.write("Average Measurments:\n")
        report.write("These values are averages calculated using the results in the 'Raw Measurements' section. \n")
        for measurement in measurements:
            for config_name, results in configurations.items():
                average = results[measurement + "_average"]
                report.write(f"{measurement} ({config_name}): {average}\n")

        report.write(separator)
        report.write("Performance Difference:\n")
        report.write("These values are calculated pair-wise for each iteration. They are percentages calcluated using (With NMT - Without NMT) / Without NMT. \n")
        for measurement in measurements:
            for pair_key, label in labels.items():
                values = diff_percentages[pair_key][measurement]
                report.write(f"{measurement} ({label}): {values}\n")

        report.write(separator)
        report.write("Raw Measurements:\n")
        report.write("These are individual measurements for each iteration. rss is in kB, startup time is s, all others are in us. \n")
        for config_name, results in configurations.items():
            report.write(f"\n{config_name}:\n")
            for measurement in measurements:
                report.write(f"{measurement}: {results[measurement]}\n")
def main():
    """Run the whole benchmark: set up, measure every configuration, report.

    Turbo boost is disabled for the duration of the measurements and is
    re-enabled even if a test run raises, so a failed benchmark does not leave
    the system-wide setting changed.
    """
    set_global_variables()

    # Set up the data dictionaries. Run commands and configurations are paired
    # by position, so both must be declared in the same order.
    for index, results in enumerate(configurations.values()):
        results["run_command"] = RUN_COMMANDS[index]
        for measurement in measurements:
            results[measurement] = []
            results[measurement + "_average"] = 0

    file_sizes = get_image_sizes()

    enableTurboBoost(False)
    try:
        # Do the test multiple times. Each round tests the full set of
        # configurations as a batch, so the runs are interleaved: the deltas
        # being compared are then closer in time and more likely to have seen
        # the same system load.
        for _ in range(ITERATIONS):
            for config_name, results in configurations.items():
                run_test(results, config_name)
    finally:
        enableTurboBoost(True)

    write_results(file_sizes)


# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()