format fio files
gargnitingoogle committed Aug 5, 2024
1 parent b2edd59 commit 5263acc
Showing 2 changed files with 169 additions and 112 deletions.
224 changes: 137 additions & 87 deletions perfmetrics/scripts/testing_on_gke/examples/fio/parse_logs.py
@@ -16,9 +16,9 @@
# limitations under the License.

import json, os, pprint, subprocess
import sys

sys.path.append("../")
from utils.utils import get_memory, get_cpu, unix_to_timestamp, is_mash_installed

LOCAL_LOGS_LOCATION = "../../bin/fio-logs"
@@ -40,97 +40,147 @@
}

if __name__ == "__main__":
logLocations = [("gke-fio-64k-1m", "64K"), ("gke-fio-128k-1m", "128K"), ("gke-fio-1mb-1m", "1M"), ("gke-fio-100mb-50k", "100M"), ("gke-fio-200gb-1", "200G")]

logLocations = [
("gke-fio-64k-1m", "64K"),
("gke-fio-128k-1m", "128K"),
("gke-fio-1mb-1m", "1M"),
("gke-fio-100mb-50k", "100M"),
("gke-fio-200gb-1", "200G"),
]

try:
os.makedirs(LOCAL_LOGS_LOCATION)
except FileExistsError:
pass

for folder, fileSize in logLocations:
try:
os.makedirs(LOCAL_LOGS_LOCATION)
os.makedirs(LOCAL_LOGS_LOCATION + "/" + fileSize)
except FileExistsError:
pass

for folder, fileSize in logLocations:
try:
os.makedirs(LOCAL_LOGS_LOCATION+"/"+fileSize)
except FileExistsError:
pass
print(f"Download FIO output from the folder {folder}...")
result = subprocess.run(["gsutil", "-m", "cp", "-r", f"gs://{folder}/fio-output", LOCAL_LOGS_LOCATION+"/"+fileSize], capture_output=False, text=True)
if result.returncode < 0:
print(f"failed to fetch FIO output, error: {result.stderr}")

'''
pass
print(f"Download FIO output from the folder {folder}...")
result = subprocess.run(
[
"gsutil",
"-m",
"cp",
"-r",
f"gs://{folder}/fio-output",
LOCAL_LOGS_LOCATION + "/" + fileSize,
],
capture_output=False,
text=True,
)
if result.returncode < 0:
print(f"failed to fetch FIO output, error: {result.stderr}")

"""
"{read_type}-{mean_file_size}":
"mean_file_size": str
"read_type": str
"records":
"local-ssd": [record1, record2, record3, record4]
"gcsfuse-file-cache": [record1, record2, record3, record4]
"gcsfuse-no-file-cache": [record1, record2, record3, record4]
'''
output = {}
mash_installed = is_mash_installed()
if not mash_installed:
print("Mash is not installed, will skip parsing CPU and memory usage.")

for root, _, files in os.walk(LOCAL_LOGS_LOCATION):
for file in files:
per_epoch_output = root + f"/{file}"
root_split = root.split("/")
mean_file_size = root_split[-4]
scenario = root_split[-2]
read_type = root_split[-1]
epoch = int(file.split(".")[0][-1])

with open(per_epoch_output, 'r') as f:
per_epoch_output_data = json.load(f)

key = "-".join([read_type, mean_file_size])
if key not in output:
output[key] = {
"mean_file_size": mean_file_size,
"read_type": read_type,
"records": {
"local-ssd": [],
"gcsfuse-file-cache": [],
"gcsfuse-no-file-cache": [],
},
}

r = record.copy()
bs = per_epoch_output_data["jobs"][0]["job options"]["bs"]
r["pod_name"] = f"fio-tester-{read_type}-{mean_file_size.lower()}-{bs.lower()}-{scenario}"
r["epoch"] = epoch
r["scenario"] = scenario
r["duration"] = int(per_epoch_output_data["jobs"][0]["read"]["runtime"] / 1000)
r["IOPS"] = int(per_epoch_output_data["jobs"][0]["read"]["iops"])
r["throughput_mb_per_second"] = int(per_epoch_output_data["jobs"][0]["read"]["bw_bytes"] / (1024 ** 2))
r["start"] = unix_to_timestamp(per_epoch_output_data["jobs"][0]["job_start"])
r["end"] = unix_to_timestamp(per_epoch_output_data["timestamp_ms"])
if r["scenario"] != "local-ssd" and mash_installed:
r["lowest_memory"], r["highest_memory"] = get_memory(r["pod_name"], r["start"], r["end"])
r["lowest_cpu"], r["highest_cpu"] = get_cpu(r["pod_name"], r["start"], r["end"])

pprint.pprint(r)

while len(output[key]["records"][scenario]) < epoch:
output[key]["records"][scenario].append({})

output[key]["records"][scenario][epoch-1] = r

output_order = ["read-64K", "read-128K", "read-1M", "read-100M", "read-200G", "randread-1M", "randread-100M", "randread-200G"]
scenario_order = ["local-ssd", "gcsfuse-no-file-cache", "gcsfuse-file-cache"]

output_file = open("./output.csv", "a")
output_file.write("File Size,Read Type,Scenario,Epoch,Duration (s),Throughput (MB/s),IOPS,Throughput over Local SSD (%),GCSFuse Lowest Memory (MB),GCSFuse Highest Memory (MB),GCSFuse Lowest CPU (core),GCSFuse Highest CPU (core),Pod,Start,End\n")

for key in output_order:
if key not in output:
continue
record_set = output[key]

for scenario in scenario_order:
for i in range(len(record_set["records"][scenario])):
r = record_set["records"][scenario][i]
r["throughput_over_local_ssd"] = round(r["throughput_mb_per_second"] / record_set["records"]["local-ssd"][i]["throughput_mb_per_second"] * 100, 2)
output_file.write(f"{record_set['mean_file_size']},{record_set['read_type']},{scenario},{r['epoch']},{r['duration']},{r['throughput_mb_per_second']},{r['IOPS']},{r['throughput_over_local_ssd']},{r['lowest_memory']},{r['highest_memory']},{r['lowest_cpu']},{r['highest_cpu']},{r['pod_name']},{r['start']},{r['end']}\n")

output_file.close()
"""
  output = {}
  mash_installed = is_mash_installed()
  if not mash_installed:
    print("Mash is not installed, will skip parsing CPU and memory usage.")

  for root, _, files in os.walk(LOCAL_LOGS_LOCATION):
    for file in files:
      per_epoch_output = root + f"/{file}"
      root_split = root.split("/")
      mean_file_size = root_split[-4]
      scenario = root_split[-2]
      read_type = root_split[-1]
      epoch = int(file.split(".")[0][-1])

      with open(per_epoch_output, "r") as f:
        per_epoch_output_data = json.load(f)

      key = "-".join([read_type, mean_file_size])
      if key not in output:
        output[key] = {
            "mean_file_size": mean_file_size,
            "read_type": read_type,
            "records": {
                "local-ssd": [],
                "gcsfuse-file-cache": [],
                "gcsfuse-no-file-cache": [],
            },
        }

      r = record.copy()
      bs = per_epoch_output_data["jobs"][0]["job options"]["bs"]
      r["pod_name"] = (
          f"fio-tester-{read_type}-{mean_file_size.lower()}-{bs.lower()}-{scenario}"
      )
      r["epoch"] = epoch
      r["scenario"] = scenario
      r["duration"] = int(
          per_epoch_output_data["jobs"][0]["read"]["runtime"] / 1000
      )
      r["IOPS"] = int(per_epoch_output_data["jobs"][0]["read"]["iops"])
      r["throughput_mb_per_second"] = int(
          per_epoch_output_data["jobs"][0]["read"]["bw_bytes"] / (1024**2)
      )
      r["start"] = unix_to_timestamp(
          per_epoch_output_data["jobs"][0]["job_start"]
      )
      r["end"] = unix_to_timestamp(per_epoch_output_data["timestamp_ms"])
      if r["scenario"] != "local-ssd" and mash_installed:
        r["lowest_memory"], r["highest_memory"] = get_memory(
            r["pod_name"], r["start"], r["end"]
        )
        r["lowest_cpu"], r["highest_cpu"] = get_cpu(
            r["pod_name"], r["start"], r["end"]
        )

      pprint.pprint(r)

      while len(output[key]["records"][scenario]) < epoch:
        output[key]["records"][scenario].append({})

      output[key]["records"][scenario][epoch - 1] = r

  output_order = [
      "read-64K",
      "read-128K",
      "read-1M",
      "read-100M",
      "read-200G",
      "randread-1M",
      "randread-100M",
      "randread-200G",
  ]
  scenario_order = ["local-ssd", "gcsfuse-no-file-cache", "gcsfuse-file-cache"]

  output_file = open("./output.csv", "a")
  output_file.write(
      "File Size,Read Type,Scenario,Epoch,Duration (s),Throughput"
      " (MB/s),IOPS,Throughput over Local SSD (%),GCSFuse Lowest Memory"
      " (MB),GCSFuse Highest Memory (MB),GCSFuse Lowest CPU (core),GCSFuse"
      " Highest CPU (core),Pod,Start,End\n"
  )

  for key in output_order:
    if key not in output:
      continue
    record_set = output[key]

    for scenario in scenario_order:
      for i in range(len(record_set["records"][scenario])):
        r = record_set["records"][scenario][i]
        r["throughput_over_local_ssd"] = round(
            r["throughput_mb_per_second"]
            / record_set["records"]["local-ssd"][i]["throughput_mb_per_second"]
            * 100,
            2,
        )
        output_file.write(
            f"{record_set['mean_file_size']},{record_set['read_type']},{scenario},{r['epoch']},{r['duration']},{r['throughput_mb_per_second']},{r['IOPS']},{r['throughput_over_local_ssd']},{r['lowest_memory']},{r['highest_memory']},{r['lowest_cpu']},{r['highest_cpu']},{r['pod_name']},{r['start']},{r['end']}\n"
        )

  output_file.close()
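
Note on the parsing above: the indices root_split[-4], root_split[-2], and root_split[-1] assume a fixed on-disk layout for the downloaded logs. A minimal sketch of that assumed layout, using a hypothetical path and epoch file name:

# Assumed layout (hypothetical example; one JSON file per epoch):
#   ../../bin/fio-logs/<fileSize>/fio-output/<scenario>/<read_type>/epoch<N>.json
root = "../../bin/fio-logs/64K/fio-output/gcsfuse-file-cache/read"
file = "epoch1.json"
root_split = root.split("/")
print(root_split[-4])               # 64K                -> mean_file_size
print(root_split[-2])               # gcsfuse-file-cache -> scenario
print(root_split[-1])               # read               -> read_type
print(int(file.split(".")[0][-1]))  # 1 -> epoch (works for single-digit epochs only)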
57 changes: 32 additions & 25 deletions perfmetrics/scripts/testing_on_gke/examples/fio/run_tests.py
@@ -17,37 +17,44 @@

import subprocess


def run_command(command: str):
  result = subprocess.run(command.split(" "), capture_output=True, text=True)
  print(result.stdout)
  print(result.stderr)


bucketName_fileSize_blockSize = [
    ("gke-fio-64k-1m", "64K", "64K"),
    ("gke-fio-128k-1m", "128K", "128K"),
    ("gke-fio-1mb-1m", "1M", "256K"),
    ("gke-fio-100mb-50k", "100M", "1M"),
    ("gke-fio-200gb-1", "200G", "1M"),
]

scenarios = ["gcsfuse-file-cache", "gcsfuse-no-file-cache", "local-ssd"]

for bucketName, fileSize, blockSize in bucketName_fileSize_blockSize:
  for readType in ["read", "randread"]:
    for scenario in scenarios:
      if readType == "randread" and fileSize in ["64K", "128K"]:
        continue

      commands = [
          (
              "helm install"
              f" fio-loading-test-{fileSize.lower()}-{readType}-{scenario} loading-test"
          ),
          f"--set bucketName={bucketName}",
          f"--set scenario={scenario}",
          f"--set fio.readType={readType}",
          f"--set fio.fileSize={fileSize}",
          f"--set fio.blockSize={blockSize}",
      ]

      if fileSize == "100M":
        commands.append("--set fio.filesPerThread=1000")

      helm_command = " ".join(commands)

      run_command(helm_command)
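
For reference, a sketch of the helm command the innermost loop assembles for the first tuple; the values are taken from the lists above, and the expected output is shown as a comment (wrapped here for readability):

fileSize, readType, scenario = "64K", "read", "gcsfuse-file-cache"
helm_command = " ".join([
    f"helm install fio-loading-test-{fileSize.lower()}-{readType}-{scenario} loading-test",
    "--set bucketName=gke-fio-64k-1m",
    f"--set scenario={scenario}",
    f"--set fio.readType={readType}",
    f"--set fio.fileSize={fileSize}",
    "--set fio.blockSize=64K",
])
print(helm_command)
# helm install fio-loading-test-64k-read-gcsfuse-file-cache loading-test
#   --set bucketName=gke-fio-64k-1m --set scenario=gcsfuse-file-cache
#   --set fio.readType=read --set fio.fileSize=64K --set fio.blockSize=64K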
