Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PTT version converter initial pass at ptt converter #600

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
208 changes: 208 additions & 0 deletions tools/profiling/python/ptt_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
import pandas as pd
import sys
import os

# Originally developed by Daniel Mishler in the fall of 2023.

# take a trace from v1, and make an "events" df matching v2's "events" fields
def tv2_events_from_tv1_trace(v1trace):
    """Split a v1 trace's flat ``events`` table into the v2 table layout.

    Args:
        v1trace: Mapping-like object (for example an open ``pandas.HDFStore``)
            providing an ``'events'`` DataFrame and an ``'event_types'``
            entry that supports ``len()``.

    Returns:
        dict: ``'events'`` maps to the columns of the v1 events table that
        never contain a null value. For every ``i`` in
        ``range(len(v1trace['event_types']))``, ``'event_infos_<i>'`` maps to
        the rows whose ``type`` column equals ``i``, restricted to the
        remaining columns that hold at least one non-null value in those rows.
    """
    v1events = v1trace['events']

    # Partition the columns in one pass. The null test is vectorized so that
    # large traces are not scanned element by element in Python.
    always_present = []
    sometimes_null = []
    for col in v1events.columns:
        if v1events[col].isnull().any():
            sometimes_null.append(col)
        else:
            always_present.append(col)

    tv2_aei = {'events': v1events[always_present]}  # all event info

    # Event type ids are taken to be 0 .. len(event_types) - 1.
    for type_id in range(len(v1trace['event_types'])):
        # Bracket access: "type" is a column label, not a DataFrame attribute.
        sliced_df = v1events[v1events['type'] == type_id]

        # Keep only the columns this event type actually populates. For an
        # empty slice `.all()` is True, so no columns are kept.
        relevant_columns = [
            col for col in sometimes_null
            if not sliced_df[col].isnull().all()
        ]
        tv2_aei[f"event_infos_{type_id}"] = sliced_df[relevant_columns]

    return tv2_aei

# take a trace from v2, and make an "events" df matching v1's "events"
def tv1_events_from_tv2_trace(v2trace):
    """Merge a v2 trace's per-type info tables back into one events table.

    Args:
        v2trace: Mapping-like object (for example an open ``pandas.HDFStore``)
            providing ``'events'``, ``'event_types'`` and one
            ``'event_infos_<i>'`` DataFrame for every ``i`` in
            ``range(len(v2trace['event_types']))``.

    Returns:
        pandas.DataFrame: the object obtained from ``v2trace['events']``,
        extended in place with one column per distinct info-table column.
        Cells with no matching info-table entry are left as ``None``.
    """
    events = v2trace['events']

    # Fetch each per-type table once and reuse it for both passes below.
    info_tables = [
        v2trace[f'event_infos_{i}']
        for i in range(len(v2trace['event_types']))
    ]

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # output column order is the same on every run. A set would order string
    # labels by their randomized hash.
    new_cols = dict.fromkeys(
        col for table in info_tables for col in table
    )
    for col in new_cols:
        events[col] = None

    # update() aligns on the index and copies only non-null cells, so each
    # type's rows receive their own values and other rows stay untouched.
    for table in info_tables:
        events.update(table)
    return events


# this function may not be needed on a large scale, but it was a utility
# used in development
def compare_entries(a, b):
    """Return True when two pandas objects hold the same entries.

    Two DataFrames are compared after casting every cell to ``str`` and
    aligning the column order, so dtype differences between trace versions
    are ignored as long as the textual values agree. Any other combination is
    compared element-wise with ``==`` and must match everywhere.

    Args:
        a: ``pandas.DataFrame`` or ``pandas.Series``.
        b: ``pandas.DataFrame`` or ``pandas.Series``.

    Returns:
        bool: True if the entries match. False if they differ, if either
        argument is not a DataFrame/Series, or if the lengths differ; the
        last two cases also print a ``CE ERROR`` line.

    Raises:
        ValueError: propagated from pandas when the two objects are not
            identically labeled.
        KeyError: propagated when neither frame's columns are a subset of
            the other's.
    """
    supported = (pd.DataFrame, pd.Series)
    if not isinstance(a, supported):
        print("CE ERROR: unknown type for a")
        return False
    if not isinstance(b, supported):
        print("CE ERROR: unknown type for b")
        return False
    if len(a) != len(b):
        print("CE ERROR: lengths do not match")
        return False

    if isinstance(a, pd.DataFrame) and isinstance(b, pd.DataFrame):
        # Compare textual representations so that, e.g., an int column and
        # the same values stored as strings count as equal.
        a = a.astype(str)
        b = b.astype(str)
        # Realign axes: put both frames in the same column order.
        try:
            a = a[b.columns]
        except KeyError:
            b = b[a.columns]
        # compare() keeps only the differing cells, so an empty result means
        # the frames are identical.
        return bool(a.compare(b).empty)

    # Every element has to match; a single equal element is not enough.
    matches = (a == b)
    return bool(matches.to_numpy().all())

def sanity_check_traces(ta, tb):
    """Print, for every key of `ta` that also exists in `tb`, whether the
    two entries stored under that key match according to compare_entries.

    Keys that only exist in one of the two traces are skipped silently.
    """
    for key in ta.keys():
        # Only keys present on both sides can be compared.
        if key not in tb.keys():
            continue
        if compare_entries(ta[key], tb[key]):
            print(f"key {key} matches")
        else:
            print(f"key {key} does NOT match")

def sanity_check_files(f1name, f2name):
    """Open two HDF5 trace files and print which shared keys match.

    Args:
        f1name: Path of the first HDF5 file.
        f2name: Path of the second HDF5 file.

    Both files are opened read-only, so a mistyped path raises instead of
    silently creating an empty file. The `with` block closes both stores
    even if the comparison raises.
    """
    with pd.HDFStore(f1name, mode='r') as f1trace, \
            pd.HDFStore(f2name, mode='r') as f2trace:
        sanity_check_traces(f1trace, f2trace)



def tv2_to_tv1(infile, outfile):
    """Convert a version 2 trace file into a version 1 trace file.

    Args:
        infile: Path of the existing v2 HDF5 trace. Opened read-only.
        outfile: Path of the v1 HDF5 trace to write.

    Every key of the input is copied to the output except:
      * ``/events`` and any key containing ``event_infos``, which are merged
        into a single v1 ``events`` table by tv1_events_from_tv2_trace;
      * ``/nodes``, which is not written to the v1 output.
    """
    # The `with` block closes both stores even when a step below raises.
    with pd.HDFStore(infile, mode='r') as v2trace, \
            pd.HDFStore(outfile) as v1trace:
        for key in v2trace:
            if key == "/events" or "event_infos" in key:
                continue  # rebuilt below as one merged table
            if key == "/nodes":
                continue  # presumably has no v1 counterpart - TODO confirm
            if key == "/information":
                # Written with to_hdf rather than append(); presumably this
                # entry cannot be stored in appendable table format -
                # TODO confirm.
                v2trace[key].to_hdf(outfile, key=key, mode='a')
                continue
            v1trace.append(key, v2trace[key])

        tv1events = tv1_events_from_tv2_trace(v2trace)

        # Every column is stored as text. NOTE(review): presumably needed
        # because the merged columns mix None with real values - confirm.
        tv1events = tv1events.astype(str)

        v1trace.append("events", tv1events)

def tv1_to_tv2(infile, outfile):
    """Convert a version 1 trace file into a version 2 trace file.

    Args:
        infile: Path of the existing v1 HDF5 trace. Opened read-only.
        outfile: Path of the v2 HDF5 trace to write.

    Every key of the input except ``/events`` (and any key containing
    ``event_infos``) is copied unchanged. The events table is then split by
    tv2_events_from_tv1_trace and each resulting table is written under its
    own key.
    """
    # The `with` block closes the input store even when a step below raises.
    with pd.HDFStore(infile, mode='r') as v1trace:
        for key in v1trace:
            if key == "/events" or "event_infos" in key:
                continue  # handled separately below
            v1trace[key].to_hdf(outfile, key=key, mode='a')

        # Now handle events: one "events" table plus one table per type.
        tv2_aei = tv2_events_from_tv1_trace(v1trace)
        for key, table in tv2_aei.items():
            table.to_hdf(outfile, key=key, mode='a')

def help_message():
    """Print the command-line usage summary for this script to stdout."""
    usage_lines = (
        f"usage: <python> {sys.argv[0]} <version_option> <input_hdf5> <output_hdf5>",
        " version_options:",
        " 2t1 // converts from version 2 to version 1",
        " 1t2 // converts from version 1 to version 2",
    )
    # One print call; the text is the same as printing line by line.
    print("\n".join(usage_lines))


def _main(argv):
    """Run the converter from a command-line argument list.

    Args:
        argv: Argument list shaped like ``sys.argv``:
            ``[script, version_option, input_hdf5, output_hdf5]``.

    Returns:
        int: 0 after a conversion, 1 when the arguments are missing or the
        version option is not recognized.
    """
    if len(argv) < 4:
        help_message()
        return 1

    version_option, infile, outfile = argv[1:4]

    # Map each accepted option to its converter.
    converters = {
        "2t1": tv2_to_tv1,
        "1t2": tv1_to_tv2,
    }
    convert = converters.get(version_option)

    # Validate the option BEFORE touching the output path, so a mistyped
    # option never deletes an existing file.
    if convert is None:
        print("error: could not understand <version_option>")
        help_message()
        return 1

    if os.path.isfile(outfile):
        # The converters write in append mode, so stale content has to be
        # removed first. No confirmation is asked.
        print(f"`{outfile}` previously existed and will be overwritten.")
        os.remove(outfile)

    convert(infile, outfile)

    # Development aid: calling sanity_check_files(infile, outfile) here
    # prints which keys of the input and the converted output match.
    return 0


if __name__ == "__main__":
    # sys.exit, unlike the interactive-only exit() helper, is always
    # available, and it reports failures through the exit status.
    sys.exit(_main(sys.argv))
Loading