From 7d330578e783442d46d2914fabb15b92cfb1be9f Mon Sep 17 00:00:00 2001
From: DannyMish
Date: Mon, 20 Nov 2023 14:04:28 -0500
Subject: [PATCH] initial pass at ptt converter

---
 tools/profiling/python/ptt_converter.py | 208 ++++++++++++++++++++++++
 1 file changed, 208 insertions(+)
 create mode 100644 tools/profiling/python/ptt_converter.py

diff --git a/tools/profiling/python/ptt_converter.py b/tools/profiling/python/ptt_converter.py
new file mode 100644
index 000000000..c4c44098f
--- /dev/null
+++ b/tools/profiling/python/ptt_converter.py
@@ -0,0 +1,208 @@
+import pandas as pd
+import sys
+import os
+
+# Originally developed by Daniel Mishler in the fall of 2023.
+
+# Take a trace from v1 and build the dict of DataFrames ("events" plus one
+# "event_infos_<type>" frame per event type) matching v2's layout.
+def tv2_events_from_tv1_trace(v1trace):
+    v1events = v1trace['events']
+    tv2_aei = {}  # all event info
+    always_present = []
+
+    # columns with no nulls belong in the shared "events" frame
+    for col in v1events.columns:
+        if not v1events[col].isnull().any():
+            always_present.append(col)
+
+    # could skip the loop above to save time and opt for defaults in a pinch
+    """
+    always_present_default = ['node_id', 'stream_id', 'taskpool_id', 'type', 'begin', 'end', 'flags', 'id']
+    always_present = always_present_default
+    """
+
+    tv2_aei['events'] = v1events[always_present]
+
+    other_columns = []
+    for item in v1events.columns:
+        if item not in always_present:
+            other_columns.append(item)
+
+    # per event type, keep only the columns that actually carry data
+    for i in range(len(v1trace['event_types'])):
+        sliced_df = v1events[v1events.type == i]
+
+        relevant_columns = other_columns.copy()
+        for col in other_columns:
+            if sliced_df[col].isnull().all():  # everything is null
+                relevant_columns.remove(col)
+
+        tv2_aei[f"event_infos_{i}"] = sliced_df[relevant_columns]
+
+    return tv2_aei
+
+# Take a trace from v2 and build a single "events" DataFrame matching v1's
+# layout, by folding each "event_infos_<type>" frame back into "events".
+def tv1_events_from_tv2_trace(v2trace):
+    events = v2trace['events']
+    new_cols = set()
+    for i in range(len(v2trace['event_types'])):
+        for col in v2trace[f'event_infos_{i}']:
+            new_cols.add(col)
+    for col in new_cols:
+        events[col] = None
+    for i in range(len(v2trace['event_types'])):
+        events.update(v2trace[f'event_infos_{i}'])
+    return events
+
+
+# This function may not be needed on a large scale, but it was a utility
+# used during development.
+def compare_entries(a, b):
+    if not isinstance(a, (pd.DataFrame, pd.Series)):
+        print("CE ERROR: unknown type for a")
+        return False
+    if not isinstance(b, (pd.DataFrame, pd.Series)):
+        print("CE ERROR: unknown type for b")
+        return False
+    if len(a) != len(b):
+        print("CE ERROR: lengths do not match")
+        return False
+    if isinstance(a, pd.DataFrame) and isinstance(b, pd.DataFrame):
+        # crude conversion: compare everything as strings
+        a = a.astype(str)
+        b = b.astype(str)  # it is ugly that this works; consider a better option?
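+        # NaN never compares equal to itself, and the two stores may not read
+        # back with identical dtypes, so a direct compare() can flag spurious
+        # differences; comparing the string forms sidesteps both issues.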
+        # realign axes so that compare() sees the same column order
+        try:
+            a = a[b.columns]
+        except KeyError:
+            b = b[a.columns]
+        comparison_obj = a.compare(b)
+    else:
+        comparison_obj = (a == b)
+    if len(comparison_obj) == 0:
+        return True
+    if isinstance(comparison_obj, pd.Series):
+        # boolean Series from (a == b): equal only if every element matches
+        return bool(comparison_obj.all())
+    # DataFrame.compare() keeps only the differing cells, so any remaining
+    # rows mean the two frames do not match
+    return False
+
+def sanity_check_traces(ta, tb):
+    # compare every key the two stores have in common and report mismatches
+    for key in ta.keys():
+        if key in tb.keys():
+            match = compare_entries(ta[key], tb[key])
+            if match:
+                print(f"key {key} matches")
+            else:
+                print(f"key {key} does NOT match")
+
+def sanity_check_files(f1name, f2name):
+    f1trace = pd.HDFStore(f1name)
+    f2trace = pd.HDFStore(f2name)
+
+    sanity_check_traces(f1trace, f2trace)
+
+    f1trace.close()
+    f2trace.close()
+
+
+def tv2_to_tv1(infile, outfile):
+    v2trace = pd.HDFStore(infile)
+    v1trace = pd.HDFStore(outfile)
+
+    # copy everything except the event tables, which need restructuring,
+    # and the v2-only "/nodes" table, which has no v1 counterpart
+    for key in v2trace:
+        if key == "/events" or "event_infos" in key:
+            continue
+        if key == "/information":
+            v2trace[key].to_hdf(outfile, key=key, mode='a')
+            continue
+        if key == "/nodes":
+            continue
+        # else
+        v1trace.append(key, v2trace[key])
+
+    tv1events = tv1_events_from_tv2_trace(v2trace)
+
+    # cast to str so the mixed-type columns can be appended to the HDF store
+    for col in tv1events:
+        tv1events[col] = tv1events[col].astype(str)
+
+    v1trace.append("events", tv1events)
+
+    v2trace.close()
+    v1trace.close()
+
+def tv1_to_tv2(infile, outfile):
+    v1trace = pd.HDFStore(infile)
+
+    # copy everything except the event tables, which need restructuring
+    for key in v1trace:
+        if key == "/events" or "event_infos" in key:
+            continue
+        # else
+        v1trace[key].to_hdf(outfile, key=key, mode='a')
+
+    # now handle the events
+    tv2_aei = tv2_events_from_tv1_trace(v1trace)
+    for key in tv2_aei:
+        tv2_aei[key].to_hdf(outfile, key=key, mode='a')
+
+    v1trace.close()
+
+def help_message():
+    print(f"usage: {sys.argv[0]} <version_option> <infile> <outfile>")
+    print("    version_options:")
+    print("        2t1 // converts from version 2 to version 1")
+    print("        1t2 // converts from version 1 to version 2")
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 4:
+        help_message()
+        sys.exit()
+
+    version_option = sys.argv[1]
+    infile = sys.argv[2]
+    outfile = sys.argv[3]
+
+    if os.path.isfile(outfile):
+        print(f"`{outfile}` previously existed and will be overwritten.")
+        # overwrite the output file
+        os.remove(outfile)
+        # alternatively, add this in for a confirmation prompt
+        """
+        print(f"`{outfile}` is a file that already exists. Do you wish to overwrite it? (yes/no)")
+        uinput = input()
+        if uinput == "y" or uinput == "yes":
+            # overwrite the output file
+            os.remove(outfile)
+        else:
+            print("halting execution")
+            sys.exit()
+        """
+
+    if version_option == "2t1":
+        tv2_to_tv1(infile, outfile)
+    elif version_option == "1t2":
+        tv1_to_tv2(infile, outfile)
+    else:
+        print("error: could not understand <version_option>")
+        help_message()
+        sys.exit()
+
+    """
+    print("SANITY CHECK")
+    if version_option == "2t1":
+        sanity_check_files(infile, outfile)
+    elif version_option == "1t2":
+        sanity_check_files(infile, outfile)
+    """
\ No newline at end of file
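
Note on usage: a minimal sketch of driving the converter from another Python script rather than from the command line; the trace filenames below are hypothetical, and it assumes the new file is importable as ptt_converter (e.g. run from tools/profiling/python).

    import pandas as pd
    import ptt_converter

    # convert a hypothetical v1 trace to the v2 layout
    ptt_converter.tv1_to_tv2("trace_v1.h5", "trace_v2.h5")

    # list the keys written to the converted store
    with pd.HDFStore("trace_v2.h5") as store:
        print(store.keys())

The equivalent command-line invocation would be: python ptt_converter.py 1t2 trace_v1.h5 trace_v2.h5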