Commit

v2024.07.19
Beercow committed Jul 19, 2024
1 parent 925e5b5 commit 2fb5195
Showing 10 changed files with 227 additions and 120 deletions.
Binary file modified OneDriveExplorer/Images/splashv.png
3 changes: 1 addition & 2 deletions OneDriveExplorer/OneDriveExplorer.py
@@ -50,7 +50,7 @@
 )
 
 __author__ = "Brian Maloney"
-__version__ = "2024.05.20"
+__version__ = "2024.07.19"
 __email__ = "bmmaloney97@gmail.com"
 rbin = []
 DATParser = dat_parser.DATParser()
@@ -89,7 +89,6 @@ def output():
    if ((args.csv or args.html) and args.json) or (not args.csv and not args.html):
        if not args.json:
            args.json = '.'
-        # print_json(df, rbin_df, name, args.pretty, args.json)
        print_json(cache, name, args.pretty, args.json)

    try:
191 changes: 124 additions & 67 deletions OneDriveExplorer/OneDriveExplorer_GUI.py

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion OneDriveExplorer/ode/helpers/mft.py
@@ -67,7 +67,6 @@ def directoryRecurse(directoryObject, parentPath, user, filedata=False):
 
            else:
                continue
-                # log.warning(f'This went wrong, {entryObject.info.name.name} {f_type}')
 
        except IOError as e:
            log.error(e)
4 changes: 3 additions & 1 deletion OneDriveExplorer/ode/parsers/csv_file.py
@@ -33,6 +33,7 @@ def parse_csv(filename):
 
    file = open(filename.name, 'r', encoding='utf-8')
    columns_to_drop = ['parentResourceId', 'resourceId', 'inRecycleBin', 'volumeId', 'fileId', 'DeleteTimeStamp', 'notificationTime', 'hash']
+    columns_to_drop_2 = ['MountPoint', 'Path', 'fileName', 'graphMetadataJSON', 'spoCompositeID', 'createdBy', 'modifiedBy', 'filePolicies', 'fileExtension', 'lastWriteCount']
 
    dtypes = {'Type': 'object',
              'scopeID': 'object',
@@ -73,7 +74,7 @@
    df_scope = df.loc[df['Type'] == 'Scope',
                      ['Type', 'scopeID', 'siteID', 'webID', 'listID',
                       'tenantID', 'webURL', 'remotePath', 'spoPermissions',
-                       'libraryType']]
+                       'shortcutVolumeID', 'shortcutItemIndex', 'libraryType']]
    columns_to_fill = df_scope.columns.difference(['libraryType'])
    df_scope[columns_to_fill] = df_scope[columns_to_fill].fillna('')
    scopeID = df_scope['scopeID'].tolist()
@@ -106,6 +107,7 @@
 
        df = df.astype(object)
        df = df.where(pd.notna(df), None)
+        df.drop(columns=columns_to_drop_2, inplace=True)
 
    except Exception as e:
        print(e)
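Note: the new `columns_to_drop_2` set is removed only after the frame has been cast to `object` dtype and `NaN` swapped for `None`, so the serialized output keeps clean nulls while shedding columns that are only needed during parsing. A minimal sketch of that pattern (toy column names, not the parser's real schema):

```python
import pandas as pd

# Toy frame standing in for the parsed CSV (hypothetical columns).
df = pd.DataFrame({'keep': [1, 2], 'Path': ['a', None], 'MountPoint': [None, 'C:\\']})

df = df.astype(object)              # uniform object dtype so None survives
df = df.where(pd.notna(df), None)   # NaN -> None for clean JSON-style nulls
df.drop(columns=['Path', 'MountPoint'], inplace=True)  # shed display-only columns

print(df.to_dict('records'))        # [{'keep': 1}, {'keep': 2}]
```

Keep in mind `drop(columns=..., inplace=True)` raises `KeyError` if any listed column is absent; the drop only works here because every column in the list is guaranteed by the earlier parse.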
24 changes: 16 additions & 8 deletions OneDriveExplorer/ode/parsers/dat.py
@@ -75,10 +75,6 @@ def __init__(self):
                            'localSyncTokenData',
                            'localCobaltHashAlgorithm'
                            ]
-        self.int_to_date = ['lastChange',
-                            'serverLastChange',
-                            'mediaDateTaken'
-                            ]
        self.split_str = ['fileName',
                          'folderName'
                          ]
@@ -100,8 +96,6 @@ def __init__(self):
                           'syncTokenData',
                           'syncTokenData_size',
                           'unknown7',
-                           'shortcutVolumeID',
-                           'shortcutItemIndex',
                           'sourceResourceID'
                           ]
 

Expand Down Expand Up @@ -149,7 +143,7 @@ def parse_dat(self, usercid, account='Business', gui=False, pb=False, value_labe
if header.syncTokenData_size != 0:
account = 'Personal'
csvwriter = csv.writer(temp_scope, escapechar='\\')
csvwriter.writerow(['scopeID', 'siteID', 'webID', 'listID', 'libraryType', 'spoPermissions'])
csvwriter.writerow(['scopeID', 'siteID', 'webID', 'listID', 'libraryType', 'spoPermissions', 'shortcutVolumeID', 'shortcutItemIndex'])
self.scope_header = True
syncTokenData = urllib.parse.unquote(header.syncTokenData[:int(header.syncTokenData_size)].decode('utf-8'))
syncDict = dict(item.split("=") for item in syncTokenData.split(";"))
@@ -386,6 +380,7 @@ def parse_dat(self, usercid, account='Business', gui=False, pb=False, value_labe
                            del block._values[key]
                        except Exception:
                            continue
+                    block._values.update([('shortcutVolumeID', ''), ('shortcutItemIndex', '')])
 
                elif ff == '0a':
                    data_type = 'Scope'
@@ -397,7 +392,7 @@ def parse_dat(self, usercid, account='Business', gui=False, pb=False, value_labe
                            continue
                    block._values.update([('siteID', b''), ('webID', b'')])
                    block._values.move_to_end('listID', last=True)
-                    block._values.update([('libraryType', b''), ('spoPermissions', '')])
+                    block._values.update([('libraryType', b''), ('spoPermissions', ''), ('shortcutVolumeID', ''), ('shortcutItemIndex', '')])
 
                elif ff == '0b':
                    data_type = 'Scope'
@@ -408,6 +403,8 @@ def parse_dat(self, usercid, account='Business', gui=False, pb=False, value_labe
                        except Exception:
                            continue
                    block._values.update([('siteID', b''), ('webID', b''), ('listID', b''), ('libraryType', b''), ('spoPermissions', '')])
+                    block._values.move_to_end('shortcutVolumeID', last=True)
+                    block._values.move_to_end('shortcutItemIndex', last=True)
 
                elif ff == '0c':
                    data_type = 'Scope'
@@ -417,6 +414,7 @@ def parse_dat(self, usercid, account='Business', gui=False, pb=False, value_labe
                            del block._values[key]
                        except Exception:
                            continue
+                    block._values.update([('shortcutVolumeID', ''), ('shortcutItemIndex', '')])
 
                else:
                    block = self.datstruct.DAT_BLOCK(f.read(chunk))
@@ -481,13 +479,23 @@ def parse_dat(self, usercid, account='Business', gui=False, pb=False, value_labe
        temp_files.seek(0)
        temp_folders.seek(0)
 
+        convert = {'shortcutVolumeID': 'Int64',
+                   'shortcutItemIndex': 'Int64'
+                   }
+
        df_scope = pd.read_csv(temp_scope)
        temp_scope.close()
        df_scope.insert(0, 'Type', 'Scope')
        df_scope.insert(5, 'tenantID', '')
        df_scope.insert(6, 'webURL', '')
        df_scope.insert(7, 'remotePath', '')
        df_scope = df_scope.astype(object)
+        df_scope = df_scope.astype(convert)
+        df_scope['shortcutVolumeID'].fillna(0, inplace=True)
+        df_scope['shortcutItemIndex'].fillna(0, inplace=True)
+
+        df_scope['shortcutVolumeID'] = df_scope['shortcutVolumeID'].apply(lambda x: '{:08x}'.format(x) if pd.notna(x) else '')
+        df_scope['shortcutVolumeID'] = df_scope['shortcutVolumeID'].apply(lambda x: '{}{}{}{}-{}{}{}{}'.format(*x.upper()) if x else '')
        df_scope['spoPermissions'].replace('', np.nan, inplace=True)
        df_scope['spoPermissions'] = df_scope['spoPermissions'].fillna(0).astype('int')
        df_scope['spoPermissions'] = df_scope['spoPermissions'].apply(lambda x: permissions(x))
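Note: the two new `apply()` lambdas give `shortcutVolumeID` the same treatment `volumeID` already gets in onedrive.py: render the nullable `Int64` as zero-padded hex, then hyphenate it into the familiar XXXX-XXXX volume-serial form. A standalone illustration of that chain:

```python
# Illustration of the shortcutVolumeID formatting chain added above.
volume_id = 305419896                                   # example raw integer value

hex_str = '{:08x}'.format(volume_id)                    # '12345678' (zero-padded hex)
pretty = '{}{}{}{}-{}{}{}{}'.format(*hex_str.upper())   # '1234-5678'
print(pretty)
```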
28 changes: 13 additions & 15 deletions OneDriveExplorer/ode/parsers/odl.py
@@ -220,34 +220,32 @@ def decrypt(cipher_text):
    global dkey_list
    global utf_type
 
-    cipher_text_orig = cipher_text
-
    if not dkey_list:
-        return cipher_text_orig
+        return ''
    if len(cipher_text) < 22:
-        return cipher_text_orig # invalid or it was not encrypted!
+        return '' # invalid or it was not encrypted!
    # add proper base64 padding
    remainder = len(cipher_text) % 4
    if remainder == 1:
-        return cipher_text_orig # invalid b64 or it was not encrypted!
+        return '' # invalid b64 or it was not encrypted!
    elif remainder in (2, 3):
        cipher_text += "=" * (4 - remainder)
    try:
        cipher_text = cipher_text.replace('_', '/').replace('-', '+')
        cipher_text = base64.b64decode(cipher_text)
    except Exception:
-        return cipher_text_orig
+        return ''
 
    if len(cipher_text) % 16 != 0:
-        return cipher_text_orig # invalid b64 or it was not encrypted!
+        return '' # invalid b64 or it was not encrypted!
 
    for key in dkey_list:
        try:
            cipher = AES.new(key, AES.MODE_CBC, iv=b'\0'*16)
            raw = cipher.decrypt(cipher_text)
        except ValueError as ex:
            # log.error(f'Exception while decrypting data {str(ex)}')
-            return cipher_text_orig
+            return ''
        try:
            plain_text = unpad(raw, 16)
        except Exception as ex: # possible fix to change key
@@ -257,7 +255,7 @@ def decrypt(cipher_text):
            plain_text = plain_text.decode(utf_type)
        except ValueError as ex:
            # print(f"Error decoding {utf_type}", str(ex))
-            return cipher_text_orig
+            return ''
    return plain_text
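Note: with this change every failure path in `decrypt()` returns an empty string instead of echoing the ciphertext back to the caller. A self-contained sketch of the round trip the function expects (hypothetical key and plaintext; pycryptodome, which the module already uses for `AES` and `unpad`):

```python
import base64
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad, unpad

key = b'0' * 32                       # hypothetical 256-bit key, not a real ODL key
iv = b'\0' * 16                       # odl.py uses an all-zero IV

# Encrypt a sample string the way obfuscated ODL values are stored.
cipher = AES.new(key, AES.MODE_CBC, iv=iv)
token = base64.b64encode(cipher.encrypt(pad(b'C:\\Users\\test', 16)))
token = token.decode().replace('/', '_').replace('+', '-').rstrip('=')  # base64url-ish, unpadded

# Decrypt side, mirroring decrypt(): restore padding and the standard alphabet.
token += '=' * (-len(token) % 4)
raw = base64.b64decode(token.replace('_', '/').replace('-', '+'))
plain = unpad(AES.new(key, AES.MODE_CBC, iv=iv).decrypt(raw), 16)
print(plain.decode())                 # C:\Users\test
```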


@@ -287,6 +285,7 @@ def read_obfuscation_map(obfuscation_map_path, map):
    repeated_items_found = False
    encoding = guess_encoding(obfuscation_map_path)
    with open(obfuscation_map_path, 'r', encoding=encoding) as f:
+        log.info(f"Building map from {f.name}")
        for line in f.readlines():
            line = line.rstrip('\n')
            terms = line.split('\t')
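Note: a rough sketch of the loop shown above, reduced to its core; it is based only on the context lines visible here (the real `read_obfuscation_map` also guesses the file encoding and flags repeated keys, and the file name below is illustrative):

```python
# Each line of the obfuscation map is "<obfuscated>\t<plaintext>";
# first occurrence wins in this simplified version.
the_map = {}
with open('ObfuscationStringMap.txt', 'r', encoding='utf-8-sig') as f:
    for line in f.readlines():
        terms = line.rstrip('\n').split('\t')
        if len(terms) >= 2 and terms[0] not in the_map:
            the_map[terms[0]] = terms[1]
```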
@@ -400,7 +399,7 @@ def process_odl(filename, map):
    if header.signature == b'EBFGONED': # Odl header
        pass
    else:
-        log.warning(f'{basename} wrong header! Did not find EBFGONED')
+        log.error(f'{basename} wrong header! Did not find EBFGONED')
        return pd.DataFrame()
    signature = f.read(8)
    # Now either we have the gzip header here or the CDEF header (compressed or uncompressed handles both)
@@ -417,7 +416,7 @@ def process_odl(filename, map):
        f = io.BytesIO(file_data)
        signature = f.read(8)
    if signature != b'\xCC\xDD\xEE\xFF\0\0\0\0': # CDEF header
-        log.warning(f'{basename} wrong header! Did not find 0xCCDDEEFF')
+        log.error(f'{basename} wrong header! Did not find 0xCCDDEEFF')
        return pd.DataFrame()
    else:
        f.seek(-8, 1)
@@ -460,7 +459,7 @@ def process_odl(filename, map):
        else:
            log.error(f'Unknown odl_version = {header.odl_version}')
        if data_block.signature != 0xffeeddcc:
-            log.warning(f'{basename} wrong data_block signature! Did not find 0xCCDDEEFF')
+            log.warning(f'Unable to parse {basename} completely. Did not find 0xCCDDEEFF')
            return pd.DataFrame.from_records(odl_rows)
        timestamp = ReadUnixMsTime(data_block.timestamp)
        odl['Timestamp'] = timestamp
@@ -473,8 +472,7 @@ def process_odl(filename, map):
            params_len = (data_block.data_len - data.code_file_name_len - data.code_function_name_len - 12)
            f.seek(- params_len, io.SEEK_CUR)
        except Exception as e:
-            log.warning(f'Unable to parse {basename}. Something went wrong! {type(e).__name__}')
-            # template = "An exception of type {0} occurred. Arguments:\n{1!r}"
+            log.warning(f'Unable to parse {basename} completely. {type(e).__name__}')
            return pd.DataFrame.from_records(odl_rows)
 
        if params_len:
@@ -506,7 +504,7 @@ def process_odl(filename, map):
                params = ', '.join(params)
                description = ''.join([v for (k, v) in cparser.consts.items() if k == f"{data.code_file_name.decode('utf8').lower().split('.')[0]}_{data.flags}_{data.code_function_name.decode('utf8').split('::')[-1].replace('~', '_').replace(' ()', '_').lower()}_des"])
            except EOFError:
-                log.error(f"EOFError while parsing {data.code_file_name.decode('utf8').lower().split('.')[0]}_{data.flags}_{data.code_function_name.decode('utf8').split('::')[-1].replace('~', '_').replace(' ()', '_').lower()}")
+                log.warning(f"EOFError while parsing {data.code_file_name.decode('utf8').lower().split('.')[0]}_{data.flags}_{data.code_function_name.decode('utf8').split('::')[-1].replace('~', '_').replace(' ()', '_').lower()}")
                f.seek(- params_len, 1)
                params = extract_strings(f.read(params_len).decode('utf8', 'ignore'), map)
            except AttributeError:
21 changes: 18 additions & 3 deletions OneDriveExplorer/ode/parsers/onedrive.py
@@ -88,6 +88,7 @@ def find_parent(self, x, id_name_dict, parent_dict):
 
        return self.find_parent(value, id_name_dict, parent_dict) + "\\\\" + str(id_name_dict.get(value))
 
+    # Generate scopeID list instead of passing
    def parse_onedrive(self, df, df_scope, df_GraphMetadata_Records, scopeID, file_path, rbin_df, account=False, reghive=False, recbin=False, gui=False, pb=False, value_label=False):
        if os.path.isdir(file_path):
            directory = file_path
@@ -167,6 +168,7 @@ def parse_onedrive(self, df, df_scope, df_GraphMetadata_Records, scopeID, file_p
 
        df = df.astype(convert)
        df['volumeID'].fillna(0, inplace=True)
+        df['itemIndex'].fillna(0, inplace=True)
 
        df['volumeID'] = df['volumeID'].apply(lambda x: '{:08x}'.format(x) if pd.notna(x) else '')
        df['volumeID'] = df['volumeID'].apply(lambda x: '{}{}{}{}-{}{}{}{}'.format(*x.upper()) if x else '')
@@ -176,14 +178,25 @@ def parse_onedrive(self, df, df_scope, df_GraphMetadata_Records, scopeID, file_p
        dcache = {}
        is_del = []
 
+        # Need to look into this
        if not df_GraphMetadata_Records.empty:
            df_GraphMetadata_Records.set_index('resourceID', inplace=True)
 
+        column_len = len(df.columns)
+
        for row in df.sort_values(
                by=['Level', 'parentResourceID', 'Type', 'FileSort', 'FolderSort', 'libraryType'],
                ascending=[False, False, False, True, False, False]).to_dict('records'):
            if row['Type'] == 'File':
-                file = {key: row[key] for key in ('parentResourceID', 'resourceID', 'eTag', 'Path', 'Name', 'fileStatus', 'HydrationTime', 'spoPermissions', 'volumeID', 'itemIndex', 'lastChange', 'size', 'localHashDigest', 'sharedItem', 'Media')}
+                if column_len == 32:
+                    file = {key: row[key] for key in ('parentResourceID', 'resourceID', 'eTag', 'Path', 'Name', 'fileStatus', 'spoPermissions', 'volumeID', 'itemIndex', 'lastChange', 'size', 'localHashDigest', 'sharedItem', 'Media')}
+
+                if column_len == 33:
+                    file = {key: row[key] for key in ('parentResourceID', 'resourceID', 'eTag', 'Path', 'Name', 'fileStatus', 'spoPermissions', 'volumeID', 'itemIndex', 'lastChange', 'HydrationTime', 'size', 'localHashDigest', 'sharedItem', 'Media')}
+
+                if column_len == 36:
+                    file = {key: row[key] for key in ('parentResourceID', 'resourceID', 'eTag', 'Path', 'Name', 'fileStatus', 'lastHydrationType', 'spoPermissions', 'volumeID', 'itemIndex', 'lastChange', 'firstHydrationTime', 'lastHydrationTime', 'hydrationCount', 'size', 'localHashDigest', 'sharedItem', 'Media')}
+
                file.setdefault('Metadata', '')
 
                try:
@@ -202,7 +215,7 @@ def parse_onedrive(self, df, df_scope, df_GraphMetadata_Records, scopeID, file_p
                if row['scopeID'] not in scopeID:
                    continue
                scope = {key: row[key] for key in (
-                    'scopeID', 'siteID', 'webID', 'listID', 'tenantID', 'webURL', 'remotePath', 'MountPoint', 'spoPermissions')}
+                    'scopeID', 'siteID', 'webID', 'listID', 'tenantID', 'webURL', 'remotePath', 'MountPoint', 'spoPermissions', 'shortcutVolumeID', 'shortcutItemIndex')}
                folder = cache.get(row['scopeID'], {})
                temp = {**scope, **folder}
                final.insert(0, temp)
@@ -217,6 +230,8 @@ def parse_onedrive(self, df, df_scope, df_GraphMetadata_Records, scopeID, file_p
                    'scopeID', 'siteID', 'webID', 'listID', 'tenantID', 'webURL', 'remotePath')}
                scope['MountPoint'] = row['MountPoint']
                scope['spoPermissions'] = s['spoPermissions']
+                scope['shortcutVolumeID'] = s['shortcutVolumeID']
+                scope['shortcutItemIndex'] = s['shortcutItemIndex']
                folder = cache.get(row['resourceID'], {})
                temp = {**sub_folder, **folder}
                scope.setdefault('Links', []).append(temp)
@@ -253,6 +268,6 @@ def parse_onedrive(self, df, df_scope, df_GraphMetadata_Records, scopeID, file_p
 
        cache['Data'] = final
 
-        df_GraphMetadata_Records.reset_index(drop=True, inplace=True)
+        df_GraphMetadata_Records.reset_index(inplace=True)
 
        return cache, rbin_df
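Note: the three `column_len` tests select the key set that matches the schema of the cache being parsed. Judging by the keys themselves, 32 columns carries no hydration fields, 33 adds `HydrationTime`, and 36 adds `lastHydrationType`, `firstHydrationTime`, `lastHydrationTime`, and `hydrationCount`. A hypothetical table-driven equivalent (a refactor sketch, not code from this commit):

```python
# Key sets copied from parse_onedrive above; 32/33/36 are the observed
# column counts for the different OneDrive cache schema versions.
BASE = ('parentResourceID', 'resourceID', 'eTag', 'Path', 'Name', 'fileStatus',
        'spoPermissions', 'volumeID', 'itemIndex', 'lastChange')
FILE_KEYS = {
    32: BASE + ('size', 'localHashDigest', 'sharedItem', 'Media'),
    33: BASE + ('HydrationTime', 'size', 'localHashDigest', 'sharedItem', 'Media'),
    36: ('parentResourceID', 'resourceID', 'eTag', 'Path', 'Name', 'fileStatus',
         'lastHydrationType', 'spoPermissions', 'volumeID', 'itemIndex', 'lastChange',
         'firstHydrationTime', 'lastHydrationTime', 'hydrationCount',
         'size', 'localHashDigest', 'sharedItem', 'Media'),
}

def file_record(row, column_len):
    # Raises KeyError on an unknown column count instead of silently
    # leaving `file` unbound or stale, as the chained ifs would.
    return {key: row[key] for key in FILE_KEYS[column_len]}
```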