Commit

v2024.07.19
Beercow committed Jul 19, 2024
1 parent 925e5b5 commit 2fb5195
Showing 10 changed files with 227 additions and 120 deletions.
Binary file modified OneDriveExplorer/Images/splashv.png
3 changes: 1 addition & 2 deletions OneDriveExplorer/OneDriveExplorer.py
@@ -50,7 +50,7 @@
 )
 
 __author__ = "Brian Maloney"
-__version__ = "2024.05.20"
+__version__ = "2024.07.19"
 __email__ = "bmmaloney97@gmail.com"
 rbin = []
 DATParser = dat_parser.DATParser()
@@ -89,7 +89,6 @@ def output():
    if ((args.csv or args.html) and args.json) or (not args.csv and not args.html):
        if not args.json:
            args.json = '.'
-        # print_json(df, rbin_df, name, args.pretty, args.json)
        print_json(cache, name, args.pretty, args.json)

    try:
191 changes: 124 additions & 67 deletions OneDriveExplorer/OneDriveExplorer_GUI.py

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion OneDriveExplorer/ode/helpers/mft.py
@@ -67,7 +67,6 @@ def directoryRecurse(directoryObject, parentPath, user, filedata=False):
 
            else:
                continue
-                # log.warning(f'This went wrong, {entryObject.info.name.name} {f_type}')
 
        except IOError as e:
            log.error(e)
4 changes: 3 additions & 1 deletion OneDriveExplorer/ode/parsers/csv_file.py
@@ -33,6 +33,7 @@ def parse_csv(filename):
 
    file = open(filename.name, 'r', encoding='utf-8')
    columns_to_drop = ['parentResourceId', 'resourceId', 'inRecycleBin', 'volumeId', 'fileId', 'DeleteTimeStamp', 'notificationTime', 'hash']
+    columns_to_drop_2 = ['MountPoint', 'Path', 'fileName', 'graphMetadataJSON', 'spoCompositeID', 'createdBy', 'modifiedBy', 'filePolicies', 'fileExtension', 'lastWriteCount']
 
    dtypes = {'Type': 'object',
              'scopeID': 'object',
@@ -73,7 +74,7 @@
    df_scope = df.loc[df['Type'] == 'Scope',
                      ['Type', 'scopeID', 'siteID', 'webID', 'listID',
                       'tenantID', 'webURL', 'remotePath', 'spoPermissions',
-                       'libraryType']]
+                       'shortcutVolumeID', 'shortcutItemIndex', 'libraryType']]
    columns_to_fill = df_scope.columns.difference(['libraryType'])
    df_scope[columns_to_fill] = df_scope[columns_to_fill].fillna('')
    scopeID = df_scope['scopeID'].tolist()
@@ -106,6 +107,7 @@
 
        df = df.astype(object)
        df = df.where(pd.notna(df), None)
+        df.drop(columns=columns_to_drop_2, inplace=True)
 
    except Exception as e:
        print(e)
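Note: the new `columns_to_drop_2` set is removed only after the frame has been cast to `object` dtype and `NaN` swapped for `None`, so the serialized output keeps clean nulls while shedding columns that are only needed during parsing. A minimal sketch of that pattern (toy column names, not the parser's real schema):

```python
import pandas as pd

# Toy frame standing in for the parsed CSV (hypothetical columns).
df = pd.DataFrame({'keep': [1, 2], 'Path': ['a', None], 'MountPoint': [None, 'C:\\']})

df = df.astype(object)              # uniform object dtype so None survives
df = df.where(pd.notna(df), None)   # NaN -> None for clean JSON-style nulls
df.drop(columns=['Path', 'MountPoint'], inplace=True)  # shed display-only columns

print(df.to_dict('records'))        # [{'keep': 1}, {'keep': 2}]
```

Keep in mind `drop(columns=..., inplace=True)` raises `KeyError` if any listed column is absent; the drop only works here because every column in the list is guaranteed by the earlier parse.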
24 changes: 16 additions & 8 deletions OneDriveExplorer/ode/parsers/dat.py
@@ -75,10 +75,6 @@ def __init__(self):
                            'localSyncTokenData',
                            'localCobaltHashAlgorithm'
                            ]
-        self.int_to_date = ['lastChange',
-                            'serverLastChange',
-                            'mediaDateTaken'
-                            ]
        self.split_str = ['fileName',
                          'folderName'
                          ]
@@ -100,8 +96,6 @@ def __init__(self):
                           'syncTokenData',
                           'syncTokenData_size',
                           'unknown7',
-                           'shortcutVolumeID',
-                           'shortcutItemIndex',
                           'sourceResourceID'
                           ]
 

Expand Down Expand Up @@ -149,7 +143,7 @@ def parse_dat(self, usercid, account='Business', gui=False, pb=False, value_labe
if header.syncTokenData_size != 0:
account = 'Personal'
csvwriter = csv.writer(temp_scope, escapechar='\\')
csvwriter.writerow(['scopeID', 'siteID', 'webID', 'listID', 'libraryType', 'spoPermissions'])
csvwriter.writerow(['scopeID', 'siteID', 'webID', 'listID', 'libraryType', 'spoPermissions', 'shortcutVolumeID', 'shortcutItemIndex'])
self.scope_header = True
syncTokenData = urllib.parse.unquote(header.syncTokenData[:int(header.syncTokenData_size)].decode('utf-8'))
syncDict = dict(item.split("=") for item in syncTokenData.split(";"))
@@ -386,6 +380,7 @@ def parse_dat(self, usercid, account='Business', gui=False, pb=False, value_labe
                            del block._values[key]
                        except Exception:
                            continue
+                    block._values.update([('shortcutVolumeID', ''), ('shortcutItemIndex', '')])
 
                elif ff == '0a':
                    data_type = 'Scope'
@@ -397,7 +392,7 @@ def parse_dat(self, usercid, account='Business', gui=False, pb=False, value_labe
                            continue
                    block._values.update([('siteID', b''), ('webID', b'')])
                    block._values.move_to_end('listID', last=True)
-                    block._values.update([('libraryType', b''), ('spoPermissions', '')])
+                    block._values.update([('libraryType', b''), ('spoPermissions', ''), ('shortcutVolumeID', ''), ('shortcutItemIndex', '')])
 
                elif ff == '0b':
                    data_type = 'Scope'
@@ -408,6 +403,8 @@ def parse_dat(self, usercid, account='Business', gui=False, pb=False, value_labe
                        except Exception:
                            continue
                    block._values.update([('siteID', b''), ('webID', b''), ('listID', b''), ('libraryType', b''), ('spoPermissions', '')])
+                    block._values.move_to_end('shortcutVolumeID', last=True)
+                    block._values.move_to_end('shortcutItemIndex', last=True)
 
                elif ff == '0c':
                    data_type = 'Scope'
@@ -417,6 +414,7 @@ def parse_dat(self, usercid, account='Business', gui=False, pb=False, value_labe
                            del block._values[key]
                        except Exception:
                            continue
+                    block._values.update([('shortcutVolumeID', ''), ('shortcutItemIndex', '')])
 
                else:
                    block = self.datstruct.DAT_BLOCK(f.read(chunk))
@@ -481,13 +479,23 @@ def parse_dat(self, usercid, account='Business', gui=False, pb=False, value_labe
        temp_files.seek(0)
        temp_folders.seek(0)
 
+        convert = {'shortcutVolumeID': 'Int64',
+                   'shortcutItemIndex': 'Int64'
+                   }
+
        df_scope = pd.read_csv(temp_scope)
        temp_scope.close()
        df_scope.insert(0, 'Type', 'Scope')
        df_scope.insert(5, 'tenantID', '')
        df_scope.insert(6, 'webURL', '')
        df_scope.insert(7, 'remotePath', '')
        df_scope = df_scope.astype(object)
+        df_scope = df_scope.astype(convert)
+        df_scope['shortcutVolumeID'].fillna(0, inplace=True)
+        df_scope['shortcutItemIndex'].fillna(0, inplace=True)
+
+        df_scope['shortcutVolumeID'] = df_scope['shortcutVolumeID'].apply(lambda x: '{:08x}'.format(x) if pd.notna(x) else '')
+        df_scope['shortcutVolumeID'] = df_scope['shortcutVolumeID'].apply(lambda x: '{}{}{}{}-{}{}{}{}'.format(*x.upper()) if x else '')
        df_scope['spoPermissions'].replace('', np.nan, inplace=True)
        df_scope['spoPermissions'] = df_scope['spoPermissions'].fillna(0).astype('int')
        df_scope['spoPermissions'] = df_scope['spoPermissions'].apply(lambda x: permissions(x))
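Note: the two new `apply()` lambdas give `shortcutVolumeID` the same treatment `volumeID` already gets in onedrive.py: render the nullable `Int64` as zero-padded hex, then hyphenate it into the familiar XXXX-XXXX volume-serial form. A standalone illustration of that chain:

```python
# Illustration of the shortcutVolumeID formatting chain added above.
volume_id = 305419896                                   # example raw integer value

hex_str = '{:08x}'.format(volume_id)                    # '12345678' (zero-padded hex)
pretty = '{}{}{}{}-{}{}{}{}'.format(*hex_str.upper())   # '1234-5678'
print(pretty)
```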
28 changes: 13 additions & 15 deletions OneDriveExplorer/ode/parsers/odl.py
@@ -220,34 +220,32 @@ def decrypt(cipher_text):
    global dkey_list
    global utf_type
 
-    cipher_text_orig = cipher_text
-
    if not dkey_list:
-        return cipher_text_orig
+        return ''
    if len(cipher_text) < 22:
-        return cipher_text_orig # invalid or it was not encrypted!
+        return '' # invalid or it was not encrypted!
    # add proper base64 padding
    remainder = len(cipher_text) % 4
    if remainder == 1:
-        return cipher_text_orig # invalid b64 or it was not encrypted!
+        return '' # invalid b64 or it was not encrypted!
    elif remainder in (2, 3):
        cipher_text += "=" * (4 - remainder)
    try:
        cipher_text = cipher_text.replace('_', '/').replace('-', '+')
        cipher_text = base64.b64decode(cipher_text)
    except Exception:
-        return cipher_text_orig
+        return ''
 
    if len(cipher_text) % 16 != 0:
-        return cipher_text_orig # invalid b64 or it was not encrypted!
+        return '' # invalid b64 or it was not encrypted!
 
    for key in dkey_list:
        try:
            cipher = AES.new(key, AES.MODE_CBC, iv=b'\0'*16)
            raw = cipher.decrypt(cipher_text)
        except ValueError as ex:
            # log.error(f'Exception while decrypting data {str(ex)}')
-            return cipher_text_orig
+            return ''
        try:
            plain_text = unpad(raw, 16)
        except Exception as ex: # possible fix to change key
@@ -257,7 +255,7 @@ def decrypt(cipher_text):
            plain_text = plain_text.decode(utf_type)
        except ValueError as ex:
            # print(f"Error decoding {utf_type}", str(ex))
-            return cipher_text_orig
+            return ''
    return plain_text
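Note: with this change every failure path in `decrypt()` returns an empty string instead of echoing the ciphertext back to the caller. A self-contained sketch of the round trip the function expects (hypothetical key and plaintext; pycryptodome, which the module already uses for `AES` and `unpad`):

```python
import base64
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad, unpad

key = b'0' * 32                       # hypothetical 256-bit key, not a real ODL key
iv = b'\0' * 16                       # odl.py uses an all-zero IV

# Encrypt a sample string the way obfuscated ODL values are stored.
cipher = AES.new(key, AES.MODE_CBC, iv=iv)
token = base64.b64encode(cipher.encrypt(pad(b'C:\\Users\\test', 16)))
token = token.decode().replace('/', '_').replace('+', '-').rstrip('=')  # base64url-ish, unpadded

# Decrypt side, mirroring decrypt(): restore padding and the standard alphabet.
token += '=' * (-len(token) % 4)
raw = base64.b64decode(token.replace('_', '/').replace('-', '+'))
plain = unpad(AES.new(key, AES.MODE_CBC, iv=iv).decrypt(raw), 16)
print(plain.decode())                 # C:\Users\test
```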


@@ -287,6 +285,7 @@ def read_obfuscation_map(obfuscation_map_path, map):
    repeated_items_found = False
    encoding = guess_encoding(obfuscation_map_path)
    with open(obfuscation_map_path, 'r', encoding=encoding) as f:
+        log.info(f"Building map from {f.name}")
        for line in f.readlines():
            line = line.rstrip('\n')
            terms = line.split('\t')
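Note: a rough sketch of the loop shown above, reduced to its core; it is based only on the context lines visible here (the real `read_obfuscation_map` also guesses the file encoding and flags repeated keys, and the file name below is illustrative):

```python
# Each line of the obfuscation map is "<obfuscated>\t<plaintext>";
# first occurrence wins in this simplified version.
the_map = {}
with open('ObfuscationStringMap.txt', 'r', encoding='utf-8-sig') as f:
    for line in f.readlines():
        terms = line.rstrip('\n').split('\t')
        if len(terms) >= 2 and terms[0] not in the_map:
            the_map[terms[0]] = terms[1]
```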
@@ -400,7 +399,7 @@ def process_odl(filename, map):
    if header.signature == b'EBFGONED': # Odl header
        pass
    else:
-        log.warning(f'{basename} wrong header! Did not find EBFGONED')
+        log.error(f'{basename} wrong header! Did not find EBFGONED')
        return pd.DataFrame()
    signature = f.read(8)
    # Now either we have the gzip header here or the CDEF header (compressed or uncompressed handles both)
@@ -417,7 +416,7 @@ def process_odl(filename, map):
        f = io.BytesIO(file_data)
        signature = f.read(8)
    if signature != b'\xCC\xDD\xEE\xFF\0\0\0\0': # CDEF header
-        log.warning(f'{basename} wrong header! Did not find 0xCCDDEEFF')
+        log.error(f'{basename} wrong header! Did not find 0xCCDDEEFF')
        return pd.DataFrame()
    else:
        f.seek(-8, 1)
@@ -460,7 +459,7 @@ def process_odl(filename, map):
        else:
            log.error(f'Unknown odl_version = {header.odl_version}')
        if data_block.signature != 0xffeeddcc:
-            log.warning(f'{basename} wrong data_block signature! Did not find 0xCCDDEEFF')
+            log.warning(f'Unable to parse {basename} completely. Did not find 0xCCDDEEFF')
            return pd.DataFrame.from_records(odl_rows)
        timestamp = ReadUnixMsTime(data_block.timestamp)
        odl['Timestamp'] = timestamp
@@ -473,8 +472,7 @@ def process_odl(filename, map):
            params_len = (data_block.data_len - data.code_file_name_len - data.code_function_name_len - 12)
            f.seek(- params_len, io.SEEK_CUR)
        except Exception as e:
-            log.warning(f'Unable to parse {basename}. Something went wrong! {type(e).__name__}')
-            # template = "An exception of type {0} occurred. Arguments:\n{1!r}"
+            log.warning(f'Unable to parse {basename} completely. {type(e).__name__}')
            return pd.DataFrame.from_records(odl_rows)
 
        if params_len:
@@ -506,7 +504,7 @@ def process_odl(filename, map):
                params = ', '.join(params)
                description = ''.join([v for (k, v) in cparser.consts.items() if k == f"{data.code_file_name.decode('utf8').lower().split('.')[0]}_{data.flags}_{data.code_function_name.decode('utf8').split('::')[-1].replace('~', '_').replace(' ()', '_').lower()}_des"])
            except EOFError:
-                log.error(f"EOFError while parsing {data.code_file_name.decode('utf8').lower().split('.')[0]}_{data.flags}_{data.code_function_name.decode('utf8').split('::')[-1].replace('~', '_').replace(' ()', '_').lower()}")
+                log.warning(f"EOFError while parsing {data.code_file_name.decode('utf8').lower().split('.')[0]}_{data.flags}_{data.code_function_name.decode('utf8').split('::')[-1].replace('~', '_').replace(' ()', '_').lower()}")
                f.seek(- params_len, 1)
                params = extract_strings(f.read(params_len).decode('utf8', 'ignore'), map)
            except AttributeError:
21 changes: 18 additions & 3 deletions OneDriveExplorer/ode/parsers/onedrive.py
@@ -88,6 +88,7 @@ def find_parent(self, x, id_name_dict, parent_dict):
 
        return self.find_parent(value, id_name_dict, parent_dict) + "\\\\" + str(id_name_dict.get(value))
 
+    # Generate scopeID list instead of passing
    def parse_onedrive(self, df, df_scope, df_GraphMetadata_Records, scopeID, file_path, rbin_df, account=False, reghive=False, recbin=False, gui=False, pb=False, value_label=False):
        if os.path.isdir(file_path):
            directory = file_path
@@ -167,6 +168,7 @@ def parse_onedrive(self, df, df_scope, df_GraphMetadata_Records, scopeID, file_p
 
        df = df.astype(convert)
        df['volumeID'].fillna(0, inplace=True)
+        df['itemIndex'].fillna(0, inplace=True)
 
        df['volumeID'] = df['volumeID'].apply(lambda x: '{:08x}'.format(x) if pd.notna(x) else '')
        df['volumeID'] = df['volumeID'].apply(lambda x: '{}{}{}{}-{}{}{}{}'.format(*x.upper()) if x else '')
@@ -176,14 +178,25 @@ def parse_onedrive(self, df, df_scope, df_GraphMetadata_Records, scopeID, file_p
        dcache = {}
        is_del = []
 
+        # Need to look into this
        if not df_GraphMetadata_Records.empty:
            df_GraphMetadata_Records.set_index('resourceID', inplace=True)
 
+        column_len = len(df.columns)
+
        for row in df.sort_values(
                by=['Level', 'parentResourceID', 'Type', 'FileSort', 'FolderSort', 'libraryType'],
                ascending=[False, False, False, True, False, False]).to_dict('records'):
            if row['Type'] == 'File':
-                file = {key: row[key] for key in ('parentResourceID', 'resourceID', 'eTag', 'Path', 'Name', 'fileStatus', 'HydrationTime', 'spoPermissions', 'volumeID', 'itemIndex', 'lastChange', 'size', 'localHashDigest', 'sharedItem', 'Media')}
+                if column_len == 32:
+                    file = {key: row[key] for key in ('parentResourceID', 'resourceID', 'eTag', 'Path', 'Name', 'fileStatus', 'spoPermissions', 'volumeID', 'itemIndex', 'lastChange', 'size', 'localHashDigest', 'sharedItem', 'Media')}
+
+                if column_len == 33:
+                    file = {key: row[key] for key in ('parentResourceID', 'resourceID', 'eTag', 'Path', 'Name', 'fileStatus', 'spoPermissions', 'volumeID', 'itemIndex', 'lastChange', 'HydrationTime', 'size', 'localHashDigest', 'sharedItem', 'Media')}
+
+                if column_len == 36:
+                    file = {key: row[key] for key in ('parentResourceID', 'resourceID', 'eTag', 'Path', 'Name', 'fileStatus', 'lastHydrationType', 'spoPermissions', 'volumeID', 'itemIndex', 'lastChange', 'firstHydrationTime', 'lastHydrationTime', 'hydrationCount', 'size', 'localHashDigest', 'sharedItem', 'Media')}
+
                file.setdefault('Metadata', '')
 
                try:
@@ -202,7 +215,7 @@ def parse_onedrive(self, df, df_scope, df_GraphMetadata_Records, scopeID, file_p
                if row['scopeID'] not in scopeID:
                    continue
                scope = {key: row[key] for key in (
-                    'scopeID', 'siteID', 'webID', 'listID', 'tenantID', 'webURL', 'remotePath', 'MountPoint', 'spoPermissions')}
+                    'scopeID', 'siteID', 'webID', 'listID', 'tenantID', 'webURL', 'remotePath', 'MountPoint', 'spoPermissions', 'shortcutVolumeID', 'shortcutItemIndex')}
                folder = cache.get(row['scopeID'], {})
                temp = {**scope, **folder}
                final.insert(0, temp)
@@ -217,6 +230,8 @@ def parse_onedrive(self, df, df_scope, df_GraphMetadata_Records, scopeID, file_p
                    'scopeID', 'siteID', 'webID', 'listID', 'tenantID', 'webURL', 'remotePath')}
                scope['MountPoint'] = row['MountPoint']
                scope['spoPermissions'] = s['spoPermissions']
+                scope['shortcutVolumeID'] = s['shortcutVolumeID']
+                scope['shortcutItemIndex'] = s['shortcutItemIndex']
                folder = cache.get(row['resourceID'], {})
                temp = {**sub_folder, **folder}
                scope.setdefault('Links', []).append(temp)
@@ -253,6 +268,6 @@ def parse_onedrive(self, df, df_scope, df_GraphMetadata_Records, scopeID, file_p
 
        cache['Data'] = final
 
-        df_GraphMetadata_Records.reset_index(drop=True, inplace=True)
+        df_GraphMetadata_Records.reset_index(inplace=True)
 
        return cache, rbin_df
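Note: the three `column_len` tests select the key set that matches the schema of the cache being parsed. Judging by the keys themselves, 32 columns carries no hydration fields, 33 adds `HydrationTime`, and 36 adds `lastHydrationType`, `firstHydrationTime`, `lastHydrationTime`, and `hydrationCount`. A hypothetical table-driven equivalent (a refactor sketch, not code from this commit):

```python
# Key sets copied from parse_onedrive above; 32/33/36 are the observed
# column counts for the different OneDrive cache schema versions.
BASE = ('parentResourceID', 'resourceID', 'eTag', 'Path', 'Name', 'fileStatus',
        'spoPermissions', 'volumeID', 'itemIndex', 'lastChange')
FILE_KEYS = {
    32: BASE + ('size', 'localHashDigest', 'sharedItem', 'Media'),
    33: BASE + ('HydrationTime', 'size', 'localHashDigest', 'sharedItem', 'Media'),
    36: ('parentResourceID', 'resourceID', 'eTag', 'Path', 'Name', 'fileStatus',
         'lastHydrationType', 'spoPermissions', 'volumeID', 'itemIndex', 'lastChange',
         'firstHydrationTime', 'lastHydrationTime', 'hydrationCount',
         'size', 'localHashDigest', 'sharedItem', 'Media'),
}

def file_record(row, column_len):
    # Raises KeyError on an unknown column count instead of silently
    # leaving `file` unbound or stale, as the chained ifs would.
    return {key: row[key] for key in FILE_KEYS[column_len]}
```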