-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathitemTable_ID_Extractor.py
66 lines (55 loc) · 2.54 KB
/
itemTable_ID_Extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def _candidate_encodings(raw, encodings):
    """Return the entries of *encodings* worth trying for *raw*, likeliest first.

    A UTF-16 byte-order mark settles the question, so the matching codec is
    moved to the front. Without a BOM the UTF-16 codecs are only kept when the
    data contains NUL bytes: they accept almost any even-length byte string,
    so trying them on single-byte text would "succeed" with garbage.
    """
    for bom, bom_encoding in ((b'\xff\xfe', 'utf-16-le'), (b'\xfe\xff', 'utf-16-be')):
        if raw.startswith(bom):
            return [bom_encoding] + [enc for enc in encodings if enc != bom_encoding]
    if b'\x00' not in raw:
        return [enc for enc in encodings if not enc.startswith('utf-16')]
    return list(encodings)


def _parse_row_name(line):
    """Return the value of a ``"key": "value",`` style line, or None without ``": "``."""
    # Split on the FIRST separator only, so a value containing ": " stays whole.
    _, separator, value = line.strip().partition(": ")
    if not separator:
        return None
    # The trailing comma is optional (e.g. the last property of an object).
    if value.endswith(","):
        value = value[:-1]
    if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
        value = value[1:-1]
    return value


def extract_numbers_from_file(input_file, output_file, keyword):
    """
    Extract row names from a file with automatic encoding detection.

    Every line that contains *keyword* and a ``": "`` separator contributes
    the text after the first separator, with an optional trailing comma and
    one pair of surrounding double quotes removed. The collected names are
    written to *output_file* (UTF-8) as one parenthesised, comma-separated
    list of single-quoted values, e.g. ``('A', 'B')``, or ``()`` when no line
    matched. The input is read into memory in one piece.

    Args:
        input_file: Path to the input file
        output_file: Path to save the extracted row names
        keyword: The keyword to search for in each line

    Raises:
        ValueError: If the input cannot be read, cannot be decoded with any
            candidate encoding, or the output cannot be written.
    """
    encodings = ['utf-8', 'utf-16-le', 'utf-16-be', 'ascii', 'iso-8859-1']

    # Read the bytes once; every candidate encoding is tested against the same
    # data instead of reopening the file per attempt.
    try:
        with open(input_file, 'rb') as f_in:
            raw = f_in.read()
    except OSError as exc:
        # ValueError stays the failure type callers see for file problems.
        raise ValueError(f"Could not read {input_file}: {exc}") from exc

    for encoding in _candidate_encodings(raw, encodings):
        try:
            # Decode everything up front: an invalid byte anywhere rejects the
            # encoding before any (partial) output can be written.
            text = raw.decode(encoding)
        except UnicodeError:
            continue  # Try next encoding

        # Same line boundaries as text-mode reading: \n, \r\n and \r.
        lines = text.replace('\r\n', '\n').replace('\r', '\n').split('\n')
        row_names = []
        for line in lines:
            if keyword not in line:
                continue
            row_name = _parse_row_name(line)
            if row_name is not None:
                row_names.append(row_name)

        output = "(" + ", ".join(f"'{row_name}'" for row_name in row_names) + ")"

        # Write output to file
        try:
            with open(output_file, 'w', encoding='utf-8') as f_out:
                f_out.write(output)
        except OSError as exc:
            raise ValueError(f"Could not write {output_file}: {exc}") from exc

        print(f"Successfully processed file using {encoding} encoding")
        return

    raise ValueError(f"Could not process file with any of these encodings: {encodings}")
if __name__ == '__main__':
    # Script entry point: pull the 'RowName' values out of ItemTable.json in
    # the current working directory and store them in output.txt.
    try:
        extract_numbers_from_file(
            input_file='ItemTable.json',
            output_file='output.txt',
            keyword='RowName',
        )
        print("Done!")
    except Exception as err:
        # Top-level boundary: report the failure as one line, not a traceback.
        print(f"Error: {str(err)}")