-
Notifications
You must be signed in to change notification settings - Fork 0
/
improvedFunctions.py
501 lines (399 loc) · 17.9 KB
/
improvedFunctions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
import pandas as pd
import numpy as np
from splitting.newSplittingDF import forDataFrame
##############################################
#region ########### a i x A C C T ############
# Helper functions
def fileToStr(fileName):
    """
    Reads the contents of a file and returns them as a single string.

    A "Loading file ..." message is printed before the file is opened.

    Args:
        fileName (str): Path to the file to be read.
    Returns:
        str: Contents of the file as a single string.
    """
    print("Loading file {}".format(fileName))
    # read() hands back the whole text in one go; appending line by line
    # rebuilt the accumulated string on every iteration.
    with open(fileName) as f:
        return f.read()
def sectionTheFile(lines, printSummary):
    """
    For aixACCT - sections off the file content into separate tables.

    The text is split on the literal marker "Table ". The piece before the
    first marker is used as the title, the piece at index 2 as the summary
    table, and every piece from index 3 onwards is returned to the caller.

    Args:
        lines (str): The entire content of the file as a single string.
        printSummary (bool): Flag to indicate whether to print the title and summary.
    Returns:
        list: List of sections (tables) from the file, excluding the summary.
    Raises:
        IndexError: If the text holds fewer than two "Table " markers, i.e. there
            is no summary piece to read.
    """
    sections = lines.split("Table ")
    if len(sections) < 3:
        # Same exception type the bare sections[2] lookup produced, but with a
        # message that says what is wrong with the file.
        raise IndexError(
            "Expected at least two 'Table ' markers in the aixACCT file, found {}".format(
                len(sections) - 1
            )
        )

    if printSummary:
        title = sections[0].strip()
        print("Title: {}\n==============\nSummary table:".format(title))
        # Printing the piece whole gives the same output as printing it line by line.
        print(sections[2])

    return sections[3:]
# Name of the table produced by the most recent loadSection() call.
# load_aixACCTFile reads it right after each call to key its dictionaries.
currentTableName = None


def _cellToNumber(cell):
    """Return the cell converted to float, or the cell unchanged when it is not numeric."""
    try:
        return float(cell)
    except ValueError:
        return cell


def loadSection(section, shouldPrint=False, sectionName="Contour "):
    """
    For aixACCT - Loads one section of the aixACCT file into a table, including preprocessing.

    Everything up to and including the "Measurement Status:" line is treated as
    metadata; the tab-separated lines after it form the table, the first of
    them being the column names.

    Side effect: sets the module-level ``currentTableName``. When the section
    carries a "Hysteresis Amplitude [V]" value the name is
    "<first line>. <field strength> kV_cm", otherwise it is
    ``sectionName`` followed by the first line of the section.

    Args:
        section (str): A string representing a section of the file containing a table.
        shouldPrint (bool, optional): Flag to print the data frame. Defaults to False.
        sectionName (str, optional): Name prefix used when no field strength can be
            computed. Defaults to "Contour ".
    Returns:
        tuple: DataFrame representing the table, and a dict with the keys "area"
            and "thickness", each holding the (value, unit) tuple from getValue.
    Raises:
        ValueError: If "Measurement Status:", "Area [mm2]" or "Thickness [nm]"
            is missing from the section.
    """
    global currentTableName

    # Split the metadata header from the table body.
    lastLine = "Measurement Status:"
    index = section.index("\n", section.index(lastLine)) + 1
    dataBeforeTable = section[:index]
    tableNumber = dataBeforeTable[:dataBeforeTable.index("\n")]

    # AFAIK the only useful data we need are area and thickness. TODO check if we need others
    area = getValue(dataBeforeTable, "Area [mm2]")
    thickness = getValue(dataBeforeTable, "Thickness [nm]")

    try:
        amplitude = getValue(dataBeforeTable, "Hysteresis Amplitude [V]")
        # 1 cm = 1e7 nm and 1 kV = 1e3 V. The previous factors (10e7, 10e3) were
        # each ten times too large; the errors cancelled in the ratio, so the
        # resulting field strength is the same.
        thicknessCm = thickness[0] / 1e7
        amplitudeKv = amplitude[0] / 1e3
        fieldStrength = round(amplitudeKv / thicknessCm, 6)
        currentTableName = "{}. {} kV_cm".format(tableNumber, fieldStrength)
    except (ValueError, ZeroDivisionError):
        # No (parsable) amplitude, or zero thickness: fall back to a plain name.
        currentTableName = sectionName + tableNumber

    # Now get the table from the string - MAIN PART
    # The offset was computed on the raw section, so slice first and strip after.
    tableLines = section[index:].strip().split("\n")
    if tableLines[-1] == "Data":
        # Trailing "Data" line is not part of the table.
        tableLines = tableLines[:-1]

    columnNames = tableLines[0].strip().split("\t")

    # Build a fresh list instead of removing from the list being iterated,
    # which used to skip the line following every removed one.
    rows = []
    for tableLine in tableLines[1:]:
        cells = tableLine.strip().split("\t")
        if len(cells) == 1:
            # Blank or single-cell lines carry no table data.
            continue
        rows.append([_cellToNumber(cell) for cell in cells])

    # Rows go to pandas as plain lists: routing mixed rows through np.array
    # would turn every number into a string.
    dataFrame = pd.DataFrame(data=rows, columns=columnNames)

    if shouldPrint:
        print(dataFrame)

    interestingConstants = {"area": area, "thickness": thickness}
    return dataFrame, interestingConstants
def getValue(string, valueName, terminator="\n", suffix=": ", subfunction=False):
    r"""
    For aixACCT - Reads specific constants or values from a section of an aixACCT table given its value name.

    The value is the text that starts ``len(suffix)`` characters after the first
    occurrence of ``valueName`` and runs up to the next ``terminator``. The
    suffix itself is not checked, only skipped. The unit is taken from the
    square brackets inside ``valueName`` (e.g. "mm2" for "Area [mm2]").

    Args:
        string (str): The portion of the section containing the value to be read.
        valueName (str): Name of the value or constant to be retrieved.
        terminator (str, optional): String indicating the end of the value; None
            reads to the end of ``string``. Defaults to "\n".
        suffix (str, optional): String that comes after the value name. Defaults to ": ".
        subfunction (bool, optional): When True, return the raw text without
            converting it or looking up a unit. Defaults to False.
    Returns:
        tuple or str: (float value, unit string), or the raw text if subfunction is True.
    Raises:
        ValueError: If ``valueName`` or ``terminator`` is not found, if the text is
            not a valid float, or if ``valueName`` has no "[...]" unit part.
    """
    startIndex = string.index(valueName) + len(valueName) + len(suffix)
    if terminator is None:
        terminatorIndex = len(string)
    else:
        terminatorIndex = string.index(terminator, startIndex)
    numericData = string[startIndex:terminatorIndex]

    if subfunction:
        return numericData

    # Reuse the same slicing logic on the name itself to pull out "[unit]".
    unitOfMeasurement = getValue(valueName, "[", terminator="]", suffix="", subfunction=True)
    return (float(numericData), unitOfMeasurement)
# Main function
def load_aixACCTFile(fileName, printSummary = False):
    """
    For aixACCT - Reads a whole aixACCT file: loads the text, sections it and parses every table.

    Each table is stored under the name that loadSection leaves in the
    module-level ``currentTableName`` after parsing that table.

    Args:
        fileName (str): Path to the .dat aixACCT file to be read.
        printSummary (bool, optional): Passed on to sectionTheFile to print the summary. Defaults to False.
    Returns:
        tuple: Dictionary of DataFrames keyed by table name, and a dictionary of the
            constants of each table under the same keys.
    Raises:
        Exception: If the provided file does not have the .dat extension.
    """
    if not fileName.endswith(".dat"):
        raise Exception("Expected a .dat file of aixACCT.")

    tableSections = sectionTheFile(fileToStr(fileName), printSummary)

    dataFrameDict = {}
    constantsDict = {}
    for tableSection in tableSections:
        frame, sectionConstants = loadSection(tableSection)
        # loadSection has just published the key for this table.
        tableKey = currentTableName
        dataFrameDict[tableKey] = frame
        constantsDict[tableKey] = sectionConstants

    return dataFrameDict, constantsDict
#endregion
##################################################
#region ########### P R O B O S T A T ############
def load_probostatFile(fileName, subFunction=False):
    """
    Reads and organizes data from a Probostat .csv file into a DataFrame.

    The file is read as ";"-separated text. The first field of every line is
    dropped, lines that then hold a single field are skipped as pre-table
    data, and of the remaining fields only positions 0, 1 and every further
    even position are kept. A row whose first kept field is not all digits is
    treated as the header: the first ``len("X data for ")`` characters are cut
    from its other names and the name at position 1 is replaced by the axis
    name read from the ";Assigned to Axis: " line.

    Args:
        fileName (str): Path to the .csv Probostat file to be read.
        subFunction (bool, optional): When True, return the bare DataFrame instead
            of passing it through forDataFrame. Defaults to False.
    Returns:
        tuple: The result of forDataFrame(df, "AVG T [°C]") and a dictionary with an
            empty dict per key of that result (when subFunction is False).
        DataFrame: The assembled table (when subFunction is True).
    Raises:
        Exception: If the provided file does not have the .csv extension.
        IndexError: If the file contains no table rows at all.
        ValueError: If a kept field of a data row is not a valid float.
    """
    if not fileName.endswith(".csv"):
        raise Exception("Expected a .csv file of Probostat.")

    axisMarker = ";Assigned to Axis: "
    prefixLength = len("X data for ")  # "Y data for " has the same length
    matrix = []
    x_axis = None

    # The context manager closes the file even when a row fails to parse.
    with open(fileName, "r") as f:
        for line in f:
            # Get x axis from pre-table data
            if not x_axis and line.startswith(axisMarker):
                x_axis = line[len(axisMarker):].strip()
            # skip empty rows
            if line == "\n":
                continue
            tmpArr = line.split(";")
            # every line starts with ";", so the first field is always empty
            if len(tmpArr) > 1:
                tmpArr = tmpArr[1:]
            # a single remaining field means pre-table metadata
            if len(tmpArr) == 1:
                continue

            if not tmpArr[0].isdigit():
                # Header row: strip the "X data for " / "Y data for " prefixes.
                names = [tmpArr[0]] + [name[prefixLength:] for name in tmpArr[1:]]
                tmpArr = [name for k, name in enumerate(names) if k % 2 == 0 or k == 1]
                tmpArr[1] = x_axis
            else:
                tmpArr = [float(cell) for k, cell in enumerate(tmpArr) if k % 2 == 0 or k == 1]

            matrix.append(tmpArr)

    # NOTE(review): matrix[0] (the header) is used for the column names AND stays
    # in the data as the first row, exactly as before. Kept because forDataFrame
    # is not visible from here - confirm whether it expects that row.
    df = pd.DataFrame(matrix, columns=matrix[0])

    if subFunction:
        return df

    dictionary = forDataFrame(df, "AVG T [°C]")
    print("all good")
    constants = {key: {} for key in dictionary}
    return dictionary, constants
# Axis name found by the most recent load_probostatFile_stitching() call;
# stitchUp_probostatFiles reads it after loading its files.
x_axis = None


def load_probostatFile_stitching(fileName, subFunction=False):
    """
    Processes a Probostat .csv file, extracting and organizing the data into a DataFrame.
    Used as a subfunction within 'stitchUp_probostatFiles' for stitching multiple files.

    The file is read as ";"-separated text (latin1, undecodable bytes replaced).
    After the leading empty field and the following field are dropped, each row
    is a sequence of (X, Y) pairs. Every pair becomes its own two-column table
    (axis, Y name); pairs where either value is the text "NAN" are skipped and
    the remaining values are rounded to 6 decimals. The per-pair tables are
    then outer-merged on the axis column.

    Side effect: sets the module-level ``x_axis`` to the name read from the
    ";Assigned to Axis: " line (None if the file has no such line).

    Args:
        fileName (str): Path to the .csv Probostat file to be read.
        subFunction (bool, optional): Not used by this function; kept for
            call compatibility. Defaults to False.
    Returns:
        DataFrame: One axis column plus one column per Y series.
    Raises:
        Exception: If the file does not have the .csv extension, if a second header
            row appears, or if a data row's length differs from the header's.
    """
    global x_axis

    if not fileName.endswith(".csv"):
        raise Exception("Expected a .csv file of Probostat.")

    x_axis = None
    axisMarker = ";Assigned to Axis: "
    prefixLength = len("X data for ")  # "Y data for " has the same length
    n = None            # number of fields in the header row
    columnNames = []    # [axis name, Y name of pair 0, Y name of pair 1, ...]
    xValues = []        # xValues[k] / yValues[k]: collected values of pair k
    yValues = []
    setOfTimes = set()

    # The context manager closes the file even when a row raises.
    with open(fileName, "r", encoding='latin1', errors='replace') as f:
        for line in f:
            # Get x axis from pre-table data
            if not x_axis and line.startswith(axisMarker):
                x_axis = line[len(axisMarker):].strip()
            # skip empty rows
            if line == "\n":
                continue
            tmpArr = line.split(";")
            # every line starts with ";", so the first field is always empty
            if len(tmpArr) > 1:
                tmpArr = tmpArr[1:]
            # a single remaining field means pre-table metadata
            if len(tmpArr) == 1:
                continue
            # drop the field in front of the (X, Y) pairs
            tmpArr = tmpArr[1:]
            if tmpArr[-1] == "\n":
                tmpArr = tmpArr[:-1]

            # A row whose first field is not a number (or that is empty) is a header.
            try:
                float(tmpArr[0])
                isHeader = False
            except (ValueError, IndexError):
                isHeader = True

            if isHeader:
                if columnNames != []:
                    raise Exception("Not expected order of operations")
                n = len(tmpArr)
                for i in range(n):
                    name = tmpArr[i][prefixLength:]
                    if i == 0:
                        columnNames.append(x_axis)
                    if i % 2 == 1:
                        # odd positions hold the Y names, one per pair
                        columnNames.append(name)
                        xValues.append([])
                        yValues.append([])
                continue

            if n != len(tmpArr):
                raise Exception("CSV file is not valid")

            for i in range(0, len(tmpArr), 2):
                x = tmpArr[i]
                y = tmpArr[i + 1]
                if x == "NAN" or y == "NAN":
                    continue
                x = round(float(x), 6)
                y = round(float(y), 6)
                setOfTimes.add(x)
                # Collect in plain lists; growing a DataFrame with one concat
                # per row is quadratic in the number of rows.
                xValues[i // 2].append(x)
                yValues[i // 2].append(y)

    result = pd.DataFrame({x_axis: sorted(setOfTimes)}, dtype=float)
    for pairIndex, yColName in enumerate(columnNames[1:]):
        pairFrame = pd.DataFrame(
            {x_axis: xValues[pairIndex], yColName: yValues[pairIndex]},
            dtype=float,
        )
        result = pd.merge(result, pairFrame, on=x_axis, how='outer')
    return result
if __name__ == "__main__":
    # Parameter sweep: run the data-frame test once per (rolling, epsilon) pair.
    # NOTE(review): testForDataFrame is neither defined nor imported in this
    # module, so this sweep used to stop with a NameError on the first call.
    # It is looked up explicitly to fail with a clear message instead; import
    # it from wherever it lives to make the sweep usable again.
    # NOTE(review): presumably testForDataFrame reads `rolling` and `epsilon`
    # as globals - confirm, since globals set here are not visible to a
    # function defined in another module.
    testRunner = globals().get("testForDataFrame")
    if testRunner is None:
        raise SystemExit(
            "testForDataFrame is not defined in this module; "
            "import it before running the parameter sweep."
        )
    #testSplitRiseFlatFall()
    rollings = [1, 5, 50, 150]
    epsilons = [10**-6, 10**-4, 10**-2, 0.02, 0.05, 0.1, 1, 2, 5]
    for rolling in rollings:
        for epsilon in epsilons:
            testRunner()
#endregion
######################################
#region ########### MISC ############
def load_excel(fileName):
    """
    Loads data from an Excel file into DataFrames, one per sheet.

    Args:
        fileName (str): Path to the Excel file to be read.
    Returns:
        tuple: Dictionary of DataFrames keyed by sheet name, and a dictionary with
            an empty dict of constants under each of the same keys.
    """
    # sheet_name=None makes read_excel return every sheet in a single pass,
    # instead of opening an ExcelFile that is never closed and then re-reading
    # the workbook once per sheet.
    dataFrames = dict(pd.read_excel(fileName, sheet_name=None))
    constants = {sheetName: {} for sheetName in dataFrames}
    return dataFrames, constants
def load_csv(fileName):
    """
    Loads a generic .csv file into a single DataFrame.

    The file is decoded as UTF-8 first; if it is not valid UTF-8 it is read
    again as ISO-8859-1.

    Args:
        fileName (str): Path to the .csv file to be read.
    Returns:
        tuple: Dictionary holding the DataFrame under the key "csv table", and a
            dictionary with an empty dict of constants under the same key.
    """
    try:
        # Strict decoding on purpose: with encoding_errors="replace" the
        # UnicodeDecodeError below could never be raised, so files in
        # ISO-8859-1 came back with replacement characters.
        df = pd.read_csv(fileName, encoding='utf-8')
    except UnicodeDecodeError:
        # ISO-8859-1 maps every byte to a character, so this cannot fail on decoding.
        df = pd.read_csv(fileName, encoding='ISO-8859-1')
    return {"csv table": df}, {"csv table": {}}
def whichFileToLoad(fileName, n):
    """
    Loads a file with the loader that matches its extension and returns the loaded data.

    Supported extensions: ".dat" (load_aixACCTFile), ".csv" (load_csv) and
    ".xlsx" (load_excel). The match is case-sensitive.

    Args:
        fileName (str): Path to the file to be read.
        n (int): An additional parameter (unused in the current implementation).
    Returns:
        tuple or None: Whatever the selected loader returns (a dictionary of
            DataFrames and a dictionary of constants), or None when the
            extension is not one of the supported ones.
    """
    if fileName.endswith(".dat"):
        return load_aixACCTFile(fileName)
    if fileName.endswith(".csv"):
        return load_csv(fileName)
    if fileName.endswith(".xlsx"):
        return load_excel(fileName)
    # Unknown extension: nothing is loaded.
    return None
import matplotlib.pyplot as plt
# Plot
def plotData(loadedTables, x_axis, y_axis, conditionColName=None, minimumValue=None, plotType="Line"):
    """
    Plots one curve per loaded table and shows the resulting figure.

    Args:
        loadedTables (dict): Maps a file key to a dict that maps a table (contour)
            name to its DataFrame.
        x_axis (str): Name of the column to be used for the x-axis.
        y_axis (str): Name of the column to be used for the y-axis.
        conditionColName (str, optional): Column used to filter rows. Only applied
            together with minimumValue. Defaults to None.
        minimumValue (optional): Rows whose conditionColName value is below this
            are left out of the plot. Defaults to None.
        plotType (str, optional): "Line" for plt.plot or "Dotted" for plt.scatter.
            Defaults to "Line".
    Returns:
        None: The function only draws and displays the plot.
    Raises:
        Exception: If plotType is neither "Line" nor "Dotted".
    """
    if plotType not in ("Line", "Dotted"):
        raise Exception("Plot doesn't know whether to be line or dotted")

    applyFilter = conditionColName is not None and minimumValue is not None

    for file in loadedTables:
        # TODO if file checkboxed
        fileData = loadedTables[file]
        for contourName in fileData:
            # TODO if contour checkboxed
            # `df` used to be referenced without ever being assigned (NameError).
            df = fileData[contourName]
            if applyFilter:
                # Select into a new frame so the caller's tables are not modified;
                # the negated "<" keeps the same rows that an in-place drop of
                # "value < minimum" would keep.
                df = df[~(df[conditionColName] < minimumValue)]
            if plotType == "Line":
                plt.plot(df[x_axis], df[y_axis])
            else:
                plt.scatter(df[x_axis], df[y_axis])

    # NOTE(review): shown once, after all curves are drawn - confirm that a single
    # combined figure (rather than one window per table) is what is wanted.
    plt.show()
#endregion
def stitchUp_probostatFiles(fileList, separationColumn):
    """
    Loads several Probostat files and combines them into one DataFrame.

    Every file is read with load_probostatFile_stitching; the resulting tables
    are outer-merged one after another on the axis column and the combined
    table is sorted by that column. The axis name is the module-level
    ``x_axis`` as left behind by the last file that was loaded.

    Args:
        fileList (list): List of file paths for the Probostat files to be stitched together.
        separationColumn (str): Not used by the current implementation.
    Returns:
        tuple: Dictionary holding the combined DataFrame under the key "table", and a
            dictionary with an empty dict of constants under the same key.
    """
    loadedDataFrames = [load_probostatFile_stitching(path, True) for path in fileList]

    # Read only after loading: each load call rewrites the module-level x_axis.
    axisName = x_axis

    stitched = pd.DataFrame({axisName: []})
    for frame in loadedDataFrames:
        stitched = pd.merge(stitched, frame, on=axisName, how='outer')

    stitched = stitched.sort_values(by=[axisName])
    return {"table": stitched}, {"table": {}}
# Test functions
def testFunction1():
    """
    Manual smoke test: loads a sample aixACCT file and prints the name, shape and
    info() of every table found in it.
    """
    # load_aixACCTFile returns (dataFrames, constants); treating that tuple as
    # the dictionary itself made len() always 2 and the indexing fail.
    allDataFramesFromTheFile, _constants = load_aixACCTFile("data/BSFO13_RS800_12h_10-150kVcm_RT.dat")
    print("\n\nHow many Data Frames were loaded: {}".format(len(allDataFramesFromTheFile)))
    for key, tmp in allDataFramesFromTheFile.items():
        print("\n\n{} - size {}".format(key, tmp.shape))
        tmp.info()