Skip to content

Commit

Permalink
back to multiple joiner options
Browse files — browse the repository at this point in the history
  • Loading branch information
linhnguyen-cinnamon authored and linhnguyen-cinnamon committed Jun 7, 2024
1 parent f138ba2 commit 4dfa8c1
Showing 1 changed file with 19 additions and 7 deletions.
26 changes: 19 additions & 7 deletions libs/kotaemon/kotaemon/loaders/excel_loader.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -75,18 +75,30 @@ def load_data(
)

dfs = pd.read_excel(file, sheet_name=sheet_name, **self._pandas_config)
for key in dfs.keys():
# drop rows and columns that are entirely empty
sheet_names = dfs.keys()
df_sheets = []

for key in sheet_names:
sheet = []
if include_sheetname:
sheet.append([key])
dfs[key] = dfs[key].dropna(axis=0, how='all')
dfs[key] = dfs[key].dropna(axis=0, how='all')
dfs[key] = dfs[key].dropna(axis=1, how='all')
dfs[key].fillna('', inplace=True)
dfs[key].fillna('', inplace=True)
sheet.extend(dfs[key].values.astype(str).tolist())
df_sheets.append(sheet)

text_list = list(
itertools.chain.from_iterable(df_sheets)
) # flatten list of lists

output = [
Document(
text="{}\n{}".format(key, dfs[key].to_string()),
metadata=extra_info.update({'name': key}),
text=self._row_joiner.join(
self._col_joiner.join(sublist) for sublist in text_list
),
metadata=extra_info or {},
)
for key in dfs.keys()
]

return output

0 comments on commit 4dfa8c1

Please sign in to comment.