From 4b683ae2fe385a16a3bd8ea2e7ebd4e13e971339 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 29 Aug 2023 15:47:10 +0100 Subject: [PATCH] Added append function to to_sql --- pybarb/pybarb.py | 33 ++++++++++++++++++++------------- setup.py | 2 +- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/pybarb/pybarb.py b/pybarb/pybarb.py index 29ea072..d83cc20 100644 --- a/pybarb/pybarb.py +++ b/pybarb/pybarb.py @@ -529,17 +529,18 @@ def to_json(self, file_name): with open(file_name, 'w') as f: json.dump(self.api_response_data, f) - def to_sql(self, connection_string, table_name): + def to_sql(self, connection_string, table_name, if_exists='replace'): """ Saves the API response data as a SQL table. Args: connection_string (str): The connection string to the SQL database. table_name (str): The name of the SQL table to save. + if_exists (str): The action to take if the SQL table already exists. """ df = self.to_dataframe() engine = sqlalchemy.create_engine(connection_string) - df.to_sql(table_name, engine, if_exists='replace', index=False) + df.to_sql(table_name, engine, if_exists=if_exists, index=False) def audience_pivot(self): """ @@ -802,19 +803,27 @@ def to_dataframe(self, unpack=None): row.update(item['DEVICE']) for programme in item['PROGRAMMES_VIEWED']: - if 'programme_start_datetime' in programme.keys(): - for viewer in item['PANEL_VIEWERS']: - inner_row = {} - inner_row.update({'programme_start_datetime': programme['programme_start_datetime']['standard_datetime'], - 'programme_name': programme['programme_content']['content_name'],}) - inner_row.update(viewer) - inner_row.update(row) - rows.append(inner_row) + + for viewer in item['PANEL_VIEWERS']: + inner_row = {} + inner_row.update({'session_start_datetime':item['SESSION_START']['standard_datetime']}) + if 'programme_start_datetime' in programme.keys(): + inner_row.update({'programme_start_datetime': programme['programme_start_datetime']['standard_datetime']}) + inner_row.update({'programme_name': programme['programme_content']['content_name']}) + inner_row.update(viewer) + inner_row.update(row) + rows.append(inner_row) # Drop all columns from df with datatype that is a dict df = pd.DataFrame(rows) - df = df.drop(columns=["panel_member_weights", "tv_set_properties"]).drop_duplicates() + + # If it exists, drop the column tv_set_properties + for column in ["tv_set_properties", "panel_member_weights"]: + if column in df.columns: + df = df.drop(columns=[column]) + + df = df.drop_duplicates() return df @@ -827,5 +836,3 @@ def to_json(self, file_name): """ self.api_response_data.to_json(file_name, orient='records') - - diff --git a/setup.py b/setup.py index b863f85..9da5ca5 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='pybarb', - version='0.4.5', + version='0.4.6', packages=find_packages(), install_requires=[ 'pandas',