From 160b7a5ad3ac742ffec4905d81f0e7c140dd6f7b Mon Sep 17 00:00:00 2001 From: Perdana Hadi Date: Fri, 20 Sep 2024 09:44:40 +0700 Subject: [PATCH] update pipeline --- README.md | 1 + pipeline/upload_to_sheets.py | 26 ++++++++++++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 82e485a..5717a4c 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ id-jobs collects job listings from Indonesian job portals and company websites, respecting each site's terms of service. **View the Data on Google Sheets:** [https://s.id/id-jobs-v2](https://s.id/id-jobs-v2) + **View the Dashboard on LookerStudio by Google:** [https://s.id/id-jobs-dashboard](https://s.id/id-jobs-dashboard) ## 🎨 Job Age Colors diff --git a/pipeline/upload_to_sheets.py b/pipeline/upload_to_sheets.py index 464ba42..513ba55 100644 --- a/pipeline/upload_to_sheets.py +++ b/pipeline/upload_to_sheets.py @@ -294,6 +294,26 @@ def get_dynamic_range(data): num_cols = len(data[0]) if data else 0 return f"Sheet1!A1:{chr(65 + num_cols - 1)}{num_rows}" +def clear_sheet(service, spreadsheet_id): + sheet_metadata = service.spreadsheets().get(spreadsheetId=spreadsheet_id).execute() + properties = sheet_metadata.get('sheets', [])[0].get('properties', {}) + sheet_id = properties.get('sheetId', 0) + + requests = [{ + "updateCells": { + "range": { + "sheetId": sheet_id, + }, + "fields": "userEnteredValue" + } + }] + + body = { + 'requests': requests + } + service.spreadsheets().batchUpdate(spreadsheetId=spreadsheet_id, body=body).execute() + print("Sheet cleared successfully.") + def upload_to_sheets(service, spreadsheet_id, data): config.sheet_range = get_dynamic_range(data) body = {'values': data} @@ -303,10 +323,8 @@ def upload_to_sheets(service, spreadsheet_id, data): spreadsheet = service.spreadsheets().get(spreadsheetId=spreadsheet_id).execute() print(f"Successfully accessed spreadsheet: {spreadsheet['properties']['title']}") - service.spreadsheets().values().clear( 
- spreadsheetId=spreadsheet_id, - range=config.sheet_range - ).execute() + # Clear the entire sheet + clear_sheet(service, spreadsheet_id) result = service.spreadsheets().values().update( spreadsheetId=spreadsheet_id,