|
| 1 | +""" |
| 2 | +This module defines the routes for aggregating data from user workspaces in a Flask application. |
| 3 | +It provides two main routes for performing column-level calculations on CSV files stored in the |
| 4 | +user's workspace. The supported operations include summing, averaging, counting, finding the |
| 5 | +minimum, and finding the maximum values in specified columns. |
| 6 | +
|
| 7 | +The module emits real-time feedback to the user’s session via Socket.IO: status updates on the |
| 8 | +calculations, warnings about cells skipped due to invalid data, and notifications of errors |
| 9 | +such as file not found, permission denied, or unexpected issues. |
| 10 | +
|
| 11 | +Routes: |
| 12 | + - get_workspace_aggregate_all(relative_path): |
| 13 | + Calculates aggregate values (sum, avg, min, max, cnt) for multiple columns in a CSV file. |
| 14 | +
|
| 15 | + - get_workspace_aggregate(relative_path): |
| 16 | + Calculates an aggregate value (sum, avg, min, max, cnt) for a single column in a CSV file. |
| 17 | +
|
| 18 | +Exceptions are caught and reported to the user’s console via Socket.IO. |
| 19 | +""" |
| 20 | + |
1 | 21 | # pylint: disable=import-error
|
2 | 22 |
|
3 | 23 | import os
|
4 | 24 | import csv
|
5 |
| -from flask import Blueprint, request, jsonify |
6 | 25 | from ast import literal_eval
|
| 26 | +from flask import Blueprint, request, jsonify |
7 | 27 |
|
8 | 28 | from src.setup.extensions import logger
|
9 | 29 | from src.utils.helpers import socketio_emit_to_user_session, is_number
|
|
18 | 38 | f"{WORKSPACE_AGGREGATE_ROUTE}/all/<path:relative_path>", methods=["GET"]
|
19 | 39 | )
|
20 | 40 | def get_workspace_aggregate_all(relative_path):
|
| 41 | + """ |
| 42 | + Route to calculate aggregate values (e.g., sum, avg, min, max, cnt) for multiple columns |
| 43 | + in a CSV file located in the user's workspace. The columns and their aggregation actions |
| 44 | + are specified in the request's query parameters. |
| 45 | +
|
| 46 | + Args: |
| 47 | + relative_path (str): The relative path to the CSV file inside the user's workspace. |
| 48 | +
|
| 49 | + Request Headers: |
| 50 | + - uuid: A unique identifier for the user's session. |
| 51 | + - sid: A session identifier for emitting real-time console feedback via Socket.IO. |
| 52 | +
|
| 53 | + Query Parameters: |
| 54 | + - columnsAggregation (str): A stringified dictionary where the keys are column names and |
| 55 | + the values are dictionaries with an "action" key specifying the aggregation operation |
| 56 | + ('sum', 'avg', 'min', 'max', or 'cnt'). |
| 57 | +
|
| 58 | + Returns: |
| 59 | + Response (JSON): |
| 60 | + - On success: A JSON object with aggregated results for each specified column. |
| 61 | + - On error: A JSON object with an error message and appropriate HTTP status code. |
| 62 | +
|
| 63 | + Emits: |
| 64 | + - Real-time console feedback using Socket.IO via the `socketio_emit_to_user_session` |
| 65 | + function. Feedback includes the start, completion, and any warnings or errors during |
| 66 | + the aggregation process. |
| 67 | +
|
| 68 | + Possible Errors: |
| 69 | + - FileNotFoundError: The specified CSV file does not exist. |
| 70 | + - PermissionError: Insufficient permissions to read the CSV file. |
| 71 | + - UnexpectedError: Any other unexpected error during the aggregation process. |
| 72 | + """ |
| 73 | + |
21 | 74 | uuid = request.headers.get("uuid")
|
22 | 75 | sid = request.headers.get("sid")
|
23 | 76 |
|
@@ -140,7 +193,8 @@ def get_workspace_aggregate_all(relative_path):
|
140 | 193 | CONSOLE_FEEDBACK_EVENT,
|
141 | 194 | {
|
142 | 195 | "type": "warn",
|
143 |
| - "message": f"The following columns had cells skipped due to non-numeric values: {', '.join(skipped_columns_info)}", |
| 196 | + "message": "The following columns had cells skipped due to non-numeric " |
| 197 | + + f"values: {', '.join(skipped_columns_info)}", |
144 | 198 | },
|
145 | 199 | uuid,
|
146 | 200 | sid,
|
@@ -203,6 +257,45 @@ def get_workspace_aggregate_all(relative_path):
|
203 | 257 | f"{WORKSPACE_AGGREGATE_ROUTE}/<path:relative_path>", methods=["GET"]
|
204 | 258 | )
|
205 | 259 | def get_workspace_aggregate(relative_path):
|
| 260 | + """ |
| 261 | + Route to calculate an aggregate value (e.g., sum, avg, min, max, cnt) for a single column |
| 262 | + in a CSV file located in the user's workspace. The column and the aggregation action |
| 263 | + are specified in the request's query parameters. |
| 264 | +
|
| 265 | + Args: |
| 266 | + relative_path (str): The relative path to the CSV file inside the user's workspace. |
| 267 | +
|
| 268 | + Request Headers: |
| 269 | + - uuid: A unique identifier for the user's session. |
| 270 | + - sid: A session identifier for emitting real-time console feedback via Socket.IO. |
| 271 | +
|
| 272 | + Query Parameters: |
| 273 | + - field (str): The name of the column to perform the aggregation on. |
| 274 | + - action (str): The type of aggregation action to perform |
| 275 | + ('sum', 'avg', 'min', 'max', or 'cnt'). |
| 276 | +
|
| 277 | + Returns: |
| 278 | + Response (JSON): |
| 279 | + - On success: A JSON object with the aggregated result for the specified column. |
| 280 | + - On error: A JSON object with an error message and appropriate HTTP status code. |
| 281 | +
|
| 282 | + Emits: |
| 283 | + - Real-time console feedback using Socket.IO via the `socketio_emit_to_user_session` |
| 284 | + function. Feedback includes the start, completion, and any warnings or errors during the |
| 285 | + aggregation process. |
| 286 | +
|
| 287 | + Possible Errors: |
| 288 | + - FileNotFoundError: The specified CSV file does not exist. |
| 289 | + - PermissionError: Insufficient permissions to read the CSV file. |
| 290 | + - UnexpectedError: Any other unexpected error during the aggregation process. |
| 291 | +
|
| 292 | + Notes: |
| 293 | + - If the column contains non-numeric values, those cells are skipped and a warning is sent |
| 294 | + via Socket.IO. |
| 295 | + - The result is formatted as "N/A" if no valid numeric data is found or if the specified |
| 296 | + action is invalid for the data present in the column. |
| 297 | + """ |
| 298 | + |
206 | 299 | uuid = request.headers.get("uuid")
|
207 | 300 | sid = request.headers.get("sid")
|
208 | 301 |
|
@@ -289,7 +382,8 @@ def get_workspace_aggregate(relative_path):
|
289 | 382 | CONSOLE_FEEDBACK_EVENT,
|
290 | 383 | {
|
291 | 384 | "type": "warn",
|
292 |
| - "message": f"At column '{field}' {skipped_count} cells were skipped because they contain non-numeric values.", |
| 385 | + "message": f"At column '{field}' {skipped_count} cells " |
| 386 | + + "were skipped because they contain non-numeric values.", |
293 | 387 | },
|
294 | 388 | uuid,
|
295 | 389 | sid,
|
|
0 commit comments