diff --git a/.gitignore b/.gitignore
index 15201ac..1d2217f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -169,3 +169,5 @@ cython_debug/
 
 # PyPI configuration file
 .pypirc
+
+
diff --git a/data/0.0/climb-test.txt b/data/0.0/climb-test.txt
new file mode 100644
index 0000000..b2c525f
--- /dev/null
+++ b/data/0.0/climb-test.txt
@@ -0,0 +1 @@
+climb test file v 0
\ No newline at end of file
diff --git a/data/0.0/run-text.txt b/data/0.0/run-text.txt
new file mode 100644
index 0000000..8b1fe77
--- /dev/null
+++ b/data/0.0/run-text.txt
@@ -0,0 +1 @@
+run test file v0
\ No newline at end of file
diff --git a/data/0.0/walk-test.txt b/data/0.0/walk-test.txt
new file mode 100644
index 0000000..defa8f2
--- /dev/null
+++ b/data/0.0/walk-test.txt
@@ -0,0 +1 @@
+walk test txt v0
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 790838d..c30205d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,6 @@ python-dateutil==2.9.0.post0
 pytz==2024.2
 six==1.17.0
 tzdata==2024.2
+
+## boto3
+## python-dotenv
diff --git a/src/s3_pipeline/README.md b/src/s3_pipeline/README.md
new file mode 100644
index 0000000..58dae84
--- /dev/null
+++ b/src/s3_pipeline/README.md
@@ -0,0 +1,52 @@
+# Required Libraries
+
+> pip install boto3
+
+> pip install python-dotenv
+
+# Getting Access to S3
+Once you have a local clone of the repo, create a .env file in the Exoskeleton-AI root and configure it like the following:
+
+>AWS_ACCESS_KEY_ID=
+
+>AWS_SECRET_ACCESS_KEY=
+
+>AWS_BUCKET_NAME= mcmaster-exoskeleton-ai-data
+
+>AWS_BUCKET_REGION= us-east-2
+
+Get the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY privately (AWS_BUCKET_NAME may be subject to change).
+
+# Viewing S3 bucket content
+To view the S3 bucket's contents (folders, subfolders, files) and find version numbers, files to download, etc., run this command:
+
+>python3 s3_bucketContent.py
+
+# Uploading/Downloading data
+To download or upload files from the S3 bucket, run this command:
+
+>python3 s3_script.py --action ACTION --version VERSION --function FUNCTION --path PATH
+
+|Parameter|Description|Options|
+|-|-|-|
+|ACTION|Upload or download exoskeleton data|UPLOAD / DOWNLOAD|
+|VERSION|Dataset version|1.0, 2.0, ...|
+|FUNCTION|Activity for the exoskeleton|WALK, RUN, CLIMB|
+|PATH|Path the data is uploaded from, or downloaded to||
+
+## Note:
+
+It is best to download and upload files from a folder inside the local repo on your machine, so the paths stay simple. In the examples below that folder is called ***data***; the path must be relative to the directory you are currently working out of.
+
+### Examples:
+***Download***
+
+>python3 s3_script.py --action DOWNLOAD --version 1.0 --function WALK --path ../../data/1.0/walk.csv
+
+Make sure that walk.csv actually exists in the S3 bucket; if you are unsure which files exist, use the bucket-listing script above.
+
+***Upload***
+
+>python3 s3_script.py --action UPLOAD --version 2.0 --function RUN --path ../../data/2.0/run_v2.csv
+
+This uploads the file to S3 as 2.0/RUN/run_v2.csv.
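For reference, both scripts below build their object keys the same way as the README examples. A minimal sketch of the key convention; the `build_key` helper is illustrative only and not part of this PR:

```python
import os

def build_key(version: str, function: str, path: str) -> str:
    # Object keys follow the {VERSION}/{FUNCTION}/{filename} convention,
    # where the filename is the basename of the local --path argument.
    return f"{version}/{function}/{os.path.basename(path)}"

# Mirrors the README's download example.
print(build_key("1.0", "WALK", "../../data/1.0/walk.csv"))  # -> 1.0/WALK/walk.csv
```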
diff --git a/src/s3_pipeline/s3_bucketContent.py b/src/s3_pipeline/s3_bucketContent.py
new file mode 100644
index 0000000..d4a015c
--- /dev/null
+++ b/src/s3_pipeline/s3_bucketContent.py
@@ -0,0 +1,41 @@
+# A script you can run to list all the folders and sub-folders (the full
+# contents) of the S3 bucket, so nobody has to wait for someone else to answer.
+# Use it to find the --version value and the file name (ending in .txt / .csv)
+# you want to download; the key path (i.e. the folders) must exist, otherwise
+# the download will fail.
+
+import os
+
+import boto3
+from dotenv import load_dotenv
+
+
+def list_all_contents():
+    load_dotenv()
+
+    AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
+    AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
+    AWS_BUCKET_NAME = os.getenv("AWS_BUCKET_NAME")
+    AWS_BUCKET_REGION = os.getenv("AWS_BUCKET_REGION")
+
+    s3 = boto3.client(
+        "s3",
+        aws_access_key_id=AWS_ACCESS_KEY_ID,
+        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
+        region_name=AWS_BUCKET_REGION,
+    )
+
+    # Paginate so buckets with more than 1000 objects are listed in full.
+    paginator = s3.get_paginator("list_objects_v2")
+    result = []
+
+    for page in paginator.paginate(Bucket=AWS_BUCKET_NAME):
+        for content in page.get("Contents", []):
+            result.append(content["Key"])
+
+    for item in result:
+        print(item)
+
+
+if __name__ == "__main__":
+    list_all_contents()
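If the bucket grows large, the same paginator can narrow the listing to one dataset version via its `Prefix` parameter. A minimal sketch, assuming the same `.env` variables as `s3_bucketContent.py`; the `1.0/` prefix is only an example value:

```python
import os

import boto3
from dotenv import load_dotenv

load_dotenv()

s3 = boto3.client(
    "s3",
    aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
    region_name=os.getenv("AWS_BUCKET_REGION"),
)

# Only keys under the given version prefix are returned.
paginator = s3.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=os.getenv("AWS_BUCKET_NAME"), Prefix="1.0/"):
    for obj in page.get("Contents", []):
        print(obj["Key"])
```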
"DOWNLOAD": + download_data(version, function, path) + + + +if __name__ == "__main__": + + load_dotenv() ## # reads .env file that will be git ignored so everyone accessing the data from the cloud needs to be + + AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID") + AWS_SECRET_ACCESS_KEY= os.getenv("AWS_SECRET_ACCESS_KEY") + AWS_BUCKET_NAME= os.getenv("AWS_BUCKET_NAME") + AWS_BUCKET_REGION= os.getenv("AWS_BUCKET_REGION") + + s3 = boto3.client( + "s3", + aws_access_key_id=AWS_ACCESS_KEY_ID, + aws_secret_access_key=AWS_SECRET_ACCESS_KEY, + region_name = AWS_BUCKET_REGION + ) + + + parser = argparse.ArgumentParser(description="Upload or Download Exoskelton Sensor Data ") + parser.add_argument("--action", type=str, required= True, choices=["UPLOAD","DOWNLOAD"],help="Upload or Download Exoskeleton Data") + parser.add_argument("--version", type=str, required= True, help="Dataset Version (e.g 1.0, 2.0, ... )") + ## any name that the team puts in? thats why there is no restriction on version naming ( i.e does not have to be numbers) + + parser.add_argument("--function", type=str, required= True, choices=["WALK","RUN","CLIMB"],help="Action for Exoskeleton") + parser.add_argument("--path", type=str, required= True, help="Path where data will be uploaded from, or downloaded to") + ## path must exsist, and file name must exsist under the function otherwise cannot download or upload + + # verion if action is upload and it is a new version then the new version type needs to exsist or something + args = parser.parse_args() + main(args.action, args.version, args.function, args.path) + + ## just ask people to drag and drop the csv/text files in the data folder so that it would be easier for them to upload by having a simple predeteremined file path + ## it would be ../../data/----- and where they place the rest of the data \ No newline at end of file