Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -169,3 +169,5 @@ cython_debug/

# PyPI configuration file
.pypirc


1 change: 1 addition & 0 deletions data/0.0/climb-test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
climb test file v 0
1 change: 1 addition & 0 deletions data/0.0/run-text.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
run test file v0
1 change: 1 addition & 0 deletions data/0.0/walk-test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
walk test txt v0
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@ python-dateutil==2.9.0.post0
pytz==2024.2
six==1.17.0
tzdata==2024.2

## boto3
## python-dotenv
62 changes: 62 additions & 0 deletions src/s3_pipeline/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Required Libraries:

> pip install boto3

> pip install python-dotenv

# Getting Access to S3:
After cloning the repo locally, create a .env file in the Exoskeleton-AI directory and configure it like the following:

>AWS_ACCESS_KEY_ID=

>AWS_SECRET_ACCESS_KEY=

>AWS_BUCKET_NAME= mcmaster-exoskeleton-ai-data

>AWS_BUCKET_REGION= us-east-2

Obtain the aws_access_key_id and aws_secret_access_key privately (aws_bucket_name might be subject to change).


# Viewing s3 bucket content
To view the s3 bucket's content(folders, subfolders, files) to find version numbers, files to download, etc.

Run this command:
>python3 s3_bucketContent.py


# Uploading/Downloading data
To download/upload files from the s3 bucket.

Run this command:
>python3 s3_script.py
--action <strong>ACTION</strong>
--version <strong>VERSION</strong>
--function <strong>FUNCTION</strong>
--path <strong>PATH</strong><br>

|Parameter|Description|Options|
|-|-|-|
|<strong>ACTION</strong>| Upload or Download Exoskeleton Data| UPLOAD / DOWNLOAD
|<strong>VERSION</strong>| Dataset Version| 1.0, 2.0, . . .
|<strong>FUNCTION</strong>| Action for Exoskeleton| WALK, RUN, CLIMB
|<strong>PATH</strong>| Path where data will be uploaded from, or downloaded to

## Note:

It is best to download and upload files from a folder inside the local repo on your machine so that the paths stay simple. In the following example
the folder is called ***data***; the path must be relative to the directory you are currently working from.

### Examples:
***Download***

>python3 s3_script.py --action DOWNLOAD --version 1.0 --function WALK --path ../../data/1.0/walk.csv


Make sure that the walk.csv file exists in the S3 bucket; if unsure which files exist, use the S3 bucket viewing script.

***Upload***

>python3 s3_script.py --action UPLOAD --version 2.0 --function RUN --path ../../data/2.0/run_v2.csv

will upload it into s3 as 2.0 / RUN / run_v2.csv
36 changes: 36 additions & 0 deletions src/s3_pipeline/s3_bucketContent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# A script that lists all the folders and sub-folders (the CONTENT) of the S3 bucket,
# so anyone can inspect its contents without waiting for someone else to respond.
## Use this script to find the --version value and the file name (ending in .txt/.csv)
## to download; the file path (i.e. folders) must exist in the bucket, otherwise an error occurs.

import boto3
import os
from dotenv import load_dotenv

def list_all_contents():
    """Print every object key in the configured S3 bucket, one per line.

    Credentials and bucket settings are read from environment variables
    (loaded from a .env file via python-dotenv): AWS_ACCESS_KEY_ID,
    AWS_SECRET_ACCESS_KEY, AWS_BUCKET_NAME, AWS_BUCKET_REGION.
    """
    load_dotenv()

    access_key = os.getenv("AWS_ACCESS_KEY_ID")
    secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
    bucket_name = os.getenv("AWS_BUCKET_NAME")
    bucket_region = os.getenv("AWS_BUCKET_REGION")

    client = boto3.client(
        "s3",
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        region_name=bucket_region,
    )

    # Paginate so buckets with more than one page of objects are fully listed;
    # pages without a 'Contents' key (empty bucket) contribute nothing.
    keys = [
        obj["Key"]
        for page in client.get_paginator("list_objects_v2").paginate(Bucket=bucket_name)
        for obj in page.get("Contents", [])
    ]

    for key in keys:
        print(key)

if __name__ == "__main__":

    # Script entry point: dump the full bucket listing to stdout.
    list_all_contents()
66 changes: 66 additions & 0 deletions src/s3_pipeline/s3_script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os
import boto3 ## (requirements.txt)

import argparse ## in built python library
from dotenv import load_dotenv # pip install python-dotenv ( requirements.txt )

def upload_data(version, function, path):
    """Upload the local file at *path* into the bucket under <version>/<function>/.

    The object key keeps the file's own name: for a local path like
    home/users/data.txt the object becomes <version>/<function>/data.txt.
    Relies on the module-level ``s3`` client and ``AWS_BUCKET_NAME`` set up
    in the ``__main__`` block. Failures are reported to stdout, not raised.
    """
    try:
        object_key = f"{version}/{function}/{os.path.basename(path)}"
        s3.upload_file(path, AWS_BUCKET_NAME, object_key)
        print(f"Data uploaded to s3://{AWS_BUCKET_NAME}/{object_key}")
    except Exception as e:
        print(f"Error uploading file: {e}")


def download_data(version, function, path):
    """Download <version>/<function>/<basename(path)> from the bucket to *path*.

    The object key is derived from the basename of *path*, so *path* names
    both the remote file and the local destination. Relies on the module-level
    ``s3`` client and ``AWS_BUCKET_NAME`` set up in the ``__main__`` block.
    Failures are reported to stdout, not raised.
    """
    try:
        object_key = f"{version}/{function}/{os.path.basename(path)}"
        s3.download_file(AWS_BUCKET_NAME, object_key, path)
        print(f"Data downloaded to {path}")
    except Exception as e:
        print(f"Error downloading data: {e}")


def main(action, version, function, path):
    """Dispatch to the upload or download routine based on *action*.

    *action* is expected to be "UPLOAD" or "DOWNLOAD" (argparse enforces
    this at the entry point); any other value is silently ignored, exactly
    as the original if/elif chain did.
    """
    handlers = {"UPLOAD": upload_data, "DOWNLOAD": download_data}
    handler = handlers.get(action)
    if handler is not None:
        handler(version, function, path)



if __name__ == "__main__":

    # Credentials live in a git-ignored .env file, so each user must create
    # their own with valid AWS keys (see src/s3_pipeline/README.md).
    load_dotenv()

    AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
    AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
    AWS_BUCKET_NAME = os.getenv("AWS_BUCKET_NAME")
    AWS_BUCKET_REGION = os.getenv("AWS_BUCKET_REGION")

    # Module-level client shared by upload_data/download_data.
    s3 = boto3.client(
        "s3",
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        region_name=AWS_BUCKET_REGION,
    )

    # Fixed typo in the user-facing description ("Exoskelton" -> "Exoskeleton").
    parser = argparse.ArgumentParser(description="Upload or Download Exoskeleton Sensor Data")
    parser.add_argument("--action", type=str, required=True, choices=["UPLOAD", "DOWNLOAD"],
                        help="Upload or Download Exoskeleton Data")
    # Version is free-form text (not restricted to numbers) so the team can
    # use whatever naming scheme they agree on.
    parser.add_argument("--version", type=str, required=True,
                        help="Dataset Version (e.g 1.0, 2.0, ... )")
    parser.add_argument("--function", type=str, required=True, choices=["WALK", "RUN", "CLIMB"],
                        help="Action for Exoskeleton")
    # The local path must exist for uploads; for downloads, a file with the
    # same basename must exist under <version>/<function>/ in the bucket.
    parser.add_argument("--path", type=str, required=True,
                        help="Path where data will be uploaded from, or downloaded to")

    args = parser.parse_args()
    main(args.action, args.version, args.function, args.path)

    # Suggested workflow: keep csv/txt files in the repo's data/ folder so
    # paths stay simple and relative (e.g. ../../data/<version>/<file>).