Skip to content

Commit

Permalink
Merge pull request #4 from mrpbennett/tweaks
Browse files Browse the repository at this point in the history
set TTL in redis and created a loop
  • Loading branch information
mrpbennett authored Jan 3, 2024
2 parents 017ed69 + e83e4a5 commit 8374f50
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 82 deletions.
2 changes: 1 addition & 1 deletion api.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ async def list_users():
with conn.cursor() as curs:
curs.execute(
"""
SELECT uid FROM users;
SELECT uid FROM users ORDER BY ts DESC;
"""
)
user_list = curs.fetchall()
Expand Down
13 changes: 11 additions & 2 deletions caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def add_user_to_redis(user_data: list, user_address_data: list) -> None:
"""Add user to Redis for caching
Combine the user and user's address data into one dictionary to cache into Redis
under the same uid
under the same uid. A TTL of 2 mins is also set on each key to keep the Redis cache
as fresh as possible.
Args:
user_data: A list of a JSON dicts
Expand Down Expand Up @@ -67,9 +68,15 @@ def add_user_to_redis(user_data: list, user_address_data: list) -> None:

full_user_data: dict = {**users_data, **address_data}

# Add user to Redis
redis.hset(uid, mapping=full_user_data)

logging.info(f"{red(len(user_data))} Users have been cached in Redis")
# Set TTL in Redis for 2 min
redis.expire(uid, 120)

logging.info(
f"{red(len(user_data))} Users have been cached in Redis, with a TTL of 2 mins"
)

except ValueError as e:
raise e
Expand All @@ -91,6 +98,8 @@ def get_user_from_redis(key: str) -> dict:
try:
data = {}

# TODO: Set TTL to 5mins

if redis.exists(key):
data: dict = redis.hgetall(key)
else:
Expand Down
1 change: 1 addition & 0 deletions extaction.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from datetime import datetime

from simple_chalk import green, red, yellow

Expand Down
4 changes: 2 additions & 2 deletions get_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
def get_user_data() -> dict:
"""Generate random user data from API call
Calls a Random user generator API to collect data on 100 random users
Calls a Random user generator API to collect data on 10 random users
Returns:
A JSON object of 10 random users
Expand All @@ -34,7 +34,7 @@ def get_user_data() -> dict:
try:
# MAX requests is 100
response = requests.get(
f"https://random-data-api.com/api/v2/users?size=100&response_type=json"
f"https://random-data-api.com/api/v2/users?size=10&response_type=json"
)

response.raise_for_status()
Expand Down
150 changes: 81 additions & 69 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

import json
import logging
import time

import tomli
from simple_chalk import blue, green, red, yellow
Expand Down Expand Up @@ -53,75 +54,86 @@


def main():
# Get Data from API. Max requests = 100
data_to_be_process = get_user_data()

# Validate Data - validate_json: returns True
if validate_json(data_to_be_process):
"""
Clean Data by removing certain cols using Pandas DataFrame returns pd.DataFrame
that is then converted to JSON string for storage enabling extraction
"""

json_data = json.loads(
data_clean_up(data_to_be_process).to_json(orient="records")
)

"""
Now data has been retrieved, validated, cleaned and converted into JSON.
Now extract two objects of data to insert into relevant tables and Redis
for caching.
"""

# Extract user data and return a list of dicts
user_data = extract_user_data_for_storage(json_data)

# Extract users address data and return a list of dicts
users_address_data = extract_address_data_for_storage(json_data)

"""
Now data has been retrieved, validated and clean now insert into Redis
for caching
"""

add_user_to_redis(user_data, users_address_data)

"""
INSERT data into Postgres tables users and users_address
"""

# Check if users and users_address tables exist already
if check_table_exists("users") and check_table_exists("users_address"):
for user in user_data:
# Add user into Postgres for permanent storage
if insert_into_user_table(user):
logging.info(
f"User: {blue(user['uid'])} added successfully into Postgres"
)

for address in users_address_data:
if insert_into_address_table(address):
logging.info(
f"User: {blue(address['uid'])} address has been added successfully into Postgres"
)
else:
# IF tables do not exist create and add data
if create_user_table():
logging.info(yellow("users table was created"))
if create_address_table():
logging.info(yellow("users_address table was created"))

for user in user_data:
if insert_into_user_table(user):
logging.info(
f"User: {blue(user['uid'])} added successfully into Postgres"
)

for address in users_address_data:
if insert_into_address_table(address):
logging.info(
f"User: {blue(address['uid'])} address has been added successfully into Postgres"
)
counter = 0
max_count = 10

# Each while loop will represent one daily data dump.
# 24hrs = 2 mins
# 10 days worth of data in 20 mins
while counter < max_count:
# Get Data from API. Max requests = 100
data_to_be_process = get_user_data()

# Validate Data - validate_json: returns True
if validate_json(data_to_be_process):
"""
Clean Data by removing certain cols using Pandas DataFrame returns pd.DataFrame
that is then converted to JSON string for storage enabling extraction
"""

json_data = json.loads(
data_clean_up(data_to_be_process).to_json(orient="records")
)

"""
Now data has been retrieved, validated, cleaned and converted into JSON.
Now extract two objects of data to insert into relevant tables and Redis
for caching.
"""

# Extract user data and return a list of dicts
user_data = extract_user_data_for_storage(json_data)

# Extract users address data and return a list of dicts
users_address_data = extract_address_data_for_storage(json_data)

"""
Now data has been retrieved, validated and clean now insert into Redis
for caching
"""

add_user_to_redis(user_data, users_address_data)

"""
INSERT data into Postgres tables users and users_address
"""

# Check if users and users_address tables exist already
if check_table_exists("users") and check_table_exists("users_address"):
for user in user_data:
# Add user into Postgres for permanent storage
if insert_into_user_table(user):
logging.info(
f"User: {blue(user['uid'])} added successfully into Postgres"
)

for address in users_address_data:
if insert_into_address_table(address):
logging.info(
f"User: {blue(address['uid'])} address has been added successfully into Postgres"
)
else:
# IF tables do not exist create and add data
if create_user_table():
logging.info(yellow("users table was created"))
if create_address_table():
logging.info(yellow("users_address table was created"))

for user in user_data:
if insert_into_user_table(user):
logging.info(
f"User: {blue(user['uid'])} added successfully into Postgres"
)

for address in users_address_data:
if insert_into_address_table(address):
logging.info(
f"User: {blue(address['uid'])} address has been added successfully into Postgres"
)

counter += 1
time.sleep(120) # Sleep for 2 minutes
logging.info(yellow("Waiting for next batch of users to process..."))


if __name__ == "__main__":
Expand Down
14 changes: 11 additions & 3 deletions static/src/App.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,18 @@ function App() {
}

return (
<div className="container mx-auto">
<div className="grid grid-cols-2 gap-8 mt-10">
<div className="container mx-auto max-w-[960px] overflow-auto">
<div>
<p className="mt-8 text-lg">
Click on the <code>uid</code> to display the relevant data for that
user...
</p>
</div>
<div className="grid grid-cols-2 gap-4 mt-10">
<div>
<h1 className="font-bold mb-4">Current users</h1>
<h1 className="font-bold mb-4">
Current user <code>uid</code>s
</h1>
{users.map(item => (
<ol key={item}>
<li
Expand Down
15 changes: 10 additions & 5 deletions storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
green(f"Address for User: {address['uid']} added successfully")
"""
import logging
from datetime import datetime
from typing import Literal

import psycopg2
Expand Down Expand Up @@ -74,7 +75,8 @@ def create_user_table() -> Literal[True]:
email VARCHAR(255),
phone_number VARCHAR(255),
social_insurance_number VARCHAR(255),
date_of_birth VARCHAR(255)
date_of_birth VARCHAR(255),
ts INT
);
"""
)
Expand Down Expand Up @@ -106,6 +108,7 @@ def create_address_table() -> Literal[True]:
zip_code VARCHAR(255),
state VARCHAR(255),
country VARCHAR(255),
ts INT,
PRIMARY KEY (uid),
FOREIGN KEY (uid) REFERENCES users(uid)
Expand Down Expand Up @@ -153,10 +156,10 @@ def insert_into_user_table(user_data: dict) -> Literal[True]:
curs.execute(
"""
INSERT INTO users (
uid, password, first_name, last_name, username, email, phone_number, social_insurance_number, date_of_birth
uid, password, first_name, last_name, username, email, phone_number, social_insurance_number, date_of_birth, ts
)
VALUES (
%s, %s, %s, %s, %s, %s, %s, %s, %s
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s
);
""",
(
Expand All @@ -169,6 +172,7 @@ def insert_into_user_table(user_data: dict) -> Literal[True]:
user_data["phone_number"],
user_data["social_insurance_number"],
user_data["date_of_birth"],
int(datetime.timestamp(datetime.now())),
),
)

Expand All @@ -191,10 +195,10 @@ def insert_into_address_table(address_data: dict) -> Literal[True]:
curs.execute(
"""
INSERT INTO users_address (
uid, city, street_name, street_address, zip_code, state, country
uid, city, street_name, street_address, zip_code, state, country, ts
)
VALUES (
%s, %s, %s, %s, %s, %s, %s
%s, %s, %s, %s, %s, %s, %s, %s
);
""",
(
Expand All @@ -205,6 +209,7 @@ def insert_into_address_table(address_data: dict) -> Literal[True]:
address_data["zip_code"],
address_data["state"],
address_data["country"],
int(datetime.timestamp(datetime.now())),
),
)

Expand Down

0 comments on commit 8374f50

Please sign in to comment.