
Code clean up
SonyShrestha committed May 29, 2024
1 parent b3c8175 commit f4b5095
Showing 8 changed files with 28 additions and 17 deletions.
2 changes: 1 addition & 1 deletion Website/pages/__init__.py
@@ -1,4 +1,4 @@
-from pages.product_preishability import show_feature1
+from pages.product_perishability import show_feature1
from pages.cust_purchase_expected_expiry import show_feature2
from pages.food_recommender import show_feature3
from pages.sentiment_analysis import show_feature4
23 changes: 22 additions & 1 deletion Website/pages/food_recommender.py
@@ -9,6 +9,8 @@
import json
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch
+import logging
+import configparser

# Set environment variables
os.environ["PYSPARK_PYTHON"] = "/home/pce/anaconda3/envs/spark_env/bin/python3.11"
@@ -19,9 +21,28 @@
# Set page config for a better appearance
st.set_page_config(page_title="Food Recommender System", layout="wide")

+# Configure logging
+logging.basicConfig(level=logging.INFO) # Set log level to INFO
+
+# Create logger object
+logger = logging.getLogger()
+
+# Get base directory
+root_dir = os.path.abspath(os.path.join(os.getcwd()))
+
+# Specify the path to config file
+config_file_path = os.path.join(root_dir, "config.ini")
+config = configparser.ConfigParser()
+config.read(config_file_path)
+
+config_file_path_json = os.path.join(root_dir, "config.json")
+with open(config_file_path_json) as f:
+    config_json = json.load(f)
+


def create_spark_session():
+gcs_config = config["GCS"]["credentials_path"]
spark = SparkSession.builder \
.appName("RecipeProcessing") \
.config("spark.driver.host", "127.0.0.1") \
@@ -30,7 +51,7 @@ def create_spark_session():
.config("spark.hadoop.fs.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem") \
.config("spark.hadoop.fs.AbstractFileSystem.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS") \
.config("spark.hadoop.google.cloud.auth.service.account.enable", "true") \
.config("spark.hadoop.google.cloud.auth.service.account.json.keyfile", "/home/pce/Documents/VBP_Joint_Project-main/formal-atrium-418823-7fbbc75ebbc6.json") \
.config("spark.hadoop.google.cloud.auth.service.account.json.keyfile", gcs_config) \
.getOrCreate()
spark.sparkContext.setLogLevel("ERROR")
return spark
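
The config-driven credential lookup above assumes a config.ini sitting next to the app. A minimal sketch of that layout, assuming only the "GCS" section and "credentials_path" key visible in this diff (the helper script and the path value are placeholders, not part of this commit):

    # make_config.py -- hypothetical helper, not part of this commit
    import configparser

    config = configparser.ConfigParser()
    # Section and key names come from the diff; the path below is an example only.
    config["GCS"] = {"credentials_path": "/path/to/service-account-key.json"}
    with open("config.ini", "w") as f:
        config.write(f)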
File renamed without changes.
2 changes: 1 addition & 1 deletion exploitation_zone/bigquery_code/dim_cust_location.py
@@ -40,7 +40,7 @@
.config("spark.driver.host", "127.0.0.1") \
.config("spark.hadoop.fs.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem") \
.config("spark.hadoop.fs.AbstractFileSystem.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS") \
-.config('spark.jars', '/home/pce/Documents/VBP_Joint_Project-main/spark-bigquery-with-dependencies_2.12-0.27.0.jar') \
+.config('spark.jars', 'spark-bigquery-with-dependencies_2.12-0.27.0.jar') \
.config("spark.hadoop.google.cloud.auth.service.account.enable", "true") \
.config("spark.hadoop.google.cloud.auth.service.account.json.keyfile", gcs_config) \
.config("temporaryGcsBucket", raw_bucket_name) \
7 changes: 1 addition & 6 deletions exploitation_zone/bigquery_code/dim_product.py
@@ -41,7 +41,7 @@
.config("spark.driver.host", "127.0.0.1") \
.config("spark.hadoop.fs.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem") \
.config("spark.hadoop.fs.AbstractFileSystem.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS") \
-.config('spark.jars', '/home/pce/Documents/VBP_Joint_Project-main/spark-bigquery-with-dependencies_2.12-0.27.0.jar') \
+.config('spark.jars', 'spark-bigquery-with-dependencies_2.12-0.27.0.jar') \
.config("spark.hadoop.google.cloud.auth.service.account.enable", "true") \
.config("spark.hadoop.google.cloud.auth.service.account.json.keyfile", gcs_config) \
.config("temporaryGcsBucket", raw_bucket_name) \
@@ -70,9 +70,4 @@
.option('temporaryGcsBucket', raw_bucket_name) \
.mode('overwrite') \
.save()
-# #
-# product_df.printSchema()
-
-
-# product_df.write.mode('overwrite').parquet(f'/home/pce/Documents/VBP_Joint_Project-main/dim_table/dim_product.parquet')

7 changes: 1 addition & 6 deletions exploitation_zone/bigquery_code/dim_sp_location.py
@@ -84,12 +84,7 @@
.format('bigquery') \
.option('table', f'{project_id}:{dataset_id}.dim_supermarket') \
.mode('overwrite') \
-.save()
-# .option('temporaryGcsBucket', raw_bucket_name) \
-
-# dim_supermarket.write.mode('overwrite').parquet(f'/home/pce/Documents/VBP_Joint_Project-main/dim_table/dim_supermarket.parquet')
-# dim_sp_location.write.mode('overwrite').parquet(f'/home/pce/Documents/VBP_Joint_Project-main/dim_table/dim_supermarket_location.parquet')
-
+.save()

logger.info("writing supermarket location dimension table")

@@ -47,7 +47,7 @@
.config("spark.driver.host", "127.0.0.1") \
.config("spark.hadoop.fs.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem") \
.config("spark.hadoop.fs.AbstractFileSystem.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS") \
-.config('spark.jars', '/home/pce/Documents/VBP_Joint_Project-main/spark-bigquery-with-dependencies_2.12-0.27.0.jar') \
+.config('spark.jars', 'spark-bigquery-with-dependencies_2.12-0.27.0.jar') \
.config("spark.hadoop.google.cloud.auth.service.account.enable", "true") \
.config("spark.hadoop.google.cloud.auth.service.account.json.keyfile", gcs_config) \
.config("temporaryGcsBucket", raw_bucket_name) \
2 changes: 1 addition & 1 deletion exploitation_zone/bigquery_code/fact_cust_inventory.py
@@ -40,7 +40,7 @@
.config("spark.driver.host", "127.0.0.1") \
.config("spark.hadoop.fs.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem") \
.config("spark.hadoop.fs.AbstractFileSystem.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS") \
-.config('spark.jars', '/home/pce/Documents/VBP_Joint_Project-main/spark-bigquery-with-dependencies_2.12-0.27.0.jar') \
+.config('spark.jars', 'spark-bigquery-with-dependencies_2.12-0.27.0.jar') \
.config("spark.hadoop.google.cloud.auth.service.account.enable", "true") \
.config("spark.hadoop.google.cloud.auth.service.account.json.keyfile", gcs_config) \
.config("temporaryGcsBucket", raw_bucket_name) \
