Akshat111111 · Akshat111111 · Jul 31, 2024 · Jul 30, 2024
diff --git a/Finacial Domain/Health Insurance Price Prediction/Health_insurance_cost_prediction.ipynb b/Finacial Domain/Health Insurance Price Prediction/Health_insurance_cost_prediction.ipynb
diff --git a/Finacial Domain/Health Insurance Price Prediction/Images/boxplot-1.png b/Finacial Domain/Health Insurance Price Prediction/Images/boxplot-1.png
diff --git a/Finacial Domain/Health Insurance Price Prediction/Images/boxplot-2_processed.png b/Finacial Domain/Health Insurance Price Prediction/Images/boxplot-2_processed.png
diff --git a/Finacial Domain/Health Insurance Price Prediction/Images/correlation_matrix.png b/Finacial Domain/Health Insurance Price Prediction/Images/correlation_matrix.png
diff --git a/Finacial Domain/Health Insurance Price Prediction/Images/countplot.png b/Finacial Domain/Health Insurance Price Prediction/Images/countplot.png
diff --git a/...cial Domain/Health Insurance Price Prediction/Images/cross_validation_graph.png b/...cial Domain/Health Insurance Price Prediction/Images/cross_validation_graph.png
diff --git a/...h Insurance Price Prediction/Images/evaluation_metrix_comparison_all_models.png b/...h Insurance Price Prediction/Images/evaluation_metrix_comparison_all_models.png
diff --git a/...ial Domain/Health Insurance Price Prediction/Images/gbm_actual_vs_predicted.png b/...ial Domain/Health Insurance Price Prediction/Images/gbm_actual_vs_predicted.png
diff --git a/Finacial Domain/Health Insurance Price Prediction/Images/histogram (1).png b/Finacial Domain/Health Insurance Price Prediction/Images/histogram (1).png
diff --git a/...ial Domain/Health Insurance Price Prediction/Images/mlr_actual_vs_predicted.png b/...ial Domain/Health Insurance Price Prediction/Images/mlr_actual_vs_predicted.png
diff --git a/Finacial Domain/Health Insurance Price Prediction/Images/pie-chart-1.png b/Finacial Domain/Health Insurance Price Prediction/Images/pie-chart-1.png
diff --git a/Finacial Domain/Health Insurance Price Prediction/Images/pie-chart-2.png b/Finacial Domain/Health Insurance Price Prediction/Images/pie-chart-2.png
diff --git a/...cial Domain/Health Insurance Price Prediction/Images/rf_actual_vs_predicted.png b/...cial Domain/Health Insurance Price Prediction/Images/rf_actual_vs_predicted.png
diff --git a/Finacial Domain/Health Insurance Price Prediction/Images/scatterplot.png b/Finacial Domain/Health Insurance Price Prediction/Images/scatterplot.png
diff --git a/Finacial Domain/Health Insurance Price Prediction/Images/violin-plot.png b/Finacial Domain/Health Insurance Price Prediction/Images/violin-plot.png
diff --git a/...Domain/Health Insurance Price Prediction/Images/xgboost_actual_Vs_predicted.png b/...Domain/Health Insurance Price Prediction/Images/xgboost_actual_Vs_predicted.png
diff --git a/Finacial Domain/Health Insurance Price Prediction/app.py b/Finacial Domain/Health Insurance Price Prediction/app.py
@@ -0,0 +1,57 @@
+import streamlit as st
+import pandas as pd
+import joblib
+
+# Load the trained model and label encoders
+model_lr = joblib.load('linear_regression_model.pkl')
+label_encoders = joblib.load('label_encoders.pkl')
+
+
+# Function to predict insurance charges
+def predict_insurance_charges(age, sex, bmi, children, smoker, region):
+    # Transform categorical variables using label encoders
+    sex_encoded = label_encoders['sex'].transform([sex])[0]
+    smoker_encoded = label_encoders['smoker'].transform([smoker])[0]
+    region_encoded = label_encoders['region'].transform([region])[0]
+
+    # Prepare input data as DataFrame
+    input_data = pd.DataFrame({
+        'age': [age],
+        'sex': [sex_encoded],
+        'bmi': [bmi],
+        'children': [children],
+        'smoker': [smoker_encoded],
+        'region': [region_encoded]
+    })
+
+    # Make prediction using the trained Linear Regression model
+    predicted_charge = model_lr.predict(input_data)[0]
+
+    return predicted_charge
+
+
+# Streamlit app
+def main():
+    st.title('Health Insurance Price Prediction')
+    st.markdown('Enter the following details to predict insurance charges:')
+
+    # Input fields
+    age = st.number_input('Age', min_value=0, max_value=100, step=1)
+    sex = st.selectbox('Sex', ['male', 'female'])
+    bmi = st.number_input('BMI', min_value=10.0, max_value=50.0, step=0.1)
+    children = st.number_input('Number of Children', min_value=0, max_value=10, step=1)
+    smoker = st.selectbox('Smoker', ['yes', 'no'])
+    region = st.selectbox('Region of India', ['northeast', 'northwest', 'southeast', 'southwest'])
+
+    if st.button('Predict'):
+        # Call prediction function
+        predicted_charge = predict_insurance_charges(age, sex, bmi, children, smoker, region)
+
+        # Display prediction result in a green container with bold text
+        st.markdown(
+            f'<div style="background-color:#00FF00; padding:10px; border-radius:10px;"><h2 style="color:black; text-align:center;">Predicted Insurance Charge: <b>{predicted_charge:.2f}Rs</b></h2></div>',
+            unsafe_allow_html=True)
+
+
+# Build the page only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+    main()
diff --git a/Finacial Domain/Health Insurance Price Prediction/readme.md b/Finacial Domain/Health Insurance Price Prediction/readme.md
@@ -0,0 +1,135 @@
+#  Health Insurance Price Prediction using Machine Learning
+
+Project Summary :
+
+Data Exploration and Preprocessing:
+
+Dataset:  https://www.kaggle.com/datasets/annetxu/health-insurance-cost-prediction
+
+Exploratory Data Analysis (EDA): 
+
+Utilized Plotly and Seaborn for visualizations including pie charts, histograms, violin plots, and box plots to understand data distributions, correlations, and outliers.
+
+Data Preprocessing:
+
+Label Encoding: Converted categorical variables (sex, smoker, region) into numerical format using LabelEncoder from scikit-learn.
+Handling Missing Values: Ensured data completeness by checking for and handling missing values appropriately.
+Normalization: Used StandardScaler from scikit-learn for feature scaling where applicable.
+
+
+Machine Learning Models:
+
+Linear Regression (LR):
+
+Trained a Linear Regression model to predict insurance charges based on features such as age, BMI, and others.
+Evaluated using metrics like R-squared (coefficient of determination), Mean Squared Error (MSE), Root Mean Squared Error (RMSE), and Mean Absolute Percentage Error (MAPE).
+
+
+Random Forest (RF):
+
+Applied a Random Forest Regressor for prediction.
+Evaluated performance metrics similar to LR.
+
+
+XGBoost (XGB) and Gradient Boosting Machine (GBM):
+
+Implemented XGBoost and GBM models for comparison.
+Evaluated and compared their performance metrics with LR and RF.
+
+
+Model Evaluation and Comparison:
+
+Compared the performance of LR, RF, XGB, and GBM using metrics such as R-squared, MSE, RMSE, and MAPE.
+Visualized the actual vs. predicted values using line plots and evaluated the accuracy across different models.
+
+
+Deployment with Streamlit:
+
+Developed a Streamlit web application for predicting insurance charges based on user inputs (age, sex, BMI, children, smoker, region).
+Integrated the trained LR model and label encoders into the Streamlit app.
+Provided a user-friendly interface where users can input their data and get the predicted insurance charge displayed in a visually appealing green container with bold text.
+
+
+Future Directions:
+
+
+Model Improvement: Fine-tuning models for better accuracy, exploring ensemble techniques or deep learning approaches if needed.
+Feature Engineering: Further exploring feature interactions or transformations to enhance model performance.
+User Experience: Improving the UI/UX of the Streamlit app, adding more features such as data visualization options and model selection.
+
+
+Tools and Technologies Used:
+
+Programming Languages: Python
+Libraries and Frameworks: pandas, NumPy, scikit-learn, XGBoost, Plotly, Seaborn, Streamlit
+Data Visualization: Plotly, Seaborn for interactive and insightful visualizations.
+Machine Learning: Regression models (Linear Regression, Random Forest, XGBoost, GBM) for predictive analysis.
+Web Application Development: Streamlit for creating interactive and user-friendly web applications.
+
+Conclusion:
+
+The project revolves around leveraging machine learning techniques to predict insurance charges based on various customer attributes. The journey has included data exploration, preprocessing, model building, evaluation, and deployment using modern tools and frameworks. This structured approach ensures robust predictions and a seamless user experience through the Streamlit application.
+
+## How to Use
+
+1. **Clone the Repository**: 
+    ```sh
+    git clone url_to_this_repository
+    ```
+
+2. **Install Dependencies**: 
+    ```sh
+    pip install -r requirements.txt
+    ```
+
+3. **Run the Model**: 
+    ```sh
+    streamlit run app.py
+    ```
+
+4. **View Results**: The app lets you enter a person's details and shows the predicted cost of their health insurance.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Finacial Domain/Health Insurance Price Prediction/requirements.txt b/Finacial Domain/Health Insurance Price Prediction/requirements.txt
@@ -0,0 +1,5 @@
+scikit-learn==1.2.2
+joblib==1.4.2
+pandas==2.0.3
+numpy==1.25.2
+streamlit