# predicting_ethereum_prices.py

# -*- coding: utf-8 -*-
"""predicting_ethereum_prices.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1t1MImZP0Vyzj36r8wZxJ7iw6nETyvJ0T

# Import Library
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.losses import Huber

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime

from google.colab import drive
# Mount Google Drive so files under /content/drive are reachable from the
# Colab runtime.
drive.mount('/content/drive')

"""# Data Understanding"""

# Read the Ethereum price table from the mounted drive. The bare expressions
# below only render output when executed as notebook cells.
csv_path = '/content/drive/MyDrive/ETHHistorycalData/ethereum_price.csv'
df = pd.read_csv(csv_path)
df

# Per-column count of missing values.
missing_per_column = df.isnull().sum()
missing_per_column

"""## Change Date Format"""

# Parse the textual dates (format '%b %d, %Y') into real timestamps first so
# the rows can be ordered by time before being re-formatted as strings.
df['Date'] = pd.to_datetime(df['Date'], format='%b %d, %Y')

# Keep only the two columns used downstream. The explicit .copy() detaches the
# selection from the original frame, so the column assignments below do not
# trigger pandas' SettingWithCopyWarning.
df = df[['Date', 'Price']].copy()

# The train/validation split further down slices the series by position, so
# the rows must be oldest-first. Sorting is a no-op when the CSV is already
# chronological and fixes the order when it is exported newest-first.
df = df.sort_values('Date').reset_index(drop=True)
df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')
df

"""## Data Normalization"""

scaler = MinMaxScaler()
# Strip thousands separators ("1,234.56") before converting to float. Casting
# to str first keeps this working when pandas already parsed the column as
# numeric (a numeric column has no .str accessor).
df['Price'] = (
    df['Price'].astype(str).str.replace(',', '', regex=False).astype(float)
)
# NOTE(review): the scaler is fitted on the full series, including the rows
# later used for validation, so validation min/max leak into the scaling.
# Fit on the training portion only if a strict hold-out is required.
# .ravel() flattens the (n, 1) scaler output back to a 1-D column.
df['Price'] = scaler.fit_transform(df['Price'].values.reshape(-1, 1)).ravel()

"""## ETH Price Chart"""

# Raw arrays reused by the plotting code here and by the split further down.
dates = df['Date'].values
prices = df['Price'].values

plt.figure(figsize=(15,5))
# `dates` holds '%Y-%m-%d' strings; plotting strings makes Matplotlib treat
# every row as a separate category (one tick label per row). Converting to
# datetimes yields a continuous, readable date axis.
plt.plot(pd.to_datetime(dates), prices)
plt.title('Prices', fontsize=20)

"""# Data Preparation"""

def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
    """Turn a 1-D series into batched (window, next value) training pairs.

    Args:
        series: Sequence of values; a trailing axis of size 1 is appended
            before slicing.
        window_size: Number of consecutive steps used as the model input.
        batch_size: Number of (input, label) pairs per emitted batch.
        shuffle_buffer: Buffer size passed to ``Dataset.shuffle``.

    Returns:
        A ``tf.data.Dataset`` yielding ``(inputs, labels)`` batches, where
        each input is the first ``window_size`` steps of a window and each
        label is the single step that follows it (kept as a length-1 slice).
    """
    # Each raw window carries the inputs plus one extra step for the label.
    span = window_size + 1

    values = tf.expand_dims(series, axis=-1)
    windows = tf.data.Dataset.from_tensor_slices(values).window(
        span, shift=1, drop_remainder=True
    )
    # window() yields nested datasets; batching each one by its full length
    # flattens it into a single tensor per window.
    samples = windows.flat_map(lambda win: win.batch(span))
    samples = samples.shuffle(shuffle_buffer)
    pairs = samples.map(lambda win: (win[:-1], win[-1:]))

    batched = pairs.batch(batch_size)
    return batched.prefetch(1)

# Positional 80/20 split: the leading 80% of the series is used for training
# and the remainder for validation.
series = np.array(prices)
time = np.array(range(len(prices)))
train_size = int(len(prices) * 0.8)
x_train, x_valid = series[:train_size], series[train_size:]

# Both splits are windowed with identical settings.
window_options = dict(window_size=60, batch_size=100, shuffle_buffer=1000)

train_set = windowed_dataset(x_train, **window_options)
valid_set = windowed_dataset(x_valid, **window_options)

"""# Model Development"""

# Two stacked LSTM layers (the first returns the full sequence so the second
# can consume it) followed by a small dense head with dropout and a single
# output unit.
model = Sequential()
model.add(LSTM(60, return_sequences=True))
model.add(LSTM(60))
model.add(Dense(30, activation="relu"))
model.add(Dropout(0.3))
model.add(Dense(10, activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(1))

# Early-stopping target: 10% of the (max - min) span of the Price column.
price_span = df['Price'].max() - df['Price'].min()
threshold_mae = price_span * 10/100

class EarlyStop(tf.keras.callbacks.Callback):
    """Keras callback that halts training once training MAE is low enough.

    After every epoch the logged ``'mae'`` value is compared with the
    module-level ``threshold_mae``; when it drops below that threshold the
    callback sets ``stop_training`` on the model.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's MAE and request a stop when below the threshold.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric dictionary supplied by Keras; may be ``None`` or may
                lack the ``'mae'`` key, in which case nothing happens.
        """
        # `logs=None` avoids a shared mutable default, and the explicit None
        # check prevents a TypeError from comparing None with a float when
        # the metric is absent.
        mae = (logs or {}).get('mae')
        if mae is not None and mae < threshold_mae:
            print("mae < 10%")
            self.model.stop_training = True

callbacks = EarlyStop()

optimizer = SGD(learning_rate=1.0000e-04, momentum=0.9)

# Only MAE is tracked alongside the Huber loss. "accuracy" is a classification
# metric and has no meaningful interpretation for a continuous price target,
# so it is no longer compiled, plotted, or reported.
model.compile(loss=Huber(),
              optimizer=optimizer,
              metrics=["mae"])

# train_set and valid_set are already-batched tf.data datasets, so
# `batch_size` is not passed to fit(); batching is controlled by the dataset.
history = model.fit(train_set, epochs=100,
                    validation_data=valid_set,
                    callbacks=[callbacks])

"""# Model Evaluation"""

# Start a fresh figure so the curves are not drawn on top of a previously
# opened chart when this file runs as a plain script.
plt.figure()
for history_key, curve_label in (
    ('loss', 'Training Loss'),
    ('val_loss', 'Validation Loss'),
    ('mae', 'Training MAE'),
    ('val_mae', 'Validation MAE'),
):
    plt.plot(history.history[history_key], label=curve_label)
plt.title('Training and Validation Metrics')
plt.xlabel('Epoch')
plt.legend()
plt.show()

# evaluate() returns the loss followed by each compiled metric (here: MAE).
# The numbers are computed on the validation split.
test_loss, test_mae = model.evaluate(valid_set)

print("Test Loss:", test_loss)
print("Test MAE:", test_mae)