Add files via upload

wiseinvoker · Jan 2, 2018 · a060c00 · a060c00
1 parent 4507372
commit a060c00
Show file tree

Hide file tree

Showing 4 changed files with 265 additions and 38 deletions.
diff --git a/main.py b/main.py
@@ -0,0 +1,76 @@
+"""
+This is the main code for automated FX trading
+
+"""
+
+import datetime
+import os
+
+import numpy as np
+import tensorflow as tf
+from apscheduler.schedulers.blocking import BlockingScheduler
+
+from helpers.get_features import get_features, min_max_scaling
+from helpers.get_historical_data import get_latest_oanda_data
+from helpers.oanda_api_helpers import TradingSession, close_order_manually
+from helpers.utils import remove_nan_rows
+
+# oanda access keys
+accountID = '101-004-3943081-006'
+access_token = 'fb12d7edd860927ce27467d8ec4aee94-1cb7ffc0e40d649b736315872a10c545'
+model_name = 'lr-v2-avg_score0.204-64000'
+
+# init trading session
+trading_sess = TradingSession(accountID=accountID, access_token=access_token)
+
+# init tf model
+config = tf.ConfigProto(device_count={'GPU': 0})
+sess = tf.Session(config=config)
+saver = tf.train.import_meta_graph('saved_models/' + model_name + '.meta')
+saver.restore(sess, tf.train.latest_checkpoint('saved_models/'))
+graph = tf.get_default_graph()
+x = graph.get_tensor_by_name("Placeholder:0")
+tf_op_to_restore = graph.get_tensor_by_name("Softmax:0")
+
+# Do stuff every period
+scheduler = BlockingScheduler()
+
+
+@scheduler.scheduled_job(trigger='cron', day_of_week='0-6', hour='0-23', minute='0', second='5')
+def do_stuff_every_period():
+
+    # retrieve data and return signal
+    oanda_data = get_latest_oanda_data('EUR_USD', 'H1', 64)
+    input_data_raw, input_data_dummy = get_features(oanda_data)
+    input_data, input_data_dummy = remove_nan_rows([input_data_raw, input_data_dummy])
+    input_data_scaled_no_dummies = (input_data - min_max_scaling[1, :]) / (
+            min_max_scaling[0, :] - min_max_scaling[1, :])
+    input_data_scaled = np.concatenate([input_data_scaled_no_dummies, input_data_dummy], axis=1)
+    y_ = sess.run(tf_op_to_restore, feed_dict={x: input_data_scaled})
+    order_signal = y_.argmax()  # 0 stands for buy, 1 for sell, 2 for hold
+
+    print('{} | signal: buy: {:.2f}, sell: {:.2f}, nothing: {:.2f}'.format(
+        str(datetime.datetime.now())[:-4], y_[0][0], y_[0][1], y_[0][2]))
+
+    # if signal long
+    if order_signal == 0:
+        if trading_sess.order_book['EUR_USD']['order_type'] == -1:
+            trading_sess.close_order('EUR_USD')
+        trading_sess.open_order('EUR_USD', 1)
+
+    # if signal short
+    elif order_signal == 1:
+        if trading_sess.order_book['EUR_USD']['order_type'] == 1:
+            trading_sess.close_order('EUR_USD')
+        trading_sess.open_order('EUR_USD', -1)
+
+    # else (uncharted waters)
+    else:
+        print('Do nothing')
+
+
+# start: run one cycle immediately, then hand control to the blocking scheduler
+do_stuff_every_period()
+scheduler.start()
+
+# close_order_manually(accountID, access_token, 1579)
+# trading_sess.check_open_positions()
+# trading_sess.check_account_summary()
+# trading_sess.order_book
diff --git a/train_logistic_regression_v2.py b/train_logistic_regression_v2.py
@@ -1,107 +1,131 @@
 """
 Training logistic regression v2:
-using regression to allocate funds.
+using model to allocate funds, i.e. maximizing return without correct labels.
 
 Things to work out:
-1. price_data_raw percentage or pips?
+1. price_data_raw percentage or pips? (percentage)
 2. objective function normalization (yearly percentage return..? etc)
 
-
+Other options to set:
+np.set_printoptions(linewidth=75*3+5, edgeitems=6)
+pl.rcParams.update({'font.size': 6})
 """
 
+
 import numpy as np
 import pylab as pl
 import tensorflow as tf
-from sklearn.preprocessing import minmax_scale
 from helpers.utils import extract_timeseries_from_oanda_data, train_test_validation_split
 from helpers.utils import remove_nan_rows, get_data_batch
 from models import logistic_regression
-from helpers.get_features import get_features
+from helpers.get_features import get_features, min_max_scaling
+
 
 # hyper-params
 batch_size = 1024
 plotting = False
+saving = True  # checkpoint the model when the save rule in the main loop fires
+value_cv_moving_average = 50  # window, in logged evaluations, of the cv-value moving average
+split = (0.5, 0.3, 0.2)  # passed to train_test_validation_split below
 
 # load data
-# TODO: check shit np.concatenate([price, input_data_raw[:, 0:1], price_data_raw[:, 0:1], output_data_raw], axis=1)
-# TODO np.set_printoptions(linewidth=75*3+5, edgeitems=6)
-oanda_data = np.load('data\\AUD_USD_H1.npy')
+# TODO(review): this TODO has no description - state what is still open or remove it
+oanda_data = np.load('data\\EUR_USD_H1.npy')[-50000:]  # keep only the last 50000 rows of the file
 price_data_raw = extract_timeseries_from_oanda_data(oanda_data, ['closeMid'])
 input_data_raw, input_data_dummy = get_features(oanda_data)
-# price_data_raw = np.concatenate([[[0]], price_data_raw[1:] - price_data_raw[:-1]], axis=0)
-# TODO: new price data
-price_data_raw = np.concatenate([[[0]], (price_data_raw[1:] - price_data_raw[:-1]) / (price_data_raw[1:] + 1e-10)], axis=0)
+price_data_raw = np.concatenate([[[0]],  # first row has no predecessor, so its change is set to 0
+                                 (price_data_raw[1:] - price_data_raw[:-1]) / (price_data_raw[1:] + 1e-10)], axis=0)
 
 # prepare data
 input_data, price_data, input_data_dummy = remove_nan_rows([input_data_raw, price_data_raw, input_data_dummy])
-input_data_scaled = np.concatenate([minmax_scale(input_data, axis=0), input_data_dummy], axis=1)
+input_data_scaled_no_dummies = (input_data - min_max_scaling[1, :]) / (min_max_scaling[0, :] - min_max_scaling[1, :])  # fixed scaling constants; presumably row 0 = max, row 1 = min - confirm
+input_data_scaled = np.concatenate([input_data_scaled_no_dummies, input_data_dummy], axis=1)  # dummy columns are appended unscaled
 
 # split to train,test and cross validation
 input_train, input_test, input_cv, price_train, price_test, price_cv = \
-    train_test_validation_split([input_data_scaled, price_data], split=(0.5, 0.3, 0.2))
+    train_test_validation_split([input_data_scaled, price_data], split=split)
 
 # get dims
 _, input_dim = np.shape(input_data_scaled)
 
 # forward-propagation
-x, _, logits, y_ = logistic_regression(input_dim, 3, drop_keep_prob=0.8)
+x, _, logits, y_ = logistic_regression(input_dim, 3, drop_keep_prob=0.7)  # NOTE(review): keep-prob is fixed at build time - confirm dropout is off for evaluation/inference
 
 # tf cost and optimizer
-# TODO: maximize return or sharpe or something, but not cross-entropy
 price_h = tf.placeholder(tf.float32, [None, 1])
 signals = tf.constant([[1., -1., 0.]])
-objective = (tf.reduce_mean(y_[:-1] * signals * price_h[1:]) + tf.constant(1.))  # profit function
+objective = (tf.reduce_mean(y_[:-1] * signals * price_h[1:] * 100))  # profit function: output at row t times the price change at row t+1, mean over all entries, x100
 train_step = tf.train.AdamOptimizer(0.001).minimize(-objective)
 
 # init session
-step, cost_hist_train, cost_hist_test, value_hist_train, value_hist_test, value_hist_cv, value_hist_cv_ma = \
-    0, [], [], [], [], [], []
+step, step_hist, objective_hist_train, objective_hist_test, value_hist_train, value_hist_test, \
+    value_hist_cv, value_hist_cv_ma, saving_score = 0, [], [], [], [], [], [], [], 0.05  # saving_score: best score so far, seeded with the 0.05 save threshold
+saver = tf.train.Saver()  # used by the save rule in the main loop to write checkpoints
 init = tf.global_variables_initializer()
 sess = tf.Session()
 sess.run(init)
 
+
 # main loop
 while True:
 
     # train model
     x_train, price_batch = get_data_batch([input_train, price_train], batch_size)
-    _, cost_train, sig = sess.run([train_step, objective, y_], feed_dict={x: x_train, price_h: price_batch})
+    _, objective_train, sig = sess.run([train_step, objective, y_], feed_dict={x: x_train, price_h: price_batch})  # NOTE(review): objective pairs row t with row t+1, so batches must be time-contiguous - confirm get_data_batch
 
     # keep track of stuff
     step += 1
-    if step % 10 == 0 or step == 1:
+    if step % 100 == 0 or step == 1:  # evaluate and log on the first step and then every 100 steps
 
         # get y_ predictions
         y_train_pred = sess.run(y_, feed_dict={x: input_train})
-        y_test_pred, cost_test = sess.run([y_, objective], feed_dict={x: input_test, price_h: price_test})
+        y_test_pred, objective_test = sess.run([y_, objective], feed_dict={x: input_test, price_h: price_test})
         y_cv_pred = sess.run(y_, feed_dict={x: input_cv})
 
         # get portfolio value
-        value_test = np.cumsum(np.sum(y_test_pred[:-1] * [1., -1., 0.] * price_test[1:], axis=1))
-        value_train = np.cumsum(np.sum(y_train_pred[:-1] * [1., -1., 0.] * price_train[1:], axis=1))
-        value_cv = np.cumsum(np.sum(y_cv_pred[:-1] * [1., -1., 0.] * price_cv[1:], axis=1))
+        value_train = 1 + np.cumsum(np.sum(y_train_pred[:-1] * [1., -1., 0.] * price_train[1:], axis=1))  # value curve starting at 1: additive (not compounded) sum of per-step weighted changes
+        value_test = 1 + np.cumsum(np.sum(y_test_pred[:-1] * [1., -1., 0.] * price_test[1:], axis=1))
+        value_cv = 1 + np.cumsum(np.sum(y_cv_pred[:-1] * [1., -1., 0.] * price_cv[1:], axis=1))
 
         # save history
-        cost_hist_train.append(cost_train)
-        cost_hist_test.append(cost_test)
+        step_hist.append(step)
+        objective_hist_train.append(objective_train)
+        objective_hist_test.append(objective_test)
+        value_hist_train.append(value_train[-1])
+        value_hist_test.append(value_test[-1])
+        value_hist_cv.append(value_cv[-1])
+        value_hist_cv_ma.append(np.mean(value_hist_cv[-value_cv_moving_average:]))  # mean of the last value_cv_moving_average logged cv values
 
-        print('Step {}: train {:.4f}, test {:.4f}'.format(step, cost_train, cost_test))
+        print('Step {}: train {:.4f}, test {:.4f}'.format(step, objective_train, objective_test))
 
         if plotting:
 
-            pl.figure(1)
-            pl.title('Cost')
-            pl.plot(cost_hist_train, color='darkorange')
-            pl.plot(cost_hist_test, color='dodgerblue')
+            pl.figure(1, figsize=(3, 7), dpi=80, facecolor='w', edgecolor='k')
+
+            pl.subplot(211)
+            pl.title('Objective function')
+            pl.plot(step_hist, objective_hist_train, color='darkorange', linewidth=0.3)
+            pl.plot(step_hist, objective_hist_test, color='dodgerblue', linewidth=0.3)
 
+            pl.subplot(212)
+            pl.title('Portfolio value')
+            pl.plot(step_hist, value_hist_train, color='darkorange', linewidth=0.3)
+            pl.plot(step_hist, value_hist_test, color='dodgerblue', linewidth=0.3)
+            pl.plot(step_hist, value_hist_cv, color='magenta', linewidth=1)
+            pl.plot(step_hist, value_hist_cv_ma, color='black', linewidth=0.5)
             pl.pause(1e-10)
 
-            if value_test[-1] > 0.01 and value_train[-1] > 0.01 and value_cv[-1] > 0.01:
-                print(value_train[-1], value_test[-1], value_cv[-1])
+        # save when the mean of the final test and cv values is a new best and exceeds 0.05
+        if saving:
+            current_score = 0 if value_test[-1] < 0.01 or value_cv[-1] < 0.01 \
+                else np.average([value_test[-1], value_cv[-1]])
+            saving_score = current_score if saving_score < current_score else saving_score  # running best score
+            if saving_score == current_score and saving_score > 0.05:  # NOTE(review): values now start at 1 ("1 +" above), so the 0.01 / 0.05 thresholds look stale - confirm
+                saver.save(sess, 'saved_models/lr-v2-avg_score{:.3f}'.format(current_score), global_step=step)
+                print('Model saved. Average score: {:.2f}'.format(current_score))
 
                 pl.figure(2)
-                pl.plot(value_test)
-                pl.plot(value_train)
-                pl.plot(value_cv)
-
+                pl.plot(value_test, linewidth=0.2)  # drawn on every save, independent of the plotting flag
+                pl.plot(value_cv, linewidth=2)
+                pl.pause(1e-10)
 
diff --git a/train_lstm_v1.py b/train_lstm_v1.py
@@ -19,7 +19,7 @@
 split = (0.5, 0.3, 0.2)
 
 # load data
-oanda_data = np.load('data\\EUR_GBP_H1.npy')[-60000:]
+oanda_data = np.load('data\\EUR_GBP_H1.npy')[-50000:]  # keep only the last 50000 rows of the file
 input_data_raw, input_data_dummies = get_features(oanda_data)
 output_data_raw = price_to_binary_target(oanda_data, delta=0.00037)
 price_data_raw = extract_timeseries_from_oanda_data(oanda_data, ['closeMid'])

diff --git a/train_lstm_v2.py b/train_lstm_v2.py
@@ -0,0 +1,127 @@
+"""
+Training lstm v2:
+using model to allocate funds, i.e. maximizing return without correct labels.
+
+Other options to set:
+np.set_printoptions(linewidth=75*3+5, edgeitems=6)
+pl.rcParams.update({'font.size': 6})
+"""
+
+import numpy as np
+import pylab as pl
+import tensorflow as tf
+from helpers.utils import extract_timeseries_from_oanda_data, train_test_validation_split
+from helpers.utils import remove_nan_rows, get_data_batch, get_lstm_input_output
+from models import lstm_nn
+from helpers.get_features import get_features, min_max_scaling
+
+# hyper-params
+batch_size = 256  # passed to get_data_batch for every training step
+time_steps = 12  # sequence length passed to get_lstm_input_output and lstm_nn
+plotting = False  # draw the objective / portfolio-value figures while training
+saving = False  # checkpoint the model when the save rule in the training loop fires
+value_cv_moving_average = 50  # window, in logged evaluations, of the cv-value moving average
+split = (0.6, 0.2, 0.1)  # NOTE(review): sums to 0.9 while the sibling scripts use (0.5, 0.3, 0.2) - confirm this is intended
+
+# load data
+oanda_data = np.load('data\\EUR_USD_M10.npy')  # whole file is used; the [-50000:] tail slice is disabled
+price_data_raw = extract_timeseries_from_oanda_data(oanda_data, ['closeMid'])
+input_data_raw, input_data_dummy = get_features(oanda_data)
+price_data_raw = np.concatenate([[[0]],  # first row has no predecessor, so its change is set to 0
+                                 (price_data_raw[1:] - price_data_raw[:-1]) / (price_data_raw[1:] + 1e-10)], axis=0)
+
+# prepare data: scale as (x - row 1) / (row 0 - row 1) of min_max_scaling (presumably max / min rows - confirm)
+input_data, price_data, input_data_dummy = remove_nan_rows([input_data_raw, price_data_raw, input_data_dummy])
+input_data_scaled_no_dummies = (input_data - min_max_scaling[1, :]) / (min_max_scaling[0, :] - min_max_scaling[1, :])
+input_data_scaled = np.concatenate([input_data_scaled_no_dummies, input_data_dummy], axis=1)  # dummy columns are appended unscaled
+input_data_lstm, _ = get_lstm_input_output(input_data_scaled, np.zeros_like(input_data), time_steps=time_steps)  # the output half is discarded, zeros are only a placeholder
+price_data = price_data[-len(input_data_lstm):]  # trim to the length of input_data_lstm; assumes windowing drops rows from the start - confirm
+
+# split to train,test and cross validation
+input_train, input_test, input_cv, price_train, price_test, price_cv = \
+    train_test_validation_split([input_data_lstm, price_data], split=split)
+
+# get dims
+_, _, input_dim = np.shape(input_train)  # input_train is 3-D; the size of its last axis is handed to lstm_nn
+
+# forward-propagation
+x, y, logits, y_ = lstm_nn(input_dim, 3, time_steps=time_steps, n_hidden=[3], drop_keep_prob=0.6)  # NOTE(review): keep-prob is fixed at build time - confirm dropout is off for evaluation
+
+# tf cost and optimizer
+price_h = tf.placeholder(tf.float32, [None, 1])
+signals = tf.constant([[1., -1., 0.]])  # weight applied to each of the three model outputs
+objective = (tf.reduce_mean(y_[:-1] * signals * price_h[1:] * 100))  # profit function: output at row t times the price change at row t+1, mean over all entries, x100
+train_step = tf.train.AdamOptimizer(0.001).minimize(-objective)  # minimizing the negated objective maximizes it
+
+
+# init session
+step, step_hist, objective_hist_train, objective_hist_test, value_hist_train, value_hist_test, \
+    value_hist_cv, value_hist_cv_ma, saving_score = 0, [], [], [], [], [], [], [], 0.05  # saving_score: best score so far, seeded with the 0.05 save threshold
+saver = tf.train.Saver()  # used by the save rule in the training loop to write checkpoints
+init = tf.global_variables_initializer()
+sess = tf.Session()
+sess.run(init)
+
+# train: loops until the process is interrupted
+while True:
+
+    # train model
+    x_train, price_batch = get_data_batch([input_train, price_train], batch_size)
+    _, objective_train, sig = sess.run([train_step, objective, y_], feed_dict={x: x_train, price_h: price_batch})  # NOTE(review): objective pairs row t with row t+1, so batches must be time-contiguous - confirm get_data_batch
+
+    # keep track of stuff
+    step += 1
+    if step % 100 == 0 or step == 1:  # evaluate and log on the first step and then every 100 steps
+
+        # get y_ predictions
+        y_train_pred = sess.run(y_, feed_dict={x: input_train})
+        y_test_pred, objective_test = sess.run([y_, objective], feed_dict={x: input_test, price_h: price_test})
+        y_cv_pred = sess.run(y_, feed_dict={x: input_cv})
+
+        # get portfolio value
+        value_train = 1 + np.cumsum(np.sum(y_train_pred[:-1] * [1., -1., 0.] * price_train[1:], axis=1))  # value curve starting at 1: additive (not compounded) sum of per-step weighted changes
+        value_test = 1 + np.cumsum(np.sum(y_test_pred[:-1] * [1., -1., 0.] * price_test[1:], axis=1))
+        value_cv = 1 + np.cumsum(np.sum(y_cv_pred[:-1] * [1., -1., 0.] * price_cv[1:], axis=1))
+
+        # save history
+        step_hist.append(step)
+        objective_hist_train.append(objective_train)
+        objective_hist_test.append(objective_test)
+        value_hist_train.append(value_train[-1])
+        value_hist_test.append(value_test[-1])
+        value_hist_cv.append(value_cv[-1])
+        value_hist_cv_ma.append(np.mean(value_hist_cv[-value_cv_moving_average:]))  # mean of the last value_cv_moving_average logged cv values
+
+        print('Step {}: train {:.4f}, test {:.4f}'.format(step, objective_train, objective_test))
+
+        if plotting:
+
+            pl.figure(1, figsize=(3, 7), dpi=80, facecolor='w', edgecolor='k')
+
+            pl.subplot(211)
+            pl.title('Objective function')
+            pl.plot(step_hist, objective_hist_train, color='darkorange', linewidth=0.3)
+            pl.plot(step_hist, objective_hist_test, color='dodgerblue', linewidth=0.3)
+
+            pl.subplot(212)
+            pl.title('Portfolio value')
+            pl.plot(step_hist, value_hist_train, color='darkorange', linewidth=0.3)
+            pl.plot(step_hist, value_hist_test, color='dodgerblue', linewidth=0.3)
+            pl.plot(step_hist, value_hist_cv, color='magenta', linewidth=1)
+            pl.plot(step_hist, value_hist_cv_ma, color='black', linewidth=0.5)
+            pl.pause(1e-10)
+
+        # save when the mean of the final test and cv values is a new best and exceeds 0.05
+        if saving:
+            current_score = 0 if value_test[-1] < 0.01 or value_cv[-1] < 0.01 \
+                else np.average([value_test[-1], value_cv[-1]])
+            saving_score = current_score if saving_score < current_score else saving_score  # running best score
+            if saving_score == current_score and saving_score > 0.05:  # NOTE(review): values start at 1 ("1 +" above), so the 0.01 / 0.05 thresholds look stale - confirm
+                saver.save(sess, 'saved_models/lstm-v2-avg_score{:.3f}'.format(current_score), global_step=step)
+                print('Model saved. Average score: {:.2f}'.format(current_score))
+
+                pl.figure(2)  # drawn on every save, independent of the plotting flag
+                pl.plot(value_test, linewidth=0.2)
+                pl.plot(value_cv, linewidth=2)
+                pl.pause(1e-10)
+