-
Notifications
You must be signed in to change notification settings - Fork 30
/
binary-Q1-archm.Rmd
465 lines (371 loc) · 17.8 KB
/
binary-Q1-archm.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
---
title: "<img src='www/binary-logo-resize.jpg' width='240'>"
subtitle: "[binary.com](https://github.com/englianhu/binary.com-interview-question) 面试试题 I - GARCH模型中的`ARCH in Mean`"
author: "[<span style='color:blue'>®γσ, Lian Hu</span>](https://englianhu.github.io/) <img src='www/ENG.jpg' width='24'> <img src='www/RYO.jpg' width='24'>®"
date: "`r lubridate::today('Asia/Tokyo')`"
output:
html_document:
number_sections: yes
toc: yes
toc_depth: 4
toc_float:
collapsed: yes
smooth_scroll: yes
code_folding: hide
---
# 简介
[binary.com 面试试题 I - GARCH模型中的`ARIMA(p,d,q)`参数最优化](https://rpubs.com/englianhu/binary-Q1FiGJRGARCH)添加了季节性和比较模型精准性。目前还测试下`archm=TRUE`是否会更精准,详情请参阅[[问答] 请问怎样用R语言产生arch, arch-m, garch, garch-m的随机数?](http://bbs.pinggu.org/forum.php?mod=redirect&goto=findpost&ptid=4108804&pid=40798375&fromuid=5794471)。
```{r warning=FALSE}
suppressPackageStartupMessages(require('BBmisc'))
## 读取程序包
pkg <- c('lubridate', 'plyr', 'dplyr', 'magrittr', 'stringr', 'rugarch', 'forecast', 'quantmod', 'memoise', 'microbenchmark', 'knitr', 'kableExtra', 'formattable')
suppressAll(lib(pkg))
funs <- c('task_progress.R') %>% paste0('function/', .)
l_ply(funs, source)
rm(pkg, funs)
```
# 数据
首先读取[Binary.com Interview Q1 (Extention)](http://rpubs.com/englianhu/binary-Q1E)的汇市数据。
```{r warning=FALSE}
cr_code <- c('AUDUSD=X', 'EURUSD=X', 'GBPUSD=X', 'CHF=X', 'CAD=X',
'CNY=X', 'JPY=X')
#'@ names(cr_code) <- c('AUDUSD', 'EURUSD', 'GBPUSD', 'USDCHF', 'USDCAD',
#'@ 'USDCNY', 'USDJPY')
names(cr_code) <- c('USDAUD', 'USDEUR', 'USDGBP', 'USDCHF', 'USDCAD', 'USDCNY', 'USDJPY')
price_type <- c('Op', 'Hi', 'Lo', 'Cl')
## 读取雅虎数据。
mbase <- sapply(names(cr_code), function(x) readRDS(paste0('./data/', x, '.rds')) %>% na.omit)
```
数据简介报告。
```{r warning=FALSE}
sapply(mbase, summary) %>%
kable %>%
kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
scroll_box(width = '100%', height = '400px')
```
*桌面2.1:数据简介。*
# 统计建模
## ARCH in Mean
```{r warning=FALSE}
opt_arma <- function(mbase){
#ARMA Modeling minimum AIC value of `p,d,q`
fit <- auto.arima(mbase)
arimaorder(fit)
}
```
再来就设置`mean.model`模型中的参数为`archm = TRUE`。
```{r warning=FALSE}
calc_fx <- memoise(function(mbase, currency = 'JPY=X', ahead = 1, price = 'Cl') {
source('function/filterFX.R')
source('function/opt_arma.R')
mbase = suppressWarnings(filterFX(mbase, currency = currency, price = price))
armaOrder = opt_arma(mbase)
spec = ugarchspec(
variance.model = list(
model = 'gjrGARCH', garchOrder = c(1, 1),
submodel = NULL, external.regressors = NULL,
variance.targeting = FALSE),
mean.model = list(
armaOrder = armaOrder[c(1, 3)],
include.mean = TRUE, archm = TRUE,
archpow = 1, arfima = TRUE,
external.regressors = NULL,
archex = FALSE),
fixed.pars = list(arfima = armaOrder[2]),
distribution.model = 'snorm')
fit = ugarchfit(spec, mbase, solver = 'hybrid')
fc = ugarchforecast(fit, n.ahead = ahead)
res = tail(attributes(fc)$forecast$seriesFor, 1)
colnames(res) = names(mbase)
latestPrice = tail(mbase, 1)
latestPrice <- xts(latestPrice)
return(list(latestPrice = latestPrice, forecastPrice = res,
AIC = infocriteria(fit)))
})
```
# 模拟数据
## 回测数据
以下僕运行数据测试后事先储存,然后直接读取。首先过滤`timeID`时间参数,然后才模拟预测汇价。
```{r warning=FALSE}
timeID <- llply(mbase, function(x) as.character(index(x))) %>%
unlist %>% unique %>% as.Date %>% sort
timeID <- c(timeID, xts::last(timeID) + days(1))
timeID0 <- ymd('2013-01-01')
timeID <- timeID[timeID >= timeID0]
```
模拟`calc_fx()`函数预测汇价数据。
```{r eval=FALSE, warning=FALSE}
## ------------- 模拟calc_fx()预测汇价 ----------------------
pred3 <- list()
for (dt in timeID) {
for (i in seq(cr_code)) {
smp <- mbase[[names(cr_code)[i]]]
dtr <- xts::last(index(smp[index(smp) < dt]), 1)
smp <- smp[paste0(dtr %m-% years(1), '/', dtr)]
pred3[[i]] <- ldply(price_type, function(y) {
df = calc_fx(smp, currency = cr_code[i], price = y)
df = data.frame(Date = index(df[[1]][1]),
Type = paste0(names(df[[1]]), '.', y),
df[[1]], df[[2]], t(df[[3]]))
names(df)[4] %<>% str_replace_all('1', 'T+1')
df
})
if (!dir.exists(paste0('data/fx/', names(pred3[[i]])[3])))
dir.create(paste0('data/fx/', names(pred3[[i]])[3]))
saveRDS(pred3[[i]], paste0(
'data/fx/', names(pred3[[i]])[3], '/pred3.',
unique(pred3[[i]]$Date), '.rds'))
cat(paste0(
'data/fx/', names(pred3[[i]])[3], '/pred3.',
unique(pred3[[i]]$Date), '.rds saved!\n'))
}; rm(i)
}
```
## 查询进度
查询模拟测试进度的函数`task_progress()`如下。
```{r warning=FALSE, message=FALSE, eval=FALSE}
## ------------- 查询缺失文件 ----------------------
## 查询缺失文件。
dts <- sapply(mbase, function(x) {
y = index(x)
y[y >= timeID0]
})
#'@ sapply(mbase, function(x) as.character(index(x)) %>% as.Date %>% sort)
fls <- sapply(names(cr_code), function(x) {
fls <- list.files(paste0('./data/fx/', x), pattern = '^pred3') %>%
str_extract_all('[0-9]{4}-[0-9]{2}-[0-9]{2}') %>%
unlist %>% as.Date %>% sort
dts[[x]][!dts[[x]] %in% fls] %>% sort
})
#'@ timeID <- sapply(fls, function(x) timeID[!timeID %in% x] %>% sort)
#'@ timeID <- llply(fls, function(x) timeID[!timeID %in% x] %>% sort) %>% unlist %>% as.Date %>% sort
#'@ names(timeID) <- NULL
#'@ timeID %<>% unique
```
```{r eval=FALSE, warning=FALSE}
## ------------- 模拟calc_fx()预测汇价 ----------------------
pred3 <- list()
for (i in seq(cr_code)) {
timeIDi <- fls[[names(cr_code)[i]]]
for (dt in timeIDi) {
smp <- mbase[[names(cr_code)[i]]]
dtr <- xts::last(index(smp[index(smp) < dt]), 1)
smp <- smp[paste0(dtr %m-% years(1), '/', dtr)]
pred3[[i]] <- ldply(price_type, function(y) {
df = calc_fx(smp, currency = cr_code[i], price = y)
df = data.frame(Date = index(df[[1]][1]),
Type = paste0(names(df[[1]]), '.', y),
df[[1]], df[[2]], t(df[[3]]))
names(df)[4] %<>% str_replace_all('1', 'T+1')
df
})
if (!dir.exists(paste0('data/fx/', names(pred3[[i]])[3])))
dir.create(paste0('data/fx/', names(pred3[[i]])[3]))
saveRDS(pred3[[i]], paste0(
'data/fx/', names(pred3[[i]])[3], '/pred3.',
unique(pred3[[i]]$Date), '.rds'))
cat(paste0(
'data/fx/', names(pred3[[i]])[3], '/pred3.',
unique(pred3[[i]]$Date), '.rds saved!\n'))
}
}; rm(i)
```
模拟完毕后,再来就查看数据结果。
```{r warning=FALSE}
## calc_fx()模拟数据误差率
task_progress(.pattern = '^pred3', .loops = FALSE)
```
以上结果显示,模拟后的数据的误差率非常渺小^[一些数据模拟时,出现不知名错误。]。以下筛选`pred1`、`pred2`与`pred3`同样日期的有效数据。
```{r warning=FALSE}
##数据1
fx1 <- llply(names(cr_code), function(x) {
fls <- list.files(paste0('data/fx/', x), pattern = '^pred1')
dfm <- ldply(fls, function(y) {
readRDS(paste0('data/fx/', x, '/', y))
}) %>% data.frame(Cat = 'pred1', .) %>% tbl_df
names(dfm)[4:5] <- c('Price', 'Price.T1')
dfm
})
names(fx1) <- names(cr_code)
##数据2
fx2 <- llply(names(cr_code), function(x) {
fls <- list.files(paste0('data/fx/', x), pattern = '^pred2')
dfm <- ldply(fls, function(y) {
readRDS(paste0('data/fx/', x, '/', y))
}) %>% data.frame(Cat = 'pred2', .) %>% tbl_df
names(dfm)[4:5] <- c('Price', 'Price.T1')
dfm
})
names(fx2) <- names(cr_code)
##数据3
fx3 <- llply(names(cr_code), function(x) {
fls <- list.files(paste0('data/fx/', x), pattern = '^pred3')
dfm <- ldply(fls, function(y) {
readRDS(paste0('data/fx/', x, '/', y))
}) %>% data.frame(Cat = 'pred3', .) %>% tbl_df
names(dfm)[4:5] <- c('Price', 'Price.T1')
dfm
})
names(fx3) <- names(cr_code)
#合并,并且整理数据。
fx1 %<>% ldply %>% tbl_df
fx2 %<>% ldply %>% tbl_df
fx3 %<>% ldply %>% tbl_df
fx <- suppressAll(bind_rows(fx1, fx2, fx3) %>% arrange(Date) %>%
mutate(.id = factor(.id), Cat = factor(Cat)) %>%
ddply(.(Cat, Type), function(x) {
x %>% mutate(Price.T1 = lag(Price.T1, 1))
}) %>% tbl_df %>%
dplyr::filter(Date >= ymd('2013-01-01') & Date <= ymd('2017-08-30')))
rm(fx1, fx2, fx3)
```
```{r warning=FALSE}
## filter all predictive error where sd >= 20%.
notID <- fx %>% mutate(diff = abs(Price.T1/Price), se = ifelse(diff <= 0.8 | diff >= 1.25, 1, 0)) %>% dplyr::filter(se == 1)
ntimeID <- notID %>% .$Date %>% unique
notID %>%
kable(caption = 'Error data') %>%
kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
scroll_box(width = '100%', height = '400px')
```
僕尝试运行好几次,`USDCHF`都是获得同样的结果。然后将默认的`snorm`分布更换为`norm`就没有出现错误。至于`USDCNY`原始数据有误就不是统计模型的问题了。
```{r warning=FALSE}
## timeID which contains 3 prediction models.
utimeID <- fx %>%
ddply(.(Date), summarize,
n = n()) %>%
dplyr::filter(n == 84) %>%
tbl_df %>% .$Date
fx %<>% dplyr::filter(Date %in% utimeID, !Date %in% ntimeID)
```
## 精准度
现在就比较下双方的MSE值与AIC值。
```{r warning=FALSE}
acc <- ddply(fx, .(Cat, Type), summarise,
MSE = mean((Price.T1 - Price)^2, na.rm = TRUE),
n = length(Price),
Akaike.MSE = (-2*MSE)/n+2*4/n,
Akaike = mean(Akaike, na.rm = TRUE),
Bayes = mean(Bayes, na.rm = TRUE),
Shibata = mean(Shibata, na.rm = TRUE),
Hannan.Quinn = mean(Hannan.Quinn, na.rm = TRUE)) %>%
tbl_df %>% mutate(MSE = round(MSE, 6)) %>%
arrange(Type)
acc %>%
kable(caption = 'Group Table Summary') %>%
kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
group_rows('USD/AUD Open', 1, 3, label_row_css = 'background-color: #e68a00; color: #fff;') %>%
group_rows('USD/AUD High', 4, 6, label_row_css = 'background-color: #e68a00; color: #fff;') %>%
group_rows('USD/AUD Low', 7, 9, label_row_css = 'background-color: #e68a00; color: #fff;') %>%
group_rows('USD/AUD Close', 10, 12, label_row_css = 'background-color: #e68a00; color: #fff;') %>%
group_rows('USD/EUR Open', 13, 15, label_row_css = 'background-color: #6666ff; color: #fff;') %>%
group_rows('USD/EUR High', 16, 18, label_row_css = 'background-color: #6666ff; color: #fff;') %>%
group_rows('USD/EUR Low', 19, 21, label_row_css = 'background-color:#6666ff; color: #fff;') %>%
group_rows('USD/EUR Close', 22, 24, label_row_css = 'background-color: #6666ff; color: #fff;') %>%
group_rows('USD/GBP Open', 25, 27, label_row_css = 'background-color: #339966; color: #fff;') %>%
group_rows('USD/GBP High', 28, 30, label_row_css = 'background-color: #339966; color: #fff;') %>%
group_rows('USD/GBP Low', 31, 33, label_row_css = 'background-color: #339966; color: #fff;') %>%
group_rows('USD/GBP Close', 34, 36, label_row_css = 'background-color: #339966; color: #fff;') %>%
group_rows('USD/CHF Open', 37, 39, label_row_css = 'background-color: #808000; color: #fff;') %>%
group_rows('USD/CHF High', 40, 42, label_row_css = 'background-color: #808000; color: #fff;') %>%
group_rows('USD/CHF Low', 43, 45, label_row_css = 'background-color: #808000; color: #fff;') %>%
group_rows('USD/CHF Close', 46, 48, label_row_css = 'background-color: #808000; color: #fff;') %>%
group_rows('USD/CAD Open', 49, 51, label_row_css = 'background-color: #666; color: #fff;') %>%
group_rows('USD/CAD High', 52, 54, label_row_css = 'background-color: #666; color: #fff;') %>%
group_rows('USD/CAD Low', 55, 57, label_row_css = 'background-color: #666; color: #fff;') %>%
group_rows('USD/CAD Close', 58, 60, label_row_css = 'background-color: #666; color: #fff;') %>%
group_rows('USD/CNY Open', 61, 63, label_row_css = 'background-color: #e60000; color: #fff;') %>%
group_rows('USD/CNY High', 64, 66, label_row_css = 'background-color: #e60000; color: #fff;') %>%
group_rows('USD/CNY Low', 67, 69, label_row_css = 'background-color: #e60000; color: #fff;') %>%
group_rows('USD/CNY Close', 70, 72, label_row_css = 'background-color: #e60000; color: #fff;') %>%
group_rows('USD/JPY Open', 73, 75, label_row_css = 'background-color: #ff3377; color: #fff;') %>%
group_rows('USD/JPY High', 76, 78, label_row_css = 'background-color: #ff3377; color: #fff;') %>%
group_rows('USD/JPY Low', 79, 81, label_row_css = 'background-color: #ff3377; color: #fff;') %>%
group_rows('USD/JPY Close', 82, 84, label_row_css = 'background-color: #ff3377; color: #fff;') %>%
scroll_box(width = '100%', height = '400px')
```
```{r warning=FALSE}
acc <- ddply(fx, .(Cat, .id), summarise,
MSE = mean((Price.T1 - Price)^2, na.rm = TRUE),
n = length(Price),
Akaike.MSE = (-2*MSE)/n+2*4/n,
Akaike = mean(Akaike, na.rm = TRUE),
Bayes = mean(Bayes, na.rm = TRUE),
Shibata = mean(Shibata, na.rm = TRUE),
Hannan.Quinn = mean(Hannan.Quinn, na.rm = TRUE)) %>%
tbl_df %>% mutate(MSE = round(MSE, 6)) %>%
arrange(.id)
acc %>%
kable(caption = 'Group Table Summary') %>%
kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
group_rows('USD/AUD', 1, 3, label_row_css = 'background-color: #003399; color: #fff;') %>%
group_rows('USD/CAD', 4, 6, label_row_css = 'background-color: #003399; color: #fff;') %>%
group_rows('USD/CHF', 7, 9, label_row_css = 'background-color: #003399; color: #fff;') %>%
group_rows('USD/CNY', 10, 12, label_row_css = 'background-color: #003399; color: #fff;') %>%
group_rows('USD/EUR', 13, 15, label_row_css = 'background-color: #003399; color: #fff;') %>%
group_rows('USD/GBP', 16, 18, label_row_css = 'background-color: #003399; color: #fff;') %>%
group_rows('USD/JPY', 19, 21, label_row_css = 'background-color: #003399; color: #fff;') %>%
scroll_box(width = '100%', height = '400px')
```
```{r warning=FALSE}
acc <- ddply(fx, .(Cat), summarise,
MSE = mean((Price.T1 - Price)^2, na.rm = TRUE),
n = length(Price),
Akaike.MSE = (-2*MSE)/n+2*4/n,
Akaike = mean(Akaike, na.rm = TRUE),
Bayes = mean(Bayes, na.rm = TRUE),
Shibata = mean(Shibata, na.rm = TRUE),
Hannan.Quinn = mean(Hannan.Quinn, na.rm = TRUE)) %>%
tbl_df %>% mutate(MSE = round(MSE, 6))
acc %>%
kable(caption = 'Group Table Summary') %>%
kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive'))
```
# 结论
结果证明`pred2`的`archm=FALSE`最为精准。目前正在编写着[Q1App2](https://beta.rstudioconnect.com/content/3138/)自动交易应用。“商场如战场”,除了模式最优化以外,程序运作上分秒必争... `microbenchmark`测试效率,之前编写了个[DataCollection](https://beta.rstudioconnect.com/content/3153/)应用采集实时数据以方便之后的高频率交易自动化建模^[不过数据量多就会当机,得继续提升才行。]。欲知更多详情,请参阅[Real Time FXCM](https://github.com/scibrokes/real-time-fxcm)。
# 附录
## 文件与系统资讯
以下乃此文献资讯:
- 文件建立日期:2018-10-14
- 文件最新更新日期:`r today('Asia/Tokyo')`
- `r R.version.string`
- R语言版本:`r getRversion()`
- [**rmarkdown** 程序包](https://github.com/rstudio/rmarkdown)版本:`r packageVersion('rmarkdown')`
- 文件版本:1.0.1
- 作者简历:[®γσ, Eng Lian Hu](https://beta.rstudioconnect.com/content/3091/ryo-eng.html)
- GitHub:[源代码](https://github.com/englianhu/binary.com-interview-question)
- 其它系统资讯:
```{r echo=FALSE, warning=FALSE, results='asis'}
suppressMessages(require('dplyr', quietly = TRUE))
suppressMessages(require('formattable', quietly = TRUE))
suppressMessages(require('knitr', quietly = TRUE))
suppressMessages(require('kableExtra', quietly = TRUE))
sys1 <- devtools::session_info()$platform %>%
unlist %>% data.frame(Category = names(.), session_info = .)
rownames(sys1) <- NULL
#sys1 %<>% rbind(., data.frame(
# Category = 'Current time',
# session_info = paste(as.character(lubridate::now('Asia/Tokyo')), 'JST'))) %>%
# dplyr::filter(Category != 'os')
sys2 <- data.frame(Sys.info()) %>% mutate(Category = rownames(.)) %>% .[2:1]
names(sys2)[2] <- c('Sys.info')
rownames(sys2) <- NULL
if (nrow(sys1) == 7 & nrow(sys2) == 8) {
sys1 %<>% rbind(., data.frame(
Category = 'Current time',
session_info = paste(as.character(lubridate::now('Asia/Tokyo')), 'JST')))
} else {
sys2 %<>% rbind(., data.frame(
Category = 'Current time',
Sys.info = paste(as.character(lubridate::now('Asia/Tokyo')), 'JST')))
}
cbind(sys1, sys2) %>%
kable(caption = 'Additional session information:') %>%
kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive'))
rm(sys1, sys2)
```
## 参考文献
01. [[问答] 请问怎样用R语言产生arch, arch-m, garch, garch-m的随机数?](http://bbs.pinggu.org/forum.php?mod=redirect&goto=findpost&ptid=4108804&pid=40798375&fromuid=5794471)<img src='www/hot.jpg' width='20'>
02. [binary.com 面试试题 I - GARCH模型中的`ARIMA(p,d,q)`参数最优化](https://rpubs.com/englianhu/binary-Q1FiGJRGARCH)
---
<span style='color:RoyalBlue'>**Powered by - Copyright® Intellectual Property Rights of [<img src='www/scb_logo.jpg' width='64'>®](http://www.scibrokes.com)個人の経営企業**</span>