-
Notifications
You must be signed in to change notification settings - Fork 0
/
function_journey_dir_and_num_r_loop.R
186 lines (164 loc) · 10.4 KB
/
function_journey_dir_and_num_r_loop.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#write function version here
library(RPostgreSQL)
library(data.table)
library(lubridate)
#now make the for loop a function
assign_dir <- function(input_data_table, same_journey_mins, same_stage_mins, max_stage, min_stage){
# input data table must have the columns:
# tran_time POSIXlt, fare_stage int
#
# same_journey_mins
# sets how long away a transaction in the expected order must be
# before it is automatically excluded as being part of the same route
#
# same_stage_mins
# sets how long away a transaction must be from the current one
# before it is excluded from being considered part of the same
#
# min_stage and max_stage
# these are the minimum and maximum possible fare stage for the given bus route
# htey are used to calibrate the start and ends of journeys
# the highest and lowest stages can be ambiguous in terms of order
# as they are close and appear at the beginning and end of runs
# so at risk of being missassigned
#
#
# you need to make another version of this with varying amounts
# This funciont looks at transactions for a given machine on a given route, ordered by time
# by checking the fare stage information of each transaction
# against the one before as well as the 3 after it, it works out the direction of travel.
#
#
#make sure the data table is in the right order
input_data_table <- input_data_table[order(tran_time)]
#make the journey and stage window parameters
same_journey_window <- duration(same_journey_mins, units = "minutes")
same_stage_window <- duration(same_stage_mins, units = "minutes")
#add the new direction and journey number columns
input_data_table$new_dir <- "blank"
input_data_table$new_jorno <- 0
#now the for loop
for(i in 1:nrow(input_data_table)){
print(i)
#statement to assign direction and journey number
if((i==1)
&& ((((input_data_table$fare_stage[i+1]-input_data_table$fare_stage[i])>0)
&& (input_data_table$tran_time[i+1]<(input_data_table$tran_time[i]+same_journey_window)))
| ((input_data_table$fare_stage[i+1]==input_data_table$fare_stage[i])
&& ((input_data_table$fare_stage[i+2]-input_data_table$fare_stage[i])>0)
&& (input_data_table$tran_time[i+2]<(input_data_table$tran_time[i]+same_journey_window)))
| ((input_data_table$fare_stage[i+1]==input_data_table$fare_stage[i])
&& (input_data_table$fare_stage[i+2]==input_data_table$fare_stage[i])
&& ((input_data_table$fare_stage[i+3]-input_data_table$fare_stage[i])>0)
&& (input_data_table$tran_time[i+3]<(input_data_table$tran_time[i]+same_journey_window))))
){
#checks whether any of the next three points is increasing and within journey window
#if so assigns out
direction<- "out"
}else if((i==1)
&& ((((input_data_table$fare_stage[i+1]-input_data_table$fare_stage[i])<0)
&& (input_data_table$tran_time[i+1]<(input_data_table$tran_time[i]+same_journey_window)))
| ((input_data_table$fare_stage[i+1]==input_data_table$fare_stage[i])
&& ((input_data_table$fare_stage[i+2]-input_data_table$fare_stage[i])<0)
&& (input_data_table$tran_time[i+2]<(input_data_table$tran_time[i]+same_journey_window)))
| ((input_data_table$fare_stage[i+1]==input_data_table$fare_stage[i])
&& (input_data_table$fare_stage[i+2]==input_data_table$fare_stage[i])
&& ((input_data_table$fare_stage[i+3]-input_data_table$fare_stage[i])<0)
&& (input_data_table$tran_time[i+3]<(input_data_table$tran_time[i]+same_journey_window))))){
direction <- "in"
#like above but for decreasing
}else if((i==1)
&& (input_data_table$tran_time[i+1]>(input_data_table$tran_time[i]+same_journey_window))
&& (input_data_table$tran_time[i]>(input_data_table$tran_time[i-1]+same_journey_window))){
#checks if the transaction if too far from any other transaction to be compared
direction <- "unknown"
}else if((input_data_table$fare_stage[i]==min_stage && input_data_table$fare_stage[i-1]!=min_stage)
| (input_data_table$fare_stage[i]==max_stage && input_data_table$fare_stage[i-1]!=max_stage)
){
#if the transaction is at the minimum or maximum farestage
# don't know why the second part of this is in there about cheking that the previous point is not min/max stage
direction <- "unknown"
}else if(input_data_table$tran_time[i]==input_data_table$tran_time[i-1]){
#sanity check for a time error in sample set
#(transaction time rounding led to a lower fare stage being assinged the same time as a higher one or vice versa)
direction <- direction
}else if(((((input_data_table$tran_time[i]-input_data_table$tran_time[i-1])<(input_data_table$tran_time[i+1]-input_data_table$tran_time[i])) | (i==nrow(input_data_table)))
&& ((input_data_table$fare_stage[i]-input_data_table$fare_stage[i-1])>0)
&& (input_data_table$tran_time[i]<(input_data_table$tran_time[i-1]+same_journey_window))
&& (input_data_table$fare_stage[i]>1))
| ((((input_data_table$tran_time[i]-input_data_table$tran_time[i-1])>(input_data_table$tran_time[i+1]-input_data_table$tran_time[i])))
&& (i<nrow(input_data_table))
&& ((input_data_table$fare_stage[i+1]-input_data_table$fare_stage[i])>0)
&& (input_data_table$tran_time[i+1]<(input_data_table$tran_time[i]+same_journey_window)))
| ((((input_data_table$tran_time[i]-input_data_table$tran_time[i-1])>(input_data_table$tran_time[i+2]-input_data_table$tran_time[i])))
&& (i<(nrow(input_data_table)-1))
&& (input_data_table$fare_stage[i+1]==input_data_table$fare_stage[i])
&& ((input_data_table$fare_stage[i+2]-input_data_table$fare_stage[i])>0)
&& (input_data_table$tran_time[i+2]<(input_data_table$tran_time[i]+same_journey_window)))
| ((((input_data_table$tran_time[i]-input_data_table$tran_time[i-1])>(input_data_table$tran_time[i+3]-input_data_table$tran_time[i])))
&& (i<(nrow(input_data_table)-2))
&& (input_data_table$fare_stage[i+1]==input_data_table$fare_stage[i])
&& (input_data_table$fare_stage[i+2]==input_data_table$fare_stage[i])
&& ((input_data_table$fare_stage[i+3]-input_data_table$fare_stage[i])>0)
&& (input_data_table$tran_time[i+3]<(input_data_table$tran_time[i]+same_journey_window)))
| ((input_data_table$fare_stage[i]!=min_stage)
&& (input_data_table$fare_stage[i-1]==min_stage)
&& (input_data_table$tran_time[i]<(input_data_table$tran_time[i-1]+same_journey_window)))
){
#see if the direction is out
#compares against the point before and the 3 following points
#for relative positions in the fare stage
#turning this into a data.table commange would sort out the iterability of this
direction <- "out"
}else if(((((input_data_table$tran_time[i]-input_data_table$tran_time[i-1])<(input_data_table$tran_time[i+1]-input_data_table$tran_time[i])) | i==nrow(input_data_table))
&& ((input_data_table$fare_stage[i]-input_data_table$fare_stage[i-1])<0)
&& (input_data_table$tran_time[i]<(input_data_table$tran_time[i-1]+same_journey_window)))
| ((((input_data_table$tran_time[i]-input_data_table$tran_time[i-1])>(input_data_table$tran_time[i+1]-input_data_table$tran_time[i])))
&& (i<nrow(input_data_table))
&& ((input_data_table$fare_stage[i+1]-input_data_table$fare_stage[i])<0)
&& (input_data_table$tran_time[i+1]<(input_data_table$tran_time[i]+same_journey_window)))
| ((((input_data_table$tran_time[i]-input_data_table$tran_time[i-1])>(input_data_table$tran_time[i+2]-input_data_table$tran_time[i])))
&& (i<(nrow(input_data_table)-1))
&& (input_data_table$fare_stage[i+1]==input_data_table$fare_stage[i])
&& ((input_data_table$fare_stage[i+2]-input_data_table$fare_stage[i])<0)
&& (input_data_table$tran_time[i+2]<(input_data_table$tran_time[i]+same_journey_window)))
| ((((input_data_table$tran_time[i]-input_data_table$tran_time[i-1])>(input_data_table$tran_time[i+3]-input_data_table$tran_time[i])))
&& (i<(nrow(input_data_table)-2))
&& (input_data_table$fare_stage[i+1]==input_data_table$fare_stage[i])
&& (input_data_table$fare_stage[i+2]==input_data_table$fare_stage[i])
&& ((input_data_table$fare_stage[i+3]-input_data_table$fare_stage[i])<0)
&& (input_data_table$tran_time[i+3]<(input_data_table$tran_time[i]+same_journey_window)))
| ((input_data_table$fare_stage[i]!=max_stage)
&& (input_data_table$fare_stage[i-1]==max_stage)
&& (input_data_table$tran_time[i]<(input_data_table$tran_time[i-1]+same_journey_window)))
){
#like above but for fare stages going down
direction <- "in"
}else if((input_data_table$fare_stage[i]==input_data_table$fare_stage[i-1])
&& (input_data_table$tran_time[i]>(input_data_table$tran_time[i-1]+same_stage_window))
&& (input_data_table$tran_time[i+1]>(input_data_table$tran_time[i]+same_stage_window))
){
#check that the transaction is within reasonable distance of the previous one
#if not assign unkown
direction <- "unknown"
}else if(((input_data_table$fare_stage[i]==input_data_table$fare_stage[i-1])
&& (input_data_table$tran_time[i]<(input_data_table$tran_time[i-1]+same_stage_window)))
){
#if the transaction is within the stage window of the previous one and
#none of the other transactions relate to it then assign same as previous
direction <- direction
}
#assign direction
input_data_table$new_dir[i] <- direction
#if direction is different from previous one add 1 to journey counter
if(i==1){
journey_counter <- 1
}else if(input_data_table$new_dir[i]!=input_data_table$new_dir[i-1]){
journey_counter <- journey_counter+1
}
#then statement to add these to the new columns
input_data_table$new_jorno[i] <- journey_counter
}
#return the result
return(input_data_table)
}