-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathupdate_all_bowling.R
56 lines (43 loc) · 1.71 KB
/
update_all_bowling.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
## See notes on updating batting
library(XML)
## ---- Scrape the innings-by-innings bowling list for one calendar year ----
yr <- 2017
max_page <- 220 ## Highest result page requested; pages with no usable table add nothing

## Build the results URL for page `page` of the bowling list covering
## 1 Jan to 31 Dec of `year`.
page_url <- function(page, year) {
  paste0(
    "http://stats.espncricinfo.com/ci/engine/stats/index.html?class=11;page=",
    page, ";spanmax2=31+Dec+", year, ";spanmin2=1+Jan+", year,
    ";spanval2=span;template=results;type=bowling;view=innings"
  )
}

## Download one results page and return its "Innings by innings list" table.
## Returns NULL when the page has no such table, and NULL with a warning when
## the download or parse fails. A failed page therefore contributes nothing,
## rather than silently re-using the table fetched for the previous page.
fetch_page <- function(page, year) {
  tryCatch(
    {
      tables <- readHTMLTable(page_url(page, year), stringsAsFactors = FALSE)
      tables$"Innings by innings list"
    },
    error = function(e) {
      warning("Page ", page, " skipped: ", conditionMessage(e), call. = FALSE)
      NULL
    }
  )
}

## The first page defines the expected column layout, so it must succeed.
first_page <- fetch_page(1, yr)
if (!is.data.frame(first_page)) {
  stop("Could not read the first results page: ", page_url(1, yr), call. = FALSE)
}

## Fetch the remaining pages into a list and bind once at the end
## (avoids growing the data frame with rbind() inside a loop).
pages <- c(
  list(first_page),
  lapply(seq_len(max_page)[-1], fetch_page, year = yr)
)

## Keep only pages whose columns match the first page; anything else
## (NULL, or a table with a different layout) cannot be stacked and is dropped.
same_layout <- vapply(
  pages,
  function(p) is.data.frame(p) && identical(names(p), names(first_page)),
  logical(1)
)
t <- do.call(rbind, pages[same_layout])
d <- t
library(lubridate)
library(ggplot2)
library(dplyr)
## ---- Derive analysis columns from the scraped table `d` ----
d$Player <- as.character(d$Player) ## Can leave the team name in

## Country is the text between the last "(" and the following ")" of the
## player label; a label without parentheses is passed through unchanged.
d$Country <- sub("\\).*", "", sub(".*\\(", "", d$Player))

## Parse the start date and split it into calendar components.
## NOTE(review): "%b" month abbreviations are matched in the current locale -
## presumably the page uses English abbreviations; confirm when run elsewhere.
d$Date <- as.Date(d$"Start Date", format = "%d %b %Y") ## Set up the date format
d$Year <- year(d$Date)
d$Day <- day(d$Date)
d$Month <- month(d$Date)
d$Yday <- yday(d$Date)

## Numeric conversions; entries that are not numbers become NA (with a warning).
d$Overs <- as.numeric(as.character(d$Overs))
d$BPO <- 6 ## A potential problem here, as all modern games have 6, so column missing
d$Mdns <- as.numeric(as.character(d$"Mdns"))
d$Runs <- as.numeric(as.character(d$"Runs"))

## Split the Opposition label around its " v " separator:
## `type` keeps the part before it, `Opposition` keeps "v <team>".
d$type <- sub(" v.*", "", d$Opposition)
## Lazy match up to the FIRST standalone "v". A greedy ".*v" would run to the
## last "v" in the string and truncate any team name that itself contains one.
d$Opposition <- sub("^.*?\\bv\\b", "v", d$Opposition, perl = TRUE)
## ---- Merge this year's innings into the historical archive ----
## The problem with the BPO column needs fixing as they don't match

## Per-year file name is derived from `yr` so it always agrees with the
## year filter applied to the archive below.
year_file <- paste0("all-bowling-", yr, ".csv")
write.csv(d, year_file)

## Archive rows from before `yr`; rows from `yr` onwards are replaced by the
## freshly scraped data.
d1 <- read.csv("all-bowling.csv")
## Drop BPO with a logical mask. The form d1[, -which(names(d1) == "BPO")]
## selects ZERO columns when BPO is absent, because -integer(0) is integer(0).
d1 <- d1[, names(d1) != "BPO", drop = FALSE] ## Remove the whole column
d1 <- subset(d1, d1$Year < yr)

## Re-read the file just written so the new rows pass through the same
## write.csv/read.csv round trip before being stacked on the archive rows.
## NOTE(review): presumably all-bowling.csv was produced the same way, so the
## column names line up for rbind() - confirm.
d2 <- read.csv(year_file)
d2 <- d2[, names(d2) != "BPO", drop = FALSE]
d <- rbind(d1, d2)