-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path1_download_corpora.R
35 lines (27 loc) · 1.28 KB
/
1_download_corpora.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Author: Jeremy Boyd (kenyonboyd@gmail.com)
# Summary: Downloads CHILDES corpora. Users can specify the corpora they want
# to download by editing the childes_URL and corpora variables.
# Zip files containing different CHILDES corpora are located on different
# download pages. We're interested in corpora consisting of North American
# (NA) English that include a MOR tier (i.e., a tier that has PoS tags),
# hence the "Eng-NA-MOR" part of the URL.
childes_URL <- "http://childes.psy.cmu.edu/data/Eng-NA-MOR/"
# Specify the different corpora we want. These are all corpora with
# typically developing children.
corpora <- c("Bliss", "Brown", "Carterette", "Gleason", "Hall", "Kuczaj",
"MacWhinney", "Warren")
# Create a directory to store corpus files in.
corpus_dir <- "~/Documents/studies/A-ADJ/AP/get_adj_uses/corpora/"
dir.create(corpus_dir)
# Loop over the different corpus names.
lapply(corpora, function(x) {
# Specify a temporary filename for a corpus zip file.
temp <- tempfile()
# Download the zip file.
download.file(url = paste(childes_URL, x, ".zip", sep = ""),
destfile = temp)
# Unzip from temp to corpus directory.
unzip(zipfile = temp, exdir = corpus_dir)
# Delete the temporary file.
unlink(temp)
})