Skip to content

Commit fac2c5c

Browse files
committed
Preparing for CRAN release to comply with tm package changes
1 parent 9cad228 commit fac2c5c

12 files changed

+127
-171
lines changed

.Rbuildignore

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ vignettes/qdap_vignette.html
2424
vignettes/tm_package_compatibility-concordance.tex
2525
vignettes/tm_package_compatibility.bbl
2626
vignettes/tm_package_compatibility.log
27+
2728
vignettes/tm_package_compatibility.tex
2829
vignettes/tm_package_compatibility.synctex.gz
2930
vignettes/MASTER.bib.bak

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#################
22
## Eclipse
33
#################
4+
tm_package_compatibility-concordance.tex
45
*.pydevproject
56
.project
67
.metadata

DESCRIPTION

+19-16
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,29 @@ Package: qdap
22
Type: Package
33
Title: Bridging the Gap Between Qualitative Data and Quantitative Analysis
44
Version: 2.2.9
5-
Date: 2017-09-04
6-
Authors@R: c(person("Bryan", "Goodrich", role = "ctb"), person("Dason", "Kurkiewicz", role = "ctb"),
7-
person("Tyler", "Rinker", email = "tyler.rinker@gmail.com", role = c("aut", "cre")))
5+
Date: 2017-11-07
6+
Authors@R: c(person("Bryan", "Goodrich", role = "ctb"), person("Dason", "Kurkiewicz", role =
7+
"ctb"), person("Tyler", "Rinker", email = "tyler.rinker@gmail.com", role = c("aut",
8+
"cre")))
89
Maintainer: Tyler Rinker <tyler.rinker@gmail.com>
9-
Depends: R (>= 3.1.0), qdapDictionaries (>= 1.0.2), qdapRegex (>= 0.1.2), qdapTools (>= 1.3.1),
10-
RColorBrewer
11-
Imports: chron, dplyr (>= 0.3), gdata, gender (>= 0.5.1), ggplot2 (>= 2.1.0), grid, gridExtra, igraph,
12-
methods, NLP, openNLP (>= 0.2-1), parallel, plotrix, RCurl, reports, reshape2, scales,
13-
stringdist, tidyr, tm (>= 0.6.2), tools, venneuler, wordcloud, xlsx, XML
10+
Depends: R (>= 3.1.0), qdapDictionaries (>= 1.0.2), qdapRegex (>= 0.1.2), qdapTools (>=
11+
1.3.1), RColorBrewer
12+
Imports: chron, dplyr (>= 0.3), gdata, gender (>= 0.5.1), ggplot2 (>= 2.1.0), grid, gridExtra,
13+
igraph, methods, NLP, openNLP (>= 0.2-1), parallel, plotrix, RCurl, reports,
14+
reshape2, scales, stringdist, tidyr, tm (>= 0.7.2), tools, venneuler, wordcloud,
15+
xlsx, XML
1416
Suggests: koRpus, knitr, lda, proxy, stringi, SnowballC, testthat
1517
LazyData: TRUE
1618
VignetteBuilder: knitr
17-
Description: Automates many of the tasks associated with quantitative discourse analysis of transcripts
18-
containing discourse including frequency counts of sentence types, words, sentences, turns of
19-
talk, syllables and other assorted analysis tasks. The package provides parsing tools for
20-
preparing transcript data. Many functions enable the user to aggregate data by any number of
21-
grouping variables, providing analysis and seamless integration with other R packages that
22-
undertake higher level analysis and visualization of text. This affords the user a more
23-
efficient and targeted analysis. 'qdap' is designed for transcript analysis, however, many
24-
functions are applicable to other areas of Text Mining/ Natural Language Processing.
19+
Description: Automates many of the tasks associated with quantitative discourse analysis of
20+
transcripts containing discourse including frequency counts of sentence types,
21+
words, sentences, turns of talk, syllables and other assorted analysis tasks. The
22+
package provides parsing tools for preparing transcript data. Many functions enable
23+
the user to aggregate data by any number of grouping variables, providing analysis
24+
and seamless integration with other R packages that undertake higher level analysis
25+
and visualization of text. This affords the user a more efficient and targeted
26+
analysis. 'qdap' is designed for transcript analysis; however, many functions are
27+
applicable to other areas of Text Mining/Natural Language Processing.
2528
License: GPL-2
2629
URL: http://trinker.github.com/qdap/
2730
BugReports: http://github.com/trinker/qdap/issues

INSTALL

-31
This file was deleted.

NEWS

-6
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,7 @@ BUG FIXES
3636
to factor by default and `nchar` no longer works on factor. This was caught
3737
by @karilint See issue #225
3838

39-
NEW FEATURES
40-
41-
MINOR FEATURES
4239

43-
IMPROVEMENTS
44-
45-
CHANGES
4640

4741

4842

R/as.tdm.R

+34-45
Original file line numberDiff line numberDiff line change
@@ -545,35 +545,22 @@ wfm2xtab <- function(text.var, grouping.var = NULL, ...) {
545545
#' @export
546546
#' @importFrom qdapTools list2df
547547
#' @method as.data.frame Corpus
548-
as.data.frame.Corpus <- function(x, row.names, optional, ..., doc = "docs",
548+
as.data.frame.Corpus <- function(x, row.names, optional, ..., doc = "doc_id",
549549
text = "text", sent.split = FALSE) {
550550

551551
if(!methods::is(x[[1]], "PlainTextDocument")) {
552552
x <- tm::tm_map(x, PlainTextDocument)
553553
}
554554

555555
qpaste <- function(x) paste(as.character(x), collapse = " ")
556-
out <- qdapTools::list2df(lapply(x, qpaste), col1 = text, col2 = doc)[, 2:1]
557556

558-
metadat <- NLP::meta(x)
559-
560-
if (all(unlist(lapply(NLP::meta(x, tag = "labels"), is.null)))){
561-
NLP::meta(x, tag = "labels") <- paste("doc", qdapTools::pad(seq_len(length(x))))
562-
metadat <- NLP::meta(x)
563-
}
564-
565-
if(!is.null(metadat[["labels"]])) {
566-
out[[1]] <- metadat[["labels"]]
567-
if (!is.null(metadat[["MetaID"]]) && all.equal(metadat[["labels"]], metadat[["MetaID"]])){
568-
metadat[["MetaID"]] <- NULL
569-
}
570-
colnames(metadat)[colnames(metadat) == "labels"] <- doc
571-
}
557+
out <- data.frame(
558+
qdapTools::list2df(lapply(x, qpaste), col1 = text, col2 = doc)[, 2:1],
559+
NLP::meta(x),
560+
stringsAsFactors = FALSE,
561+
check.names = FALSE
562+
)
572563

573-
if (ncol(metadat) > 1 && all(out[[1]] %in% metadat[[1]])) {
574-
colnames(metadat)[1] <- doc
575-
out <- key_merge(out, metadat)
576-
}
577564

578565
if (sent.split) {
579566
if(any(end_mark(out[["text"]]) == "_")) {
@@ -687,16 +674,16 @@ as.Corpus.default <- function(text.var, grouping.var = NULL, demographic.vars,
687674
LST <- sapply(split(DF[, "text.var"], DF[, "grouping"]),
688675
paste, collapse = " ")
689676
# LST_DF <- qdapTools::list2df(LST, "text.var", "grouping")
690-
LST_DF <- qdapTools::list2df(LST, "text.var", "id")
691-
677+
LST_DF <- qdapTools::list2df(LST, "text", "doc_id")
678+
692679
# ## Use the tm package to convert to a Corpus
693680
# mycorpus <- tm::VCorpus(tm::DataframeSource(LST_DF),
694681
# readerControl=list(reader=qdap_tm_reader))
695-
mycorpus <- replace_ids(tm::Corpus(tm::DataframeSource(LST_DF)), LST_DF[['id']])
682+
mycorpus <- tm::Corpus(tm::DataframeSource(LST_DF))
696683

697684
## Add metadata info
698-
NLP::meta(mycorpus, "MetaID") <- names(LST)
699-
NLP::meta(mycorpus, "labels") <- names(LST)
685+
NLP::meta(mycorpus, "MetaID") <- names(LST) #removed 11-12-2017
686+
NLP::meta(mycorpus, "labels") <- names(LST) #removed 11-12-2017
700687
pers <- unname(Sys.info()["user"])
701688
if (!is.null(pers)) {
702689
tm::DublinCore(mycorpus, tag = "creator") <- pers
@@ -908,14 +895,14 @@ apply_as_df <- function(tm.corpus, qdapfun, ..., stopwords = NULL,
908895
if (group.null) {
909896
with(dat, qdapfun(text.var = text, tot = tot, ...))
910897
} else {
911-
with(dat, qdapfun(text.var = text, grouping.var = docs, tot = tot, ...))
898+
with(dat, qdapfun(text.var = text, grouping.var = doc_id, tot = tot, ...))
912899
}
913900
} else {
914901
if (any(theargs %in% "grouping.var")) {
915902
if (group.null) {
916903
with(dat, qdapfun(text.var = text, ...))
917904
} else {
918-
with(dat, qdapfun(text.var = text, grouping.var = docs, ...))
905+
with(dat, qdapfun(text.var = text, grouping.var = doc_id, ...))
919906
}
920907
} else {
921908
with(dat, qdapfun(text.var = text, ...))
@@ -1009,10 +996,10 @@ tm_tdm_interface2 <- function(text.var, grouping.var, stopwords, char2space,
1009996
# readerControl=list(reader=qdap_tm_reader))
1010997
#
1011998
## Updated approach per tm changes 8/16/2017
1012-
1013-
LST_DF <- qdapTools::list2df(LST, "text.var", "id")
1014-
mycorpus <- replace_ids(tm::Corpus(tm::DataframeSource(LST_DF)), LST_DF[['id']])
1015-
999+
1000+
LST_DF <- qdapTools::list2df(LST, "text", "doc_id")
1001+
##mycorpus <- replace_ids(tm::Corpus(tm::DataframeSource(LST_DF)), LST_DF[['id']])
1002+
mycorpus <- tm::Corpus(tm::DataframeSource(LST_DF))
10161003

10171004
## Add metadata info
10181005
NLP::meta(mycorpus, "MetaID") <- names(LST)
@@ -1047,15 +1034,16 @@ tm_tdm_interface2 <- function(text.var, grouping.var, stopwords, char2space,
10471034

10481035
}
10491036

1050-
replace_ids <- function(corpus, ids){
10511037

1052-
stopifnot(length(corpus$content) == length(ids))
1053-
corpus$content <- Map(function(x, y) {
1054-
x$meta$id <- y
1055-
x
1056-
}, corpus$content, ids)
1057-
corpus
1058-
}
1038+
# replace_ids <- function(corpus, ids){
1039+
1040+
# stopifnot(length(corpus$content) == length(ids))
1041+
# corpus$content <- Map(function(x, y) {
1042+
# x$meta$id <- y
1043+
# x
1044+
# }, corpus$content, ids)
1045+
# corpus
1046+
# }
10591047

10601048
tm_dtm_interface2 <- function(text.var, grouping.var, stopwords, char2space,
10611049
apostrophe.remove, ...){
@@ -1100,10 +1088,11 @@ tm_dtm_interface2 <- function(text.var, grouping.var, stopwords, char2space,
11001088
# ## Use the tm package to convert to a Corpus
11011089
# mycorpus <- tm::VCorpus(tm::DataframeSource(LST_DF),
11021090
# readerControl=list(reader=qdap_tm_reader))
1103-
1104-
LST_DF <- qdapTools::list2df(LST, "text.var", "id")
1105-
mycorpus <- replace_ids(tm::Corpus(tm::DataframeSource(LST_DF)), LST_DF[['id']])
1106-
1091+
1092+
LST_DF <- qdapTools::list2df(LST, "text", "doc_id")
1093+
## mycorpus <- replace_ids(tm::Corpus(tm::DataframeSource(LST_DF)), LST_DF[['doc_id']])
1094+
mycorpus <- tm::Corpus(tm::DataframeSource(LST_DF))
1095+
11071096
## Add metadata info
11081097
NLP::meta(mycorpus, "MetaID") <- names(LST)
11091098
NLP::meta(mycorpus, "labels") <- names(LST)
@@ -1143,12 +1132,12 @@ tm_dtm_interface2 <- function(text.var, grouping.var, stopwords, char2space,
11431132
#' @method as.Corpus TermDocumentMatrix
11441133
as.Corpus.TermDocumentMatrix <- function(text.var, ...){
11451134

1146-
LST_DF <- qdapTools::list2df(mat2word_list(text.var), "text.var", "di")
1135+
LST_DF <- qdapTools::list2df(mat2word_list(text.var), "text", "doc_id")
11471136

11481137
## Use the tm package to convert to a Corpus
11491138
# tm::VCorpus(tm::DataframeSource(LST_DF),
11501139
# readerControl=list(reader=qdap_tm_reader))
1151-
replace_ids(tm::Corpus(tm::DataframeSource(LST_DF)), LST_DF[['id']])
1140+
tm::Corpus(tm::DataframeSource(LST_DF))
11521141

11531142
}
11541143

R/wfm.R

+2-2
Original file line numberDiff line numberDiff line change
@@ -352,12 +352,12 @@ tm_tdm_interface <- function(text.var, grouping.var, stopwords, char2space,
352352
# mycorpus <- tm::VCorpus(tm::DataframeSource(LST_DF),
353353
# readerControl=list(reader=qdap_tm_reader))
354354

355-
LST_DF <- qdapTools::list2df(LST, "text.var", "id")
355+
LST_DF <- qdapTools::list2df(LST, "text", "doc_id")
356356

357357
# ## Use the tm package to convert to a Corpus
358358
# mycorpus <- tm::VCorpus(tm::DataframeSource(LST_DF),
359359
# readerControl=list(reader=qdap_tm_reader))
360-
mycorpus <- replace_ids(tm::Corpus(tm::DataframeSource(LST_DF)), LST_DF[['id']])
360+
mycorpus <- tm::Corpus(tm::DataframeSource(LST_DF))
361361

362362
## Add metadata info
363363
NLP::meta(mycorpus, "MetaID") <- names(LST)

man/as.tdm.Rd

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-as.tdm.R

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
context("Checking as.tdm")
2-
2+
# expect_true <- isTRUE; expect_equivalent <- all.equal
33

44
test_that("as.tdm, as.dtm, as.DocumentTermMatrix, and as.TermDocumentMatrix
55
convert wfm, character and existing TermDocumentMatrix/DocumentTermMatrix",{
@@ -56,7 +56,7 @@ test_that("as.Corpus and as.data.frame methods work to convert between qadp/tm",
5656
demographic=DATA[, qcv(sex, adult, code)])
5757
expect_true(inherits(z, "Corpus"))
5858
expect_true(is.data.frame(as.data.frame(z)))
59-
expect_true(all(dim(as.data.frame(z)) == c(5, 5)))
59+
expect_true(all(dim(as.data.frame(z)) == c(5, 7)))
6060

6161
})
6262

0 commit comments

Comments
 (0)