trinker
diff --git a/‎.Rbuildignore
+1 b/‎.Rbuildignore
+1
diff --git a/‎.gitignore
+1 b/‎.gitignore
+1
diff --git a/‎DESCRIPTION
+19-16 b/‎DESCRIPTION
+19-16
diff --git a/‎INSTALL
-31 b/‎INSTALL
-31
diff --git a/‎NEWS
-6 b/‎NEWS
-6
diff --git a/‎R/as.tdm.R
+34-45 b/‎R/as.tdm.R
+34-45
diff --git a/‎R/wfm.R
+2-2 b/‎R/wfm.R
+2-2
diff --git a/‎man/as.tdm.Rd
+1-1 b/‎man/as.tdm.Rd
+1-1
diff --git a/‎tests/testthat/test-as.tdm.R
+2-2 b/‎tests/testthat/test-as.tdm.R
+2-2
@@ -24,6 +24,7 @@ vignettes/qdap_vignette.html
 vignettes/tm_package_compatibility-concordance.tex
 vignettes/tm_package_compatibility.bbl
 vignettes/tm_package_compatibility.log
+
 vignettes/tm_package_compatibility.tex
 vignettes/tm_package_compatibility.synctex.gz
 vignettes/MASTER.bib.bak
 
@@ -1,6 +1,7 @@
 #################
 ## Eclipse
 #################
+tm_package_compatibility-concordance.tex
 *.pydevproject
 .project
 .metadata
 
@@ -2,26 +2,29 @@ Package: qdap
 Type: Package
 Title: Bridging the Gap Between Qualitative Data and Quantitative Analysis
 Version: 2.2.9
-Date: 2017-09-04
-Authors@R: c(person("Bryan", "Goodrich", role = "ctb"), person("Dason", "Kurkiewicz", role = "ctb"),
-            person("Tyler", "Rinker", email = "tyler.rinker@gmail.com", role = c("aut", "cre")))
+Date: 2017-11-07
+Authors@R: c(person("Bryan", "Goodrich", role = "ctb"), person("Dason", "Kurkiewicz", role =
+          "ctb"), person("Tyler", "Rinker", email = "tyler.rinker@gmail.com", role = c("aut",
+          "cre")))
 Maintainer: Tyler Rinker <tyler.rinker@gmail.com>
-Depends: R (>= 3.1.0), qdapDictionaries (>= 1.0.2), qdapRegex (>= 0.1.2), qdapTools (>= 1.3.1),
-            RColorBrewer
-Imports: chron, dplyr (>= 0.3), gdata, gender (>= 0.5.1), ggplot2 (>= 2.1.0), grid, gridExtra, igraph,
-            methods, NLP, openNLP (>= 0.2-1), parallel, plotrix, RCurl, reports, reshape2, scales,
-            stringdist, tidyr, tm (>= 0.6.2), tools, venneuler, wordcloud, xlsx, XML
+Depends: R (>= 3.1.0), qdapDictionaries (>= 1.0.2), qdapRegex (>= 0.1.2), qdapTools (>=
+          1.3.1), RColorBrewer
+Imports: chron, dplyr (>= 0.3), gdata, gender (>= 0.5.1), ggplot2 (>= 2.1.0), grid, gridExtra,
+          igraph, methods, NLP, openNLP (>= 0.2-1), parallel, plotrix, RCurl, reports,
+          reshape2, scales, stringdist, tidyr, tm (>= 0.7.2), tools, venneuler, wordcloud,
+          xlsx, XML
 Suggests: koRpus, knitr, lda, proxy, stringi, SnowballC, testthat
 LazyData: TRUE
 VignetteBuilder: knitr
-Description: Automates many of the tasks associated with quantitative discourse analysis of transcripts
-            containing discourse including frequency counts of sentence types, words, sentences, turns of
-            talk, syllables and other assorted analysis tasks. The package provides parsing tools for
-            preparing transcript data. Many functions enable the user to aggregate data by any number of
-            grouping variables, providing analysis and seamless integration with other R packages that
-            undertake higher level analysis and visualization of text. This affords the user a more
-            efficient and targeted analysis. 'qdap' is designed for transcript analysis, however, many
-            functions are applicable to other areas of Text Mining/ Natural Language Processing.
+Description: Automates many of the tasks associated with quantitative discourse analysis of
+          transcripts containing discourse including frequency counts of sentence types,
+          words, sentences, turns of talk, syllables and other assorted analysis tasks. The
+          package provides parsing tools for preparing transcript data. Many functions enable
+          the user to aggregate data by any number of grouping variables, providing analysis
+          and seamless integration with other R packages that undertake higher level analysis
+          and visualization of text. This affords the user a more efficient and targeted
+          analysis. 'qdap' is designed for transcript analysis, however, many functions are
+          applicable to other areas of Text Mining/ Natural Language Processing.
 License: GPL-2
 URL: http://trinker.github.com/qdap/
 BugReports: http://github.com/trinker/qdap/issues
 
@@ -36,13 +36,7 @@ BUG FIXES
   to factor by default and `nchar` no longer works on factor.  This was caught 
   by @karilint  See issue #225
 
-NEW FEATURES
-
-MINOR FEATURES
 
-IMPROVEMENTS
-
-CHANGES
 
 
 
 
@@ -545,35 +545,22 @@ wfm2xtab <- function(text.var, grouping.var = NULL, ...) {
 #' @export
 #' @importFrom qdapTools list2df
 #' @method as.data.frame Corpus
-as.data.frame.Corpus <- function(x, row.names, optional, ..., doc = "docs", 
+as.data.frame.Corpus <- function(x, row.names, optional, ..., doc = "doc_id", 
     text = "text", sent.split = FALSE) {
 
     if(!methods::is(x[[1]], "PlainTextDocument")) {
         x <- tm::tm_map(x, PlainTextDocument)
     }
 
     qpaste <- function(x) paste(as.character(x), collapse = " ")
-    out <- qdapTools::list2df(lapply(x, qpaste), col1 = text, col2 = doc)[, 2:1]
 
-    metadat <- NLP::meta(x)
-
-    if (all(unlist(lapply(NLP::meta(x, tag = "labels"), is.null)))){
-        NLP::meta(x, tag = "labels") <- paste("doc", qdapTools::pad(seq_len(length(x))))
-        metadat <- NLP::meta(x)
-    }
-
-    if(!is.null(metadat[["labels"]])) {
-        out[[1]] <- metadat[["labels"]]
-        if (!is.null(metadat[["MetaID"]]) && all.equal(metadat[["labels"]], metadat[["MetaID"]])){
-            metadat[["MetaID"]] <- NULL
-        }
-        colnames(metadat)[colnames(metadat) == "labels"] <- doc
-    }
+    out <- data.frame(
+        qdapTools::list2df(lapply(x, qpaste), col1 = text, col2 = doc)[, 2:1],
+        NLP::meta(x),
+        stringsAsFactors = FALSE,
+        check.names = FALSE
+    )
 
-    if (ncol(metadat) > 1 && all(out[[1]] %in% metadat[[1]])) {
-        colnames(metadat)[1] <- doc
-        out <- key_merge(out, metadat)
-    }
 
     if (sent.split) {
         if(any(end_mark(out[["text"]]) == "_")) {
@@ -687,16 +674,16 @@ as.Corpus.default <- function(text.var, grouping.var = NULL, demographic.vars,
     LST <- sapply(split(DF[, "text.var"], DF[, "grouping"]), 
         paste, collapse = " ")
     # LST_DF <- qdapTools::list2df(LST, "text.var", "grouping")
-    LST_DF <- qdapTools::list2df(LST, "text.var", "id")
-    
+    LST_DF <- qdapTools::list2df(LST, "text", "doc_id")
+   
     # ## Use the tm package to convert to a Corpus
     # mycorpus <- tm::VCorpus(tm::DataframeSource(LST_DF), 
     #     readerControl=list(reader=qdap_tm_reader))
-    mycorpus <- replace_ids(tm::Corpus(tm::DataframeSource(LST_DF)), LST_DF[['id']])
+    mycorpus <- tm::Corpus(tm::DataframeSource(LST_DF))
 
     ## Add metadata info
-    NLP::meta(mycorpus, "MetaID") <- names(LST)
-    NLP::meta(mycorpus, "labels") <- names(LST)
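+    NLP::meta(mycorpus, "MetaID") <- names(LST) # NOTE(review): tagged "removed 11-12-2017" but still executed; confirm intent
+    NLP::meta(mycorpus, "labels") <- names(LST) # NOTE(review): tagged "removed 11-12-2017" but still executed; confirm intent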
     pers <- unname(Sys.info()["user"])
     if (!is.null(pers)) {
         tm::DublinCore(mycorpus, tag = "creator") <- pers
@@ -908,14 +895,14 @@ apply_as_df <- function(tm.corpus, qdapfun, ..., stopwords = NULL,
         if (group.null) {
             with(dat, qdapfun(text.var = text, tot = tot, ...)) 
         } else {
-            with(dat, qdapfun(text.var = text, grouping.var = docs, tot = tot, ...))
+            with(dat, qdapfun(text.var = text, grouping.var = doc_id, tot = tot, ...))
         }
     } else {
         if (any(theargs %in% "grouping.var")) {
             if (group.null) {
                 with(dat, qdapfun(text.var = text, ...))
             } else {
-                with(dat, qdapfun(text.var = text, grouping.var = docs, ...))
+                with(dat, qdapfun(text.var = text, grouping.var = doc_id, ...))
             }
         } else {
             with(dat, qdapfun(text.var = text, ...))
@@ -1009,10 +996,10 @@ tm_tdm_interface2 <- function(text.var, grouping.var, stopwords, char2space,
     #     readerControl=list(reader=qdap_tm_reader))
     #
     ## Updated approach per tm changes 8/16/2017
-    
-    LST_DF <- qdapTools::list2df(LST, "text.var", "id")
-    mycorpus <- replace_ids(tm::Corpus(tm::DataframeSource(LST_DF)), LST_DF[['id']])
-    
+  
+    LST_DF <- qdapTools::list2df(LST, "text", "doc_id")
+    ## mycorpus <- replace_ids(tm::Corpus(tm::DataframeSource(LST_DF)), LST_DF[['doc_id']])
+    mycorpus <- tm::Corpus(tm::DataframeSource(LST_DF))  
 
     ## Add metadata info
     NLP::meta(mycorpus, "MetaID") <- names(LST)
@@ -1047,15 +1034,16 @@ tm_tdm_interface2 <- function(text.var, grouping.var, stopwords, char2space,
 
 }
 
-replace_ids <- function(corpus, ids){
 
-    stopifnot(length(corpus$content) == length(ids))
-    corpus$content <- Map(function(x, y) {
-        x$meta$id <- y
-        x
-    }, corpus$content, ids)
-    corpus
-}
+# replace_ids <- function(corpus, ids){
+
+#     stopifnot(length(corpus$content) == length(ids))
+#     corpus$content <- Map(function(x, y) {
+#         x$meta$id <- y
+#         x
+#     }, corpus$content, ids)
+#     corpus
+# }
 
 tm_dtm_interface2 <- function(text.var, grouping.var, stopwords, char2space, 
     apostrophe.remove, ...){
@@ -1100,10 +1088,11 @@ tm_dtm_interface2 <- function(text.var, grouping.var, stopwords, char2space,
     # ## Use the tm package to convert to a Corpus
     # mycorpus <- tm::VCorpus(tm::DataframeSource(LST_DF), 
     #     readerControl=list(reader=qdap_tm_reader))
-    
-    LST_DF <- qdapTools::list2df(LST, "text.var", "id")
-    mycorpus <- replace_ids(tm::Corpus(tm::DataframeSource(LST_DF)), LST_DF[['id']])
-    
+
+    LST_DF <- qdapTools::list2df(LST, "text", "doc_id")
+    ## mycorpus <- replace_ids(tm::Corpus(tm::DataframeSource(LST_DF)), LST_DF[['doc_id']])
+    mycorpus <- tm::Corpus(tm::DataframeSource(LST_DF))
+        
     ## Add metadata info
     NLP::meta(mycorpus, "MetaID") <- names(LST)
     NLP::meta(mycorpus, "labels") <- names(LST)
@@ -1143,12 +1132,12 @@ tm_dtm_interface2 <- function(text.var, grouping.var, stopwords, char2space,
 #' @method as.Corpus TermDocumentMatrix 
 as.Corpus.TermDocumentMatrix <- function(text.var, ...){
 
-    LST_DF <- qdapTools::list2df(mat2word_list(text.var), "text.var", "di")
+    LST_DF <- qdapTools::list2df(mat2word_list(text.var), "text", "doc_id")
 
     ## Use the tm package to convert to a Corpus
     # tm::VCorpus(tm::DataframeSource(LST_DF), 
     #     readerControl=list(reader=qdap_tm_reader))
-    replace_ids(tm::Corpus(tm::DataframeSource(LST_DF)), LST_DF[['id']])
+    tm::Corpus(tm::DataframeSource(LST_DF))
 
 }
 
 
@@ -352,12 +352,12 @@ tm_tdm_interface <- function(text.var, grouping.var, stopwords, char2space,
     # mycorpus <- tm::VCorpus(tm::DataframeSource(LST_DF), 
     #     readerControl=list(reader=qdap_tm_reader))
 
-    LST_DF <- qdapTools::list2df(LST, "text.var", "id")
+    LST_DF <- qdapTools::list2df(LST, "text", "doc_id")
 
     # ## Use the tm package to convert to a Corpus
     # mycorpus <- tm::VCorpus(tm::DataframeSource(LST_DF), 
     #     readerControl=list(reader=qdap_tm_reader))
-    mycorpus <- replace_ids(tm::Corpus(tm::DataframeSource(LST_DF)), LST_DF[['id']])
+    mycorpus <- tm::Corpus(tm::DataframeSource(LST_DF))
 
     ## Add metadata info
     NLP::meta(mycorpus, "MetaID") <- names(LST)
 
@@ -1,5 +1,5 @@
 context("Checking as.tdm")
-
+# expect_true <- isTRUE; expect_equivalent <- all.equal
 
 test_that("as.tdm, as.dtm, as.DocumentTermMatrix, and as.TermDocumentMatrix 
     convert wfm, character and existing TermDocumentMatrix/DocumentTermMatrix",{
@@ -56,7 +56,7 @@ test_that("as.Corpus and as.data.frame methods work to convert between qadp/tm",
            demographic=DATA[, qcv(sex, adult, code)])
     expect_true(inherits(z, "Corpus"))
     expect_true(is.data.frame(as.data.frame(z)))
-    expect_true(all(dim(as.data.frame(z)) == c(5, 5)))
+    expect_true(all(dim(as.data.frame(z)) == c(5, 7)))
 
 })