@@ -545,35 +545,22 @@ wfm2xtab <- function(text.var, grouping.var = NULL, ...) {
545
545
# ' @export
546
546
# ' @importFrom qdapTools list2df
547
547
# ' @method as.data.frame Corpus
548
- as.data.frame.Corpus <- function (x , row.names , optional , ... , doc = " docs " ,
548
+ as.data.frame.Corpus <- function (x , row.names , optional , ... , doc = " doc_id " ,
549
549
text = " text" , sent.split = FALSE ) {
550
550
551
551
if (! methods :: is(x [[1 ]], " PlainTextDocument" )) {
552
552
x <- tm :: tm_map(x , PlainTextDocument )
553
553
}
554
554
555
555
qpaste <- function (x ) paste(as.character(x ), collapse = " " )
556
- out <- qdapTools :: list2df(lapply(x , qpaste ), col1 = text , col2 = doc )[, 2 : 1 ]
557
556
558
- metadat <- NLP :: meta(x )
559
-
560
- if (all(unlist(lapply(NLP :: meta(x , tag = " labels" ), is.null )))){
561
- NLP :: meta(x , tag = " labels" ) <- paste(" doc" , qdapTools :: pad(seq_len(length(x ))))
562
- metadat <- NLP :: meta(x )
563
- }
564
-
565
- if (! is.null(metadat [[" labels" ]])) {
566
- out [[1 ]] <- metadat [[" labels" ]]
567
- if (! is.null(metadat [[" MetaID" ]]) && all.equal(metadat [[" labels" ]], metadat [[" MetaID" ]])){
568
- metadat [[" MetaID" ]] <- NULL
569
- }
570
- colnames(metadat )[colnames(metadat ) == " labels" ] <- doc
571
- }
557
+ out <- data.frame (
558
+ qdapTools :: list2df(lapply(x , qpaste ), col1 = text , col2 = doc )[, 2 : 1 ],
559
+ NLP :: meta(x ),
560
+ stringsAsFactors = FALSE ,
561
+ check.names = FALSE
562
+ )
572
563
573
- if (ncol(metadat ) > 1 && all(out [[1 ]] %in% metadat [[1 ]])) {
574
- colnames(metadat )[1 ] <- doc
575
- out <- key_merge(out , metadat )
576
- }
577
564
578
565
if (sent.split ) {
579
566
if (any(end_mark(out [[" text" ]]) == " _" )) {
@@ -687,16 +674,16 @@ as.Corpus.default <- function(text.var, grouping.var = NULL, demographic.vars,
687
674
LST <- sapply(split(DF [, " text.var" ], DF [, " grouping" ]),
688
675
paste , collapse = " " )
689
676
# LST_DF <- qdapTools::list2df(LST, "text.var", "grouping")
690
- LST_DF <- qdapTools :: list2df(LST , " text.var " , " id " )
691
-
677
+ LST_DF <- qdapTools :: list2df(LST , " text" , " doc_id " )
678
+
692
679
# ## Use the tm package to convert to a Corpus
693
680
# mycorpus <- tm::VCorpus(tm::DataframeSource(LST_DF),
694
681
# readerControl=list(reader=qdap_tm_reader))
695
- mycorpus <- replace_ids( tm :: Corpus(tm :: DataframeSource(LST_DF )), LST_DF [[ ' id ' ]] )
682
+ mycorpus <- tm :: Corpus(tm :: DataframeSource(LST_DF ))
696
683
697
684
# # Add metadata info
698
- NLP :: meta(mycorpus , " MetaID" ) <- names(LST )
699
- NLP :: meta(mycorpus , " labels" ) <- names(LST )
685
+ NLP :: meta(mycorpus , " MetaID" ) <- names(LST ) # removed 11-12-2017
686
+ NLP :: meta(mycorpus , " labels" ) <- names(LST ) # removed 11-12-2017
700
687
pers <- unname(Sys.info()[" user" ])
701
688
if (! is.null(pers )) {
702
689
tm :: DublinCore(mycorpus , tag = " creator" ) <- pers
@@ -908,14 +895,14 @@ apply_as_df <- function(tm.corpus, qdapfun, ..., stopwords = NULL,
908
895
if (group.null ) {
909
896
with(dat , qdapfun(text.var = text , tot = tot , ... ))
910
897
} else {
911
- with(dat , qdapfun(text.var = text , grouping.var = docs , tot = tot , ... ))
898
+ with(dat , qdapfun(text.var = text , grouping.var = doc_id , tot = tot , ... ))
912
899
}
913
900
} else {
914
901
if (any(theargs %in% " grouping.var" )) {
915
902
if (group.null ) {
916
903
with(dat , qdapfun(text.var = text , ... ))
917
904
} else {
918
- with(dat , qdapfun(text.var = text , grouping.var = docs , ... ))
905
+ with(dat , qdapfun(text.var = text , grouping.var = doc_id , ... ))
919
906
}
920
907
} else {
921
908
with(dat , qdapfun(text.var = text , ... ))
@@ -1009,10 +996,10 @@ tm_tdm_interface2 <- function(text.var, grouping.var, stopwords, char2space,
1009
996
# readerControl=list(reader=qdap_tm_reader))
1010
997
#
1011
998
# # Updated approach per tm changes 8/16/2017
1012
-
1013
- LST_DF <- qdapTools :: list2df(LST , " text.var " , " id " )
1014
- mycorpus <- replace_ids(tm :: Corpus(tm :: DataframeSource(LST_DF )), LST_DF [[' id' ]])
1015
-
999
+
1000
+ LST_DF <- qdapTools :: list2df(LST , " text" , " doc_id " )
1001
+ # # mycorpus <- replace_ids(tm::Corpus(tm::DataframeSource(LST_DF)), LST_DF[['id']])
1002
+ mycorpus <- tm :: Corpus( tm :: DataframeSource( LST_DF ))
1016
1003
1017
1004
# # Add metadata info
1018
1005
NLP :: meta(mycorpus , " MetaID" ) <- names(LST )
@@ -1047,15 +1034,16 @@ tm_tdm_interface2 <- function(text.var, grouping.var, stopwords, char2space,
1047
1034
1048
1035
}
1049
1036
1050
- replace_ids <- function (corpus , ids ){
1051
1037
1052
- stopifnot(length(corpus $ content ) == length(ids ))
1053
- corpus $ content <- Map(function (x , y ) {
1054
- x $ meta $ id <- y
1055
- x
1056
- }, corpus $ content , ids )
1057
- corpus
1058
- }
1038
+ # replace_ids <- function(corpus, ids){
1039
+
1040
+ # stopifnot(length(corpus$content) == length(ids))
1041
+ # corpus$content <- Map(function(x, y) {
1042
+ # x$meta$id <- y
1043
+ # x
1044
+ # }, corpus$content, ids)
1045
+ # corpus
1046
+ # }
1059
1047
1060
1048
tm_dtm_interface2 <- function (text.var , grouping.var , stopwords , char2space ,
1061
1049
apostrophe.remove , ... ){
@@ -1100,10 +1088,11 @@ tm_dtm_interface2 <- function(text.var, grouping.var, stopwords, char2space,
1100
1088
# ## Use the tm package to convert to a Corpus
1101
1089
# mycorpus <- tm::VCorpus(tm::DataframeSource(LST_DF),
1102
1090
# readerControl=list(reader=qdap_tm_reader))
1103
-
1104
- LST_DF <- qdapTools :: list2df(LST , " text.var" , " id" )
1105
- mycorpus <- replace_ids(tm :: Corpus(tm :: DataframeSource(LST_DF )), LST_DF [[' id' ]])
1106
-
1091
+
1092
+ LST_DF <- qdapTools :: list2df(LST , " text" , " doc_id" )
1093
+ # # mycorpus <- replace_ids(tm::Corpus(tm::DataframeSource(LST_DF)), LST_DF[['doc_id']])
1094
+ mycorpus <- tm :: Corpus(tm :: DataframeSource(LST_DF ))
1095
+
1107
1096
# # Add metadata info
1108
1097
NLP :: meta(mycorpus , " MetaID" ) <- names(LST )
1109
1098
NLP :: meta(mycorpus , " labels" ) <- names(LST )
@@ -1143,12 +1132,12 @@ tm_dtm_interface2 <- function(text.var, grouping.var, stopwords, char2space,
1143
1132
# ' @method as.Corpus TermDocumentMatrix
1144
1133
as.Corpus.TermDocumentMatrix <- function (text.var , ... ){
1145
1134
1146
- LST_DF <- qdapTools :: list2df(mat2word_list(text.var ), " text.var " , " di " )
1135
+ LST_DF <- qdapTools :: list2df(mat2word_list(text.var ), " text" , " doc_id " )
1147
1136
1148
1137
# # Use the tm package to convert to a Corpus
1149
1138
# tm::VCorpus(tm::DataframeSource(LST_DF),
1150
1139
# readerControl=list(reader=qdap_tm_reader))
1151
- replace_ids( tm :: Corpus(tm :: DataframeSource(LST_DF )), LST_DF [[ ' id ' ]] )
1140
+ tm :: Corpus(tm :: DataframeSource(LST_DF ))
1152
1141
1153
1142
}
1154
1143
0 commit comments