-
Notifications
You must be signed in to change notification settings - Fork 0
/
r_dataset_details.Rmd
66 lines (62 loc) · 2.84 KB
/
r_dataset_details.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
---
title: "Details of datasets with inapplicable data"
author: "Martin R. Smith"
---
```{R setup, echo=FALSE, message=FALSE}
library(TreeSearch)
# Per-dataset citations and phyDat objects, taken from the TreeSearch package.
citations <- TreeSearch::inapplicable.citations
phyData <- TreeSearch::inapplicable.phyData

# Prefix for raw-file download links built later in this document.
gitRoot <- "https://raw.githubusercontent.com/ms609/ExploreInapplicable/master/"

# Names are repository directory names; values are the link labels shown
# for them.
studies <- c(
  matrices     = "Manually revised matrices",
  inapplicable = "Inapplicable",
  extraState   = "Extra State",
  ambigAbsent  = "Ambiguous"
)

# One character matrix per cited study, read from matrices/<name>.nex, in the
# same order as `citations`.
datasets <- lapply(names(citations), function (studyName) {
  nexusPath <- file.path('matrices', paste0(studyName, '.nex'))
  ReadCharacters(file = nexusPath)
})
```
Nexus files were constructed for each analysis using a [Perl script](https://github.com/ms609/ExploreInapplicable/blob/master/analyse_matrices.pl) and can be browsed on GitHub: `r paste0('[', studies, '](https://github.com/ms609/ExploreInapplicable/tree/master/', names(studies), ')', collapse=' | ')`
```{r datasets, echo=FALSE, results="asis"}
# Read charType/<datasetName>.txt and return the first character of every
# line, as an unnamed character vector with one entry per line.
# Callers in this document compare these codes against 'T', 'N' and 'I'.
CharacterType <- function (datasetName) {
  typeFile <- paste0("charType/", datasetName, ".txt")
  substr(readLines(typeFile), 1, 1)
}
# Pick the lines of charType/<datasetName>.txt that should be printed as notes.
#
# Two datasets are special-cased: 'Aria2015' yields only line 23 of its file
# and 'Liljeblad2008' only line 42.  For every other dataset the result is
# each line in which a ']' is followed (after optional whitespace) by some
# non-whitespace text; this may be a zero-length vector.
CharacterNotes <- function(datasetName) {
  allLines <- readLines(paste0("charType/", datasetName, ".txt"))
  if (datasetName == 'Aria2015') {
    return(allLines[23])
  }
  if (datasetName == 'Liljeblad2008') {
    return(allLines[42])
  }
  hasText <- grepl('\\]\\s*\\S+', allLines, perl=TRUE)
  allLines[hasText]
}
# Print a Markdown section describing the i-th dataset.
#
# Reads the document-level objects `phyData`, `datasets`, `citations`,
# `studies` and `gitRoot`, and calls `CharacterType()` and `CharacterNotes()`.
#
# i: position of the dataset; used to index `phyData`, `datasets` and
#    `citations` alike, so the three must share the same order.
#
# Writes the section to standard output with cat() (intended for a chunk with
# results="asis") and returns the number 0, so that the function can be
# driven by vapply() with a numeric template.
AboutDataset <- function (i) {
  dataset <- phyData[[i]]
  datasetName <- names(phyData)[i]
  characterType <- CharacterType(datasetName)
  nTaxa <- length(dataset)
  nChar <- sum(attr(dataset, 'weight'))
  # One flag per column of the character matrix: does it contain any '-'?
  inapps <- apply(datasets[[i]], 2, function (x) any(x == '-'))

  # Comma-separated indices of the inapplicable characters of a given type.
  ListChars <- function (type) {
    paste(which(characterType == type & inapps), collapse=', ')
  }

  downloads <- paste0('[', studies, '](', gitRoot, names(studies), '/',
                      datasetName, '.nex)', collapse=' | ')

  # Joined with paste0 so that no stray space precedes the ';' and no
  # trailing spaces are emitted when there are no type-'I' characters.
  neomorphic <- paste0(
    ListChars('N'),
    if (any(characterType == 'I' & inapps)) {
      paste0('; with state 1 denoting absence: ', ListChars('I'))
    } else {
      ''
    }
  )

  # Fetch the notes once; each call re-reads the notes file from disk.
  notes <- CharacterNotes(datasetName)
  details <- if (length(notes) > 0) {
    paste(c(' - Details:', notes), collapse="\n\n - ")
  } else {
    ''
  }

  cat(sprintf("\n\n### %s\n\n<!--Citation:-->%s\n\n - %s taxa, %s characters, of which %s contain inapplicable codings.\n\n - Download: %s\n\n - Inapplicable characters:\n\n - Transformational: %s\n\n - Neomorphic: %s\n\n%s\n\n",
              datasetName, citations[i],
              nTaxa, nChar, sum(characterType %in% c('N', 'I', 'T') & inapps),
              downloads,
              ListChars('T'),
              neomorphic,
              details
  ))
  0
}
# Emit one Markdown section per citation.  AboutDataset() prints as a side
# effect and returns 0, so `oi` is just a vector of zeros and is not used
# afterwards.
oi <- vapply(seq_along(citations), AboutDataset, FUN.VALUE = numeric(1))
invisible()
```