-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathREADME.Rmd
119 lines (88 loc) · 3.88 KB
/
README.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
---
output:
  md_document:
    variant: markdown_github
---
```{r, include=FALSE}
## Chunk defaults for the whole document: prefix printed output with "#>"
## and use "tools/" as the path prefix for generated figure files
knitr::opts_chunk$set(
  comment = "#>",
  fig.path = "tools/"
)
```
# madoc.utils
This repository contains an R package for:
- extracting data from Madoc
- uploading images and transcribing them using the Transkribus API
> 'Madoc' is an 'Omeka S' based platform for the display, enrichment, and curation of digital objects in 'IIIF' format. The platform can be used for all kinds of crowdsourcing activities in the domain of digital humanities.
### Installation
- To install the development version of this package, run: `remotes::install_github("DIGI-VUB/madoc.utils")`
### Example on Transkribus
- Create a collection, upload a document, and transcribe it
```{r, transkribus}
library(madoc.utils)
library(magick)

## Example images looked up in the installed madoc.utils package
img <- c(
  system.file(package = "madoc.utils", "extdata", "example.png"),
  system.file(package = "madoc.utils", "extdata", "alto-example.jpg")
)

## Create the API client; the password is read from the TRANSKRIBUS_PWD
## environment variable so it does not end up in the document
api <- Transkribus$new(
  user = "jan.wijffels@vub.ac.be",
  password = Sys.getenv("TRANSKRIBUS_PWD")
)

## Create a collection, upload the images as one document, run layout
## analysis and transcribe the first page
msg <- api$create_collection(label = "test-collection")
msg <- api$list_collections()
msg <- api$list_models(collection = "test-collection")
msg <- api$upload(
  collection = "test-collection",
  document = "Example document",
  data = img
)
msg <- api$layout(
  collection = "test-collection",
  document = "Example document"
)
msg <- api$transcribe(
  collection = "test-collection",
  document = "Example document",
  page = 1,
  model = "IJsberg",
  dictionary = "Combined_Dutch_Model_M1.dict"
)

## Look up the job using the value returned by the transcribe call
api$list_job(job = msg)

## Get all documents in a collection, get pages of a document
## Once your job has finished, import the PageXML file
msg <- api$list_collection(collection = "test-collection")
msg <- api$list_document(
  collection = "test-collection",
  document = "Example document"
)
pages <- head(msg, n = 1)
x <- read_pagexml(pages$page_xml)

## View() opens an interactive data viewer, so only call it in an interactive
## session; when the document is knitted, print the structure instead
if (interactive()) {
  View(x)
} else {
  str(x)
}
image_read(pages$url)

## Delete the collection if no longer needed
msg <- api$delete_collection(collection = "test-collection")
```

### Example on Madoc
- Get transcriptions
```{r, data-extraction}
library(madoc.utils)

site <- "https://www.madoc.ugent.be/s/brugse-vrije"

## Get all projects on that madoc site and keep the one used in this example
projects <- madoc_projects(site)
projects <- subset(projects, slug == "brugse-vrije-gebruikerstest")
projects

## Get all manifests and canvasses of a collection
manifests <- madoc_collection(
  site = site,
  id = projects$collection_id,
  tidy_metadata = TRUE
)
canvasses <- madoc_manifest(site = site, id = manifests$manifest_id)

## Get annotations on a canvas or several canvasses,
## keeping only the rows that have a non-empty value
annotations <- madoc_canvas_model(site = site, id = canvasses$canvas_id)
anno <- subset(annotations, nchar(value) > 0)

## Get URL of canvas for which volunteers performed an annotation
images <- madoc_canvas_image(site = site, id = sort(unique(anno$canvas_id)))
images <- merge(images, canvasses, by = "canvas_id")
images <- images[
  ,
  c("manifest_id", "canvas_id", "height", "width", "image_url")
]
```
- Combine annotations with image url and manifest metadata
```{r, data-export}
library(writexl)

## Attach the annotations to the image details of their canvas
anno <- merge(
  x = images, y = anno,
  by = "canvas_id",
  all.x = TRUE, all.y = FALSE
)

## Add the manifest metadata; clashing manifest columns get a "_manifest" suffix
anno <- merge(
  x = anno, y = manifests,
  by = "manifest_id",
  all.x = TRUE, all.y = FALSE,
  suffixes = c("", "_manifest")
)

## Keep only rows which have a non-empty annotation value
anno <- subset(anno, !is.na(value) & nchar(value) > 0)
str(anno)

## Export the result to an Excel file
write_xlsx(anno, "brugse-vrije.xlsx")
```
- See the image of a canvas
```{r, img-example}
library(magick)

## Image location of the first annotated canvas
url <- anno$image_url[[1]]
url

## Read the image and resize it using the geometry "x800"
img <- image_resize(image_read(url), "x800")

## Blank panel with the same dimensions as the resized image,
## with the annotation text of that first row written on it in green
txt <- anno$value[[1]]
dims <- image_info(img)
text_panel <- image_blank(width = dims$width, height = dims$height)
text_panel <- image_annotate(text_panel, txt, size = 10, color = "green")

## Combine the image and the text panel into one picture
image_append(c(img, text_panel))
```
### DIGI
By DIGI: Brussels Platform for Digital Humanities: https://digi.research.vub.be
