-
Notifications
You must be signed in to change notification settings - Fork 0
/
plots.Rmd
110 lines (87 loc) · 3.61 KB
/
plots.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
---
title: "Plots and statistics for the paper: Operationalizing borrowability: phonological segments as a case study"
author: "Elad Eisen, Steven Moran"
date: "(`r format(Sys.time(), '%d %B, %Y')`)"
output:
  github_document: default
  pdf_document: default
---
# Libraries
```{r, message=FALSE}
library(plyr)
library(rstatix)
library(tidyverse)
```
# Data
First, load the data.
```{r, message=FALSE}
# Borrowability scores produced by model 1 (one row per segment is assumed
# from how the `Segment` column is used for plot labels below -- TODO confirm).
df <- read_csv(file = "probablistic_model/model_1_borrowability.csv")

# Variant of the model output used for the consonant/vowel comparison; judging
# by the file name it additionally carries feature columns such as `Type`.
df_w_features <- read_csv(
  file = "probablistic_model/model_1_borrowability_w_features.csv"
)
```
# Statistics and plots
## Frequency of borrowing vs. typological frequency
```{r}
# Scatter plot of borrowing frequency (`SEGBO_frequency_relative`) against
# typological frequency (`PHOIBLE_frequency_relative`). Each observation is
# drawn as its segment label, with a loess smoother overlaid.
p <- ggplot(
  df,
  aes(x = PHOIBLE_frequency_relative, y = SEGBO_frequency_relative)
) +
  xlab("Typological frequency") +
  ylab("Frequency of borrowing") +
  # Map the label inside aes() on this layer only: the label then always
  # follows the layer's data, and the smoother below is not split into one
  # group per segment (which a plot-level discrete mapping would cause).
  geom_text(aes(label = Segment)) +
  geom_smooth(span = 0.4, method = "loess") +
  theme_bw() +
  # Extra spacing between the axes, their tick labels and their titles.
  theme(
    axis.text.x = element_text(margin = margin(t = 8)),
    axis.title.x = element_text(margin = margin(t = 15, r = 0, b = 0, l = 0)),
    axis.title.y = element_text(margin = margin(t = 0, r = 25, b = 0, l = 0))
  )
p

# `filename` is spelled out in full rather than relying on partial matching
# of `file`.
ggsave(
  filename = "figures/freq_scatter_loess.svg",
  plot = p,
  width = 10,
  height = 6
)
```
```{r}
# Black-and-white variant of the frequency scatter plot: same axes and segment
# labels, but the loess curve is drawn as a thin black line.
p <- ggplot(
  df,
  aes(x = PHOIBLE_frequency_relative, y = SEGBO_frequency_relative)
) +
  xlab("Typological frequency") +
  ylab("Frequency of borrowing") +
  # Map the label inside aes() on this layer only: the label then always
  # follows the layer's data, and the smoother below is not split into one
  # group per segment (which a plot-level discrete mapping would cause).
  geom_text(aes(label = Segment)) +
  geom_smooth(span = 0.4, method = "loess", color = "black", linewidth = 0.2) +
  theme_bw() +
  # Extra spacing between the axes, their tick labels and their titles.
  theme(
    axis.text.x = element_text(margin = margin(t = 8)),
    axis.title.x = element_text(margin = margin(t = 15, r = 0, b = 0, l = 0)),
    axis.title.y = element_text(margin = margin(t = 0, r = 25, b = 0, l = 0))
  )
p

# `filename` is spelled out in full rather than relying on partial matching
# of `file`.
ggsave(
  filename = "figures/freq_scatter_loess_bw.svg",
  plot = p,
  width = 10,
  height = 6
)
```
## Borrowability of consonants vs. vowels
```{r}
# Density of borrowability scores per segment `Type`, with a vertical line at
# each type's median.

# First and sixth colours of the 6-colour "Paired" ColorBrewer palette, used
# as the two fill colours.
my_colors <- RColorBrewer::brewer.pal(6, "Paired")[c(1, 6)]

# Median of every numeric column within each `Type`; only the `Borrowability`
# median is used below, as the x position of the vertical lines.
type_median <- ddply(df_w_features, .(Type), numcolwise(median))

p <- ggplot(
  data = df_w_features,
  aes(x = Borrowability, group = Type, fill = Type)
) +
  scale_fill_manual(values = my_colors) +
  geom_density(adjust = 1.5, alpha = 0.4) +
  geom_vline(
    data = type_median,
    aes(xintercept = Borrowability, linetype = Type),
    linewidth = 0.5
  ) +
  ylab("Density") +
  scale_linetype_manual(values = c("longdash", "dotted")) +
  theme_bw() +
  # Extra spacing between the axes, their tick labels and their titles.
  theme(
    axis.text.x = element_text(margin = margin(t = 8)),
    axis.title.x = element_text(margin = margin(t = 15, r = 0, b = 0, l = 0)),
    axis.title.y = element_text(margin = margin(t = 0, r = 25, b = 0, l = 0))
  )
p

# `filename` is spelled out in full rather than relying on partial matching
# of `file`.
ggsave(
  filename = "figures/consonants_vs_vowels.svg",
  plot = p,
  width = 10,
  height = 6
)
```
```{r}
# Greyscale variant of the consonant/vowel density plot. The fills come from
# `scale_fill_grey()`, so no colour palette is built in this chunk.

# Median of every numeric column within each `Type`; only the `Borrowability`
# median is used below, as the x position of the vertical lines.
type_median <- ddply(df_w_features, .(Type), numcolwise(median))

p <- ggplot(
  data = df_w_features,
  aes(x = Borrowability, group = Type, fill = Type)
) +
  scale_fill_grey(start = 0.8, end = 0.2) +
  geom_density(adjust = 1.5, alpha = 0.4) +
  geom_vline(
    data = type_median,
    aes(xintercept = Borrowability, linetype = Type),
    linewidth = 0.5
  ) +
  ylab("Density") +
  scale_linetype_manual(values = c("dotted", "dashed")) +
  theme_bw() +
  # Extra spacing between the axes, their tick labels and their titles.
  theme(
    axis.text.x = element_text(margin = margin(t = 8)),
    axis.title.x = element_text(margin = margin(t = 15, r = 0, b = 0, l = 0)),
    axis.title.y = element_text(margin = margin(t = 0, r = 25, b = 0, l = 0))
  )
p

# `filename` is spelled out in full rather than relying on partial matching
# of `file`.
ggsave(
  filename = "figures/consonants_vs_vowels_bw.svg",
  plot = p,
  width = 10,
  height = 6
)
```
Wilcoxon rank-sum test of whether the borrowability scores of consonants and vowels differ significantly.
```{r}
# Two-sample Wilcoxon rank-sum test of Borrowability between the levels of
# `Type`; `conf.int = TRUE` additionally reports a confidence interval for the
# location shift. `TRUE` is spelled out because `T` is an ordinary variable
# that can be reassigned.
wilcox.test(Borrowability ~ Type, data = df_w_features, conf.int = TRUE)
```
```{r}
# Effect size for the Wilcoxon comparison of Borrowability between the
# levels of `Type`.
wilcox_effsize(
  formula = Borrowability ~ Type,
  data = df_w_features
)
```