-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvisualize.R
More file actions
79 lines (67 loc) · 2.56 KB
/
visualize.R
File metadata and controls
79 lines (67 loc) · 2.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
## Packages
library(readr)
library(dplyr)
library(ggplot2)
library(Kendall)
library(reshape2)
## Data
# Load the merged results and discard the rows of the "grouptest" corpus.
# NOTE(review): the file has a .tsv extension but is parsed with read_csv()
# (comma-delimited) -- confirm the delimiter actually used in the file.
corpora <- filter(
  read_csv('merged_results.tsv'),
  Corpus_name != "grouptest"
)
# Long format: drop the Window column, then melt. No id.vars are supplied,
# so melt() picks the id columns itself.
# Optional restriction, currently disabled: filter(Window==10000)
corpora.melt <- melt(select(corpora, -Window))
## Plots
# Report the host operating system as a lower-case string.
#
# Uses Sys.info()'s "sysname" entry when Sys.info() is available, mapping
# "Darwin" to "osx". When Sys.info() returns NULL, falls back to
# .Platform$OS.type, overridden by "osx" / "linux" when R.version$os matches.
#
# Returns: a length-one character vector, lower-cased by tolower().
get_os <- function() {
  info <- Sys.info()
  if (is.null(info)) {
    # No Sys.info() on this platform: derive the answer from the build info.
    platform <- .Platform$OS.type
    if (grepl("^darwin", R.version$os)) {
      platform <- "osx"
    }
    if (grepl("linux-gnu", R.version$os)) {
      platform <- "linux"
    }
  } else {
    platform <- info['sysname']
    if (platform == 'Darwin') {
      platform <- "osx"
    }
  }
  tolower(platform)
}
# Open the multi-page PDF that collects every plot produced below.
# Linux uses the cairo-based PDF device; every other platform uses the
# standard pdf() device. Previously only 'osx' was handled, so on any other
# OS no device was opened and the plots went to the default device instead
# of 'sttr-plots.pdf'.
# The size argument is spelled out as `height` rather than relying on
# partial argument matching of `heigh`.
os <- get_os()
if (os == 'linux') {
  cairo_pdf('sttr-plots.pdf', onefile = TRUE, width = 16, height = 12)
} else {
  pdf('sttr-plots.pdf', onefile = TRUE, width = 16, height = 12)
}
# Timestamp appended to every plot title. Sys.time() already returns a
# POSIXct value, so no conversion is needed.
date <- Sys.time()

# One page per measure: box plots with jittered points, coloured by Brow,
# laid out as a Type x Corpus_name grid.
for (measure in c('STTR', 'Yules_K')) {
  g <- ggplot(
    corpora.melt %>% filter(variable == measure),
    aes(variable, value, color = Brow)
  ) +
    geom_boxplot() +
    geom_jitter(position = position_jitterdodge(), size = 0.3) +
    facet_grid(Type ~ Corpus_name, scales = 'free', space = 'fixed') +
    ggtitle(paste('Results matrix:', measure, date))
  print(g)
}

# The two plots below are printed explicitly, like the plots in the loops.
# A bare ggplot expression is only drawn by top-level auto-printing, which
# does not happen when the script is run through source().

# Mean sentence length, tokenized rows only.
g <- ggplot(
  corpora.melt %>%
    filter(variable == 'Sent_len_mean') %>%
    filter(Type == 'Tokenized'),
  aes(variable, value, color = Brow)
) +
  geom_boxplot() +
  geom_jitter(position = position_jitterdodge(), size = 0.3) +
  facet_grid(Type ~ Corpus_name, scales = 'free') +
  ggtitle(paste('Results matrix: Mean sentence length (Tokenized)', date))
print(g)

# All measures for tokenized rows, one panel per measure, grouped and
# coloured by corpus.
g <- ggplot(
  corpora.melt %>% filter(Type == 'Tokenized'),
  aes(variable, value, group = Corpus_name, color = Corpus_name)
) +
  geom_boxplot() +
  geom_jitter(position = position_jitterdodge(), size = 0.3) +
  facet_wrap(vars(variable), scales = 'free') +
  ggtitle(paste('Results matrix: All (Tokenized)', date))
print(g)
# Per-author pages: for every (measure, corpus) pair, draw box plots of the
# values by Author, coloured by Brow, with one panel per Type.
for (measure in c('STTR', 'Yules_K', 'Sent_len_mean')) {
  for (corpus in unique(corpora.melt$Corpus_name)) {
    # Rows for this measure and corpus that have both Author and Brow set.
    author_rows <- filter(
      corpora.melt,
      variable == measure,
      Corpus_name == corpus,
      !is.na(Author),
      !is.na(Brow)
    )
    # Nothing is drawn unless at least two rows remain.
    if (nrow(author_rows) <= 1) {
      next
    }
    author_plot <- ggplot(author_rows, aes(Author, value, color = Brow)) +
      geom_boxplot() +
      facet_wrap(~ Type, scales = 'free') +
      # Slanted x-axis labels.
      theme(axis.text.x = element_text(angle = 40, hjust = 1, vjust = 1)) +
      ggtitle(paste('By author:', measure, corpus, date))
    print(author_plot)
  }
}
# Close the current graphics device so the output file is finalized.
dev.off()