library(quanteda)
## Package version: 2.0.1
## Parallel computing: 2 of 4 threads used.
## See https://quanteda.io for tutorials and examples.
##
## Attaching package: 'quanteda'
## The following object is masked from 'package:utils':
##
## View
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(formattable)
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(parallel)
library(regexSelect)
library(ggplot2)
library(ggthemes)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library(cowplot)
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggthemes':
##
## theme_map
# Colour palette shared by the plots (hex codes).
# Gender: women / men
frau <- "#70b7b6"
mann <- "#c8b446"
# Party orientation: left / centre / right
links <- "#ffc65c"
mitte <- "#f08f43"
rechts <- "#e06552"
# Age group: old / young
alt <- "#472d30"
jung <- "#bf8891"

# Load the tweet data set from the project directory.
setwd("/Users/annameisser/Desktop/DATA/AAA")
load("NR_SR_Jan_Okt_2019.RData")

# Keep German-language tweets written by candidates for the Nationalrat
# or the Staenderat.
NR_SR_Jan_Okt_2019 <- filter(
  NR_SR_Jan_Okt_2019,
  Lang == "de",
  Candidate.Nationalrat == "1" | Candidate.Staenderat == "1"
)

# Strip URLs from the tweet text.
NR_SR_Jan_Okt_2019$Text <- str_remove_all(NR_SR_Jan_Okt_2019$Text, "http[:graph:]+")

# Derive the age group from the numeric age: 17-38 -> 'jung', 39-100 -> 'alt'.
NR_SR_Jan_Okt_2019$Alter <- car::recode(
  as.numeric(NR_SR_Jan_Okt_2019$Age),
  "17:38 = 'jung'; 39:100 = 'alt'"
)

# Build the quanteda corpus from the tweet text column.
corpus_SP <- corpus(NR_SR_Jan_Okt_2019, text_field = "Text")
#corpus_SP
Anzahl Tweets Frauen / Männer im Datensatz
### Total number of tweets by women / men in the data set (for ratio comparisons)
NR_klein <- select(NR_SR_Jan_Okt_2019, c("Gender"))
library(reshape)
library(reshape2)
# Empty strings and the literal text 'NA' are collapsed into 'u'.
NR_klein$Gender <- car::recode(NR_klein$Gender, "'' = 'u'; 'NA'='u'")

# Count the tweets per gender directly instead of summing a helper column of 1s.
NR_klein_f <- as.numeric(nrow(filter(NR_klein, Gender == "f")))
NR_klein_m <- as.numeric(nrow(filter(NR_klein, Gender == "m")))

# Lookup table: one row per gender with its tweet count.
cases <- c(NR_klein_f, NR_klein_m)
group <- c("f", "m")
tweet_cases <- data.frame(
  cases = cases,
  group = group,
  row.names = group,
  stringsAsFactors = FALSE
)
Anzahl Parteien im Datensatz
###### Total number of tweets per party (for comparisons)
NR_party <- NR_SR_Jan_Okt_2019 %>% select(c("Party_Short"))
# Empty strings and the literal text 'NA' are collapsed into 'u'.
NR_party$Party_Short <- car::recode(NR_party$Party_Short, "'' = 'u'; 'NA'='u'")

# Parties to count, in the row order of tweet_cases2.
group <- c(
  "glp", "FDP", "CVP", "SP", "Grüne", "SVP", "jglp", "jf", "BDP", "JUSO",
  "JCVP", "EVP", "JSVP", "JG", "Piraten", "JBDP", "jevp", "up!", "AL ZH"
)

# One tweet count per party. This replaces the former copy-pasted
# filter / case <- 1 / sum() triplet per party; rows with a missing party
# label are ignored, exactly as filter() ignored them.
cases <- vapply(
  group,
  function(partei) sum(NR_party$Party_Short == partei, na.rm = TRUE),
  numeric(1),
  USE.NAMES = FALSE
)

# Lookup table: one row per party with its tweet count. Built with
# data.frame() because the second argument of as.data.frame() is row.names,
# not a second column.
tweet_cases2 <- data.frame(
  cases = cases,
  group = group,
  row.names = group,
  stringsAsFactors = FALSE
)
Anzahl Alt / Jung
### Age comparison: number of tweets by old (> 38) and young (<= 38) candidates
NR_alter <- NR_SR_Jan_Okt_2019 %>% select(c("Age"))

# Compare ages as numbers. After the recode below the column can hold the
# text 'u', and comparing text with 38 is a string comparison ('u' > "38",
# "100" < "38"), which would misclassify tweets. Non-numeric ages become NA
# here and are counted in neither group.
alter_num <- suppressWarnings(as.numeric(as.character(NR_alter$Age)))

# Empty strings and the literal text 'NA' are collapsed into 'u'.
NR_alter$Age <- car::recode(NR_alter$Age, "'' = 'u'; 'NA'='u'")

NR_alt <- sum(alter_num > 38, na.rm = TRUE)
NR_jung <- sum(alter_num <= 38, na.rm = TRUE)

# Lookup table: one row per age group with its tweet count.
cases <- c(NR_alt, NR_jung)
group <- c("alt", "jung")
tweet_cases3 <- data.frame(
  cases = cases,
  group = group,
  row.names = group,
  stringsAsFactors = FALSE
)
# Overview table of gender-fair writing variants: name of the variant (Art),
# an example (Beispiel) and the category it is assigned to (Form).
Art <- c(
  "Neutralisierung", "Doppelenennung", "Schrägstrich", "Klammerschreibweise",
  "Binnen-I", "Gendergap", "GenderX", "Genderstern"
)
Beispiel <- c(
  "die Studierenden", "Kollegen & Kolleginnen", "Kolleg/innen", "Kolleg(inn)en",
  "LehrerInnen", "Lehrer_Innen", "Lehrx", "Richter*innen"
)
Form <- factor(c(
  "Neutralisierung", "Feminisierung", "Feminisierung", "Feminisierung",
  "Feminisierung", "Genderinklusiv", "Genderinklusiv", "Genderinklusiv"
))

sprache <- as.data.frame(Art)
sprache[["Beispiel"]] <- Beispiel
sprache[["Form"]] <- Form

# Left-align all three columns and render the table.
tabelle <- formattable(
  sprache,
  align = c("l", "l", "l"),
  list(area(col = Form ~ normalize_bar("red")))
)
tabelle
Art | Beispiel | Form |
---|---|---|
Neutralisierung | die Studierenden | Neutralisierung |
Doppelenennung | Kollegen & Kolleginnen | Feminisierung |
Schrägstrich | Kolleg/innen | Feminisierung |
Klammerschreibweise | Kolleg(inn)en | Feminisierung |
Binnen-I | LehrerInnen | Feminisierung |
Gendergap | Lehrer_Innen | Genderinklusiv |
GenderX | Lehrx | Genderinklusiv |
Genderstern | Richter*innen | Genderinklusiv |
# Per-tweet metadata for the "who tweets" plots. Status_id is selected as well
# because the tweet-level summaries group by it (gender2, alter2, partei2).
# NOTE(review): assumes the data set has a Status_id column - confirm.
wer_twittert <- select(NR_SR_Jan_Okt_2019, Alter, Party_Short, Age, Gender, Source, Name, Status_id)
#' Most frequent value of a vector (statistical mode)
#'
#' @param v A vector.
#' @return The value of `v` that occurs most often. On ties the value that
#'   appears first in `v` wins. A zero-length input gives a zero-length result.
getmode <- function(v) {
  distinct_values <- unique(v)
  # Number of occurrences of each distinct value, in order of first appearance.
  occurrences <- tabulate(match(v, distinct_values))
  distinct_values[which.max(occurrences)]
}
Wer twittert Plots Gender
############################## Gender ##############################
# Candidate level: group by Name and Gender, keep only the grouping columns
# and drop rows with missing values (presumably one row per candidate - verify).
gender <- wer_twittert %>% group_by(Name, Gender) %>% select(Name) %>% summarise_all(sum) %>% na.omit()
# Most frequent gender among the rows of `gender`.
geschlecht <- getmode(gender$Gender)
geschlecht_m <- filter(gender, Gender == "m")
geschlecht_f <- filter(gender, Gender == "f")
# Bar chart: number of rows in `gender` per gender, titled as number of candidates.
WT_geschlecht <- ggplot() + geom_bar(data = gender, mapping = aes(x = Gender), fill = alpha(c(frau, mann), .9)) + theme_tufte() + xlab("") + ylab("") + ggtitle("Anzahl Kandidat*innen") + theme(text = element_text(family = "Arial")) + scale_x_discrete(labels = c("Frauen", "Männer"))
# Tweet level: same idea grouped by Gender and Status_id; this requires
# wer_twittert to contain a Status_id column.
gender2 <- wer_twittert %>% group_by(Gender, Status_id) %>% select(Status_id) %>% summarise_all(sum) %>% na.omit
geschlecht_m2 <- filter(gender2, Gender == "m")
geschlecht_f2 <- filter(gender2, Gender == "f")
# Bar chart: number of rows in `gender2` per gender, titled as number of tweets.
WT_geschlecht2 <- ggplot() + geom_bar(data = gender2, mapping = aes(x = Gender), fill = alpha(c(frau, mann), .9)) + ylab("") + theme_tufte() + xlab("") + ggtitle("Anzahl Tweets") + theme(text = element_text(family = "Arial")) + scale_x_discrete(labels = c("Frauen", "Männer"))
# Show both charts side by side.
plot_grid( WT_geschlecht, WT_geschlecht2)
Wer twittert Alter
############################################################################
############################# Age ##########################################
wer_twittert$Age <- as.numeric(wer_twittert$Age)
# Candidate level with the numeric age: used only for mean and median age.
alter <- wer_twittert %>% group_by(Name, Age) %>% select(Name) %>% summarise_all(sum)
alter_mean <- mean(alter$Age, na.rm = TRUE)
alter_median <- median(alter$Age, na.rm = TRUE)
# `alter` is rebuilt with the age group (Alter: 'alt' / 'jung') instead of the numeric age.
alter <- wer_twittert %>% group_by(Name, Alter) %>% select(Name) %>% summarise_all(sum) %>% na.omit
alter_alt <- filter(alter, Alter == "alt")
alter_jung <- filter(alter, Alter =="jung")
# Bar chart: number of rows in `alter` per age group, titled as number of candidates.
WT_alter <- ggplot() + geom_bar(data = alter, mapping = aes(x = Alter), fill = alpha(c(alt, jung), .9)) + ylab("") +theme_tufte() + xlab("") + ggtitle("Anzahl Kandidat*innen") + theme(text = element_text(family = "Arial")) + scale_x_discrete(labels = c("über 38", "unter 38"))
# Tweet level: grouped by Alter and Status_id; this requires wer_twittert to
# contain a Status_id column.
alter2 <- wer_twittert %>% group_by(Alter, Status_id) %>% select(Status_id) %>% summarise_all(sum) %>% na.omit
alter_alt2 <- filter(alter2, Alter == "alt")
alter_jung2 <- filter(alter2, Alter == "jung")
# Bar chart: number of rows in `alter2` per age group, titled as number of tweets.
WT_alter2 <- ggplot() + geom_bar(data = alter2, mapping = aes(x = Alter), fill = alpha(c(alt, jung), .9)) + ylab("") +theme_tufte() + xlab("") + ggtitle("Anzahl Tweets") + theme(text = element_text(family = "Arial")) + scale_x_discrete(labels = c("über 38", "unter 38"))
# Show both charts side by side.
plot_grid( WT_alter, WT_alter2)
Wer twittert Partei
###############################################################################
######################### Party ###############################################
# Candidate level: group by Name and Party_Short and drop rows with missing values.
partei <- wer_twittert %>% group_by(Name, Party_Short) %>% select(Name) %>% summarise_all(sum) %>% na.omit
partei_mode <- getmode(partei$Party_Short)
# Order the party factor by descending frequency so the bars appear sorted.
partei <- within(partei,
  Party_Short <- factor(Party_Short,
    levels = names(sort(table(Party_Short),
      decreasing = TRUE))))
head(partei$Party_Short)
## [1] FDP Piraten Piraten CVP SP Grüne
## 39 Levels: glp FDP CVP SP Grüne SVP jglp jf BDP JUSO JCVP EVP JSVP ... PUM
partei <- filter(partei, Party_Short %in% c("glp", "FDP", "CVP", "SP", "Grüne", "SVP", "jglp", "jf", "BDP", "JUSO"))
# Colours by left / centre / right. The fill vector is positional: one colour
# per bar, in the order of the factor levels printed above.
# Fixes: `bindwidth` was a typo (ignored as an unknown parameter; `width = .9`
# equals the geom_bar default), the call ended in an empty argument `, )`, and
# the .5 meant for alpha() was outside its parentheses.
# NOTE(review): no fill aesthetic is mapped, so scale_fill_manual() has no
# visible effect here.
WT_partei <- ggplot() +
  geom_bar(
    data = partei, mapping = aes(x = Party_Short),
    fill = c(mitte, rechts, mitte, links, links, rechts, mitte, rechts, rechts, links),
    width = .9
  ) +
  theme_tufte() + theme(axis.text.x = element_text(angle = 45)) +
  scale_fill_manual(values = alpha("orange3", .5)) +
  xlab("") + ylab("") + ggtitle("Anzahl Kandidat*innen") + theme(text = element_text(family = "Arial"))
# Tweet level: grouped by Status_id and Party_Short; this requires wer_twittert
# to contain a Status_id column.
partei2 <- wer_twittert %>% group_by(Status_id, Party_Short) %>% select(Status_id) %>% summarise_all(sum) %>% na.omit
partei2 <- within(partei2,
  Party_Short <- factor(Party_Short,
    levels = names(sort(table(Party_Short),
      decreasing = TRUE))))
head(partei2$Party_Short)
## [1] glp LOVB SP JCVP SVP jevp
## 39 Levels: CVP glp jglp up! FDP SVP SP Grüne EVP jf JSVP Piraten BDP ... PdA
partei2 <- filter(partei2, Party_Short %in% c("CVP", "glp", "jglp", "up!", "SVP", "FDP", "SP", "Grüne", "EVP", "jf"))
# Same fixes as above (width typo, trailing empty argument).
# NOTE(review): no colour aesthetic is mapped, so scale_color_manual() has no
# visible effect; its labels are the hex codes, not the words.
WT_partei2 <- ggplot() +
  geom_bar(
    data = partei2, mapping = aes(x = Party_Short),
    fill = c(mitte, mitte, mitte, mitte, rechts, rechts, links, links, mitte, rechts),
    width = .9
  ) +
  theme_tufte() + theme(axis.text.x = element_text(angle = 45)) + ggtitle("Anzahl Tweets") + theme(text = element_text(family = "Arial")) +
  xlab("") + ylab("") + labs(fill = "Legend") +
  scale_color_manual(name = "Links Rechts Einordnung der Partei", values = c(links, mitte, rechts), labels = c(links, mitte, rechts))
# Show both charts side by side.
plot_grid(WT_partei, WT_partei2)
Für die Reihenfolge braucht es die ursprüngliche Wortreihenfolge: in context / tokens sentences
quanteda_options(threads = detectCores())
# Lower-cased tokens in their original order (word order matters for the pairs).
RF_tokens <- tokens(corpus_SP) %>% tokens_tolower()
#RF_dfm <- dfm(RF_tokens)

# Build every "<first><connector><second>" pattern for paired word forms.
# `first` and `second` are parallel character vectors; each pair is combined
# with all seven connectors.
pair_patterns <- function(first, second) {
  connectors <- c(" und ", " & ", "&", " u. ", " u ", " / ", "/")
  unlist(
    Map(function(a, b) paste0(a, connectors, b), first, second),
    use.names = FALSE
  )
}

# Feminine forms and their masculine counterparts (parallel vectors).
reihe_feminin <- c(
  "kandidatin", "kandidatinnen", "wählerin", "wählerinnen",
  "bürgerin", "bürgerinnen"
)
reihe_maskulin <- c(
  "kandidat", "kandidaten", "wähler", "wähler",
  "bürger", "bürger"
)

# Dictionary of paired mentions: fm = feminine form first, mf = masculine form
# first. The patterns are generated instead of typed by hand; this yields the
# same 42 patterns per key as before and corrects the hand-typed
# "kandidatin & kanditat", which could never match.
dict_reihe <- dictionary(list(
  fm = pair_patterns(reihe_feminin, reihe_maskulin),
  mf = pair_patterns(reihe_maskulin, reihe_feminin)
))
Plot Frauen / Männer
# Compared with: how many women tweet / how many male candidates tweet?
dfm_reihe <- tokens_lookup(RF_tokens, dict_reihe) %>% dfm()
#reihe <- textstat_frequency(dfm_reihe)
# `groups` is spelled out; `group =` only worked through partial argument matching.
reihe_GE <- textstat_frequency(dfm_reihe, groups = "Gender")
reihe_GE$group <- car::recode(reihe_GE$group, "'' = 'u'; 'NA'='u'")
# Attach the number of tweets per gender (column `cases`); only 'f' and 'm' remain.
reihe_GE <- merge(tweet_cases, reihe_GE, by = "group")
# Absolute frequencies
plot_GE1 <- ggplot() +
  geom_col(position = "dodge", data = reihe_GE, mapping = aes(x = feature, y = frequency, fill = group)) +
  scale_fill_manual(values = alpha(c(frau, mann), .9)) +
  theme_tufte() + ylab("") + xlab("") + scale_x_discrete(labels = c("Frau vor Mann", "Mann vor Frau")) +
  theme(text = element_text(family = "Arial"))
# Frequencies relative to the number of tweets per gender
plot_GE2 <- ggplot() +
  geom_col(position = "dodge", data = reihe_GE, mapping = aes(x = feature, y = frequency / cases, fill = group)) +
  scale_fill_manual(values = alpha(c(frau, mann), .9)) +
  theme_tufte() + scale_x_discrete(labels = c("Frau vor Mann", "Mann vor Frau")) +
  theme(text = element_text(family = "Arial", size = 14)) + labs(fill = "Geschlecht", title = "Reihenfolge der Nennung", x = "", y = "")
plot_GE2
############### Open question: is it better to relate to all tweets (cases) or only to tweets that mention one of the words?
quanteda_options(threads = detectCores())
HK_tokens <- tokens(corpus_SP, what = "word", remove_url = TRUE, remove_numbers = TRUE) %>% tokens_tolower()
HK_dfm <- dfm(HK_tokens)
# Single masculine / feminine forms. "nationalratskandidat" was misspelled as
# "nationalratskdandidat", so that word was never counted.
dict_GE <- dictionary(list(
  mann = c("wähler", "kandidat", "kandidaten", "bürger", "nationalratskandidat", "nationalratskandidaten"),
  frau = c("wählerin", "wählerinnen", "kandidatin", "kandidatinnen", "bürgerin", "bürgerinnen", "nationalratskandidatin", "nationalratskandidatinnen")
))
dfm_GE2 <- dfm_lookup(HK_dfm, dict_GE)
#########################################################################
# Paired mentions must be subtracted from the single-form counts so that both
# can be shown in one chart.
reihe_GEtotal <- textstat_frequency(dfm_reihe, groups = "Gender")
# Number of paired mentions (fm + mf) written by women / by men.
abziehen_f <- reihe_GEtotal %>% filter(group == "f")
abziehen_f <- sum(abziehen_f$frequency)
abziehen_m <- reihe_GEtotal %>% filter(group == "m")
abziehen_m <- sum(abziehen_m$frequency)
abziehen_nicht <- 0
# Correction per feature AND author gender: an author group's own paired
# mentions are subtracted from that group's single-form counts. Keying the
# table by feature alone subtracted the men's pair count from every 'mann' row
# and the women's from every 'frau' row, whoever wrote the tweets.
feature <- rep(c("mf", "fm", "mann", "frau"), each = 2)
abziehen1 <- c(
  abziehen_nicht, abziehen_nicht, abziehen_nicht, abziehen_nicht,
  abziehen_f, abziehen_m, abziehen_f, abziehen_m
)
abziehen <- data.frame(
  feature = feature,
  group = rep(c("f", "m"), times = 4),
  abziehen1 = abziehen1,
  stringsAsFactors = FALSE
)
#######################################################################
reihe_GE2 <- textstat_frequency(dfm_GE2, groups = "Gender")
reihe_GE2 <- merge(tweet_cases, reihe_GE2, by = "group")
reihe_GE2 <- select(reihe_GE2, group, cases, feature, frequency)
reihe_GE3 <- select(reihe_GE, group, cases, feature, frequency)
# Stack single-form rows and paired-mention rows, then attach the correction.
reihe_GE3 <- rbind(reihe_GE2, reihe_GE3)
reihe_GE3 <- merge(reihe_GE3, abziehen, by = c("feature", "group"))
reihe_GE3$abziehen1 <- as.numeric(reihe_GE3$abziehen1)
reihe_GE3$frequency <- as.numeric(reihe_GE3$frequency)
# V3: frequency after removing the paired mentions.
reihe_GE3$V3 <- reihe_GE3$frequency - reihe_GE3$abziehen1
Plot im Vergleich zu Reihenfolge
# Plot copy of the combined table with readable feature labels.
reihe_GEplot <- reihe_GE3
reihe_GEplot$feature <- car::recode(reihe_GEplot$feature, "'fm' = 'Kombination Frau - Mann'; 'mf' = 'Kombination Mann - Frau'; 'frau' = 'nur Frau'; 'mann' = 'nur Mann'")

# Horizontal dodged bars: corrected frequency per tweet, split by gender.
plot_GE2 <- ggplot() +
  geom_col(
    data = reihe_GEplot,
    mapping = aes(x = feature, y = V3/cases, fill = group),
    position = "dodge",
    width = .6
  ) +
  scale_fill_manual(values = alpha(c(frau, mann), .9)) +
  coord_flip() +
  labs(
    fill = "Geschlecht",
    x = "",
    y = "",
    title = "Reihenfolge der Nennung im Vergleich zu Einzelnennungen"
  ) +
  theme_tufte() +
  theme(text = element_text(family = "Arial", size = 22))
plot_GE2
## Same analysis by age group
######################################################################### dfm reihe ##############################
# Group directly by the age-group document variable "Alter" ('alt' / 'jung').
# Grouping by the raw "Age" and recoding afterwards left one row per individual
# age, i.e. several rows per (age group, feature); with position = "dodge" those
# bars are drawn on top of each other instead of being summed.
reihe_alter_total <- textstat_frequency(dfm_reihe, groups = "Alter")
# Number of paired mentions (fm + mf) written by old / by young candidates.
abziehen_alt <- sum(reihe_alter_total$frequency[reihe_alter_total$group %in% "alt"])
abziehen_jung <- sum(reihe_alter_total$frequency[reihe_alter_total$group %in% "jung"])
abziehen_nicht <- 0
# Correction per feature and age group. data.frame() is used because
# as.data.frame(abziehen1, feature) treats `feature` as row names, which are
# duplicated here.
feature <- c("mf", "mf", "fm", "fm", "mann", "mann", "frau", "frau")
abziehen1 <- c(abziehen_nicht, abziehen_nicht, abziehen_nicht, abziehen_nicht, abziehen_alt, abziehen_jung, abziehen_alt, abziehen_jung)
abziehen <- data.frame(
  abziehen1 = abziehen1,
  feature = feature,
  group = c("alt", "jung", "alt", "jung", "alt", "jung", "alt", "jung"),
  stringsAsFactors = FALSE
)
reihe_alter_total$group <- car::recode(reihe_alter_total$group, "'' = 'u'; 'NA'='u'")
reihe_alter_total <- select(reihe_alter_total, group, frequency, feature)
#reihe_alt1 <- merge(tweet_cases3, reihe_alter_total,by= "group")
####################################################################### dfm_GE2 ###########################
# Single-form frequencies per age group.
reihe_alter <- textstat_frequency(dfm_GE2, groups = "Alter")
reihe_alter$feature <- as.factor(reihe_alter$feature)
reihe_alter$frequency <- as.numeric(reihe_alter$frequency)
reihe_alter2 <- reihe_alter %>% select(group, frequency, feature)
#reihe_alter3 <- merge(tweet_cases3, reihe_alter2, by = "group")
######################################## merging ###########################################
# Attach tweet counts per age group, stack both tables, attach the correction.
reihe_alter2 <- merge(reihe_alter2, tweet_cases3, by = "group")
reihe_alter_total <- merge(reihe_alter_total, tweet_cases3, by = "group")
reihe_alt5 <- rbind(reihe_alter2, reihe_alter_total)
reihe_alt5 <- merge(reihe_alt5, abziehen, by = c("feature", "group"))
reihe_alt5$abziehen1 <- as.numeric(reihe_alt5$abziehen1)
reihe_alt5$frequency <- as.numeric(reihe_alt5$frequency)
# V4: frequency after removing the paired mentions.
reihe_alt5$V4 <- reihe_alt5$frequency - reihe_alt5$abziehen1
#######################################################################
#######################################################################
Plot im Vergleich zu Reihenfolge
# Plot copy of the combined age table with readable feature labels.
alt_plot <- reihe_alt5
alt_plot$feature <- car::recode(alt_plot$feature, "'fm' = 'Kombination Frau - Mann'; 'mf' = 'Kombination Mann - Frau'; 'frau' = 'nur Frau'; 'mann' = 'nur Mann'")

# Horizontal dodged bars: corrected frequency per tweet, split by age group.
alt_plot1 <- ggplot() +
  geom_col(
    data = alt_plot,
    mapping = aes(x = feature, y = V4/cases, fill = group),
    position = "dodge",
    width = .6
  ) +
  scale_fill_manual(values = alpha(c(alt, jung), .9)) +
  coord_flip() +
  labs(
    fill = "Alter",
    x = "",
    y = "",
    title = "Reihenfolge der Nennung im Vergleich zu Einzelnennungen"
  ) +
  theme_tufte() +
  theme(text = element_text(family = "Arial", size = 22))
alt_plot1
Create a dictionary
# Glob patterns for gender-inclusive word endings. Case is kept
# (tolower = FALSE) because several patterns depend on a capital I.
# NOTE(review): several patterns contain literal apostrophes (e.g. "*'/inn'")
# and one has an unbalanced apostrophe ("'/Innen") - confirm these are intended.
dict_inklusiv_test <- dictionary(list(inklusiv = c(
  "*_innen", "*_Innen", "*_in", "*_In", "*_in",
  "*'/inn'", "*'/In'", "*'/innen'", "'*/Innen'",
  "'/inn'", "'/In'", "'/innen'", "'/Innen",
  "*In", "*Innen", "*'*Innen'", "*'*innen'",
  "*'*In'", "*'*in'", "*!n", "*!innen",
  "*-In", "*-Innen", "*-innen",
  "-In", "-Innen", "-innen"
)), tolower = FALSE)
Wichtig: kein Stemming, weil wir die Wortenden benötigen; die Punkte benötigen wir ebenfalls
# Tokens without numbers, URLs and German stopwords; no stemming and no
# lower-casing, because the word endings and their case are what is matched.
WE_tokens <- tokens(corpus_SP, what = "word", remove_numbers = TRUE, remove_url = TRUE) %>%
  tokens_remove(stopwords("de"))
WE_dfm_test <- dfm(WE_tokens, tolower = FALSE)
# Collapse all tokens matching an inclusive-ending pattern into the key 'inklusiv'.
dfm_WE_test2 <- dfm_lookup(WE_dfm_test, dict_inklusiv_test, valuetype = "glob", case_insensitive = FALSE)
#topfeatures(dfm_WE_test, n = 70)
Plots Geschlecht
############################################################
# Inclusive word endings per gender; `groups` is spelled out instead of
# relying on partial matching of `group`.
inklusive_GE <- textstat_frequency(dfm_WE_test2, groups = "Gender")
#inklusive <- filter(inklusive, feature != filter_list)
# Attach the number of tweets per gender (column `cases`).
inklusive_GE2 <- merge(inklusive_GE, tweet_cases, by = "group")
# Frequency per tweet. The fill vector is positional, one colour per row of
# inklusive_GE2.
INK_plot_GE <- ggplot() +
  geom_col(
    data = inklusive_GE2, mapping = aes(x = group, y = frequency / cases),
    fill = alpha(c(frau, mann), .9)
  ) +
  theme_tufte() + xlab("") + ylab("") + theme(text = element_text(family = "Arial", size = 22))
#INK_plot_GE
Plots Alter
####################################### Plot by age group (median?) ##########################
# Inclusive word endings per age group; `groups` is spelled out instead of
# relying on partial matching of `group`.
inklusive_alter <- textstat_frequency(dfm_WE_test2, groups = "Alter")
# Attach the number of tweets per age group (column `cases`).
inklusive_alter2 <- merge(inklusive_alter, tweet_cases3, by = "group")
# Frequency per tweet. The fill vector is positional, one colour per row of
# inklusive_alter2.
INK_plot_alt <- ggplot() +
  geom_col(
    data = inklusive_alter2, mapping = aes(x = group, y = frequency / cases),
    fill = alpha(c(alt, jung), .9)
  ) +
  theme_tufte() + xlab("") + ylab("") + theme(text = element_text(family = "Arial", size = 22))
# Shared title row (the duplicated `title <- title <-` assignment is removed).
title <- ggdraw() +
  draw_label("Geschlechtergerechte Wortenden", x = 0, hjust = -.2, vjust = 6.5) +
  theme(text = element_text(family = "Arial", size = 40))
# Gender and age charts side by side, with the title row above them.
plots <- plot_grid(INK_plot_GE, INK_plot_alt)
plot_grid(title, plots, ncol = 1)
Plots Parteien
####################################### Plot by party ##########################
# Party colours (hex codes).
fdp <- "#3872B5"
al <- "#AD3434"
glp <- "#999900"
grün <- "#5D8132"
cvp <- "#B56100"
sp <- "#D0362E"
piraten <- "#B57300"
jevp <- "#996E00"
EVP <- "#996E00"
jglp <- "#999900"
JCVP <- "#B56100"
library(reshape)
# Colour per party label. Looking colours up by name keeps each bar's colour
# correct whatever the row order after merge() (which depends on the locale's
# sort order) and also when a party has no matching row.
partei_farben <- c(
  "AL ZH" = al, "CVP" = cvp, "EVP" = EVP, "FDP" = fdp, "glp" = glp,
  "Grüne" = grün, "JCVP" = JCVP, "jevp" = jevp, "jglp" = jglp, "SP" = sp
)
# `groups` is spelled out instead of relying on partial matching of `group`.
inklusive_partei <- textstat_frequency(dfm_WE_test2, groups = "Party_Short")
# Attach the number of tweets per party (column `cases`).
inklusive_partei2 <- merge(inklusive_partei, tweet_cases2, by = "group")
inklusive_partei2 <- filter(inklusive_partei2, group %in% names(partei_farben))
# Bars sorted by descending frequency per tweet.
INK_plot_partei <- ggplot() +
  geom_col(
    data = inklusive_partei2,
    mapping = aes(x = reorder(group, -(frequency / cases)), y = frequency / cases),
    fill = unname(partei_farben[as.character(inklusive_partei2$group)]), alpha = .9
  ) +
  theme_tufte() + theme(text = element_text(family = "Arial", size = 22)) + labs(title = "Geschlechtergerechte Wortenden Parteien", x = "", y = "")
INK_plot_partei
Welche inklusiven Formen werden gewählt?
# Keep only the tokens that match an inclusive-ending pattern, to see which
# forms are most popular.
dfm_WE_test <- dfm_select(WE_dfm_test, dict_inklusiv_test, valuetype = "glob", case_insensitive = FALSE)
inklusive <- textstat_frequency(dfm_WE_test)
#topfeatures(dfm_WE_test, 70)
# Tokens that match the patterns but are not gender-inclusive forms.
filter_list <- c("Opt-In", "@velo_in", "OptIn", "#LinkedIn", "Fly-In", "#OptIn", "@LinkedIn")
# %in% tests each feature against the whole list. `feature != filter_list`
# recycled the list element by element and compared each row with only one
# of its entries.
inklusive <- filter(inklusive, !(feature %in% filter_list))
# Per-candidate totals across the three indicators. `groups` is spelled out
# throughout; `group =` only worked through partial argument matching.

# 1) Paired mentions per candidate.
reihe_einzeln <- textstat_frequency(dfm_reihe, groups = "Name")
reihe_einzeln <- select(reihe_einzeln, group, frequency)

# 2) Ratio of feminine to masculine single forms per candidate, limited to
#    candidates with more than two feminine forms.
GE_einzeln <- textstat_frequency(dfm_GE2, groups = "Name")
GE_einzeln1 <- filter(GE_einzeln, feature == "frau")
GE_einzeln1 <- filter(GE_einzeln1, frequency > 2)
GE_einzeln2 <- filter(GE_einzeln, feature == "mann")
# NOTE(review): hard-coded candidate names; presumably the candidates left in
# GE_einzeln1 - confirm, since the merge below already restricts to common names.
GE_einzeln2 <- filter(GE_einzeln2, group %in% c(
  "Ale Chioccarello", "Alime Kösecioğulları", "Andrea Degen Iseli, Dr. med.",
  "Andreas Bisig", "Andri Silberschmidt", "Balthasar Glättli",
  "Barbara Schaffner", "Beat Flach", "Beatrice Simon",
  "Chris Schmid", "Christa Markwalder", "Christian Keller \U0001f4cc",
  "Christian Wasserfallen", "Christina Bachmann-Roth", "Claudio Zanetti",
  "Cédric Wermuth", "Diana Gutjahr", "Doris Fiala",
  "Elias Meier", "Elisabeth Schneider-Schneiter", "Fabienne Brauchli",
  "Furkan Oguz", "Gerhard Pfister", "Hans-Jakob Boesch",
  "Jeanine Glarner", "Karin Fehr Thoma", "Lukas Reimann",
  "Lutz Fischer-Lamprecht \U0001f1ec\U0001f1f1", "Manuel C. Widmer", "Marc Oliver Bürgi",
  "Marc Schinzel", "Marianne Binder", "Mario Pasinelli \U0001f1e8\U0001f1ed",
  "Martin Brügger", "Maya Bally", "Maya Graf",
  "Maya Weber Hadorn", "Meret Schneider", "Min Li Marti",
  "Müller-Boder Nicole", "Nancy Holten | TV-Moderatorin", "Natascha Wey",
  "Nicola Forster", "Nicole Barandun", "Oliver Hunziker",
  "Oliver Imfeld", "Oliver Thommen", "Philipp Schönbächler",
  "Roger Stettler", "Ruedi Löffel", "Ruedi Noser",
  "Sandro Covo", "Silvio A. Fareri", "Stefan Schlegel",
  "Stefanie Heimgartner", "Susanne Brunner", "Synes Ernst",
  "Thomas Aeschi", "Victor Kadlubowski", "Walter Angst",
  "Yvonne Bürgin", "thomas hardegger"
))
GE_einzeln1 <- select(GE_einzeln1, frequency, group)
GE_einzeln2 <- select(GE_einzeln2, frequency, group)
GE_einzeln_tot <- merge(GE_einzeln1, GE_einzeln2, by = "group")
# frequency.x = feminine forms, frequency.y = masculine forms.
GE_einzeln_tot$frequency <- GE_einzeln_tot$frequency.x / GE_einzeln_tot$frequency.y
GE_einzeln_tot <- select(GE_einzeln_tot, group, frequency)

# 3) Inclusive word endings per candidate.
inklusive_einzeln <- textstat_frequency(dfm_WE_test2, groups = "Name")
inklusive_einzeln <- select(inklusive_einzeln, group, frequency)

# Stack the three tables and sum per candidate.
# NOTE(review): this adds a ratio (2) to raw counts (1, 3) - confirm this is intended.
overall <- rbind(reihe_einzeln, GE_einzeln_tot, inklusive_einzeln)
overall$frequency <- as.numeric(overall$frequency)
overall1 <- overall %>% group_by(group) %>% select(frequency) %>% summarise_all(sum) %>% arrange(desc(frequency))
# Keep candidates with a combined score above 19.
overall2 <- filter(overall1, frequency > 19)
# Replace display names containing emoji with plain names for the axis labels.
overall2$group <- car::recode(overall2$group, "'Christian Keller 📌' = 'Christian Keller'")
overall2$group <- car::recode(overall2$group, "'(((Kilian))) ☮ ✏️ ⚓ 🕹' = 'Kilian Brogli'" )
Die Fleissigsten absolut
# Party colours (hex codes).
fdp <- "#3872B5"
al <- "#AD3434"
glp <- "#999900"
grün <- "#5D8132"
cvp <- "#B56100"
sp <- "#D0362E"
piraten <- "#B57300"
svp <- "#4F9141"
# Party of each candidate, in the row order of overall2.
# NOTE(review): hard-coded for exactly 10 rows in this order - confirm it still
# matches overall2 whenever the data or the threshold changes.
overall2$Ausrichtung <- c("fdp", "al", "grün", "glp", "glp", "grün", "glp", "grün", "sp", "cvp")
# Named colour vector: scale_fill_manual() matches named values to the fill
# levels by name. With unnamed values the colours were assigned by position,
# so 'sp' (the sixth level) received the sixth colour, which is `piraten`.
plot_overall <- ggplot() +
  geom_col(data = overall2, mapping = aes(x = reorder(group, -(frequency)), y = frequency, fill = Ausrichtung)) +
  scale_fill_manual(values = c(
    al = al, cvp = cvp, fdp = fdp, glp = glp,
    "grün" = grün, piraten = piraten, sp = sp
  )) +
  theme_tufte() + theme(text = element_text(family = "Arial", size = 22)) +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "Die Fleissigsten", fill = "Partei", x = "", y = "")
plot_overall
Die Fleissigsten relativ
# Number of tweets per candidate.
wer_twittert$tweets <- 1
einzeln_t <- wer_twittert %>% group_by(Name) %>% select(tweets) %>% summarise_all(sum)
# Per-candidate indicator tables. `groups` is spelled out; `group =` only
# worked through partial argument matching.
reihe_einzeln <- textstat_frequency(dfm_reihe, groups = "Name")
reihe_einzeln <- select(reihe_einzeln, group, frequency)
GE_einzeln_tot <- select(GE_einzeln_tot, group, frequency)
ende_einzeln <- textstat_frequency(dfm_WE_test2, groups = "Name")
ende_einzeln <- select(ende_einzeln, group, frequency)
# Add the three indicators per candidate. merge() keeps only candidates that
# appear in every table.
einzeln <- merge(reihe_einzeln, GE_einzeln_tot, by = c("group"))
einzeln$frequency <- einzeln$frequency.x + einzeln$frequency.y
einzeln <- select(einzeln, group, frequency)
einzeln <- merge(einzeln, ende_einzeln, by = c("group"))
einzeln$frequency <- einzeln$frequency.x + einzeln$frequency.y
einzeln <- select(einzeln, group, frequency)
einzeln <- einzeln %>% group_by(group) %>% select(frequency) %>% summarise_all(sum)
# Relate the combined score to the number of tweets per candidate.
names(einzeln_t)[names(einzeln_t) == "Name"] <- "group"
einzeln2 <- merge(einzeln_t, einzeln, by = c("group"))
einzeln2$v1 <- einzeln2$frequency / einzeln2$tweets
# NOTE(review): the cut-off 0.036170213 and the party vector below are
# hard-coded for exactly 10 rows - confirm they still match the data.
einzeln_p <- filter(einzeln2, v1 > 0.036170213)
einzeln_p$Partei <- c("cvp", "glp", "cvp", "svp", "fdp", "grün", "sp", "sp", "cvp", "al")
einzeln_p$group <- car::recode(einzeln_p$group, "'Andrea Degen Iseli, Dr. med.' = 'Andrea Degen Iseli'" )
# Named colour vector so each party keeps its colour regardless of which
# parties are present.
einzeln_plot <- ggplot() +
  geom_col(
    data = einzeln_p,
    mapping = aes(x = reorder(group, -(v1)), y = v1, fill = Partei),
    width = .6
  ) +
  scale_fill_manual(values = c(
    al = al, cvp = cvp, fdp = fdp, glp = glp,
    "grün" = grün, sp = sp, svp = svp
  )) +
  theme_tufte() + theme(text = element_text(family = "Arial", size = 22)) +
  labs(x = "", y = "", title = "Wer am häufigsten geschlechtergerechte Sprache benutzt") +
  theme(axis.text.x = element_text(angle = 90))
einzeln_plot