Notes

Dieses Dokument beschreibt die Verarbeitung und Analyse der Daten, welche als Grundlage für den Artikel "So unterscheiden sich Männer und Frauen auf Twitter" dienten, der im Rahmen des Forschungsseminars Datenjournalismus an der UZH entstanden ist. Die Analysen wurden in RStudio und RMarkdown durchgeführt.

Dieser Bericht wurde am 05-06-2020 11:27:04 generiert. R-Version: 3.6.2.

Setup Markdown

# Setup
# Chunk defaults for the whole report, set in one call.
# `comment` expects a character prefix (or NA to switch the prefix off);
# the former value FALSE is coerced to text and ends up as a literal
# "FALSE" prefix in front of printed output.
knitr::opts_chunk$set(
  collapse = TRUE,
  tidy = TRUE,
  warning = FALSE,
  message = FALSE,
  cache = FALSE,
  comment = NA,
  fig.pos = "H"
)

# Set Working Directory
# NOTE(review): machine-specific absolute path; the relative file names used
# by load()/save() further down are resolved against this directory.
setwd("~/Desktop/Blogbeitrag")

Load library

# Start from an empty global environment.
# NOTE(review): this only removes objects; packages that are already attached
# and options that are already set stay in place.
rm(list = ls())

# Packages used throughout the report. Attach order matters for masking:
# packages attached later mask same-named functions of earlier ones.
library(knitr)
library(dplyr)
library(kableExtra)
library(reshape)
library(ggplot2)
library(quanteda)
library(bbplot)
library(ggwaffle)
library(ggalt)
library(tidyr)
library(tidyverse)
library(scales)
library(ggrepel)
library(magick)

Import data

Die Twitter Daten wurden von Digital Democracy Lab zur Verfügung gestellt. Daten zu den Kandidierenden stammen vom Bundesamt für Statistik (BFS).

# Restore the saved workspace file from the working directory. The code below
# expects it to provide the data frame NR_SR_Jan_Okt_2019.
load("NR_SR_Jan_Okt_2019.RData") 

Data Preparation

In einem ersten Schritt wurden zwei Datensätze generiert. "data_g" enthält alle Kandidierenden, welche das Geschlecht angeben. "data_bp" enthält alle Kandidierenden, welche das Geschlecht angeben und für eine der sechs grössten Parteien kandidieren. Einige Parteien wurden zudem umgruppiert.

# Keep only rows with gender information.
# NOTE: data_g is derived before the party recoding below, so its Party_Short
# column still carries the original (youth-party) labels.
data_g <- NR_SR_Jan_Okt_2019 %>%
  filter(Gender %in% c("m", "f"))

#### Reshuffle parties
# Youth sections are merged into their parent party. One lookup table replaces
# six copy-pasted assignments; names are the old labels, values the new ones.
party_recode <- c(
  "JCVP" = "CVP",
  "jf"   = "FDP",
  "jglp" = "glp",
  "JG"   = "Grüne",
  "JUSO" = "SP",
  "JSVP" = "SVP"
)
# %in% never yields NA, so rows with a missing party are left untouched.
is_youth <- NR_SR_Jan_Okt_2019$Party_Short %in% names(party_recode)
NR_SR_Jan_Okt_2019$Party_Short[is_youth] <- unname(
  party_recode[as.character(NR_SR_Jan_Okt_2019$Party_Short[is_youth])]
)

#### Only the six large parties, with gender information
data_bp <- NR_SR_Jan_Okt_2019 %>%
  filter(
    Gender %in% c("m", "f"),
    Party_Short %in% c("CVP", "FDP", "glp", "Grüne", "SP", "SVP")
  )

Visualizations

Anteil Tweets zu den Wahlen 2019

Um den Anteil der Tweets zu den Wahlen 2019 zu berechnen, wurde ein Korpus aus den Twitter-Texten erstellt. Zudem wurde je ein Korpus erstellt, welcher nur Tweets von Frauen oder nur Tweets von Männern enthält. Danach wurde aus den Korpora je eine Document-Feature-Matrix gebildet, aus welcher in einem nächsten Schritt die Häufigkeit der relevanten Hashtags nach Datum herausgefiltert wurde.

# Build corpus, tokens and document-feature matrix three times: for all
# tweets, for tweets by women and for tweets by men. Corpora and token
# objects are also written to .Rda files in the working directory.

# Gender subsets of the tweet data
women <- filter(NR_SR_Jan_Okt_2019, Gender == "f")
men <- filter(NR_SR_Jan_Okt_2019, Gender == "m")

# Corpora (the tweet text lives in the column "Text")
corpus <- corpus(NR_SR_Jan_Okt_2019, text_field = "Text")
corpus_w <- corpus(women, text_field = "Text")
corpus_m <- corpus(men, text_field = "Text")
save(corpus, file = "corpus.Rda")
save(corpus_w, file = "corpus_w.Rda")
save(corpus_m, file = "corpus_m.Rda")

# Tokens without punctuation; document variables are carried along
tokens <- tokens(corpus, remove_punct = TRUE, include_docvars = TRUE)
tokens_w <- tokens(corpus_w, remove_punct = TRUE, include_docvars = TRUE)
tokens_m <- tokens(corpus_m, remove_punct = TRUE, include_docvars = TRUE)
save(tokens, file = "tokens.Rda")
save(tokens_w, file = "tokens_w.Rda")
save(tokens_m, file = "tokens_m.Rda")

# Lower-cased document-feature matrices
dfm <- dfm(tokens, tolower = TRUE)
dfm_w <- dfm(tokens_w, tolower = TRUE)
dfm_m <- dfm(tokens_m, tolower = TRUE)

 # Dictionary with a single key that bundles all election hashtags
dict_hashtags <- dictionary(list(hashtag = c(
  "#wahlen19", "#wahlench19", "#wahlen2019", "#wahlch19",
  "#ef2019", "#ef19", "#chvote"
)))

# Apply the dictionary; features outside the dictionary are collected under
# "nomatch" so that tweets without an election hashtag are kept as well
dfm_hashtags <- dfm_lookup(dfm, dict_hashtags, nomatch = "nomatch")
dfm_hashtags_w <- dfm_lookup(dfm_w, dict_hashtags, nomatch = "nomatch")
dfm_hashtags_m <- dfm_lookup(dfm_m, dict_hashtags, nomatch = "nomatch")

# Frequencies per day (document variable "Datum").
# The argument is spelled out as `groups`; the former `group =` only worked
# through partial argument matching.
features_dfm_hashtags <- textstat_frequency(dfm_hashtags, groups = "Datum")
features_dfm_hashtags_w <- textstat_frequency(dfm_hashtags_w, groups = "Datum")
features_dfm_hashtags_m <- textstat_frequency(dfm_hashtags_m, groups = "Datum")

# Stack the two gender tables, tagged with the labels used in the legend
features_dfm_hashtags_w$Gender <- "Frauen"
features_dfm_hashtags_m$Gender <- "Männer"
features_dfm_hashtags_b <- as.data.frame(
  rbind(features_dfm_hashtags_w, features_dfm_hashtags_m)
)

# Add, per day, the share of each feature in that day's document frequencies.
#   freq: table with the columns `group` (day as "YYYY-MM-DD"), `docfreq`
# Returns `freq` ungrouped, with the extra columns
#   doctot (sum of docfreq within the day), pcent (100 * docfreq / doctot)
#   and date (the day as Date).
add_daily_share <- function(freq) {
  freq %>%
    group_by(group) %>%
    mutate(
      doctot = sum(docfreq),
      pcent = 100 * docfreq / doctot,
      date = as.Date(group, format = "%Y-%m-%d")
    ) %>%
    ungroup()
}

# Prepare the plotting data
data_ggplot <- add_daily_share(features_dfm_hashtags)

# NOTE(review): the stacked table is grouped by day only, so doctot adds up
# the rows of both genders and pcent is relative to that combined total —
# confirm that this is the intended denominator.
data_ggplot_b <- add_daily_share(features_dfm_hashtags_b)


# Visualisation
# Number of tweets in the corpus and number of distinct screen names
n_tweets <- ndoc(corpus)
n_users <- length(unique(NR_SR_Jan_Okt_2019$Screen_name))

# Daily share of election hashtags over all users (line plus shaded area).
# Fixes: the election-day label said "2020" although the marker sits at
# 2019-10-20, and the caption misspelled "Hashtags".
plot1.1 <- ggplot(data = subset(data_ggplot, feature != "nomatch")) +
  aes(x = date, y = pcent) +
  geom_line(size = 0.7, color = "#1380A1") +
  geom_area(fill = "#1380A1", alpha = 0.4) +
  geom_hline(yintercept = 0, size = 1, colour = "#333333") +
  bbc_style() +
  labs(caption = "Hashtags: #wahlen19, #wahlench19, #wahlen2019, #wahlch19, #chvote, #ef2019, #ef19") +
  scale_y_continuous(limits = c(0, 40)) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  # No colour aesthetic is mapped in this plot; the scale is kept so that the
  # palette definition matches the per-gender plot.
  scale_colour_manual(
    name = "",
    values = c("Total" = "#1380A1", "Frauen" = "#826CB7", "Männer" = "#00C4A9")
  ) +
  theme(
    legend.title = element_blank(),
    legend.position = "right",
    legend.direction = "vertical",
    legend.text = element_text(size = rel(.8)),
    plot.caption = element_text(size = rel(.9), face = "italic", hjust = 0),
    axis.text.y = element_text(size = rel(.8)),
    axis.text.x = element_text(size = rel(.8), angle = 45, hjust = 1),
    plot.title = element_text(size = rel(2)),
    plot.subtitle = element_text(size = rel(1.5))
  ) +

  ## Add election date line
  geom_segment(
    aes(x = as.Date("2019-10-20"), y = 0,
        xend = as.Date("2019-10-20"), yend = 36),
    size = .5, linetype = "dashed", colour = "#555555"
  ) +
  annotate(
    "text", x = as.Date("2019-10-20"), y = 36,
    label = "Wahltag\n20. Oktober 2019",
    size = rel(4), hjust = .5, vjust = -.6, colour = "#555555"
  ) +

  ## Add annotation for the peak around the vote of 19 May 2019
  geom_label(
    aes(x = as.Date("2019-04-01"), y = 25,
        label = "Abstimmung vom\n19. Mai 2019"),
    lineheight = 1.2,
    hjust = 0,
    vjust = 0.5,
    colour = "#555555",
    fill = "white",
    label.size = NA,
    family = "Helvetica",
    size = rel(4)
  ) +
  geom_curve(
    aes(x = as.Date("2019-05-10"), y = 24,
        xend = as.Date("2019-05-19"), yend = 14),
    colour = "#555555",
    size = 0.1,
    curvature = -0.1,
    arrow = arrow(length = unit(0.02, "npc"))
  )

## Plot per gender
# Daily share of election hashtags, one line per gender.
# Fixes: the election-day label said "2020" although the marker sits at
# 2019-10-20, and the caption misspelled "Hashtags".
plot1.2 <- ggplot(
  data = subset(data_ggplot_b, feature != "nomatch"),
  aes(x = date, y = pcent, color = Gender)
) +
  geom_line(size = 0.7) +
  geom_hline(yintercept = 0, size = 1, colour = "#333333") +
  bbc_style() +
  labs(caption = "Hashtags: #wahlen19, #wahlench19, #wahlen2019, #wahlch19, #chvote, #ef2019, #ef19") +
  scale_y_continuous(limits = c(0, 20)) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  scale_colour_manual(
    name = "",
    values = c("Total" = "#1380A1", "Frauen" = "#826CB7", "Männer" = "#00C4A9")
  ) +
  theme(
    legend.title = element_blank(),
    legend.position = c(0.1, 0.85),
    legend.direction = "vertical",
    legend.text = element_text(size = rel(1)),
    plot.caption = element_text(size = rel(.9), face = "italic", hjust = 0),
    axis.text.y = element_text(size = rel(.8)),
    axis.text.x = element_text(size = rel(.8), angle = 45, hjust = 1),
    plot.title = element_text(size = rel(2)),
    plot.subtitle = element_text(size = rel(1.5))
  ) +
  # Zero-size points exist only to give the legend square keys; the guide
  # override below enlarges them and removes the line from the key.
  geom_point(shape = 15, size = 0) +
  guides(colour = guide_legend(override.aes = list(size = 3, linetype = 0))) +

  ## Add election date line
  geom_segment(
    aes(x = as.Date("2019-10-20"), y = 0,
        xend = as.Date("2019-10-20"), yend = 16),
    size = .5, linetype = "dashed", colour = "#555555"
  ) +
  annotate(
    "text", x = as.Date("2019-10-20"), y = 16,
    label = "Wahltag\n20. Oktober 2019",
    size = rel(4), hjust = .5, vjust = -.6, colour = "#555555"
  ) +

  ## Add annotations
  ### 19.05.2019
  geom_label(
    aes(x = as.Date("2019-04-01"), y = 16,
        label = "Abstimmung vom\n19. Mai 2019"),
    lineheight = 1.2,
    hjust = 0,
    vjust = 0.5,
    colour = "#555555",
    fill = "white",
    label.size = NA,
    family = "Helvetica",
    size = rel(4)
  ) +
  geom_curve(
    aes(x = as.Date("2019-05-10"), y = 14,
        xend = as.Date("2019-05-19"), yend = 10),
    colour = "#555555",
    size = 0.1,
    curvature = -0.1,
    arrow = arrow(length = unit(0.02, "npc"))
  ) +
  ### 10.02.2019
  geom_label(
    aes(x = as.Date("2019-2-01"), y = 13,
        label = "Abstimmung vom\n10. Februar 2019"),
    lineheight = 1.2,
    hjust = 0,
    vjust = 0.5,
    colour = "#555555",
    fill = "white",
    label.size = NA,
    family = "Helvetica",
    size = rel(4)
  ) +
  geom_curve(
    aes(x = as.Date("2019-02-25"), y = 11,
        xend = as.Date("2019-02-10"), yend = 7.5),
    colour = "#555555",
    size = 0.1,
    curvature = 0.1,
    arrow = arrow(length = unit(0.02, "npc"))
  )

Anteil Tweets zu den Wahlen 2019

Total

359’920 Tweets, 29’406 User

# Print the overall hashtag-share plot (auto-printing renders the ggplot)
plot1.1


Frauen und Männer

359’920 Tweets, 29’406 User

# Print the per-gender hashtag-share plot
plot1.2


Geschlechterverhältnis Twitter User
# One row per candidate (National Council or Council of States) with a known
# gender. distinct() runs within each gender group, and the result stays
# grouped by Gender.
twitter_user_NRSR <- NR_SR_Jan_Okt_2019 %>%
  filter(Candidate.Staenderat == 1 | Candidate.Nationalrat == 1) %>%
  group_by(Gender) %>%
  filter(Gender %in% c("f", "m")) %>%
  distinct(Name, .keep_all = TRUE) %>%
  select(Name, First_Name, Last_Name, Party, Party_Short, Gender)

#### Data in waffle format
waffle <- waffle_iron(twitter_user_NRSR, aes_d(group = Gender), rows = 15)
# NOTE(review): names are attached by row position. This assumes that
# waffle_iron() returns exactly one row per user in the same order as
# twitter_user_NRSR — confirm, otherwise names and tiles do not match.
waffle$name <- as.character(
  paste0(twitter_user_NRSR$First_Name, " ", twitter_user_NRSR$Last_Name)
)
waffle$gender[waffle$group == "f"] <- "Frauen"
waffle$gender[waffle$group == "m"] <- "Männer"
waffle$gender <- factor(waffle$gender, levels = c("Männer", "Frauen"))

### Create plot
# One tile per user, coloured by gender; all axis and grid decoration removed.
# guides(fill = "none") replaces the deprecated guides(fill = FALSE).
plot2 <- ggplot(
  waffle,
  aes(x, y, fill = gender, text = paste("Name: ", name))
) +
  geom_tile(size = 1, color = "white") +
  scale_fill_manual(
    name = "",
    values = c("Männer" = "#00C4A9", "Frauen" = "#826CB7")
  ) +
  labs(x = NULL, y = NULL) +
  guides(fill = "none") +
  theme_minimal() +
  theme(
    plot.margin = unit(x = c(0, 0, 1, 0), units = "cm"),
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(size = 14, family = "Helvetica"),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 8, face = "bold"),
    strip.text.x = element_text(size = 8, color = "#7a7d7e", face = "bold"),
    strip.text.y = element_text(size = 8, color = "#7a7d7e", face = "bold")
  )

## Add annotation: user counts per gender below the waffle
n_frauen <- nrow(filter(twitter_user_NRSR, Gender == "f"))
n_männer <- nrow(filter(twitter_user_NRSR, Gender == "m"))

plot2 <- plot2 +
  geom_label(
    aes(x = 1, y = -3, label = paste(n_frauen, "Frauen")),
    lineheight = 1.2,
    hjust = 0,
    vjust = 0.5,
    colour = "#555555",
    fill = "white",
    label.size = NA,
    family = "Helvetica",
    size = rel(4)
  ) +
  # geom_segment() has no `curvature` parameter (that belongs to geom_curve());
  # the stray argument was dropped, the segment is straight either way.
  geom_segment(
    aes(x = 3, y = 0, xend = 3, yend = -1.8),
    colour = "#555555",
    size = 0.1,
    linetype = "dotted"
  ) +
  geom_label(
    aes(x = 30, y = -3, label = paste(n_männer, "Männer")),
    lineheight = 1.2,
    hjust = 0,
    vjust = 0.5,
    colour = "#555555",
    fill = "white",
    label.size = NA,
    family = "Helvetica",
    size = rel(4)
  ) +
  geom_segment(
    aes(x = 32, y = 0, xend = 32, yend = -1.8),
    colour = "#555555",
    size = 0.1,
    linetype = "dotted"
  )

Geschlechterverhältnis der Kandidierenden auf Twitter

Wahlen 2019, 1220 User

# Print the waffle chart of Twitter users by gender
plot2

Geschlechterverhältnis der Kandidierenden auf Twitter nach Partei

Für das Geschlechterverhältnis der Kandidierenden auf Twitter wurden Datensätze des Bundesamts für Statistik herangezogen, welche alle Nationalratskandidierenden und alle Ständeratskandidierenden enthalten. Danach wurde für jede Partei und nach Geschlecht berechnet, wie gross der Anteil der Kandidierenden auf Twitter ist.

# Prepare data
## Candidate data
# Read one candidate file (semicolon-separated CSV) and keep candidates of the
# six large parties whose gender is recorded as "M" or "F".
#   url: location of the CSV file
# Returns a data frame with the columns name, vorname, Gender, Partei.
read_candidates <- function(url) {
  read.csv(url, sep = ";", encoding = "UTF-8", stringsAsFactors = FALSE) %>%
    filter(
      geschlecht %in% c("M", "F"),
      partei_bezeichnung_de %in% c("CVP", "FDP", "GLP", "GPS", "SP", "SVP")
    ) %>%
    select(name, vorname,
           "Gender" = geschlecht, "Partei" = partei_bezeichnung_de)
}

url <- "https://www.bfs.admin.ch/bfsstatic/dam/assets/9386465/appendix"
nr_2019 <- read_candidates(url)

url2 <- "https://www.bfs.admin.ch/bfsstatic/dam/assets/9386472/appendix"
sr_2019 <- read_candidates(url2)

# Candidates per party and gender; a person appearing in both files is
# counted once (de-duplicated on "first name last name").
kand <- rbind(nr_2019, sr_2019) %>%
  mutate(full_name = paste(vorname, name)) %>%
  distinct(full_name, .keep_all = TRUE) %>%
  group_by(Partei, Gender) %>%
  tally()

# Harmonise the labels with the ones used in the Twitter data
kand$Gender[kand$Gender == "M"] <- "Männer"
kand$Gender[kand$Gender == "F"] <- "Frauen"
kand$Partei[kand$Partei == "GLP"] <- "glp"
kand$Partei[kand$Partei == "GPS"] <- "Grüne"

kand$group <- "Kandidierende"

# Twitter data: distinct candidates per gender and party
twitt <- NR_SR_Jan_Okt_2019 %>%
  filter(
    Candidate.Staenderat == 1 | Candidate.Nationalrat == 1,
    Gender %in% c("f", "m"),
    Party_Short %in% c("CVP", "FDP", "glp", "Grüne", "SP", "SVP")
  ) %>%
  group_by(Gender) %>%
  distinct(Name, .keep_all = TRUE) %>%
  select(Name, First_Name, Last_Name, Party,
         "Partei" = Party_Short, Gender) %>%
  group_by(Gender, Partei) %>%
  tally()

twitt$Gender[twitt$Gender == "f"] <- "Frauen"
twitt$Gender[twitt$Gender == "m"] <- "Männer"

twitt$group <- "Twitter"

# Merge data
# tot: all candidates of a party (both genders);
# v:   Twitter users of one gender relative to that party total.
vrgl <- rbind(twitt, kand)
vrgl <- vrgl %>%
  group_by(Partei) %>%
  mutate(tot = sum(n[group == "Kandidierende"])) %>%
  ungroup() %>%
  group_by(Partei, Gender) %>%
  mutate(v = (n[group == "Twitter"] / tot)) %>%
  filter(group != "Kandidierende") %>%
  arrange(desc(v))

# Make plot
# Stacked horizontal bars per party with the percentage inside each segment.
# geom_text() supports neither a fill nor a label.size setting (both were
# ignored); the stacking group is now stated explicitly via `group`.
ant <- ggplot(vrgl, aes(x = reorder(Partei, v), y = v, fill = Gender)) +
  geom_bar(stat = "identity", alpha = .95) +
  geom_hline(yintercept = 0, size = 1, colour = "#333333") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1L)) +
  bbc_style() +
  scale_fill_manual(values = c("#826CB7", "#00C4A9")) +
  geom_text(
    aes(label = paste0(round(v * 100), "%"), group = Gender),
    position = position_stack(),
    hjust = 1.2,
    vjust = 0.4,
    colour = "white",
    family = "Helvetica",
    size = rel(4)
  ) +
  theme(
    legend.text = element_text(size = rel(1.2)),
    legend.justification = "left",
    plot.caption = element_text(size = rel(.9), hjust = 0),
    plot.title = element_text(size = rel(2)),
    plot.subtitle = element_text(size = rel(1.5))
  ) +
  coord_flip()

Anteil Kandidierende auf Twitter

Wahlen 2019, 3384 Kandidierende

# Print the bar chart of the share of candidates on Twitter
ant

Popularität

Die Popularität wurde nach Vorbild des Beitrags von Clau Dermont et al. (2019) analysiert.

Popularität der Kandidierenden auf Twitter

165’952 Tweets, 1237 Users

# Per-user popularity figures: the totals are computed inside each Name group
# and the table is then reduced to one row per displayed name (Name2).
data_pop <- NR_SR_Jan_Okt_2019 %>%
  filter(Gender %in% c("m", "f")) %>%
  group_by(Name) %>%
  mutate(
    Likes = sum(Favorite_count),
    Retweets = sum(Retweet_count),
    Followers = max(Followers_count),
    Name2 = paste(First_Name, Last_Name),
    `Likes + Retweets` = Likes + Retweets
  ) %>%
  select(
    Name2, Party_Short, Gender,
    Likes, Retweets, Followers, `Likes + Retweets`
  ) %>%
  distinct(Name2, .keep_all = TRUE)

# Both plotted measures as plain doubles
data_pop[["Likes + Retweets"]] <- as.numeric(data_pop[["Likes + Retweets"]])
data_pop[["Followers"]] <- as.numeric(data_pop[["Followers"]])

# Number of tweets with gender information and number of users in the plot
n_tw <- length(data_g$Reply_to_status_id)
n_u <- length(data_pop$Name)

# Accounts that get a text label: very many followers or very many reactions
pop_labelled <- filter(
  data_pop,
  Followers > 15000 | `Likes + Retweets` > 70000
)

# Scatter plot: followers against likes + retweets, coloured by gender
pop <- ggplot(
  data_pop,
  aes(x = Followers, y = `Likes + Retweets`, color = Gender)
) +
  geom_point() +
  # zero-size squares, drawn only for the legend keys
  geom_point(shape = 15, size = 0) +
  geom_hline(yintercept = 0, size = 1, colour = "#333333") +
  bbc_style() +
  labs(x = "Followers", y = "Likes + Retweets") +
  scale_colour_manual(
    name = "",
    values = c("f" = "#826CB7", "m" = "#00C4A9"),
    labels = c("Frauen", "Männer")
  ) +
  theme(
    plot.title = element_text(size = rel(2)),
    plot.subtitle = element_text(size = rel(1.5)),
    plot.caption = element_text(size = rel(.9), face = "italic", hjust = 0),
    axis.title = element_text(),
    axis.text.x = element_text(size = rel(.8), angle = 45, hjust = 1),
    axis.text.y = element_text(size = rel(.8)),
    legend.text = element_text(size = rel(1.2)),
    legend.justification = "left"
  ) +
  guides(colour = guide_legend(override.aes = list(size = 4, linetype = 0))) +
  # labels
  geom_text_repel(
    data = pop_labelled,
    aes(
      x = Followers, y = `Likes + Retweets`,
      label = paste0(Name2, " ", "(", Party_Short, ")")
    ),
    size = 4,
    show.legend = FALSE,
    nudge_y = 5000
  )

# Print the scatter plot
pop

Follower
## Dumbbell Charts: Follower
### Prepare data
# Mean follower count per gender and party, the difference men minus women,
# and that difference as a percentage of the men's value.
# NOTE(review): the mean runs over the rows of data_bp; if data_bp holds one
# row per tweet, users are weighted by how often they tweet — confirm.
follower_p <- data_bp %>%
  group_by(Gender, Party_Short) %>%
  summarise("Anzahl Follower" = round(mean(Followers_count, na.rm = TRUE))) %>%
  ungroup() %>%
  group_by(Party_Short) %>%
  mutate(
    "Difference" =
      (`Anzahl Follower`[Gender == "m"] - `Anzahl Follower`[Gender == "f"])
  ) %>%
  spread(Gender, `Anzahl Follower`) %>%
  arrange(desc(Difference)) %>%
  mutate("perc_diff" = round((100 / m) * Difference))

# Rename the gender columns by name rather than by position, so that a change
# in column order cannot silently relabel the wrong column.
names(follower_p)[names(follower_p) == "f"] <- "Frauen"
names(follower_p)[names(follower_p) == "m"] <- "Männer"

# Signed difference as text, e.g. "+ 120" or "- 35"
follower_p$diff.n <- ifelse(
  follower_p$Difference >= 0,
  paste("+", follower_p$Difference),
  paste("-", (-1 * follower_p$Difference))
)


## Function for labelling percentage
# Format numbers as whole percentages, e.g. 12.4 -> "12%".
#   x: numeric vector
# Returns a character vector of the same length. The value is now returned
# visibly; previously it was the result of an assignment and hence invisible.
percent <- function(x) {
  sprintf("%d%%", round(x))
}

### Create basic plot
plot6 <- ggplot(
  follower_p,
  aes(x = Frauen, xend = Männer, y = Party_Short, group = Party_Short)
) +
  geom_dumbbell(
    colour = "#dddddd",
    size = 3,
    colour_x = "#826CB7",
    colour_xend = "#00C4A9"
  ) +
  bbc_style() +
  theme(
    axis.text.y = element_text(size = 12),
    axis.text.x = element_text(size = 12),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 16)
  ) +

  ### Add Labels
  geom_text(aes(x = Frauen), label = follower_p$Frauen,
            color = "#717D8C", size = 3.75, vjust = 2.5) +
  geom_text(aes(x = Männer), label = follower_p$Männer,
            color = "#717D8C", size = 3.75, vjust = 2.5, hjust = .75) +

  ### Add legend (written above the SVP row only)
  geom_text(
    data = filter(follower_p, Party_Short == "SVP"),
    aes(x = Frauen, y = Party_Short),
    label = "Frauen", color = "#826CB7", size = 4, fontface = "bold",
    vjust = -2, hjust = .8
  ) +
  geom_text(
    data = filter(follower_p, Party_Short == "SVP"),
    aes(x = Männer, y = Party_Short),
    label = "Männer", color = "#00C4A9", size = 4, fontface = "bold",
    vjust = -2
  ) +

  ### Add annotation
  geom_label(
    aes(x = 2500, y = 2.5,
        label = "Bloss die Frauen der\nFDP haben mehr Follower\nals die Männer"),
    lineheight = 1.2,
    hjust = 0,
    vjust = 0.5,
    colour = "#555555",
    fill = "white",
    label.size = NA,
    family = "Helvetica",
    size = 4
  ) +
  geom_curve(
    aes(x = 2480, y = 2.5, xend = 1850, yend = 2),
    colour = "#555555",
    size = 0.1,
    curvature = -0.1,
    arrow = arrow(length = unit(0.02, "npc"))
  )

Durchschnittliche Anzahl Follower pro Partei

pro Partei

# Print the follower dumbbell chart
plot6

Retweets
## Dumbbell Charts: Retweets
### Prepare data
# Mean retweet count per gender and party plus the difference men minus women;
# rows are ordered by the absolute difference.
retweets_p <- data_bp %>%
  group_by(Gender, Party_Short) %>%
  summarise("Anzahl Retweets" = round(mean(Retweet_count, na.rm = TRUE))) %>%
  ungroup() %>%
  group_by(Party_Short) %>%
  mutate(
    "Difference" =
      (`Anzahl Retweets`[Gender == "m"] - `Anzahl Retweets`[Gender == "f"])
  ) %>%
  spread(Gender, `Anzahl Retweets`) %>%
  arrange(desc(abs(Difference)))

# Rename the gender columns by name rather than by position, so that a change
# in column order cannot silently relabel the wrong column.
names(retweets_p)[names(retweets_p) == "f"] <- "Frauen"
names(retweets_p)[names(retweets_p) == "m"] <- "Männer"

# Signed difference as text, e.g. "+ 3" or "- 7"
retweets_p$diff.n <- ifelse(
  retweets_p$Difference >= 0,
  paste("+", retweets_p$Difference),
  paste("-", (-1 * retweets_p$Difference))
)

### Create basic plot
plot_retweets_p <- ggplot(
  retweets_p,
  aes(x = Frauen, xend = Männer,
      y = reorder(Party_Short, abs(Difference)), group = Party_Short)
) +
  geom_dumbbell(
    colour = "#dddddd",
    size = 3,
    colour_x = "#826CB7",
    colour_xend = "#00C4A9"
  ) +
  bbc_style() +
  theme(
    axis.text.y = element_text(size = 12),
    axis.text.x = element_text(size = 12),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 16)
  ) +

  ### Add Labels
  geom_text(aes(x = Frauen), label = retweets_p$Frauen,
            color = "#717D8C", size = 3.75, vjust = 2.5) +
  geom_text(aes(x = Männer), label = retweets_p$Männer,
            color = "#717D8C", size = 3.75, vjust = 2.5, hjust = .75) +

  ### Add legend (written above the "Grüne" row only)
  geom_text(
    data = filter(retweets_p, Party_Short == "Grüne"),
    aes(x = Frauen, y = reorder(Party_Short, abs(Difference))),
    label = "Frauen", color = "#826CB7", size = 4, fontface = "bold",
    vjust = -2, hjust = .8
  ) +
  geom_text(
    data = filter(retweets_p, Party_Short == "Grüne"),
    aes(x = Männer, y = reorder(Party_Short, abs(Difference))),
    label = "Männer", color = "#00C4A9", size = 4, fontface = "bold",
    vjust = -2
  ) +

  ### Add annotation
  geom_label(
    aes(x = 16, y = 3.5,
        label = "Die Beiträge von linken\nFrauen werden deutlich\nöfter geteilt "),
    lineheight = 1.2,
    hjust = 0,
    vjust = 0.5,
    colour = "#555555",
    fill = "white",
    label.size = NA,
    family = "Helvetica",
    size = 4
  ) +
  geom_curve(
    aes(x = 23, y = 3.5, xend = 26, yend = 4.5),
    colour = "#555555",
    size = 0.1,
    curvature = 0.3,
    arrow = arrow(length = unit(0.02, "npc"))
  )

Anzahl geteilte Beiträge

1220 User

# Print the retweet dumbbell chart
plot_retweets_p

Anzahl Erwähnungen

Für die Berechnung der Anzahl Erwähnungen wurde ein Datensatz erstellt, welcher den Namen und das Geschlecht des Users oder der Userin enthält, welche den Beitrag erstellt hat, sowie den Namen und das Geschlecht des Users oder der Userin, welche erwähnt wurde. Nur Posts, welche mindestens eine Erwähnung enthalten, wurden in die Analyse miteinbezogen.

## Create a data set with one row per (poster, mentioned user id)
# NOTE(review): `data` is not created anywhere in the visible part of this
# document — presumably it is provided by the loaded .RData file; confirm.
mentions <- data %>%
  filter(Is_retweet != TRUE) %>% # excluding retweets
  select(Name, Gender, Mentions_user_id) %>%
  mutate_all(na_if, "") %>% # empty strings become NA
  # comparing with "NA" also removes real NA values, because filter() drops
  # rows whose condition evaluates to NA
  filter(Mentions_user_id != "NA") %>%
  separate_rows(Mentions_user_id) %>% # one row per mentioned id
  mutate_all(na_if, "") %>%
  filter(Mentions_user_id != "NA") %>%
  # presumably "c" is a leftover of ids stored as c(...) text — verify
  filter(Mentions_user_id != "c")

# Drop Status_id if it is present.
# The former `df[, -which(names(df) %in% "Status_id")]` selects zero columns
# when the column is absent, because which() then returns integer(0); the
# logical mask below keeps all other columns in either case.
mentions <- mentions[, !(names(mentions) %in% "Status_id")]
names(mentions) <- c("Name_poster", "Gender_poster", "User_id")

## Merge with the original data set to get the gender of the mentioned user
gender <- data %>% select(User_id, Name, Gender, Party_Short)
gender <- gender[, !(names(gender) %in% "Status_id")]
gender <- gender %>%
  distinct(User_id, .keep_all = TRUE)

# Keep only mentions whose user id occurs in the data set itself
mentions_final <- inner_join(mentions, gender, by = "User_id")
save(mentions_final, file = "mentions_final.Rda")
## Bar chart: Mentions
### Prepare data
# Restore the mentions table written to disk by the previous step
load("mentions_final.Rda")

# Mentions by party: look up the party of each poster (one row per name,
# large parties only) and attach it to every mention
name_party <- data_bp %>%
  group_by(Name) %>%
  distinct(Name, .keep_all = TRUE) %>%
  select(Name_poster = Name, Party_Poster = Party_Short)

mentions_bp <- inner_join(mentions_final, name_party, by = "Name_poster")

# Mentions by gender: count the mentions each person receives, then take the
# rounded mean of these counts per gender
mentions <- mentions_final %>%
  filter(Gender %in% c("m", "f")) %>%
  group_by(Name, Gender) %>%
  tally() %>%
  arrange(desc(n)) %>%
  ungroup() %>%
  group_by(Gender) %>%
  mutate(n2 = mean(n, na.rm = TRUE)) %>%
  summarize("Anzahl Erwähnungen" = round(mean(n2, na.rm = TRUE)))
# Who mentions whom by party?

# Count the mentions from posters of one gender to mentioned users of one
# gender, per party of the poster (six large parties only).
#   poster_gender, mentioned_gender: "m" or "f"
#   label: name of the resulting count column
# Returns a data frame with the columns Party_Poster and `label`.
# The former pipeline tallied per mentioned name, summed these tallies per
# party and took the mean of that constant — which is the number of rows per
# party, computed here directly.
count_mentions <- function(poster_gender, mentioned_gender, label) {
  counts <- mentions_bp %>%
    ungroup() %>%
    filter(
      Gender_poster == poster_gender,
      Gender == mentioned_gender,
      Party_Poster %in% c("CVP", "FDP", "glp", "Grüne", "SP", "SVP")
    ) %>%
    group_by(Party_Poster) %>%
    summarise(n = n())
  counts <- as.data.frame(counts)
  names(counts)[names(counts) == "n"] <- label
  counts
}

men_men_p <- count_mentions("m", "m", "Männer → Männer")
men_women_p <- count_mentions("m", "f", "Männer → Frauen")
women_men_p <- count_mentions("f", "m", "Frauen → Männer")
women_women_p <- count_mentions("f", "f", "Frauen → Frauen")

# Combine the four tables by party. The former cbind() matched rows by
# position (it relied on identical row order and row count) and produced
# duplicate column names; joining on Party_Poster avoids both.
df <- Reduce(
  function(left, right) full_join(left, right, by = "Party_Poster"),
  list(men_men_p, men_women_p, women_men_p, women_women_p)
)
# A party missing from one table has no such mentions: count 0, not NA
df[is.na(df)] <- 0
df <- df %>%
  select(Party_Poster, `Männer → Männer`, `Männer → Frauen`,
         `Frauen → Männer`, `Frauen → Frauen`) %>%
  arrange(desc(`Männer → Männer`))

# Total mentions made by men (columns 2-3) and by women (columns 4-5)
df$sum_men <- rowSums(df[2:3])
df$sum_women <- rowSums(df[4:5])

# Convert the counts to whole percentages of the poster gender's total
df2 <- data.frame(
  Partei = df$Party_Poster,
  lapply(df[2:3], function(x) round((x / df$sum_men) * 100)),
  lapply(df[4:5], function(x) round((x / df$sum_women) * 100))
)
names(df2) <- c("Partei", "Männer → Männer", "Männer → Frauen", "Frauen → Männer", "Frauen → Frauen")

# Render the table.
# NOTE(review): the figures in the footnote are hard-coded text and do not
# follow the data — update them by hand when the data changes.
df2 %>%
  arrange(desc(`Männer → Männer`)) %>%
  kable(row.names = FALSE) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    fixed_thead = FALSE, full_width = TRUE, font_size = 13
  ) %>%
  footnote(general = "Beispiel glp: In 81% der Tweets der glp-Kandidaten wurden andere Männer erwähnt, in 19% Frauen. In 69% der Tweets von glp-Kandidatinnen wurden Männer erwähnt, in 31% Frauen.", general_title = "Lesehilfe: ")
Partei Männer → Männer Männer → Frauen Frauen → Männer Frauen → Frauen
glp 81 19 69 31
FDP 77 23 57 43
CVP 70 30 55 45
Grüne 70 30 50 50
SP 69 31 51 49
SVP 67 33 71 29
Lesehilfe:
Beispiel glp: In 81% der Tweets der glp-Kandidaten wurden andere Männer erwähnt, in 19% Frauen. In 69% der Tweets von glp-Kandidatinnen wurden Männer erwähnt, in 31% Frauen.