#Packages used
library(haven)
library(tidyverse)
library(ggpubr)
library(scales)
# Prep data ----
# Read the Selects cumulative file (Stata .dta) from the working directory.
selects <- read_dta("495_Selects_CumulativeFile_Data_1971-2019_v2.2.0.dta")

# Analysis sample for the most recent wave(s): keep rows from 2018 onwards
# whose left-right score (lr1), ip13, ip14 and age pass the thresholds below.
# filter() only keeps rows where every condition is TRUE, so rows with NA in
# any of these columns are dropped as well.
# NOTE(review): values below the thresholds are presumably missing-value
# codes -- confirm against the codebook.
selects.short <- selects %>%
  filter(year >= 2018, lr1 >= 0, ip13 >= 1, ip14 >= 0, age > 0) %>%
  select(year, age, sex, lr1, vdn1b, ip13, ip14)
# 2018 gender gap visualization ----
# Mean left-right self-placement (lr1) for every age/sex combination of the
# filtered sample. `.groups = "drop"` returns an ungrouped tibble and avoids
# the message summarise() prints about the leftover `age` grouping.
# NOTE(review): selects.short keeps year >= 2018 while the title says 2018 --
# confirm which survey wave is actually shown.
group.df <- selects.short %>%
  group_by(age, sex) %>%
  summarise(mean_lr = mean(lr1), .groups = "drop")

# One point per age/sex cell plus a smoothed trend line per sex.
# The smoother's method and formula are stated explicitly so the fit does not
# depend on ggplot2's row-count-based default and no message is printed.
# The two colours/labels are assigned to the factor levels of `sex` in sorted
# order, i.e. the lower code is shown as "Männer".
ggplot(group.df, aes(x = age, y = mean_lr, colour = as.factor(sex))) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "loess", formula = y ~ x) +
  scale_color_manual(
    values = c("blue", "#eb7e83"),
    labels = c("Männer", "Frauen")
  ) +
  theme(legend.title = element_blank()) +
  labs(
    title = "2018 wählten junge Frauen linker als gleichaltrige Männer.",
    subtitle = "Selbstpositionierung von Frauen und Männern nach Alter, 0 = links, 10 = rechts",
    y = "links-rechts",
    # German axis label, consistent with the other age plots in this script.
    x = "Alter"
  )

# 1971 gender gap visualization ----

# Respondents of the 1971 wave with a non-negative left-right score (lr1) and
# a positive age. Rows are filtered first so that `geschlecht` is only derived
# from rows that are actually kept.
# `geschlecht` splits the sample into men, women aged 18-50 and women over 50.
# Every category has an explicit condition; rows matching none of them get NA
# instead of silently falling into one of the groups.
# NOTE(review): assumes sex is coded 0 = male, 1 = female, as the plot labels
# below imply -- confirm against the codebook.
selects.old.gg <- selects %>%
  select(year, age, sex, lr1, vdn1b, ip13) %>%
  filter(year == 1971, lr1 >= 0, age > 0) %>%
  mutate(geschlecht = case_when(
    sex == 0 ~ "Mann",
    sex == 1 & age >= 18 & age <= 50 ~ "F18-50",
    sex == 1 & age > 50 ~ "F51+",
    TRUE ~ NA_character_
  ))

# Mean left-right self-placement per age/sex cell; `.groups = "drop"` returns
# an ungrouped tibble and silences summarise()'s grouping message.
group.df <- selects.old.gg %>%
  group_by(age, sex) %>%
  summarise(mean_lr = mean(lr1), .groups = "drop")

# One point per age/sex cell plus a smoothed trend line per sex. Method and
# formula are stated explicitly so the fit does not depend on ggplot2's
# row-count-based default.
ggplot(group.df, aes(x = age, y = mean_lr, colour = as.factor(sex))) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "loess", formula = y ~ x) +
  scale_color_manual(
    values = c("blue", "#eb7e83"),
    labels = c("Männer", "Frauen")
  ) +
  theme(legend.title = element_blank()) +
  labs(
    title = "1971 wählten die Frauen nicht linker als die Männer. Ältere Frauen wählten sogar konservativer.",
    subtitle = "Selbstpositionierung von Frauen und Männern nach Alter, 0 = links, 10 = rechts",
    y = "links-rechts",
    x = "Alter"
  )

# Environmental protection visualization ----

# Mean of ip14 for every age/sex combination of the filtered sample.
# `.groups = "drop"` returns an ungrouped tibble and avoids the message
# summarise() prints about the leftover `age` grouping.
group.nature <- selects.short %>%
  group_by(age, sex) %>%
  summarise(mean_ip14 = mean(ip14), .groups = "drop")

# One point per age/sex cell plus a loess trend line per sex. The formula is
# given explicitly so geom_smooth() does not print its default-formula message.
# Title and subtitle carry corrected spellings ("priorisieren",
# "Wirtschaftswachstum").
ggplot(group.nature, aes(x = age, y = mean_ip14, colour = as.factor(sex))) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "loess", formula = y ~ x) +
  scale_color_manual(
    values = c("blue", "#eb7e83"),
    labels = c("Männer", "Frauen")
  ) +
  theme(legend.title = element_blank()) +
  labs(
    title = "Junge Frauen priorisieren den Umweltschutz verstärkter als gleichaltrige Männer",
    subtitle = "Selbstpositionierung Umwelt vs. Wirtschaft: 1 = für Umweltschutz & 5 = für Wirtschaftswachstum",
    x = "Alter",
    y = "Umwelt - Wirtschaft"
  )

# Respondents from 2018 onwards with sex == 1 (shown as women in the plots
# below), a non-negative left-right score (lr1) and a non-negative
# most-important-problem code (mip1), split at age 50.

# Age 50 and younger.
selects.mip.young <- selects %>%
  filter(year >= 2018, lr1 >= 0, mip1 >= 0, age > 0, sex == 1, age <= 50) %>%
  select(year, age, sex, lr1, mip1)

# Older than 50.
selects.mip.old <- selects %>%
  filter(year >= 2018, lr1 >= 0, mip1 >= 0, age > 0, sex == 1, age > 50) %>%
  select(year, age, sex, lr1, mip1)


# Frequency tables: one row per mip1 code with the number of respondents (n)
# and that code's share of the age group in percent.
tr2.young <- selects.mip.young %>%
  count(mip1) %>%
  mutate(percentage = n / sum(n) * 100)

tr2.old <- selects.mip.old %>%
  count(mip1) %>%
  mutate(percentage = n / sum(n) * 100)


# Display labels for the mip1 topic codes. Defined once so that both age
# groups are guaranteed to use the same mapping.
topic.labels <- c(
  "1" = "agriculture",
  "2" = "economics",
  "3" = "education.youth.culture",
  "4" = "Umwelt und Energie",
  "5" = "Europäische Integration",
  "6" = "finances.taxes",
  "7" = "gender issues.discrimination",
  "8" = "Immigration und Asylfragen",
  "9" = "international relations.army",
  "10" = "labour market",
  "11" = "Law and order",
  "12" = "Politisches System",
  "13" = "Gesundheit",
  "14" = "public service",
  "15" = "regions.national cohesion",
  "16" = "Soziale Sicherheit",
  "17" = "other problem",
  "18" = "no problem"
)

# Turn the numeric codes into a labelled factor. `!!!` splices the named
# vector into recode_factor() as individual `code = label` arguments; the
# resulting factor levels follow the order of topic.labels.
tr2.young$topics <- recode_factor(as.factor(tr2.young$mip1), !!!topic.labels)
tr2.old$topics <- recode_factor(as.factor(tr2.old$mip1), !!!topic.labels)
# Sanity check on the recoded labels of the young-women table. tr2.young holds
# one row per mip1 code, so every label in the output below is counted once.
summary(tr2.young$topics) # All 18 topics
# Recorded console output of the call above:
##                  agriculture                    economics 
##                            1                            1 
##      education.youth.culture           Umwelt und Energie 
##                            1                            1 
##      Europäische Integration               finances.taxes 
##                            1                            1 
## gender issues.discrimination   Immigration und Asylfragen 
##                            1                            1 
## international relations.army                labour market 
##                            1                            1 
##                Law and order           Politisches System 
##                            1                            1 
##                   Gesundheit               public service 
##                            1                            1 
##    regions.national cohesion           Soziale Sicherheit 
##                            1                            1 
##                other problem                   no problem 
##                            1                            1
# Six most frequently named topics per age group, largest share first.
short.tr2.young <- tr2.young %>%
  arrange(desc(percentage)) %>%
  head(n = 6)
short.tr2.old <- tr2.old %>%
  arrange(desc(percentage)) %>%
  head(n = 6)

# Plot young women ----
# Horizontal bar chart of the six most-named topics, bars ordered by share.
# The fill vector supplies one colour per row of short.tr2.young in row order
# (largest share first), so the table must contain exactly six rows.
# The axis limits are set inside scale_y_continuous(): a separate ylim() call
# would be replaced by this scale and its limits discarded. As with ylim(),
# values outside 0-32 are dropped from the plot.
ggplot(short.tr2.young, aes(x = reorder(topics, percentage), y = percentage)) +
  geom_col(
    width = .7,
    fill = c("#8b1a22", "#eb7e83", "#edb81d", "#947032", "#565b7b", "#86b0a6")
  ) +
  theme(
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12)
  ) +
  coord_flip() +
  scale_y_continuous(
    limits = c(0, 32),
    # percentage is already on a 0-100 scale, hence scale = 1.
    labels = scales::percent_format(scale = 1)
  ) +
  labs(
    x = "",
    y = "",
    subtitle = "Frauen bis und mit 50",
    title = "Die 6 meistgenannten Themen"
  )

# Plot older women ----
# Horizontal bar chart of the six most-named topics, bars ordered by share.
# The fill vector supplies one colour per row of short.tr2.old in row order
# (largest share first), so the table must contain exactly six rows.
# The axis limits are set inside scale_y_continuous(): a separate ylim() call
# would be replaced by this scale and its limits discarded. As with ylim(),
# values outside 0-32 are dropped from the plot.
ggplot(short.tr2.old, aes(x = reorder(topics, percentage), y = percentage)) +
  geom_col(
    width = .7,
    fill = c("#8b1a22", "#eb7e83", "#565b7b", "#edb81d", "#947032", "#86b0a6")
  ) +
  theme(
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12)
  ) +
  coord_flip() +
  scale_y_continuous(
    limits = c(0, 32),
    # percentage is already on a 0-100 scale, hence scale = 1.
    labels = scales::percent_format(scale = 1)
  ) +
  labs(x = "", y = "", subtitle = "Frauen über 50", title = "")