Code für Blogbeitrag: ‘Gen Z: Die finanzielle Zufriedenheit sinkt Jahr um Jahr’

Author

Marc Stadler

Published

June 25, 2024

library(tidyverse)
library(haven)
library(ggplot2)

# Load the data: for every wave 00-22 read the `_H` and `_P` SPSS files
# (presumably household- and person-level files -- confirm against the SHP
# documentation) and store them as global objects data<yy>_H / data<yy>_I.
for (year in sprintf("%02d", 0:22)) {
  h_file <- paste0("SHP", year, "_H_USER.sav")
  p_file <- paste0("SHP", year, "_P_USER.sav")

  assign(paste0("data", year, "_H"), read_sav(h_file))
  assign(paste0("data", year, "_I"), read_sav(p_file))
}


### Merging the data sets

# For each wave, join the `_H` and `_I` data frames on the wave-specific key
# IDHOUS<yy> and keep the result as the global object combined_data_<yy>.
for (year in sprintf("%02d", 0:22)) {
  h_data <- get(paste0("data", year, "_H"))
  i_data <- get(paste0("data", year, "_I"))
  merge_key <- paste0("IDHOUS", year)

  combined_data <- merge(h_data, i_data, by = merge_key)
  assign(paste0("combined_data_", year), combined_data)
}


# Creating a variable year for every data frame

# The two-digit wave suffix is converted into a four-digit calendar year
# (e.g. "07" -> 2007) and stored in a new column `year`.
for (year in sprintf("%02d", 0:22)) {
  df_name <- paste0("combined_data_", year)

  combined_data <- get(df_name)
  combined_data[["year"]] <- 2000 + as.numeric(year)

  assign(df_name, combined_data)
}

# Harmonise the wave-specific column names (gender, age, education,
# satisfaction with financial situation, canton).
#
# Each wave carries its two-digit suffix in the column names (SEX<yy>,
# AGE<yy>, EDUCAT<yy>, P<yy>I01, CANTON<yy>). They are renamed to the
# wave-independent names SEXXX, AGEXX, EDUCATXX, PXXI01 and CANTONXX so the
# yearly data frames can be stacked later. Columns that are absent in a wave
# are silently left alone; all other columns keep their names.
harmonised_names <- c("SEXXX", "AGEXX", "EDUCATXX", "PXXI01", "CANTONXX")

for (year in sprintf("%02d", 0:22)) {
  df_name <- paste0("combined_data_", year)
  combined_data <- get(df_name, envir = .GlobalEnv)

  # wave-specific names, in the same order as `harmonised_names`
  wave_names <- c(
    paste0("SEX", year),
    paste0("AGE", year),
    paste0("EDUCAT", year),
    paste0("P", year, "I01"),
    paste0("CANTON", year)
  )

  combined_data <- combined_data %>%
    rename_with(function(cols) {
      hit <- match(cols, wave_names)
      ifelse(is.na(hit), cols, harmonised_names[hit])
    })

  assign(df_name, combined_data, envir = .GlobalEnv)
}


### Select variables of interest across data frames

# Reduce every yearly data frame to the harmonised analysis columns.
analysis_cols <- c("year", "SEXXX", "AGEXX", "EDUCATXX", "PXXI01", "CANTONXX")

for (year in sprintf("%02d", 0:22)) {
  df_name <- paste0("combined_data_", year)

  combined_data <- dplyr::select(
    get(df_name, envir = .GlobalEnv),
    dplyr::all_of(analysis_cols)
  )

  assign(df_name, combined_data, envir = .GlobalEnv)
}


### Generations for all years
## Assign every respondent to a generation based on age in the survey year
# `age` is the highest age that still counts as Generation Z in the current
# wave: 3 in wave 00, growing by one with every later wave. All other
# generation bands are expressed as offsets from it.
age <- 3
for (year in sprintf("%02d", 0:22)) {
  df_name <- paste0("combined_data_", year)
  combined_data <- get(df_name, envir = .GlobalEnv)

  # Ages outside every band (including missing ages) end up in "Other".
  combined_data <- combined_data %>%
    dplyr::mutate(
      generation = as.factor(case_when(
        AGEXX <= age ~ "Generation Z",
        AGEXX >= (age + 1) & AGEXX <= (age + 16) ~ "Millennials",
        AGEXX >= (age + 17) & AGEXX <= (age + 32) ~ "Generation X",
        AGEXX >= (age + 33) & AGEXX <= (age + 51) ~ "Baby Boomers",
        AGEXX >= (age + 52) & AGEXX <= (age + 69) ~ "Silent Generation",
        TRUE ~ "Other"
      ))
    )

  assign(df_name, combined_data, envir = .GlobalEnv)

  # every cohort is one year older in the next wave
  age <- age + 1
}

# Combine all the data frames into one
# The yearly data frames are fetched in wave order and row-bound one after
# another, starting from an empty data frame.
yearly_frames <- mget(
  paste0("combined_data_", sprintf("%02d", 0:22)),
  envir = .GlobalEnv
)
combined_all <- Reduce(rbind, yearly_frames, data.frame())

##################################### Recoding and NA's

# Pooled data:
#   - keep only people aged 18 and above
#   - gender as a factor, with "other" set to NA (binary gender for the sake
#     of simplicity)
#   - cantons as a factor
#   - drop rows with a missing canton or a missing dependent variable
combined_all <- combined_all %>%
  dplyr::filter(AGEXX >= 18) %>%
  dplyr::mutate(
    SEXXX = dplyr::recode(as_factor(SEXXX), "other" = NA_character_),
    CANTONXX = as_factor(CANTONXX)
  ) %>%
  dplyr::filter(!is.na(CANTONXX)) %>%
  dplyr::filter(!is.na(PXXI01))

### for 22
# The 2022 wave is kept as a separate data frame: cantons as a factor, rows
# with a missing canton dropped.
combined_data_22 <- combined_data_22 %>%
  dplyr::mutate(CANTONXX = as_factor(CANTONXX)) %>%
  dplyr::filter(!is.na(CANTONXX))


##################################### Descriptive statistics

## Gender

# Frequency table including the NA count
table(combined_all$SEXXX, useNA = "always")

# Bar chart of gender with German labels on both the axis and the legend
gender_labels <- c("man" = "Mann", "woman" = "Frau")
gender_colours <- c("man" = "purple", "woman" = "darkred")

ggplot(combined_all, aes(x = SEXXX, fill = SEXXX)) +
  geom_bar(alpha = 0.7) +
  scale_fill_manual(
    name = "Kategorien",
    values = gender_colours,
    labels = gender_labels
  ) +
  scale_x_discrete(labels = gender_labels) +
  ggtitle("Histogram Geschlecht") +
  xlab("Geschlecht") +
  ylab("Anzahl") +
  theme_minimal()

## Satisfaction with financial situation - everyone

table(combined_all$PXXI01, useNA = "always")
hist(combined_all$PXXI01)

# Share (in percent) of respondents per satisfaction score.
# `after_stat(prop)` replaces the deprecated `..prop..` notation.
# NOTE(review): `combined_all` holds every stacked wave at this point, while
# title and caption say 2022 -- confirm whether the plot should be restricted
# to year == 2022 or the labels should be changed.
plot_fin_sat_des <- ggplot(combined_all, aes(x = PXXI01)) +
  geom_bar(aes(y = after_stat(prop) * 100), fill = "maroon", stat = "count") +
  scale_y_continuous(limits = c(0, 30)) +
  labs(title = "Finanzielle Zufriedenheit in der Schweiz | 2022",
       x = "Finanzielle Zufriedenheit | 0: sehr unzufrieden, 10: sehr zufrieden",
       y = "Prozent",
       caption = "Daten: SHP 2022") +
  theme_minimal()

plot_fin_sat_des

ggsave(plot_fin_sat_des, filename = "plot_fin_sat_des.jpg",
       width = 12, height = 6)


## Satisfaction with financial situation - Gen Z
gen_z <- combined_all %>% dplyr::filter(generation == "Generation Z")
table(gen_z$PXXI01, useNA = "always")
hist(gen_z$PXXI01)

# Share (in percent) of Gen Z respondents per satisfaction score.
# `after_stat(prop)` replaces the deprecated `..prop..` notation.
# NOTE(review): `gen_z` is taken from the pooled data of all waves, while
# title and caption say 2022 -- confirm the intended year range.
plot_fin_sat_des_gen_z <- ggplot(gen_z, aes(x = PXXI01)) +
  geom_bar(aes(y = after_stat(prop) * 100), fill = "maroon", stat = "count") +
  scale_y_continuous(limits = c(0, 30)) +
  labs(title = "Finanzielle Zufriedenheit in der Schweiz bei der Gen Z | 2022",
       x = "Finanzielle Zufriedenheit | 0: sehr unzufrieden, 10: sehr zufrieden",
       y = "Prozent",
       caption = "Daten: SHP 2022") +
  theme_minimal()

plot_fin_sat_des_gen_z

ggsave(plot_fin_sat_des_gen_z, filename = "plot_fin_sat_des_gen_z.jpg",
       width = 12, height = 6)


## Satisfaction with financial situation - everyone except for Gen Z
not_gen_z <- combined_all %>% dplyr::filter(generation != "Generation Z")
table(not_gen_z$PXXI01, useNA = "always")
hist(not_gen_z$PXXI01)

# Share (in percent) of non-Gen-Z respondents per satisfaction score.
# The title previously said "bei der Gen Z" (copied from the Gen Z plot); it
# now names the group that is actually plotted, using the same wording as the
# "Übrige Bevölkerung" label of the combined chart.
# `after_stat(prop)` replaces the deprecated `..prop..` notation.
# NOTE(review): `not_gen_z` is taken from the pooled data of all waves, while
# title and caption say 2022 -- confirm the intended year range.
plot_fin_sat_des_not_gen_z <- ggplot(not_gen_z, aes(x = PXXI01)) +
  geom_bar(aes(y = after_stat(prop) * 100), fill = "maroon", stat = "count") +
  scale_y_continuous(limits = c(0, 30)) +
  labs(title = "Finanzielle Zufriedenheit in der Schweiz bei der übrigen Bevölkerung | 2022",
       x = "Finanzielle Zufriedenheit | 0: sehr unzufrieden, 10: sehr zufrieden",
       y = "Prozent",
       caption = "Daten: SHP 2022") +
  theme_minimal()

plot_fin_sat_des_not_gen_z

ggsave(plot_fin_sat_des_not_gen_z, filename = "plot_fin_sat_des_not_gen_z.jpg",
       width = 12, height = 6)

# Combining the two histograms - Gen Z and not Gen Z
# `group` splits the sample into Generation Z and everybody else.
combined_all <- combined_all %>%
  mutate(group = ifelse(generation == "Generation Z", "Generation Z", "Übrige Bevölkerung"))

# Dodged bars; because `fill = group` defines the groups, the proportions are
# computed within each group. `after_stat(prop)` replaces the deprecated
# `..prop..` notation.
plot_fin_sat_des_gen_z_rest <- ggplot(combined_all, aes(x = PXXI01, fill = group)) +
  geom_bar(aes(y = after_stat(prop) * 100), stat = "count",
           position = position_dodge(width = 0.8)) +
  scale_y_continuous(limits = c(0, 30)) +
  scale_fill_manual(values = c("Generation Z" = "maroon", "Übrige Bevölkerung" = "darkblue")) +
  labs(title = "Finanzielle Zufriedenheit in der Schweiz | 2022",
       x = "Finanzielle Zufriedenheit | 0: sehr unzufrieden, 10: sehr zufrieden",
       y = "Prozent",
       caption = "Daten: SHP 2022",
       fill = "Generation") +
  theme_minimal()

plot_fin_sat_des_gen_z_rest

ggsave(plot_fin_sat_des_gen_z_rest, filename = "plot_fin_sat_des_gen_z_rest.jpg",
       width = 12, height = 6)



## Age

ggplot(combined_all, aes(x = AGEXX)) +
  geom_bar(fill = "gray70") +
  ggtitle("Verteilung des Alters") +
  xlab("Alter") +
  ylab("Anzahl") +
  theme_minimal()

# Level of education

ggplot(combined_all, aes(x = EDUCATXX)) +
  geom_bar() +
  ggtitle("Verteilung der Bildung") +
  xlab("Bildungsniveau") +
  ylab("Anzahl") +
  theme_minimal()

# Canton (axis labels rotated so the canton names stay readable)
ggplot(combined_all, aes(x = CANTONXX)) +
  geom_bar(fill = "darkblue") +
  ggtitle("Verteilung der Kantone") +
  xlab("Kanton") +
  ylab("Anzahl") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Generations: fix the level order so the bars run from youngest to oldest
generation_order <- c(
  "Generation Z", "Millennials", "Generation X",
  "Baby Boomers", "Silent Generation", "Other"
)
combined_all$generation <- factor(combined_all$generation, levels = generation_order)

ggplot(combined_all, aes(x = generation)) +
  geom_bar(fill = "darkred") +
  ggtitle("Verteilung der Generationen") +
  xlab("Generation") +
  ylab("Anzahl") +
  theme_minimal()

#### Basic models to look at some correlations
#### Model where I want to look at differences in satisfaction with financial situation over the cantons
# Linear regression of the satisfaction score on canton dummies, fitted on the
# pooled data; the cross table shows how many observations fall into each
# canton / score cell (including NAs).
ml1 <- lm(PXXI01 ~ as.factor(CANTONXX), data = combined_all)
summary(ml1)
table(combined_all$CANTONXX, combined_all$PXXI01, useNA = "always")

### Model with development over the years
# `year` enters as a numeric variable, i.e. as a single linear trend.
ml2 <- lm(PXXI01 ~ year, data = combined_all)
summary(ml2)

### Model with control variables
# Here `year` enters as a factor, i.e. one dummy per wave instead of a trend.
ml3 <- lm(PXXI01 ~ SEXXX + generation + EDUCATXX + as.factor(year) + CANTONXX, data = combined_all)
summary(ml3)

#### Visualization with map of Switzerland / Cantons
library(geomerge)
library(raster)
library(sf)

# Read the canton boundaries from the shapefile
canton_shapefile <- "swissBOUNDARIES3D_1_5_TLM_KANTONSGEBIET.shp"
swiss_cantons <- st_read(canton_shapefile)

# Quick visual check: plot only the geometry column as white polygons with
# black borders
plot(
  swiss_cantons["geometry"],
  main = "Map of Swiss Cantons",
  col = "white",
  border = "black"
)

### edit canton names in data frame

# Lookup from the canton labels in the survey data (CANTONXX) to the canton
# names the data is later matched against in the `NAME` column of the
# shapefile. Defined once and applied to both the pooled data and the 2022
# wave, so the two recodes cannot drift apart.
# NOTE(review): "TG Thurgovia" is the only key with a single space after the
# abbreviation -- confirm it matches the factor level in the data, otherwise
# Thurgau is left unrecoded and will not match the shapefile.
canton_name_map <- c(
  "GE  Geneva" = "Genève",
  "FR  Fribourg" = "Fribourg",
  "SO  Solothurn" = "Solothurn",
  "LU  Lucerne" = "Luzern",
  "ZH  Zurich" = "Zürich",
  "SZ  Schwyz" = "Schwyz",
  "AR  Appenzell Outer-Rhodes" = "Appenzell Ausserrhoden",
  "AI  Appenzell Inner-Rhodes" = "Appenzell Innerrhoden",
  "OW  Obwalden" = "Obwalden",
  "VD  Vaud" = "Vaud",
  "NE  Neuchatel" = "Neuchâtel",
  "BE  Berne" = "Bern",
  "BS  Basle-Town" = "Basel-Stadt",
  "SG  St. Gall" = "St. Gallen",
  "TG Thurgovia" = "Thurgau",
  "SH  Schaffhausen" = "Schaffhausen",
  "NW  Nidwalden" = "Nidwalden",
  "JU  Jura" = "Jura",
  "VS  Valais" = "Valais",
  "AG  Argovia" = "Aargau",
  "TI  Ticino" = "Ticino",
  "BL  Basle-Country" = "Basel-Landschaft",
  "ZG  Zug" = "Zug",
  "GR  Grisons" = "Graubünden",
  "GL  Glarus" = "Glarus",
  "UR  Uri" = "Uri"
)

# Pooled data: `!!!` splices the lookup into recode() as old = new pairs
combined_all <- combined_all %>%
  dplyr::mutate(canton_map = dplyr::recode(CANTONXX, !!!canton_name_map))

# 2022 wave: same recode
combined_data_22$CANTONXX <- as.factor(combined_data_22$CANTONXX)
combined_data_22 <- combined_data_22 %>%
  dplyr::mutate(canton_map = dplyr::recode(CANTONXX, !!!canton_name_map))

# Mean age per canton, pooled over all waves
pooled_by_canton <- dplyr::group_by(combined_all, canton_map)
age_data <- dplyr::summarize(pooled_by_canton, mean_age = mean(AGEXX, na.rm = TRUE))

### for 2022
# Mean age per canton, 2022 wave only
wave_22_by_canton <- dplyr::group_by(combined_data_22, canton_map)
age_data_22 <- dplyr::summarize(wave_22_by_canton, mean_age = mean(AGEXX, na.rm = TRUE))

# Mean satisfaction with the financial situation per generation and canton,
# pooled over all waves

pooled_by_gen_canton <- dplyr::group_by(combined_all, generation, canton_map)
generation_means <- dplyr::ungroup(
  dplyr::summarise(pooled_by_gen_canton, mean_PXXI01 = mean(PXXI01, na.rm = TRUE))
)

## for 2022
wave_22_by_gen_canton <- dplyr::group_by(combined_data_22, generation, canton_map)
generation_means_22 <- dplyr::ungroup(
  dplyr::summarise(wave_22_by_gen_canton, mean_PXXI01 = mean(PXXI01, na.rm = TRUE))
)


# Merge spatial and non-spatial data
# First attach the 2022 mean age per canton (keeping every canton of the
# shapefile), then the 2022 satisfaction means per generation and canton.
swiss_cantons <- swiss_cantons %>%
  merge(age_data_22, by.x = "NAME", by.y = "canton_map", all.x = TRUE) %>%
  left_join(generation_means_22, by = c("NAME" = "canton_map"))

# Store a simplified copy of the geometries for plotting
swiss_cantons$simplified_geometry <- st_simplify(
  swiss_cantons$geometry,
  preserveTopology = TRUE
)

# Try plotting with simplified geometries
ggplot(data = swiss_cantons) +
  geom_sf(aes(geometry = simplified_geometry))

# Same map with the canton names placed at the centroids
ggplot(data = swiss_cantons) +
  geom_sf(
    aes(geometry = simplified_geometry),
    fill = "lightblue",
    color = "white"
  ) +
  geom_sf_text(
    aes(geometry = st_centroid(simplified_geometry), label = NAME),
    check_overlap = TRUE,
    size = 3
  ) +
  ggtitle("Swiss Cantons") +
  theme_minimal()

# Choropleth maps of mean financial satisfaction per canton in 2022, one map
# per generation. The five maps only differ in the generation they show, so
# the plotting code lives in one helper.

# Build the canton map for one generation.
#   map_data         rows of `swiss_cantons` for a single generation; uses the
#                    columns simplified_geometry and mean_PXXI01
#   generation_label name of the generation as it should appear in the title
# Returns the ggplot object (printed automatically when called at top level).
plot_generation_map_22 <- function(map_data, generation_label) {
  ggplot(data = map_data) +
    geom_sf(aes(geometry = simplified_geometry, fill = mean_PXXI01), color = "white") +
    scale_fill_continuous() +
    labs(title = paste0("Finanzielle Zufriedenheit der ", generation_label,
                        " nach Kanton - 2022"),
         fill = "Durchschnittliche Zufriedenheit (0 bis 10)",
         caption = "Daten: SHP, Legende: 0 = sehr unzufrieden, 10 = sehr zufrieden") +
    # the rounded mean is printed on each canton, so the legend is hidden below
    geom_sf_text(aes(label = round(mean_PXXI01, 1)), color = "white", size = 3,
                 check_overlap = TRUE) +
    theme_minimal() +
    theme(legend.position = "none",
          axis.title.x = element_blank(),
          axis.title.y = element_blank())
}

#### 2022 - Gen Z
gen_z_data_22 <- swiss_cantons %>%
  dplyr::filter(generation == "Generation Z")

plot_generation_map_22(gen_z_data_22, "Gen Z")

#### 2022 - Millennials
gen_m_data_22 <- swiss_cantons %>%
  dplyr::filter(generation == "Millennials")

plot_generation_map_22(gen_m_data_22, "Millennials")

#### 2022 - Gen X
gen_x_data_22 <- swiss_cantons %>%
  dplyr::filter(generation == "Generation X")

plot_generation_map_22(gen_x_data_22, "Gen X")

#### 2022 - Boomers
gen_b_data_22 <- swiss_cantons %>%
  dplyr::filter(generation == "Baby Boomers")

plot_generation_map_22(gen_b_data_22, "Baby Boomers")

#### 2022 - Silent Generation
gen_s_data_22 <- swiss_cantons %>%
  dplyr::filter(generation == "Silent Generation")

plot_generation_map_22(gen_s_data_22, "Silent Generation")

#### 2022 - Everyone
# Mean satisfaction per canton in the 2022 wave, across all generations
wave_22_grouped <- dplyr::group_by(combined_data_22, canton_map)
canton_means_22 <- dplyr::ungroup(
  dplyr::summarise(wave_22_grouped, mean_PXXI01 = mean(PXXI01, na.rm = TRUE))
)

# Fresh copy of the canton shapes with the canton means attached
swiss_cantons_everyone <- st_read("swissBOUNDARIES3D_1_5_TLM_KANTONSGEBIET.shp") %>%
  left_join(canton_means_22, by = c("NAME" = "canton_map"))

swiss_cantons_everyone$simplified_geometry <- st_simplify(
  swiss_cantons_everyone$geometry,
  preserveTopology = TRUE
)

# Choropleth with the rounded mean printed on each canton; unlike the
# per-generation maps this one keeps a vertical legend on the right
fin_sat_every_cantons <- ggplot(data = swiss_cantons_everyone) +
  geom_sf(
    aes(geometry = simplified_geometry, fill = mean_PXXI01),
    color = "white"
  ) +
  scale_fill_continuous() +
  ggtitle("Finanzielle Zufriedenheit nach Kanton - 2022") +
  labs(
    fill = "Skala",
    caption = "Daten: SHP, Legende: 0 = sehr unzufrieden, 10 = sehr zufrieden"
  ) +
  geom_sf_text(
    aes(label = round(mean_PXXI01, 1)),
    color = "white",
    size = 3,
    check_overlap = TRUE
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    legend.direction = "vertical",
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )

fin_sat_every_cantons

ggsave(
  filename = "fin_sat_every_cantons.jpg",
  plot = fin_sat_every_cantons,
  width = 12,
  height = 6
)

# Models to test if findings are significant

# H1: For the year 2022 Gen Z in Ticino was less satisfied financially compared to Gen Z in Aargau
# Gen Z respondents of the 2022 wave, restricted to age 20 and above
gen_z_2022 <- combined_data_22 %>%
  dplyr::filter(generation == "Generation Z", AGEXX >= 20)

ml1 <- lm(PXXI01 ~ canton_map + SEXXX + EDUCATXX, data=gen_z_2022)

summary(ml1) # no statistic significant effect -> was thus not used for the article

# same comparison without control variables
ml0 <- lm(PXXI01 ~ canton_map, data=gen_z_2022)

summary(ml0)

# H1: Millennials view their financial situation worse in 2022 compared to 2018 -> significant!
# significant when controlled for sex, age and education
# `year` is numeric, so the two waves are selected with numeric values.
gen_m_18_22 <- combined_all %>%
  dplyr::filter(generation == "Millennials", year %in% c(2018, 2022))

ml2 <- lm(PXXI01 ~ year + SEXXX + AGEXX + EDUCATXX, data = gen_m_18_22)

summary(ml2)

# H1z: Gen Z over time -> they are doing worse in recent years
# significant when controlled for sex, age and education
gen_z_17_22 <- combined_all %>% dplyr::filter(generation == "Generation Z")
ml2z <- lm(PXXI01 ~ year + SEXXX + AGEXX + EDUCATXX, data = gen_z_17_22)
# Print the Gen Z model itself; this line previously printed `ml2` (the
# Millennials model) again, so the Gen Z result was never shown.
summary(ml2z)

#### Line chart of the generations over time
# Mean satisfaction per year and generation
yearly_generation_data <- combined_all %>%
  group_by(year, generation) %>%
  summarise(mean_PXXI01 = mean(PXXI01, na.rm = TRUE), .groups = 'drop')

# The residual category "Other" is not shown in the facets
yearly_generation_data_others_removed <- yearly_generation_data %>%
  filter(generation != "Other")

# One facet per generation: yearly means as points plus a smoothed trend
fin_sat_years_gen <- ggplot(
  yearly_generation_data_others_removed,
  aes(x = year, y = mean_PXXI01)
) +
  geom_smooth(color = "brown", linewidth = 1) +
  geom_point(color = "black", size = 2) +
  ggtitle("Finanzielle Zufriedenheit der Generationen über die Zeit") +
  xlab("Jahr") +
  ylab("Durchschnittliche finanzielle Zufriedenheit") +
  labs(caption = "Daten wurden nach Jahr und Generationen aggregiert, Quelle: SHP") +
  facet_wrap(~generation, scales = "free_y") +
  scale_y_continuous(
    limits = c(6, 8.5),
    breaks = seq(6, 8.5, 1),
    minor_breaks = NULL
  ) +
  theme_minimal()

fin_sat_years_gen

ggsave(
  filename = "fin_sat_years_gen.jpg",
  plot = fin_sat_years_gen,
  width = 12,
  height = 6
)

## Gen Z only
gen_z_data_line_plot <- yearly_generation_data %>%
  filter(generation == "Generation Z")

# Yearly means as points plus a smoothed trend line.
# The line width is set with `linewidth`, as in the other line charts of this
# script; `size` is deprecated for lines.
ggplot(gen_z_data_line_plot, aes(x = year, y = mean_PXXI01)) +
  geom_smooth(color = "brown", linewidth = 1) +
  geom_point(color = "black", size = 2) +
  labs(title = "Average PXXI01 Over Time for Generation Z",
       x = "Year",
       y = "Average PXXI01",
       caption = "Data aggregated by year for Generation Z.") +
  scale_y_continuous(limits = c(5, 8),
                     breaks = seq(5, 8, 0.5),
                     minor_breaks = NULL) +
  theme_minimal() +
  theme(legend.position = "bottom",
        axis.title.x = element_blank(),  # Optionally removes the x-axis title
        axis.title.y = element_blank(),  # Optionally removes the y-axis title
        plot.title = element_text(hjust = 0.5),  # Centers the title
        axis.text = element_text(size = 10),
        plot.caption = element_text(size = 8))


# for all generations
# Mean satisfaction per year across the whole sample
yearly_data <- combined_all %>%
  group_by(year) %>%
  summarise(mean_PXXI01 = mean(PXXI01, na.rm = TRUE), .groups = 'drop')

# Yearly means as points plus a smoothed trend line
fin_sat_years <- ggplot(yearly_data, aes(x = year, y = mean_PXXI01)) +
  geom_smooth(color = "brown", linewidth = 1) +
  geom_point(color = "black", size = 2) +
  ggtitle("Finanzielle Zufriedenheit der Schweizer*innen über die Zeit") +
  xlab("Jahr") +
  ylab("Durchschnittliche finanzielle Zufriedenheit") +
  labs(caption = "Daten wurden nach Jahr aggregiert, Quelle: SHP") +
  scale_y_continuous(
    limits = c(6, 8.5),
    breaks = seq(6, 8.5, 1),
    minor_breaks = NULL
  ) +
  theme_minimal()

fin_sat_years

ggsave(
  filename = "fin_sat_years.jpg",
  plot = fin_sat_years,
  width = 12,
  height = 6
)