library(tidyverse)
library(maps)
library(mapproj)
library(sf)
library(viridis)
library(cowplot)
library(raster)
library(stars)
library(swissdd)
library(ggplot2)
library(foreign)
library(stringr)
library(conflicted)
library(rnaturalearth)
library(RColorBrewer)
library(leaflet)
library(tmap)
library(lme4)
conflicts_prefer(
dplyr::filter,
dplyr::lag,
dplyr::select, #select und mutate haut´s sonst iw immer ausi
dplyr::mutate
)
# setting Working Directory
setwd("C:/Users//camil/Desktop/Aa_MA-Semester1/Forschungsseminar/finale-Arbeit")
Foschungsseminar Politischer Datenjournalismus: Die unsaubere Aufteilung der Hausarbeit
Bemerkungen zum Skript
Das Skript enthält sämtlichen Code, der für die Auswertungen und Visualisierungen im Blogbeitrag verwendet wurde. Analysen, die im Laufe der Arbeit durchgeführt wurden, sind grössten Teils nicht im Skirpt enthalten. Teile, die vielleicht trotztdem von Interesse sind, wurde herauskommentiert oder mit code-fold verborgen und können so eingesehen werden. Die Bearbeitung erfolgete in ausserdem in mehreren Skripten, bitte um Rücksicht, falls gewisse Schritte doppelt ausgeführt wurden. Die Datenbereinigung (inklusive Recodierung und Zusammenfügen der Wellen) sind im zweiten Skript, RS_Bellmann_Data-cleaning.qmd enthalten. Code-Chunks, die lange Zeite laden, sprich alle Zeitanalysen, wurden zum Rendern unterdrückt.
Set-up
shp21_rc <- read.csv("C:/Users/camil/Desktop/Aa_MA-Semester1/Forschungsseminar/finale-Arbeit/Data/shp_21_recoded.csv")
shp21_pair <- rio::import("C:/Users/camil/Desktop/Aa_MA-Semester1/Forschungsseminar/finale-Arbeit/Data/shp_21_partners.csv")
shp21_pair <- shp21_pair |> filter(sex_person == "female" & beziehung == "hetero")
shp21_pair$canton_person <- haven::as_factor(shp21_pair$canton_person)
table(shp21_pair$cant_nr_person)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
913 736 296 26 108 22 31 28 67 225 211 92 189 49 52 10 306 133 478 154
21 22 23 24 25 26
232 579 250 163 202 39
Ein weiters recoding:
weiteres Recoding (1)
shp21_pair <- shp21_pair |>
mutate(canton_gr = case_when(
cant_nr_person %in% c(4, 6, 7, 8) ~ "OW, NW, UR & GL",
cant_nr_person %in% c(15, 16) ~ "AR & AI",
cant_nr_person %in% c(26, 24) ~ "JU & NE",
cant_nr_person == 1 ~ "ZH Zurich",
cant_nr_person == 2 ~ "BE Berne",
cant_nr_person == 3 ~ "LU Lucerne",
cant_nr_person == 5 ~ "SZ Schwyz",
cant_nr_person == 9 ~ "ZG Zug",
cant_nr_person == 10 ~ "FR Fribourg",
cant_nr_person == 11 ~ "SO Solothurn",
cant_nr_person == 12 ~ "BS Basle-Town",
cant_nr_person == 13 ~ "BL Basle-Country",
cant_nr_person == 14 ~ "SH Schaffhausen",
cant_nr_person == 17 ~ "SG St. Gall",
cant_nr_person == 18 ~ "GR Grisons",
cant_nr_person == 19 ~ "AG Argovia",
cant_nr_person == 20 ~ "TG Thurgovia",
cant_nr_person == 21 ~ "TI Ticino",
cant_nr_person == 22 ~ "VD Vaud",
cant_nr_person == 23 ~ "VS Valais",
cant_nr_person == 25 ~ "GE Geneva",
TRUE ~ NA_character_ ),
lang_can = case_when(
cant_nr_person == 2 ~ "DE", #FR
cant_nr_person == 12 ~ "DE",
cant_nr_person == 11 ~ "DE",
cant_nr_person == 24 ~ "FR",
cant_nr_person == 19 ~ "DE",
cant_nr_person == 1 ~ "DE",
cant_nr_person == 13 ~ "DE",
cant_nr_person == 6 ~ "DE",
cant_nr_person == 22 ~ "FR",
cant_nr_person == 9 ~ "DE",
cant_nr_person == 17 ~ "DE",
cant_nr_person == 25 ~ "FR",
cant_nr_person == 10 ~ "FR",#De
cant_nr_person == 18 ~ "DE", #IT?
cant_nr_person == 23 ~ "FR", #DE?
cant_nr_person == 3 ~ "DE",
cant_nr_person == 26 ~ "FR",
cant_nr_person == 5 ~ "DE",
cant_nr_person == 14 ~ "DE",
cant_nr_person == 20 ~ "DE",
cant_nr_person == 15 ~ "DE",
cant_nr_person == 8 ~ "DE",
cant_nr_person == 16 ~ "DE",
cant_nr_person == 7 ~ "DE",
cant_nr_person == 4 ~ "DE",
cant_nr_person == 21 ~ "IT",
))
shp21_pair <- shp21_pair |>
mutate(reli_can = case_when(
canton_person == "BE Berne" ~ "Protestant",
canton_person == "BS Basle-Town" ~ "no denomination",
canton_person == "SO Solothurn" ~ "Catholic",
canton_person == "NE Neuchatel" ~ "Protestant",
canton_person == "AG Argovia" ~ "Protestant",
canton_person == "ZH Zurich" ~ "Protestant",
canton_person == "BL Basle-Country" ~ "Protestant",
canton_person == "NW Nidwalden" ~ "Catholic",
canton_person == "VD Vaud" ~ "Protestant",
canton_person == "ZG Zug" ~ "Catholic",
canton_person == "SG St. Gall" ~ "Catholic",
canton_person == "GE Geneva" ~ "no denomination", #is there a dominat?
canton_person == "FR Fribourg" ~ "Catholic",
canton_person == "GR Grisons" ~ "Catholic",
canton_person == "VS Valais" ~ "Catholic",
canton_person == "LU Lucerne" ~ "Catholic",
canton_person == "JU Jura" ~ "Catholic",
canton_person == "SZ Schwyz" ~ "Catholic",
canton_person == "SH Schaffhausen" ~ "Protestant",
canton_person == "TG Thurgovia" ~ "Protestant",
canton_person == "AR Appenzell Outer-Rhodes" ~ "Catholic",
canton_person == "GL Glarus" ~ "Protestant",
canton_person == "AI Appenzell Inner-Rhodes" ~ "Catholic",
canton_person == "UR Uri" ~ "Catholic",
canton_person == "OW Obwalden" ~ "Catholic",
canton_person == "TI Ticino" ~ "Catholic"))
shp21_pair <- shp21_pair |>
mutate(educ_person = case_when(
Educ_19_person %in% c(0, 1, 2, 3) ~ "basic",
Educ_19_person %in% c(4, 5, 6, 7, 8) ~ "vocational",
Educ_19_person %in% c(12, 13, 14) ~ "high_voc",
Educ_19_person %in% c(15, 16, 17, 18) ~ "uni",
TRUE ~ NA_character_
),
educ_partner = case_when(
Educ_19_partner %in% c(0, 1, 2, 3) ~ "basic",
Educ_19_partner %in% c(4, 5, 6, 7, 8) ~ "vocational",
Educ_19_partner %in% c(12, 13, 14) ~ "high_voc",
Educ_19_partner %in% c(15, 16, 17, 18) ~ "uni",
TRUE ~ NA_character_
),
educ_person2 = case_when(
educ_person == "basic" ~ "basic",
educ_person %in% c("high_voc", "vocational") ~"voc",
educ_person == "uni" ~ "uni"
),
educ_partner2 = case_when(
educ_partner == "basic" ~ "basic",
educ_partner %in% c("high_voc", "vocational") ~"voc",
educ_partner == "uni" ~ "uni"
)) |>
mutate(age_pers_cat = case_when(
is.na(age_person) ~ NA_character_,
age_person >= 18 & age_person < 35 ~ "18-34",
age_person >= 35 & age_person <= 54 ~ "35-54",
age_person > 55 ~ "55+",
TRUE ~ NA_character_),
age_pers_cat2 = case_when(
is.na(age_person) ~ NA_character_,
age_person >= 18 & age_person <= 29 ~ "18-29",
age_person >= 30 & age_person <= 44 ~ "30-44",
age_person >= 45 & age_person <= 59 ~ "45-59",
age_person >= 60 ~ "60+",
TRUE ~ NA_character_) )
Deskriptive Inspektion
Welche Kantone haben traditionelle Rollenbilder?
#hier andere Resultate als z.B. auf Karte weil, nicht der Pair-Datensatz
hwtab_sex_cant <- shp21_rc |>
subset(!is.na(household_time)) |>
group_by(cant_nr, sex_bin)|>
dplyr::summarize(mean_household_time = weighted.mean(household_time, weights = weights_2)) |>
pivot_wider(names_from = sex_bin, values_from = mean_household_time) |>
dplyr::mutate(hw_dif = (female-male)) |>
dplyr::arrange(desc(hw_dif)) |>
ungroup()
`summarise()` has grouped output by 'cant_nr'. You can override using the
`.groups` argument.
Welche Kantone haben traditionelle Rollenbilder?
hwtab_sex_cant
# A tibble: 26 × 5
cant_nr female male `NA` hw_dif
<int> <dbl> <dbl> <dbl> <dbl>
1 6 16.6 5.45 NA 11.1
2 4 15.4 6.03 NA 9.37
3 20 14.3 5.36 NA 8.89
4 18 14.4 5.93 NA 8.49
5 26 13.2 5.88 NA 7.32
6 21 14.2 7.22 NA 7.03
7 5 12.3 5.59 NA 6.75
8 7 14.5 7.77 NA 6.73
9 15 13.6 7.12 NA 6.51
10 3 13.2 6.91 NA 6.27
# ℹ 16 more rows
Jetzt weiter mit der “Within-Pair” Variable:
weighted.mean(shp21_pair$hw_widif_person, weights =shp21_pair$weights_2_person, na.rm = TRUE)
[1] 8.188659
Rund einen Arbeitstag mehr.
#Häufigkeitstabelle
hw <- weighted.mean(shp21_pair$household_time_person, weights= shp21_pair$weights_2_person, na.rm = TRUE) #15.20842
care <- weighted.mean(shp21_pair$care_work_person, weights= shp21_pair$weights_2_person, na.rm = TRUE) #8.804722
tz <- weighted.mean(shp21_pair$tz_rate_person, weights= shp21_pair$weights_2_person, na.rm = TRUE) #63.01054
woche_bezahlt <- (42 * (tz/100))
woche_gratis <- (hw + care)
cat("Bezahlt", woche_bezahlt, "Stunden", "Unbezahlte Arbeitsstunden", woche_gratis, "Insgesamt", woche_bezahlt + woche_gratis, "h/Woche für hetero Frauen")
Bezahlt 26.46443 Stunden Unbezahlte Arbeitsstunden 24.01314 Insgesamt 50.47757 h/Woche für hetero Frauen
hw <- weighted.mean(shp21_pair$household_time_partner, weights= shp21_pair$weights_2_partner, na.rm = TRUE) #7.112324
care <- weighted.mean(shp21_pair$care_work_partner, weights= shp21_pair$weights_2_partner, na.rm = TRUE) #4.284855
tz <- weighted.mean(shp21_pair$tz_rate_partner, weights= shp21_pair$weights_2_partner, na.rm = TRUE) #88.83743
woche_bezahlt <- (42 * (tz/100))
woche_gratis <- (hw + care)
cat("Bezahlt", woche_bezahlt, "Stunden", "Unbezahlte Arbeitsstunden", woche_gratis, "Insgesamt", woche_bezahlt + woche_gratis, "h/Woche, für hetero Männer")
Bezahlt 37.31172 Stunden Unbezahlte Arbeitsstunden 11.39718 Insgesamt 48.7089 h/Woche, für hetero Männer
Sind die Mittelwertsunterschiede auch signifikant?
t.test(shp21_pair$household_time_person, shp21_pair$household_time_partner, alternative = "greater")
Welch Two Sample t-test
data: shp21_pair$household_time_person and shp21_pair$household_time_partner
t = 43.977, df = 6681.8, p-value < 2.2e-16
alternative hypothesis: true difference in means is greater than 0
95 percent confidence interval:
7.793235 Inf
sample estimates:
mean of x mean of y
15.208418 7.112324
var.test(shp21_pair$household_time_person, shp21_pair$household_time_partner, alternative = "greater")
F test to compare two variances
data: shp21_pair$household_time_person and shp21_pair$household_time_partner
F = 2.5072, num df = 3943, denom df = 3845, p-value < 2.2e-16
alternative hypothesis: true ratio of variances is greater than 1
95 percent confidence interval:
2.378401 Inf
sample estimates:
ratio of variances
2.507202
t.test(shp21_pair$care_work_person, shp21_pair$care_work_partner, alternative = "greater") #
Welch Two Sample t-test
data: shp21_pair$care_work_person and shp21_pair$care_work_partner
t = 11.128, df = 5747.2, p-value < 2.2e-16
alternative hypothesis: true difference in means is greater than 0
95 percent confidence interval:
3.851665 Inf
sample estimates:
mean of x mean of y
8.804722 4.284855
t.test(shp21_pair$tz_rate_person, shp21_pair$tz_rate_partner, alternative = "less") # men are haveing more hours
Welch Two Sample t-test
data: shp21_pair$tz_rate_person and shp21_pair$tz_rate_partner
t = -38.524, df = 5386.7, p-value < 2.2e-16
alternative hypothesis: true difference in means is less than 0
95 percent confidence interval:
-Inf -24.72399
sample estimates:
mean of x mean of y
63.01054 88.83743
Hier gefilterter Datensatz (nur heterosexuelle Frauen): Sie leisten deutlich mehr Haus- und Pflegearbeit als ihre Partner. Bei der Teilzeitrate geht es in die andere Richtung. Alle Unterschiede sind signifikant.
Zusammenhänge
Alter wird als wichtigste Kontrollvariable gesehen und dann auch im Text diskutiert. ### Geographie und Ehe Sind die Unterschiede zwischen den Kantonen signifikant?
# Wegen der kleinen Fallzahlen in den Kantonen zuerst mal
cant_gr_aov <- aov(hw_widif_person ~ as.factor(canton_gr), data = shp21_pair, weights = weights_2_person)
summary(cant_gr_aov)
Df Sum Sq Mean Sq F value Pr(>F)
as.factor(canton_gr) 20 10321 516.0 5.222 3.91e-13 ***
Residuals 2731 269876 98.8
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
2839 Beobachtungen als fehlend gelöscht
# Auch mit kleinen Fallzahlen in den Kantonen?
cant_aov <- aov(hw_widif_person ~ as.factor(canton_person), data = shp21_pair, weights = weights_2_person)
summary(cant_aov)
Df Sum Sq Mean Sq F value Pr(>F)
as.factor(canton_person) 25 10920 436.8 4.422 1.97e-12 ***
Residuals 2726 269277 98.8
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
2839 Beobachtungen als fehlend gelöscht
Unterschiede zwischen den Kantonen sind signifikant. Multivariate Modell spätere zeigen, dass sich das Tessin signifikant von der Deutsch- & Französischen Schweiz unterscheidet.
Spielt Heirat eine Rolle?
tapply(shp21_pair$hw_widif_person, shp21_pair$cohab_person, weights = weights_2_person, weighted.mean, na.rm = TRUE)
married together
9.127350 3.454741
# cohab = married (1), not married = 2
t.test(hw_widif_person ~ cohab_person, data = shp21_pair, weights = weights_2_person, na.rm = TRUE)
Welch Two Sample t-test
data: hw_widif_person by cohab_person
t = 12.001, df = 824.72, p-value < 2.2e-16
alternative hypothesis: true difference in means between group married and group together is not equal to 0
95 percent confidence interval:
4.744851 6.600367
sample estimates:
mean in group married mean in group together
9.127350 3.454741
Auch hier sieht man, dass auch die Ehe einen signifikanten Einfluss hat. Der Unterschied nimmt unter Berücksichtigung von Kontrollvariablen ab. Darum dann Modellschätzung und interpretation.
Unzufriedenheit
Diese Modelle zeigen wo es Zusammenhang gibt und wo nicht:
Wie sieht es mit der Unzufriedenheit aus?
summary(lm(shp21_pair$satis_housework_share_person ~ shp21_pair$household_time_person + shp21_pair$age_person))
Call:
lm(formula = shp21_pair$satis_housework_share_person ~ shp21_pair$household_time_person +
shp21_pair$age_person)
Residuals:
Min 1Q Median 3Q Max
-8.1722 -0.9741 0.2977 1.5830 3.8304
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.993688 0.123750 56.515 <2e-16 ***
shp21_pair$household_time_person -0.031078 0.003406 -9.125 <2e-16 ***
shp21_pair$age_person 0.021359 0.002249 9.497 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.017 on 3930 degrees of freedom
(1658 Beobachtungen als fehlend gelöscht)
Multiple R-squared: 0.03447, Adjusted R-squared: 0.03398
F-statistic: 70.16 on 2 and 3930 DF, p-value: < 2.2e-16
Call:
lm(formula = shp21_pair$satis_housework_share_person ~ shp21_pair$hw_widif_person)
Residuals:
Min 1Q Median 3Q Max
-8.0011 -0.8973 0.2844 1.5179 3.3743
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.923220 0.046289 171.170 < 2e-16 ***
shp21_pair$hw_widif_person -0.025950 0.003298 -7.868 5.11e-15 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.992 on 2799 degrees of freedom
(2790 Beobachtungen als fehlend gelöscht)
Multiple R-squared: 0.02164, Adjusted R-squared: 0.02129
F-statistic: 61.9 on 1 and 2799 DF, p-value: 5.11e-15
Pro Stunde mehr die eine Frau in einer Heterobeziehung mehr im Haushalt arbeitet, desto eher ist sie um 0.02 Punkte (also 2%) unglücklicher mit der Aufteilung. Pro Stunde, die sie mehr leisten als ihr Partner, sind sie auch um 2% unglücklicher. Das ist nicht viel, auf den durchschnittlichen Unterschied von 8 Stunden gerechnet sind das in etwa 0.19 Punkte unzufriedener. Da der Effekt klein ist und nicht im direkten Zusammenhang mit der Forschungsfrage steht, kommt diese Variable nicht ins spätere Modell.
Weiteres Recoding (2)
#variable.names(shp21_pair)
#table(shp21_pair$confession_person) # Confession or religion
# rel_freq_person = Participation in religious services: Frequency
# rel_party_person = Participation in religious services: Festivals, family ceremonies
shp21_pair <- shp21_pair |>
mutate(confession_person = case_when(
confession_person == 6 ~ "protestant",
confession_person == 7 & 8 ~ "catholic",
confession_person == 9 ~ "other Christian denomination",
confession_person == 10 ~ "Jewish",
confession_person == 11 ~ "Muslim",
confession_person == 12 ~ "other",
confession_person == 13 ~ "no denomination/reli",
confession_person == 14 ~ "Evangelical",
confession_person == 15 ~ "Christian orthodox"),
confession_red_person = case_when(
confession_person == "protestant" ~ "protestant",
confession_person == "catholic" ~ "catholic",
confession_person == "no denomination/reli" ~ "no denomination/reli",
# confession_person == "Jewish" ~ "Jewish",
# confession_person == "Muslim" ~ "Muslim",
# confession_person == "Christian orthodox" ~ "Christian orthodox",
TRUE ~ NA_character_),
confession_red_person = as_factor(confession_red_person),
reli_frequ_bin = case_when(
rel_freq_person == 6 & 7 & 8 & 9 ~ "regular",
rel_freq_person == 2 & 3 & 4 & 5 ~ "sometimes",
rel_freq_person == 1 ~"never"),
reli_combi_person = case_when(
confession_red_person == "protestant" & reli_frequ_bin == "regular" ~ "protestant_regular",
confession_red_person == "protestant" & reli_frequ_bin == "sometimes" ~ "protestant_sometimes",
confession_red_person == "catholic" & reli_frequ_bin == "regular" ~ "catholic_regular",
confession_red_person == "catholic" & reli_frequ_bin == "sometimes" ~ "catholic_sometimes",
confession_red_person == "no denomination/reli" & reli_frequ_bin == "never" ~ "atheism",
TRUE ~ NA_character_))
Alter
Theoretisch ergibt es Sinn Alter zu verwenden. Als numerische Variable hat sie jedoch mal keinen signifikanten Einfluss. Eine mögliche Erklärung ist, dass es kein linearer Zusammenhang ist, sondern nach Lebensabschnitten verläuft (Bernardi, Ryser, and Le Goff 2013, S. 19). Alter wurde in drei Altersklassen recodiert um zu sehen, ob es in diesem einen Einfluss trägt, der nicht-linear verläuft. Wegen der unausgewogen
table(shp21_pair$age_pers_cat2)
18-29 30-44 45-59 60+
295 1281 1998 2016
table(shp21_pair$age_pers_cat) #ausgewogenere Gruppen
18-34 35-54 55+
694 2186 2574
age_wmean <- shp21_pair |>
group_by(age_pers_cat) |>
summarise(weighted_mean = weighted.mean(hw_widif_person, weights = weights_2_person, na.rm = TRUE)) |>
na.omit()
gt::gt(as.data.frame(age_wmean))
age_pers_cat | weighted_mean |
---|---|
18-34 | 4.498328 |
35-54 | 8.763272 |
55+ | 8.649219 |
Df Sum Sq Mean Sq F value Pr(>F)
as.factor(age_person) 71 32575 458.8 4.966 <2e-16 ***
Residuals 2680 247622 92.4
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
2839 Beobachtungen als fehlend gelöscht
Werte & Einkommen: Religion, left-right Position & Education
Nur die grösseren Religionen wurden verwendet, da es bei den kleineren zu geringe Fallzahlen gibt, um sie im Modell zu verwenden.
gt::gt(as.data.frame(table(shp21_pair$confession_person)))
Var1 | Freq |
---|---|
catholic | 1390 |
Christian orthodox | 35 |
Evangelical | 158 |
Jewish | 14 |
Muslim | 48 |
no denomination/reli | 850 |
other | 53 |
other Christian denomination | 52 |
protestant | 1318 |
shp21_pair$confession_red_person <- relevel(shp21_pair$confession_red_person, ref= "no denomination/reli")
Die Variable “reli_combi_person” vergleicht die häufig besuchenden Protestant:innen oder Katholik:innen, weniger häufig besuchenden Protestant:innen oder Katholik:innen, mit den Atheist:innen.
table(shp21_pair$reli_combi_person)
atheism catholic_regular catholic_sometimes
579 129 290
protestant_regular protestant_sometimes
86 347
Für finanzielle Entscheidung wurde bewusst Bildung genommen, da das Einkommen zu stark mit der Teilzeitquote korreliert und so einen Trugschluss verheissen könnte.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE)
model_p3 <- lm(hw_widif_person ~ educ_person2 + educ_partner2 + rel_freq_person + lr_pos_partner + lr_pos_person, weights = weights_2_person, data = shp21_pair)
stargazer::stargazer(model_p3,
type = "html",
covariate.labels = c(
"Education: Uni (person)",
"Education: Voc (person)",
"Education: Uni (partner)",
"Education: Voc (partner)",
"Religious Frequency (person)",
"Political Position (partner)",
"Political Position (person)"),
float = FALSE)
Dependent variable: | |
hw_widif_person | |
Education: Uni (person) | -3.203*** |
(0.654) | |
Education: Voc (person) | 1.085 |
(0.710) | |
Education: Uni (partner) | 2.013*** |
(0.681) | |
Education: Voc (partner) | 1.568** |
(0.683) | |
Religious Frequency (person) | 0.643*** |
(0.109) | |
Political Position (partner) | 0.439*** |
(0.148) | |
Political Position (person) | -0.016 |
(0.153) | |
Constant | 3.964*** |
(0.955) | |
Observations | 2,264 |
R2 | 0.043 |
Adjusted R2 | 0.040 |
Residual Std. Error | 9.276 (df = 2256) |
F Statistic | 14.332*** (df = 7; 2256) |
Note: | p<0.1; p<0.05; p<0.01 |
Alter gilt als theoretisch wichtigste Kontrollvariable (wenn auch nicht immer signifikant), deshalb wird das Modell (zur robustness) nochmals geschätzt.
shp21_pair$age_pers_cat <- shp21_pair$age_pers_cat |>
as_factor()|>
relevel(ref= "18-34")
knitr::opts_chunk$set(echo = TRUE, warning = FALSE)
model_p3_age <- lm(hw_widif_person ~ age_pers_cat + educ_person2 + educ_partner2 + rel_freq_person + lr_pos_partner + lr_pos_person, weights = weights_2_person, data = shp21_pair)
stargazer::stargazer(model_p3_age,
type = "html",
covariate.labels = c(
"Age: 55+",
"Age: 35-54",
"Education: Uni (person)",
"Education: Voc (person)",
"Education: Uni (partner)",
"Education: Voc (partner)",
"Religious Frequency (person)",
"Political Position (partner)",
"Political Position (person)"),
float = FALSE)
Dependent variable: | |
hw_widif_person | |
Age: 55+ | -1.538* |
(0.908) | |
Age: 35-54 | -1.412 |
(0.872) | |
Education: Uni (person) | -3.563*** |
(0.689) | |
Education: Voc (person) | 0.785 |
(0.723) | |
Education: Uni (partner) | 1.934*** |
(0.697) | |
Education: Voc (partner) | 1.666** |
(0.694) | |
Religious Frequency (person) | 0.639*** |
(0.111) | |
Political Position (partner) | 0.405*** |
(0.150) | |
Political Position (person) | -0.014 |
(0.155) | |
Constant | 5.732*** |
(1.328) | |
Observations | 2,209 |
R2 | 0.044 |
Adjusted R2 | 0.040 |
Residual Std. Error | 9.311 (df = 2199) |
F Statistic | 11.270*** (df = 9; 2199) |
Note: | p<0.1; p<0.05; p<0.01 |
Left-right position von Partner ist signifikant. Auch die Religiöse Frequency. Uni-Abschluss der Frau reduziert die Unterschiede. Links-Positionierung des Partners ebenfalls.
model_checks_p3 <- performance::check_model(model_p3)
model_checks_p3
hist(model_p3$residuals, main = "Residual Histogram")
Die graphische Analsyse zeigt, dass der Grossteil der Annahmen halten. Homogenität der Varianz und Normalverteilung der Residuen sind nicht ganz perfekt. Ich möchte argumentieren, dass die Annahmen nach graphischer Inspektion trotzdem noch in einem Rahmen liegen, der das Einhalten der Linearen Regressionsbedingungen erfüllt.
Modelle mit allen Variablen
Hier gilt jedoch Vorsicht bei der Interpretation, denn die Fallzahlen werden hier sehr klein. Nur Ergebnisse, die auch zuvor signifikant waren werden berücksichtigt.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE)
model_p3_combi <- lm(hw_widif_person ~ age_pers_cat + educ_person2 + educ_partner2 + reli_combi_person + lr_pos_partner, weights = weights_2_person, data = shp21_pair)
stargazer::stargazer(model_p3_combi,
type = "html",
covariate.labels = c("Age: 55+",
"Age: 35-54",
"Education: Uni (person)",
"Education: Voc (person)",
"Education: Uni (partner)",
"Education: Voc (partner)",
"Religion: Catholic-regular (Person)",
"Religion: Catholic-sometimes (Person)",
"Religion: Protestant-regular (Person)",
"Religion: Protestant-sometimes (Person)",
"Political Position (partner)"),
float = FALSE)
Dependent variable: | |
hw_widif_person | |
Age: 55+ | 0.021 |
(1.402) | |
Age: 35-54 | 1.403 |
(1.301) | |
Education: Uni (person) | -4.329*** |
(1.143) | |
Education: Voc (person) | -1.223 |
(1.180) | |
Education: Uni (partner) | 0.461 |
(1.079) | |
Education: Voc (partner) | 0.860 |
(1.068) | |
Religion: Catholic-regular (Person) | 5.432*** |
(1.647) | |
Religion: Catholic-sometimes (Person) | -0.633 |
(1.034) | |
Religion: Protestant-regular (Person) | 4.242** |
(1.884) | |
Religion: Protestant-sometimes (Person) | 0.046 |
(1.126) | |
Political Position (partner) | -0.074 |
(0.199) | |
Constant | 8.204*** |
(1.850) | |
Observations | 831 |
R2 | 0.054 |
Adjusted R2 | 0.042 |
Residual Std. Error | 8.882 (df = 819) |
F Statistic | 4.271*** (df = 11; 819) |
Note: | p<0.1; p<0.05; p<0.01 |
knitr::opts_chunk$set(echo = TRUE, warning = FALSE)
model_full <- lm(hw_widif_person ~ age_pers_cat + educ_person2 + educ_partner2 + cohab_person + reli_combi_person + lr_pos_person + lr_pos_partner + lang_can, weights = weights_2_person, data = shp21_pair)
stargazer::stargazer(model_full,
type = "html",
covariate.labels = c("Age: 55+",
"Age: 35-54",
"Education: Uni (person)",
"Education: Voc (person)",
"Education: Uni (partner)",
"Education: Voc (partner)",
"Civil Status: Relationship (reference: Marriage)",
"Religion: Catholic-regular (person)",
"Religion: Catholic-sometimes (person)",
"Religion: Protestant-regular (person)",
"Religion: Protestant-sometimes (person)",
"Political Position (person)",
"Political Position (partner)",
"Canton language: France",
"Canton language: German"),
float = FALSE)
Dependent variable: | |
hw_widif_person | |
Age: 55+ | 0.756 |
(1.370) | |
Age: 35-54 | 3.030** |
(1.273) | |
Education: Uni (person) | -3.617*** |
(1.110) | |
Education: Voc (person) | -0.513 |
(1.155) | |
Education: Uni (partner) | 0.369 |
(1.078) | |
Education: Voc (partner) | 1.774* |
(1.038) | |
Civil Status: Relationship (reference: Marriage) | -3.841*** |
(1.054) | |
Religion: Catholic-regular (person) | 6.229*** |
(1.637) | |
Religion: Catholic-sometimes (person) | 0.566 |
(1.025) | |
Religion: Protestant-regular (person) | 5.480*** |
(1.768) | |
Religion: Protestant-sometimes (person) | 1.275 |
(1.102) | |
Political Position (person) | 0.160 |
(0.242) | |
Political Position (partner) | -0.023 |
(0.229) | |
Canton language: France | 1.378 |
(1.039) | |
Canton language: German | 5.515*** |
(2.095) | |
Constant | 4.556** |
(1.899) | |
Observations | 741 |
R2 | 0.123 |
Adjusted R2 | 0.105 |
Residual Std. Error | 7.967 (df = 725) |
F Statistic | 6.778*** (df = 15; 725) |
Note: | p<0.1; p<0.05; p<0.01 |
Alles in allem, zeigen die Modelle aber eine tiefe Varianzaufklärung. Die einzelne Effekte wurden im Text interpretiert. Es soll angemerkt sein, dass zur Interpretation gewisse Vereinfachungen angewandt wurden um das Leseverständnis zu erleichter. Diese Regressionstabelle gilt als Hauptresultate, auch die ergebnisse von weiter oben werden berücksichtigt, dienen aber auch der Robustheit und somit Eindeutigkeit der Ergebnisse.
model_checks <- performance::check_model(model_full)
model_checks
Annahmen nicht perfekt erfüllt, aber in einem tollerierbaren Rahmen, wenn man bedenkt, dass die Fallzahlen hier kleiner werden. Heteroskedaszitität und Normalverteilung der Residuen sind ein Problem, der Trend ist nicht besorgniserregend, aber in einem Rahmen, der (meiner Einschätzung nach) die Interpretation der Resultate erlaubt, besonders unter Berücksichtigung der anderen Analysen.
Die Analyse der Zusammenhänge ist sehr breit. Ziel war es möglichst robuste Ergebnisse zu erhalten, deshalb die vielen Multivariaten Modelle. Da die Interpretation der Ergebnisse im Text für Bildung, Politische Positionierung und Religion in Zusammenhang stehen, wurden für diese Variablen schliesslich Multivariate Modelle geschätzt. Das “ganzheitliche” Modell, das sämtliche Variablen verwendet dient ebenfalls der Robutsheit und dem Ziel nicht die Einflüsse gewisser Variablen zu überschätzen.
Daten visualisierung
Grafik 1: Wochenaufteilung
hw_person <- weighted.mean(shp21_pair$household_time_person, weights = shp21_pair$weights_2_person, na.rm = TRUE)
care_person <- weighted.mean(shp21_pair$care_work_person, weights = shp21_pair$weights_2_person, na.rm = TRUE)
tz_person <- weighted.mean(shp21_pair$tz_rate_person, weights = shp21_pair$weights_2_person, na.rm = TRUE)
paid_work_person <- 42 * (tz_person / 100)
unpaid_work_person <- hw_person + care_person
total_person <- sum(unpaid_work_person, paid_work_person)
hw_partner <- weighted.mean(shp21_pair$household_time_partner, weights = shp21_pair$weights_2_partner, na.rm = TRUE)
care_partner <- weighted.mean(shp21_pair$care_work_partner, weights = shp21_pair$weights_2_partner, na.rm = TRUE)
tz_partner <- weighted.mean(shp21_pair$tz_rate_partner, weights = shp21_pair$weights_2_partner, na.rm = TRUE)
paid_work_partner <- 42 * (tz_partner / 100)
unpaid_work_partner <- hw_partner + care_partner
total_partner <- sum(unpaid_work_partner, paid_work_partner)
###Adding sd
#
# hw_person_sd <- sd(shp21_pair$household_time_person, na.rm = TRUE)
# care_person_sd <- sd(shp21_pair$care_work_person, na.rm = TRUE)
# tz_person_sd <- sd(shp21_pair$tz_rate_person, na.rm = TRUE)
#
# hw_partner_sd <- sd(shp21_pair$household_time_partner, na.rm = TRUE)
# care_partner_sd <- sd(shp21_pair$care_work_partner, na.rm = TRUE)
# tz_partner_sd <- sd(shp21_pair$tz_rate_partner, na.rm = TRUE)
data <- data.frame(
Gender = rep(c("Frauen", "Männer"), each = 2),
WorkType = rep(c("Bezahlte Arbeit", "Unbezahlte Arbeit"), 2),
Hours = c(paid_work_person, unpaid_work_person, paid_work_partner, unpaid_work_partner),
SubType = c(NA, paste0("Hausarbeit: ", round(hw_person, 2), "\nCare Arbeit: ", round(care_person, 2)),
NA, paste0("Hausarbeit: ", round(hw_partner, 2), "\nCare Arbeit: ", round(care_partner, 2)))
)
unbezahlte_data <- data |> filter(WorkType == "Unbezahlte Arbeit")
total_hours <- data.frame(
Gender = c("Frauen", "Männer"),
Hours = c(round(total_person, digits = 1), round(total_partner, digits = 1)),
WorkType = c("Bezahlte Arbeit", "Bezahlte Arbeit") #otherwise it does not work
)
plot_arbeit <- ggplot(data, aes(x = Gender, y = Hours, fill = WorkType)) +
geom_bar(stat = "identity", position = "stack") +
# geom_errorbar(aes(ymin = Hours - sd, ymax = Hours + sd), width = 0.2, position = position_stack(vjust = 0.5)) +
geom_text(data = unbezahlte_data, aes(label = SubType),
position = position_stack(vjust = 0.5), size = 2.5, hjust = 0.5, vjust = 0.25) +
geom_text(data = total_hours, aes(label = paste(Hours, "Stunden")),
position = position_stack(vjust = 1.05), size = 3, vjust = -0.2) +
#geom_text(aes(x = Gender, label = paste("50 Stunden")))+
#c("50 Stunden", "49 Stunden")), vjust = top_frac, size = 3)) +
#geom_text(aes(label= c("a", "b")), vjust=-0.3, size=3.5) +
#geom_text(data = data.frame(Gender = c("Frauen", "Männer"), label = c("a", "b")), aes(x = Gender, y = 0, label = label), vjust = -0.3, size = 3.5) +
labs(
title = "Arbeitsstunden im Vergleich",
y = "Wochenstunden",
fill = " ",
x = " "
) +
theme_minimal() +
scale_fill_manual(values = c("Unbezahlte Arbeit" = "#CC7A8B", "Bezahlte Arbeit" = "#7E587E")) +
theme(
text = element_text(family = "sans", size = 10),
axis.title.y = element_text(size = 8),
title = element_text(size = 12),
plot.title = element_text(hjust = 0.5)
)
plot_arbeit
ggsave(plot_arbeit, filename = "Graphs/plot_arbeit.png", width = 5, height = 3)
Grafik 2: Karten
within_sex_cant <- shp21_pair |>
group_by(cant_nr_person) |>
summarize(
wimean_hw = weighted.mean(hw_widif_person, weights = weights_2_person, na.rm = TRUE),
wimean_care = weighted.mean(care_widif_person, weights = weights_2_person, na.rm = TRUE),
wimean_tz = weighted.mean(tz_widif_person, weights = weights_2_person, na.rm = TRUE),
wimean_sathw = weighted.mean(hwsat_widif_person, weights = weights_2_person, na.rm = TRUE)) |>
ungroup()
within_sex_cant
# A tibble: 26 × 5
cant_nr_person wimean_hw wimean_care wimean_tz wimean_sathw
<int> <dbl> <dbl> <dbl> <dbl>
1 1 7.71 3.18 -22.4 -0.924
2 2 8.72 4.24 -26.8 -0.897
3 3 8.47 6.39 -27.3 -0.874
4 4 11.5 5.12 -35.4 -1.24
5 5 10.9 6.44 -30.6 -0.967
6 6 9.38 5.4 -60 -0.6
7 7 8.85 4.38 -50 -0.867
8 8 6.4 3.13 -26.7 -0.556
9 9 7.90 12.0 -23.1 -0.629
10 10 6.34 6.96 -28.7 -1.06
# ℹ 16 more rows
Diese Tabelle gibt auch gleich die genaueren Infos darüber, wie viele Stunden Unterschied genau in verschiedenen Kantonen ist.
#1. Shapefile von bund
#download: https://www.swisstopo.admin.ch/de/landschaftsmodell-swissboundaries3d#swissBOUNDARIES3D---Download für Zitation
switzerland_shapefile <- "C:/Users/camil/Desktop/Aa_MA-Semester1/Forschungsseminar/Karte/swissboundaries3d_2023-01_2056_5728.shp/swissBOUNDARIES3D_1_5_TLM_KANTONSGEBIET.shp"
#2. des als Kartengrundlage mit st_read()
map_sf <- st_read(dsn = switzerland_shapefile)
Reading layer `swissBOUNDARIES3D_1_5_TLM_KANTONSGEBIET' from data source
`C:\Users\camil\Desktop\Aa_MA-Semester1\Forschungsseminar\Karte\swissboundaries3d_2023-01_2056_5728.shp\swissBOUNDARIES3D_1_5_TLM_KANTONSGEBIET.shp'
using driver `ESRI Shapefile'
Simple feature collection with 26 features and 19 fields
Geometry type: MULTIPOLYGON
Dimension: XY, XYZ
Bounding box: xmin: 2485410 ymin: 1075268 xmax: 2833858 ymax: 1295934
z_range: zmin: 193.398 zmax: 4613.686
Projected CRS: CH1903+ / LV95 + LN02 height
#3. joining mapdata mit kantonen
map_sf <- map_sf |>
#Für die navien Schätzer von oben, aber unnötig, so mMn besser
# left_join(tztab_sex_cant, by = c("KANTONSNUM" = "cant_nr")) |>
# left_join(caretab_sex_cant, by = c("KANTONSNUM" = "cant_nr")) |>
# left_join(hwtab_sex_cant, by = c("KANTONSNUM" = "cant_nr")) |>
left_join(within_sex_cant, by = c("KANTONSNUM" = "cant_nr_person"))
#4. Plot -tm_map beste
karte_hw <-
tm_shape(map_sf) +
tm_borders()+
# tm_style("white", frame.lwd = 0, )+
tm_fill(col = "wimean_hw", palette = "Purples", title = "Anzahl mehr geleistete Stunden \npro Woche als Parntner", style = "cont", breaks = c(5, 9, 13)) +
tm_layout(main.title = "Wie viele Stunden Hausarbeit leisten Schweizerinnen mehr als \nihre Partner?")+
tm_layout(main.title.size = 1.8) +
tm_layout(frame = FALSE) + #ACHTUNG GEHT NICHT IN EINE ZEILE
tm_legend(legend.show = TRUE)+
tm_legend(legend.title.size = 1) +
tm_legend(position = c("right", "bottom"))
karte_hw
tmap_save(karte_hw, filename = "Graphs/Karte_hw.png", width = 10, height = 7, dpi = 500)
Map saved to C:\Users\camil\Desktop\Aa_MA-Semester1\Forschungsseminar\finale-Arbeit\Graphs\Karte_hw.png
Resolution: 5000 by 3500 pixels
Size: 10 by 7 inches (500 dpi)
Karten zu anderen Variablen
karte_carearbeit <-
tm_shape(map_sf) +
tm_borders()+
tm_fill(col = "wimean_care", palette = "Purples") +
tm_layout(title = "Unterschiede bei der Care-Arbeit")
tmap_save(karte_carearbeit, filename = "Graphs/Karte_care-work.png", width = 10, height = 8, dpi = 300)
karte_tz <-
tm_shape(map_sf) +
tm_borders()+
tm_fill(col = "wimean_tz", palette = "Greens") +
tm_layout(title = "Frauen gehen öfters in Teilzeit")
tmap_save(karte_tz, filename = "Graphs/Karte_tz.png", width = 10, height = 8, dpi = 300)
palette_2 <- c("#784A8C", "#BD79A3", "#759DC1")
karte_satis <-
tm_shape(map_sf) +
tm_borders()+
tm_fill(col = "wimean_sathw", palette = palette_2, n = 2) +
tm_layout(title = "Frauen sind tendenziell unglücklicher mit der Aufteilung")
tmap_save(karte_satis, filename = "Graphs/Karte_satis.png", width = 10, height = 8, dpi = 300)
Grafik 3: Entwicklung über Zeit
Die Datensätze wurden in einem Loop über mehrere Wellen hinweg zusammengefügt.
shp_time <- read.csv("C:/Users/camil/Desktop/Aa_MA-Semester1/Forschungsseminar/finale-Arbeit/Data/shp_time-long_CB_rc-pair.csv")
shp_time <- shp_time |>
filter(sex_person == "female" & beziehung == "hetero") |>
mutate(age_pers_cat = case_when(
is.na(age_person) ~ NA_character_,
age_person >= 18 & age_person < 35 ~ "18-34",
age_person >= 35 & age_person <= 54 ~ "35-54",
age_person > 55 ~ "55+",
TRUE ~ NA_character_))
# variable.names(shp_time)
shp_time$year_person <- as.numeric(shp_time$year_person)
shp_time$hw_widif_person <- as.numeric(shp_time$hw_widif_person)
shp_time_plot <- shp_time |>
group_by(year_person) |>
summarise(mean_hw_widif = mean(hw_widif_person, na.rm = TRUE))
# Plot für alle daten, aber nicht in Arbeit verwendet
# plot_time <- ggplot(data = shp_time_plot, aes(x = year_person, y = mean_hw_widif)) +
# geom_line() +
# geom_point() +
# labs(x = "Mean Year", y = "Mean hw_widif_person", title = "Housework Difference Over Time by Age Category") +
# scale_x_continuous(breaks = seq(1999, 2022, by = 2)) +
# scale_y_continuous(limits = c(0, max(shp_time_plot$mean_hw_widif) * 1.1))
#
# ggsave(plot_time, filename = "Graphs/plot_time.png", width = 6, height = 3)
shp_time_plotGR <- shp_time |>
filter(!is.na(age_pers_cat)) |>
filter(!is.na(hw_widif_person)) |>
group_by(year_person, age_pers_cat) |>
summarise(
mean_hw_widif = mean(hw_widif_person),
sd_hw_widif = sd(hw_widif_person),
n = n()
) |>
mutate(
se_hw_widif = sd_hw_widif / sqrt(n),
lower_CI = mean_hw_widif - 1.96 * se_hw_widif,
upper_CI = mean_hw_widif + 1.96 * se_hw_widif
) |>
group_by(year_person)
shp_time_plotGR_all <- merge(shp_time_plot, shp_time_plotGR, by = "year_person", all = TRUE)
shp_time_plotGR_all <- shp_time_plotGR_all |>
rename(Mittel = mean_hw_widif.x)
graph_time_all <- ggplot(data = shp_time_plotGR_all, aes(x = year_person)) +
geom_ribbon(aes(ymin = lower_CI, ymax = upper_CI, fill = age_pers_cat), alpha = 0.2, show.legend = FALSE) +
geom_line(aes(y = Mittel, color = "Mittel"), linetype = "dotted") +
geom_line(aes(y = mean_hw_widif.y, group = age_pers_cat, color = age_pers_cat)) +
geom_point(aes(y = mean_hw_widif.y, color = age_pers_cat)) +
labs(x = "", y = "Anzahl zusätzlicher Stunden pro Woche", title = "Entwicklung über die Zeit") +
scale_color_manual(name = "Altersgruppen", values = c("18-34" = "#CC7A8B", "35-54" = "#A52A2A", "55+" = "#7E587E", "Mittel" = "black")) +
scale_fill_manual(name = "Altersgruppen", values = c("18-34" = "#CC7A8B", "35-54" = "#A52A2A", "55+" = "#7E587E")) +
scale_x_continuous(breaks = seq(1999, 2023, by = 3)) +
scale_y_continuous(limits = c(0, max(shp_time_plotGR_all$mean_hw_widif.y) * 1.1)) +
theme_minimal() +
theme(
text = element_text(family = "sans", size = 10),
axis.title.y = element_text(size = 8),
title = element_text(size = 12),
panel.grid = element_line(color = "gray", linetype = "dotted")
)
ggsave(graph_time_all, filename = "Graphs/graph_time_all.png", width = 6, height = 3)
Religösität über Zeit
shp_time <- shp_time |>
mutate(rel_freq_pers_rc = case_when(
is.na(rel_freq_person) ~ NA_real_, # Correcting NA condition
rel_freq_person == "never" ~ 0,
rel_freq_person == "only for family ceremonies" ~ 1,
rel_freq_person == "only for religious celebrations" ~ 2,
rel_freq_person == "religious celebrations and family events" ~ 3,
rel_freq_person == "a few times a year" ~ 4,
rel_freq_person == "about once a month" ~ 5,
rel_freq_person == "every two weeks" ~ 6,
rel_freq_person == "once a week" ~ 7,
rel_freq_person == "several times a week" ~ 8
),
rel_freq_pers_rc <- as.numeric(rel_freq_pers_rc)
)
shp_time_reli <- shp_time |>
group_by(year_person, age_pers_cat) |>
summarise(mean_reli = mean(rel_freq_pers_rc, na.rm = TRUE)) |>
na.omit()
graph_reli <- ggplot(data = shp_time_reli, aes(x = as.factor(year_person))) +
geom_line(aes(y = mean_reli, group = age_pers_cat, color = age_pers_cat)) +
geom_point(aes(y = mean_reli, color = age_pers_cat), size = 2) +
labs(x = "", y = "Häufigkeit der Teilnahme an religiösen Zeremonien", title = "Entwicklung über die Zeit") +
scale_color_manual(name = "Altersgruppen", values = c("18-35" = "#CC7A8B", "35-55" = "#A52A2A", "55+" = "#7E587E")) +
scale_y_continuous(limits = c(0, 8)) +
scale_x_discrete(limits = c("2015", "2018", "2021")) +
theme_minimal() +
theme(
text = element_text(family = "sans", size = 10),
axis.title.y = element_text(size = 8),
title = element_text(size = 12),
panel.grid = element_line(color = "gray", linetype = "dotted")
)
ggsave(graph_reli, filename = "Graphs/graph_reli.png", width = 6, height = 3)
Diese Grafik wurde ebenfalls nicht in der Arbeit verwendet, weil zu wenig Datenpunkte vorhanden waren und es nicht oft genug abgefragt wurde.