Forschungsseminar Politischer Datenjournalismus: Data-Cleaning

Name
Matrikelnummer

Camilla Bellmann

20-732-988

Date

25 June 2024

Da die Datenbereinigung lange Ladezeiten am Arbeitsgerät braucht, wurde sie in einem separaten Skript durchgeführt. Alle verwendeten Daten wurden von der SHP Group (2024) bezogen.

Libraries & set up

library(tidyverse)
library(maps)
library(mapproj)
library(sf)
library(viridis)
library(cowplot)
library(raster)
library(stars)
library(swissdd)
library(ggplot2)
library(foreign)
library(stringr)
library(conflicted)
library(rnaturalearth)
# Resolve function-name clashes between the attached packages: unqualified
# calls to filter(), select() and mutate() always use the dplyr versions.
# (dplyr::lag is not part of the preference list.)
conflicted::conflicts_prefer(dplyr::filter, dplyr::select, dplyr::mutate)

# Set the working directory to the Swiss Household Panel data folder.
setwd("C:/Users//camil/Desktop/Aa_MA-Semester1/Forschungsseminar/Swisshouseholdpanel")

Reading datasets für 2021

# Load the 2021 wave (folder W23_2021): household file first, then person file.
shp_house_2021 <- haven::read_sav(
  file = "C:/Users/camil/Desktop/Aa_MA-Semester1/Forschungsseminar/Swisshouseholdpanel/swissubase_932_14_0/data/Data_SPSS/Data_SPSS/SHP-Data-W1-W24-SPSS/W23_2021/SHP21_H_USER.sav"
)
shp_pers_2021 <- haven::read_sav(
  file = "C:/Users/camil/Desktop/Aa_MA-Semester1/Forschungsseminar/Swisshouseholdpanel/swissubase_932_14_0/data/Data_SPSS/Data_SPSS/SHP-Data-W1-W24-SPSS/W23_2021/SHP21_P_USER.sav"
)

# Combine person and household data on the household id. A right join keeps
# every household row, so a household without a matching person record yields
# a row whose person variables are all NA.
shp_2021 <- shp_pers_2021 |>
  dplyr::right_join(shp_house_2021, by = "IDHOUS21")

Auswahl und Umbenennung der Variablen

  #nur relevanten Variablen umbenannt und enthalten 

# Reduce the joined 2021 data to the variables used in the analysis and give
# them readable names. Each argument reads: new_name = "SHP variable name".
shp_21_breve <- dplyr::select(
  shp_2021,
  # identifiers and socio-demographics
  id_person = "IDPERS",
  id_house = "IDHOUS21",
  id_partner = "IDSPOU21",
  age = "AGE21",
  sex = "SEX21",
  civstatus = "CIVSTA21",
  child = "P21D80",
  Educ_17 = "EDCAT21",
  Educ_19 = "EDGR21",
  Educ_11_plusgrid = "EDUCAT21",
  Educ_11_grid = "EDUGR21",
  Educ_11 = "EDU_1_21",
  # NOTE(review): kids_num is taken from NBADUL21; confirm in the codebook
  # that this variable really counts children.
  kids_num = "NBADUL21",
  canton = "CANTON21",
  house_type = "HLDFFS21",
  cohab = "COHAST21",
  # work and household variables
  tz = "P21W39",
  tz_p = "P21W42",
  tz_reason = "P21W43",
  # (not selected: opin_working_mom = P21D92)
  care_work = "P21F63",
  opin_work = "P21F64",
  satis_together_house = "P21F03",
  satis_housework_share = "P21F04",
  household_time = "P21F08",
  # female tasks
  # (not selected: opin_working_mom = P21D92)
  volunt = "P21N35",
  volunt_time = "P21N38",
  # variables for the regression model
  lr_pos = "P21P10", # 0 = left, 10 = right
  party_vote = "P21P19",
  # (not selected: wo_penalised = P21P20, only available in 2020)
  # religion
  confession = "P21R01",
  rel_freq = "P21R02",
  rel_party = "P21R03",
  rel_frequ = "P21R04",
  christ_att = "P21R23",
  islam_att = "P21R24",
  hindu_att = "P21R25",
  budda_att = "P21R26",
  jew_att = "P21R27",
  atheism_att = "P21R28",
  rel_shift = "P21R34",
  # parents
  lr_dad = "P21P46",
  lr_mom = "P21P47",
  # (not selected: alive_dad = P21N82, parctical_dad = P21N88,
  #  emo_dad = P21N89, alive_mom = P21N73, practical_mom = P21N79,
  #  emotional_mom = P21N80)
  # weights
  weights = "WI21LS20",
  weights_2 = "WI21CSS"
)

Recoding der Variablen für Lesbarkeit.

  # -1 bis -8 sind meistens "don´t know", "inapplicable" etc.
# Recode the 2021 selection into readable values and derive helper variables.
# Negative codes (-1 to -8) are mostly "don't know", "inapplicable" etc. and
# are turned into NA wherever a variable is recoded here.

# Two-letter canton abbreviation -> canton number in the BFS numbering that is
# used for the maps. Matching on the abbreviation instead of the full label
# makes the lookup independent of the spacing inside the labels.
canton_numbers <- c(
  ZH = 1, BE = 2, LU = 3, UR = 4, SZ = 5, OW = 6, NW = 7, GL = 8, ZG = 9,
  FR = 10, SO = 11, BS = 12, BL = 13, SH = 14, AR = 15, AI = 16, SG = 17,
  GR = 18, AG = 19, TG = 20, TI = 21, VD = 22, VS = 23, NE = 24, GE = 25,
  JU = 26
)

shp_21_breve_rc <- shp_21_breve |>
  mutate(
    sex = case_when(
      sex == 1 ~ "male",
      sex == 2 ~ "female",
      sex == 3 ~ "other",
      TRUE ~ NA_character_),
    civstatus = case_when(
      civstatus == 1 ~ "single",
      # Registered partnership (6) is treated like married (2). Each code needs
      # its own comparison: `civstatus == 2 & 6` would only ever match code 2.
      civstatus == 2 | civstatus == 6 ~ "married",
      # Separated (3), divorced (4) and dissolved partnership (7) are treated
      # as the same status. The output spelling is kept as before so that
      # scripts reading the exported file keep working.
      civstatus == 3 | civstatus == 4 | civstatus == 7 ~ "seperated",
      civstatus == 5 ~ "widowER",
      TRUE ~ NA_character_),
    lr_mom = case_when(
      lr_mom < 0 ~ NA_real_, TRUE ~ lr_mom),
    lr_dad = case_when(
      lr_dad < 0 ~ NA_real_, TRUE ~ lr_dad),
    # Drop implausibly high values (> 80) and the negative missing codes, so
    # that neither ends up in the within-couple differences.
    household_time = case_when(
      household_time < 0 | household_time > 80 ~ NA_real_,
      TRUE ~ household_time),
    confession = case_when(
      confession < 0 ~ NA_real_,
      TRUE ~ confession),
    canton = case_when(
      canton < 0 ~ NA_real_,
      TRUE ~ canton),
    cohab = case_when(
      cohab == 1 ~ "married",
      cohab == 2 ~ "not_married",
      TRUE ~ NA_character_)) |>
  mutate(
    # Convert to factors so that the value labels (e.g. the confession names)
    # are shown instead of the bare codes.
    confession = haven::as_factor(confession),
    canton = haven::as_factor(canton),
    house_type = haven::as_factor(house_type)) |>
  mutate(
    # tz_rate: 100 for full-time workers (tz == 2); for part-time workers
    # (tz == 1) the reported part-time percentage tz_p. Negative tz_p values
    # are missing codes and therefore give NA.
    tz_rate = case_when(
      tz == 2 ~ 100,
      tz == 1 & tz_p >= 0 ~ tz_p,
      TRUE ~ NA_real_),
    # cant_nr: canton number in BFS numbering, looked up via the two capital
    # letters at the start of the canton label; anything else gives NA.
    cant_nr = unname(
      canton_numbers[sub("^([A-Z]{2})\\s+.*$", "\\1", as.character(canton))]
    ),
    # sex_bin: binary version of sex; "other" and NA become NA.
    sex_bin = case_when(
      sex == "female" ~ "female",
      sex == "male" ~ "male",
      sex == "other" ~ NA_character_),
    # kmk: household constellation ("children", cohabiting "together",
    # "married"), NA if it cannot be determined.
    kmk = case_when(
      kids_num > 0 ~ "children",
      kids_num == 0 & cohab == "not_married" ~ "together",
      kids_num == 0 & cohab == "married" ~ "married",
      TRUE ~ NA_character_
    )
  )

# unique(shp_21_breve_rc$house_type)
# The variable is ambiguous: kids_num is NA for persons without children, so
# real NAs cannot be told apart from persons without children.

write.csv(shp_21_breve_rc, file = "C:/Users/camil/Desktop/Aa_MA-Semester1/Forschungsseminar/finale-Arbeit/Data/shp_21_recoded.csv", row.names = FALSE)

Couple-Data

Für die Within Couple Analyse muss man zuerst *_person* und *_partner* ergänzen

# Build the within-couple data set for 2021: every person is matched with the
# record of their partner. Variables of the person get the suffix "_person",
# those of the partner "_partner". Each couple appears twice, once from each
# partner's point of view.

# Rows without a person id or partner id cannot form a couple. They are removed
# first because base merge() treats NA keys as equal and would otherwise pair
# such rows with each other.
couple_base_21 <- shp_21_breve_rc |>
  dplyr::filter(!is.na(id_person), !is.na(id_partner))

# A person matches a partner row when the ids point at each other
# (person.id_person == partner.id_partner and vice versa). merge() is used
# because the inner join caused problems.
shp_21_partners <- merge(
  couple_base_21, couple_base_21,
  by.x = c("id_person", "id_partner"),
  by.y = c("id_partner", "id_person"),
  suffixes = c("_person", "_partner"),
  all = FALSE
)

shp_21_partners <- shp_21_partners |>
  mutate(
    # Within-couple differences, always person minus partner.
    hw_widif_person = household_time_person - household_time_partner,
    tz_widif_person = tz_rate_person - tz_rate_partner,
    care_widif_person = care_work_person - care_work_partner,
    hwsat_widif_person = satis_housework_share_person - satis_housework_share_partner,
    # Couple type: "gleich" (same sex_bin) or "hetero"; NA if either is NA.
    beziehung = case_when(
      sex_bin_partner == sex_bin_person ~ "gleich",
      sex_bin_partner != sex_bin_person ~ "hetero"
    ),
    # Whether both partners belong to the same household id.
    live_together = case_when(
      id_house_person == id_house_partner ~ "same",
      id_house_person != id_house_partner ~ "dif"
    )
  )
unique(shp_21_partners$kmk_person)

# NOTE(review): unlike the other CSV exports in this script, this call writes
# row names (no row.names = FALSE); confirm that this is intended.
write.csv(shp_21_partners, file = "C:/Users/camil/Desktop/Aa_MA-Semester1/Forschungsseminar/finale-Arbeit/Data/shp_21_partners.csv")
haven::write_sav(shp_21_partners,  "C:/Users/camil/Desktop/Aa_MA-Semester1/Forschungsseminar/finale-Arbeit/Data/shp_21_partners.sav")

Zeitraum längere Ladedauer

Loop, der durch alle Wellen geht und jene Variablen übernimmt (falls vorhanden), die für die Berechnungen benötigt werden. (Note für Replikation: Hat auf meinem Rechner relativ lange gedauert)

# Read every wave from 1999 to 2022, join person and household data and keep
# the analysis variables under wave-independent names. The result is a list
# with one table per wave. Variables that do not exist in a wave are added as
# all-NA character columns so that every table has the same columns.
shp_time_2 <- data.frame()
x <- c("99", "00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22")
years <- 1999:2022
stopifnot(length(x) == length(years))

# Common start of all wave folders; the wave number is appended directly.
wave_root <- "C:/Users/camil/Desktop/Aa_MA-Semester1/Forschungsseminar/Swisshouseholdpanel/swissubase_932_14_0/data/Data_SPSS/Data_SPSS/SHP-Data-W1-W24-SPSS/W"

shp_data_list_2 <- vector("list", length(years))

# Loop through 1999 to 2022; i is the wave number, x[i] the two-digit year
# suffix used in the file and variable names of that wave.
for (i in seq_along(years)) {
  year <- years[i]
  person_file <- paste0(wave_root, i, "_", year, "/SHP", x[i], "_P_USER.sav")
  house_file <- paste0(wave_root, i, "_", year, "/SHP", x[i], "_H_USER.sav")

  shp_person <- haven::read_sav(person_file)
  shp_house <- haven::read_sav(house_file)

  # Join household data onto the person data via the wave's household id.
  id_house <- paste0("IDHOUS", x[i])
  shp_data_2 <- left_join(shp_person, shp_house, by = id_house, copy = FALSE, suffix = c("", ""))

  # Target name -> variable name in this wave. The order defines the column
  # order of the output.
  wave_vars <- c(
    id_person = "IDPERS",
    id_house = id_house,
    id_partner = paste0("IDSPOU", x[i]),
    age = paste0("AGE", x[i]),
    sex = paste0("SEX", x[i]),
    civstatus = paste0("CIVSTA", x[i]),
    canton = paste0("CANTON", x[i]),
    kids_num = paste0("NBADUL", x[i]),
    cohab = paste0("COHAST", x[i]),
    tz = paste0("P", x[i], "W39"),
    tz_p = paste0("P", x[i], "W42"),
    opin_working_mom = paste0("P", x[i], "D92"),
    care_work = paste0("P", x[i], "F63"),
    satis_housework_share = paste0("P", x[i], "F04"),
    household_time = paste0("P", x[i], "F08"),
    volunt = paste0("P", x[i], "N35"),
    lr_pos = paste0("P", x[i], "P10"),
    party_vote = paste0("P", x[i], "P19"),
    wo_penalised = paste0("P", x[i], "P20"),
    volunt_time = paste0("P", x[i], "N38"),
    confession = paste0("P", x[i], "R01"),
    rel_freq = paste0("P", x[i], "R02"),
    rel_party = paste0("P", x[i], "R03"),
    rel_frequ = paste0("P", x[i], "R04")
  )

  # Keep (and rename) the variables that exist in this wave ...
  shp_data_2 <- dplyr::select(shp_data_2, dplyr::any_of(wave_vars))

  # ... and add the ones that are missing as NA character columns.
  for (missing_var in setdiff(names(wave_vars), names(shp_data_2))) {
    shp_data_2[[missing_var]] <- NA_character_
  }

  shp_data_2$year <- year
  shp_data_2$wave <- as.numeric(i)

  # Same column order in every wave.
  shp_data_2 <- dplyr::select(
    shp_data_2,
    "year", "wave", dplyr::all_of(names(wave_vars))
  )

  shp_data_list_2[[i]] <- shp_data_2
}

saveRDS(shp_data_list_2, file = "C:/Users/camil/Desktop/Aa_MA-Semester1/Forschungsseminar/finale-Arbeit/shp_time_some-Variables_2RDS.rds")

# Row-bind the per-wave tables into one long data frame. Each table is first
# turned into a plain data frame and its labelled columns into factors, so
# that the value labels rather than the numeric codes are stored.
shp_long_CB <- shp_data_list_2 |>
  lapply(function(wave_data) haven::as_factor(as.data.frame(wave_data))) |>
  dplyr::bind_rows()


write.csv(shp_long_CB, file = "C:/Users/camil/Desktop/Aa_MA-Semester1/Forschungsseminar/finale-Arbeit/Data/shp_time-long_CB.csv", row.names = FALSE)
saveRDS(shp_long_CB, "C:/Users/camil/Desktop/Aa_MA-Semester1/Forschungsseminar/finale-Arbeit/Data/shp_time-long_CB_rds.rds" )

Recoding der Zeitanalyse:

# Recode the long (1999-2022) data set in the same way as the 2021 data.
# The table is re-read from the CSV export, so labelled variables arrive as
# the text of their value labels.
shp_long_CB <- read.csv("C:/Users/camil/Desktop/Aa_MA-Semester1/Forschungsseminar/finale-Arbeit/Data/shp_time-long_CB.csv")

# Two-letter canton abbreviation -> canton number in BFS numbering. Matching
# on the abbreviation instead of the full label makes the lookup independent
# of the spacing inside the labels.
canton_numbers <- c(
  ZH = 1, BE = 2, LU = 3, UR = 4, SZ = 5, OW = 6, NW = 7, GL = 8, ZG = 9,
  FR = 10, SO = 11, BS = 12, BL = 13, SH = 14, AR = 15, AI = 16, SG = 17,
  GR = 18, AG = 19, TG = 20, TI = 21, VD = 22, VS = 23, NE = 24, GE = 25,
  JU = 26
)

shp_long_CB_rc <- shp_long_CB |>
  mutate(
    sex = case_when(
      sex == "man" ~ "male",
      sex == "woman" ~ "female",
      sex == "other" ~ "other",
      TRUE ~ NA_character_),
    civstatus = case_when(
      civstatus == "single, never married" ~ "single",
      civstatus == "married" | civstatus == "registered partnership" ~ "married",
      civstatus == "dissolved partnership" | civstatus == "divorced" | civstatus == "separated" ~ "separated",
      civstatus == "widower/widow" ~ "widower",
      TRUE ~ NA_character_),
    # Drop implausibly high values (> 80) and negative missing codes.
    household_time = case_when(
      household_time < 0 | household_time > 80 ~ NA_real_,
      TRUE ~ household_time)) |>
  mutate(
    # tz_rate: 100 for full-time (tz == 2), otherwise the part-time
    # percentage tz_p for part-time workers (tz == 1); negative tz_p values
    # are missing codes and give NA.
    # NOTE(review): after as_factor() and the CSV round trip tz may contain
    # label text instead of the codes 1/2 - confirm that these comparisons
    # still match anything.
    tz_rate = case_when(
      tz == 2 ~ 100,
      tz == 1 & tz_p >= 0 ~ tz_p,
      TRUE ~ NA_real_),
    # cant_nr: canton number in BFS numbering, looked up via the two capital
    # letters at the start of the canton label; anything else gives NA.
    cant_nr = unname(
      canton_numbers[sub("^([A-Z]{2})\\s+.*$", "\\1", as.character(canton))]
    ),
    # sex_bin: binary version of sex; "other" and NA become NA.
    sex_bin = case_when(
      sex == "female" ~ "female",
      sex == "male" ~ "male",
      sex == "other" ~ NA_character_),
    # kmk: household constellation, NA if it cannot be determined.
    kmk = case_when(
      cohab == "not married" & kids_num == 0 ~ "together",
      cohab == "married" & kids_num == 0 ~ "married",
      kids_num > 0 ~ "children",
      TRUE ~ NA_character_))


write.csv(shp_long_CB_rc, "C:/Users/camil/Desktop/Aa_MA-Semester1/Forschungsseminar/finale-Arbeit/Data/shp_time-long_CB_recoded.csv", row.names = FALSE)

Couple-Data-Zeit

# Build the within-couple data set over time: every person-year is matched
# with the record of the partner from the SAME year.
shp_long_CB_rc <- read.csv("C:/Users/camil/Desktop/Aa_MA-Semester1/Forschungsseminar/finale-Arbeit/Data/shp_time-long_CB_recoded.csv")

# The year has to be part of the merge key; without it every wave of a person
# would be combined with every wave of the partner. A copy of the year is used
# as key so that year_person and year_partner remain in the output.
couple_src_long <- shp_long_CB_rc
couple_src_long$pair_year <- couple_src_long$year

shp_long_partners <- merge(
  couple_src_long, couple_src_long,
  by.x = c("id_person", "id_partner", "pair_year"),
  by.y = c("id_partner", "id_person", "pair_year"),
  suffixes = c("_person", "_partner"),
  all = FALSE
)
shp_long_partners$pair_year <- NULL


#openxlsx::write.xlsx(shp_21_partners, "partners_shp.xlsx")

shp_long_partners <- shp_long_partners |>
  mutate(
    # Within-couple differences, always person minus partner.
    hw_widif_person = household_time_person - household_time_partner,
    tz_widif_person = tz_rate_person - tz_rate_partner,
    care_widif_person = care_work_person - care_work_partner,
    # (not computed here: hwsat_widif_person =
    #  satis_housework_share_person - satis_housework_share_partner)
    # Couple type: "gleich" (same sex_bin) or "hetero"; NA if either is NA.
    beziehung = case_when(
      sex_bin_partner == sex_bin_person ~ "gleich",
      sex_bin_partner != sex_bin_person ~ "hetero"
    ),
    # Whether both partners belong to the same household id.
    live_together = case_when(
      id_house_person == id_house_partner ~ "same",
      id_house_person != id_house_partner ~ "dif"
    )
  )

write.csv(shp_long_partners, "C:/Users/camil/Desktop/Aa_MA-Semester1/Forschungsseminar/finale-Arbeit/Data/shp_time-long_CB_rc-pair.csv", row.names = FALSE)

Gewichtung für Zeitanalyse

Die Gewichtungsvariablen konnten nicht entnommen werden. Es gibt keine einheitliche Gewichtungsvariable über die Zeit. Die IDINT entspricht nicht den IDs der einzelnen Wellen. Die Gewichtungen sind nicht für die Wellen 1 bis 24 einheitlich. Deshalb muss auf eine Gewichtung verzichtet werden. Der Gewichtungsreport der SHP Group (2024) legt nahe, dass einheitliche Gewichtungen erst ab Welle 16 vorhanden sind.

# Exploratory attempt to attach a weighting variable from the SHP long file
# to the long data set built above.
# NOTE(review): both reads below load the same file (SHPLONG_H_USER.sav), so
# shp_person_long holds household data as well - confirm whether the person
# long file was meant for the second read.
shp_house_long <- haven::read_sav("C:/Users/camil/Desktop/Aa_MA-Semester1/Forschungsseminar/Swisshouseholdpanel/swissubase_932_14_0/data/Data_SPSS/Data_SPSS/SHP-Data-Longfile-SPSS/SHPLONG_H_USER.sav")
shp_person_long <- haven::read_sav("C:/Users/camil/Desktop/Aa_MA-Semester1/Forschungsseminar/Swisshouseholdpanel/swissubase_932_14_0/data/Data_SPSS/Data_SPSS/SHP-Data-Longfile-SPSS/SHPLONG_H_USER.sav")

    # joining household and individual data
shp_long <- left_join(shp_house_long, shp_person_long, by = c("IDHOUS", "YEAR"), copy = FALSE, suffix = c("", ""))
summary(shp_long$HWEIGHT) # weighting variable!

# Keep only the id, the year and the weight under the names used in the long
# data set.
shp_weights <- shp_long |>
  dplyr::select(id_person = IDINT, 
                year = YEAR,
                weights_long = HWEIGHT) # IDINT and id_person do not correspond to each other.
  
# NOTE(review): copy/suffix look like dplyr join arguments; base merge() spells
# the suffix argument `suffixes` - confirm they have the intended effect here.
shp_time_w <- merge(shp_long_CB, shp_weights, by = c("id_person", "year"), copy = FALSE, suffix = c("", ""))

# Not possible - too few observations?
variable.names(shp_person_long)

Bibliografie

Group, SHP. 2024. “Living in Switzerland Waves 1-24 (Including a Long File) + Covid 19 Data.” [FORS Center]. https://doi.org/10.48573/58NW-6A50.