SHP-Daten importieren & mergen
# SHP Daten importieren ----
# Lookup vectors shared by the import loop (one entry per survey wave)
# Four-digit survey years
survey.year.container4 <- seq(from = 2002L, to = 2022L)
# Two-digit, zero-padded survey years ("02", ..., "22") as used in SHP names
survey.year.container2 <- sprintf("%02d", survey.year.container4 %% 100L)
# Wave numbers used in the folder names
dataset.year.container.num <- seq(from = 4L, to = 24L)
# Empty list that will collect one data frame per wave
dataset_list <- list()
# Load each wave's person (p) and household (h) file, harmonise the
# wave-specific variable names, keep the variables of interest, merge both
# files on the household id and store the result in `dataset_list`.

# Directory that holds one sub-folder per wave ("W<wave>_<year>")
shp_data_dir <- "shp/data/Data_STATA/SHP-Data-W1-W24-STATA"

# Person-level variables: harmonised name = wave-specific source name, where
# "%s" stands for the two-digit survey year. The order of this vector is the
# column order of the resulting data frame (after YEAR and IDPERS).
person_var_templates <- c(
  IDHOUS = "idhous%s", # "Identification number of household"
  # socio-demographic data
  SEX = "sex%s", # "Sex"
  AGE = "age%s", # "Age in year of interview"
  CIVIL = "civsta%s", # "Civil status in year of interview"
  KIDS_N = "ownkid%s", # "Number of children born"
  EDU_YEAR = "edyear%s", # "Years of Education based on ISCED Classification"
  EDU_HIGH = "educat%s", # "Highest level of education achieved, grid + individual 11 codes"
  INCOME_NET = "i%swyn", # "Yearly work income, net"
  # employment situation
  WORKSTAT = "wstat%s", # "Working status"
  WORKSTAT_DET = "occupa%s", # "Actual occupation, from grid"
  WORKSTART = "p%sw608", # age at start of first regular job (not in every wave)
  JOB_TITLE = "is4maj%s", # "ISCO classification: Main current job: 4-digit-position"
  JOB_FIELD = "noga2m%s", # "Current main job: Nomenclature of economic activities"
  JOB_SOCIOPROF = "cspmaj%s", # "Swiss socio-professional category: Main job"
  JOB_SEC = "p%sw86a", # "CMJ: Job security: Estimation" (not in every wave)
  JOB_CHANGE = "p%sw18", # "Change of employer in the last 12 months"
  JOB_CHANGE_REAS = "p%sw600", # "Change of job or employer: first reason" (not in every wave)
  PARTTIME = "p%sw39", # "CMJ: Parttime or Fulltime"
  PARTTIME_PERC = "p%sw42", # "CMJ: Percentage of Parttime"
  WORK_HOURS = "p%sw77", # "Number of hours worked per week"
  WORK_HOURS_CONTRACT = "p%sw74", # current main job: contractual hours per week
  # work-life balance
  INTERFER_WL = "p%sf50", # "Interference work <-> private activities/family obligations"
  EXHAUST = "p%sf51", # "Exhausted after work to do what you would like"
  DISCONNECT = "p%sf52", # "How difficult to disconnect from work"
  # (mental) health
  HEALTHSTAT = "p%sc01", # "Health status"
  DEPRESS = "p%sc17", # "Depression, blues, anxiety: Frequency"
  OPTIM = "p%sc18", # "Frequency of energy and optimism"
  WELL_MENTAL = "x%sc15", # "Mental well-being", only from 2016
  WELL_PHYSICAL = "x%sc16", # "Physical well-being", only from 2016
  WELL_SOCIAL = "x%sc17", # "Social well-being", only from 2016
  # satisfaction
  SAT_JOB = "p%sw228", # "Satisfaction with job in general", only available from 2004
  SAT_FIN = "p%si01", # "Satisfaction with financial situation"
  SAT_LIFE = "p%sc44", # "Satisfaction with life in general"
  SAT_FREE = "p%sa05", # "Satisfaction with free time"
  SAT_HOBBY = "p%sa06", # "Satisfaction with leisure activities"
  SAT_RELATIONS = "p%sql04" # "Satisfaction with personal relationships"
  # currently unused weights: WEIGHT_1 = "wi%sls20", WEIGHT_2 = "wi%scss"
)

# Harmonised variables that do not exist in every wave; when the source column
# is absent it is created as an all-NA column so every wave has the same layout.
optional_person_vars <- c(
  "WORKSTART", "JOB_SEC", "JOB_CHANGE_REAS", "SAT_JOB",
  "WELL_MENTAL", "WELL_PHYSICAL", "WELL_SOCIAL"
)

# Household-level variables, same "%s" convention as above
household_var_templates <- c(
  IDHOUS = "idhous%s",
  CANTON = "canton%s",
  REGION = "region%s",
  COMTYP = "com2_%s", # "Community typology 2"
  HTYP = "hldtyp%s", # "Household typology"
  HFINSIT = "h%si51" # "Financial situation manageable"
)

for (i in seq_along(survey.year.container2)) {
  wave <- survey.year.container2[i]

  # Build the file paths of this wave
  wave_dir <- file.path(
    shp_data_dir,
    paste0("W", dataset.year.container.num[i], "_", survey.year.container4[i])
  )
  file_path_p <- file.path(wave_dir, paste0("shp", wave, "_p_user.dta"))
  file_path_h <- file.path(wave_dir, paste0("shp", wave, "_h_user.dta"))

  # Read the data
  dataset_p <- read_dta(file_path_p)
  dataset_h <- read_dta(file_path_h)

  # Add the survey year to the person file
  dataset_p$year <- survey.year.container4[i]

  # Resolve the wave-specific source names (harmonised name = source name)
  person_vars <- c(
    YEAR = "year", # survey year
    IDPERS = "idpers", # "Identification number of person"
    setNames(sprintf(person_var_templates, wave), names(person_var_templates))
  )
  household_vars <- setNames(
    sprintf(household_var_templates, wave),
    names(household_var_templates)
  )

  # Create all-NA columns for optional variables this wave does not contain
  missing_optional <- setdiff(
    person_vars[optional_person_vars],
    names(dataset_p)
  )
  for (col in missing_optional) {
    dataset_p[[col]] <- NA
  }

  # Select the variables of interest and rename them in one step; a required
  # variable that is missing raises an error here
  dataset_p <- dataset_p %>%
    select(all_of(person_vars))
  dataset_h <- dataset_h %>%
    select(all_of(household_vars))

  # Merge person and household data
  dataset <- left_join(dataset_p, dataset_h, by = "IDHOUS")

  # Store the merged wave under its name ("df_02", ..., "df_22")
  dataset_list[[paste0("df_", wave)]] <- dataset
}
# Export each wave's data frame from the list into the global environment
# (objects are named after the list elements)
list2env(dataset_list, envir = .GlobalEnv)
## <environment: R_GlobalEnv>
# Merge data ----
# Stack all waves row-wise. Binding the list itself keeps the merge in sync
# with the waves that were actually loaded, instead of depending on a
# hand-maintained enumeration of the individual df_XX objects.
shp_merged <- bind_rows(dataset_list)
Daten inspizieren
# Print the structure of the merged data: one line per column with its type
# and, for labelled columns, the variable label and value labels
str(shp_merged)
## tibble [292,960 × 43] (S3: tbl_df/tbl/data.frame)
## $ YEAR : int [1:292960] 2002 2002 2002 2002 2002 2002 2002 2002 2002 2002 ...
## $ IDPERS : dbl+lbl [1:292960] 4101, 4102, 4103, 4104, 4105, 5101, 11101, 11...
## ..@ label : chr "Identification number of person"
## ..@ format.stata: chr "%13.0g"
## ..@ labels : Named num [1:5] -8 -7 -3 -2 -1
## .. ..- attr(*, "names")= chr [1:5] "other error" "filter error" "inapplicable" "no answer" ...
## $ IDHOUS : dbl+lbl [1:292960] 41, 41, 41, 41, 41, 51, 111, 111, 111, 131, 1...
## ..@ labels: Named num [1:5] -8 -7 -3 -2 -1
## .. ..- attr(*, "names")= chr [1:5] "other error" "filter error" "inapplicable" "no answer" ...
## ..@ label : chr "Identification number of household"
## $ SEX : dbl+lbl [1:292960] 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 2, 1, 2, 1, 2, 1, ...
## ..@ label : chr "Sex"
## ..@ format.stata: chr "%13.0g"
## ..@ labels : Named num [1:8] -8 -7 -3 -2 -1 1 2 3
## .. ..- attr(*, "names")= chr [1:8] "other error" "filter error" "inapplicable" "no answer" ...
## $ AGE : dbl+lbl [1:292960] 37, 34, 11, 9, 6, 41, 77, 74, 40, 30, 29, 80, 77,...
## ..@ label : chr "Age in year of interview"
## ..@ format.stata: chr "%18.0g"
## ..@ labels : Named num [1:6] -8 -7 -4 -3 -2 -1
## .. ..- attr(*, "names")= chr [1:6] "other error" "filter error" "no personal income" "inapplicable" ...
## $ CIVIL : dbl+lbl [1:292960] 2, 2, 1, 1, 1, 4, 2, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, ...
## ..@ labels: Named num [1:12] -8 -7 -3 -2 -1 1 2 3 4 5 ...
## .. ..- attr(*, "names")= chr [1:12] "other error" "filter error" "inapplicable" "no answer" ...
## ..@ label : chr "Civil status in year of interview"
## $ KIDS_N : dbl+lbl [1:292960] 3, 3, -3, -3, -3, 2, -3, -3, -3, -3, 2, 1, 0,...
## ..@ label : chr "Number of children born"
## ..@ format.stata: chr "%18.0g"
## ..@ labels : Named num [1:6] -8 -7 -4 -3 -2 -1
## .. ..- attr(*, "names")= chr [1:6] "other error" "filter error" "no personal income" "inapplicable" ...
## $ EDU_YEAR : dbl+lbl [1:292960] 16, 10, 0, 0, 9, 16, 10, 8, 12, 12, 12, 9, 12,...
## ..@ label : chr "Years of Education based on ISCED Classification"
## ..@ format.stata: chr "%35.0g"
## ..@ labels : Named num [1:6] -8 -7 -6 -3 -2 -1
## .. ..- attr(*, "names")= chr [1:6] "other error" "filter error" "Specialized school for handicapped" "inapplicable" ...
## $ EDU_HIGH : dbl+lbl [1:292960] 8, 1, -5, -5, 1, 9, 3, 0, 4, 4, 4, 1, 4,...
## ..@ label : chr "Highest level of education achieved, grid + individual 11 codes"
## ..@ format.stata: chr "%67.0g"
## ..@ labels : Named num [1:19] -8 -7 -6 -5 -4 -3 -2 -1 0 1 ...
## .. ..- attr(*, "names")= chr [1:19] "other error" "filter error" "specialized school for handicapped" "pre-obligatory schooling" ...
## $ INCOME_NET : dbl+lbl [1:292960] 87400, -3, -3, -3, -3, 84000, ...
## ..@ label : chr "Yearly work income, net"
## ..@ format.stata: chr "%27.0g"
## ..@ labels : Named num [1:7] -8 -7 -5 -4 -3 -2 -1
## .. ..- attr(*, "names")= chr [1:7] "other error" "filter error" "irregular, difficult to say" "no personal income" ...
## $ WORKSTAT : dbl+lbl [1:292960] 1, 3, -3, -3, -3, 1, -3, -3, -3, -3, 1, 3, 3,...
## ..@ label : chr "Working status"
## ..@ format.stata: chr "%18.0g"
## ..@ labels : Named num [1:8] -8 -7 -3 -2 -1 1 2 3
## .. ..- attr(*, "names")= chr [1:8] "other error" "filter error" "inapplicable" "no answer" ...
## $ WORKSTAT_DET : dbl+lbl [1:292960] 1, 7, 4, 4, 4, 1, 8, 8, 1, 1, 2, 8, 8,...
## ..@ labels: Named num [1:16] -8 -7 -3 -2 -1 1 2 3 4 5 ...
## .. ..- attr(*, "names")= chr [1:16] "other error" "filter error" "inapplicable" "no answer" ...
## ..@ label : chr "Actual occupation, from grid"
## $ WORKSTART : dbl+lbl [1:292960] NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## ..@ label : chr "First regular job: at what age"
## ..@ format.stata: chr "%55.0g"
## ..@ labels : Named num [1:7] -9 -8 -7 -3 -2 ...
## .. ..- attr(*, "names")= chr [1:7] "Never worked" "other error" "filter error" "inapplicable" ...
## $ JOB_TITLE : dbl+lbl [1:292960] 1321, -3, -3, -3, -3, 2144, -3, -3, -...
## ..@ label : chr "ISCO classification: Main current job: 4-digit-position"
## ..@ format.stata: chr "%81.0g"
## ..@ labels : Named num [1:597] -9 -8 -3 -2 0 1 2 3 11 21 ...
## .. ..- attr(*, "names")= chr [1:597] "no corresponding ISCO-value" "other error, unplausible value" "inapplicable" "no answer" ...
## $ JOB_FIELD : dbl+lbl [1:292960] -3, -3, -3, -3, -3, 13, -3, -3, -3, -3, 7, -3, -3,...
## ..@ label : chr "Current main job: Nomenclature of economic activities"
## ..@ format.stata: chr "%57.0g"
## ..@ labels : Named num [1:22] -8 -7 -3 -2 -1 1 2 3 4 5 ...
## .. ..- attr(*, "names")= chr [1:22] "other error" "filter error" "inapplicable" "no answer" ...
## $ JOB_SOCIOPROF : dbl+lbl [1:292960] 4, -3, -3, -3, -3, 5, -3, -3, -3, -3, 6, -3, -3,...
## ..@ label : chr "Swiss socio-professional category: Main job"
## ..@ format.stata: chr "%42.0g"
## ..@ labels : Named num [1:12] -4 -3 -2 -1 1 2 3 4 5 6 ...
## .. ..- attr(*, "names")= chr [1:12] "active occupied but not classified" "inapplicable" "no answer" "does not know" ...
## $ JOB_SEC : dbl+lbl [1:292960] NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## ..@ label : chr "CMJ: Job security: Estimation"
## ..@ format.stata: chr "%14.0g"
## ..@ labels : Named num [1:9] -8 -7 -3 -2 -1 1 2 3 4
## .. ..- attr(*, "names")= chr [1:9] "other error" "filter error" "inapplicable" "no answer" ...
## $ JOB_CHANGE : dbl+lbl [1:292960] 4, -3, -3, -3, -3, 4, -3, -3, -3, -3, 4, -3, -3,...
## ..@ label : chr "Change of job or employer: Last 12 months"
## ..@ format.stata: chr "%39.0g"
## ..@ labels : Named num [1:9] -8 -7 -3 -2 -1 1 2 3 4
## .. ..- attr(*, "names")= chr [1:9] "other error" "filter error" "inapplicable" "no answer" ...
## $ JOB_CHANGE_REAS : dbl+lbl [1:292960] NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## ..@ labels: Named num [1:21] -8 -7 -3 -2 -1 1 2 3 4 5 ...
## .. ..- attr(*, "names")= chr [1:21] "other error" "filter error" "inapplicable" "no answer" ...
## ..@ label : chr "Change of job or employer: first reason"
## $ PARTTIME : dbl+lbl [1:292960] 2, -3, -3, -3, -3, 1, -3, -3, -3, -3, 1, -3, -3,...
## ..@ label : chr "CMJ: Part-time or Full-time"
## ..@ format.stata: chr "%13.0g"
## ..@ labels : Named num [1:7] -8 -7 -3 -2 -1 1 2
## .. ..- attr(*, "names")= chr [1:7] "other error" "filter error" "inapplicable" "no answer" ...
## $ PARTTIME_PERC : dbl+lbl [1:292960] -3, -3, -3, -3, -3, 80, -3, -3, -3, -3, 85, -3, -3,...
## ..@ label : chr "CMJ: Percentage of part-time work"
## ..@ format.stata: chr "%18.0g"
## ..@ labels : Named num [1:6] -8 -7 -4 -3 -2 -1
## .. ..- attr(*, "names")= chr [1:6] "other error" "filter error" "no personal income" "inapplicable" ...
## $ WORK_HOURS : dbl+lbl [1:292960] 42, -3, -3, -3, -3, 42, -3, -3, -3, -3, 35, -3, -3,...
## ..@ label : chr "CMJ: Number of hours worked per week"
## ..@ format.stata: chr "%20.0g"
## ..@ labels : Named num [1:6] -8 -7 -5 -3 -2 -1
## .. ..- attr(*, "names")= chr [1:6] "other error" "filter error" "number of hours vary" "inapplicable" ...
## $ WORK_HOURS_CONTRACT: dbl+lbl [1:292960] 40, -3, -3, -3, -3, 40, -3, -3, -3, -3, 35, -3, -3,...
## ..@ label : chr "CMJ: N contractual hours per week"
## ..@ format.stata: chr "%20.0g"
## ..@ labels : Named num [1:6] -8 -7 -5 -3 -2 -1
## .. ..- attr(*, "names")= chr [1:6] "other error" "filter error" "number of hours vary" "inapplicable" ...
## $ INTERFER_WL : dbl+lbl [1:292960] 4, -3, -3, -3, -3, 5, -3, -3, -3, -3, 2, -3, -3,...
## ..@ label : chr "Interference work <-> private activities/family obligations"
## ..@ format.stata: chr "%13.0g"
## ..@ labels : Named num [1:16] -8 -7 -3 -2 -1 0 1 2 3 4 ...
## .. ..- attr(*, "names")= chr [1:16] "other error" "filter error" "inapplicable" "no answer" ...
## $ EXHAUST : dbl+lbl [1:292960] 4, -3, -3, -3, -3, 7, -3, -3, -3, -3, 2, -3, -3,...
## ..@ label : chr "Exhausted after work to do what you would like"
## ..@ format.stata: chr "%13.0g"
## ..@ labels : Named num [1:16] -8 -7 -3 -2 -1 0 1 2 3 4 ...
## .. ..- attr(*, "names")= chr [1:16] "other error" "filter error" "inapplicable" "no answer" ...
## $ DISCONNECT : dbl+lbl [1:292960] 2, -3, -3, -3, -3, 5, -3, -3, -3, -3, 4, -3, -3,...
## ..@ label : chr "How difficult to deconnect from work"
## ..@ format.stata: chr "%20.0g"
## ..@ labels : Named num [1:16] -8 -7 -3 -2 -1 0 1 2 3 4 ...
## .. ..- attr(*, "names")= chr [1:16] "other error" "filter error" "inapplicable" "no answer" ...
## $ HEALTHSTAT : dbl+lbl [1:292960] 1, 1, -3, -3, -3, 2, -3, -3, -3, -3, 2, 3, 2,...
## ..@ label : chr "Health status"
## ..@ format.stata: chr "%16.0g"
## ..@ labels : Named num [1:10] -8 -7 -3 -2 -1 1 2 3 4 5
## .. ..- attr(*, "names")= chr [1:10] "other error" "filter error" "inapplicable" "no answer" ...
## $ DEPRESS : dbl+lbl [1:292960] 0, 0, -3, -3, -3, 1, -3, -3, -3, -3, 3, 0, 0,...
## ..@ label : chr "Depression, blues, anxiety: Frequency"
## ..@ format.stata: chr "%13.0g"
## ..@ labels : Named num [1:16] -8 -7 -3 -2 -1 0 1 2 3 4 ...
## .. ..- attr(*, "names")= chr [1:16] "other error" "filter error" "inapplicable" "no answer" ...
## $ OPTIM : dbl+lbl [1:292960] 8, 10, -3, -3, -3, 7, -3, -3, -3, -3, 7, 5, 10,...
## ..@ label : chr "Frequency of energy and optimism"
## ..@ format.stata: chr "%13.0g"
## ..@ labels : Named num [1:16] -8 -7 -3 -2 -1 0 1 2 3 4 ...
## .. ..- attr(*, "names")= chr [1:16] "other error" "filter error" "inapplicable" "no answer" ...
## $ WELL_MENTAL : dbl+lbl [1:292960] NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## ..@ label : chr "Mental well-being"
## ..@ format.stata: chr "%8.0g"
## ..@ labels : Named num [1:7] -8 -7 -3 -2 -1 0 10
## .. ..- attr(*, "names")= chr [1:7] "other error" "filter error" "inapplicable" "no answer" ...
## $ WELL_PHYSICAL : dbl+lbl [1:292960] NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## ..@ label : chr "Physical well-being"
## ..@ format.stata: chr "%8.0g"
## ..@ labels : Named num [1:7] -8 -7 -3 -2 -1 0 10
## .. ..- attr(*, "names")= chr [1:7] "other error" "filter error" "inapplicable" "no answer" ...
## $ WELL_SOCIAL : dbl+lbl [1:292960] NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## ..@ label : chr "Social well-being"
## ..@ format.stata: chr "%8.0g"
## ..@ labels : Named num [1:7] -8 -7 -3 -2 -1 0 10
## .. ..- attr(*, "names")= chr [1:7] "other error" "filter error" "inapplicable" "no answer" ...
## $ SAT_JOB : dbl+lbl [1:292960] NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## ..@ label : chr "CMJ: Satisfaction: Job in general"
## ..@ format.stata: chr "%20.0g"
## ..@ labels : Named num [1:16] -8 -7 -3 -2 -1 0 1 2 3 4 ...
## .. ..- attr(*, "names")= chr [1:16] "other error" "filter error" "inapplicable" "no answer" ...
## $ SAT_FIN : dbl+lbl [1:292960] 7, 9, -3, -3, -3, 6, -3, -3, -3, -3, 8, 10, 10,...
## ..@ label : chr "Satisfaction with financial situation"
## ..@ format.stata: chr "%20.0g"
## ..@ labels : Named num [1:16] -8 -7 -3 -2 -1 0 1 2 3 4 ...
## .. ..- attr(*, "names")= chr [1:16] "other error" "filter error" "inapplicable" "no answer" ...
## $ SAT_LIFE : dbl+lbl [1:292960] 10, 10, -3, -3, -3, 8, -3, -3, -3, -3, 9, 5, 10,...
## ..@ label : chr "Satisfaction with life in general"
## ..@ format.stata: chr "%20.0g"
## ..@ labels : Named num [1:16] -8 -7 -3 -2 -1 0 1 2 3 4 ...
## .. ..- attr(*, "names")= chr [1:16] "other error" "filter error" "inapplicable" "no answer" ...
## $ SAT_FREE : dbl+lbl [1:292960] 8, 8, -3, -3, -3, 6, -3, -3, -3, -3, 8, 10, 10,...
## ..@ label : chr "Satisfaction with free time"
## ..@ format.stata: chr "%20.0g"
## ..@ labels : Named num [1:16] -8 -7 -3 -2 -1 0 1 2 3 4 ...
## .. ..- attr(*, "names")= chr [1:16] "other error" "filter error" "inapplicable" "no answer" ...
## $ SAT_HOBBY : dbl+lbl [1:292960] 7, 10, -3, -3, -3, 8, -3, -3, -3, -3, 9, 10, 10,...
## ..@ label : chr "Satisfaction with leisure activities"
## ..@ format.stata: chr "%20.0g"
## ..@ labels : Named num [1:16] -8 -7 -3 -2 -1 0 1 2 3 4 ...
## .. ..- attr(*, "names")= chr [1:16] "other error" "filter error" "inapplicable" "no answer" ...
## $ SAT_RELATIONS : dbl+lbl [1:292960] 10, 10, -3, -3, -3, 6, -3, -3, -3, -3, 9, 10, 10,...
## ..@ labels: Named num [1:16] -8 -7 -3 -2 -1 0 1 2 3 4 ...
## .. ..- attr(*, "names")= chr [1:16] "other error" "filter error" "inapplicable" "no answer" ...
## ..@ label : chr "Satisfaction with personal relationships"
## $ CANTON : dbl+lbl [1:292960] 8, 8, 8, 8, 8, 8, 24, 24, 24, 8, 8, 24, 24,...
## ..@ label : chr "Canton of residence"
## ..@ format.stata: chr "%26.0g"
## ..@ labels : Named num [1:31] -8 -7 -3 -2 -1 1 2 3 4 5 ...
## .. ..- attr(*, "names")= chr [1:31] "other error" "filter error" "inapplicable" "no answer" ...
## $ REGION : dbl+lbl [1:292960] 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## ..@ label : chr "Region of residence"
## ..@ format.stata: chr "%46.0g"
## ..@ labels : Named num [1:12] -8 -7 -3 -2 -1 1 2 3 4 5 ...
## .. ..- attr(*, "names")= chr [1:12] "other error" "filter error" "inapplicable" "no answer" ...
## $ COMTYP : dbl+lbl [1:292960] 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 5, 5, 1, 1, 1, 1, ...
## ..@ label : chr "Community typology 2"
## ..@ format.stata: chr "%39.0g"
## ..@ labels : Named num [1:14] -8 -7 -3 -2 -1 1 2 3 4 5 ...
## .. ..- attr(*, "names")= chr [1:14] "other error" "filter error" "inapplicable" "no answer" ...
## $ HTYP : dbl+lbl [1:292960] 10, 10, 10, 10, 10, 2, 11, 11, 11, 7, 7, 6, 6,...
## ..@ label : chr "Type of household (PACO)"
## ..@ format.stata: chr "%57.0g"
## ..@ labels : Named num [1:18] -8 -7 -3 -2 -1 1 2 3 4 5 ...
## .. ..- attr(*, "names")= chr [1:18] "other error" "filter error" "inapplicable" "no answer" ...
## $ HFINSIT : dbl+lbl [1:292960] 7, 7, 7, 7, 7, 7, -3, -3, -3, 7, 7, 10, 10,...
## ..@ label : chr "Financial situation manageable"
## ..@ format.stata: chr "%21.0g"
## ..@ labels : Named num [1:16] -8 -7 -3 -2 -1 0 1 2 3 4 ...
## .. ..- attr(*, "names")= chr [1:16] "other error" "filter error" "inapplicable" "no answer" ...
# Print summary statistics per column. The negative missing-value codes
# (e.g. -3 "inapplicable") are still in the data at this point, so the
# reported minima, quartiles and means are not yet substantively meaningful.
summary(shp_merged)
## YEAR IDPERS IDHOUS SEX
## Min. :2002 Min. :4.101e+03 Min. : 41 Min. :-3.000
## 1st Qu.:2008 1st Qu.:7.343e+06 1st Qu.: 73431 1st Qu.: 1.000
## Median :2014 Median :2.029e+07 Median : 202901 Median : 2.000
## Mean :2013 Mean :2.541e+07 Mean : 255846 Mean : 1.513
## 3rd Qu.:2019 3rd Qu.:4.392e+07 3rd Qu.: 439331 3rd Qu.: 2.000
## Max. :2022 Max. :1.637e+09 Max. :16374512 Max. : 3.000
##
## AGE CIVIL KIDS_N EDU_YEAR
## Min. : -3.00 Min. :-8.000 Min. :-3.0000 Min. :-6.00
## 1st Qu.: 21.00 1st Qu.: 1.000 1st Qu.:-3.0000 1st Qu.: 9.00
## Median : 44.00 Median : 2.000 Median : 0.0000 Median :12.00
## Mean : 41.66 Mean : 1.839 Mean :-0.2326 Mean :11.09
## 3rd Qu.: 59.00 3rd Qu.: 2.000 3rd Qu.: 2.0000 3rd Qu.:16.00
## Max. :105.00 Max. : 7.000 Max. :38.0000 Max. :21.00
##
## EDU_HIGH INCOME_NET WORKSTAT WORKSTAT_DET
## Min. :-6.000 Min. : -8 Min. :-3.00000 Min. :-3.000
## 1st Qu.: 1.000 1st Qu.: -3 1st Qu.:-3.00000 1st Qu.: 1.000
## Median : 4.000 Median : -3 Median : 1.00000 Median : 4.000
## Mean : 3.902 Mean : 23243 Mean :-0.06707 Mean : 3.953
## 3rd Qu.: 7.000 3rd Qu.: 39000 3rd Qu.: 1.00000 3rd Qu.: 7.000
## Max. :10.000 Max. :2430000 Max. : 3.00000 Max. :11.000
##
## WORKSTART JOB_TITLE JOB_FIELD JOB_SOCIOPROF
## Min. :-9.000 Min. : -9 Min. :-3.000 Min. :-4.0000
## 1st Qu.:-3.000 1st Qu.: -3 1st Qu.:-3.000 1st Qu.:-3.0000
## Median :16.000 Median : -3 Median :-3.000 Median :-3.0000
## Mean : 9.506 Mean :1627 Mean : 1.885 Mean : 0.2529
## 3rd Qu.:18.000 3rd Qu.:3000 3rd Qu.: 8.000 3rd Qu.: 4.0000
## Max. :96.000 Max. :9629 Max. :17.000 Max. : 8.0000
## NA's :18013
## JOB_SEC JOB_CHANGE JOB_CHANGE_REAS PARTTIME
## Min. :-3.000 Min. :-3.0000 Min. :-3.000 Min. :-3.000
## 1st Qu.:-3.000 1st Qu.:-3.0000 1st Qu.:-3.000 1st Qu.:-3.000
## Median :-3.000 Median :-3.0000 Median :-3.000 Median :-3.000
## Mean :-1.049 Mean :-0.3563 Mean :-2.631 Mean :-1.127
## 3rd Qu.: 1.000 3rd Qu.: 4.0000 3rd Qu.:-3.000 3rd Qu.: 1.000
## Max. : 4.000 Max. : 4.0000 Max. :16.000 Max. : 2.000
## NA's :18013 NA's :18013
## PARTTIME_PERC WORK_HOURS WORK_HOURS_CONTRACT INTERFER_WL
## Min. :-3.000 Min. :-5.00 Min. :-7.000 Min. :-3.0000
## 1st Qu.:-3.000 1st Qu.:-3.00 1st Qu.:-3.000 1st Qu.:-3.0000
## Median :-3.000 Median :-3.00 Median :-3.000 Median :-3.0000
## Mean : 7.742 Mean :11.12 Mean : 7.654 Mean :-0.2274
## 3rd Qu.:-3.000 3rd Qu.:30.00 3rd Qu.:20.000 3rd Qu.: 3.0000
## Max. :97.000 Max. :96.00 Max. :96.000 Max. :10.0000
##
## EXHAUST DISCONNECT HEALTHSTAT DEPRESS
## Min. :-3.0000 Min. :-3.0000 Min. :-3.0000 Min. :-3.0000
## 1st Qu.:-3.0000 1st Qu.:-3.0000 1st Qu.:-3.0000 1st Qu.:-3.0000
## Median :-3.0000 Median :-3.0000 Median : 1.0000 Median : 0.0000
## Mean : 0.0109 Mean :-0.4821 Mean : 0.1221 Mean : 0.2278
## 3rd Qu.: 4.0000 3rd Qu.: 2.0000 3rd Qu.: 2.0000 3rd Qu.: 2.0000
## Max. :10.0000 Max. :10.0000 Max. : 5.0000 Max. :10.0000
##
## OPTIM WELL_MENTAL WELL_PHYSICAL WELL_SOCIAL
## Min. :-3.000 Min. :-3 Min. :-3.00 Min. :-3.00
## 1st Qu.:-3.000 1st Qu.:-3 1st Qu.:-3.00 1st Qu.:-3.00
## Median : 6.000 Median :-3 Median :-3.00 Median :-3.00
## Mean : 3.376 Mean :-2 Mean :-1.96 Mean :-1.99
## 3rd Qu.: 8.000 3rd Qu.:-3 3rd Qu.:-3.00 3rd Qu.:-3.00
## Max. :10.000 Max. :10 Max. :10.00 Max. :10.00
## NA's :159067 NA's :159067 NA's :159067
## SAT_JOB SAT_FIN SAT_LIFE SAT_FREE
## Min. :-3.000 Min. :-3.000 Min. :-3.000 Min. :-3.000
## 1st Qu.:-3.000 1st Qu.:-3.000 1st Qu.:-3.000 1st Qu.:-3.000
## Median :-3.000 Median : 6.000 Median : 7.000 Median : 5.000
## Mean : 1.568 Mean : 3.422 Mean : 3.959 Mean : 3.298
## 3rd Qu.: 8.000 3rd Qu.: 8.000 3rd Qu.: 8.000 3rd Qu.: 8.000
## Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
## NA's :18013
## SAT_HOBBY SAT_RELATIONS CANTON REGION
## Min. :-3.000 Min. :-3.000 Min. :-8.00 Min. :1.00
## 1st Qu.:-3.000 1st Qu.:-3.000 1st Qu.: 6.00 1st Qu.:2.00
## Median : 6.000 Median : 7.000 Median :16.00 Median :3.00
## Mean : 3.598 Mean : 4.057 Mean :14.57 Mean :3.27
## 3rd Qu.: 8.000 3rd Qu.: 9.000 3rd Qu.:23.00 3rd Qu.:5.00
## Max. :10.000 Max. :10.000 Max. :26.00 Max. :7.00
##
## COMTYP HTYP HFINSIT
## Min. :-3.000 Min. :-8.000 Min. :-3.000
## 1st Qu.: 2.000 1st Qu.: 6.000 1st Qu.: 6.000
## Median : 3.000 Median : 8.000 Median : 8.000
## Mean : 3.741 Mean : 7.841 Mean : 7.139
## 3rd Qu.: 6.000 3rd Qu.:11.000 3rd Qu.: 9.000
## Max. : 9.000 Max. :13.000 Max. :10.000
##
Daten bereinigen/rekodieren
# Clean & recode data ----
# Keeps employed respondents only, turns the missing-value codes into NA,
# recodes numeric category codes into (German) labels, derives AGE_CAT, BIRTHY,
# GEN, KIDS and WORK_HOURS_DIF, and restricts the sample to ages 16-65
# excluding the "Traditionals" generation.
#
# Several codes are collapsed into one label below. Membership must be tested
# with `%in%`: an expression like `CIVIL == 2 & 6` is parsed as
# `(CIVIL == 2) & TRUE` and therefore only ever matches the first code, which
# silently turns all other codes into NA.
shp <- shp_merged %>%
  # "active occupied" persons only, no "unemployed" or "not in labor force"
  filter(WORKSTAT == 1) %>%
  # Replace the codes -1, -2, -3, -7, -8 with NA in every column.
  # NOTE(review): other negative special codes that appear in the value labels
  # (e.g. -5 "number of hours vary", -4 "no personal income") are kept as
  # numbers - confirm that this is intended.
  mutate(across(
    everything(),
    ~ ifelse(.x %in% c(-1, -2, -3, -7, -8), NA, .x)
  )) %>%
  mutate(
    SEX = case_when(
      SEX %in% 1 ~ "Männer",
      SEX %in% 2 ~ "Frauen",
      TRUE ~ NA_character_ # no nonbinary category because of too few observations
    ),
    AGE_CAT = case_when(
      AGE <= 15 ~ "<= 15",
      AGE >= 16 & AGE <= 25 ~ "16-25",
      AGE >= 26 & AGE <= 35 ~ "26-35",
      AGE >= 36 & AGE <= 45 ~ "36-45",
      AGE >= 46 & AGE <= 55 ~ "46-55",
      AGE >= 56 & AGE <= 65 ~ "56-65",
      AGE >= 66 ~ ">= 66"
    ),
    # Only the working-age categories are levels; "<= 15" and ">= 66" become NA
    # (those rows are removed by the age filter at the end of the pipeline)
    AGE_CAT = factor(
      AGE_CAT,
      levels = c("16-25", "26-35", "36-45", "46-55", "56-65")
    ),
    BIRTHY = YEAR - AGE,
    # NOTE(review): the labels of Gen Z ('96-'10) and Gen Alpha ('10-heute)
    # both mention 2010; by the conditions 2010 belongs to Gen Alpha. Labels
    # are left unchanged because other code may refer to them.
    GEN = case_when(
      BIRTHY >= 1922 & BIRTHY <= 1955 ~ "Traditionals ('22-'55)",
      BIRTHY >= 1956 & BIRTHY <= 1965 ~ "Babyboomer ('56-'65)",
      BIRTHY >= 1966 & BIRTHY <= 1980 ~ "Gen X ('66-'80)",
      BIRTHY >= 1981 & BIRTHY <= 1995 ~ "Gen Y ('81-'95)",
      BIRTHY >= 1996 & BIRTHY <= 2009 ~ "Gen Z ('96-'10)",
      BIRTHY >= 2010 ~ "Gen Alpha ('10-heute)"
    ),
    GEN = factor(
      GEN,
      levels = c(
        "Gen Alpha ('10-heute)", "Gen Z ('96-'10)",
        "Gen Y ('81-'95)", "Gen X ('66-'80)",
        "Babyboomer ('56-'65)", "Traditionals ('22-'55)"
      )
    ),
    CIVIL = case_when(
      CIVIL == 1 ~ "Ledig",
      CIVIL %in% c(2, 6) ~ "Verheiratet", # married, registered partnership
      CIVIL %in% c(3, 4, 7) ~ "Getrennt/Geschieden", # separated, divorced, dissolved partnership
      CIVIL == 5 ~ "Verwitwet",
      TRUE ~ NA_character_
    ),
    KIDS = ifelse(KIDS_N == 0, "Keine Kinder", "Kinder"),
    EDU_HIGH = case_when(
      EDU_HIGH %in% c(0, 1) ~ "bis und mit Sekundarstufe I",
      EDU_HIGH %in% c(2, 3, 4, 5) ~ "bis und mit Sekundarstufe II",
      EDU_HIGH %in% c(7, 8, 9) ~ "Höhere Berufsbildung",
      EDU_HIGH %in% c(6, 10) ~ "Hochschulbildung"
    ),
    WORKSTAT_DET = case_when(
      WORKSTAT_DET == 1 ~ "Bezahlte Erwerbstätigkeit, vollzeit (reguläre Arbeitszeit 37 Stunden pro Woche oder mehr)",
      WORKSTAT_DET %in% c(2, 3) ~ "Bezahlte Erwerbstätigkeit, teilzeit (reguläre Arbeitszeit 1-36 Stunden pro Woche)",
      WORKSTAT_DET == 4 ~ "In Ausbildung (Lehrling, Schüler/In, Student/In)",
      WORKSTAT_DET == 5 ~ "Mitarbeit im Familienbetrieb",
      WORKSTAT_DET == 7 ~ "Kind/Frau/Mann im Haushalt (nur bis maximal 64 bzw. 65 Jahre)",
      WORKSTAT_DET %in% c(8, 9) ~ "Rentner (AHV, IV usw.)",
      WORKSTAT_DET == 10 ~ "Arbeitslos",
      WORKSTAT_DET %in% c(6, 11) ~ "Andere Tätigkeit (Weiterbildung, unbezahlter Urlaub)"
    ),
    JOB_SOCIOPROF = case_when(
      JOB_SOCIOPROF == 1 ~ "Oberstes Management",
      JOB_SOCIOPROF %in% c(2, 3) ~ "Freie Berufe und Selbständige",
      JOB_SOCIOPROF == 4 ~ "Akademiker und oberes Kader",
      JOB_SOCIOPROF == 5 ~ "Intermediäre Berufe",
      JOB_SOCIOPROF == 6 ~ "Qualifizierte nicht-manuelle Berufe",
      JOB_SOCIOPROF == 7 ~ "Qualifizierte manuelle Berufe",
      JOB_SOCIOPROF == 8 ~ "Ungelernte Arbeitskräfte"
    ),
    JOB_FIELD = case_when(
      JOB_FIELD == 1 ~ "Land- und Forstwirtschaft, Jagd",
      JOB_FIELD == 2 ~ "Fischerei und Fischzucht",
      JOB_FIELD == 3 ~ "Bergbau und Gewinnung von Steinen und Erden",
      JOB_FIELD == 4 ~ "Verarbeitendes Gewerbe; Industrie",
      JOB_FIELD == 5 ~ "Energie- und Wasserversorgung",
      JOB_FIELD == 6 ~ "Baugewerbe",
      JOB_FIELD == 7 ~ "Handel; Reparatur von Automobilen und Gebrauchsgütern",
      JOB_FIELD == 8 ~ "Gastgewerbe",
      JOB_FIELD == 9 ~ "Verkehr und Nachrichtenübermittlung",
      JOB_FIELD == 10 ~ "Kredit- und Versicherungsgewerbe",
      JOB_FIELD == 11 ~ "Immobilienwesen; Informatik; Forschung",
      JOB_FIELD == 12 ~ "Öffentliche Verwaltung; Landesverteidigung; Sozialversicherung",
      JOB_FIELD == 13 ~ "Unterrichtswesen",
      JOB_FIELD == 14 ~ "Gesundheits- und Sozialwesen",
      JOB_FIELD == 15 ~ "Erbringung von sonstigen öffentl, persönl Dienstleistungen",
      JOB_FIELD == 16 ~ "Private Haushalte",
      JOB_FIELD == 17 ~ "Exterritoriale Organisationen und Körperschaften"
    ),
    JOB_SEC = case_when(
      JOB_SEC == 1 ~ "sehr sicher",
      JOB_SEC == 2 ~ "ziemlich sicher",
      JOB_SEC == 3 ~ "ziemlich unsicher",
      JOB_SEC == 4 ~ "sehr unsicher"
    ),
    JOB_SEC = factor(
      JOB_SEC,
      levels = c(
        "sehr unsicher", "ziemlich unsicher",
        "ziemlich sicher", "sehr sicher"
      )
    ),
    PARTTIME = case_when(
      PARTTIME %in% 2 ~ "Vollzeit",
      PARTTIME %in% 1 ~ "Teilzeit",
      TRUE ~ NA_character_
    ),
    # Difference between worked and contractual hours. Negative values in
    # either input are leftover special codes, not hours, so the difference is
    # set to NA for them.
    WORK_HOURS_DIF = ifelse(
      WORK_HOURS >= 0 & WORK_HOURS_CONTRACT >= 0,
      WORK_HOURS - WORK_HOURS_CONTRACT,
      NA_real_
    ),
    JOB_CHANGE = case_when(
      JOB_CHANGE %in% c(1, 2, 3) ~ "Ja", # change of employer or of job
      JOB_CHANGE == 4 ~ "Nein"
    ),
    # The "\n" in three labels reproduces the line break that the original
    # multi-line string literals contained.
    JOB_CHANGE_REAS = case_when(
      JOB_CHANGE_REAS == 1 ~ paste0(
        "Um eine bessere Stelle anzutreten (Lust auf Veränderung, neue Herausforderung,\n",
        "berufliches Weiterkommen, Unzufriedenheit mit dem vorherigen Arbeitgeber)"
      ),
      JOB_CHANGE_REAS == 2 ~ "Ende eines befristeten Arbeitsvertrages",
      JOB_CHANGE_REAS %in% c(3, 15, 16) ~ paste0(
        "Bedingt durch den Arbeitgeber (Betriebsauflösung,\n",
        "Restrukturierung, Entlassung, vorzeitige Pensionierung, etc.)"
      ),
      JOB_CHANGE_REAS == 4 ~ "Verkauf oder Schliessung des eigenen Geschäfts/Familienbetriebes, Ende der Selbständigkeit",
      JOB_CHANGE_REAS == 5 ~ "Betreuung von Kindern oder anderer Personen",
      JOB_CHANGE_REAS == 6 ~ "Wegzug in andere Region wegen Beruf des Partners (der Partnerin) oder Heirat",
      JOB_CHANGE_REAS == 8 ~ "Für kürzeren Arbeitsweg",
      JOB_CHANGE_REAS == 9 ~ paste0(
        "Änderung der Arbeitszeit (Erhöhung oder Verringerung der Stellenprozente,\n",
        "regelmässigere Arbeitszeiten, Schichtarbeit vermeiden)"
      ),
      JOB_CHANGE_REAS == 10 ~ "Ausbildung, Praktikum, Studium (Ende, Anfang, Abbruch, Weiterbildung)",
      JOB_CHANGE_REAS == 11 ~ "Sich selbständig gemacht/eigene Firma gegründet/um in Familienbetrieb zu arbeiten",
      JOB_CHANGE_REAS == 12 ~ "Gesundheitliche Probleme (Burnout, Unfall, etc.)",
      JOB_CHANGE_REAS == 13 ~ "Konflikte, Mobbing, Probleme mit Vorgesetzten, schlechtes Arbeitsklima",
      JOB_CHANGE_REAS == 14 ~ "Vorher nicht erwerbstätig (Arbeitslosigkeit, Militär, Zivildienst, Hausfrau/-mann)",
      JOB_CHANGE_REAS == 7 ~ "Anderer Grund (Saisonarbeit, längere Reise, Auszeit, Pensionierung)"
    ),
    # Labels must match the factor levels below exactly (no trailing blanks),
    # otherwise factor() turns the whole category into NA
    HEALTHSTAT = case_when(
      HEALTHSTAT == 1 ~ "sehr gut",
      HEALTHSTAT == 2 ~ "gut",
      HEALTHSTAT == 3 ~ "es geht so (mittelmässig)",
      HEALTHSTAT == 4 ~ "schlecht",
      HEALTHSTAT == 5 ~ "sehr schlecht"
    ),
    HEALTHSTAT = factor(
      HEALTHSTAT,
      levels = c(
        "sehr schlecht", "schlecht",
        "es geht so (mittelmässig)", "gut", "sehr gut"
      )
    ),
    HTYP = case_when(
      HTYP == 1 ~ "Einzelhaushalt (über 65J)",
      HTYP %in% c(2, 3) ~ "Einzelhaushalt (unter 65J)",
      HTYP %in% c(4, 5) ~ "alleinerziehende Person mit Kind/ern",
      HTYP %in% c(6, 7) ~ "Paar ohne Kinder",
      HTYP %in% c(8, 9, 10, 11) ~ "Paar mit Kind/ern",
      HTYP == 12 ~ "andere Haushalte, alle Mitglieder verwandt",
      HTYP == 13 ~ "andere Haushalte, nicht alle Mitglieder verwandt"
    ),
    COMTYP = case_when(
      COMTYP == 1 ~ "Zentren",
      COMTYP == 2 ~ "Suburbane Gemeinden",
      COMTYP == 3 ~ "Reiche Gemeinden",
      COMTYP == 4 ~ "Periurbane Gemeinden",
      COMTYP == 5 ~ "Touristische Gemeinden",
      COMTYP == 6 ~ "Industriell-tertiäre Gemeinden",
      COMTYP == 7 ~ "Ländliche Pendlergemeinden",
      COMTYP == 8 ~ "Agrarisch-gemischte Gemeinden",
      COMTYP == 9 ~ "Agrarisch-periphere Gemeinden"
    )
  ) %>%
  # Put the derived columns next to the columns they are derived from
  relocate(AGE_CAT, .after = AGE) %>%
  relocate(KIDS, .before = KIDS_N) %>%
  relocate(BIRTHY, .before = AGE) %>%
  relocate(GEN, .before = BIRTHY) %>%
  relocate(WORK_HOURS_DIF, .after = WORK_HOURS_CONTRACT) %>%
  # Keep ages 16-65 and drop the oldest generation; rows where AGE or GEN is
  # NA are dropped as well because filter() only keeps TRUE
  filter(
    AGE >= 16,
    AGE <= 65,
    GEN != "Traditionals ('22-'55)"
  )

# Convert all remaining character columns to factors
is_chr_col <- vapply(shp, is.character, logical(1))
shp[is_chr_col] <- lapply(shp[is_chr_col], as.factor)