####################################
## Project by Johanna Burger      ##
## Subject: data journalism       ##
## Date: 15.12.2017               ##
####################################

# Analysis of moves into and out of the Zurich quarter "Langstrasse" and of
# the quarter's population (age classes, household types, countries of birth).
# Inputs are CSV files read from the working directory set below.

# One-time setup (run manually, NOT on every execution of this script):
#   install.packages("ggplot2")
#   devtools::install_github("jcheng5/bubbles")

library(dplyr)
library(tidyr)
library(ggridges)
library(ggplot2)
library(foreign)
library(haven)
library(scales)
library(plotly)
library(lattice)
library(reshape2)
library(bubbles)

# All relative file names below are resolved against this directory.
setwd("C://Users//J//Desktop//Daten_Datenjournalismus//Daten")

## helpers ----

# Frequency table (columns Var1, Freq) of the origin column ZuzLandHistLang
# for one reference year, sorted by descending frequency.
influx_by_origin <- function(influx, year) {
  in_year <- which(influx$StichtagDatJahr == year)
  counts <- as.data.frame(table(influx$ZuzLandHistLang[in_year]))
  counts[order(counts$Freq, decreasing = TRUE), ]
}

# Bar chart of an already reduced and relabelled top-5 table, with the counts
# printed above the bars. Returns the bar midpoints invisibly.
plot_top5_influx <- function(top5, year, colours) {
  bar_mid <- barplot(
    top5$Freq,
    names.arg = top5$Var1,
    col = colours,
    ylab = "Anzahl Zuzüge",
    main = paste("Top 5 der Zuzüge aus dem Ausland in die Langstrasse", year),
    ylim = c(0, 65)
  )
  text(bar_mid, top5$Freq + 3, paste(top5$Freq), cex = 1)
  invisible(bar_mid)
}

# Maps 5-year age bands (AlterV05Kurz) to five 20-year classes.
# `labels` gives the class names in the order 0-19, 20-39, 40-59, 60-79, 80+.
# Bands that match none of the classes yield NA.
classify_age <- function(age_band, labels) {
  stopifnot(length(labels) == 5L)
  bands <- list(
    c("0-4", "5-9", "10-14", "15-19"),
    c("20-24", "25-29", "30-34", "35-39"),
    c("40-44", "45-49", "50-54", "55-59"),
    c("60-64", "65-69", "70-74", "75-79"),
    c("80-84", "85-89", "90-94", "95-99", "100 u. älter")
  )
  age_band <- as.character(age_band)
  age_class <- rep(NA_character_, length(age_band))
  for (i in seq_along(bands)) {
    age_class[age_band %in% bands[[i]]] <- labels[i]
  }
  age_class
}

# Counts persons per GebLandHistLang (summing the all-ones column `count_col`),
# drops row `drop_row` of the aggregate, and adds `share_col`: the share of
# each remaining row among the remaining rows, rounded to 4 digits.
birth_country_shares <- function(people, count_col, share_col, drop_row) {
  shares <- aggregate(
    reformulate("GebLandHistLang", response = count_col),
    people,
    sum
  )
  shares <- shares[-drop_row, ]
  shares[[share_col]] <- round(
    shares[[count_col]] / sum(shares[[count_col]]),
    digits = 4
  )
  shares
}

## merge data ----
influ <- read.csv("zuzug.csv")
# The population table is needed by the merges below and by every later
# section, so it has to be loaded here.
popul <- read.csv("bevoelkerung.csv")
popinflu <- merge(influ, popul, by = c("PersNum", "StichtagDatJahr"))
langstrinflu <- subset(popinflu, QuarLang == "Langstrasse")
# write.csv(langstrinflu, file = "langstrinflu.csv")

move <- read.csv("wegzug.csv")
popmove <- merge(move, popul, by = c("PersNum", "StichtagDatJahr"))
langstrmove <- subset(popmove, QuarLang == "Langstrasse")

## influx every 10 years ----
# NOTE(review): the row indices removed below and the replacement labels are
# hard-coded against the printed tables; they presumably drop non-foreign or
# unknown origins -- re-check them whenever the input data changes.

## 1996
langstrinflu <- read.csv("langstrinflu.csv")
langstrinflu_table <- as.data.frame(table(langstrinflu$ZuzLandHistLang))

langstrinflu1996 <- influx_by_origin(langstrinflu, 1996)
langstrinflu1996
langstrinflu1996 <- langstrinflu1996[-c(1, 3, 5, 9:26), ]
langstrinflu1996
langstrinflu1996$Var1 <- c(
  "Deutschland", "Asien", "Portugal", "Kosovo", "Amerika*"
)
plotlangstrinflu1996 <- plot_top5_influx(
  langstrinflu1996, 1996,
  c("#FFFF00", "#DF0101", "#31B404", 4, 9)
)

## 2006
langstrinflu2006 <- influx_by_origin(langstrinflu, 2006)
langstrinflu2006
langstrinflu2006 <- langstrinflu2006[-c(1, 3, 4, 9:26), ]
langstrinflu2006
langstrinflu2006$Var1 <- c(
  "Deutschland", "Asien", "Grossbritannien", "Italien", "Amerika*"
)
plotlangstrinflu2006 <- plot_top5_influx(
  langstrinflu2006, 2006,
  c("#FFFF00", "#DF0101", "#A9D0F5", "#CC2EFA", 1)
)

## 2016
langstrinflu2016 <- influx_by_origin(langstrinflu, 2016)
langstrinflu2016
langstrinflu2016 <- langstrinflu2016[-c(1, 3, 8:26), ]
langstrinflu2016
langstrinflu2016$Var1 <- c(
  "Deutschland", "Italien", "Asien", "Grossbritannien", "Spanien"
)
plotlangstrinflu2016 <- plot_top5_influx(
  langstrinflu2016, 2016,
  c("#FFFF00", "#CC2EFA", "#DF0101", "#A9D0F5", "#0B610B")
)

## influx all in one ----
# One-way frequency table over all years (same shape as move_all below).
influ_all <- table(langstrinflu$ZuzLandHistLang)
sort(influ_all)

## move all in one ----
# langstrmove was already built in the "merge data" section above.
move_all <- table(langstrmove$WegLandHistLang)
sort(move_all)
sum(move_all)

## move in other canton ----
move_ct_all <- table(langstrmove$WegKtHistLang)
sort(move_ct_all)

# All-ones helper column so that sum() counts rows per destination canton.
langstrmove$ct_agg1 <- 1
move_ct_agg <- aggregate(ct_agg1 ~ WegKtHistLang, langstrmove, sum)
move_ct_agg
move_ct_agg <- move_ct_agg[order(move_ct_agg$ct_agg1, decreasing = TRUE), ]
move_ct_agg

langstrmove$ct_agg2 <- 1
# Keep only moves with a known Swiss canton as destination.
langstrmove <- subset(
  langstrmove,
  WegKtHistLang != "Ausland" &
    WegKtHistLang != "Unbekannt" &
    WegKtHistLang != "Schweiz Unbekannt"
)
move_ct_agg_years <- aggregate(
  ct_agg2 ~ WegKtHistLang + StichtagDatJahr,
  langstrmove,
  sum
)
move_ct_agg_years

# rel_min_height is a parameter of the geom, not an aesthetic.
myplot <- ggplot(data = move_ct_agg_years) +
  aes(x = StichtagDatJahr, y = WegKtHistLang, fill = ct_agg2) +
  geom_density_ridges(rel_min_height = 0.025)
myplot

## population for just one year (2016) ----
popul2016 <- subset(popul, StichtagDatJahr == 2016)
popul2016langstr <- subset(popul2016, QuarLang == "Langstrasse")
## write.csv(popul2016langstr, file = "popul2016langstr.csv")
popul2016langstr <- read.csv("popul2016langstr.csv")

popul2016langstr$agecl <- classify_age(
  popul2016langstr$AlterV05Kurz,
  c("0-19", "20-39", "40-59", "60-79", "grössergleich 80")
)

#### other way of presenting the data
table(popul2016langstr$agecl)
popul2016langstr$aggage <- 1
agg_age_popul2016langstr <- aggregate(
  popul2016langstr$aggage ~ popul2016langstr$agecl,
  popul2016langstr,
  sum
)

## population for every year ----
# popullangstr <- subset(popul, QuarLang == "Langstrasse")
# write.csv(popullangstr, file = "popullangstr.csv")
popullangstr <- read.csv("popullangstr.csv")

# The numeric prefixes fix the alphabetical (and therefore legend) order of
# the classes; scale_color_manual() below relabels them in that order.
popullangstr$agecl <- classify_age(
  popullangstr$AlterV05Kurz,
  c("4: 0-19", "1: 20-39", "2: 40-59", "3: 60-79", "5: 80+")
)

#### other way of presenting the data
popullangstr$aggage <- 1
agg_age_popullangstr <- aggregate(
  aggage ~ agecl + StichtagDatJahr,
  popullangstr,
  sum
)

# NOTE(review): legend.title.align = "" is kept unchanged; newer ggplot2
# releases appear to deprecate this theme element -- confirm and replace.
age_plot <- ggplot(data = agg_age_popullangstr) +
  aes(x = StichtagDatJahr, y = aggage, group = agecl, color = agecl) +
  geom_point() +
  geom_line() +
  labs(
    title = "Altersverteilung im Quartier Langstrasse",
    x = "Jahr",
    y = "Anzahl Personen"
  ) +
  scale_color_manual(
    labels = c("20-39", "40-59", "60-79", "0-19", "80+"),
    values = c("#80d4ff", "#00cc00", "#ff33cc", "#3333cc", "#993399")
  ) +
  scale_x_continuous(breaks = seq(from = 1993, to = 2016, by = 2)) +
  theme(legend.position = "right", legend.title.align = "")
age_plot

## household ----
# Whole city, 2016. The first aggregate row is dropped (hard-coded; presumably
# an empty/unknown household type -- TODO confirm against the data).
popul2016 <- subset(popul, StichtagDatJahr == 2016)
popul2016$hhagg <- 1
hh2016all <- aggregate(hhagg ~ HHtypLang, popul2016, sum)
hh2016all <- hh2016all[-1, ]
hh2016all$Freqperall <- hh2016all$hhagg / sum(hh2016all$hhagg)
hh2016all$numhh <- 1

# Langstrasse only, 2016 (numhh = 2 marks this group in the chart).
popul2016langstr$hhagglangstr <- 1
hh2016langstr <- aggregate(hhagglangstr ~ HHtypLang, popul2016langstr, sum)
hh2016langstr <- hh2016langstr[-1, ]
hh2016langstr$Freqperall <-
  hh2016langstr$hhagglangstr / sum(hh2016langstr$hhagglangstr)
hh2016langstr$numhh <- 2

mergehh2016 <- bind_rows(hh2016all, hh2016langstr)
mergehh2016$Freqperall <- round(mergehh2016$Freqperall, digits = 3)

# plot_hh <- barchart(Freqperall ~ HHtypLang, data = mergehh2016,
#                     groups = numhh,
#                     main = "Haushaltstypen 2016 in Zürich und dem Bezirk Langstrasse",
#                     ylab = "Relative Häufigkeit",
#                     scales = list(x = list(rot = 90, cex = 0.8)))
# plot_hh

# Numbered display labels that determine the bar order on the x axis.
# Household types without an entry keep the placeholder "1".
hh_labels <- c(
  "Einpersonenhaushalt" = "01. Einpersonenhaushalt",
  "Ehepaar mit Kinder" = "04. Ehepaar mit Kinder",
  "Zweipersonenhaushalt" = "02. Zweipersonenhaushalt",
  "Wohngemeinschaft" = "03. Wohngemeinschaft",
  "Ehepaar" = "05. Ehepaar",
  "Ein-Elternhaushalt" = "08. Ein-Elternhaushalt",
  "Wohngemeinschaft mit Kindern" = "07.Wohngemeinschaft mit Kindern",
  "Paar mit Kind(ern)" = "06. Paar mit Kind(ern)",
  "Eingetragenes Paar" = "10. Eingetragenes Paar",
  "Generationenhaushalt" = "09. Generationenhaushalt",
  "Patchwork-Haushalt" = "11. Patchwork-Haushalt",
  "Eingetragenes Paar mit Kinder" = "12. Eingetragenes Paar mit Kindern",
  "Familien-Verbund" = "13. Familien-verbund"
)
hh_new <- unname(hh_labels[as.character(mergehh2016$HHtypLang)])
hh_new[is.na(hh_new)] <- "1"
mergehh2016$HHtypLangnew <- hh_new

plot_hh <- barchart(
  Freqperall ~ HHtypLangnew,
  data = mergehh2016,
  groups = numhh,
  main = "Haushaltstypen 2016 in Zürich und dem Bezirk Langstrasse",
  ylab = "Relative Häufigkeit",
  scales = list(x = list(rot = 90, cex = 0.8))
)
plot_hh

## Multikulti ----
# Shares of countries of birth, rest of the city vs. Langstrasse.
# Year AND quarter are filtered together; previously the second subset()
# overwrote the first, so the year (or the quarter) filter was lost.
# TODO(review): the dropped row numbers (25, 23, 25, 25) were chosen against
# the earlier, wrongly filtered tables. Print the aggregates and re-check
# which row has to be removed for each of the four tables.

# Multikulti 1993
popul1993 <- subset(
  popul,
  StichtagDatJahr == 1993 & QuarLang != "Langstrasse"
)
popul1993langstr <- subset(
  popul,
  StichtagDatJahr == 1993 & QuarLang == "Langstrasse"
)

popul1993$multiagg_all93 <- 1
popul1993multi_all <- birth_country_shares(
  popul1993, "multiagg_all93", "popul1993multi_allfreq",
  drop_row = 25
)

popul1993langstr$multiagg_langstr93 <- 1
popul1993multi_langstr <- birth_country_shares(
  popul1993langstr, "multiagg_langstr93", "popul1993multi_langstrfreq",
  drop_row = 23
)

# Multikulti 2016
popul2016 <- subset(
  popul,
  StichtagDatJahr == 2016 & QuarLang != "Langstrasse"
)
popul2016langstr <- subset(
  popul,
  StichtagDatJahr == 2016 & QuarLang == "Langstrasse"
)

popul2016$multiagg_all16 <- 1
popul2016multi_all <- birth_country_shares(
  popul2016, "multiagg_all16", "popul2016multi_allfreq",
  drop_row = 25
)

popul2016langstr$multiagg_langstr16 <- 1
popul2016multi_langstr <- birth_country_shares(
  popul2016langstr, "multiagg_langstr16", "popul2016multi_langstrfreq",
  drop_row = 25
)