# R script for loading and preparing census data and outlines
# for Brexit analysis.
# Author: Roger Beecham
#####################################

library(rmapshaper)
library(sf)
library(tidyverse)

# Read in shapefile containing GB LA boundaries -- made available from the
# ONS Open Geography Portal.
# We simplify the geometries using the "rmapshaper" library.
# The archive is a binary file, so a binary transfer is requested explicitly.
download.file(
  "http://geoportal.statistics.gov.uk/datasets/8edafbe3276d4b56aec60991cbddda50_3.zip",
  "boundaries_gb.zip",
  mode = "wb"
)
unzip("boundaries_gb.zip")
gb_boundaries <- read_sf(
  "Local_Authority_Districts_December_2015_Super_Generalised_Clipped_Boundaries_in_Great_Britain.shp"
)
# Set CRS to OSGB.
gb_boundaries <- st_transform(gb_boundaries, crs = 27700)
# Simplify polygon. This may take a little time to execute.
gb_boundaries <- ms_simplify(gb_boundaries, keep = 0.2)

# Read in Census data that we use to regress on.
census_data <- read_csv(
  "http://homepages.see.leeds.ac.uk/~georjb/ppd/r/data/2011_census_oa.csv"
)
oa_la_lookup <- read.csv(
  "http://homepages.see.leeds.ac.uk/~georjb/ppd/r/data/oa_la_lookup.csv"
)
# Store the OA code as character before it is used as a join key.
oa_la_lookup$OA <- as.character(oa_la_lookup$OA)
# No `by` is supplied, so the join uses every column name the two tables
# share (presumably just the OA code -- confirm against the CSV headers).
census_data <- left_join(census_data, oa_la_lookup)

# Iterate over OA level data and compute summary statistics on relevant
# variables to LA level.
# Counts are summed within each local authority and expressed as a share of
# the matching population, household, qualification or employment total.
census_data <- census_data %>%
  group_by(LOCAL_AUTHORITY_CODE) %>%
  summarise(
    total_pop = sum(Total_Population),
    younger_adults =
      sum(Age_20_to_24, Age_25_to_29, Age_30_to_44) / sum(Total_Population),
    white = sum(White_British_and_Irish) / sum(Total_Population),
    christian = sum(Christian) / sum(Total_Population),
    english_speaking =
      sum(Main_language_is_English_or_Main_language_not_English__Can_speak_English_very_well) /
        sum(Total_Population),
    single_ethnicity_household =
      sum(All_household_members_have_the_same_ethnic_group) /
        sum(Total_Households),
    own_home = sum(Owned_and_Shared_Ownership) / sum(Total_Households),
    not_good_health =
      sum(Fair_health, Bad_health, Very_bad_health) / sum(Total_Population),
    # The denominator is the sum of the four qualification categories rather
    # than the total population.
    degree_educated =
      sum(Highest_level_of_qualification_Level_4_qualifications_and_above) /
        sum(
          Highest_level_of_qualification_Level_4_qualifications_and_above,
          Highest_level_of_qualification_Level_3_qualifications,
          Highest_level_of_qualification_Level_1_Level_2_or_Apprenticeship,
          No_qualifications
        ),
    no_car = sum(No_cars_or_vans_in_household) / sum(Total_Households),
    private_transport_to_work =
      sum(Private_Transport) / sum(Total_Employment_16_to_74),
    professionals =
      sum(Managers_directors_and_senior_officials, Professional_occupations) /
        sum(Total_Employment_16_to_74)
  )

# Country-of-birth data: attach the LA code to each OA, then aggregate to LA
# level and express EU-born residents as a share of the LA population.
birth_country_11 <- read_csv(
  "http://homepages.see.leeds.ac.uk/~georjb/ppd/r/data/country_of_birth_2011.csv"
)
birth_country_11 <- left_join(
  birth_country_11,
  oa_la_lookup,
  by = c("oa_code" = "OA")
)
birth_country_11 <- birth_country_11 %>%
  group_by(LOCAL_AUTHORITY_CODE) %>%
  summarise(total_pop = sum(POPULATION), eu_born = sum(eu_born)) %>%
  transmute(
    geo_code = LOCAL_AUTHORITY_CODE,
    total_pop = total_pop,
    eu_born = eu_born / total_pop
  )

# Both tables carry a `total_pop` column, so the join suffixes them with
# `.x` (census) and `.y` (country of birth). Keep the census figure and
# rename the key columns by name rather than by position.
attribute_data <- inner_join(
  census_data,
  birth_country_11,
  by = c("LOCAL_AUTHORITY_CODE" = "geo_code")
) %>%
  select(-total_pop.y) %>%
  rename(geo_code = LOCAL_AUTHORITY_CODE, total_pop = total_pop.x) %>%
  mutate(geo_code = as.character(geo_code))

# Recode 2011 Census codes to match with codes in referendum data and
# boundary data. Names are the 2011 Census codes, values their replacements;
# no replacement is itself an old code, so one lookup pass is sufficient.
la_code_updates <- c(
  "E07000097" = "E07000242",
  "E07000101" = "E07000243",
  "E07000104" = "E07000241",
  "E07000100" = "E07000240",
  "E08000020" = "E08000037",
  "E06000048" = "E06000057"
)
needs_update <- attribute_data$geo_code %in% names(la_code_updates)
attribute_data$geo_code[needs_update] <-
  unname(la_code_updates[attribute_data$geo_code[needs_update]])

# Join with referendum data.
# NOTE(review): `referendum_data` is not created anywhere in the visible
# script; it must already exist in the workspace with an `Area_Code` column.
attribute_data <- inner_join(
  attribute_data,
  referendum_data,
  by = c("geo_code" = "Area_Code")
)

# Join attribute data with gb boundaries for mapping.
data_gb <- gb_boundaries %>%
  inner_join(attribute_data, by = c("lad15cd" = "geo_code"))

# In order to keep a clean workspace, remove the redundant data.
# `birth_country_11` and `oa_la_lookup` are left in place, as before.
rm(census_data, referendum_data, attribute_data, gb_boundaries)