# Mapping of census demographic data to Baltimore neighborhoods
setwd("~/Desktop/Bmore_demo")
# libraries
library(ggplot2)
library(ggthemes)
library(dplyr)
library(magrittr)
library(tidyr)
library(viridis)
library(scales)
# Latitudinal and longitudinal coordinates for neighborhods scraped from
# Crime and Safety (2010-2013) KML file
# (original file: https://data.baltimorecity.gov/Neighborhoods/Crime-Safety-2010-2013-Shape/y3aa-iwrk).
# Square region without data is designated as "Unassigned -- Jail" in KML file.
# Note: fill argument for read.table determines row length from only first five
# rows of data, thereby mistakenly slicing longer rows later in file; moved the
# three longer rows being sliced to the beginning of the file as a work-around
census.map <- read.table("census_basemaps/Bmore_neighborhoods_from_Crime_and_Safety_kml.txt",
header = FALSE,
sep = " ",
quote = "\"",
fill = TRUE,
row.names = 1) %>%
t() %>%
data.frame(check.names = FALSE) %>%
gather(key = Neighborhood, value = coords)
# remove empty coords generated by fill argument
census.map <- census.map[census.map$coords != "", ]
# separte coords into latitute and longitude and convert to numeric
census.map %<>% separate(col = coords, into = c("longitude", "latitude"), sep = ",")
census.map[[2]] %<>% as.numeric()
census.map[[3]] %<>% as.numeric()
# Create column with unique IDs used for mapping. Mapping without unique IDs
# results in erroneous connecting lines, or "tears," for neighborhoods composed
# of multiple polygons.
census.map$unique.mapping <- census.map$Neighborhood
# Format neighborhood IDs to match those in the census in order to merge data.
census.map$Neighborhood <- gsub(pattern = "\\*",
replacement = " ",
x = census.map$Neighborhood)
for (i in 1:3){
rep1 <- c("Brooklyn/Curtis Bay/Hawkins Point1",
"Brooklyn/Curtis Bay/Hawkins Point2",
"Brooklyn/Curtis Bay/Hawkins Point3")
census.map$Neighborhood %<>% gsub(pattern = rep1[i],
replacement = "Brooklyn/Curtis Bay/Hawkins Point", .)
}
for (i in 1:2){
rep2 <- c("South Baltimore1",
"South Baltimore2")
census.map$Neighborhood %<>% gsub(pattern = rep2[i],
replacement = "South Baltimore", .)
}
# read in census data
demo <- read.csv("Census_Demographics__2010-2013_.csv")
# demo formatting
for (i in c(5:16, 18, 19, 22:28)) {
demo[, i] %<>% as.character() %>% gsub(pattern = "%", replacement = "", .) %>%
as.numeric()
demo[, i] <- demo[, i]/100
}
for (i in c(17, 21)) {
demo[, i] %<>% as.character() %>% gsub(pattern = "\\* ", replacement = "", .) %>%
gsub(pattern = ",", replacement = "", .) %>%
as.numeric()
}
### Median household income: 2013
# Median household income: 2013 - geographic
med.inc <- demo %>%
select(Neighborhood, mhhi13) %>%
right_join(census.map, by = "Neighborhood") %>%
ggplot(mapping = aes(x = longitude, y = latitude, group = unique.mapping)) +
geom_polygon(aes(fill = mhhi13), color = "white") +
theme_map() +
coord_map("mercator") +
labs(title = "Median household income") +
theme(plot.title = element_text(size=30)) +
scale_fill_gradientn(colours = viridis(256),
labels = dollar,
guide = guide_colorbar(barwidth = 4,
barheight = 19,
title = NULL,
label.position= "left",
label.theme = element_text(size = 20,
angle = 0)))
# plot
png("baltimore_census_median_income_neighborhoods.png", height = 1152, width = 1152)
med.inc
dev.off()
# Median household income: 2013 - bar graph
med.inc.bar <- demo %>%
select(Neighborhood, mhhi13) %>%
ggplot(aes(x = reorder(Neighborhood, mhhi13), y = mhhi13, fill = mhhi13)) +
geom_bar(stat = "identity") +
coord_flip() +
labs(title = "Median household income", x = NULL, y = NULL) +
theme_bw() +
scale_y_continuous (expand = c(0,0), label = dollar) +
theme(plot.title = element_text(size = 30),
axis.text.x = element_text(size = 20),
axis.text.y = element_text(size = 20)) +
scale_fill_gradientn (colours = viridis(256),
label = dollar,
guide = FALSE)
# plot
png("baltimore_census_median_income_neighborhoods_bar.png", height = 1920, width = 1152)
med.inc.bar
dev.off()