# Mapping of census demographic data to Baltimore neighborhoods

setwd("~/Desktop/Bmore_demo")

# libraries
library(ggplot2)
library(ggthemes)
library(dplyr)
library(magrittr)
library(tidyr)
library(viridis)
library(scales)

# Latitudinal and longitudinal coordinates for neighborhods scraped from 
# Crime and Safety (2010-2013) KML file 
# (original file: https://data.baltimorecity.gov/Neighborhoods/Crime-Safety-2010-2013-Shape/y3aa-iwrk).
# Square region without data is designated as "Unassigned -- Jail" in KML file.

# Note: fill argument for read.table determines row length from only first five 
# rows of data, thereby mistakenly slicing longer rows later in file; moved the 
# three longer rows being sliced to the beginning of the file as a work-around

census.map <- read.table("census_basemaps/Bmore_neighborhoods_from_Crime_and_Safety_kml.txt", 
                         header = FALSE, 
                         sep = " ", 
                         quote = "\"", 
                         fill = TRUE, 
                         row.names = 1) %>% 
                t() %>% 
                data.frame(check.names = FALSE) %>% 
                gather(key = Neighborhood, value = coords)

# remove empty coords generated by fill argument
census.map <- census.map[census.map$coords != "", ]

# separte coords into latitute and longitude and convert to numeric
census.map %<>% separate(col = coords, into = c("longitude", "latitude"), sep = ",")
census.map[[2]] %<>% as.numeric()
census.map[[3]] %<>% as.numeric()

# Create column with unique IDs used for mapping.  Mapping without unique IDs 
# results in erroneous connecting lines, or "tears," for neighborhoods composed 
# of  multiple polygons.  
census.map$unique.mapping <- census.map$Neighborhood

# Format neighborhood IDs to match those in the census in order to merge data.
census.map$Neighborhood <- gsub(pattern = "\\*", 
                                replacement = " ", 
                                x = census.map$Neighborhood)

for (i in 1:3){
  rep1 <- c("Brooklyn/Curtis Bay/Hawkins Point1", 
            "Brooklyn/Curtis Bay/Hawkins Point2", 
            "Brooklyn/Curtis Bay/Hawkins Point3")
  census.map$Neighborhood %<>% gsub(pattern = rep1[i], 
                                    replacement = "Brooklyn/Curtis Bay/Hawkins Point", .)
}

for (i in 1:2){
  rep2 <- c("South Baltimore1", 
            "South Baltimore2")
  census.map$Neighborhood %<>% gsub(pattern = rep2[i], 
                                    replacement = "South Baltimore", .)
}

# read in census data
demo <- read.csv("Census_Demographics__2010-2013_.csv") 

# demo formatting
for (i in c(5:16, 18, 19, 22:28)) {
  demo[, i] %<>% as.character() %>% gsub(pattern = "%", replacement = "", .) %>% 
    as.numeric()
  demo[, i] <- demo[, i]/100
}

for (i in c(17, 21)) {
  demo[, i] %<>% as.character() %>% gsub(pattern = "\\* ", replacement = "", .) %>% 
    gsub(pattern = ",", replacement = "", .) %>% 
    as.numeric() 
}

### Median household income: 2013

# Median household income: 2013 - geographic
med.inc <- demo %>% 
  select(Neighborhood, mhhi13) %>% 
  right_join(census.map, by = "Neighborhood") %>%
  ggplot(mapping = aes(x = longitude, y = latitude, group = unique.mapping)) + 
  geom_polygon(aes(fill = mhhi13), color = "white") + 
  theme_map() + 
  coord_map("mercator") +
  labs(title = "Median household income") + 
  theme(plot.title = element_text(size=30)) +
  scale_fill_gradientn(colours = viridis(256), 
                       labels = dollar,
                       guide = guide_colorbar(barwidth = 4, 
                                              barheight = 19, 
                                              title = NULL, 
                                              label.position= "left", 
                                              label.theme = element_text(size = 20, 
                                                                         angle = 0)))

# plot
png("baltimore_census_median_income_neighborhoods.png", height = 1152, width = 1152)
med.inc
dev.off()

# Median household income: 2013 - bar graph
med.inc.bar <- demo %>% 
  select(Neighborhood, mhhi13) %>% 
  ggplot(aes(x = reorder(Neighborhood, mhhi13), y = mhhi13, fill = mhhi13)) + 
  geom_bar(stat = "identity") + 
  coord_flip() +
  labs(title = "Median household income", x = NULL, y = NULL) + 
  theme_bw() + 
  scale_y_continuous (expand = c(0,0), label = dollar) + 
  theme(plot.title = element_text(size = 30),
        axis.text.x = element_text(size = 20), 
        axis.text.y = element_text(size = 20)) + 
  scale_fill_gradientn (colours = viridis(256), 
                        label = dollar, 
                        guide = FALSE)


# plot
png("baltimore_census_median_income_neighborhoods_bar.png", height = 1920, width = 1152)
med.inc.bar
dev.off()