# Mapping of census demographic data to Baltimore neighborhoods

setwd("~/Desktop/Bmore_demo")

# libraries
library(ggplot2)
library(ggthemes)
library(dplyr)
library(magrittr)
library(tidyr)
library(viridis)
library(scales)

# Latitudinal and longitudinal coordinates for neighborhods scraped from 
# Crime and Safety (2010-2013) KML file 
# (original file: https://data.baltimorecity.gov/Neighborhoods/Crime-Safety-2010-2013-Shape/y3aa-iwrk).
# Square region without data is designated as "Unassigned -- Jail" in KML file.

# Note: fill argument for read.table determines row length from only first five 
# rows of data, thereby mistakenly slicing longer rows later in file; moved the 
# three longer rows being sliced to the beginning of the file as a work-around

census.map <- read.table("census_basemaps/Bmore_neighborhoods_from_Crime_and_Safety_kml.txt", 
                         header = FALSE, 
                         sep = " ", 
                         quote = "\"", 
                         fill = TRUE, 
                         row.names = 1) %>% 
                t() %>% 
                data.frame(check.names = FALSE) %>% 
                gather(key = Neighborhood, value = coords)

# remove empty coords generated by fill argument
census.map <- census.map[census.map$coords != "", ]

# separte coords into latitute and longitude and convert to numeric
census.map %<>% separate(col = coords, into = c("longitude", "latitude"), sep = ",")
census.map[[2]] %<>% as.numeric()
census.map[[3]] %<>% as.numeric()

# Create column with unique IDs used for mapping.  Mapping without unique IDs 
# results in erroneous connecting lines, or "tears," for neighborhoods composed 
# of  multiple polygons.  
census.map$unique.mapping <- census.map$Neighborhood

# Format neighborhood IDs to match those in the census in order to merge data.
census.map$Neighborhood <- gsub(pattern = "\\*", 
                                replacement = " ", 
                                x = census.map$Neighborhood)

for (i in 1:3){
  rep1 <- c("Brooklyn/Curtis Bay/Hawkins Point1", 
            "Brooklyn/Curtis Bay/Hawkins Point2", 
            "Brooklyn/Curtis Bay/Hawkins Point3")
  census.map$Neighborhood %<>% gsub(pattern = rep1[i], 
                                    replacement = "Brooklyn/Curtis Bay/Hawkins Point", .)
}

for (i in 1:2){
  rep2 <- c("South Baltimore1", 
            "South Baltimore2")
  census.map$Neighborhood %<>% gsub(pattern = rep2[i], 
                                    replacement = "South Baltimore", .)
}

# read in census data and codebook
demo <- read.csv("Census_Demographics__2010-2013_.csv") 

# demo formatting
for (i in c(5:16, 18, 19, 22:28)) {
  demo[, i] %<>% as.character() %>% gsub(pattern = "%", replacement = "", .) %>% 
    as.numeric()
  demo[, i] <- demo[, i]/100
}

for (i in c(17, 21)) {
  demo[, i] %<>% as.character() %>% 
    gsub(pattern = "\\* ", replacement = "", .) %>% 
    gsub(pattern = ",", replacement = "", .) %>% 
    as.numeric() 
}

### Percent who Self-Identify as African-American, White, 
### Asian-American or Hispanic/Latino: 2010

# scale gradient
fill_grad <- scale_fill_gradientn(colours = viridis(256), 
                                  labels = percent, 
                                  limits = c(0, 1), 
                                  breaks = seq(from = 0, to = 100, by = 0.20),
                                  guide = guide_colorbar(barwidth = 4, 
                                                         barheight = 20, 
                                                         title = NULL, 
                                                         label.position = "left", 
                                                         label.theme = element_text(size = 20, 
                                                                                    angle = 0)))
# Percent Identify African-American: 2010
paa <- demo %>% 
  select(Neighborhood, paa10) %>% 
  right_join(census.map, by = "Neighborhood") %>%
  ggplot(mapping = aes(x = longitude, y = latitude, group = unique.mapping)) + 
  geom_polygon(aes(fill = paa10), color = "white") + 
  theme_map() + 
  coord_map("mercator") + 
  fill_grad + 
  labs(title = "Percent African-American") + 
  theme(plot.title = element_text(size=30))

# png
png("baltimore_census_neighborhoods_percent_african_american.png", height = 1152, width = 1152)
paa
dev.off()

# Percent Identify White: 2010
pwhite <- demo %>% 
  select(Neighborhood, pwhite10) %>% 
  right_join(census.map, by = "Neighborhood") %>%
  ggplot(mapping = aes(x = longitude, y = latitude, group = unique.mapping)) + 
  geom_polygon(aes(fill = pwhite10), color = "white") + 
  theme_map() + 
  coord_map("mercator") + 
  fill_grad +
  labs(title = "Percent White") + 
  theme(plot.title = element_text(size=30))

png("baltimore_census_neighborhoods_percent_white.png", height = 1152, width = 1152)
pwhite
dev.off()

# Percent Identify Hispanic or Latino: 2010
phisp <- demo %>% 
  select(Neighborhood, phisp10) %>% 
  right_join(census.map, by = "Neighborhood") %>%
  ggplot(mapping = aes(x = longitude, y = latitude, group = unique.mapping)) + 
  geom_polygon(aes(fill = phisp10), color = "white") + 
  theme_map() + 
  coord_map("mercator") +
  labs(title = "Percent Hispanic/Latino") + 
  theme(plot.title = element_text(size=30)) +
  scale_fill_gradientn(colours = viridis(256), 
                       labels = percent, 
                       limits = c(0, 0.40), 
                       breaks = seq(from = 0, to = 0.40, by = 0.10),
                       guide = guide_colorbar(barwidth = 4, 
                                              barheight = 20, 
                                              title = NULL, 
                                              label.position = "left", 
                                              label.theme = element_text(size = 20, 
                                                                         angle = 0)))

png("baltimore_census_neighborhoods_percent_hispanic.png", height = 1152, width = 1152)
phisp
dev.off()

# Percent Identify Asian-American: 2010
pasi <- demo %>% 
  select(Neighborhood, pasi10) %>% 
  right_join(census.map, by = "Neighborhood") %>%
  ggplot(mapping = aes(x = longitude, y = latitude, group = unique.mapping)) + 
  geom_polygon(aes(fill = pasi10), color = "white") + 
  theme_map() + 
  coord_map("mercator") +
  labs(title = "Percent Asian-American") + 
  theme(plot.title = element_text(size=30)) +
  scale_fill_gradientn(colours = viridis(256), 
                       labels = percent, 
                       limits = c(0, 0.20), 
                       breaks = seq(from = 0, to = 0.20, by = 0.05),
                       guide = guide_colorbar(barwidth = 4, 
                                              barheight = 20, 
                                              title = NULL, 
                                              label.position= "left", 
                                              label.theme = element_text(size = 20, 
                                                                         angle = 0)))

png("baltimore_census_neighborhoods_percent_asian_american.png", height = 1152, width = 1152)
pasi
dev.off()

### Select age demographics

# Percent of people 19-24 years of age
age24 <- demo %>% 
  select(Neighborhood, age24_10) %>% 
  right_join(census.map, by = "Neighborhood") %>%
  ggplot(mapping = aes(x = longitude, y = latitude, group = unique.mapping)) + 
  geom_polygon(aes(fill = age24_10), color = "white") + 
  theme_map() + 
  coord_map("mercator") +
  labs(title = "Persons 19-24 years of age") + 
  theme(plot.title = element_text(size=30)) +
  scale_fill_gradientn(colours = viridis(256), 
                       labels = percent, 
                       limits = c(0, 0.40), 
                       breaks = seq(from = 0, to = 0.40, by = 0.10),
                       guide = guide_colorbar(barwidth = 4, 
                                              barheight = 20, 
                                              title = NULL, 
                                              label.position= "left", 
                                              label.theme = element_text(size = 20, 
                                                                         angle = 0)))

png("baltimore_census_neighborhoods_percent_19_to_24.png", height = 1152, width = 1152)
age24
dev.off()

# Percent of people 25-64 years of age
age64 <- demo %>% 
  select(Neighborhood, age64_10) %>% 
  right_join(census.map, by = "Neighborhood") %>%
  ggplot(mapping = aes(x = longitude, y = latitude, group = unique.mapping)) + 
  geom_polygon(aes(fill = age64_10), color = "white") + 
  theme_map() + 
  coord_map("mercator") +
  labs(title = "Persons 25-64 years of age") + 
  theme(plot.title = element_text(size=30)) +
  scale_fill_gradientn(colours = viridis(256), 
                       labels = percent,
                       guide = guide_colorbar(barwidth = 4, 
                                              barheight = 20, 
                                              title = NULL, 
                                              label.position= "left", 
                                              label.theme = element_text(size = 20, 
                                                                         angle = 0)))

png("baltimore_census_neighborhoods_percent_25_to_64.png", height = 1152, width = 1152)
age64
dev.off()