library(magrittr)
library(tidyr)
library(ggplot2)
library(ggthemes)
library(dplyr)
library(sparcl)
library(RColorBrewer)
library(gridExtra)
library(ggmap)

# data spans from 01/01/2013 to 10/24/2015, which is when the data 
# was downloaded; the link contains arrests after this date
arrests <- read.csv("BPD_Arrests.csv")

# Neighborhods scraped from Crime and Safety (2010-2013) KML file (original file: 
# https://data.baltimorecity.gov/Neighborhoods/Crime-Safety-2010-2013-Shape/y3aa-iwrk).
# Square region without data is designated as "Unassigned -- Jail" in KML file.

bmore_map <- read.table("~/Desktop/Bmore_demo/census_basemaps/Bmore_neighborhoods_from_Crime_and_Safety_kml.txt", 
                        header=FALSE, 
                        sep=" ", 
                        quote="\"", 
                        fill=TRUE, 
                        row.names=1) %>% 
              t() %>% 
              data.frame(check.names=FALSE) %>% 
              gather(key=Neighborhood, value=coords)

# remove empty coords generated by fill argument
bmore_map <- bmore_map[bmore_map$coords != "", ]

# separate coords into latitute and longitude and convert to numeric
bmore_map %<>% separate(col=coords, into=c("longitude", "latitude"), sep=",")
bmore_map[[2]] %<>% as.numeric()
bmore_map[[3]] %<>% as.numeric()

# subset of arrests classified as prositution
sex_workers = arrests[arrests$IncidentOffense == "55A-Prostitution", ]

# separate lat. and long. coords to individual columns
sex_workers$Location.1 <- gsub(pattern = "\\(", 
                               replacement = "", 
                               x = sex_workers$Location.1)

sex_workers$Location.1 <- gsub(pattern = "\\)", 
                               replacement = "", 
                               x = sex_workers$Location.1)

sex_workers %<>% separate(col = Location.1, 
                          into = c("longitude", "latitude"), 
                          sep = ",")

sex_workers[[15]] %<>% as.numeric()
sex_workers[[16]] %<>% as.numeric()

# only arrests with lat. and long. coords: 522 arrests from 
# 01/01/2013 to 08/15/2014
sex_workers <- sex_workers[complete.cases(sex_workers), ] 

# geographic plot - point
# NB: if used basemap as below, error was thrown
sex_workers_geo_point <- ggplot(data = sex_workers, 
                                aes(x = latitude, y = longitude)) + 
                          geom_polygon(data = bmore_map, 
                                       mapping = aes(x = longitude, 
                                                     y = latitude, 
                                                     group = Neighborhood), 
                                       fill = "white", 
                                       color = "grey40") +
                          theme_map() +
                          geom_point(size = 5, 
                                     color = "#2D2D83", 
                                     alpha = 0.5) + 
                          coord_map("mercator")

png("baltimore_sex_workers_geo_point_map.png")
sex_workers_geo_point
dev.off()

# geographic plot - density2d
sex_workers_geo_density <- ggplot(data = sex_workers, 
                                  aes(x = latitude, y = longitude)) + 
                            geom_polygon(data = bmore_map, 
                                         mapping = aes(x = longitude, 
                                                       y = latitude, 
                                                       group = Neighborhood), 
                                         fill = "white", 
                                         color = "grey40") +
                            theme_map() +
                            geom_density2d(color = "#2D2D83", 
                                           size = 1.1) +
                            coord_map("mercator")

png("baltimore_sex_workers_geo_density_map.png")
sex_workers_geo_density
dev.off()

# heirarchical clustering
# NB: euclidean distances calculated for distances although data are 
#     geographic; negligible effect due to small geographic distance between 
#     points
sex_workers_hc <- sex_workers[ , c(9, 15, 16)]
d <- dist(as.matrix(sex_workers_hc[ , c(2, 3)]))
hc <- hclust(d, method = "complete") 

# cut dendrogram at a height of 0.08, creating 6 clusters
y = cutree(hc, h = 0.08)

# add cluster ID for each arrest
sex_workers$cluster <- as.factor(y)

# set colors for dendrogram to be used for geographic plot
dendro_colors <- brewer.pal(6, "Set1")
for (i in 1:length(y)) {
  color <- as.numeric(y[i])
  y[i] <- dendro_colors[color]
}

# dendrogram
png("baltimore_sex_workers_dendrogram.png")
ColorDendrogram(hc, y = y, xlab = "", sub = "")
abline(0.08, 0, lty = 2)
dev.off()

# geographic plot of clusters
# NB: if plots reversed [i.e. ggplot(data = sex_workers) + geom_poly(...)]
#     like other plots, error is thrown

basemap <- ggplot(data = bmore_map, 
                  mapping = aes(x = longitude, 
                                y = latitude, 
                                group = Neighborhood)) + 
              geom_polygon(fill = "white", color = "grey40") +
              theme_map() + 
              coord_map("mercator")

sex_workers_geo2 <- basemap + 
                    geom_point(data = sex_workers, 
                               aes(x = latitude, 
                                   y = longitude),
                               size = 3,
                               color = "black") +
                    geom_point(data = sex_workers, 
                               aes(x = latitude, 
                                   y = longitude, 
                                   color = cluster)) +
                    scale_color_manual(values = brewer.pal(6, "Set1"), 
                                       guide = FALSE)

png("baltimore_sex_workers_geo_point_cluster_map.png")
sex_workers_geo2
dev.off()

# combined cluster and density plot
cluster_density_plot <- ggplot(data = sex_workers, 
                               aes(x = latitude, y = longitude)) + 
                          geom_polygon(data = bmore_map, 
                                       mapping = aes(x = longitude, 
                                                     y = latitude, 
                                                     group = Neighborhood), 
                                       fill = "white", 
                                       color = "grey40") +
                          theme_map() +
                          geom_density2d(color = "#2D2D83", 
                                         size = 0.7) +
                          coord_map("mercator") +
                          geom_point(data = sex_workers, 
                                     aes(x = latitude, 
                                         y = longitude),
                                     size = 3,
                                     color = "black") +
                          geom_point(data = sex_workers, 
                                     aes(x = latitude, 
                                         y = longitude, 
                                         color = cluster)) +
                          scale_color_manual(values = brewer.pal(6, "Set1"), 
                                             guide = FALSE)

png("baltimore_sex_workers_geo_point_cluster_density_map.png")
cluster_density_plot
dev.off()

# street map with arrests within a group
map_cluster <- function(coords, zoom, clust_num, pt_sz) {
  
  # inputs:
  #   coords (center of map): vector of lat and long coords 
  #   zoom (zoom of map): integer from 3 to 21 
  #   clust_num (cluster number): integer
  #   pt_sz (point size): integer for size of point
  #
  # output:
  #   saved png image file of google map overlaid with arrest locations
  
  clust_df <- sex_workers[sex_workers$cluster == clust_num, ]
  clust_map <- get_map(location = coords,
                       color = "bw",
                       source = "google",
                       maptype = "roadmap",
                       zoom = zoom)
  plot <- ggmap(clust_map) + 
            geom_point(data = clust_df, 
                       aes(x = latitude, 
                           y = longitude),
                       size = pt_sz * 1.33,
                       color = "black") +
            geom_point(data = clust_df, 
                       aes(x = latitude, 
                           y = longitude), 
                       color = dendro_colors[clust_num],
                       size = pt_sz) +
            theme(axis.text.x = element_blank(), 
                  axis.ticks.x = element_blank(),
                  axis.title.x = element_blank(),
                  axis.text.y = element_blank(),
                  axis.ticks.y = element_blank(),
                  axis.title.y = element_blank())
  
  file_name = paste0("baltimore_sex_workers_cluster", clust_num, "_street.png", sep = "")
  
  png(file_name)
  print(plot)
  dev.off()
}

map_cluster(c(-76.682506, 39.325812), 14, 1, 4)
map_cluster(c(-76.550619, 39.364685), 15, 2, 4)
map_cluster(c(-76.652445, 39.280209), 15, 4, 4)
map_cluster(c(-76.600464, 39.230500), 15, 6, 4)