library(magrittr)
library(tidyr)
library(ggplot2)
library(ggthemes)
library(dplyr)
library(sparcl)
library(RColorBrewer)
library(gridExtra)
library(ggmap)
# data spans from 01/01/2013 to 10/24/2015, which is when the data
# was downloaded; the link contains arrests after this date
arrests <- read.csv("BPD_Arrests.csv")
# Neighborhods scraped from Crime and Safety (2010-2013) KML file (original file:
# https://data.baltimorecity.gov/Neighborhoods/Crime-Safety-2010-2013-Shape/y3aa-iwrk).
# Square region without data is designated as "Unassigned -- Jail" in KML file.
bmore_map <- read.table("~/Desktop/Bmore_demo/census_basemaps/Bmore_neighborhoods_from_Crime_and_Safety_kml.txt",
header=FALSE,
sep=" ",
quote="\"",
fill=TRUE,
row.names=1) %>%
t() %>%
data.frame(check.names=FALSE) %>%
gather(key=Neighborhood, value=coords)
# remove empty coords generated by fill argument
bmore_map <- bmore_map[bmore_map$coords != "", ]
# separate coords into latitute and longitude and convert to numeric
bmore_map %<>% separate(col=coords, into=c("longitude", "latitude"), sep=",")
bmore_map[[2]] %<>% as.numeric()
bmore_map[[3]] %<>% as.numeric()
# subset of arrests classified as prositution
sex_workers = arrests[arrests$IncidentOffense == "55A-Prostitution", ]
# separate lat. and long. coords to individual columns
sex_workers$Location.1 <- gsub(pattern = "\\(",
replacement = "",
x = sex_workers$Location.1)
sex_workers$Location.1 <- gsub(pattern = "\\)",
replacement = "",
x = sex_workers$Location.1)
sex_workers %<>% separate(col = Location.1,
into = c("longitude", "latitude"),
sep = ",")
sex_workers[[15]] %<>% as.numeric()
sex_workers[[16]] %<>% as.numeric()
# only arrests with lat. and long. coords: 522 arrests from
# 01/01/2013 to 08/15/2014
sex_workers <- sex_workers[complete.cases(sex_workers), ]
# geographic plot - point
# NB: if used basemap as below, error was thrown
sex_workers_geo_point <- ggplot(data = sex_workers,
aes(x = latitude, y = longitude)) +
geom_polygon(data = bmore_map,
mapping = aes(x = longitude,
y = latitude,
group = Neighborhood),
fill = "white",
color = "grey40") +
theme_map() +
geom_point(size = 5,
color = "#2D2D83",
alpha = 0.5) +
coord_map("mercator")
png("baltimore_sex_workers_geo_point_map.png")
sex_workers_geo_point
dev.off()
# geographic plot - density2d
sex_workers_geo_density <- ggplot(data = sex_workers,
aes(x = latitude, y = longitude)) +
geom_polygon(data = bmore_map,
mapping = aes(x = longitude,
y = latitude,
group = Neighborhood),
fill = "white",
color = "grey40") +
theme_map() +
geom_density2d(color = "#2D2D83",
size = 1.1) +
coord_map("mercator")
png("baltimore_sex_workers_geo_density_map.png")
sex_workers_geo_density
dev.off()
# heirarchical clustering
# NB: euclidean distances calculated for distances although data are
# geographic; negligible effect due to small geographic distance between
# points
sex_workers_hc <- sex_workers[ , c(9, 15, 16)]
d <- dist(as.matrix(sex_workers_hc[ , c(2, 3)]))
hc <- hclust(d, method = "complete")
# cut dendrogram at a height of 0.08, creating 6 clusters
y = cutree(hc, h = 0.08)
# add cluster ID for each arrest
sex_workers$cluster <- as.factor(y)
# set colors for dendrogram to be used for geographic plot
dendro_colors <- brewer.pal(6, "Set1")
for (i in 1:length(y)) {
color <- as.numeric(y[i])
y[i] <- dendro_colors[color]
}
# dendrogram
png("baltimore_sex_workers_dendrogram.png")
ColorDendrogram(hc, y = y, xlab = "", sub = "")
abline(0.08, 0, lty = 2)
dev.off()
# geographic plot of clusters
# NB: if plots reversed [i.e. ggplot(data = sex_workers) + geom_poly(...)]
# like other plots, error is thrown
basemap <- ggplot(data = bmore_map,
mapping = aes(x = longitude,
y = latitude,
group = Neighborhood)) +
geom_polygon(fill = "white", color = "grey40") +
theme_map() +
coord_map("mercator")
sex_workers_geo2 <- basemap +
geom_point(data = sex_workers,
aes(x = latitude,
y = longitude),
size = 3,
color = "black") +
geom_point(data = sex_workers,
aes(x = latitude,
y = longitude,
color = cluster)) +
scale_color_manual(values = brewer.pal(6, "Set1"),
guide = FALSE)
png("baltimore_sex_workers_geo_point_cluster_map.png")
sex_workers_geo2
dev.off()
# combined cluster and density plot
cluster_density_plot <- ggplot(data = sex_workers,
aes(x = latitude, y = longitude)) +
geom_polygon(data = bmore_map,
mapping = aes(x = longitude,
y = latitude,
group = Neighborhood),
fill = "white",
color = "grey40") +
theme_map() +
geom_density2d(color = "#2D2D83",
size = 0.7) +
coord_map("mercator") +
geom_point(data = sex_workers,
aes(x = latitude,
y = longitude),
size = 3,
color = "black") +
geom_point(data = sex_workers,
aes(x = latitude,
y = longitude,
color = cluster)) +
scale_color_manual(values = brewer.pal(6, "Set1"),
guide = FALSE)
png("baltimore_sex_workers_geo_point_cluster_density_map.png")
cluster_density_plot
dev.off()
# street map with arrests within a group
map_cluster <- function(coords, zoom, clust_num, pt_sz) {
# inputs:
# coords (center of map): vector of lat and long coords
# zoom (zoom of map): integer from 3 to 21
# clust_num (cluster number): integer
# pt_sz (point size): integer for size of point
#
# output:
# saved png image file of google map overlaid with arrest locations
clust_df <- sex_workers[sex_workers$cluster == clust_num, ]
clust_map <- get_map(location = coords,
color = "bw",
source = "google",
maptype = "roadmap",
zoom = zoom)
plot <- ggmap(clust_map) +
geom_point(data = clust_df,
aes(x = latitude,
y = longitude),
size = pt_sz * 1.33,
color = "black") +
geom_point(data = clust_df,
aes(x = latitude,
y = longitude),
color = dendro_colors[clust_num],
size = pt_sz) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank())
file_name = paste0("baltimore_sex_workers_cluster", clust_num, "_street.png", sep = "")
png(file_name)
print(plot)
dev.off()
}
map_cluster(c(-76.682506, 39.325812), 14, 1, 4)
map_cluster(c(-76.550619, 39.364685), 15, 2, 4)
map_cluster(c(-76.652445, 39.280209), 15, 4, 4)
map_cluster(c(-76.600464, 39.230500), 15, 6, 4)