library(magrittr)
library(reshape2)
library(dplyr)
library(chron)
library(ggplot2)
library(ggthemes)
library(rgdal)
library(viridis)
library(RColorBrewer)
library(ggmap)
library(grid)
library(gridExtra)
library(scales)
# downloaded from:
# https://data.baltimorecity.gov/Transportation/DOT-Towing/k78j-azhn
DOT_Towing <- read.csv("DOT_Towing.csv")
# create POSIXlt object with time data
DOT_Towing$towedDateTime %<>% strptime(tz = "", format = "%m/%d/%Y %I:%M:%S %p")
# keep data points with time data
DOT_Towing <- DOT_Towing[complete.cases(DOT_Towing$towedDateTime$hour), ]
dim(DOT_Towing)[1] # 61906 data points
# approximately 5 years of data
DOT_Towing$towedDateTime %>% head() # 2015-11-12 20:50:00
DOT_Towing$towedDateTime %>% tail() # 2010-10-23 10:50:00
# geocoding of addresses
source("towing_geocoding.R")
# read in geocoded data and reorder Lat/Lng
coords <- read.csv("~/Desktop/Bmore_demo/DOT_towing/geocoded_addresses.csv") %>%
select(Postal.Address, Lng, Lat)
# number of addresses not returning coordinates
NAs <- coords$Lat %>% is.na() %>% sum()
num_towings <- coords %>% nrow()
(NAs/num_towings) * 100 # 1.67%
# remove NAs from coords and corresponding rows in DOT_Towing, join dataframes
keep <- coords$Lat %>% complete.cases()
coords <- coords[keep, ]
DOT_Towing <- DOT_Towing[keep, ]
tow <- cbind(DOT_Towing, coords)
### create boolean vector if coordinates are inside of city limits, or
### inside of city limits but not downtown or midtown
# WGS84 (EPSG: 4326) is the coordinate reference system used by Google Earth
coords_sp <- SpatialPoints(coords[ ,2:3], proj4string = CRS("+init=epsg:4326"))
# Shapefile of Baltimore city neighborhoods transformed to CRS of coordinates
# (downloaded from: https://data.baltimorecity.gov/Neighborhoods/Crime-Safety-2010-2013-Shape/y3aa-iwrk)
bmore_poly <- readOGR("/Users/michaelfeyder/Desktop/Bmore_demo/DOT_towing/Crime___Safety__2010-2013__-_Shape",
"VSCrime_2010-2013")
bmore_poly <- spTransform(bmore_poly, proj4string(coords_sp))
# coordinates within city limits
in_city <- over(coords_sp, bmore_poly) %>% complete.cases()
# coordinates within city limits but not downtown or midtown
in_city_not_dt_mt <- bmore_poly[!(bmore_poly[[1]] == "Downtown/Seton Hill" | bmore_poly[[1]] == "Midtown"), ] %>%
over(coords_sp, .) %>%
complete.cases()
# baltimore neighborhoods used as basemap
source("bmore_map.R")
# towing locations with neighborhood map
tow_pts_nhood <- ggplot() +
basemap +
geom_point(data = tow[in_city, ],
aes(x = Lng,
y = Lat),
size = 1.3,
color = "#2D2D83",
alpha = 0.7) +
theme_map() +
coord_map("mercator")
png("baltimore_towings_pts_w_nhood.png",
width = 700,
height = 700,
units = "px")
tow_pts_nhood
dev.off()
# 2d density of towings
tow_density <- ggplot() +
stat_density2d(data = tow[in_city, ],
aes(x = Lng, y = Lat, fill = ..level..),
bins = 30,
geom = "polygon") +
scale_fill_gradientn(colours = viridis(256),
guide = FALSE) +
basemap +
theme_map() +
coord_map("mercator")
png("baltimore_towings_density.png",
width = 700,
height = 700,
units = "px")
tow_density
dev.off()
# 2d density of towings excluding downtown and midtown
tow_density_wo_dt_mt <- ggplot() +
stat_density2d(data = tow[in_city_not_dt_mt, ],
aes(x = Lng, y = Lat, fill = ..level..),
bins = 20,
geom = "polygon") +
scale_fill_gradientn(colours = viridis(256),
guide = FALSE) +
basemap +
theme_map() +
coord_map("mercator")
png("baltimore_towings_density_wo_dt_mt.png",
width = 700,
height = 700,
units = "px")
tow_density_wo_dt_mt
dev.off()
#### TOP LAT/LNG COORDINATES
# coordinates with greatest number of towings
top_coords <- tow[in_city, ] %>%
select(Lat, Lng) %>%
unite(Lat, Lng, sep = ",", col = combined_coords) %>%
group_by(combined_coords) %>%
tally() %>%
arrange(desc(n)) %>%
separate(combined_coords, c("Lat", "Lng"), sep = ",")
top_coords$Lat %<>% as.numeric()
top_coords$Lng %<>% as.numeric()
# city plot of top coordinates
top_coords_plot <- ggplot() +
basemap +
geom_point(data = top_coords[1:100, ],
aes(x = Lng,
y = Lat,
color = n,
size = n)) +
scale_color_gradientn(colours = viridis(256),
name = "Number of\nTowings",
guide = guide_colorbar(barwidth = 2,
barheigh = 14),
breaks = pretty_breaks(n = 4)) +
scale_size_area(guide = FALSE) +
theme_map() +
theme(legend.title = element_text(size = 15),
legend.text = element_text(size = 15)) +
coord_map("mercator")
png("baltimore_towings_top_coords.png",
width = 700,
height = 700,
units = "px")
top_coords_plot
dev.off()
# downtown plot of top coords
base_map_dt <- get_map(location = c(-76.620521, 39.296362),
color = "bw",
source = "google",
maptype = "roadmap",
zoom = 15)
top_coord_dt_plot <- ggmap(base_map_dt) +
geom_point(data = top_coords[1:100, ],
aes(x = Lng,
y = Lat,
size = n * 1.3),
color = "black") +
geom_point(data = top_coords[1:100, ],
aes(x = Lng,
y = Lat,
size = n,
color = n)) +
scale_size_area(max_size = 13, guide = FALSE) +
scale_color_gradientn(colours = viridis(256),
name = "Number of Towings",
guide = guide_colorbar(barwidth = 20,
barheigh = 2),
breaks = pretty_breaks(n = 4)) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
legend.title = element_text(size = 16),
legend.text = element_text(size = 16),
legend.position = "top")
png("towing_UMD_medical_center_and_downtown.png",
width = 700,
height = 700,
units = "px")
top_coord_dt_plot
dev.off()
#### TOP 10 STREETS
# all addresses
addresses <- coords[complete.cases(coords$Postal.Address), ] %>%
select(Postal.Address)
# roads (addresses without number)
rd <- addresses[[1]] %>%
sub(pattern = "[0-9]* ", replacement = "", x = . ) %>%
as.data.frame()
colnames(rd) <- "rd_name"
# top 10 roads
rd_top10 <- rd %>%
group_by(rd_name) %>%
tally() %>%
arrange(desc(n)) %>%
slice(1:10)
# boolen vector of whether towing occurred for a top 10 road
top10_bool <- rd$rd_name %in% rd_top10$rd_name
total_top10_rd <- top10_bool %>% sum() # 7635 towings or ...
total_top10_rd/length(rd$rd_name)*100 # ~12.5%
# plot of road map overlaid with neighborhoods and towings from top 10 streets
rd_map <- get_map(location = c(-76.613790, 39.295919),
source = "google",
maptype = "roadmap",
zoom = 12,
color = "bw")
top10_map <- ggmap(rd_map) +
geom_point(data = tow[(in_city & top10_rd), ],
aes(x = Lng,
y = Lat),
size = 1.8,
color = "#2D2D83") +
geom_polygon(data = bmore_map,
mapping = aes(x = longitude,
y = latitude,
group = Neighborhood),
color = "grey20",
alpha = 0.0) +
theme_map() +
coord_map("mercator")
png("baltimore_towing_top10_rd.png",
width = 700,
height = 700,
units = "px")
top10_map
dev.off()
# plot of road map overlaid with neighborhoods, towings from top 10 streets,
# and density of towings
top10_map_density <- ggmap(rd_map) +
stat_density2d(data = tow[in_city, ],
aes(x = Lng, y = Lat, fill = ..level..),
bins = 30, # 100 for entire city
geom = "polygon") +
scale_fill_gradientn(colours = viridis(256),
guide = FALSE) +
geom_point(data = tow[(in_city & top10_rd), ],
aes(x = Lng,
y = Lat),
size = 1.5,
color = "#cc0000") +
geom_polygon(data = bmore_map,
mapping = aes(x = longitude,
y = latitude,
group = Neighborhood),
color = "grey20",
alpha = 0.0) +
theme_map() +
coord_map("mercator")
png("baltimore_towing_top10_rd_density.png",
width = 700,
height = 700,
units = "px")
top10_map_density
dev.off()
#### STOLEN VEHICLES
stolen <- tow$stolenVehicleFlag == 1
stolen[is.na(stolen)] <- FALSE
coords[(in_city & stolen), ] %>% nrow() # 4526 within city limits
stolen_density <- ggplot() +
stat_density2d(data = tow[(in_city & stolen), ],
aes(x = Lng, y = Lat, fill = ..level..),
bins = 13,
geom = "polygon") +
scale_fill_gradientn(colours = viridis(256),
guide = FALSE) +
basemap +
theme_map() +
coord_map("mercator")
png("baltimore_towed_stolen.png",
width = 700,
height = 700,
units = "px")
stolen_density
dev.off()
#### TOW COMPANIES
companies <- tow %>%
select(towCompany, towedFromLocation) %>%
group_by(towCompany) %>%
summarise_each(funs(length)) %>%
arrange(desc(towedFromLocation))
# companies with over 4000 towings
c1 <- grep(pattern = "McDels", x = companies$towCompany, value = TRUE)
c2 <- "City"
c3 <- grep(pattern = "Frankford", x = companies$towCompany, value = TRUE)
c4 <- "Universal Towing LLC"
c5 <- grep(pattern = "Elliotts", x = companies$towCompany, value = TRUE)
c6 <- "Greenwood Towing"
top_companies <- c(c1, c2, c3, c4, c5, c6)
big_six <- tow$towCompany %in% top_companies
big_six %>% sum() # 57782 or 95%
company1 <- tow$towCompany %in% c1
company2 <- tow$towCompany %in% c2
company3 <- tow$towCompany %in% c3
company4 <- tow$towCompany %in% c4
company5 <- tow$towCompany %in% c5
company6 <- tow$towCompany %in% c6
tow$company_factor <- "not_big_six"
for (i in 1:nrow(tow)) {
if (tow$towCompany[i] %in% c1){
tow$company_factor[i] <- "McDel's Towing & Recovery"
}
if (tow$towCompany[i] %in% c2){
tow$company_factor[i] <- "City"
}
if (tow$towCompany[i] %in% c3){
tow$company_factor[i] <- "Frankford Towing"
}
if (tow$towCompany[i] %in% c4){
tow$company_factor[i] <- "Universal Towing"
}
if (tow$towCompany[i] %in% c5){
tow$company_factor[i] <- "Jim Elliott's Towing"
}
if (tow$towCompany[i] %in% c6){
tow$company_factor[i] <- "Greenwood Towing"
}
else {
next
}
}
# individual company plots
plot_map <- function(company, title){
# Input:
# company: boolen array of rows to for subset
# title: string for ggplot title
# Output
# ggplot map
ggplot() +
basemap +
geom_point(data = tow[(in_city & company), ],
aes(x = Lng,
y = Lat),
size = 2.4,
alpha = 0.1,
show_guide = FALSE) +
ggtitle(title) +
theme_map() +
coord_map("mercator") +
theme(plot.title = element_text(size = 20))
}
c1_plot <- plot_map(company1, "McDel's Towing")
c2_plot <- plot_map(company2, "City Towing")
c3_plot <- plot_map(company3, "Frankford Towing")
c4_plot <- plot_map(company4, "Universal Towing")
c5_plot <- plot_map(company5, "Jim Elliotts Towing")
c6_plot <- plot_map(company6, "Greenwood Towing")
png("baltimore_towings_by_company.png",
width = 800,
height = 1200,
units = "px")
grid.arrange(c1_plot, c2_plot, c3_plot, c4_plot, c5_plot, c6_plot,
nrow = 3,
ncol = 2,
respect = TRUE)
dev.off()
# plot for c1, c3, c4, c6
c1_3_4_5 <- tow$towCompany %in% c(c1, c3, c4, c5)
periphery_tow <- ggplot() +
basemap +
geom_point(data = tow[(in_city & c1_3_4_5), ],
aes(x = Lng,
y = Lat,
color = company_factor),
size = 2.4,
alpha = 1) +
scale_color_manual(values = brewer.pal(4, "Dark2")) +
theme_map() +
coord_map("mercator") +
theme(legend.key = element_blank(),
legend.title = element_blank(),
legend.key.height = unit(1.1, "cm"),
legend.text = element_text(size = 20)) +
guides(colour = guide_legend(override.aes = list(size=6)))
png("baltimore_periphery_towings_by_company.png",
width = 700,
height = 700,
units = "px")
periphery_tow
dev.off()