library(magrittr)
library(reshape2)
library(dplyr)
library(chron)
library(ggplot2)
library(ggthemes)
library(rgdal)
library(viridis)
library(RColorBrewer)
library(ggmap)
library(grid)
library(gridExtra)
library(scales)

# downloaded from:
# https://data.baltimorecity.gov/Transportation/DOT-Towing/k78j-azhn
DOT_Towing <- read.csv("DOT_Towing.csv")

# create POSIXlt object with time data
DOT_Towing$towedDateTime %<>% strptime(tz = "", format = "%m/%d/%Y %I:%M:%S %p")

# keep data points with time data
DOT_Towing <- DOT_Towing[complete.cases(DOT_Towing$towedDateTime$hour), ]
dim(DOT_Towing)[1]     # 61906 data points

# approximately 5 years of data
DOT_Towing$towedDateTime %>% head()  # 2015-11-12 20:50:00
DOT_Towing$towedDateTime %>% tail()  # 2010-10-23 10:50:00

# geocoding of addresses
source("towing_geocoding.R")

# read in geocoded data and reorder Lat/Lng
coords <- read.csv("~/Desktop/Bmore_demo/DOT_towing/geocoded_addresses.csv") %>% 
  select(Postal.Address, Lng, Lat)

# number of addresses not returning coordinates
NAs <- coords$Lat %>% is.na() %>% sum()
num_towings <- coords %>% nrow()
(NAs/num_towings) * 100   # 1.67%

# remove NAs from coords and corresponding rows in DOT_Towing, join dataframes
keep <- coords$Lat %>% complete.cases()
coords <- coords[keep, ]
DOT_Towing <- DOT_Towing[keep, ]
tow <- cbind(DOT_Towing, coords)

### create boolean vector if coordinates are inside of city limits, or 
### inside of city limits but not downtown or midtown

# WGS84 (EPSG: 4326) is the coordinate reference system used by Google Earth 
coords_sp <- SpatialPoints(coords[ ,2:3], proj4string = CRS("+init=epsg:4326"))

# Shapefile of Baltimore city neighborhoods transformed to CRS of coordinates 
# (downloaded from: https://data.baltimorecity.gov/Neighborhoods/Crime-Safety-2010-2013-Shape/y3aa-iwrk)
bmore_poly <- readOGR("/Users/michaelfeyder/Desktop/Bmore_demo/DOT_towing/Crime___Safety__2010-2013__-_Shape", 
                      "VSCrime_2010-2013")
bmore_poly <- spTransform(bmore_poly, proj4string(coords_sp))

# coordinates within city limits
in_city <- over(coords_sp, bmore_poly) %>% complete.cases()

# coordinates within city limits but not downtown or midtown
in_city_not_dt_mt <- bmore_poly[!(bmore_poly[[1]] == "Downtown/Seton Hill" | bmore_poly[[1]] == "Midtown"), ] %>%
  over(coords_sp, .) %>%
  complete.cases()

# baltimore neighborhoods used as basemap
source("bmore_map.R")

# towing locations with neighborhood map
tow_pts_nhood <- ggplot() + 
                  basemap + 
                  geom_point(data = tow[in_city, ], 
                             aes(x = Lng,
                                 y = Lat),
                             size = 1.3, 
                             color = "#2D2D83",
                             alpha = 0.7) +
                  theme_map() +
                  coord_map("mercator")

png("baltimore_towings_pts_w_nhood.png", 
    width = 700, 
    height = 700, 
    units = "px")
tow_pts_nhood
dev.off()

# 2d density of towings
tow_density <- ggplot() +  
  stat_density2d(data = tow[in_city, ], 
                 aes(x = Lng, y = Lat, fill = ..level..),
                 bins = 30,
                 geom = "polygon") +
  scale_fill_gradientn(colours = viridis(256),
                       guide = FALSE) +
  basemap + 
  theme_map() +
  coord_map("mercator")

png("baltimore_towings_density.png", 
    width = 700, 
    height = 700, 
    units = "px")
tow_density
dev.off()

# 2d density of towings excluding downtown and midtown
tow_density_wo_dt_mt <- ggplot() +  
  stat_density2d(data = tow[in_city_not_dt_mt, ], 
                 aes(x = Lng, y = Lat, fill = ..level..),
                 bins = 20,
                 geom = "polygon") +
  scale_fill_gradientn(colours = viridis(256),
                       guide = FALSE) +
  basemap + 
  theme_map() +
  coord_map("mercator")

png("baltimore_towings_density_wo_dt_mt.png", 
    width = 700, 
    height = 700, 
    units = "px")
tow_density_wo_dt_mt
dev.off()

#### TOP LAT/LNG COORDINATES

# coordinates with greatest number of towings
top_coords <- tow[in_city, ] %>%
  select(Lat, Lng) %>% 
  unite(Lat, Lng, sep = ",", col = combined_coords) %>%
  group_by(combined_coords) %>%
  tally() %>%
  arrange(desc(n)) %>%
  separate(combined_coords, c("Lat", "Lng"), sep = ",")

top_coords$Lat %<>% as.numeric()
top_coords$Lng %<>% as.numeric()

# city plot of top coordinates
top_coords_plot <- ggplot() + 
  basemap + 
  geom_point(data = top_coords[1:100, ], 
             aes(x = Lng,
                 y = Lat,
                 color = n,
                 size = n)) +
  scale_color_gradientn(colours = viridis(256),                          
                        name = "Number of\nTowings",
                        guide = guide_colorbar(barwidth = 2,
                                               barheigh = 14),
                        breaks = pretty_breaks(n = 4)) +
  scale_size_area(guide = FALSE) +
  theme_map() +
  theme(legend.title = element_text(size = 15), 
        legend.text = element_text(size = 15)) + 
  coord_map("mercator")

png("baltimore_towings_top_coords.png", 
    width = 700, 
    height = 700, 
    units = "px")
top_coords_plot
dev.off()

# downtown plot of top coords
base_map_dt <- get_map(location = c(-76.620521, 39.296362),
                      color = "bw",
                      source = "google",
                      maptype = "roadmap",
                      zoom = 15)
  
top_coord_dt_plot <- ggmap(base_map_dt) + 
    geom_point(data = top_coords[1:100, ],
               aes(x = Lng, 
                   y = Lat,
                   size = n * 1.3),
               color = "black") +
    geom_point(data = top_coords[1:100, ],
               aes(x = Lng, 
                   y = Lat,
                   size = n,
                   color = n)) +
    scale_size_area(max_size = 13, guide = FALSE) +
    scale_color_gradientn(colours = viridis(256),
                          name = "Number of Towings",
                          guide = guide_colorbar(barwidth = 20,
                                                 barheigh = 2),
                          breaks = pretty_breaks(n = 4)) +
    theme(axis.text.x = element_blank(), 
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          axis.text.y = element_blank(),
          axis.ticks.y = element_blank(),
          axis.title.y = element_blank(),
          legend.title = element_text(size = 16), 
          legend.text = element_text(size = 16),
          legend.position = "top")
  
png("towing_UMD_medical_center_and_downtown.png", 
      width = 700, 
      height = 700, 
      units = "px")
top_coord_dt_plot
  dev.off()


#### TOP 10 STREETS
# all addresses
addresses <- coords[complete.cases(coords$Postal.Address), ] %>%
  select(Postal.Address)

# roads (addresses without number)
rd <- addresses[[1]] %>% 
  sub(pattern = "[0-9]* ", replacement = "", x = . ) %>%
  as.data.frame()
colnames(rd) <- "rd_name"

# top 10 roads
rd_top10 <- rd %>% 
  group_by(rd_name) %>% 
  tally() %>%
  arrange(desc(n)) %>%
  slice(1:10)

# boolen vector of whether towing occurred for a top 10 road
top10_bool <- rd$rd_name %in% rd_top10$rd_name
total_top10_rd <- top10_bool %>% sum()  # 7635 towings or ...
total_top10_rd/length(rd$rd_name)*100   # ~12.5%


# plot of road map overlaid with neighborhoods and towings from top 10 streets
rd_map <- get_map(location = c(-76.613790, 39.295919),
                       source = "google",
                       maptype = "roadmap",
                       zoom = 12,
                       color = "bw")

top10_map <- ggmap(rd_map) +
  geom_point(data = tow[(in_city & top10_rd), ], 
             aes(x = Lng,
                 y = Lat),
             size = 1.8, 
             color = "#2D2D83") +
  geom_polygon(data = bmore_map,
               mapping = aes(x = longitude,
                             y = latitude,
                             group = Neighborhood),
               color = "grey20",
               alpha = 0.0) +
  theme_map() +
  coord_map("mercator")

png("baltimore_towing_top10_rd.png", 
    width = 700, 
    height = 700, 
    units = "px")
top10_map
dev.off()

# plot of road map overlaid with neighborhoods, towings from top 10 streets,
# and density of towings
top10_map_density <- ggmap(rd_map) +
  stat_density2d(data = tow[in_city, ], 
                 aes(x = Lng, y = Lat, fill = ..level..),
                 bins = 30,  # 100 for entire city
                 geom = "polygon") +
  scale_fill_gradientn(colours = viridis(256),
                       guide = FALSE) +
  geom_point(data = tow[(in_city & top10_rd), ], 
             aes(x = Lng,
                 y = Lat),
             size = 1.5, 
             color = "#cc0000") +
  geom_polygon(data = bmore_map,
               mapping = aes(x = longitude,
                             y = latitude,
                             group = Neighborhood),
               color = "grey20",
               alpha = 0.0) +
  theme_map() +
  coord_map("mercator")

png("baltimore_towing_top10_rd_density.png", 
    width = 700, 
    height = 700, 
    units = "px")
top10_map_density
dev.off()

#### STOLEN VEHICLES
stolen <- tow$stolenVehicleFlag == 1
stolen[is.na(stolen)] <- FALSE
coords[(in_city & stolen), ] %>% nrow()  # 4526 within city limits

stolen_density <- ggplot() +  
  stat_density2d(data = tow[(in_city & stolen), ], 
                 aes(x = Lng, y = Lat, fill = ..level..),
                 bins = 13,
                 geom = "polygon") + 
  scale_fill_gradientn(colours = viridis(256),
                       guide = FALSE) +
  basemap +
  theme_map() +
  coord_map("mercator")

png("baltimore_towed_stolen.png", 
    width = 700, 
    height = 700, 
    units = "px")
stolen_density
dev.off()

#### TOW COMPANIES 
companies <- tow %>% 
  select(towCompany, towedFromLocation) %>% 
  group_by(towCompany) %>% 
  summarise_each(funs(length)) %>% 
  arrange(desc(towedFromLocation))

# companies with over 4000 towings
c1 <- grep(pattern = "McDels", x = companies$towCompany, value = TRUE)
c2 <- "City"
c3 <- grep(pattern = "Frankford", x = companies$towCompany, value = TRUE)
c4 <- "Universal Towing LLC"
c5 <- grep(pattern = "Elliotts", x = companies$towCompany, value = TRUE)
c6 <- "Greenwood Towing"
top_companies <- c(c1, c2, c3, c4, c5, c6)

big_six <- tow$towCompany %in% top_companies
big_six %>% sum() # 57782 or 95%
company1 <- tow$towCompany %in% c1
company2 <- tow$towCompany %in% c2
company3 <- tow$towCompany %in% c3
company4 <- tow$towCompany %in% c4
company5 <- tow$towCompany %in% c5
company6 <- tow$towCompany %in% c6

tow$company_factor <- "not_big_six"
for (i in 1:nrow(tow)) {
  if (tow$towCompany[i] %in% c1){
    tow$company_factor[i] <- "McDel's Towing & Recovery"
  }
  if (tow$towCompany[i] %in% c2){
    tow$company_factor[i] <- "City"
  }
  if (tow$towCompany[i] %in% c3){
    tow$company_factor[i] <- "Frankford Towing"
  }
  if (tow$towCompany[i] %in% c4){
    tow$company_factor[i] <- "Universal Towing"
  }
  if (tow$towCompany[i] %in% c5){
    tow$company_factor[i] <- "Jim Elliott's Towing"
  }
  if (tow$towCompany[i] %in% c6){
    tow$company_factor[i] <- "Greenwood Towing"
  }
  else {
    next
  }
}

# individual company plots 
plot_map <- function(company, title){
  
  # Input:
  #    company: boolen array of rows to for subset
  #    title: string for ggplot title
  # Output
  #    ggplot map
  
  ggplot() + 
    basemap + 
    geom_point(data = tow[(in_city & company), ], 
               aes(x = Lng,
                   y = Lat),
               size = 2.4,
               alpha = 0.1,
               show_guide = FALSE) +
    ggtitle(title) +
    theme_map() +
    coord_map("mercator") +
    theme(plot.title = element_text(size = 20))
}

c1_plot <- plot_map(company1, "McDel's Towing")
c2_plot <- plot_map(company2, "City Towing")
c3_plot <- plot_map(company3, "Frankford Towing")
c4_plot <- plot_map(company4, "Universal Towing")
c5_plot <- plot_map(company5, "Jim Elliotts Towing")
c6_plot <- plot_map(company6, "Greenwood Towing")

png("baltimore_towings_by_company.png", 
    width = 800, 
    height = 1200, 
    units = "px")
grid.arrange(c1_plot, c2_plot, c3_plot, c4_plot, c5_plot, c6_plot,
             nrow = 3,
             ncol = 2,
             respect = TRUE)
dev.off()

# plot for c1, c3, c4, c6
c1_3_4_5 <- tow$towCompany %in% c(c1, c3, c4, c5)

periphery_tow <- ggplot() + 
  basemap + 
  geom_point(data = tow[(in_city & c1_3_4_5), ], 
             aes(x = Lng,
                 y = Lat,
                 color = company_factor),
             size = 2.4,
             alpha = 1) +
  scale_color_manual(values = brewer.pal(4, "Dark2")) +
  theme_map() +
  coord_map("mercator") +
  theme(legend.key = element_blank(),
        legend.title = element_blank(),
        legend.key.height = unit(1.1, "cm"),
        legend.text = element_text(size = 20)) +
  guides(colour = guide_legend(override.aes = list(size=6)))


png("baltimore_periphery_towings_by_company.png", 
    width = 700, 
    height = 700, 
    units = "px")
periphery_tow
dev.off()