# PGH bike crash plots
#
# contact: Mike Feyder 
#         feyderm@gmail.com

library(rgdal)
library(magrittr)
library(ggplot2)
library(ggthemes)
library(dplyr)
library(scales)

# crash records for bike crashes inside the Pittsburgh city limits
d <- read.csv(file = "bike_crashes_pgh_2004-2014.csv")

# Pittsburgh neighborhood shapes, downloaded from:
#   https://data.wprdc.org/dataset/pittsburgh-neighborhoods770b7
# (dsn is the folder holding the layer, layer is the name within it)
pgh <- readOGR(
  dsn = "Pittsburgh_Neighborhoods",
  layer = "Pittsburgh_Neighborhoods"
)

# Note: longitude is the x coordinate and latitude is the y coordinate, i.e.
# the reverse of the spoken "lat, long" order:
# http://gis.stackexchange.com/questions/11626/does-y-mean-latitude-and-x-mean-longitude-in-every-gis-software

# map: neighborhood polygons with every crash overlaid as a point
map <- ggplot2::fortify(pgh) %>%
  ggplot() +
  geom_polygon(aes(x = long, y = lat, group = group)) +
  geom_point(
    data = d,
    aes(x = DEC_LONG, y = DEC_LAT),
    color = "#FFC324"
  ) +
  theme_map() +
  coord_map("mercator") +
  ggtitle("Bike Crashes (2004-2014)") +
  theme(
    plot.title = element_text(size = 15, face = "bold"),
    plot.background = element_rect(fill = "grey55")
  )

# A ggplot object is only drawn when it is printed. Top-level auto-printing
# does not happen when this script is run through source(), which would leave
# an empty PNG, so print explicitly.
png("pgh_bike_crashes_2004_2014_map.png",
    width = 700,
    height = 700,
    units = "px")
print(map)
dev.off()

# Shared look for all of the count plots.
#
# Takes no arguments. Returns a ggplot2 theme: ggthemes' theme_hc() with the
# "darkunica" background, plus grey horizontal major grid lines, grey axis
# text and a bold grey title.
theme_pgh <- function() {
  base_theme <- theme_hc(bgcolor = "darkunica")

  overrides <- theme(
    panel.grid.major.y = element_line(color = "grey30"),
    axis.text = element_text(color = "grey70"),
    plot.title = element_text(size = 15, face = "bold", color = "grey70")
  )

  base_theme + overrides
}

# crashes by year: number of rows per CRASH_YEAR, drawn as points joined by
# a line (the x/y mapping is declared once and inherited by both layers)
by_year <- d %>%
  group_by(CRASH_YEAR) %>%
  summarise(n = n()) %>%
  ggplot(aes(x = CRASH_YEAR, y = n)) +
  geom_line(color = "#FFC324") +
  geom_point(color = "#FFC324", size = 3.5) +
  # fixed 0-70 range so the axis starts at zero
  scale_y_continuous(limits = c(0, 70),
                     breaks = pretty_breaks(7)) +
  xlab("") +
  ylab("Number of Crashes") +
  ggtitle("Bike Crashes by Year") +
  theme_pgh()

# print() explicitly: a bare `by_year` is not auto-printed when the script is
# run through source(), which would leave an empty PNG
png("pgh_bike_crashes_2004_2014_by_year.png")
print(by_year)
dev.off()

# crashes by month
# axis labels for CRASH_MONTH values 1-12 (paired with breaks = 1:12 below)
months <- c("January", "February", "March", "April",
            "May", "June", "July", "August", "September",
            "October", "November", "December")

# number of rows per CRASH_MONTH, drawn as points joined by a line (the x/y
# mapping is declared once and inherited by both layers)
by_month <- d %>%
  group_by(CRASH_MONTH) %>%
  summarise(n = n()) %>%
  ggplot(aes(x = CRASH_MONTH, y = n)) +
  geom_line(color = "#FFC324") +
  # no stat argument needed: geom_point already uses the identity stat
  geom_point(color = "#FFC324", size = 3.5) +
  scale_x_continuous(breaks = 1:12,
                     labels = months) +
  # fixed 0-80 range so the axis starts at zero
  scale_y_continuous(limits = c(0, 80),
                     breaks = pretty_breaks(8)) +
  xlab("") +
  ylab("Number of Crashes") +
  ggtitle("Bike Crashes by Month") +
  theme_pgh() +
  # slant the month names so they do not overlap
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

# print() explicitly: a bare `by_month` is not auto-printed when the script
# is run through source(), which would leave an empty PNG
png("pgh_bike_crashes_2004_2014_by_month.png")
print(by_month)
dev.off()

# crashes by weekday
# axis labels for DAY_OF_WEEK values 1-7 (paired with breaks = 1:7 below)
# NOTE(review): assumes the data codes 1 as Sunday -- confirm against the
# data dictionary
weekdays <- c("Sunday", "Monday", "Tuesday", "Wednesday",
              "Thursday", "Friday", "Saturday")

# number of rows per DAY_OF_WEEK, drawn as points joined by a line (the x/y
# mapping is declared once and inherited by both layers)
by_weekday <- d %>%
  group_by(DAY_OF_WEEK) %>%
  summarise(n = n()) %>%
  ggplot(aes(x = DAY_OF_WEEK, y = n)) +
  geom_line(color = "#FFC324") +
  geom_point(color = "#FFC324", size = 3.5) +
  scale_x_continuous(breaks = 1:7, labels = weekdays) +
  # fixed 0-100 range so the axis starts at zero
  scale_y_continuous(limits = c(0, 100), breaks = pretty_breaks(10)) +
  xlab("") +
  ylab("Number of Crashes") +
  ggtitle("Bike Crashes by Weekday") +
  theme_pgh()

# print() explicitly: a bare `by_weekday` is not auto-printed when the script
# is run through source(), which would leave an empty PNG
png("pgh_bike_crashes_2004_2014_by_weekday.png")
print(by_weekday)
dev.off()


# crashes by hour
# 25 axis labels, one per break at 0, 100, ..., 2400; the last one is blank
time <- c("midnight", 1:11, "noon", 1:11, "")

# Rows are counted in bins of width 100 centered on 50, 150, ..., so the bin
# edges fall on multiples of 100. The same binning is drawn twice, once as
# points and once as a line.
# NOTE(review): assumes TIME_OF_DAY is encoded as HHMM and that 9999 marks an
# unknown time -- confirm against the data dictionary
by_hour <- d %>%
  filter(TIME_OF_DAY != 9999) %>%
  ggplot(aes(x = TIME_OF_DAY)) +
  stat_bin(geom = "point",
           binwidth = 100,
           center = 50,
           color = "#FFC324",
           size = 3.5) +
  stat_bin(geom = "line",
           binwidth = 100,
           center = 50,
           color = "#FFC324") +
  scale_x_continuous(breaks = (0:24) * 100, labels = time) +
  # fixed 0-80 range so the axis starts at zero
  scale_y_continuous(limits = c(0, 80), breaks = pretty_breaks(8)) +
  xlab("") +
  ylab("Number of Crashes") +
  ggtitle("Bike Crashes by Hour") +
  theme_pgh() +
  # slant the hour labels so they do not overlap
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

# print() explicitly: a bare `by_hour` is not auto-printed when the script is
# run through source(), which would leave an empty PNG
png("pgh_bike_crashes_2004_2014_by_hour.png")
print(by_hour)
dev.off()

# crashes by street
# the ten STREET_NAME values with the most rows, largest count first
top_st <- d %>%
  group_by(STREET_NAME) %>%
  summarise(n = n()) %>%
  arrange(desc(n)) %>%
  slice(1:10)

# Factor levels in reverse rank order: after coord_flip() the first level is
# drawn at the bottom, so reversing puts the street with the most crashes at
# the top of the chart.
desc_st <- rev(as.character(top_st$STREET_NAME))
top_st$STREET_NAME <- ordered(x = top_st$STREET_NAME,
                              levels = desc_st)

by_st <- top_st %>%
  ggplot() +
  # color sets the bar outline only; the fill is left at the ggplot2 default
  geom_bar(aes(y = n, x = STREET_NAME),
           stat = "identity",
           color = "#FFC324") +
  xlab("") +
  ylab("Number of Crashes") +
  scale_y_continuous(limits = c(0, 30)) +
  theme_pgh() +
  # the bars run horizontally after the flip, so move the grid lines from
  # the y direction to the x direction
  theme(panel.grid.major.y = element_blank(),
        panel.grid.major.x = element_line(color = "grey30")) +
  coord_flip() +
  ggtitle("Bike Crashes by Street\n(top 10)")

# print() explicitly: a bare `by_st` is not auto-printed when the script is
# run through source(), which would leave an empty PNG
png("pgh_bike_crashes_2004_2014_by_street.png")
print(by_st)
dev.off()

# The two tables below are printed explicitly: a bare top-level expression is
# not auto-printed when the script is run through source(), so the results
# would otherwise be silently discarded.

# fatal bike crashes (only 4)
d %>%
  group_by(FATAL_COUNT) %>%
  summarise(n = n()) %>%
  print()

# almost all on streets w/ 25 or 35 mph limits
d %>%
  group_by(SPEED_LIMIT) %>%
  summarise(n = n()) %>%
  print()