library(magrittr)
library(dplyr)
library(tidyr)
library(rgdal)
library(rjson)

# Load the vacant-building records and turn the text column Location.1
# (presumably formatted like "(a, b)") into two numeric coordinate columns.
# NOTE(review): the first piece is labelled longitude and the second latitude;
# location fields exported as "(lat, long)" would make these names swapped --
# confirm the order against the data.
vacant <- read.csv("Vacant_Buildings.csv") %>%
  separate(col = Location.1, into = c("longitude", "latitude"), sep = ",") %>%
  mutate(
    # strip the opening parenthesis left on the first piece, then parse
    longitude = as.numeric(gsub("\\(", "", longitude)),
    # strip the closing parenthesis left on the second piece, then parse
    latitude = as.numeric(gsub("\\)", "", latitude))
  )

# Save only the two cleaned coordinate columns, without row names.
write.csv(
  vacant[, c("longitude", "latitude")],
  file = "Baltimore_vacant_buildings.csv",
  row.names = FALSE
)

#### vacant buildings per neighborhood
# Shapefile of Baltimore city neighborhoods with coords transformed to 
# WGS84 (EPSG: 4326), the coordinate reference system (CRS) used by 
# Google Earth
# downloaded from: https://data.baltimorecity.gov/Neighborhoods/Crime-Safety-2010-2012-Shape/bywi-mtiu
# Read the neighborhood layer from the shapefile directory, then reproject it
# to EPSG:4326.
bmore_poly <- spTransform(
  readOGR(dsn = "Crime___Safety__2010-2012__-_Shape",
          layer = "VS_Crime_2010-2012"),
  CRS("+init=epsg:4326")
)

# Count the vacant buildings that fall inside each neighborhood polygon.
#
# Produces:
#   coords_sp          - SpatialPoints of the vacant buildings (EPSG:4326)
#   all_neighborhoods  - character vector of the CSA names in bmore_poly
#   neighborhood_count - integer vector, one count per entry of
#                        all_neighborhoods (same order)

# Build the CRS once; it is shared by the points and every polygon subset so
# that over() sees identical coordinate reference systems.
wgs84 <- CRS("+init=epsg:4326")

# The point layer was never created before being used below. Rows whose
# coordinates failed to parse are dropped because SpatialPoints() rejects NA.
# NOTE(review): x is taken from `longitude` and y from `latitude` as the
# columns are named; if Location.1 is actually "(lat, long)" the names are
# swapped and every count will be zero -- confirm against the data.
has_coords <- complete.cases(vacant[, c("longitude", "latitude")])
coords_sp <- SpatialPoints(
  cbind(vacant$longitude[has_coords], vacant$latitude[has_coords]),
  proj4string = wgs84
)

all_neighborhoods <- as.character(bmore_poly@data$CSA)

# vapply() returns a fixed-type result and avoids growing a vector in a loop.
neighborhood_count <- vapply(
  all_neighborhoods,
  function(csa) {
    neighborhood <- SpatialPolygons(
      bmore_poly@polygons[bmore_poly@data$CSA == csa],
      proj4string = wgs84
    )
    # over() yields NA for points that lie outside the polygon(s)
    sum(!is.na(over(coords_sp, neighborhood)))
  },
  integer(1),
  USE.NAMES = FALSE
)

# An all-zero result almost certainly means the points and polygons do not
# line up (e.g. swapped coordinate columns), so make that visible.
if (length(neighborhood_count) > 0 && sum(neighborhood_count) == 0) {
  warning(
    "No vacant buildings matched any neighborhood; ",
    "check the longitude/latitude column order.",
    call. = FALSE
  )
}

# Pair each neighborhood name with its vacant-building count and save the
# table without row names.
write.csv(
  data.frame(
    neighborhood = all_neighborhoods,
    vacant_buildings = neighborhood_count
  ),
  file = "Baltimore_vacant_buildings_by_neighborhood.csv",
  row.names = FALSE
)