# https://programs.online.utica.edu/programs/masters-data-science
# Session setup: report memory use, record the start time, configure knitr,
# attach the packages used below, and load the points-of-interest table.
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 542109 29.0 1238217 66.2 621331 33.2
## Vcells 1019753 7.8 8388608 64.0 1600889 12.3
# The workspace is deliberately NOT cleared with rm(list = ls()): that call
# deletes the caller's objects yet leaves attached packages and options in
# place, so it does not provide a clean session. Restart R for a fresh run.
start_time <- Sys.time()
knitr::opts_chunk$set(echo = TRUE)
library(easypackages)
libraries(
  "data.table", "devtools", "dplyr", "ggplot2", "ggmap", "ggrepel", "ggsn",
  "lubridate", "readr", "tidyr", "tidyverse", "sf"
)
# header = TRUE (not T): T is an ordinary variable that can be reassigned.
POI <- read.csv("Point_Of_Interest.csv", header = TRUE)
dim(POI)
## [1] 19010 17
str(POI)
## 'data.frame': 19010 obs. of 17 variables:
## $ SEGMENTID : int 69163 34217 34220 43117 9008897 78137 9004671 17826 67394 34220 ...
## $ LON : num -73.9 -74 -74 -73.9 -73.8 ...
## $ LAT : num 40.8 40.8 40.8 40.7 40.9 ...
## $ COMPLEXID : int 0 0 0 0 515 0 2245 0 0 0 ...
## $ SAFTYPE : Factor w/ 4 levels "","G","N","X": 3 1 1 1 1 1 4 3 1 1 ...
## $ SOS : int 2 1 1 2 1 NA 2 2 2 1 ...
## $ PLACEID : int 12171 13932 2410 3247 8230 5077 7129 13182 15765 5444 ...
## $ FACI_DOM : int 10 3 4 1 2 2 3 7 1 3 ...
## $ BIN : int 1074333 1024838 1024839 3047371 2093858 2009619 3327986 3332515 4436782 1024843 ...
## $ BOROUGH : int 1 1 1 3 2 2 3 3 4 1 ...
## $ CREATED : Factor w/ 1147 levels "01/02/2013 12:00:00 AM +0000",..: 447 447 447 447 447 447 447 447 447 447 ...
## $ MODIFIED : Factor w/ 1438 levels "01/02/2013 12:00:00 AM +0000",..: 1303 1207 1207 1207 1207 1305 1011 1379 1207 1207 ...
## $ FACILITY_T: int 2 3 7 9 1 5 1 8 9 3 ...
## $ SOURCE : Factor w/ 11 levels "DCP","DOE","DoITT",..: 2 6 9 9 3 3 1 1 5 6 ...
## $ B7SC : int 10078501 NA NA NA NA NA 32055002 33361901 NA NA ...
## $ PRI_ADD : int 5128938 1023588 1023591 3058824 2101927 2017249 3094096 5125956 0 1023594 ...
## $ NAME : Factor w/ 18875 levels "0 BOND STREET",..: 13632 5082 500 6650 3575 9090 1707 4702 16199 16527 ...
# Per-column summaries of POI. The recorded output below reports NA counts for
# some columns (e.g. BOROUGH), which is why rows are filtered before mapping.
summary(POI)
## SEGMENTID LON LAT COMPLEXID SAFTYPE
## Min. : 10 Min. :-74.27 Min. :40.45 Min. : 0.0 :11219
## 1st Qu.: 39984 1st Qu.:-73.98 1st Qu.:40.67 1st Qu.: 0.0 G: 662
## Median : 109339 Median :-73.94 Median :40.73 Median : 0.0 N: 2847
## Mean :1623955 Mean :-73.94 Mean :40.73 Mean : 260.6 X: 4282
## 3rd Qu.: 268553 3rd Qu.:-73.89 3rd Qu.:40.80 3rd Qu.: 218.0
## Max. :9024271 Max. :-73.71 Max. :40.95 Max. :4300.0
##
## SOS PLACEID FACI_DOM BIN
## Min. :1.000 Min. : 1 Min. : 1.000 Min. : 0
## 1st Qu.:1.000 1st Qu.: 7054 1st Qu.: 2.000 1st Qu.:1004331
## Median :2.000 Median : 13796 Median : 3.000 Median :2010983
## Mean :1.511 Mean : 325108 Mean : 4.312 Mean :2030343
## 3rd Qu.:2.000 3rd Qu.:1010060 3rd Qu.: 6.000 3rd Qu.:3326732
## Max. :2.000 Max. :1031244 Max. :18.000 Max. :5169276
## NA's :2081
## BOROUGH CREATED
## Min. :1.000 05/14/2009 12:00:00 AM +0000:12800
## 1st Qu.:1.000 09/15/2008 12:00:00 AM +0000: 200
## Median :3.000 08/17/2011 12:00:00 AM +0000: 121
## Mean :2.564 08/16/2011 12:00:00 AM +0000: 93
## 3rd Qu.:4.000 09/03/2010 12:00:00 AM +0000: 62
## Max. :5.000 09/20/2012 12:00:00 AM +0000: 53
## NA's :221 (Other) : 5681
## MODIFIED FACILITY_T SOURCE
## 11/30/1899 12:00:00 AM +0000: 2139 Min. : 1.000 OTHER :5056
## 11/24/2014 12:00:00 AM +0000: 1543 1st Qu.: 2.000 DoITT :3392
## 12/09/2010 12:00:00 AM +0000: 148 Median : 4.000 DCP :2753
## 11/13/2017 12:00:00 AM +0000: 100 Mean : 4.784 DOE :2017
## 12/11/2012 12:00:00 AM +0000: 69 3rd Qu.: 7.000 NYPD :1993
## 01/29/2013 12:00:00 AM +0000: 68 Max. :13.000 EMS :1441
## (Other) :14943 (Other):2358
## B7SC PRI_ADD NAME
## Min. :10000201 Min. : 0 DEVRY COLLEGE : 3
## 1st Qu.:13289506 1st Qu.: 0 HOLY ROSARY CHURCH: 3
## Median :30013851 Median : 1031109 HOLY ROSARY SCHOOL: 3
## Mean :27945586 Mean : 1810248 PS 12 : 3
## 3rd Qu.:38334504 3rd Qu.: 3109959 PS 146 : 3
## Max. :57019202 Max. :10173608 PS 15 : 3
## NA's :11270 (Other) :18992
# Keep only rows with a positive borough code. which() also drops rows whose
# BOROUGH is NA, which a bare logical subset would turn into all-NA rows.
POI <- POI[which(POI$BOROUGH > 0), ]
# Borough codes actually present in the data.
numbers <- sort(unique(POI$BOROUGH))
# Explicit code -> name lookup. The names are fixed here rather than derived
# from the data, so a missing or unexpected code can neither mislabel the
# vector nor make the names assignment fail on a length mismatch.
boroughs <- c(
  "1" = "Manhattan",
  "2" = "Bronx",
  "3" = "Brooklyn",
  "4" = "Queens",
  "5" = "Staten Island"
)
# Look up by code (as a name), not by position; unknown codes become NA.
POI$BOROUGHNAME <- boroughs[as.character(POI$BOROUGH)]
# Register the Google Maps API key and download the two basemaps.
# The key is read from the environment instead of being hard-coded, so the
# credential is not committed with the script. Set it once per machine with
#   ggmap::register_google(key = "<your key>", write = TRUE)
# or by adding GGMAP_GOOGLE_API_KEY=<your key> to ~/.Renviron.
google_key <- Sys.getenv("GGMAP_GOOGLE_API_KEY")
if (!nzchar(google_key)) {
  stop(
    "Google Maps API key not found: set the GGMAP_GOOGLE_API_KEY ",
    "environment variable before running this script.",
    call. = FALSE
  )
}
ggmap::register_google(key = google_key)
# Map centre passed to get_googlemap() (longitude, latitude).
center <- c(lon = -73.955, lat = 40.715)
#experiment with different maptypes ("roadmap", "satellite", "terrain", "hybrid"), and zoom levels (3 - 21)
p1 <- ggmap(get_googlemap(center, maptype = "satellite", zoom = 13))
p2 <- ggmap(get_googlemap(center, maptype = "roadmap", zoom = 13))
# Build the data needed to draw an alternating blue/pink scale bar.
#
# Arguments:
#   x, y  : lower-left coordinate of the bar
#   w     : thickness of the bar
#   n     : number of divisions; the bar is drawn as n + 1 segments
#   d     : width of each segment (longitude degrees)
#   units : text placed under the bar (e.g. "Miles")
#
# Returns a list of two data frames:
#   [[1]] one row per segment: xmin, xmax, ymin, ymax, z, fill.col
#   [[2]] one row per label: xlab, ylab, text (n + 2 tick labels, then units)
scalebar1 <- function(x, y, w, n, d, units) {
  # Distance represented by one degree of longitude.
  # NOTE(review): 55.051 is carried over unchanged; presumably miles per
  # degree of longitude for the mapped area -- confirm before reusing elsewhere.
  dist_per_degree <- 55.051

  seg_start <- (0:n) * d + x
  bar <- data.frame(
    xmin = seg_start,
    xmax = seg_start + d,
    ymin = y,
    ymax = y + w,
    # rep_len() yields exactly n + 1 values, including when n is 0.
    z = rep_len(c(1, 0), n + 1),
    fill.col = rep_len(c("blue", "pink"), n + 1)
  )

  # Tick labels are multiples of the per-segment distance rounded to 0.1.
  # They are generated by count (n + 2 ticks) rather than with
  # seq(0, upper, by = step): rounding the step could make seq() return a
  # different number of values than there are label positions, which made
  # data.frame() fail, and a step that rounded to 0 made seq() fail.
  tick_step <- round(d * dist_per_degree, 1)
  tick_values <- (0:(n + 1)) * tick_step

  labs <- data.frame(
    xlab = c((0:(n + 1)) * d + x, x),
    ylab = c(rep(y - w * 1.5, n + 2), y - 3 * w),
    text = c(as.character(tick_values), units)
  )
  list(bar, labs)
}
sb1 <- scalebar1(-73.97, 40.68, 0.001, 2, 0.015, "Miles")
# Build the data needed to draw an alternating azure4/white scale bar.
#
# Arguments:
#   x, y  : lower-left coordinate of the bar
#   w     : thickness of the bar
#   n     : number of divisions; the bar is drawn as n + 1 segments
#   d     : width of each segment (longitude degrees)
#   units : text placed under the bar (e.g. "Miles")
#
# Returns a list of two data frames:
#   [[1]] one row per segment: xmin, xmax, ymin, ymax, z, fill.col
#   [[2]] one row per label: xlab, ylab, text (n + 2 tick labels, then units)
scalebar2 <- function(x, y, w, n, d, units) {
  # Distance represented by one degree of longitude.
  # NOTE(review): 55.051 is carried over unchanged; presumably miles per
  # degree of longitude for the mapped area -- confirm before reusing elsewhere.
  dist_per_degree <- 55.051

  seg_start <- (0:n) * d + x
  bar <- data.frame(
    xmin = seg_start,
    xmax = seg_start + d,
    ymin = y,
    ymax = y + w,
    # rep_len() yields exactly n + 1 values, including when n is 0.
    z = rep_len(c(1, 0), n + 1),
    fill.col = rep_len(c("azure4", "white"), n + 1)
  )

  # Tick labels are multiples of the per-segment distance rounded to 0.1.
  # They are generated by count (n + 2 ticks) rather than with
  # seq(0, upper, by = step): rounding the step could make seq() return a
  # different number of values than there are label positions, which made
  # data.frame() fail, and a step that rounded to 0 made seq() fail.
  tick_step <- round(d * dist_per_degree, 1)
  tick_values <- (0:(n + 1)) * tick_step

  labs <- data.frame(
    xlab = c((0:(n + 1)) * d + x, x),
    ylab = c(rep(y - w * 1.5, n + 2), y - 3 * w),
    text = c(as.character(tick_values), units)
  )
  list(bar, labs)
}
sb2 <- scalebar2(-73.97, 40.68, 0.001, 2, 0.015, "Miles")
# Satellite basemap with the points of interest coloured by borough, plus a
# north arrow, a scale bar, axis labels, a centred title and a panel frame.
#experiment with different point sizes and colors
p1a <- p1 +
  # Stray trailing comma after `size` removed: it passed an empty argument.
  geom_point(
    aes(x = LON, y = LAT, col = BOROUGHNAME),
    data = POI,
    size = 0.6
  ) +
  scale_colour_manual(
    name = "Borough Name",
    values = c(
      "Bronx" = "black", "Brooklyn" = "red",
      "Queens" = "green", "Manhattan" = "blue",
      "Staten Island" = "orange"
    )
  ) +
  # North arrow and its "N" label.
  geom_segment(
    arrow = arrow(length = unit(4, "mm")),
    aes(x = -73.989, xend = -73.989, y = 40.685, yend = 40.695),
    color = "pink", size = 2
  ) +
  annotate(
    x = -73.989, y = 40.682, label = "N", color = "pink",
    geom = "text", size = 10, fontface = "bold"
  ) +
  # Scale bar segments. The fill is a constant taken from sb1, so the former
  # `fill = z` mapping (always overridden by that constant) is dropped, and
  # alpha is set to 1 (fully opaque) because alpha must lie in [0, 1].
  geom_rect(
    data = sb1[[1]],
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
    inherit.aes = FALSE, show.legend = FALSE,
    color = "green", fill = sb1[[1]]$fill.col, size = 0.5, alpha = 1
  ) +
  # Scale bar tick labels and units text.
  geom_text(
    data = sb1[[2]],
    aes(x = xlab, y = ylab, label = text),
    inherit.aes = FALSE, show.legend = FALSE,
    color = "green", size = 3
  ) +
  xlab("Longitude") +
  ylab("Latitude") +
  ggtitle("Google Satellite Map - NYC Points of Interest") +
  # One theme() call: title centred and black, panel framed in black.
  theme(
    plot.title = element_text(hjust = 0.5, colour = "black"),
    panel.border = element_rect(colour = "black", fill = NA, size = 1.5)
  )
p1a
## Warning: Removed 14627 rows containing missing values (geom_point).