library(raster)
library(gstat)
library(spsurvey)
library(rgdal)
library(PracTools)
library(ggplot2)
library(ggthemes)

num_boots <- 100
truth <- list()
simple_random <- list()
strat_generating <- list()
strat_imperfect <- list()
strat_random <- list()


########################################################
# Generate spatially correlated layer ##################
########################################################

for(i in 1:num_boots){
  print(i)

  # Create random habitat raster, scaled from 0 to 1, where high values are rare on the landscape
  xy <- expand.grid(1:50, 1:50)
  names(xy) <- c('x','y')
  g.dummy <- gstat(formula = z~1, locations=~x+y, dummy = T, beta = 1, model = vgm(1, "Exp", 300, anis = c(3000, 1)), nmax=20)
  yy <- predict(g.dummy, newdata=xy, nsim=1)
  gridded(yy) = ~x+y
  
  
  blank_r <- raster(vals = NA, ncols = 50, nrows = 50, xmn=0, xmx=50, ymn=0, ymx=50)
  
  habitat_r <- blank_r
  habitat_r[] <- yy$sim1
  habitat_r <- habitat_r + -cellStats(habitat_r, min)
  habitat_r <- habitat_r/cellStats(habitat_r, max)
  
  g.dummy <- gstat(formula = z~1, locations=~x+y, dummy = T, beta = 1, model = vgm(1, "Exp", 300, anis = c(3000, 1)), nmax=20)
  yy <- predict(g.dummy, newdata=xy, nsim=1)
  gridded(yy) = ~x+y
  
  # habitat raster, scaled from 0 to 1
  
  habitat_r2 <- blank_r
  habitat_r2[] <- yy$sim1
  habitat_r2 <- habitat_r2 + -cellStats(habitat_r2, min)
  habitat_r2 <- habitat_r2/cellStats(habitat_r2, max)
  
  habitat3 <- habitat_r * habitat_r2
  habitat3 <- habitat3/cellStats(habitat3, max)
  ## Habitat 3 is main habitat layer driving species abundance
  
  g.dummy <- gstat(formula = z~1, locations=~x+y, dummy = T, beta = 1, model = vgm(1, "Exp", 300, anis = c(3000, 1)), nmax=20)
  yy <- predict(g.dummy, newdata=xy, nsim=1)
  gridded(yy) = ~x+y
  
  # habitat raster, scaled from 0 to 1, secondary layer that impacts abundance
  
  habitat_4 <- blank_r
  habitat_4[] <- yy$sim1
  habitat_4 <- habitat_4 + -cellStats(habitat_4, min)
  habitat_4 <- habitat_4/cellStats(habitat_4, max)

# Species density raster
species_r <- blank_r
species_r[] <- rpois(length(yy$sim1), lambda = exp(-2 + 4.5*getValues(habitat3)+ 2*getValues(habitat_4) + 2*getValues(habitat3)*getValues(habitat_4)))

######### Possible sample points ###################

pts <- cbind(c(0.5, 0.5, 49.5, 49.5), c(49.5, 0.5, 0.5, 49.5))
smp_poly <- Polygon(coords = pts)
smp_poly <- Polygons(list(smp_poly), ID = 1)
smp_poly <- SpatialPolygons(list(smp_poly))
smp_poly <- SpatialPolygonsDataFrame(smp_poly, data = data.frame(ID = 1))

grid <- makegrid(smp_poly, cellsize = 0.5)
sample.points <- SpatialPointsDataFrame(grid[,1:2], proj4string = CRS("+proj=longlat +datum=WGS84 +no_defs"), data=data.frame(ID = 1:dim(grid)[1]))
sample_data <- as.data.frame(sample.points)

##############################################################
###################### Simple random #########################
#############################################################


sample.points.sf <- st_as_sf(sample.points)
sample.points.sf <- st_transform(sample.points.sf, "+proj=utm +zone=19 +datum=NAD83 +units=m +no_defs
+ellps=GRS80 +towgs84=0,0,0")

simp_random <- grts(sframe = sample.points.sf, 
                    n_base = 100,
                    seltype = "equal")

simp_count <- extract(species_r, cbind(simp_random$sites_base$lon_WGS84, simp_random$sites_base$lat_WGS84))
simp_habitat1 <- extract(habitat3, cbind(simp_random$sites_base$lon_WGS84, simp_random$sites_base$lat_WGS84))
simp_habitat2 <- extract(habitat_4, cbind(simp_random$sites_base$lon_WGS84, simp_random$sites_base$lat_WGS84))
glm1 <- glm(simp_count ~ simp_habitat1*simp_habitat2, family = poisson)

names(habitat3) <- "simp_habitat1"
names(habitat_4) <- "simp_habitat2"
rasters_for_predict <- stack(habitat3, habitat_4)
sr_predict <- raster::predict(model = glm1, object = rasters_for_predict, type = "response")
simple_random[[i]] <- sum(getValues(sr_predict)) # store predicted population size from simple random population estimate raster
truth[[i]] <- sum(getValues(species_r)) # store actual population size 

#######################################################################
#### Stratified random sampling - using generating model ################
#######################################################################

generating_map <- exp(-2 + 4.5*(habitat3)+ 2*(habitat_4) + 2*(habitat3)*(habitat_4))
cut_pts <- quantile(getValues(generating_map), c(0, 0.5, 0.8, 1))
rc <- reclassify(generating_map, c(-Inf, cut_pts[2], 1, cut_pts[2], cut_pts[3], 2, cut_pts[3], Inf, 3))
polys1 <- rasterToPolygons(rc, dissolve=TRUE)

# add strata to candidate sample points
o <- over(sample.points, polys1)
grid$strata <- o$layer
sample.points$strata <- o$layer
sample_data$strata <- o$layer
sample.points.sf <- st_as_sf(sample.points)
sample.points.sf <- st_transform(sample.points.sf, "+proj=utm +zone=19 +datum=NAD83 +units=m +no_defs
+ellps=GRS80 +towgs84=0,0,0")

generating_map1 <- generating_map
generating_map1[generating_map1>cut_pts[2]] <- NA
Sh1 <- sd(getValues(generating_map1), na.rm=T)
generating_map2 <- generating_map
generating_map2[generating_map2<cut_pts[2] |generating_map2>cut_pts[3]] <- NA
Sh2 <- sd(getValues(generating_map2), na.rm=T)
generating_map3 <- generating_map
generating_map3[generating_map3<cut_pts[3]] <- NA
Sh3 <- sd(getValues(generating_map3), na.rm=T)
Sh <- c(Sh1, Sh2, Sh3) # Standard deviation per strata
Nh <- summary(factor(getValues(rc))) # area per strata

neyman <- strAlloc(n.tot = 100, Nh = Nh, Sh = Sh, alloc = "neyman")
sample_neyman <- round(neyman$nh)

#Below ensures at least 1 survey per strata with a total of 100 surveys
# Prevents error being thrown in rare cases when 0 surveys selected in a strata
sample_neyman[1] <- ifelse(sample_neyman[1]==0, 1, sample_neyman[1])
sample_neyman[2] <- ifelse(sample_neyman[2]==0, 1, sample_neyman[2]) 
sample_neyman <- round(sample_neyman/sum(sample_neyman)*100)


Stratsites <- grts(sframe = sample.points.sf, 
                    n_base = sample_neyman,
                    seltype = "equal",
                    stratum_var = "strata")


strat_generating_count <- extract(species_r, cbind(Stratsites$sites_base$lon_WGS84, Stratsites$sites_base$lat_WGS84))
strat_generating_habitat1 <- extract(habitat3, cbind(Stratsites$sites_base$lon_WGS84, Stratsites$sites_base$lat_WGS84))
strat_generating_habitat2 <- extract(habitat_4, cbind(Stratsites$sites_base$lon_WGS84, Stratsites$sites_base$lat_WGS84))

glm2 <- glm(strat_generating_count ~ strat_generating_habitat1*strat_generating_habitat2, family = poisson)

names(rasters_for_predict)[1] <- "strat_generating_habitat1"
names(rasters_for_predict)[2] <- "strat_generating_habitat2"
strat_predict <- raster::predict(model = glm2, object = rasters_for_predict, type = "response")
strat_generating[[i]] <- sum(getValues(strat_predict))

#####################################################################################
#### Stratified random sampling - using only major habitat raster   #################
### represents imperfect model of a priori density ##################################
#####################################################################################

imperfect_generating_map <- exp(-2 + 4.5*habitat3)
cut_pts <- quantile(getValues(imperfect_generating_map), c(0, 0.5, 0.85, 1))

rc2 <- reclassify(imperfect_generating_map, c(-Inf, cut_pts[2], 1, cut_pts[2], cut_pts[3], 2, cut_pts[3], Inf, 3))

imperfect_generating_map1 <- imperfect_generating_map
imperfect_generating_map1[imperfect_generating_map1>cut_pts[2]] <- NA
Sh1 <- sd(getValues(imperfect_generating_map1), na.rm=T)
imperfect_generating_map2 <- imperfect_generating_map
imperfect_generating_map2[imperfect_generating_map2<cut_pts[2] |imperfect_generating_map2>cut_pts[3]] <- NA
Sh2 <- sd(getValues(imperfect_generating_map2), na.rm=T)
imperfect_generating_map3 <- imperfect_generating_map
imperfect_generating_map3[imperfect_generating_map3<cut_pts[3]] <- NA
Sh3 <- sd(getValues(imperfect_generating_map3), na.rm=T)

Sh <- c(Sh1, Sh2, Sh3) # Standard deviation per strata
Nh <- summary(factor(getValues(rc2))) # area per strata

neyman2 <- strAlloc(n.tot = 100, Nh = Nh, Sh = Sh, alloc = "neyman")
sample_neyman2 <- round(neyman2$nh)
sample_neyman2[1] <- ifelse(sample_neyman2[1]==0, 1, sample_neyman2[1])
sample_neyman2[2] <- ifelse(sample_neyman2[2]==0, 1, sample_neyman2[2]) 
sample_neyman2 <- round(sample_neyman2/sum(sample_neyman2)*100)
polys2 <- rasterToPolygons(rc2, dissolve=TRUE)

# Adds strata column to candidate survey locations
o2 <- over(sample.points, polys2)
grid$strata2 <- o2$layer
sample.points$strata2 <- o2$layer
sample_data$strata2 <- o2$layer
sample.points.sf <- st_as_sf(sample.points)
sample.points.sf <- st_transform(sample.points.sf, "+proj=utm +zone=19 +datum=NAD83 +units=m +no_defs
+ellps=GRS80 +towgs84=0,0,0")

Stratsites_imperfect <- grts(sframe = sample.points.sf, 
                   n_base = sample_neyman2,
                   seltype = "equal",
                   stratum_var = "strata2")


strat_imperfect_count <- extract(species_r, cbind(Stratsites_imperfect$sites_base$lon_WGS84, Stratsites_imperfect$sites_base$lat_WGS84))
strat_imperfect_habitat1 <- extract(habitat3, cbind(Stratsites_imperfect$sites_base$lon_WGS84, Stratsites_imperfect$sites_base$lat_WGS84))
strat_imperfect_habitat2 <- extract(habitat_4, cbind(Stratsites_imperfect$sites_base$lon_WGS84, Stratsites_imperfect$sites_base$lat_WGS84))

glm3 <- glm(strat_imperfect_count ~ strat_imperfect_habitat1*strat_imperfect_habitat2, family = poisson)

names(rasters_for_predict)[1] <- "strat_imperfect_habitat1"
names(rasters_for_predict)[2] <- "strat_imperfect_habitat2"
strat_predict2 <- raster::predict(model = glm3, object = rasters_for_predict, type = "response")
strat_imperfect[[i]] <- sum(getValues(strat_predict2))

###################################################################################
#### Stratified random sampling - using only random layer  ########################
### represents model stratified with no relationship to true density ##############
### example might be data collected opportunistically or designed to target #######
## another species ################################################################
###################################################################################

## Generate random layer unrelated to target species abundance
yy2 <- predict(g.dummy, newdata=xy, nsim=1)
gridded(yy2) = ~x+y
habitat_random <- blank_r
habitat_random[] <- yy2$sim1
habitat_random <- habitat_random + -cellStats(habitat_random, min)
habitat_random <- habitat_random/cellStats(habitat_random, max)

random_strata_map <- exp(-2 + 4.5*habitat_random)
cut_pts <- quantile(getValues(random_strata_map), c(0, 0.5, 0.85, 1))

rc3 <- reclassify(random_strata_map, c(-Inf, cut_pts[2], 1, cut_pts[2], cut_pts[3], 2, cut_pts[3], Inf, 3))

random_strata_map1 <- random_strata_map
random_strata_map1[random_strata_map1>cut_pts[2]] <- NA
Sh1 <- sd(getValues(random_strata_map1), na.rm=T)
random_strata_map2 <- random_strata_map
random_strata_map2[random_strata_map2<cut_pts[2] |random_strata_map2>cut_pts[3]] <- NA
Sh2 <- sd(getValues(random_strata_map2), na.rm=T)
random_strata_map3 <- random_strata_map
random_strata_map3[random_strata_map3<cut_pts[3]] <- NA
Sh3 <- sd(getValues(random_strata_map3), na.rm=T)

Sh <- c(Sh1, Sh2, Sh3)
Nh <- summary(factor(getValues(rc3))) # area per strata

neyman3 <- strAlloc(n.tot = 100, Nh = Nh, Sh = Sh, alloc = "neyman")
sample_neyman3 <- round(neyman3$nh)
sample_neyman3[1] <- ifelse(sample_neyman3[1]==0, 1, sample_neyman3[1])
sample_neyman3[2] <- ifelse(sample_neyman3[2]==0, 1, sample_neyman3[2]) 
sample_neyman3 <- round(sample_neyman3/sum(sample_neyman3)*100)

polys3 <- rasterToPolygons(rc3, dissolve=TRUE)

o3 <- over(sample.points, polys3)
grid$strata3 <- o3$layer
sample.points$strata3 <- o3$layer
sample.points.sf <- st_as_sf(sample.points)
sample.points.sf <- st_transform(sample.points.sf, "+proj=utm +zone=19 +datum=NAD83 +units=m +no_defs
+ellps=GRS80 +towgs84=0,0,0")

Stratsites_random <- grts(sframe = sample.points.sf, 
                             n_base = sample_neyman3,
                             seltype = "equal",
                             stratum_var = "strata3")


strat_random_count <- extract(species_r, cbind(Stratsites_random$sites_base$lon_WGS84, Stratsites_random$sites_base$lat_WGS84))
strat_random_habitat1 <- extract(habitat3, cbind(Stratsites_random$sites_base$lon_WGS84, Stratsites_random$sites_base$lat_WGS84))
strat_random_habitat2 <- extract(habitat_4, cbind(Stratsites_random$sites_base$lon_WGS84, Stratsites_random$sites_base$lat_WGS84))

glm4 <- glm(strat_random_count ~ strat_random_habitat1*strat_random_habitat2, family = poisson)

names(rasters_for_predict)[1] <- "strat_random_habitat1"
names(rasters_for_predict)[2] <- "strat_random_habitat2"
strat_predict3 <- raster::predict(model = glm4, object = rasters_for_predict, type = "response")
strat_random[[i]] <- sum(getValues(strat_predict3))
}

# Examine mean errors (estimate of bias) and mean absolute error (estimate of percision)

boot_out <- cbind(unlist(simple_random), unlist(strat_generating), unlist(strat_imperfect), unlist(strat_random))

mean_error_sim <- boot_out - unlist(truth)
summary(mean_error_sim)
c(median(mean_error_sim[,1]), median(mean_error_sim[,2]), median(mean_error_sim[,3]), median(mean_error_sim[,4]))

boot_plot <- c(mean_error_sim[,1], mean_error_sim[,2], mean_error_sim[,3], mean_error_sim[,4])
boot_plot <- as.data.frame(boot_plot)
boot_plot$strat <- rep(c("Simple random", "Generating model", "Imperfect model", "Random model"), each = num_boots)
names(boot_plot) <- c("difference", "survey")
boot_plot$survey <- factor(boot_plot$survey, levels = c("Simple random", "Generating model", "Imperfect model", "Random model"))

p <- ggplot(boot_plot, aes(x=survey, y=difference, fill=survey)) + 
  geom_violin(show.legend = FALSE)+ theme_few(18)+ ylim(c(-3000, 3000))+
  geom_boxplot(width=.1)+
  geom_hline(yintercept = 0, linetype="dashed", size =1)+ylab("Difference between predicted \n and  observed population size")+
  xlab("") + theme(legend.position="none")
p

setwd("C:/Users/slehnen/OneDrive - DOI/GCWA/Figures")
tiff('S1_fig7.tiff', width = 3000, height = 1500,  res = 300)
p
dev.off()

system('CMD /C "ECHO The R process has finished running && PAUSE"',
       invisible=FALSE, wait=FALSE)

anova_test <- aov(difference ~ survey, data = boot_plot)

summary(anova_test)


## Compare variance among groups

mean_abs_error_sim <- abs(boot_out - unlist(truth))

mae_plot <- c(mean_abs_error_sim[,1], mean_abs_error_sim[,2], mean_abs_error_sim[,3], mean_abs_error_sim[,4])
mae_plot <- as.data.frame(mae_plot)
mae_plot$strat <- rep(c("Simple random", "Generating model", "Imperfect model", "Random model"), each = num_boots)
names(mae_plot) <- c("difference", "survey")
tapply(mae_plot$difference, list(mae_plot$survey), mean)
tapply(mae_plot$difference, list(mae_plot$survey), median)

levene.dat.aov <- aov(mae_plot$difference ~ mae_plot$survey)
summary(levene.dat.aov)
TukeyHSD(levene.dat.aov)