Purpose: demonstrate methods for analyzing data from multiple animals #### Preamble

Load libraries

library(knitr)
library(lubridate)
library(raster)
library(move)
library(amt) 
library(broom)
library(nlme)
library(lme4)
library(tidyverse)
library(geepack)
library(tictoc)
options(width=165)
opts_chunk$set(fig.width=12,fig.height=4.5, error=TRUE,cache = FALSE)

Record time for running all code

tic("total")

Read in annotated available data for RSF modeling

rsfdat<-read.csv("data/FisherRSF2018-EnvDATA-results.csv")

Simplify some variable names

names(rsfdat)[c(4,8:10)]<-c("id","LandClass", "Elevation", "PopDens")
rsfdat$case_<-as.numeric(rsfdat$case_)

Create landcover classes (as suggested by Scott Lapoint :)

rsfdat$LandClass<-as.character(rsfdat$LandClass)
rsfdat<-rsfdat %>% mutate(landC = fct_collapse(LandClass,
                                               agri = c("11", "14", "30"),
                                               forest =c("30","40","50","60", "70","80", "90","100"),
                                               shrub= c("110", "130", "150"),
                                               grass = c("120", "140"),
                                               wet= c("160"),
                                               other = c("170", "180", "190", "200", "210", "220")))
## Warning: Unknown levels in `f`: 11, 14, 40, 60, 80, 90, 150, 170, 180, 200, 220

Center and scale variables

rsfdat<-rsfdat %>% mutate(elev=as.numeric(scale(Elevation)), 
                          popD=as.numeric(scale(PopDens)))

Weight available data

rsfdat$w<-ifelse(rsfdat$case_==1, 1, 5000)

Now, fit an RSF model to data from each animal. Since not all animals experience all habitat types, lets just explore forest versus non-forest

rsfdat$forest<-ifelse(rsfdat$landC=="forest", 1, 0)

Population-level patterns

Cluster-level bootstrap

Use 100 bootstraps for illustrative purposes (ideally, you would want many more bootstrap resamples)

tic("bootstrap")
nboot<-500 
beta.hat<-matrix(NA,nboot, 3)
uids<-unique(rsfdat$id)
n.uids<-length(uids)
for(i in 1:nboot){
   # reasample individuals
   ids.boot<-data.frame(id=sample(uids, n.uids, replace=T))
   
#   # Take all obs from these individuals
   bootdat<-merge(ids.boot,rsfdat)
   
#   # Now fit lm and pull off coeficients
   boot.fit<-glm(case_~elev+popD+forest, weight=w,family=binomial(), data=bootdat)
   beta.hat[i,]<-coef(boot.fit)[-1]
}
 colnames(beta.hat)<-c("elev", "popD", "forest")
 apply(beta.hat, 2, mean)
##       elev       popD     forest 
##  0.3204555 -0.2156009  0.1103598
 apply(beta.hat, 2,sd)
##       elev       popD     forest 
## 0.56622267 0.08898874 0.14337821
 #' Compare to glm
 summary(glm(case_~elev+popD+forest,weight=w,family=binomial(), data=rsfdat))
## 
## Call:
## glm(formula = case_ ~ elev + popD + forest, family = binomial(), 
##     data = rsfdat, weights = w)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.3510  -0.3384  -0.3332  -0.3086   4.9300  
## 
## Coefficients:
##               Estimate Std. Error  z value Pr(>|z|)    
## (Intercept) -11.576017   0.022252 -520.230  < 2e-16 ***
## elev         -0.039429   0.006228   -6.331 2.44e-10 ***
## popD         -0.218895   0.007624  -28.711  < 2e-16 ***
## forest        0.092250   0.023005    4.010 6.07e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 820838  on 665366  degrees of freedom
## Residual deviance: 819786  on 665363  degrees of freedom
## AIC: 819794
## 
## Number of Fisher Scoring iterations: 7
 toc()
## bootstrap: 4474.84 sec elapsed

GEE

Here is the call, but in my first attempt, it caused R to crash

#geefit<-geeglm(case_~elev+popD+forest,family=binomial(), id=id, corstr="independence", data=bootdat)

Mixed models, random intercept only. fixed intercepts, random slopes. This takes a long time to fit

tic("glmerfit.ri")
rsfdat$id<-as.factor(rsfdat$id)
glmerfit.ri<-glmer(case_~elev+popD+forest+ (1|id), weight=w,
                   family=binomial(), data=rsfdat)
summary(glmerfit.ri)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: case_ ~ elev + popD + forest + (1 | id)
##    Data: rsfdat
## Weights: w
## 
##       AIC       BIC    logLik  deviance  df.resid 
##  818762.7  818819.8 -409376.4  818752.7    665362 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
##  -0.30  -0.25  -0.23  -0.21 515.92 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.1719   0.4147  
## Number of obs: 665367, groups:  id, 8
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -11.303606   0.147366 -76.704  < 2e-16 ***
## elev          0.446318   0.018212  24.507  < 2e-16 ***
## popD         -0.330671   0.008986 -36.797  < 2e-16 ***
## forest        0.123087   0.023272   5.289 1.23e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##        (Intr) elev   popD  
## elev    0.069              
## popD   -0.003 -0.087       
## forest -0.147 -0.041 -0.060
toc()
## glmerfit.ri: 296.21 sec elapsed

A better model: fixed intercepts and random slopes. This takes a long time to fit and doesn’t converge

tic("glmerfit.rc")
glmerfit.rc<-glmer(case_~as.factor(id)+elev+popD+forest+(0+forest+elev+popD|id),
                family=binomial(),  weight=w, data=rsfdat)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.00519038 (tol = 0.001, component 1)
summary(glmerfit.rc)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: case_ ~ as.factor(id) + elev + popD + forest + (0 + forest +      elev + popD | id)
##    Data: rsfdat
## Weights: w
## 
##       AIC       BIC    logLik  deviance  df.resid 
##  817289.9  817483.9 -408628.0  817255.9    665350 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
##  -0.49  -0.25  -0.23  -0.20 574.13 
## 
## Random effects:
##  Groups Name   Variance Std.Dev. Corr       
##  id     forest  2.6352  1.6233              
##         elev   16.5978  4.0740   -0.31      
##         popD    0.7584  0.8709   -0.90  0.37
## Number of obs: 665367, groups:  id, 8
## 
## Fixed effects:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -10.5010     0.3283 -31.984  < 2e-16 ***
## as.factor(id)F2   4.9511     0.4308  11.492  < 2e-16 ***
## as.factor(id)F3   6.8512     0.9053   7.568 3.80e-14 ***
## as.factor(id)M1   4.4933     0.5999   7.489 6.92e-14 ***
## as.factor(id)M2   1.6845     0.3850   4.376 1.21e-05 ***
## as.factor(id)M3  -3.2282     0.4349  -7.422 1.15e-13 ***
## as.factor(id)M4  -0.3853     0.3322  -1.160  0.24620    
## as.factor(id)M5  -7.9995     0.9670  -8.272  < 2e-16 ***
## elev              3.3949     0.4551   7.460 8.67e-14 ***
## popD             -0.6027     0.1915  -3.147  0.00165 ** 
## forest            0.6272     0.3479   1.803  0.07141 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) a.()F2 a.()F3 a.()M1 a.()M2 a.()M3 a.()M4 a.()M5 elev   popD  
## as.fctr()F2 -0.751                                                               
## as.fctr()F3 -0.375  0.282                                                        
## as.fctr()M1 -0.523  0.391  0.197                                                 
## as.fctr()M2 -0.862  0.648  0.321  0.449                                          
## as.fctr()M3 -0.751  0.562  0.279  0.394  0.646                                   
## as.fctr()M4 -0.973  0.730  0.364  0.509  0.838  0.731                            
## as.fctr()M5 -0.279  0.203  0.110  0.126  0.234  0.209  0.271                     
## elev         0.004 -0.008  0.051  0.017 -0.012 -0.011 -0.002  0.018              
## popD        -0.051  0.001  0.005 -0.005  0.037  0.035  0.047 -0.029  0.043       
## forest      -0.065  0.086  0.052  0.057  0.051  0.045  0.065 -0.226 -0.016 -0.678
## convergence code: 0
## Model failed to converge with max|grad| = 0.00519038 (tol = 0.001, component 1)
toc()
## glmerfit.rc: 6062.74 sec elapsed

Read in coefficients from fit to individuals

load("data/rsfcoefs.Rdata")
rsf_coefs<-rsf_coefs %>% tidyr::spread(term, estimate)

Compare fixed effects summaries:

rsf_coefs<-rsf_coefs %>% select("elev", "forest", "popD")
apply(rsf_coefs,2,mean) # from fit to individuals
##       elev     forest       popD 
##  3.4476155  1.5702467 -0.6096788
apply(rsf_coefs,2,sd) # from fit to individuals
##      elev    forest      popD 
## 4.4674490 4.0692302 0.9530373

Compare estimates for each animal

rsf_coefs
## # A tibble: 8 x 3
##      elev   forest   popD
##     <dbl>    <dbl>  <dbl>
## 1   1.98    1.54   -0.256
## 2   7.71   -0.425  -0.238
## 3  10.3     0.374  -0.427
## 4   7.43   -0.0929  0.308
## 5   1.61   -0.894  -0.637
## 6  -2.62    0.593  -0.424
## 7   0.707  -0.0139 -0.333
## 8   0.467  11.5    -2.87
fixef(glmerfit.rc)[9:11]+ranef(glmerfit.rc)$id
##        forest       elev       popD
## F1  3.4881891 -0.7431695 -0.2649493
## F2 -1.6485931  7.6606439  0.9938759
## F3  0.3737345  5.8861179  3.5709541
## M1  2.6579201  4.6016011  0.3032287
## M2 -2.0931445  1.5939326  0.6002173
## M3  0.5902234 -6.5604488  3.5714877
## M4  2.7538694 -2.0594345 -0.3335075
## M5  3.4812030  0.4664456 -1.5677636

Total Elapsed time

toc()
## total: 10841.77 sec elapsed