Purpose: demonstrate methods for analyzing data from multiple animals #### Preamble
Load libraries
library(knitr)
library(lubridate)
library(raster)
library(move)
library(amt)
library(broom)
library(nlme)
library(lme4)
library(tidyverse)
library(geepack)
library(tictoc)
options(width=165)
opts_chunk$set(fig.width=12,fig.height=4.5, error=TRUE,cache = FALSE)
Record time for running all code
tic("total")
Read in annotated available data for RSF modeling
rsfdat<-read.csv("data/FisherRSF2018-EnvDATA-results.csv")
Simplify some variable names
names(rsfdat)[c(4,8:10)]<-c("id","LandClass", "Elevation", "PopDens")
rsfdat$case_<-as.numeric(rsfdat$case_)
Create landcover classes (as suggested by Scott Lapoint :)
rsfdat$LandClass<-as.character(rsfdat$LandClass)
rsfdat<-rsfdat %>% mutate(landC = fct_collapse(LandClass,
agri = c("11", "14", "30"),
forest =c("30","40","50","60", "70","80", "90","100"),
shrub= c("110", "130", "150"),
grass = c("120", "140"),
wet= c("160"),
other = c("170", "180", "190", "200", "210", "220")))
## Warning: Unknown levels in `f`: 11, 14, 40, 60, 80, 90, 150, 170, 180, 200, 220
Center and scale variables
rsfdat<-rsfdat %>% mutate(elev=as.numeric(scale(Elevation)),
popD=as.numeric(scale(PopDens)))
Weight available data
rsfdat$w<-ifelse(rsfdat$case_==1, 1, 5000)
Now, fit an RSF model to data from each animal. Since not all animals experience all habitat types, lets just explore forest versus non-forest
rsfdat$forest<-ifelse(rsfdat$landC=="forest", 1, 0)
Use 100 bootstraps for illustrative purposes (ideally, you would want many more bootstrap resamples)
tic("bootstrap")
nboot<-500
beta.hat<-matrix(NA,nboot, 3)
uids<-unique(rsfdat$id)
n.uids<-length(uids)
for(i in 1:nboot){
# reasample individuals
ids.boot<-data.frame(id=sample(uids, n.uids, replace=T))
# # Take all obs from these individuals
bootdat<-merge(ids.boot,rsfdat)
# # Now fit lm and pull off coeficients
boot.fit<-glm(case_~elev+popD+forest, weight=w,family=binomial(), data=bootdat)
beta.hat[i,]<-coef(boot.fit)[-1]
}
colnames(beta.hat)<-c("elev", "popD", "forest")
apply(beta.hat, 2, mean)
## elev popD forest
## 0.3204555 -0.2156009 0.1103598
apply(beta.hat, 2,sd)
## elev popD forest
## 0.56622267 0.08898874 0.14337821
#' Compare to glm
summary(glm(case_~elev+popD+forest,weight=w,family=binomial(), data=rsfdat))
##
## Call:
## glm(formula = case_ ~ elev + popD + forest, family = binomial(),
## data = rsfdat, weights = w)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.3510 -0.3384 -0.3332 -0.3086 4.9300
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -11.576017 0.022252 -520.230 < 2e-16 ***
## elev -0.039429 0.006228 -6.331 2.44e-10 ***
## popD -0.218895 0.007624 -28.711 < 2e-16 ***
## forest 0.092250 0.023005 4.010 6.07e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 820838 on 665366 degrees of freedom
## Residual deviance: 819786 on 665363 degrees of freedom
## AIC: 819794
##
## Number of Fisher Scoring iterations: 7
toc()
## bootstrap: 4474.84 sec elapsed
Here is the call, but in my first attempt, it caused R to crash
#geefit<-geeglm(case_~elev+popD+forest,family=binomial(), id=id, corstr="independence", data=bootdat)
Mixed models, random intercept only. fixed intercepts, random slopes. This takes a long time to fit
tic("glmerfit.ri")
rsfdat$id<-as.factor(rsfdat$id)
glmerfit.ri<-glmer(case_~elev+popD+forest+ (1|id), weight=w,
family=binomial(), data=rsfdat)
summary(glmerfit.ri)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
## Family: binomial ( logit )
## Formula: case_ ~ elev + popD + forest + (1 | id)
## Data: rsfdat
## Weights: w
##
## AIC BIC logLik deviance df.resid
## 818762.7 818819.8 -409376.4 818752.7 665362
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.30 -0.25 -0.23 -0.21 515.92
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.1719 0.4147
## Number of obs: 665367, groups: id, 8
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -11.303606 0.147366 -76.704 < 2e-16 ***
## elev 0.446318 0.018212 24.507 < 2e-16 ***
## popD -0.330671 0.008986 -36.797 < 2e-16 ***
## forest 0.123087 0.023272 5.289 1.23e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) elev popD
## elev 0.069
## popD -0.003 -0.087
## forest -0.147 -0.041 -0.060
toc()
## glmerfit.ri: 296.21 sec elapsed
A better model: fixed intercepts and random slopes. This takes a long time to fit and doesn’t converge
tic("glmerfit.rc")
glmerfit.rc<-glmer(case_~as.factor(id)+elev+popD+forest+(0+forest+elev+popD|id),
family=binomial(), weight=w, data=rsfdat)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.00519038 (tol = 0.001, component 1)
summary(glmerfit.rc)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
## Family: binomial ( logit )
## Formula: case_ ~ as.factor(id) + elev + popD + forest + (0 + forest + elev + popD | id)
## Data: rsfdat
## Weights: w
##
## AIC BIC logLik deviance df.resid
## 817289.9 817483.9 -408628.0 817255.9 665350
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.49 -0.25 -0.23 -0.20 574.13
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id forest 2.6352 1.6233
## elev 16.5978 4.0740 -0.31
## popD 0.7584 0.8709 -0.90 0.37
## Number of obs: 665367, groups: id, 8
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.5010 0.3283 -31.984 < 2e-16 ***
## as.factor(id)F2 4.9511 0.4308 11.492 < 2e-16 ***
## as.factor(id)F3 6.8512 0.9053 7.568 3.80e-14 ***
## as.factor(id)M1 4.4933 0.5999 7.489 6.92e-14 ***
## as.factor(id)M2 1.6845 0.3850 4.376 1.21e-05 ***
## as.factor(id)M3 -3.2282 0.4349 -7.422 1.15e-13 ***
## as.factor(id)M4 -0.3853 0.3322 -1.160 0.24620
## as.factor(id)M5 -7.9995 0.9670 -8.272 < 2e-16 ***
## elev 3.3949 0.4551 7.460 8.67e-14 ***
## popD -0.6027 0.1915 -3.147 0.00165 **
## forest 0.6272 0.3479 1.803 0.07141 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) a.()F2 a.()F3 a.()M1 a.()M2 a.()M3 a.()M4 a.()M5 elev popD
## as.fctr()F2 -0.751
## as.fctr()F3 -0.375 0.282
## as.fctr()M1 -0.523 0.391 0.197
## as.fctr()M2 -0.862 0.648 0.321 0.449
## as.fctr()M3 -0.751 0.562 0.279 0.394 0.646
## as.fctr()M4 -0.973 0.730 0.364 0.509 0.838 0.731
## as.fctr()M5 -0.279 0.203 0.110 0.126 0.234 0.209 0.271
## elev 0.004 -0.008 0.051 0.017 -0.012 -0.011 -0.002 0.018
## popD -0.051 0.001 0.005 -0.005 0.037 0.035 0.047 -0.029 0.043
## forest -0.065 0.086 0.052 0.057 0.051 0.045 0.065 -0.226 -0.016 -0.678
## convergence code: 0
## Model failed to converge with max|grad| = 0.00519038 (tol = 0.001, component 1)
toc()
## glmerfit.rc: 6062.74 sec elapsed
Read in coefficients from fit to individuals
load("data/rsfcoefs.Rdata")
rsf_coefs<-rsf_coefs %>% tidyr::spread(term, estimate)
Compare fixed effects summaries:
rsf_coefs<-rsf_coefs %>% select("elev", "forest", "popD")
apply(rsf_coefs,2,mean) # from fit to individuals
## elev forest popD
## 3.4476155 1.5702467 -0.6096788
apply(rsf_coefs,2,sd) # from fit to individuals
## elev forest popD
## 4.4674490 4.0692302 0.9530373
Compare estimates for each animal
rsf_coefs
## # A tibble: 8 x 3
## elev forest popD
## <dbl> <dbl> <dbl>
## 1 1.98 1.54 -0.256
## 2 7.71 -0.425 -0.238
## 3 10.3 0.374 -0.427
## 4 7.43 -0.0929 0.308
## 5 1.61 -0.894 -0.637
## 6 -2.62 0.593 -0.424
## 7 0.707 -0.0139 -0.333
## 8 0.467 11.5 -2.87
fixef(glmerfit.rc)[9:11]+ranef(glmerfit.rc)$id
## forest elev popD
## F1 3.4881891 -0.7431695 -0.2649493
## F2 -1.6485931 7.6606439 0.9938759
## F3 0.3737345 5.8861179 3.5709541
## M1 2.6579201 4.6016011 0.3032287
## M2 -2.0931445 1.5939326 0.6002173
## M3 0.5902234 -6.5604488 3.5714877
## M4 2.7538694 -2.0594345 -0.3335075
## M5 3.4812030 0.4664456 -1.5677636
Total Elapsed time
toc()
## total: 10841.77 sec elapsed