Purpose: demonstrate methods for analyzing data from multiple animals, allowing for multiple random slopes.
NOTE the mixed models here take considerable time to run (you may want to use caching to speed up if you plan to run multiple times). Change the following line, below, to enable caching: opts_chunk$set(fig.width=12,fig.height=4.5, error=TRUE,cache = FALSE).
Load libraries
library(knitr)
library(lubridate)
library(raster)
library(move)
library(amt)
library(broom)
library(nlme)
library(lme4)
library(tidyverse)
library(tictoc)
library(here)
library(glmmTMB)
library(GGally)
options(width=165)
opts_chunk$set(fig.width=12,fig.height=4.5, error=TRUE,cache = TRUE)
Record time for running all code
tic("total")
Read in annotated available data for RSF modeling
rsfdat <- read_rds("data/rsf_dat.rds")
Forest versus non-forest
rsfdat$forest <- ifelse(rsfdat$landC == "forest", 1, 0)
Weight available data
rsfdat$w <- ifelse(rsfdat$case_ == 1, 1, 5e3)
Lets start by fitting “full” models to individuals and then look to see if coeficients are correlated (high values of one coefficient correspond to high/low values of another coefficient)
class(rsfdat) <- class(rsfdat)[-1] # this should be fixed in the dev version
rsffits <- rsfdat %>% group_by(id) %>% nest %>%
mutate(mod = map(data, function(x) glm(case_ ~ elev + popD + forest, data = x, weight=w,family = binomial)))
Now, summarize fits to individual animals
rsffits2 <- rsffits %>%
dplyr::mutate(tidy = purrr::map(mod, broom::tidy),
n = purrr::map(data, nrow) %>% simplify())
rsffits2 %>% unnest(tidy) %>% print(., n=1000)
## # A tibble: 32 x 7
## id n term estimate std.error statistic p.value
## <fct> <int> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 M1 19301 (Intercept) -6.37 0.570 -11.2 5.27e- 29
## 2 M1 19301 elev 6.96 0.753 9.24 2.48e- 20
## 3 M1 19301 popD 0.306 0.0421 7.27 3.72e- 13
## 4 M1 19301 forest 0.117 0.0717 1.64 1.02e- 1
## 5 M4 188120 (Intercept) -11.6 0.0641 -181. 0.
## 6 M4 188120 elev 0.443 0.0797 5.56 2.73e- 8
## 7 M4 188120 popD -0.313 0.0140 -22.3 1.92e-110
## 8 M4 188120 forest 1.14 0.0220 51.7 0.
## 9 F2 63075 (Intercept) -6.50 0.314 -20.7 3.18e- 95
## 10 F2 63075 elev 6.90 0.423 16.3 6.62e- 60
## 11 F2 63075 popD -0.224 0.0292 -7.65 1.99e- 14
## 12 F2 63075 forest 0.171 0.0391 4.38 1.21e- 5
## 13 M3 51156 (Intercept) -14.4 0.318 -45.3 0.
## 14 M3 51156 elev -3.80 0.378 -10.0 1.02e- 23
## 15 M3 51156 popD -0.428 0.0249 -17.2 2.65e- 66
## 16 M3 51156 forest 0.457 0.0492 9.29 1.57e- 20
## 17 F3 31526 (Intercept) -2.93 0.951 -3.08 2.06e- 3
## 18 F3 31526 elev 10.1 1.11 9.07 1.20e- 19
## 19 F3 31526 popD 0.0930 0.103 0.905 3.66e- 1
## 20 F3 31526 forest 0.0987 0.0666 1.48 1.38e- 1
## 21 M2 34396 (Intercept) -11.2 0.174 -64.3 0.
## 22 M2 34396 elev 0.357 0.206 1.73 8.40e- 2
## 23 M2 34396 popD -0.498 0.0424 -11.7 8.70e- 32
## 24 M2 34396 forest 0.981 0.0519 18.9 1.13e- 79
## 25 F1 28326 (Intercept) -12.6 0.292 -43.1 0.
## 26 F1 28326 elev 0.152 0.325 0.467 6.41e- 1
## 27 F1 28326 popD 0.300 0.0573 5.24 1.64e- 7
## 28 F1 28326 forest 1.56 0.0788 19.8 4.29e- 87
## 29 M5 275088 (Intercept) -12.7 0.132 -96.1 0.
## 30 M5 275088 elev 0.452 0.0196 23.1 3.46e-118
## 31 M5 275088 popD -1.29 0.209 -6.18 6.36e- 10
## 32 M5 275088 forest -0.160 0.0274 -5.85 4.94e- 9
Keep just the estimates for later comparisons
rsf_coefs<-rsffits2 %>% unnest(tidy) %>%
select(-(std.error:p.value))
rsf_coefs_wide <- rsf_coefs %>% spread(., term, estimate)
Explore coefficients using a pair plot
my_fn <- function(data, mapping, ...){
p <- ggplot(data = data, mapping = mapping) +
geom_point() +
geom_smooth(method=lm, fill="blue", color="blue", ...)
p
}
rsf_coefs_wide %>% select(elev, popD, forest) %>%
GGally::ggpairs(., lower = list(continuous = my_fn), upper = list(continuous = wrap("cor", method= "spearman")))
Process:
tic("mixed mod")
fisher.tmp <- glmmTMB(case_ ~ elev + forest + popD + (1|id) + (0 + elev|id) + (0 + popD |id) +(0 +forest | id), family=binomial(), data = rsfdat,
doFit=FALSE, weights = w)
Set variance of random intercept to 10^6
fisher.tmp$parameters$theta[1] <- log(1e3)
nvarparm<-length(fisher.tmp$parameters$theta)
fisher.tmp$mapArg <- list(theta=factor(c(NA,1:(nvarparm-1))))
fisher.rsf.ind <- glmmTMB:::fitTMB(fisher.tmp)
summary(fisher.rsf.ind)
## Family: binomial ( logit )
## Formula: case_ ~ elev + forest + popD + (1 | id) + (0 + elev | id) + (0 + popD | id) + (0 + forest | id)
## Data: rsfdat
## Weights: w
##
## AIC BIC logLik deviance df.resid
## 816453.2 816533.4 -408219.6 816439.2 690981
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## id (Intercept) 1.000e+06 1000.0000
## id.1 elev 1.833e+01 4.2818
## id.2 popD 1.963e-01 0.4430
## id.3 forest 3.185e-01 0.5644
## Number of obs: 690988, groups: id, 8
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.773e-04 3.536e+02 0.000 1.00000
## elev 2.614e+00 1.526e+00 1.714 0.08659 .
## forest 5.443e-01 2.005e-01 2.715 0.00662 **
## popD -2.384e-01 1.597e-01 -1.493 0.13547
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
toc()
## mixed mod: 1258.58 sec elapsed
Compare fixed effects (above) to mean coefficient from individual fits. These are quite similar.
# MEAN COEFFICIENT FROM INDIVIDUAL FITS
rsf_coefs %>% filter(term!="(Intercept)") %>% group_by(term) %>% summarize(mean=mean(estimate), se.mean=sd(estimate)/sqrt(n()))
## # A tibble: 3 x 3
## term mean se.mean
## <chr> <dbl> <dbl>
## 1 elev 2.69 1.66
## 2 forest 0.545 0.215
## 3 popD -0.257 0.185
# POPULATION MEAN COEFFICIENT FROM MIXED MODEL
summary(fisher.rsf.ind)$coef$cond[-1,]
## Estimate Std. Error z value Pr(>|z|)
## elev 2.6143378 1.5255720 1.713677 0.086588052
## forest 0.5443407 0.2004621 2.715430 0.006618986
## popD -0.2383814 0.1596792 -1.492877 0.135469448
Compare variance of individual esimtates to variance component from mixed model. Note, the variance of the individual coefficients will be larger (too big) since it includes true variability and sampling variability.
# VARIANCE OF COEFFICIENTS FROM INDIVIDUAL FITS
rsf_coefs %>% filter(term!="(Intercept)") %>% group_by(term) %>% summarize(var_elev=var(estimate), sd_elev=sd(estimate))
## # A tibble: 3 x 3
## term var_elev sd_elev
## <chr> <dbl> <dbl>
## 1 elev 22.0 4.69
## 2 forest 0.370 0.608
## 3 popD 0.274 0.523
# VARIANCE COMPONENT FROM MIXED MODEL
summary(fisher.rsf.ind)$varcor
##
## Conditional model:
## Groups Name Std.Dev.
## id (Intercept) 1000.00000
## id.1 elev 4.28181
## id.2 popD 0.44305
## id.3 forest 0.56440
Look at individual coefficients
# COEFFICIENTS FROM INDIVIDUAL FITS
rsf_coefs2 <- rsf_coefs %>% filter(term!="(Intercept)") %>% arrange(term, id)
rsf_coefs2$method<-"indiv_fits"
names(rsf_coefs2)[4]<-"Estimate"
rsf_coefs2<-rsf_coefs2[,-2]
# COEFFICIENTS FROM MIXED MODEL
mixed_coefs <- coef(fisher.rsf.ind)$cond$id[,-1]
mixed_coefs$id <- rownames(mixed_coefs)
mixed_coefs <- mixed_coefs %>%
gather('elev', 'forest', 'popD', key="term", value="Estimate")
mixed_coefs$method<-"Mixed"
Combine and plot
allests<-rbind(mixed_coefs, rsf_coefs2)
Note that because of the large sample size, individual coefficients show very little shrinkage towards the overall mean.
ggplot(data=allests,
aes(x=id, y=Estimate, col=method))+
geom_point(size=3.5, position=position_dodge(width=0.3))+
xlab("")+ylab(expression(hat(beta)))+facet_wrap(~term)
Read in annotated available data for SSF modeling
ssfdat <- read_rds("data/ssf_dat.rds")
Create ID for each unique strata, by combining ID and step_id
ssfdat$step_id = with(ssfdat, paste0(id, step_id_, sep = ""))
Since not all animals experience all habitat types, lets just explore forest versus non-forest
ssfdat$forest <- ifelse(ssfdat$landC == "forest", 1, 0)
The process is the same as for the mixed RSF:
However, there are a few differences:
tic("mixed ssf")
fisher.tmp <- glmmTMB(case_ ~ elev + popD + forest + (1|step_id) + (0 + elev|id) + (0 + popD|id) + (0+ forest |id), family=poisson(), data = ssfdat,
doFit=FALSE)
Set variance of random intercept to 10^6
fisher.tmp$parameters$theta[1] <- log(1e3)
nvarparm<-length(fisher.tmp$parameters$theta)
fisher.tmp$mapArg <- list(theta=factor(c(NA,1:(nvarparm-1))))
fisher.ssf <- glmmTMB:::fitTMB(fisher.tmp)
summary(fisher.ssf)
## Family: poisson ( log )
## Formula: case_ ~ elev + popD + forest + (1 | step_id) + (0 + elev | id) + (0 + popD | id) + (0 + forest | id)
## Data: ssfdat
##
## AIC BIC logLik deviance df.resid
## 130130.9 130194.9 -65058.4 130116.9 69524
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## step_id (Intercept) 1.000e+06 1.000e+03
## id elev 1.829e+00 1.352e+00
## id.1 popD 3.581e-12 1.892e-06
## id.2 forest 9.715e-02 3.117e-01
## Number of obs: 69531, groups: step_id, 6321; id, 7
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.32630 12.57981 -0.185 0.85329
## elev 1.67740 0.78539 2.136 0.03270 *
## popD -0.04948 0.04817 -1.027 0.30427
## forest 0.34891 0.12871 2.711 0.00671 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
toc()
## mixed ssf: 18.36 sec elapsed
library(TwoStepCLogit)
tic("two-step")
twostep<-Ts.estim(formula = case_ ~ elev+ popD+forest+
strata(step_id) +cluster(id),data = ssfdat, random = ~ elev+popD+forest, all.m.1=F)
twostep
## Call:
## Ts.estim(formula = case_ ~ elev + popD + forest + strata(step_id) +
## cluster(id), data = ssfdat, random = ~elev + popD + forest,
## all.m.1 = F)
##
## beta coefficients:
## estimate se
## elev 1.807810 0.796812
## popD -0.044808 0.052567
## forest 0.344730 0.138059
##
## D = estimate of the between-cluster variance-covariance matrix D,
## for the random coefficients only:
## elev popD forest
## elev 2.785133 0.000000000 0.0000000
## popD 0.000000 0.001321728 0.0000000
## forest 0.000000 0.000000000 0.1143032
toc()
## two-step: 18.22 sec elapsed
class(ssfdat) <- class(ssfdat)[-1] # this should be fixed in the dev version
ssffits <- ssfdat %>% group_by(id) %>% nest %>%
mutate(mod = map(data, function(x) (fit_issf(case_ ~ elev + popD + forest + strata(step_id), data = x))))
Now, summarize fits to individual animals
ssffits <- ssffits %>%
mutate(tidy = map(mod, ~ broom::tidy(.x$model)),
n = map_int(data, nrow))
ssffits %>% unnest(tidy) %>% filter(term!="(Intercept)") %>% print(., n=1000)
## # A tibble: 21 x 9
## id n term estimate std.error statistic p.value conf.low conf.high
## <fct> <int> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 M4 13024 elev 0.725 0.620 1.17 2.42e- 1 -0.490 1.94
## 2 M4 13024 popD -0.0955 0.0726 -1.31 1.89e- 1 -0.238 0.0469
## 3 M4 13024 forest 0.461 0.0731 6.30 2.95e-10 0.317 0.604
## 4 F2 3817 elev 5.75 1.63 3.54 3.99e- 4 2.57 8.94
## 5 F2 3817 popD -0.0504 0.137 -0.368 7.13e- 1 -0.318 0.218
## 6 F2 3817 forest 0.414 0.143 2.89 3.80e- 3 0.134 0.694
## 7 M3 2233 elev 4.23 2.94 1.44 1.50e- 1 -1.53 9.99
## 8 M3 2233 popD -0.189 0.178 -1.06 2.88e- 1 -0.539 0.160
## 9 M3 2233 forest 0.393 0.190 2.06 3.92e- 2 0.0194 0.766
## 10 F3 1815 elev 5.91 4.38 1.35 1.78e- 1 -2.68 14.5
## 11 F3 1815 popD -0.408 0.373 -1.09 2.74e- 1 -1.14 0.323
## 12 F3 1815 forest 0.255 0.245 1.04 2.98e- 1 -0.225 0.736
## 13 M2 14113 elev 2.54 0.838 3.03 2.43e- 3 0.898 4.18
## 14 M2 14113 popD -0.0681 0.105 -0.647 5.17e- 1 -0.274 0.138
## 15 M2 14113 forest 0.351 0.0895 3.92 8.87e- 5 0.175 0.526
## 16 F1 12848 elev 0.567 0.732 0.774 4.39e- 1 -0.868 2.00
## 17 F1 12848 popD 0.262 0.153 1.71 8.64e- 2 -0.0374 0.560
## 18 F1 12848 forest 0.839 0.106 7.92 2.29e-15 0.631 1.05
## 19 M5 21681 elev 0.0558 0.191 0.292 7.70e- 1 -0.319 0.431
## 20 M5 21681 popD 2.55 0.773 3.29 9.94e- 4 1.03 4.06
## 21 M5 21681 forest -0.301 0.0902 -3.34 8.50e- 4 -0.478 -0.124
Keep just coefficients for comparisons
ssf_coefs<-ssffits %>% unnest(tidy) %>%
select(-(std.error:conf.high))
Compare fixed effects from mixed model and TS.Estim (above) to mean coefficient from individual fits
# MEAN COEFFICIENT FROM INDIVIDUAL FITS
se<-function(x){sd(x)/sqrt(length(x))}
ssf_coefs %>% filter(term!="(Intercept)") %>% group_by(term) %>% summarize(mean=mean(estimate), se=se(estimate))
## # A tibble: 3 x 3
## term mean se
## <chr> <dbl> <dbl>
## 1 elev 2.83 0.943
## 2 forest 0.344 0.128
## 3 popD 0.285 0.384
# POPULATION MEAN COEFFICIENT FROM MIXED MODEL
summary(fisher.ssf)$coef$cond[-1,]
## Estimate Std. Error z value Pr(>|z|)
## elev 1.67740175 0.78538855 2.135760 0.03269895
## popD -0.04948474 0.04816864 -1.027323 0.30426850
## forest 0.34890526 0.12870584 2.710874 0.00671062
# POPULATION MEAN COEFFICIENT FROM TWO-STEP (TS.Estim)
twostep$beta
## elev popD forest
## 1.80780990 -0.04480754 0.34473004
Compare variance of individual esimtates to variance component from mixed model.
# VARIANCE OF COEFFICIENTs FROM INDIVIDUAL FITS
ssf_coefs %>% filter(term!="(Intercept)") %>% group_by(term) %>% summarize(var_elev=var(estimate), sd_elev=sd(estimate))
## # A tibble: 3 x 3
## term var_elev sd_elev
## <chr> <dbl> <dbl>
## 1 elev 6.22 2.49
## 2 forest 0.115 0.339
## 3 popD 1.03 1.02
# VARIANCE OF COEFFICIENTs FROM MIXED MODEL
summary(fisher.ssf)$varcor #var
##
## Conditional model:
## Groups Name Std.Dev.
## step_id (Intercept) 1.0000e+03
## id elev 1.3523e+00
## id.1 popD 1.8923e-06
## id.2 forest 3.1168e-01
# VARIANCE OF COEFFICIENTs FROM TWO-STEP (TS.Estim)
sqrt(twostep$D) # sd
## elev popD forest
## elev 1.668872 0.00000000 0.0000000
## popD 0.000000 0.03635558 0.0000000
## forest 0.000000 0.00000000 0.3380876
Look at individual coefficients
# COEFFICIENTs FROM INDIVIDUAL FITS
ssf_coefs2 <- ssf_coefs %>% filter(term!="(Intercept)") %>% arrange(term, id)
ssf_coefs2$method<-"indiv_fits"
names(ssf_coefs2)[4]<-"Estimate"
ssf_coefs2<-ssf_coefs2[,-2]
# COEFFICIENTs FROM MIXED MODEL
mixed_coefs <- coef(fisher.ssf)$cond$id[,-1]
mixed_coefs$id <- rownames(mixed_coefs)
mixed_coefs <- mixed_coefs %>%
gather('elev', 'forest', 'popD', key="term", value="Estimate")
mixed_coefs$method<-"Mixed"
# COEFFICIENTS FROM TWO-STEP (TS.Estim)
twostepcoefs<-matrix(twostep$beta, nrow(twostep$r.effect), ncol(twostep$r.effect), byrow=TRUE)+twostep$r.effect
twostepcoefs<-as.data.frame(twostepcoefs)
twostepcoefs$method<-"two_step"
twostepcoefs$id<-rownames(twostep$r.effect)
twostepcoefs<-twostepcoefs %>% gather('elev', 'popD','forest', key="term", value="Estimate")
Combine and plot
allests<-rbind(mixed_coefs, ssf_coefs2,twostepcoefs)
Note here that a few coefficients fo relevation and popD show considerable shrinkage. These are coefficients with large SEs as seen by inspecting the individual fits.
ggplot(data=allests,
aes(x=id, y=Estimate, col=method))+
geom_point(size=3.5, position=position_dodge(width=0.3))+
xlab("")+ylab(expression(hat(beta)))+facet_wrap(~term)
Look at elevation, with coefficients sorted by SE. Note, the 3 coefficients with the largest SEs show the most shrinkage.
ssffits %>% unnest(tidy) %>% filter(term=="elev") %>% arrange(desc(std.error)) %>% select(-c(statistic:conf.high))
## # A tibble: 7 x 5
## id n term estimate std.error
## <fct> <int> <chr> <dbl> <dbl>
## 1 F3 1815 elev 5.91 4.38
## 2 M3 2233 elev 4.23 2.94
## 3 F2 3817 elev 5.75 1.63
## 4 M2 14113 elev 2.54 0.838
## 5 F1 12848 elev 0.567 0.732
## 6 M4 13024 elev 0.725 0.620
## 7 M5 21681 elev 0.0558 0.191
Similar story for popD. M5 shows considerably shrinkage due to the large SE associated with its coefficient.
ssffits %>% unnest(tidy) %>% filter(term=="popD") %>% arrange(desc(std.error)) %>% select(-c(statistic:conf.high))
## # A tibble: 7 x 5
## id n term estimate std.error
## <fct> <int> <chr> <dbl> <dbl>
## 1 M5 21681 popD 2.55 0.773
## 2 F3 1815 popD -0.408 0.373
## 3 M3 2233 popD -0.189 0.178
## 4 F1 12848 popD 0.262 0.153
## 5 F2 3817 popD -0.0504 0.137
## 6 M2 14113 popD -0.0681 0.105
## 7 M4 13024 popD -0.0955 0.0726
Total Elapsed time
toc()