Purpose: demonstrate methods for analyzing data from multiple animals, allowing for multiple random slopes.

NOTE the mixed models here take considerable time to run (you may want to use caching to speed up if you plan to run multiple times). Change the following line, below, to enable caching: opts_chunk$set(fig.width=12,fig.height=4.5, error=TRUE,cache = FALSE).

Preamble

Load libraries

library(knitr)
library(lubridate)
library(raster)
library(move)
library(amt) 
library(broom)
library(nlme)
library(lme4)
library(tidyverse)
library(tictoc)
library(here)
library(glmmTMB)
library(GGally)

options(width=165)
opts_chunk$set(fig.width=12,fig.height=4.5, error=TRUE,cache = TRUE)

Record time for running all code

tic("total")

RSF models

Read in annotated available data for RSF modeling

rsfdat <- read_rds("data/rsf_dat.rds")

Forest versus non-forest

rsfdat$forest <- ifelse(rsfdat$landC == "forest", 1, 0)

Weight available data

rsfdat$w <- ifelse(rsfdat$case_ == 1, 1, 5e3)

Individual fits

Lets start by fitting “full” models to individuals and then look to see if coeficients are correlated (high values of one coefficient correspond to high/low values of another coefficient)

class(rsfdat) <- class(rsfdat)[-1] # this should be fixed in the dev version
rsffits <- rsfdat %>% group_by(id) %>% nest %>% 
  mutate(mod = map(data, function(x) glm(case_ ~ elev + popD + forest, data = x, weight=w,family = binomial))) 

Now, summarize fits to individual animals

rsffits2 <- rsffits %>%
  dplyr::mutate(tidy = purrr::map(mod, broom::tidy),
                n = purrr::map(data, nrow) %>% simplify())
rsffits2 %>% unnest(tidy) %>% print(., n=1000)
## # A tibble: 32 x 7
##    id         n term        estimate std.error statistic   p.value
##    <fct>  <int> <chr>          <dbl>     <dbl>     <dbl>     <dbl>
##  1 M1     19301 (Intercept)  -6.37      0.570    -11.2   5.27e- 29
##  2 M1     19301 elev          6.96      0.753      9.24  2.48e- 20
##  3 M1     19301 popD          0.306     0.0421     7.27  3.72e- 13
##  4 M1     19301 forest        0.117     0.0717     1.64  1.02e-  1
##  5 M4    188120 (Intercept) -11.6       0.0641  -181.    0.       
##  6 M4    188120 elev          0.443     0.0797     5.56  2.73e-  8
##  7 M4    188120 popD         -0.313     0.0140   -22.3   1.92e-110
##  8 M4    188120 forest        1.14      0.0220    51.7   0.       
##  9 F2     63075 (Intercept)  -6.50      0.314    -20.7   3.18e- 95
## 10 F2     63075 elev          6.90      0.423     16.3   6.62e- 60
## 11 F2     63075 popD         -0.224     0.0292    -7.65  1.99e- 14
## 12 F2     63075 forest        0.171     0.0391     4.38  1.21e-  5
## 13 M3     51156 (Intercept) -14.4       0.318    -45.3   0.       
## 14 M3     51156 elev         -3.80      0.378    -10.0   1.02e- 23
## 15 M3     51156 popD         -0.428     0.0249   -17.2   2.65e- 66
## 16 M3     51156 forest        0.457     0.0492     9.29  1.57e- 20
## 17 F3     31526 (Intercept)  -2.93      0.951     -3.08  2.06e-  3
## 18 F3     31526 elev         10.1       1.11       9.07  1.20e- 19
## 19 F3     31526 popD          0.0930    0.103      0.905 3.66e-  1
## 20 F3     31526 forest        0.0987    0.0666     1.48  1.38e-  1
## 21 M2     34396 (Intercept) -11.2       0.174    -64.3   0.       
## 22 M2     34396 elev          0.357     0.206      1.73  8.40e-  2
## 23 M2     34396 popD         -0.498     0.0424   -11.7   8.70e- 32
## 24 M2     34396 forest        0.981     0.0519    18.9   1.13e- 79
## 25 F1     28326 (Intercept) -12.6       0.292    -43.1   0.       
## 26 F1     28326 elev          0.152     0.325      0.467 6.41e-  1
## 27 F1     28326 popD          0.300     0.0573     5.24  1.64e-  7
## 28 F1     28326 forest        1.56      0.0788    19.8   4.29e- 87
## 29 M5    275088 (Intercept) -12.7       0.132    -96.1   0.       
## 30 M5    275088 elev          0.452     0.0196    23.1   3.46e-118
## 31 M5    275088 popD         -1.29      0.209     -6.18  6.36e- 10
## 32 M5    275088 forest       -0.160     0.0274    -5.85  4.94e-  9

Keep just the estimates for later comparisons

rsf_coefs<-rsffits2 %>% unnest(tidy) %>% 
  select(-(std.error:p.value))

rsf_coefs_wide <- rsf_coefs %>% spread(., term, estimate)

Explore coefficients using a pair plot

my_fn <- function(data, mapping, ...){
  p <- ggplot(data = data, mapping = mapping) + 
    geom_point() + 
    geom_smooth(method=lm, fill="blue", color="blue", ...)
  p
}

rsf_coefs_wide %>% select(elev, popD, forest) %>%    
   GGally::ggpairs(., lower = list(continuous = my_fn), upper = list(continuous = wrap("cor", method= "spearman")))

glmmTMB independent random slopes model

Process:

  1. Set up model, but do not fit
  2. Set random intercept variance to large fixed value, set other variance components to 0
  3. Fit the model THis is the model we’d like to fit (where coefficients may be correlated), but you will wait a long time. And, it requires too many variance parameters for the small number of individuals
tic("mixed mod")
fisher.tmp <- glmmTMB(case_ ~ elev + forest + popD + (1|id) + (0 + elev|id) + (0 + popD |id) +(0 +forest | id), family=binomial(), data = rsfdat,
                      doFit=FALSE, weights = w)

Set variance of random intercept to 10^6

fisher.tmp$parameters$theta[1] <- log(1e3)
nvarparm<-length(fisher.tmp$parameters$theta)
fisher.tmp$mapArg <- list(theta=factor(c(NA,1:(nvarparm-1))))
fisher.rsf.ind <- glmmTMB:::fitTMB(fisher.tmp)
summary(fisher.rsf.ind)
##  Family: binomial  ( logit )
## Formula:          case_ ~ elev + forest + popD + (1 | id) + (0 + elev | id) + (0 +      popD | id) + (0 + forest | id)
## Data: rsfdat
## Weights: w
## 
##       AIC       BIC    logLik  deviance  df.resid 
##  816453.2  816533.4 -408219.6  816439.2    690981 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance  Std.Dev. 
##  id     (Intercept) 1.000e+06 1000.0000
##  id.1   elev        1.833e+01    4.2818
##  id.2   popD        1.963e-01    0.4430
##  id.3   forest      3.185e-01    0.5644
## Number of obs: 690988, groups:  id, 8
## 
## Conditional model:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -6.773e-04  3.536e+02   0.000  1.00000   
## elev         2.614e+00  1.526e+00   1.714  0.08659 . 
## forest       5.443e-01  2.005e-01   2.715  0.00662 **
## popD        -2.384e-01  1.597e-01  -1.493  0.13547   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
toc()
## mixed mod: 1258.58 sec elapsed

Comparisons

Compare fixed effects (above) to mean coefficient from individual fits. These are quite similar.

# MEAN COEFFICIENT FROM INDIVIDUAL FITS 
rsf_coefs %>% filter(term!="(Intercept)") %>% group_by(term) %>% summarize(mean=mean(estimate), se.mean=sd(estimate)/sqrt(n()))
## # A tibble: 3 x 3
##   term     mean se.mean
##   <chr>   <dbl>   <dbl>
## 1 elev    2.69    1.66 
## 2 forest  0.545   0.215
## 3 popD   -0.257   0.185
# POPULATION MEAN COEFFICIENT FROM MIXED MODEL
summary(fisher.rsf.ind)$coef$cond[-1,]
##          Estimate Std. Error   z value    Pr(>|z|)
## elev    2.6143378  1.5255720  1.713677 0.086588052
## forest  0.5443407  0.2004621  2.715430 0.006618986
## popD   -0.2383814  0.1596792 -1.492877 0.135469448

Compare variance of individual esimtates to variance component from mixed model. Note, the variance of the individual coefficients will be larger (too big) since it includes true variability and sampling variability.

# VARIANCE OF COEFFICIENTS FROM INDIVIDUAL FITS
rsf_coefs %>% filter(term!="(Intercept)") %>% group_by(term) %>% summarize(var_elev=var(estimate), sd_elev=sd(estimate))
## # A tibble: 3 x 3
##   term   var_elev sd_elev
##   <chr>     <dbl>   <dbl>
## 1 elev     22.0     4.69 
## 2 forest    0.370   0.608
## 3 popD      0.274   0.523
# VARIANCE COMPONENT FROM MIXED MODEL
summary(fisher.rsf.ind)$varcor
## 
## Conditional model:
##  Groups Name        Std.Dev.  
##  id     (Intercept) 1000.00000
##  id.1   elev           4.28181
##  id.2   popD           0.44305
##  id.3   forest         0.56440

Look at individual coefficients

# COEFFICIENTS FROM INDIVIDUAL FITS
rsf_coefs2 <- rsf_coefs %>% filter(term!="(Intercept)") %>% arrange(term, id)
rsf_coefs2$method<-"indiv_fits"                       
names(rsf_coefs2)[4]<-"Estimate"
rsf_coefs2<-rsf_coefs2[,-2]

# COEFFICIENTS FROM MIXED MODEL
mixed_coefs <- coef(fisher.rsf.ind)$cond$id[,-1]
mixed_coefs$id <- rownames(mixed_coefs)
mixed_coefs <- mixed_coefs %>% 
  gather('elev', 'forest', 'popD', key="term", value="Estimate")
mixed_coefs$method<-"Mixed"

Combine and plot

allests<-rbind(mixed_coefs, rsf_coefs2)

Note that because of the large sample size, individual coefficients show very little shrinkage towards the overall mean.

ggplot(data=allests, 
       aes(x=id, y=Estimate, col=method))+
  geom_point(size=3.5, position=position_dodge(width=0.3))+
  xlab("")+ylab(expression(hat(beta)))+facet_wrap(~term)

SSF model

Read in annotated available data for SSF modeling

ssfdat <- read_rds("data/ssf_dat.rds")

Create ID for each unique strata, by combining ID and step_id

ssfdat$step_id = with(ssfdat, paste0(id, step_id_, sep = ""))

Since not all animals experience all habitat types, lets just explore forest versus non-forest

ssfdat$forest <- ifelse(ssfdat$landC == "forest", 1, 0)

glmmTMB: random slopes model

The process is the same as for the mixed RSF:

  1. Set up model, but do not fit
  2. Set random intercept variance to large fixed value, set other variance components to 0
  3. Fit the model

However, there are a few differences:

  • we will use a Poisson likelihood rather than logistic
  • we won’t need weights
  • we will include fixed intercepts for each step_id
tic("mixed ssf")
fisher.tmp <- glmmTMB(case_ ~ elev + popD + forest + (1|step_id) + (0 + elev|id) + (0 + popD|id) + (0+ forest |id), family=poisson(), data = ssfdat,
                      doFit=FALSE)

Set variance of random intercept to 10^6

fisher.tmp$parameters$theta[1] <- log(1e3)
nvarparm<-length(fisher.tmp$parameters$theta)
fisher.tmp$mapArg <- list(theta=factor(c(NA,1:(nvarparm-1))))
fisher.ssf <- glmmTMB:::fitTMB(fisher.tmp)
summary(fisher.ssf)
##  Family: poisson  ( log )
## Formula:          case_ ~ elev + popD + forest + (1 | step_id) + (0 + elev | id) +      (0 + popD | id) + (0 + forest | id)
## Data: ssfdat
## 
##      AIC      BIC   logLik deviance df.resid 
## 130130.9 130194.9 -65058.4 130116.9    69524 
## 
## Random effects:
## 
## Conditional model:
##  Groups  Name        Variance  Std.Dev. 
##  step_id (Intercept) 1.000e+06 1.000e+03
##  id      elev        1.829e+00 1.352e+00
##  id.1    popD        3.581e-12 1.892e-06
##  id.2    forest      9.715e-02 3.117e-01
## Number of obs: 69531, groups:  step_id, 6321; id, 7
## 
## Conditional model:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -2.32630   12.57981  -0.185  0.85329   
## elev         1.67740    0.78539   2.136  0.03270 * 
## popD        -0.04948    0.04817  -1.027  0.30427   
## forest       0.34891    0.12871   2.711  0.00671 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
toc()
## mixed ssf: 18.36 sec elapsed

Using Ts.Estim

library(TwoStepCLogit)
tic("two-step")
twostep<-Ts.estim(formula = case_ ~  elev+ popD+forest+
           strata(step_id) +cluster(id),data = ssfdat, random = ~ elev+popD+forest, all.m.1=F) 
twostep
## Call:
## Ts.estim(formula = case_ ~ elev + popD + forest + strata(step_id) + 
##     cluster(id), data = ssfdat, random = ~elev + popD + forest, 
##     all.m.1 = F)
## 
## beta coefficients:
##          estimate        se
## elev     1.807810  0.796812
## popD    -0.044808  0.052567
## forest   0.344730  0.138059
## 
## D = estimate of the between-cluster variance-covariance matrix D,
##     for the random coefficients only:
##             elev         popD     forest
## elev    2.785133  0.000000000  0.0000000
## popD    0.000000  0.001321728  0.0000000
## forest  0.000000  0.000000000  0.1143032
toc()
## two-step: 18.22 sec elapsed

Individual fits

class(ssfdat) <- class(ssfdat)[-1] # this should be fixed in the dev version
ssffits <- ssfdat %>% group_by(id) %>% nest %>% 
  mutate(mod = map(data, function(x) (fit_issf(case_ ~ elev + popD + forest + strata(step_id), data = x))))

Now, summarize fits to individual animals

ssffits <- ssffits %>%
  mutate(tidy = map(mod, ~ broom::tidy(.x$model)),
         n = map_int(data, nrow))

ssffits %>% unnest(tidy) %>% filter(term!="(Intercept)") %>% print(., n=1000)
## # A tibble: 21 x 9
##    id        n term   estimate std.error statistic  p.value conf.low conf.high
##    <fct> <int> <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
##  1 M4    13024 elev     0.725     0.620      1.17  2.42e- 1  -0.490     1.94  
##  2 M4    13024 popD    -0.0955    0.0726    -1.31  1.89e- 1  -0.238     0.0469
##  3 M4    13024 forest   0.461     0.0731     6.30  2.95e-10   0.317     0.604 
##  4 F2     3817 elev     5.75      1.63       3.54  3.99e- 4   2.57      8.94  
##  5 F2     3817 popD    -0.0504    0.137     -0.368 7.13e- 1  -0.318     0.218 
##  6 F2     3817 forest   0.414     0.143      2.89  3.80e- 3   0.134     0.694 
##  7 M3     2233 elev     4.23      2.94       1.44  1.50e- 1  -1.53      9.99  
##  8 M3     2233 popD    -0.189     0.178     -1.06  2.88e- 1  -0.539     0.160 
##  9 M3     2233 forest   0.393     0.190      2.06  3.92e- 2   0.0194    0.766 
## 10 F3     1815 elev     5.91      4.38       1.35  1.78e- 1  -2.68     14.5   
## 11 F3     1815 popD    -0.408     0.373     -1.09  2.74e- 1  -1.14      0.323 
## 12 F3     1815 forest   0.255     0.245      1.04  2.98e- 1  -0.225     0.736 
## 13 M2    14113 elev     2.54      0.838      3.03  2.43e- 3   0.898     4.18  
## 14 M2    14113 popD    -0.0681    0.105     -0.647 5.17e- 1  -0.274     0.138 
## 15 M2    14113 forest   0.351     0.0895     3.92  8.87e- 5   0.175     0.526 
## 16 F1    12848 elev     0.567     0.732      0.774 4.39e- 1  -0.868     2.00  
## 17 F1    12848 popD     0.262     0.153      1.71  8.64e- 2  -0.0374    0.560 
## 18 F1    12848 forest   0.839     0.106      7.92  2.29e-15   0.631     1.05  
## 19 M5    21681 elev     0.0558    0.191      0.292 7.70e- 1  -0.319     0.431 
## 20 M5    21681 popD     2.55      0.773      3.29  9.94e- 4   1.03      4.06  
## 21 M5    21681 forest  -0.301     0.0902    -3.34  8.50e- 4  -0.478    -0.124

Keep just coefficients for comparisons

ssf_coefs<-ssffits %>% unnest(tidy) %>% 
  select(-(std.error:conf.high)) 

Comparisons

Compare fixed effects from mixed model and TS.Estim (above) to mean coefficient from individual fits

# MEAN COEFFICIENT FROM INDIVIDUAL FITS 
se<-function(x){sd(x)/sqrt(length(x))}
ssf_coefs %>% filter(term!="(Intercept)") %>% group_by(term) %>%   summarize(mean=mean(estimate), se=se(estimate))
## # A tibble: 3 x 3
##   term    mean    se
##   <chr>  <dbl> <dbl>
## 1 elev   2.83  0.943
## 2 forest 0.344 0.128
## 3 popD   0.285 0.384
# POPULATION MEAN COEFFICIENT FROM MIXED MODEL
summary(fisher.ssf)$coef$cond[-1,]
##           Estimate Std. Error   z value   Pr(>|z|)
## elev    1.67740175 0.78538855  2.135760 0.03269895
## popD   -0.04948474 0.04816864 -1.027323 0.30426850
## forest  0.34890526 0.12870584  2.710874 0.00671062
# POPULATION MEAN COEFFICIENT FROM TWO-STEP (TS.Estim)
twostep$beta
##        elev        popD      forest 
##  1.80780990 -0.04480754  0.34473004

Compare variance of individual esimtates to variance component from mixed model.

# VARIANCE OF COEFFICIENTs FROM INDIVIDUAL FITS
ssf_coefs %>% filter(term!="(Intercept)") %>% group_by(term) %>% summarize(var_elev=var(estimate), sd_elev=sd(estimate))
## # A tibble: 3 x 3
##   term   var_elev sd_elev
##   <chr>     <dbl>   <dbl>
## 1 elev      6.22    2.49 
## 2 forest    0.115   0.339
## 3 popD      1.03    1.02
# VARIANCE OF COEFFICIENTs FROM MIXED MODEL
summary(fisher.ssf)$varcor #var
## 
## Conditional model:
##  Groups  Name        Std.Dev.  
##  step_id (Intercept) 1.0000e+03
##  id      elev        1.3523e+00
##  id.1    popD        1.8923e-06
##  id.2    forest      3.1168e-01
# VARIANCE OF COEFFICIENTs FROM TWO-STEP (TS.Estim)
sqrt(twostep$D) # sd
##            elev       popD    forest
## elev   1.668872 0.00000000 0.0000000
## popD   0.000000 0.03635558 0.0000000
## forest 0.000000 0.00000000 0.3380876

Look at individual coefficients

# COEFFICIENTs FROM INDIVIDUAL FITS
ssf_coefs2 <- ssf_coefs %>% filter(term!="(Intercept)") %>% arrange(term, id)
ssf_coefs2$method<-"indiv_fits"                       
names(ssf_coefs2)[4]<-"Estimate"
ssf_coefs2<-ssf_coefs2[,-2]

# COEFFICIENTs FROM MIXED MODEL
mixed_coefs <- coef(fisher.ssf)$cond$id[,-1]
mixed_coefs$id <- rownames(mixed_coefs)
mixed_coefs <- mixed_coefs %>% 
  gather('elev', 'forest', 'popD', key="term", value="Estimate")
mixed_coefs$method<-"Mixed"


# COEFFICIENTS FROM TWO-STEP (TS.Estim)
twostepcoefs<-matrix(twostep$beta, nrow(twostep$r.effect), ncol(twostep$r.effect), byrow=TRUE)+twostep$r.effect
twostepcoefs<-as.data.frame(twostepcoefs)
twostepcoefs$method<-"two_step"
twostepcoefs$id<-rownames(twostep$r.effect)
twostepcoefs<-twostepcoefs %>% gather('elev', 'popD','forest', key="term", value="Estimate")

Combine and plot

allests<-rbind(mixed_coefs, ssf_coefs2,twostepcoefs)

Note here that a few coefficients fo relevation and popD show considerable shrinkage. These are coefficients with large SEs as seen by inspecting the individual fits.

ggplot(data=allests, 
       aes(x=id, y=Estimate, col=method))+
  geom_point(size=3.5, position=position_dodge(width=0.3))+
  xlab("")+ylab(expression(hat(beta)))+facet_wrap(~term)

Look at elevation, with coefficients sorted by SE. Note, the 3 coefficients with the largest SEs show the most shrinkage.

ssffits %>% unnest(tidy)  %>% filter(term=="elev") %>% arrange(desc(std.error)) %>% select(-c(statistic:conf.high))
## # A tibble: 7 x 5
##   id        n term  estimate std.error
##   <fct> <int> <chr>    <dbl>     <dbl>
## 1 F3     1815 elev    5.91       4.38 
## 2 M3     2233 elev    4.23       2.94 
## 3 F2     3817 elev    5.75       1.63 
## 4 M2    14113 elev    2.54       0.838
## 5 F1    12848 elev    0.567      0.732
## 6 M4    13024 elev    0.725      0.620
## 7 M5    21681 elev    0.0558     0.191

Similar story for popD. M5 shows considerably shrinkage due to the large SE associated with its coefficient.

ssffits %>% unnest(tidy)  %>% filter(term=="popD") %>% arrange(desc(std.error)) %>% select(-c(statistic:conf.high))
## # A tibble: 7 x 5
##   id        n term  estimate std.error
##   <fct> <int> <chr>    <dbl>     <dbl>
## 1 M5    21681 popD    2.55      0.773 
## 2 F3     1815 popD   -0.408     0.373 
## 3 M3     2233 popD   -0.189     0.178 
## 4 F1    12848 popD    0.262     0.153 
## 5 F2     3817 popD   -0.0504    0.137 
## 6 M2    14113 popD   -0.0681    0.105 
## 7 M4    13024 popD   -0.0955    0.0726

Total Elapsed time

toc()