--- title: "Phylogeny and Species Trait Effects on Detectability" author: "Peter Solymos" date: "`r Sys.Date()`" vignette: > %\VignetteEngine{knitr::rmarkdown} %\VignetteIndexEntry{Phylogeny and Species Trait Effects on Detectability} output: knitr:::html_vignette --- ```{r update-stuff,eval=FALSE,results='hide',echo=FALSE} devtools::install_github("borealbirds/lhreg") devtools::build_vignettes("~/repos/lhreg") ``` ## Introduction This document is a supporting material for the manuscript entitled *Phylogeny and species traits predict songbird detectability* by Peter Solymos, Steven M. Matsuoka, Diana Stralberg, Erin M. Bayne, and Nicole K. S. Barker. The **lhreg** R package contains the (1) data, (2) analysis code used in the manuscript, and (3) code required to summarize the results and produce tables and figures. The R package is hosted on [GitHub](https://github.com/borealbirds/lhreg), Please submit issues [here](https://github.com/borealbirds/lhreg/issues). The package is archived on Zenodo under the DOI [10.5281/zenodo.574886](https://zenodo.org/badge/latestdoi/90998177). The package can be installed as: ```{r install,eval=FALSE} devtools::install_github("borealbirds/lhreg") ``` The present document can be viewed as: ```{r vignette,eval=FALSE} vignette(topic = "lhreg", package = "lhreg") ``` ## Components of detectability The manuscript refer to singing rate ($SR$, here we also refer to it as `phi`) and detection distance ($DD$, which we refer to as `tau`), which are linked to detectability as explained in this section. Solymos et al. (2013, DOI [10.1111/2041-210X.12106](https://dx.doi.org/10.1111/2041-210X.12106)) describes the mathematical details of singing rate estimation based on removal sampling and effective detection distance estimation via distance sampling. These quantities define the probability of individuals of the species are available for sampling given presence (often referred to as availability, $p$), and the probability that an individual of that species is being detected given it produces a cue (often referred to as perceptibility, $q$). $p=1-exp(-t~SR)$, where $t$ is the duration of the counting period. $q=DD^2~(1-exp(-r^2 / DD^2))~/~r^2$, where $r$ is the counting radius (i.e. not counting individuals beyond this distance from the observer). Probability of detection is the product to the two components: $pq$. ## Data and data processing ### Variables The `lhreg_data` is a data frame with all response variables (`logphi` is log singing rate, `logtau` is log detection distance) and trait values (unmodified and transformed versions). ```{r trait-data,message=FALSE} library(lhreg) data(lhreg_data) str(lhreg_data) with(lhreg_data, plot(exp(logphi), exp(logtau), cex=logmass*0.5, col=Mig2, pch=c(21, 22)[Hab2])) legend("topright", bty="n", pch=c(21, 21, 22, 22), col=c(1,2,1,2), legend=c("Migratory/Closed", "Resident/Closed", "Migratory/Open", "Resident/Open")) ``` Here is a list of species used in the manuscript: ```{r species-table,results='markup',echo=FALSE} knitr::kable(lhreg_data[,c("common_name", "scientific_name", "spp")], digits=3, row.names=FALSE) ``` Here are the response variables and their standard errors: ```{r responses-table,results='markup',echo=FALSE} lhreg_data$SR <- exp(lhreg_data$logphi) lhreg_data$DD <- 100*exp(lhreg_data$logtau) knitr::kable(lhreg_data[,c("common_name", "SR", "logphi", "logphiSE", "DD", "logtau", "logtauSE")], digits=3, row.names=FALSE) ``` Other trait variables used are predictors: ```{r predictor-table,results='markup',echo=FALSE} knitr::kable(lhreg_data[,c("common_name", "mass", "MaxFreqkHz", "Mig2", "Hab2", "Nesthm")], digits=2, row.names=FALSE) ``` ### Phylogenetic correlations This code was used to average the 1000 pseudo-posterior trees from Jetz et al. (2012) with Ericson backbone to represent the phylogenetic relationships among the species: ```{r phylo-corr,eval=FALSE} library(ape) mph <- read.nexus("11960.tre") # 1000 trees with Ericson backbone lhreg_tree <- consensus(mph) table(sapply(mph, function(z) length(z$tip.label))) CORR <- TRUE vv <- list() vv[[1]] <- vcv(mph[[1]], corr=CORR) for (i in 2:length(mph)) { v <- vcv(mph[[i]], corr=CORR) v <- v[rownames(vv[[1]]), colnames(vv[[1]])] vv[[i]] <- v } vvv <- v for (i in 1:length(v)) { vvv[i] <- mean(sapply(vv, function(z) z[i])) } spp <- intersect(rownames(lhreg_data), rownames(vvv)) vvv <- vvv[spp,spp] cor_matrix <- as.matrix(nearPD(vvv, corr=TRUE)$mat) ``` The object `cor_matrix` is part of the **lhreg** package, relative phylogenies can be reconstructed from it: ```{r heatmap} library(ape) data(cor_matrix) data(lhreg_tree) str(cor_matrix) heatmap(cor_matrix) ``` ## Analysis Linear model were used to screen trait variables, so that we compared that *full* model with the intercept only *null* model, with or without phylogenetic correlation. ### Variable screening for SR ```{r screening-sr} library(lhreg) data(lhreg_data) data(cor_matrix) y <- lhreg_data$logphi m4 <- lm(y ~ logmass + Mig2 + Nesthm + xMaxFreqkHz + Hab4, lhreg_data) m3 <- lm(y ~ logmass + Mig2 + Nesthm + xMaxFreqkHz + Hab3, lhreg_data) m2 <- lm(y ~ logmass + Mig2 + Nesthm + xMaxFreqkHz + Hab2, lhreg_data) m4s <- step(m4, trace=0) m3s <- step(m3, trace=0) m2s <- step(m2, trace=0) AIC(m4,m3,m2,m4s,m3s,m2s) summary(m2s) mp <- update(m2s, .~.-Nesthm) amp <- anova(mp) round(structure(100 * amp[,"Sum Sq"] / sum(amp[,"Sum Sq"]), names=rownames(amp)), 1) mp0 <- lm(y ~ 1) summary(mp0) # null model for SR summary(mp) # full model for SR ## evaluating interactions mpx <- lm(y ~ logmass + Mig2 + Nesthm + xMaxFreqkHz + Hab2 + logmass:xMaxFreqkHz + Hab2:xMaxFreqkHz, lhreg_data) mpx2 <- step(mpx) summary(mpx2) ``` ### Variable screening for DD ```{r screening-dd} y <- lhreg_data$logtau m4 <- lm(y ~ logmass + Mig2 + Nesthm + xMaxFreqkHz + Hab4, lhreg_data) m3 <- lm(y ~ logmass + Mig2 + Nesthm + xMaxFreqkHz + Hab3, lhreg_data) m2 <- lm(y ~ logmass + Mig2 + Nesthm + xMaxFreqkHz + Hab2, lhreg_data) m4s <- step(m4, trace=0) m3s <- step(m3, trace=0) m2s <- step(m2, trace=0) AIC(m4,m3,m2,m4s,m3s,m2s) summary(m2s) mt <- update(m2s, .~.-Nesthm) amt <- anova(mt) round(structure(100 * amt[,"Sum Sq"] / sum(amt[,"Sum Sq"]), names=rownames(amt)), 1) mt0 <- lm(y ~ 1) summary(mp0) # null model for DD summary(mp) # full model for DD ## evaluating interactions mtx <- lm(y ~ logmass + Mig2 + Nesthm + xMaxFreqkHz + Hab2 + logmass:xMaxFreqkHz + Hab2:xMaxFreqkHz, lhreg_data) mtx2 <- step(mtx) summary(mtx2) ``` ### Mixed model selection The following models were compared, the prefix `t` and `p` indicates `tau` (detection distance) and `phi` (singing rate): 1. `M00` as null model $M_{00}$, 2. `Ml0` as phylogeny-only model $M_{\lambda0}$, 3. `M0b` as trait-only model $M_{0\beta}$, 4. `Mlb` as combined model $M_{\lambda\beta}$. ```{r models,eval=FALSE} met <- "DE" # can use "SANN", "Nelder-Mead" is quickest x <- lhreg_data vc <- cor_matrix ## model matrix definitions X0 <- matrix(1, nrow(x), 1) # null (for both) colnames(X0) <- "Intercept" Xt <- model.matrix(mt) # DD Xp <- model.matrix(mp) # SR ## fit models for tau tM00 <- lhreg(Y=x$logtau, X=X0, SE=x$logtauSE, V=vc, lambda=0, hessian=TRUE, method=met) tMl0 <- lhreg(Y=x$logtau, X=X0, SE=x$logtauSE, V=vc, lambda=NA, hessian=TRUE, method=met) tM0b <- lhreg(Y=x$logtau, X=Xt, SE=x$logtauSE, V=vc, lambda=0, hessian=TRUE, method=met) tMlb <- lhreg(Y=x$logtau, X=Xt, SE=x$logtauSE, V=vc, lambda=NA, hessian=TRUE, method=met) ## fit models for phi pM00 <- lhreg(Y=x$logphi, X=X0, SE=x$logphiSE, V=vc, lambda=0, hessian=TRUE, method=met) pMl0 <- lhreg(Y=x$logphi, X=X0, SE=x$logphiSE, V=vc, lambda=NA, hessian=TRUE, method=met) pM0b <- lhreg(Y=x$logphi, X=Xp, SE=x$logphiSE, V=vc, lambda=0, hessian=TRUE, method=met) pMlb <- lhreg(Y=x$logphi, X=Xp, SE=x$logphiSE, V=vc, lambda=NA, hessian=TRUE, method=met) ``` ### Profile likelihood for lambda Profile likelihood was calculated to understand how traits affect the strength of phylogeny through the variable `lambda`. It is possible to use `lambda` > 1 but it leads to extremely low likelihoods in our case. ```{r profile-lik,eval=FALSE} ## set up lambda values to evaluate at lam <- seq(0, 1, by=0.01) ## parallel computation is faster cl <- makeCluster(4) ## load package on workers tmp <- clusterEvalQ(cl, library(lhreg)) object <- tMl0 clusterExport(cl, "object") pl_tMl0 <- pbsapply(lam, function(z, ...) profile_lambda1(object, z, ...), cl=cl, method=met) object <- tMlb clusterExport(cl, "object") pl_tMlb <- pbsapply(lam, function(z, ...) profile_lambda1(object, z, ...), cl=cl, method=met) object <- pMl0 clusterExport(cl, "object") pl_pMl0 <- pbsapply(lam, function(z, ...) profile_lambda1(object, z, ...), cl=cl, method=met) object <- pMlb clusterExport(cl, "object") pl_pMlb <- pbsapply(lam, function(z, ...) profile_lambda1(object, z, ...), cl=cl, method=met) stopCluster(cl) # close cluster ``` ### Leave-one-out (LOO) analysis Leave one out cross-validation was used to see how well we could predict the values based on data from the other species, traits and phylogeny. Mean squared error (MSE) and variance components were calculated based on LOO cross validation. We also used LOO to calculate jackknife type non-parametric confidence intervals for the estimated parameters (not presented in the manuscript). ```{r loo,eval=FALSE} n <- nrow(x) # we will do n runs cl <- makeCluster(4) # parallel if you wish tmp <- clusterEvalQ(cl, library(lhreg)) # load package loo_tM00 <- t(pbsapply(1:n, loo1, object=tM00, cl=cl)) #loo_tM00 <- t(pbsapply(1:n, loo2, object=tM00, cl=cl, method=met)) loo_tMl0 <- t(pbsapply(1:n, loo2, object=tMl0, cl=cl, method=met)) loo_tM0b <- t(pbsapply(1:n, loo1, object=tM0b, cl=cl)) #loo_tM0b <- t(pbsapply(1:n, loo2, object=tM0b, cl=cl, method=met)) loo_tMlb <- t(pbsapply(1:n, loo2, object=tMlb, cl=cl, method=met)) loo_pM00 <- t(pbsapply(1:n, loo1, object=pM00, cl=cl)) #loo_pM00 <- t(pbsapply(1:n, loo2, object=pM00, cl=cl, method=met)) loo_pMl0 <- t(pbsapply(1:n, loo2, object=pMl0, cl=cl, method=met)) loo_pM0b <- t(pbsapply(1:n, loo1, object=pM0b, cl=cl)) #loo_pM0b <- t(pbsapply(1:n, loo2, object=pM0b, cl=cl, method=met)) loo_pMlb <- t(pbsapply(1:n, loo2, object=pMlb, cl=cl, method=met)) stopCluster(cl) ``` ### Parametric bootstrap We use the `simulate` method to simulate observations from a Multivariate Normal distribution according to the input object (without the observation error) to refit the model and returns simulated values and estimates. ```{r par-boot,eval=FALSE} nsim <- 999 ## simulated data is nice, thus quick optimization works metQ <- "Nelder-Mead" cl <- makeCluster(4) # parallel if you wish tmp <- clusterEvalQ(cl, library(lhreg)) # load package pb_tM00 <- parametric_bootstrap(tM00, nsim=nsim, method=metQ, cl=cl) pb_tMl0 <- parametric_bootstrap(tMl0, nsim=nsim, method=metQ, cl=cl) pb_tM0b <- parametric_bootstrap(tM0b, nsim=nsim, method=metQ, cl=cl) pb_tMlb <- parametric_bootstrap(tMlb, nsim=nsim, method=metQ, cl=cl) pb_pM00 <- parametric_bootstrap(pM00, nsim=nsim, method=metQ, cl=cl) pb_pMl0 <- parametric_bootstrap(pMl0, nsim=nsim, method=metQ, cl=cl) pb_pM0b <- parametric_bootstrap(pM0b, nsim=nsim, method=metQ, cl=cl) pb_pMlb <- parametric_bootstrap(pMlb, nsim=nsim, method=metQ, cl=cl) stopCluster(cl) ``` ### Prediction intervals We then calculate the prediction interval for an observation conditional on the other species and the known tree (this one and the other species included), and returns the bootstrap distribution of the prediction that can be used to calculate quantile based prediction intervals. ```{r pred-int,eval=FALSE} cl <- makeCluster(4) pit <- pred_int(tM0b, pb_tM0b, cl=cl) pip <- pred_int(pMlb, pb_pMlb, cl=cl) stopCluster(cl) ## save the results: save(list=c("cor_matrix", "lam", "lhreg_data", "met", "n", "vc", "x", "X0", "Xp", "Xt", "amp", "mp", "mp0", "mpx", "mpx2", "amt", "mt", "mt0", "mtx", "mtx2", "pM00", "pM0b", "pMl0", "pMlb", "tM00", "tM0b", "tMl0", "tMlb", "pl_pMl0", "pl_pMlb", "pl_tMl0", "pl_tMlb", "loo_pM00", "loo_pM0b", "loo_pMl0", "loo_pMlb", "loo_tM00", "loo_tM0b", "loo_tMl0", "loo_tMlb", "pb_pM00", "pb_pM0b", "pb_pMl0", "pb_pMlb", "pb_tM00", "pb_tM0b", "pb_tMl0", "pb_tMlb", "pit", "pip"), file="lhreg-results-DE2.rda") ``` ## Results Load results and set some values: ```{r load-results} library(lhreg) #load(system.file("extdata", "lhreg-results-DE.rda", package = "lhreg")) load(system.file("extdata", "lhreg-results-DE2.rda", package = "lhreg")) Level <- 0.95 Crit <- -0.5*qchisq(Level, 1) ltmp <- seq(0, 1, by=0.0001) ## red-yl-blue Col <- c("#2C7BB6", "#6BAACF", "#ABD9E9", "#D4ECD3", "#FFFFBF", "#FED690", "#FDAE61", "#EA633E", "#D7191C") Col1 <- Col[1] Col2 <- rgb(171/255, 217/255, 233/255, 0.5) # Col[3] Col3 <- Col[9] Col4 <- rgb(253/255, 174/255, 97/255, 0.5) # Col[7] prt <- exp(loo_tM0b[,1:2]) prp <- exp(loo_pMlb[,1:2]) PIt <- t(apply(exp(pit), 1, quantile, c((1-Level)/2, 1-(1-Level)/2))) PIp <- t(apply(exp(pip), 1, quantile, c((1-Level)/2, 1-(1-Level)/2))) library(plotrix) size_fun <- function(x, Min=0.2, Max=1) { x <- x - min(x) x <- x / max(x) x <- x * (Max-Min) + Min x } col_fun <- colorRampPalette(Col) # red-yl-blue size_col_fun <- function(x, col_fun, nbins=100, ...) { x <- size_fun(x, ...) q <- seq(min(x), max(x), by=diff(range(x))/nbins) i <- as.integer(cut(x, q, include.lowest = TRUE)) col_fun(nbins)[i] } ``` ### Table with estimates and MSE This table has estimates, confidence intervals, MSE, $\Delta$AIC. ```{r table-1} get_CI <- function(x, level=0.95) t(apply(x$estimates, 2, quantile, c((1-level)/2, 1-(1-level)/2))) sf <- function(z, loo, pb, type=c("wald", "jack", "boot"), level=0.95) { type <- match.arg(type) zz <- z$summary a <- c((1-level)/2, 1-(1-level)/2) dig <- 2 if (type == "wald") { pcut <- function(p) { factor(c("***", "**", "*", "+", "ns")[as.integer(cut(p, c(1, 0.1, 0.05, 0.01, 0.001, 0), include.lowest=TRUE, right=FALSE))], levels=c("***", "**", "*", "+", "ns")) } out <- structure(sapply(1:nrow(zz), function(i) paste0(round(zz[i,1], dig), " (SE +/- ", round(zz[i,2], dig), pcut(zz[i,4]), ")")), names=rownames(zz)) } if (type == "jack") { CI <- t(apply(rbind(zz[,1], loo[,3:ncol(loo),drop=FALSE]), 2, quantile, a)) out <- structure(sapply(1:nrow(zz), function(i) paste0(round(zz[i,1], dig), " (", round(CI[i,1], dig), ", ", round(CI[i,2], dig), ")")), names=rownames(zz)) } if (type == "boot") { CI <- get_CI(pb, level) out <- structure(sapply(1:nrow(zz), function(i) paste0(round(zz[i,1], dig), " (", round(CI[i,1], dig), ", ", round(CI[i,2], dig), ")")), names=rownames(zz)) } out } ## AIC tables aict <- AIC(tM00, tMl0, tM0b, tMlb) aict$dAIC <- aict$AIC-min(aict$AIC) aicp <- AIC(pM00, pMl0, pM0b, pMlb) aicp$dAIC <- aicp$AIC-min(aicp$AIC) cbind(aict, t(sapply(list(tM00, tMl0, tM0b, tMlb), function(z) z$summary[c("sigma","lambda"), 1]))) cbind(aicp, t(sapply(list(pM00, pMl0, pM0b, pMlb), function(z) z$summary[c("sigma","lambda"), 1]))) ## MSE SSEt <- c( tM00 = sum((loo_tM00[,"pred"] - tM00$Y)^2), tMl0 = sum((loo_tMl0[,"pred"] - tMl0$Y)^2), tM0b = sum((loo_tM0b[,"pred"] - tM0b$Y)^2), tMlb = sum((loo_tMlb[,"pred"] - tMlb$Y)^2)) SSEp <- c( pM00 = sum((loo_pM00[,"pred"] - pM00$Y)^2), pMl0 = sum((loo_pMl0[,"pred"] - pMl0$Y)^2), pM0b = sum((loo_pM0b[,"pred"] - pM0b$Y)^2), pMlb = sum((loo_pMlb[,"pred"] - pMlb$Y)^2)) MSEt <- SSEt / n MSEp <- SSEp / n Type <- "boot" # can be wald, jack, boot zzz <- list( M00=sf(pM00, loo_pM00, pb_pM00, Type, Level), Ml0=sf(pMl0, loo_pMl0, pb_pMl0, Type, Level), M0b=sf(pM0b, loo_pM0b, pb_pM0b, Type, Level), Mlb=sf(pMlb, loo_pMlb, pb_pMlb, Type, Level)) m <- matrix("", length(zzz[[4]]), 4) rownames(m) <- names(zzz[[4]]) colnames(m) <- c("M00", "Ml0", "M0b", "Mlb") for (i in 1:4) { j <- match(names(zzz[[i]]), rownames(m)) m[j,i] <- zzz[[i]] } m["lambda", c("M00", "M0b")] <- "0 (fixed)" #m[m==""] <- "n/a" m1 <- m zzz <- list( M00=sf(tM00, loo_tM00, pb_tM00, Type, Level), Ml0=sf(tMl0, loo_tMl0, pb_tMl0, Type, Level), M0b=sf(tM0b, loo_tM0b, pb_tM0b, Type, Level), Mlb=sf(tMlb, loo_tMlb, pb_tMlb, Type, Level)) m <- matrix("", length(zzz[[4]]), 4) rownames(m) <- names(zzz[[4]]) colnames(m) <- c("M00", "Ml0", "M0b", "Mlb") for (i in 1:4) { j <- match(names(zzz[[i]]), rownames(m)) m[j,i] <- zzz[[i]] } m["lambda", c("M00", "M0b")] <- "0 (fixed)" #m[m==""] <- "n/a" m2 <- m m1 <- rbind(m1, df=aicp$df, dAIC=round(aicp$dAIC, 3), XV_MSE=round(MSEp, 3)) m2 <- rbind(m2, df=aict$df, dAIC=round(aict$dAIC, 3), XV_MSE=round(MSEt, 3)) print.default(m1, quote=FALSE) # SR results print.default(m2, quote=FALSE) # DD results ``` Shared variation is calculated as: ```{r shared-var} Var_fun <- function(MSE) { Var <- 100*(MSE[1]-MSE[-1])/MSE[1] Var <- c(Var, Var[1]+Var[2]-Var[3]) names(Var) <- c("phylo", "traits", "both", "shared") Var } ## SR round(Var_fun(MSEp), 1) ## DD round(Var_fun(MSEt), 1) ``` ### Tree with SR, DD, and trait values Load the 1000 posterior trees, pick one and plot trait values alongside the tree. ```{r tree-trait,fig.height=10,fig.width=15} library(ape) load(system.file("extdata", "mph.rda", package = "lhreg")) tre <- mph[[1000]] # pick one tree ii <- match(tre$tip.label, rownames(lhreg_data)) d2 <- lhreg_data[ii,] NAMES <- paste0(d2$common_name, " (", d2$spp, ")") tre$tip.label <- NAMES xy <- data.frame( x=node.depth.edgelength(tre)[1:length(tre$tip.label)], y=node.height(tre)[1:length(tre$tip.label)]) phy_pts_size <- function(z, vari, ...) points(xy[,1] + z, xy[,2], pch=19, cex=size_fun(vari, Min=0.3, Max=1.2), ...) phy_pts_col <- function(z, vari, col=1, ...) points(xy[,1] + z, xy[,2], pch=19, cex=size_fun(vari, Min=0.3, Max=1.2), col=size_col_fun(vari, colorRampPalette(c("#000000", col)), Min=0.3), ...) phy_pts_2 <- function(z, vari, cex=0.6, col="#000000", ...) { points(xy[,1] + z, xy[,2], pch=19, col=c(col, "#FFFFFF")[as.integer(vari)], cex=cex, ...) points(xy[,1] + z, xy[,2], pch=21, col=col, cex=cex) } #pdf("Fig1.pdf", width=10, height=15) op <- par(mar=c(1,1,1,1)) plot(tre, cex=0.6, label.offset=22, font=1) segments(xy[,1], xy[,2], xy[,1]+21, xy[,2], lwd=1, col=1) phy_pts_size(2, exp(d2$logphi), col=Col1) phy_pts_size(5, exp(d2$logtau), col=Col3) phy_pts_2(10, d2$Mig2, col=1) phy_pts_size(13, d2$MaxFreqkHz, col="#4daf4a") phy_pts_size(16, sqrt(exp(d2$logmass)), col="#984ea3") phy_pts_2(19, d2$Hab2, col="#ff7f00") text(xy[1,1]+c(2,5,10,13,16,19), rep(142, 6), c("SR", "DD", "Migr", "Pitch", "Mass", "Habitat"), pos=4, srt=90, cex=0.65, offset=0) par(op) #dev.off() ``` ### Profile likelihood profiles for lambda ```{r fig-1,width=14,height=6} #pdf("FigPL.pdf", width=14, height=6) op <- par(mfrow=c(1,2)) Res1 <- pl_pMl0 Res2 <- pl_pMlb yv <- exp(Res1-max(Res1)) plot(lam, yv, type="n", lwd=1, ylim=exp(c(2*Crit, 0)), xlab=expression(lambda), ylab="Profile Likelihood Ratio") tmp1 <- splinefun(lam, yv)(ltmp) i1 <- tmp1 > exp(Crit) yv <- exp(Res2-max(Res2)) tmp2 <- splinefun(lam, yv)(ltmp) i2 <- tmp2 > exp(Crit) polygon(c(ltmp[i1], rev(ltmp[i1])), c(tmp1[i1], rep(-1, sum(i1))), col=Col2, border=NA) polygon(c(ltmp[i2], rev(ltmp[i2])), c(tmp2[i2], rep(-1, sum(i2))), col=Col4, border=NA) abline(h=exp(Crit), col="grey") lines(ltmp, tmp1, col=Col1, lwd=1) lines(ltmp[i1], tmp1[i1], col=Col1, lwd=3) lines(rep(ltmp[which.max(tmp1)], 2), c(1, -1), col=Col1) lines(ltmp, tmp2, col=Col3, lwd=1) lines(ltmp[i2], tmp2[i2], col=Col3, lwd=3) lines(rep(ltmp[which.max(tmp2)], 2), c(1, -1), col=Col3) box() legend(0.1, 1, bty="n", border=c(Col1, Col3), fill=c(Col2, Col4), pt.cex=2, pt.lwd=2, legend=c(expression(M[lambda*0]-SR), expression(M[lambda*beta]-SR))) round(c(Max_Ml0=ltmp[which.max(tmp1)], CI=range(ltmp[i1])), 3) round(c(Max_Mlx=ltmp[which.max(tmp2)], CI=range(ltmp[i2])), 3) Res1 <- pl_tMl0 Res2 <- pl_tMlb yv <- exp(Res1-max(Res1)) plot(lam, yv, type="n", lwd=1, ylim=exp(c(2*Crit, 0)), xlab=expression(lambda), ylab="Profile Likelihood Ratio") tmp1 <- splinefun(lam, yv)(ltmp) i1 <- tmp1 > exp(Crit) yv <- exp(Res2-max(Res2)) tmp2 <- splinefun(lam, yv)(ltmp) i2 <- tmp2 > exp(Crit) polygon(c(ltmp[i1], rev(ltmp[i1])), c(tmp1[i1], rep(-1, sum(i1))), col=Col2, border=NA) polygon(c(ltmp[i2], rev(ltmp[i2])), c(tmp2[i2], rep(-1, sum(i2))), col=Col4, border=NA) abline(h=exp(Crit), col="grey") lines(ltmp, tmp1, col=Col1, lwd=1) lines(ltmp[i1], tmp1[i1], col=Col1, lwd=3) lines(rep(ltmp[which.max(tmp1)], 2), c(1, -1), col=Col1) lines(ltmp, tmp2, col=Col3, lwd=1) lines(ltmp[i2], tmp2[i2], col=Col3, lwd=3) lines(rep(ltmp[which.max(tmp2)], 2), c(1, -1), col=Col3) box() legend(0.1, 1, bty="n", border=c(Col1, Col3), fill=c(Col2, Col4), pt.cex=2, pt.lwd=2, legend=c(expression(M[lambda*0]-DD), expression(M[lambda*beta]-DD))) round(c(Max_Ml0=ltmp[which.max(tmp1)], CI=range(ltmp[i1])), 3) round(c(Max_Mlx=ltmp[which.max(tmp2)], CI=range(ltmp[i2])), 3) par(op) #dev.off() ``` ### Bootstrap distributions of lambda ```{r fig-boot,width=7,height=7} #pdf("FigPB.pdf", width=10, height=10) op <- par(mfrow=c(2,2)) plot(density(pb_pMl0$estimates[,"lambda"]), xlim=c(0,1), main="SR Ml0", col=Col1) plot(density(pb_pMlb$estimates[,"lambda"]), xlim=c(0,1), main="SR Mlb", col=Col1) plot(density(pb_tMl0$estimates[,"lambda"]), xlim=c(0,1), main="DD Ml0", col=Col3) plot(density(pb_tMlb$estimates[,"lambda"]), xlim=c(0,1), main="DD Mlb", col=Col3) par(op) #dev.off() ``` ### LOO cross validation plots ```{r fig-2,width=13,height=7} #pdf("Fig2.pdf", width=12.75, height=7) op <- par(mfrow=c(1,2)) Max <- 0.7 D <- as.matrix(dist(prp)) diag(D) <- Inf D <- apply(D, 1, min) plot(prp, xaxs = "i", yaxs = "i", type="n", asp=1, ylim=c(0, Max), xlim=c(0, Max), xlab="Time-removal SR Estimate (/min)", ylab="LOO SR Estimate (/min)") abline(0,2,lty=2, col="grey") abline(0,1/2,lty=2, col="grey") abline(0,1.5,lty=2, col="grey") abline(0,1/1.5,lty=2, col="grey") abline(0,1,lty=1, col=1) r0 <- size_fun(x$MaxFreqkHz, 0.2*Max/50, Max/50) draw.ellipse(prp[,1], prp[,2], r0, r0, border=c(Col1,Col3)[as.integer(x$Mig2)]) segments(prp[,1], PIp[,1], prp[,1], PIp[,2], col=c(Col1,Col3)[as.integer(x$Mig2)], lwd=1) box() legend("topleft", pch=21, col=c(Col1,Col3), pt.cex=c(2,2), bty="n", legend=c("Migrant", "Resident")) r <- seq(0.2*Max/50, Max/50, len=5) draw.ellipse(rep(0.06, 5), 0.55+r-max(r), r, r, border=1) text(c(0.09, 0.06, 0.06), c(0.61, 0.52, 0.58), c("Song Pitch (kHz)",round(range(x$MaxFreqkHz),1)), cex=c(1,0.75,0.75)) text(0.9*Max, 0.05*Max, expression(M[lambda*beta]-SR)) #Ti <- D > 0.03 #Ti <- D > 0.03 | (prp[,1]/prp[,2] > 2 | prp[,1]/prp[,2] < 1/2) Ti <- D > 0.03 | (prp[,1]/prp[,2] > 3.2 | prp[,1]/prp[,2] < 1/3.2) text(prp[,1], prp[,2], ifelse(Ti, as.character(x$spp), NA), cex=0.6, pos=3, col=1) Max <- 100*2.1 D <- as.matrix(dist(prt)) diag(D) <- Inf D <- apply(D, 1, min) plot(100*prt, xaxs = "i", yaxs = "i", type="n", asp=1, ylim=c(0, Max), xlim=c(0, Max), xlab="Distance-sampling DD Estimate (m)", ylab="LOO DD Estimate (m)") abline(0,2,lty=2, col="grey") abline(0,1/2,lty=2, col="grey") abline(0,1.5,lty=2, col="grey") abline(0,1/1.5,lty=2, col="grey") abline(0,1,lty=1, col=1) r0 <- size_fun(x$logmass, 0.2*Max/50, Max/50) draw.ellipse(100*prt[,1], 100*prt[,2], r0, r0, border=c(Col1,Col3)[as.integer(x$Hab2)]) segments(100*prt[,1], 100*PIt[,1], 100*prt[,1], 100*PIt[,2], col=c(Col1,Col3)[as.integer(x$Hab2)], lwd=1) box() legend("topleft", pch=21, col=c(Col1,Col3), pt.cex=c(2,2), bty="n", legend=c("Habitat: Closed", "Habitat: Open")) r <- seq(0.2*Max/50, Max/50, len=5) S <- Max/0.7 draw.ellipse(S*rep(0.06, 5), S*0.55+r-max(r), r, r, border=1) text(S*c(0.09, 0.06, 0.06), S*c(0.61, 0.52, 0.58), c("Body Mass (g)",round(range(x$logmass),1)), cex=c(1,0.75,0.75)) text(0.9*Max, 0.05*Max, expression(M[0*beta]-DD)) Ti <- D > 0.09 | (prt[,1]/prt[,2] > 2 | prt[,1]/prt[,2] < 1/2) text(100*prt[,1], 100*prt[,2], ifelse(Ti, as.character(x$spp), NA), cex=0.6, pos=3, col=1) par(op) #dev.off() ``` The percent of species where prediction intervals overlapped the estimated values: ```{r percent-overlap} library(intrval) ## SR ## # species with overlapping PI sum(prp[,"obs"] %[]% PIp) ## # species with >150% or <66% of original estimates sum((prp[,"pred"]/prp[,"obs"]) %)(% c(3/2, 2/3)) ## # species with >200% or <50% of original estimates sum((prp[,"pred"]/prp[,"obs"]) %)(% c(2, 1/2)) ## DD ## % species with overlapping PI sum(prt[,"obs"] %[]% PIt) ## # species with >150% or <66% of original estimates sum((prt[,"pred"]/prt[,"obs"]) %)(% c(3/2, 2/3)) ## # species with >200% or <50% of original estimates sum((prt[,"pred"]/prt[,"obs"]) %)(% c(2, 1/2)) ``` ### Mapping continuous traits on the tree We need to hack the `phytools::contMap` function to produce non-rainbow colors. Then we produce trees for the continuous traits. ```{r tree-trait-2,fig.height=12,fig.width=12} library(phytools) contMap2 <- function (tree, x, res = 100, fsize = NULL, ftype = NULL, lwd = 4, legend = NULL, lims = NULL, outline = TRUE, sig = 3, type = "phylogram", direction = "rightwards", plot = TRUE, col_fun=rainbow, ...) { if (!inherits(tree, "phylo")) stop("tree should be an object of class \"phylo\".") if (hasArg(mar)) mar <- list(...)$mar else mar <- rep(0.3, 4) if (hasArg(offset)) offset <- list(...)$offset else offset <- NULL if (hasArg(method)) method <- list(...)$method else method <- "fastAnc" if (hasArg(hold)) hold <- list(...)$hold else hold <- TRUE if (hasArg(leg.txt)) leg.txt <- list(...)$leg.txt else leg.txt <- "trait value" h <- max(nodeHeights(tree)) steps <- 0:res/res * max(h) H <- nodeHeights(tree) if (method == "fastAnc") a <- fastAnc(tree, x) else if (method == "anc.ML") { fit <- anc.ML(tree, x) a <- fit$ace if (!is.null(fit$missing.x)) x <- c(x, fit$missing.x) } else if (method == "user") { if (hasArg(anc.states)) anc.states <- list(...)$anc.states else { cat("No ancestral states have been provided. Using states estimated with fastAnc.\n\n") a <- fastAnc(tree, x) } if (length(anc.states) < tree$Nnode) { nodes <- as.numeric(names(anc.states)) tt <- tree for (i in 1:length(nodes)) { M <- matchNodes(tt, tree, method = "distances") ii <- M[which(M[, 2] == nodes[i]), 1] tt <- bind.tip(tt, nodes[i], edge.length = 0, where = ii) } a <- fastAnc(tt, c(x, anc.states)) M <- matchNodes(tree, tt, method = "distances") a <- a[as.character(M[, 2])] names(a) <- M[, 1] } else { if (is.null(names(anc.states))) names(anc.states) <- 1:tree$Nnode + Ntip(tree) a <- anc.states[as.character(1:tree$Nnode + Ntip(tree))] } } y <- c(a, x[tree$tip.label]) names(y)[1:Ntip(tree) + tree$Nnode] <- 1:Ntip(tree) A <- matrix(y[as.character(tree$edge)], nrow(tree$edge), ncol(tree$edge)) cols <- col_fun(1001) names(cols) <- 0:1000 if (is.null(lims)) lims <- c(min(y), max(y)) trans <- 0:1000/1000 * (lims[2] - lims[1]) + lims[1] names(trans) <- 0:1000 tree$maps <- vector(mode = "list", length = nrow(tree$edge)) for (i in 1:nrow(tree$edge)) { XX <- cbind(c(H[i, 1], steps[intersect(which(steps > H[i, 1]), which(steps < H[i, 2]))]), c(steps[intersect(which(steps > H[i, 1]), which(steps < H[i, 2]))], H[i, 2])) - H[i, 1] YY <- rowMeans(XX) if (!all(YY == 0)) { b <- vector() for (j in 1:length(YY)) b[j] <- (A[i, 1]/YY[j] + A[i, 2]/(max(XX) - YY[j]))/(1/YY[j] + 1/(max(XX) - YY[j])) } else b <- A[i, 1] d <- sapply(b, phytools:::getState, trans = trans) tree$maps[[i]] <- XX[, 2] - XX[, 1] names(tree$maps[[i]]) <- d } tree$mapped.edge <- phytools:::makeMappedEdge(tree$edge, tree$maps) tree$mapped.edge <- tree$mapped.edge[, order(as.numeric(colnames(tree$mapped.edge)))] class(tree) <- c("simmap", setdiff(class(tree), "simmap")) xx <- list(tree = tree, cols = cols, lims = lims) class(xx) <- "contMap" if (plot) phytools:::plot.contMap(xx, fsize = fsize, ftype = ftype, lwd = lwd, legend = legend, outline = outline, sig = sig, type = type, mar = mar, direction = direction, offset = offset, hold = hold, leg.txt = leg.txt) invisible(xx) } ## log singing rates contMap2(tre, structure(d2$logphi, .Names=NAMES), fsize=0.5, outline=FALSE, col_fun=col_fun) ## log detection distances contMap2(tre, structure(d2$logtau, .Names=NAMES), fsize=0.5, outline=FALSE, col_fun=col_fun) ## log body mass contMap2(tre, structure(d2$logmass, .Names=NAMES), fsize=0.5, outline=FALSE, col_fun=col_fun) ## pitch contMap2(tre, structure(d2$MaxFreqkHz, .Names=NAMES), fsize=0.5, outline=FALSE, col_fun=col_fun) ## migratory status contMap2(tre, structure(d2$Mig2, .Names=NAMES), fsize=0.5, outline=FALSE, col_fun=col_fun) ## habitat associations contMap2(tre, structure(d2$Hab2, .Names=NAMES), fsize=0.5, outline=FALSE, col_fun=col_fun) ```