This article provides code for common tasks, such as how to take the ‘long format’ data through the ABMI public data API and turn it into a more conventional ‘wide format’, i.e. samples x species. This is a common pre-requisite of data analysis.
Prerequisites:
library(abmidata)
library(mefa4)
## directory to save the results to
ROOT <- ""
## current local date
DATE <- as.Date(Sys.time(), tz=Sys.timezone(location = TRUE))
## ABMI site metadata
gis <- read.csv("https://raw.githubusercontent.com/psolymos/abmianalytics/master/lookup/sitemetadata.csv")
## turn characters into factors
char2fact <- function(x) {
for (i in 1:ncol(x))
if (is.character(x[,i]))
x[,i] <- as.factor(x[,i])
x
}
This involves vascular plants, lichens, mosses, and soil mites.
## common variables
TAXA <- c("vplants", "mites", "mosses", "lichens")
cn1 <- c("ABMISite", "Year", "subunit", "site_year", "site_year_sub", "offgrid", "nearest")
cn2 <- c("SpeciesID", "CommonName", "ScientificName", "TaxonomicResolution",
"UniqueTaxonomicIdentificationNumber")
## function to create unique IDs
add_labels <- function(res, sub_col) {
res$offgrid <- startsWith(as.character(res$ABMISite), "OG")
res$subunit <- res[[sub_col]]
res[[sub_col]] <- NULL
res$site_year <- interaction(res$ABMISite,
res$Year, drop=TRUE, sep="_")
res$site_year_sub <- interaction(res$ABMISite,
res$Year,
res$subunit, drop=TRUE, sep="_")
tmp <- strsplit(as.character(res$ABMISite), "-")
res$nearest <- sapply(tmp, function(z) {
zz <- if (length(z) > 1) z[3] else z[1]
as.integer(gsub("\\D+", "", zz))
})
res
}
## function to normalize species names
normalize_species <- function(res, spgen_only=TRUE) {
if (spgen_only) {
levels(res$ScientificName) <- gsub("X ", "", levels(res$ScientificName))
levels(res$ScientificName) <- gsub(" x ", " ", levels(res$ScientificName))
levels(res$ScientificName) <- sapply(strsplit(levels(res$ScientificName), " "),
function(z) {
paste(z[1:min(2, length(z))], collapse=" ")
})
levels(res$TaxonomicResolution)[levels(res$TaxonomicResolution) %in%
c("Subspecies", "Variety")] <- "Species"
}
res$SpeciesID <- res$ScientificName
levels(res$SpeciesID) <- nameAlnum(levels(res$SpeciesID),
capitalize="mixed", collapse="")
res$SpeciesID <- droplevels(res$SpeciesID)
res
}
## loop through TAXA
for (taxon in TAXA) {
cat("taxon:", taxon, "\n - pulling and normalizing data")
flush.console()
## vascular plants -------------------------
if (taxon == "vplants") {
tab <- "T15"
sub_col <- "Quadrant"
allowed_subunits <- c("NE", "NW", "SE", "SW")
allowed_resolution <- c("Genus", "Species")
sub_max <- 4
res <- char2fact(ad_get_table(tab))
res0 <- res
save_list <- "res0"
colnames(res) <- gsub(" ", "", colnames(res))
res <- add_labels(res, sub_col=sub_col)
res <- normalize_species(res)
}
## mosses -------------------------
if (taxon == "mosses") {
tab1 <- "T19A" # T19A Moss Identification (2003-2008)
tab2 <- "T19B" # T19B Moss Identification (since 2009)
sub_col <- "Quadrant"
allowed_subunits <- c("NE", "NW", "SE", "SW", "1ha")
allowed_resolution <- c("Genus", "Species")
sub_max <- 4
res1 <- char2fact(ad_get_table(tab1))
res2 <- char2fact(ad_get_table(tab2))
res01 <- res1
res02 <- res2
save_list <- c("res01", "res02")
colnames(res1) <- gsub(" ", "", colnames(res1))
res1[[sub_col]] <- as.factor("1ha")
res1 <- add_labels(res1, sub_col=sub_col)
res1 <- normalize_species(res1)
colnames(res2) <- gsub(" ", "", colnames(res2))
res2 <- add_labels(res2, sub_col=sub_col)
res2 <- normalize_species(res2)
tmp <- intersect(colnames(res1), colnames(res2))
res <- rbind(res1[,tmp], res2[,tmp])
}
## lichens -------------------------
if (taxon == "lichens") {
tab1 <- "T20A" # T20A Lichen Identification (2003-2008)
tab2 <- "T20B" # T20B Lichen Identification (since 2009)
sub_col <- "Quadrant"
allowed_subunits <- c("NE", "NW", "SE", "SW", "1ha")
allowed_resolution <- c("Genus", "Species")
sub_max <- 4
res1 <- char2fact(ad_get_table(tab1))
res2 <- char2fact(ad_get_table(tab2))
res01 <- res1
res02 <- res2
save_list <- c("res01", "res02")
colnames(res1) <- gsub(" ", "", colnames(res1))
res1[[sub_col]] <- as.factor("1ha")
res1 <- add_labels(res1, sub_col=sub_col)
res1 <- normalize_species(res1)
colnames(res2) <- gsub(" ", "", colnames(res2))
res2 <- add_labels(res2, sub_col=sub_col)
res2 <- normalize_species(res2)
tmp <- intersect(colnames(res1), colnames(res2))
res <- rbind(res1[,tmp], res2[,tmp])
}
## mites -------------------------
if (taxon == "mites") {
tab <- "T24A"
sub_col <- "Quadrant"
allowed_subunits <- c("NE", "NW", "SE", "SW")
allowed_resolution <- c("Genus", "Species", "Subspecies")
sub_max <- 4
res <- char2fact(ad_get_table(tab))
res0 <- res
save_list <- "res0"
colnames(res) <- gsub(" ", "", colnames(res))
res <- add_labels(res, sub_col=sub_col)
res <- normalize_species(res, spgen_only=FALSE) # keep spp names as is
res$CommonName <- NA
}
cat(" --- OK\n - processing attributes and x-tabs")
flush.console()
## sample attributes
x_site_year <- nonDuplicated(res, res$site_year, TRUE)[,cn1]
x_site_year$subunit <- x_site_year$site_year_sub <- NULL
rownames(x_site_year) <- x_site_year$site_year
x_site_year_sub <- nonDuplicated(res, res$site_year_sub, TRUE)[,cn1]
rownames(x_site_year_sub) <- x_site_year_sub$site_year_sub
## species attributes
z <- nonDuplicated(res, res$SpeciesID, TRUE)[,c(cn2)]
rownames(z) <- z$SpeciesID
## sample-species cross tabs
y_site_year_sub <- Xtab(~ site_year_sub + SpeciesID, res,
cdrop=c("NONE","SNI", "VNA", "DNC", "PNA"))
y_site_year_sub01 <- y_site_year_sub
y_site_year_sub01[y_site_year_sub01 > 0] <- 1
if (taxon %in% c("vplants", "mosses", "lichens"))
y_site_year_sub <- y_site_year_sub01
## mefa bundles for sample/subunits
m_site_year_sub <- Mefa(y_site_year_sub, x_site_year_sub, z)
m_site_year_sub <- m_site_year_sub[,taxa(m_site_year_sub)$TaxonomicResolution %in%
allowed_resolution]
m_site_year_sub01 <- Mefa(y_site_year_sub01, x_site_year_sub, z)
m_site_year_sub01 <- m_site_year_sub01[,
taxa(m_site_year_sub)$TaxonomicResolution %in% allowed_resolution]
## aggregated cross tabs for binomial tables
tmp <- m_site_year_sub01[samp(m_site_year_sub01)$subunit %in% allowed_subunits]
nn <- sum_by(rep(1, nrow(tmp)), droplevels(samp(tmp)$site_year))
y_site_year <- groupSums(xtab(tmp), 1, droplevels(samp(tmp)$site_year))
stopifnot(max(y_site_year) == sub_max)
## aggregated mefa bundles for samples
m_site_year <- Mefa(y_site_year, x_site_year, z)
samp(m_site_year)$nQuadrant <- nn[match(rownames(m_site_year), rownames(nn)),"by"]
## csv view
out_site_year <- data.frame(samp(m_site_year), as.matrix(xtab(m_site_year)))
out_site_year_sub <- data.frame(samp(m_site_year_sub),
as.matrix(xtab(m_site_year_sub)))
out_species <- taxa(m_site_year)
cat(" --- OK\n - saving files")
flush.console()
## save raw input
save(list=save_list, file=file.path(ROOT, "data", "raw", "species",
paste0(taxon, "_", DATE, ".RData")))
## save bundles
save(m_site_year, m_site_year_sub, file=file.path(ROOT, "data", "inter", "species",
paste0(taxon, "_", DATE, ".RData")))
## write csv & binary
write.csv(out_site_year, row.names=FALSE,
file=file.path(ROOT,
paste0(taxon, "_SiteBinom_", DATE, ".csv")))
write.csv(out_site_year_sub, row.names=FALSE,
file=file.path(ROOT,
paste0(taxon, "_Quadrant_", DATE, ".csv")))
write.csv(out_species, row.names=FALSE,
file=file.path(ROOT,
paste0(taxon, "_Species_", DATE, ".csv")))
save(out_site_year, out_site_year_sub, out_species,
file=file.path(ROOT,
paste0(taxon, "_out_", DATE, ".RData")))
## vascular plants common/dominant
if (taxon == "vplants") {
res$comdom <- res$Present
levels(res$comdom)[!(levels(res$comdom) %in%
c("Dominant", "Common"))] <- "Uncommon"
table(res$comdom)
xt0 <- Xtab(~ site_year_sub + SpeciesID + comdom, res,
cdrop=c("NONE","SNI", "VNA", "DNC", "PNA"))
xt1 <- xt0$Uncommon
xt1[xt1>0] <- 1
xt10 <- xt0$Common * 10
xt10[xt10>0] <- 10
xt100 <- xt0$Dominant * 100
xt100[xt100>0] <- 100
y_comdom <- xt1
y_comdom[xt10>0] <- xt10[xt10>0]
y_comdom[xt100>0] <- xt100[xt100>0]
save(y_comdom, file=file.path(ROOT,
paste0(taxon, "_comdom_", DATE, ".RData")))
}
cat(" --- OK\n")
flush.console()
}
Habitat elements are also measured at the site centre, but there are many different protocols and tables where the results are coming from.
HEtabs <- list(
"T11" = list(
name="Canopy cover",
slug="canopy-cover",
sub="Sub-ordinaltransect"),
"T12B" = list(
name="Cover layers",
slug="cover-layers",
sub="Quadrant"),
"T08" = list(
name="CWD",
slug="cwd",
sub="Transect"),
"T01D" = list(
name="Ground cover",
slug="ground-cover",
sub=NULL),
"T25" = list(
name="Mineral soil",
slug="mineral-soil",
sub="Quadrant"),
"T02A" = list(
name="Surface substrate",
slug="surface-substrate",
sub=NULL),
"T09" = list(
name="Trees snags",
slug="trees-snags",
sub="Quadrant"))
HabElem <- list()
for (i in seq_along(HEtabs)) {
cat("* Pulling ", HEtabs[[i]]$name, "...")
res <- char2fact(ad_get_table(names(HEtabs)[i]))
colnames(res) <- gsub(" ", "", colnames(res))
if (is.null(HEtabs[[i]]$sub)) {
subcol <- "sub_col"
res$sub_col <- rep("DNC", nrow(res))
} else {
subcol <- HEtabs[[i]]$sub
}
res <- add_labels(res, sub_col=subcol)
HabElem[[HEtabs[[i]]$slug]] <- res
write.csv(res, row.names=FALSE,
file=file.path(ROOT,
paste0("habitatelements_", HEtabs[[i]]$slug, "_", DATE, ".csv")))
cat(" OK\n")
}
save(HabElem,
file=file.path(ROOT,
paste0("habitatelements_out_", DATE, ".RData")))
The ABMI public data API can be used to access bird point count data
collected using the RiverForks (RF
) and SongMeter ARU
(SM
) protocol. For latest data, see WildTrax.
taxon <- "birds"
resRF0 <- char2fact(ad_get_table("T26A"))
resSM0 <- char2fact(ad_get_table("T26B"))
for (i in c("Precipitation", "Wind Conditions",
"Industrial Noise", "Backgroud Noise")) {
resSM0[[i]] <- as.numeric(as.character(resSM0[[i]]))
}
resSM0$`ABMI Site` <- as.character(resSM0$`ABMI Site`)
resRF <- resRF0
resSM <- resSM0
table(resRF$Year)
table(resSM$Year)
save(resRF0, resSM0,
file=file.path(ROOT, paste0(taxon, "_", DATE, ".RData")))
## River Forks, 3 intervals ------------------------------
colnames(resRF) <- gsub(" ", "", colnames(resRF))
resRF <- add_labels(resRF, sub_col="PointCountStation")
#resRF <- normalize_species(resRF)
resRF$SpeciesID <- resRF$CommonName
levels(resRF$SpeciesID) <- nameAlnum(levels(resRF$SpeciesID), capitalize="mixed", collapse="")
resRF$SpeciesID <- droplevels(resRF$SpeciesID)
resRF$TBB_TIME_1ST_DETECTED <- resRF[["TimeFirstDetected(seconds)"]]
resRF$TBB_INTERVAL_1 <- resRF[["Interval1(0-200seconds)"]]
resRF$TBB_INTERVAL_2 <- resRF[["Interval2(201-400seconds)"]]
resRF$TBB_INTERVAL_3 <- resRF[["Interval3(401-600seconds)"]]
resRF$TBB_TIME_1ST_DETECTED <- as.character(resRF$TBB_TIME_1ST_DETECTED)
table(resRF$TBB_TIME_1ST_DETECTED,resRF$Year)
resRF$TBB_TIME_1ST_DETECTED[resRF$TBB_TIME_1ST_DETECTED %in%
c("DNC", "NONE", "VNA")] <- NA
resRF$TBB_TIME_1ST_DETECTED <- as.numeric(as.character(resRF$TBB_TIME_1ST_DETECTED))
resRF$period1st <- as.numeric(cut(resRF$TBB_TIME_1ST_DETECTED, c(-1, 200, 400, 600)))
resRF$keep <-
resRF$TBB_INTERVAL_1 %in% c("0","1") &
resRF$TBB_INTERVAL_2 %in% c("0","1") &
resRF$TBB_INTERVAL_3 %in% c("0","1")
#resRF <- resRF[resRF$TBB_INTERVAL_1 %in% c("0","1"),]
#resRF <- resRF[resRF$TBB_INTERVAL_2 %in% c("0","1"),]
#resRF <- resRF[resRF$TBB_INTERVAL_3 %in% c("0","1"),]
resRF$TBB_INTERVAL_1 <- as.integer(resRF$TBB_INTERVAL_1) - 1L
resRF$TBB_INTERVAL_2 <- as.integer(resRF$TBB_INTERVAL_2) - 1L
resRF$TBB_INTERVAL_3 <- as.integer(resRF$TBB_INTERVAL_3) - 1L
tmp <- col(matrix(0,sum(resRF$keep),3)) *
resRF[resRF$keep,c("TBB_INTERVAL_1","TBB_INTERVAL_2","TBB_INTERVAL_3")]
tmp[tmp==0] <- NA
tmp <- cbind(999,tmp)
resRF$period123 <- 999
resRF$period123[resRF$keep] <- apply(tmp, 1, min, na.rm=TRUE)
resRF$period123[resRF$period123 > 3] <- 999
with(resRF, table(period1st, period123))
resRF$period1 <- pmin(resRF$period1st, resRF$period123)
with(resRF, table(period1st, period1))
with(resRF, table(period123, period1))
## SM units --------------------------------------------
colnames(resSM) <- gsub(" ", "", colnames(resSM))
#cn <- c("ABMISite", "Year", "Quadrant", "Method",
# "Interval1(1minute)", "Interval2(1minute)", "Interval3(1minute)",
# "CommonName", "RecordingDate", "RecordingTime")
#resSM <- resSM[,cn]
tmp <- paste(resSM$RecordingDate, resSM$RecordingTime)
resSM$Start <- strptime(tmp, "%d-%b-%y %H:%M:%S")
resSM$Duration <- NA
resSM$Duration[resSM$Method %in% c("11", "14")] <- 3
resSM$Duration[resSM$Method %in% c("12", "13")] <- 1
#resSM <- add_labels(resSM, sub_col="Quadrant")
resSM$site_year_sub <- interaction(resSM$ABMISite, resSM$Year, resSM$Quadrant, drop=TRUE, sep="_")
table(is.na(resSM$ABMISite))
#resSM <- normalize_species(resSM)
resSM$SpeciesID <- resSM$CommonName
levels(resSM$SpeciesID) <- nameAlnum(levels(resSM$SpeciesID), capitalize="mixed", collapse="")
resSM$SpeciesID <- droplevels(resSM$SpeciesID)
## first detection interval
resSM$int1 <- ifelse(resSM$`Interval1(1minute)` == "VNA", NA, as.integer(resSM$`Interval1(1minute)`))
resSM$int2 <- ifelse(resSM$`Interval2(1minute)` == "VNA", NA, as.integer(resSM$`Interval2(1minute)`))
resSM$int3 <- ifelse(resSM$`Interval3(1minute)` == "VNA", NA, as.integer(resSM$`Interval3(1minute)`))
tmp <- col(resSM[,c("int1", "int2", "int3")])
tmp[is.na(resSM[,c("int1", "int2", "int3")])] <- Inf
tmp2 <- find_min(tmp)
tmp2$value[is.infinite(tmp2$value)] <- NA
resSM$res1 <- tmp2$value
resSM$ToY <- resSM$Start$yday
resSM$ToYc <- as.integer(cut(resSM$ToY, c(0, 105, 120, 140, 150, 160, 170, 180, 365)))
resSM$visit <- interaction(resSM$site_year_sub, resSM$ToYc, drop=TRUE)
resSM$ToD <- resSM$Start$hour + resSM$Start$min / 60
resSM$ToDx <- round(resSM$ToD, 0)
resSM$ToDc <- as.factor(ifelse(resSM$ToDx == 0, "Midnight", "Morning"))
## crosstabs
cd <- c("NONE","SNI", "VNA", "DNC", "PNA")
xt_10 <- Xtab(~ site_year_sub + SpeciesID,
resRF[resRF$period123 <= 3,], cdrop=cd)
xt_pts <- Xtab(~ site_year_sub + SpeciesID + period123, resRF, cdrop=cd)[1:3]
xt_pts[[1]] <- as.matrix(xt_pts[[1]])
xt_pts[[2]] <- as.matrix(xt_pts[[2]])
xt_pts[[3]] <- as.matrix(xt_pts[[3]])
x_pts <- nonDuplicated(resRF, site_year_sub, TRUE)
x_pts <- x_pts[rownames(xt_pts[[1]]),]
xt_stn <- as.matrix(Xtab(~ site_year_sub + SpeciesID, resSM, cdrop=cd))
x_stn <- nonDuplicated(resSM, site_year_sub, TRUE)
x_stn <- x_stn[rownames(xt_stn),]
keep <- !is.na(resSM$visit)
xt_vis <- as.matrix(Xtab(~ visit + SpeciesID, resSM[keep,], cdrop=cd))
x_vis <- nonDuplicated(resSM[keep,], visit, TRUE)
x_vis <- x_vis[rownames(xt_vis),]
save(xt_pts, xt_stn, xt_vis, x_pts, x_stn, x_vis, xt_10, resSM, resRF,
file=file.path(ROOT, "birds-crosstabs.RData"))