library(readr) # read csv objects
library(dplyr) # manipulate tables
library(tidyr) # data tidying
library(ggplot2) # plot figures
library(scales) # change axis values
library(countrycode) # download country information
Supplementary material - Figures and Tables
0010_Supplementary_analysis.qmd
In this document we provide the code to reproduce the Figures and Tables presented in supplementary material of the manuscript “The macroecology of knowledge: Spatio-temporal patterns of name-bearing types in biodiversity science”
Native species richness
Native richness with Catalog of Fishes
We calculate the native species richness for each country from data in the Catalog of Fishes. We used this source to avoid taxonomic mismatches between species names.
# Data from 01_C_data_preparation.qmd
<- readr::read_csv(file = here::here("data","processed", "df_country_native.csv")) df_country_native
<-
countries ::ne_countries(scale = "medium",
rnaturalearthreturnclass = "sf") |>
::filter(region_wb != "Antarctica")|>
dplyr::ms_filter_islands(min_area = 20000000000) |>
rmapshaper::ms_simplify(keep = 0.95)
rmapshaper
<-
sf_countries ::st_as_sf(countries) |>
sf::filter(admin != "Antarctica") |>
dplyr::select(iso_a3)
dplyr
<-
df_country_native_sf |>
sf_countries ::full_join(df_country_native,
dplyrby = c(iso_a3 = "country_distribution"))
Comparing richness from the Catalog of Fishes and Fishbase
Here we compared the richness obtained from the Catalog of Fishes and the Fishbase.
<- readr::read_csv(file = here::here("data","processed", "fishbase_species_country.csv"))
df_country_native_fishbase
<-
df_country_native_fishbase2 |>
df_country_native_fishbase ::rename(iso3c.fishbase = iso3c)
dplyr
<-
df_richness_all |>
df_country_native ::left_join(df_country_native_fishbase2, by = c(country_distribution = "iso3c.fishbase")) |>
dplyr::drop_na()
tidyr
cor(df_richness_all$native.richness, df_richness_all$n)
[1] 0.898011
Figure S1 - Native richness
Native richness was extracted from the Catalog of Fishes
|>
df_country_native_sf ggplot()+
geom_sf(aes(fill = native.richness))+
scale_fill_distiller(palette = "YlGnBu",
direction = 1,
na.value = "grey90",
breaks = c(1, 1000, 2000,3000, 3854))+
labs(
fill = "Native richness"
+
)guides(fill = guide_colorbar(barwidth = 20))+
theme_void()+
theme(
legend.position = "top",
legend.title.position = "top",
legend.title = element_text(hjust = 0.5),
plot.background = element_rect(fill = "white",
color = NA)
+
)coord_sf(
crs = "+proj=moll +x_50=0 +y_0=0 +lat_0=0 +lon_0=0"
)
ggsave(here::here("output", "figures",
"Supp-material", "FigS1_native_richness.png"),
width = 7, height = 5, dpi = 600)
All time Domestic Contribution (DC) and Domestic Retention (DR)
Here we provided all time values of DR and DC for each region. We used the same data from Figure 2 of the main text, but pulling together all the data
# Data from 01_C_data_preparation.qmd
<- readr::read_csv(file = here::here("data", "processed", "flow_region_prop.csv")) flow_region_prop
Figure S2 - Scatterplot of all-time
|>
flow_region_prop ggplot(aes(x = prop_DC, y = prop_DR, fill = region_type))+
geom_hline(yintercept = 0.5, linetype = "dashed")+
geom_vline(xintercept = 0.5, linetype = "dashed")+
geom_point(
shape = 21,
size = 2.5
+
)scale_fill_manual(
values = c(
"Europe & Central Asia" = "#E64B35FF",
"East Asia & Pacific" = "#4DBBD5FF",
"North America" = "#3C5488FF",
"South Asia" = "#B09C85FF",
"Latin America & Caribbean" = "#00A087FF",
"Sub-Saharan Africa" = "#F39B7FFF",
"Middle East & North Africa" = "#8491B4FF"
)+
)scale_x_continuous(
labels = scales::label_percent(),
expand = expansion(mult = c(0.05, 0))
+
)scale_y_continuous(
labels = scales::label_percent(),
expand = expansion(mult = c(0.05, 0))
+
)labs(
x = "Domestic Contribution (DC)",
y = "Domestic Retention (DR)"
+
)theme_classic()+
theme(
strip.background = element_rect(fill = NA, color = NA),
strip.text = element_text(face = "bold"),
legend.position = "none",
plot.background = element_blank(),
panel.spacing = unit(5, "pt"),
panel.spacing.x = unit(15, "pt"),
plot.margin = margin(5,15,5,5,"pt"),
axis.line = element_line(lineend = "round"),
axis.text = element_text(color = "black"),
axis.ticks = element_line(color = "black")
+
)coord_cartesian(xlim = c(0,1),
ylim = c(0,1),
clip = "off")
# saving image
ggsave(filename = here::here("output", "figures", "Supp-material", "FigS2_scatterplot.png"),
width = 3.5, height = 3)
Calculating turnover metrics with full native dataset
Reading packages
# data
library(readr) # reading CSV files
library(here) # constructing file paths
library(dplyr) # data manipulation
library(tidyr) # data tidying
library(phyloregion) # handling phylogenetic data and transformations
Importing and processing data
Importing and processing native composition and NBT composition data
# From 01_C_data_preparation.qmd
<- readr::read_csv(here::here("data", "raw", "spp_native_distribution.csv"))
spp_native_distribution
# From 01_C_data_preparation.qmd
<- readr::read_csv(here::here("data", "raw", "spp_type_distribution.csv")) spp_type_distribution
Checking the specimens with data in both tables (native distribution and types). We transformed the long format species occurrence data frame to dense format. During this procedure we removed 1204 species that do not have information on native distribution (or we couldn’t get this information from CAS)
<-
df_native_grid |>
spp_native_distribution ::select(grids = country_distribution,
dplyrspecies = species) |>
::drop_na(grids)
tidyr
<-
df_type_grid |>
spp_type_distribution ::select(grids = country_museum,
dplyrspecies = species) |>
::drop_na(grids) |>
tidyr::mutate(grids = paste(grids, "type", sep = "_"))
dplyr
# joining data frames
<- rbind(df_native_grid, df_type_grid) # joining both matrices -
df_all_grid #native and types composition
#### Just descriptive quantities
<- unique(df_native_grid$grids)
country_native <- gsub(pattern = "_type",
country_type replacement = "",
unique(df_type_grid$grids))
<- setdiff(country_native, country_type) # countries with no type specimen
country_type_zero
# transforming into a sparse matrix to speed up calculations
<- df_all_grid |>
sparse_all ::long2sparse(grids = "grids", species = "species") |>
phyloregion::sparse2dense()
phyloregion
# Transforming in presence absence matrix
<- ifelse(sparse_all >= 1, 1, 0)
sparse_all_pa
# Binding countries with no types - adding zeroes
<- paste(country_type_zero, "_type", sep = "") # this will be used to bind together matrix with types and add the countries with no type
country_type_zero_names <- matrix(0,
matrix_type_zero nrow = length(country_type_zero_names),
ncol = ncol(sparse_all_pa),
dimnames = list(country_type_zero_names,
colnames(sparse_all_pa)))
<- rbind(sparse_all_pa, matrix_type_zero) sparse_all_pa2
Calculating directional turnover based on native and primary type comparison
Here we calculated the turnover in two directions. One is the turnover of native composition, in other words, the underrepresentation of native fish species in museums and natural collections within the country. Values closer to one indicate that the country present a huge underepresentation of its native fish fauna in primary types located within the country.
The other metric is primary type turnover. Values approaching one indicate that there is an overepresentation of primary types maintained in the country when compared to the native fish fauna of that country.
source(here::here("R", "functions", "function_beta_types_success_fail.R"))
<- unique(df_native_grid$grids) # country names
names_countries
<- beta_types(presab = sparse_all_pa2,
df_all_beta names.countries = names_countries) # calculating metrics of directional turnover
::write_csv(df_all_beta, here::here("data", "processed", "df_all_beta.csv")) readr
Plotting native and NBT turnover for each country
In this section we present the cartogram of world map with values of native and NBT turnover using the full dataset of native species (Figure S4)
Loading packages
#data
library(dplyr)
library(tidyr)
#plot
library(ggplot2)
library(patchwork)
library(cowplot)
#map
library(rnaturalearth)
library(rmapshaper)
library(sf)
library(biscale)
Data
# Data from 02_D_beta-countries.qmd
<- readr::read_csv(here::here("data", "processed", "df_all_beta.csv")) df_all_beta
Joining metric information with geographical data
<- rnaturalearth::ne_countries(returnclass = "sf")
countries
<-
sf_countries ::st_as_sf(countries) |>
sf::filter(admin != "Antarctica") |>
dplyr::st_transform(crs = "+proj=moll +x_0=0 +y_0=0 +lat_0=0 +lon_0=0") |>
sf::select(iso_a3_eh)
dplyr
<-
df_all_beta_sf |>
sf_countries ::full_join(df_all_beta, by = c(iso_a3_eh = "countries")) dplyr
First processing spatial data to convert NA values into 0
<-
df_all_beta_sf2 |>
df_all_beta_sf ::st_as_sf() |>
sf::ms_filter_islands(min_area = 12391399903) |>
rmapshaper::mutate(
dplyrtype.beta = ifelse(is.na(type.beta),
0,
type.beta),native.beta = ifelse(is.na(native.beta),
0,
native.beta))
Create palettes
<- colorRampPalette(c("#d3d3d3", "#accaca", "#81c1c1", "#52b6b6"))
palette_blue
<- colorRampPalette(c("#d3d3d3", "#c5acc2", "#bb84b1", "#ac5a9c")) palette_pink
Plotting maps
<-
map_native_beta ggplot() +
geom_sf(data = df_all_beta_sf2,
aes(geometry = geometry,
fill = native.beta),
color = "white",
size = 0.1, na.rm = T) +
scale_fill_gradientn(
colors = palette_pink(10),
na.value = "#d3d3d3",
limits = c(0,1)
+
)guides(fill = guide_colorbar(
barheight = unit(0.1, units = "in"),
barwidth = unit(4, units = "in"),
ticks.colour = "grey20",
title.position="top",
title.hjust = 0.5
+
)) labs(
fill = "Native"
+
)theme_classic()+
theme(
legend.position = "bottom",
legend.margin = margin(-10,0,0,0,"pt"),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank()
)
<-
map_type_beta ggplot() +
geom_sf(data = df_all_beta_sf2,
aes(geometry = geometry,
fill = type.beta),
color = "white",
size = 0.1, na.rm = T) +
scale_fill_gradientn(
colors = palette_blue(10),
na.value = "#d3d3d3",
limits = c(0,1)
+
)guides(fill = guide_colorbar(
barheight = unit(0.1, units = "in"),
barwidth = unit(4, units = "in"),
ticks.colour = "grey20",
title.position="top",
title.hjust = 0.5
+
)) labs(
fill = "Types"
+
)theme_classic()+
theme(
legend.position = "bottom",
legend.margin = margin(-10,0,0,0,"pt"),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank()
)
Plotting the two quantities (native and types turnover) in a bivariate map
<-
sf_bivar_types bi_class(df_all_beta_sf2,
x = type.beta,
y = native.beta,
style = "equal",
dim = 4)
<-
bivar_map_types ggplot() +
geom_sf(data = sf_bivar_types,
aes(geometry = geometry,
fill = bi_class),
color = "white",
size = 0.1,
show.legend = FALSE) +
bi_scale_fill(pal = "DkBlue2",
dim = 4) +
theme_classic()+
theme(
legend.position = "bottom",
legend.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank(),
panel.background = element_rect(fill = NA),
plot.background = element_rect(fill = NA)
)
<-
legend bi_legend(pal = "DkBlue2",
dim = 4,
xlab = "NBT",
ylab = "Native",
size = )
<-
bivar_map_type_final ggdraw() +
draw_plot(legend, 0.0, 0.15, 0.25, 0.25) +
draw_plot(bivar_map_types, 0, 0, 1, 1)
Joining all the maps
<-
map_turnover_all +map_type_beta+bivar_map_type_final+
map_native_beta::plot_layout(
patchworkdesign =
"AB
CC"
+
)::plot_annotation(tag_levels = "a")&
patchworktheme(
plot.tag = element_text(face = "bold", hjust = 0, vjust = 1),
plot.tag.position = c(0, 1),
) map_turnover_all
ggsave(here::here("output", "figures", "Supp-material", "FigS3_turnover_metrics.png"),
dpi=600, width = 10, height = 9) map_turnover_all,
Comparison between full dataset and endemic dataset
We performed correlations between Native and NBT turnover calculated using the endemic dataset and the full dataset of native species
<- readr::read_csv(here::here("data", "processed", "df_endemic_beta.csv"))
df_endemic_beta
<-
df_endemic_beta2 |>
df_endemic_beta ::drop_na(type.beta)
tidyr<-
df_all_beta_sf2 |>
df_all_beta_sf ::drop_na(type.beta)
tidyr
<-
df_cor |>
df_endemic_beta2 ::left_join(df_all_beta_sf2, by = c(countries = "iso_a3_eh"))
dplyr
cor(df_cor$type.beta.x, df_cor$type.beta.y)
[1] 0.8185594
cor(df_cor$native.beta.x, df_cor$native.beta.y)
[1] 0.893118
Model results
Here we report tables containing full results of the generalized linear models presented in the main text . Also we report residuals diagnostic plots using DHARMa package
library(sjPlot) # creating summary tables of model results
library(glmmTMB) # read model output objects
library(DHARMa) # diagnostic graphics of models
library(here) # constructing file paths
Model data
Reading model results
# Data from 03_C_data_preparation.qmd
<- readr::read_csv(here::here("data", "processed", "df_country_complete6.csv"))
df_country_complete6
# Data from 04_D_model_NBTs.qmd
<- readRDS(here::here("output",
mod_counting_NBT "models",
"model_res_counting.rds")) # NBT total countings
<- readRDS(here::here("output",
mod_DC "models",
"model_res_dc.rds"))
<- readRDS(here::here("output",
mod_DR "models",
"model_res_dr.rds"))
<- readRDS(here::here("output",
mod_turnover_native "models",
"model_res_turnover_native.rds"))
<- readRDS(here::here("output",
mod_turnover_nbt "models",
"model_res_turnover_nbt.rds"))
Tables with estimated parameters
Producing tables with model parameters. These tables corresponds to the Tables S1 to S5 in the Supplementary material
# Table with model parameters for total number of NBT
::tab_model(mod_counting_NBT,
sjPlottransform = NULL,
pred.labels = c("Intercept",
"Native richness",
"Gbif records per area",
"Years since independence",
"GDP",
"Number of museums",
"Dispersion parameter"),
dv.labels = "Total Name Bearing Types",
string.pred = "Coefficients",
string.est = "Estimates",
string.p = "P-value")
Total Name Bearing Types | ||||
Coefficients | Estimates | CI | P-value | |
Count Model | ||||
Intercept | 3.64 | 3.20 – 4.09 | <0.001 | |
Native richness | 0.67 | 0.07 – 1.28 | 0.030 | |
Gbif records per area | 0.36 | -0.00 – 0.73 | 0.052 | |
Years since independence | -0.03 | -0.41 – 0.35 | 0.879 | |
GDP | 0.84 | 0.37 – 1.31 | <0.001 | |
Number of museums | 0.62 | 0.13 – 1.10 | 0.013 | |
Dispersion parameter | 0.46 | 0.33 – 0.65 | ||
Zero-Inflated Model | ||||
(Intercept) | -19.93 | -36.70 – -3.16 | 0.020 | |
native.richness.pad | 1.98 | -1.08 – 5.04 | 0.205 | |
records.per.area.pad | -0.04 | -1.13 – 1.05 | 0.941 | |
years.independence.pad | 1.01 | -0.37 – 2.38 | 0.151 | |
gdp.pad | -0.56 | -2.59 – 1.46 | 0.586 | |
n.museums.pad | -52.35 | -95.39 – -9.31 | 0.017 | |
Observations | 116 | |||
R2 / R2 adjusted | 1.000 / 1.000 |
# Table with parameter for Domestic contribution as the response variable
::tab_model(mod_DC,
sjPlottransform = NULL,
pred.labels = c("Intercept",
"Native richness",
"Gbif records per area",
"Years since independence",
"GDP",
"Number of museums"),
dv.labels = "Domestic Contribution",
string.pred = "Coefficients",
string.est = "Estimates",
string.p = "P-value")
Domestic Contribution | |||
Coefficients | Estimates | CI | P-value |
Intercept | 1.39 | 0.94 – 1.85 | <0.001 |
Native richness | 0.05 | -0.30 – 0.40 | 0.772 |
Gbif records per area | -0.41 | -0.67 – -0.15 | 0.002 |
Years since independence | 0.15 | -0.16 – 0.46 | 0.335 |
GDP | -0.71 | -1.11 – -0.30 | 0.001 |
Number of museums | -0.07 | -0.41 – 0.26 | 0.666 |
Observations | 116 |
# Table with parameters for model with Domestic retention as response variable
::tab_model(mod_DR,
sjPlottransform = NULL,
pred.labels = c("Intercept",
"Native richness",
"Gbif records per area",
"Years since independence",
"GDP",
"Number of museums"),
dv.labels = "Domestic Retention",
string.pred = "Coefficients",
string.est = "Estimates",
string.p = "P-value")
Domestic Retention | ||||
Coefficients | Estimates | CI | P-value | |
Count Model | ||||
(Intercept) | -1.02 | -1.34 – -0.70 | <0.001 | |
native.richness.pad | 0.08 | -0.19 – 0.35 | 0.554 | |
records.per.area.pad | 0.06 | -0.15 – 0.28 | 0.576 | |
years.independence.pad | 0.02 | -0.20 – 0.25 | 0.844 | |
gdp.pad | 0.43 | 0.16 – 0.70 | 0.002 | |
n.museums.pad | 0.30 | 0.04 – 0.57 | 0.024 | |
(Intercept) | 4.40 | 2.91 – 6.65 | ||
Zero-Inflated Model | ||||
(Intercept) | -7.14 | -14.16 – -0.12 | 0.046 | |
native.richness.pad | 0.03 | -1.78 – 1.85 | 0.971 | |
records.per.area.pad | -0.86 | -3.08 – 1.36 | 0.448 | |
years.independence.pad | 0.23 | -0.77 – 1.22 | 0.656 | |
gdp.pad | -1.37 | -3.29 – 0.55 | 0.163 | |
n.museums.pad | -19.17 | -36.83 – -1.50 | 0.033 | |
Observations | 116 |
# Table with parameter from model with native turnover as response variable
::tab_model(mod_turnover_native,
sjPlottransform = NULL,
pred.labels = c("Intercept",
"Native richness",
"Gbif records per area",
"Years since independence",
"GDP",
"Number of museums"),
dv.labels = "Native turnover",
string.pred = "Coefficients",
string.est = "Estimates",
string.p = "P-value")
Native turnover | |||
Coefficients | Estimates | CI | P-value |
Intercept | 1.54 | 1.22 – 1.85 | <0.001 |
Native richness | -0.10 | -0.46 – 0.26 | 0.585 |
Gbif records per area | -0.22 | -0.52 – 0.08 | 0.151 |
Years since independence | -0.19 | -0.49 – 0.11 | 0.212 |
GDP | -0.83 | -1.16 – -0.49 | <0.001 |
Number of museums | -0.62 | -0.99 – -0.26 | 0.001 |
Observations | 116 |
# Table with parameters from model with nbt turnover as response variable
::tab_model(mod_turnover_nbt,
sjPlottransform = NULL,
pred.labels = c("Intercept",
"Native richness",
"Gbif records per area",
"Years since independence",
"GDP",
"Number of museums"),
dv.labels = "NBT turnover",
string.pred = "Coefficients",
string.est = "Estimates",
string.p = "P-value")
NBT turnover | |||
Coefficients | Estimates | CI | P-value |
Intercept | -0.83 | -1.39 – -0.27 | 0.004 |
Native richness | -0.04 | -0.44 – 0.35 | 0.831 |
Gbif records per area | 0.28 | 0.01 – 0.55 | 0.040 |
Years since independence | -0.29 | -0.66 – 0.08 | 0.127 |
GDP | 0.77 | 0.33 – 1.22 | 0.001 |
Number of museums | -0.19 | -0.56 – 0.18 | 0.317 |
Observations | 116 |
Diagnostic graphics
QQ-plots and residuals x predicted plots using DHARMa package. These plots are used to compose the Figure S4 in the Supplementary material
# total number of NBT
::simulateResiduals(fittedModel = mod_counting_NBT, plot = T) DHARMa
Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help.
Scaled residual values: 0.7153866 0.001931219 0.696 0.2490446 0.648 0.992 0.1028773 0.5024718 0.3282699 0.8253997 0.4188254 0.3612344 0.8827414 0.1228934 0.6018339 0.236 0.5249315 0.4109765 0.4040863 0.4968128 ...
# Domestic Contribution and Domestic Retention
::simulateResiduals(fittedModel = mod_DC, plot = TRUE) # DC DHARMa
Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help.
Scaled residual values: 0.2810206 0.5451656 0.8206016 0.9920759 0.632 0.036 0.04606064 0.2683718 0.1179472 0.6518268 0.8012575 0.8871672 0.5651337 0.1619332 0.3115715 0.5 0.636466 0.1587027 0.8301241 0.3219646 ...
::simulateResiduals(fittedModel = mod_DR, plot = TRUE) # DR DHARMa
Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help.
Scaled residual values: 0.03292798 0.4846432 0.752 0.7350029 0.6355823 0.8954591 0.01251584 0.7590609 0.3283859 0.9303126 0.5763356 0.8149691 0.1378731 0.637628 0.416421 0.42 0.4933403 0.09946908 0.720217 0.5149565 ...
# native turnover
::simulateResiduals(fittedModel = mod_turnover_native, plot = TRUE) DHARMa
Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help.
Scaled residual values: 0.3643169 0.9604177 0.218907 0.4996626 0.4468048 0.586672 0.2291625 0.1288934 0.6268271 0.09580275 0.2318073 0.1704958 0.5271945 0.4248068 0.2145687 0.764 0.431788 0.8319357 0.3455402 0.3028504 ...
# NBT turnover
::simulateResiduals(fittedModel = mod_turnover_nbt, plot = TRUE) DHARMa
Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help.
Scaled residual values: 0.8617319 0.5820144 0.2277367 0.2187418 0.208 0.9406471 0.7678828 0.4724905 0.8400271 0.4985776 0.3119429 0.5414365 0.666443 0.4497502 0.1222786 0.6144549 0.05974446 0.8600977 0.084 0.5668753 ...