library(dplyr) # data manipulation
library(tidyr) # data tidying
library(glue) # string interpolation
library(here) # constructing file paths
library(readr) # reading CSV files
# Create periods of 50 years: round `value` down to the start of the 50-year
# interval it falls in (e.g. 1875 -> 1850, 1850 -> 1850).
# Works element-wise on numeric vectors because `-` and `%%` are vectorised.
floor_period <- function(value) {
  value - value %% 50
}
01 - Data preparation
01_D_data_preparation.qmd
General overview
In this document we provide all the scripts needed to process the main data and later create models and figures.
Reading libraries, data and basic functions
Data containing the flow of NBT (Name-Bearing Types). This data frame contains the flow of NBT among countries. The source country of the NBT is represented by its three-letter acronym (country_type) and its respective World Bank region (region_type). The housing location is represented by the three-letter code of the country (country_museum) and the respective World Bank region of the museum (region_museum). The NBT flow is represented in 50-year time intervals (period). The total number of NBT flowing from one region/country to another is given in the column n. These names and meanings are consistent throughout all data sets.
# Flow by each period, region and country.
# Raw table read from data/raw (path resolved with here::here()); every
# summary table below is derived from it.
flow_period_region_country <- readr::read_csv(
  here::here("data", "raw", "flow_period_region_country.csv")
)
Load data
# 1
# Flow by each period - total number of NBT by period
# count(wt = n) sums column n within each period and stores the sum in n.
# add_count() runs after ungroup(), so total_period is the sum of n over
# all rows (all periods), repeated on every row.
flow_period <- flow_period_region_country |>
  dplyr::group_by(period) |>
  dplyr::count(wt = n) |>
  dplyr::ungroup() |>
  dplyr::add_count(name = "total_period", wt = n)
# Flow by each region - total number of NBT among regions
# n is the sum of the input n for each (region_type, region_museum) pair;
# total_region_type is the sum of n within each region_type.
flow_region <- flow_period_region_country |>
  dplyr::group_by(region_type, region_museum) |>
  dplyr::count(wt = n) |>
  dplyr::ungroup() |>
  dplyr::group_by(region_type) |>
  dplyr::add_count(name = "total_region_type", wt = n) |>
  dplyr::ungroup()

# Save the region-level flow table to data/processed
readr::write_csv(
  flow_region,
  here::here("data", "processed", "flow_region.csv")
)
# Flow among countries
# n is the sum of the input n for each (country_type, country_museum) pair;
# total_country_type is the sum of n within each country_type.
flow_country <- flow_period_region_country |>
  dplyr::group_by(country_type, country_museum) |>
  dplyr::count(wt = n) |>
  dplyr::ungroup() |>
  dplyr::group_by(country_type) |>
  dplyr::add_count(name = "total_country_type", wt = n) |>
  dplyr::ungroup()

# Save the country-level flow table to data/processed
readr::write_csv(
  flow_country,
  here::here("data", "processed", "flow_country.csv")
)
# 2
# Flow by each period and region - add total from period and region_type
# n is the sum of the input n for each (period, region_type, region_museum)
# combination; total_period_region_type is the sum of n within each
# (period, region_type).
flow_period_region <- flow_period_region_country |>
  dplyr::group_by(period, region_type, region_museum) |>
  dplyr::count(wt = n) |>
  dplyr::ungroup() |>
  dplyr::group_by(period, region_type) |>
  dplyr::add_count(name = "total_period_region_type", wt = n) |>
  dplyr::ungroup()

# Save the period-by-region flow table to data/processed
readr::write_csv(
  flow_period_region,
  here::here("data", "processed", "flow_period_region.csv")
)
# Flow by each region and country - add total from country_type
# n is the sum of the input n for each (region_type, region_museum,
# country_type, country_museum) combination; total_country_type is the sum
# of n within each country_type.
flow_region_country <- flow_period_region_country |>
  dplyr::group_by(
    region_type, region_museum,
    country_type, country_museum
  ) |>
  dplyr::count(wt = n) |>
  dplyr::ungroup() |>
  dplyr::group_by(country_type) |>
  dplyr::add_count(name = "total_country_type", wt = n) |>
  dplyr::ungroup()
# Proportions
# Flow by each period and region - add proportions
flow_period_region_prop <- flow_period_region |>
  # ungroup
  dplyr::ungroup() |>
  # group by period and museum region
  dplyr::group_by(period, region_museum) |>
  # count how many NBTs were deposited in the museums of this region during
  # the period
  dplyr::add_count(wt = n, name = "total_period_region_museum") |>
  # ungroup
  dplyr::ungroup() |>
  # group by period
  dplyr::group_by(period) |>
  # count how many NBTs were described during the period
  dplyr::add_count(wt = n, name = "total_period") |>
  # filter only within each region
  dplyr::filter(region_type == region_museum) |>
  # add prop_DC (Domestic Contribution) and prop_DR (Domestic Retention)
  dplyr::mutate(
    prop_DC = n / total_period_region_museum,
    prop_DR = n / total_period_region_type
  ) |>
  dplyr::ungroup() |>
  # replace missing prop_DC values with 0 (is.na() is also TRUE for NaN,
  # which a 0 / 0 division produces)
  dplyr::mutate(prop_DC = ifelse(is.na(prop_DC), 0, prop_DC))

# Save the per-period domestic proportions to data/processed
readr::write_csv(
  flow_period_region_prop,
  here::here("data", "processed", "flow_period_region_prop.csv")
)
# Flow by each region (all periods pooled) - add proportions
flow_region_prop <- flow_region |>
  # ungroup
  dplyr::ungroup() |>
  # group by museum region
  dplyr::group_by(region_museum) |>
  # count how many NBTs were deposited in the museums of this region
  dplyr::add_count(wt = n, name = "total_region_museum") |>
  # ungroup
  dplyr::ungroup() |>
  # filter only within each region
  dplyr::filter(region_type == region_museum) |>
  # add prop_DC (Domestic Contribution) and prop_DR (Domestic Retention)
  dplyr::mutate(
    prop_DC = n / total_region_museum,
    prop_DR = n / total_region_type
  )

# Save the pooled domestic proportions to data/processed
readr::write_csv(
  flow_region_prop,
  here::here("data", "processed", "flow_region_prop.csv")
)
Museum data
# Raw museum table read from data/raw
museum_data <- readr::read_csv(here::here("data", "raw", "museum_data.csv"))

# Number of rows of museum_data per country_museum, stored in n.museums.
# NOTE(review): unlike the other summaries in this file, there is no
# ungroup() after count() here - confirm whether later code relies on that.
infra_museum <-
  museum_data |>
  dplyr::group_by(country_museum) |>
  dplyr::count(name = "n.museums")

# Save the per-country museum counts to data/processed
readr::write_csv(
  infra_museum,
  here::here("data", "processed", "infra_museum.csv")
)
Native and Types species by country list
# Native
# Raw native-distribution table read from data/raw
spp_native_distribution <- readr::read_csv(
  here::here("data", "raw", "spp_native_distribution.csv")
)

# Number of rows per (country_distribution, region_distribution), stored in
# native.richness
df_country_native <- spp_native_distribution |>
  dplyr::group_by(country_distribution, region_distribution) |>
  dplyr::count(name = "native.richness") |>
  dplyr::ungroup()

# Save the per-country native counts to data/processed
readr::write_csv(
  df_country_native,
  here::here("data", "processed", "df_country_native.csv")
)
# Types
# Raw type-distribution table read from data/raw
spp_type_distribution <- readr::read_csv(
  here::here("data", "raw", "spp_type_distribution.csv")
)

# Number of rows per (country_museum, region_museum), stored in type_richness
df_country_type <- spp_type_distribution |>
  dplyr::group_by(country_museum, region_museum) |>
  dplyr::count(name = "type_richness") |>
  dplyr::ungroup()

# Save the per-country type counts to data/processed
readr::write_csv(
  df_country_type,
  here::here("data", "processed", "df_country_type.csv")
)
Bio-Dem
We downloaded data from the Bio-Dem database and saved them as CSV files. Here we read these files, which contain information on GDP per capita (e_migdppc), the total number of species occurrence records from GBIF (records), the number of GBIF records per area (records_per_area), and the years since independence for each country (yearsSinceIndependence). Countries are identified by their iso3c code - a unique three-letter code that identifies each country.
# Bio-Dem table read from data/raw
df_bio_dem <- readr::read_csv(
  file = here::here("data", "raw", "bio-dem_data.csv")
)