############################################################################### # SPATIAL AVAILABILITY SCORE (SAS) CALCULATOR # Complete R Script - Version 1.0 # Associated with: Nazari & Bussmann (2024) # Contact: [Corresponding author email] ############################################################################### #============================================================================== # 1. INSTALL REQUIRED PACKAGES (run once) #============================================================================== # install.packages(c("rgbif", "sf", "dplyr", "rnaturalearth", # "rnaturalearthdata", "ggplot2", "units")) #============================================================================== # 2. LOAD LIBRARIES #============================================================================== library(rgbif) # GBIF data access library(sf) # Spatial operations library(dplyr) # Data manipulation library(rnaturalearth) # Country boundaries library(ggplot2) # Visualization library(units) # Unit management #============================================================================== # 3. USER INPUT - MODIFY THESE VALUES #============================================================================== species_name <- "Urtica dioica" # Target species (scientific name) target_country <- "Iran" # Target country or region output_dir <- "./SAS_results" # Output directory #============================================================================== # 4. MAIN FUNCTION: CALCULATE SAS #============================================================================== calculate_SAS <- function(species_name, target_country, output_dir = "./SAS_results") { # Create output directory if (!dir.exists(output_dir)) dir.create(output_dir) cat("=============================================\n") cat("SAS CALCULATION FOR:", species_name, "\n") cat("REGION:", target_country, "\n") cat("=============================================\n\n") # 4.1 Get country boundaries and area cat("1. Loading country boundaries...\n") country <- rnaturalearth::ne_countries(country = target_country, scale = "medium", returnclass = "sf") if (nrow(country) == 0) stop("Country not found. Check spelling or use ISO code (e.g., 'IRN').") country_area <- as.numeric(st_area(country)) / 1e6 # Convert to km² cat(sprintf(" Country area: %.2f km²\n", country_area)) # 4.2 Download occurrence data from GBIF cat("\n2. Downloading occurrence data from GBIF...\n") occ_data <- rgbif::occ_search(scientificName = species_name, hasCoordinate = TRUE, limit = 50000)$data if (is.null(occ_data) || nrow(occ_data) == 0) { stop("No occurrence records found for ", species_name) } cat(sprintf(" Raw records: %d\n", nrow(occ_data))) # 4.3 Clean data cat("\n3. Cleaning data...\n") occ_clean <- occ_data %>% filter(!is.na(decimalLongitude) & !is.na(decimalLatitude)) %>% filter(is.na(coordinateUncertaintyInMeters) | coordinateUncertaintyInMeters <= 5000) %>% filter(basisOfRecord %in% c("HUMAN_OBSERVATION", "OBSERVATION", "PRESERVED_SPECIMEN")) %>% select(species, decimalLongitude, decimalLatitude, coordinateUncertaintyInMeters, countryCode) cat(sprintf(" After cleaning: %d records\n", nrow(occ_clean))) # 4.4 Filter to target country cat("\n4. Filtering to target country...\n") occ_sf <- st_as_sf(occ_clean, coords = c("decimalLongitude", "decimalLatitude"), crs = 4326) occ_in_country <- st_intersection(occ_sf, country) if (nrow(occ_in_country) == 0) { stop("No records found in ", target_country) } occ_final <- occ_in_country %>% mutate(decimalLongitude = st_coordinates(.)[,1], decimalLatitude = st_coordinates(.)[,2]) %>% st_drop_geometry() cat(sprintf(" Records in %s: %d\n", target_country, nrow(occ_final))) # 4.5 Remove spatial outliers cat("\n5. Removing spatial outliers...\n") if (nrow(occ_final) >= 10) { coords <- as.matrix(occ_final[, c("decimalLongitude", "decimalLatitude")]) centroid <- colMeans(coords) cov_mat <- cov(coords) m_dist <- mahalanobis(coords, centroid, cov_mat) cutoff <- qchisq(0.99, df = 2) # p < 0.01 occ_final <- occ_final[m_dist <= cutoff, ] cat(sprintf(" After outlier removal: %d records\n", nrow(occ_final))) } # 4.6 Calculate EOO cat("\n6. Calculating Extent of Occurrence (EOO)...\n") if (nrow(occ_final) < 3) { cat(" Not enough points for EOO calculation.\n") EOO_km2 <- 0 hull <- NULL } else { points_sf <- st_as_sf(occ_final, coords = c("decimalLongitude", "decimalLatitude"), crs = 4326) points_eq <- st_transform(points_sf, crs = "+proj=eck4 +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84") convex_hull <- st_convex_hull(st_union(points_eq)) EOO_km2 <- as.numeric(st_area(convex_hull)) / 1e6 hull <- st_transform(convex_hull, crs = 4326) cat(sprintf(" EOO: %.2f km²\n", EOO_km2)) } # 4.7 Calculate SAS cat("\n7. Calculating Spatial Availability Score (SAS)...\n") SAS_value <- log(1 + EOO_km2) / log(1 + country_area) cat(sprintf(" SAS = %.4f\n", SAS_value)) # 4.8 Interpret results cat("\n8. Interpretation:\n") if (SAS_value < 0.2) interpretation <- "Very restricted" else if (SAS_value < 0.4) interpretation <- "Limited regionally" else if (SAS_value < 0.6) interpretation <- "Moderate availability" else if (SAS_value < 0.8) interpretation <- "Widely available" else interpretation <- "Extremely widespread" cat(sprintf(" %s within %s\n", interpretation, target_country)) # 4.9 Create visualization cat("\n9. Creating visualization...\n") if (nrow(occ_final) > 0 && !is.null(hull)) { p <- ggplot() + geom_sf(data = country, fill = "lightgray", color = "darkgray") + geom_point(data = occ_final, aes(x = decimalLongitude, y = decimalLatitude), color = "red", size = 1, alpha = 0.6) + geom_sf(data = hull, fill = NA, color = "blue", linewidth = 1) + labs(title = paste(species_name, "in", target_country), subtitle = sprintf("EOO = %.1f km² | SAS = %.3f (%s)", EOO_km2, SAS_value, interpretation), x = "Longitude", y = "Latitude") + theme_minimal() ggsave(file.path(output_dir, paste0("SAS_", gsub(" ", "_", species_name), "_", target_country, ".png")), p, width = 10, height = 7, dpi = 300) cat(" Map saved.\n") } # 4.10 Save results cat("\n10. Saving results...\n") results <- data.frame( Species = species_name, Region = target_country, Region_Area_km2 = round(country_area, 2), Records_Used = nrow(occ_final), EOO_km2 = round(EOO_km2, 2), EOO_Percent = round((EOO_km2 / country_area) * 100, 2), SAS = round(SAS_value, 4), Interpretation = interpretation, Date = as.character(Sys.Date()) ) write.csv(results, file.path(output_dir, paste0("SAS_Results_", gsub(" ", "_", species_name), "_", target_country, ".csv")), row.names = FALSE) cat(" Results saved to CSV.\n") # Print summary cat("\n=============================================\n") print(results) cat("=============================================\n") return(results) } #============================================================================== # 5. RUN THE ANALYSIS #============================================================================== # Execute the function with your parameters results <- calculate_SAS(species_name = species_name, target_country = target_country, output_dir = output_dir) #============================================================================== # 6. BATCH PROCESSING (Optional - for multiple species) #============================================================================== # Uncomment and modify for multiple species: # species_list <- c("Urtica dioica", "Malva sylvestris", "Ferula assa-foetida") # all_results <- list() # # for (sp in species_list) { # cat("\n\nProcessing:", sp, "\n") # all_results[[sp]] <- calculate_SAS(species_name = sp, # target_country = target_country, # output_dir = output_dir) # } # # # Combine all results # combined_results <- do.call(rbind, all_results) # write.csv(combined_results, # file.path(output_dir, "SAS_All_Species_Results.csv"), # row.names = FALSE) ############################################################################### # END OF SCRIPT ###############################################################################