## ----include = FALSE----------------------------------------------------------
# Global knitr chunk options for the whole document: figures are 7 x 4 and
# centered, warnings are hidden, and output is collapsed with a "#>" prefix
knitr::opts_chunk$set(
  fig.align = "center",
  fig.height = 4,
  fig.width = 7,
  warning = FALSE,
  comment = "#>",
  collapse = TRUE
)

## -----------------------------------------------------------------------------
# Attach the RuHere package
library(RuHere)
# Bring the example dataset 'occurrences' shipped with RuHere into the session
data(list = "occurrences", package = "RuHere")

## -----------------------------------------------------------------------------
# Separate the records with invalid coordinates from the valid ones.
# With 'return_invalid = TRUE' the result is stored as a list holding both sets
occ_split <- remove_invalid_coordinates(occ = occurrences,
                                        long = "decimalLongitude",
                                        lat = "decimalLatitude",
                                        return_invalid = TRUE)

# Species and coordinates of the records that were rejected
occ_split$invalid[, c("species", "decimalLongitude", "decimalLatitude")]

# From here on, 'occ' holds only the records with valid coordinates
occ <- occ_split$valid

## -----------------------------------------------------------------------------
# Scan the records for fossil-related terms
occ <- flag_fossil(occ)
# Count the records flagged as fossil: flagged rows hold FALSE in the flag
# column, hence the negation (no record is flagged as fossil in this dataset)
sum(!occ$fossil_flag)

## -----------------------------------------------------------------------------
# Flag records of cultivated individuals
occ <- flag_cultivated(occ)
# Number of records flagged as cultivated (flagged rows hold FALSE)
sum(!occ[["cultivated_flag"]])

## -----------------------------------------------------------------------------
# Variant 1: flag every iNaturalist record, Research Grade ones included.
# The result goes to a separate object, so 'occ' itself is left untouched
occ_inat <- flag_inaturalist(occ, research_grade = TRUE)
sum(!occ_inat$inaturalist_flag) # Number of flagged records

# Variant 2: flag only the iNaturalist records without Research Grade
# (i.e. the non-peer-verified ones); this result replaces 'occ'
occ <- flag_inaturalist(occ, research_grade = FALSE)
# All iNaturalist records are classified as Research Grade
sum(!occ$inaturalist_flag)

## -----------------------------------------------------------------------------
# Flag records by collection year using 1980 as the lower limit; no upper
# limit is set here, although one could be specified as well
occ <- flag_year(occ, lower_limit = 1980, upper_limit = NULL)
sum(!occ[["year_flag"]]) # Number of flagged records

## -----------------------------------------------------------------------------
# Build a dataset containing duplicates: the first 1000 rows of 'occurrences'
# followed by a second copy of its first 100 rows
new_occ <- rbind(occurrences[seq_len(1000), ], occurrences[seq_len(100), ])

## -----------------------------------------------------------------------------
# Flag duplicates, keeping the most recent record and preferably one from GBIF.
# Data sources listed in order of priority (GBIF first)
preferable_datasource <- c("gbif", "specieslink", "idigbio")
occ_dup1 <- flag_duplicates(
  occ = new_occ,
  continuous_variable = "year",
  categorical_variable = "data_source",
  priority_categories = preferable_datasource
)
sum(!occ_dup1$duplicated_flag) # Number of flagged records

## -----------------------------------------------------------------------------
# Flag duplicates using the year, in addition to the coordinates, to define
# which records count as duplicated
occ_dup2 <- flag_duplicates(
  occ = new_occ,
  additional_groups = "year"
)

## -----------------------------------------------------------------------------
# Load the packed example raster shipped with RuHere
data(list = "worldclim", package = "RuHere")
# Unpack it so it can be used as a raster
wc <- terra::unwrap(worldclim)
# Flag duplicates based on raster cells of 'wc' and keep the most recent record
occ_dup3 <- flag_duplicates(
  occ = new_occ,
  continuous_variable = "year",
  by_cell = TRUE,
  raster_variable = wc
)

## -----------------------------------------------------------------------------
# Install the package if necessary
# if (!requireNamespace("CoordinateCleaner", quietly = TRUE)) {
#   install.packages("CoordinateCleaner")
# }

# Attach CoordinateCleaner
library(CoordinateCleaner)

# Run the spatial check on 'occ' with a selection of tests
occ <- clean_coordinates(
  x = occ,
  tests = c("capitals", "centroids", "equal", "institutions", "zeros")
)

## -----------------------------------------------------------------------------
# Preview the first rows of columns 19 to 25 of 'occ'
# NOTE(review): presumably these are the columns added by clean_coordinates()
# above -- the positional index breaks if the column layout changes; confirm
head(occ[,19:25])

## ----eval=FALSE---------------------------------------------------------------
# # Interactive map with map_here()
# map_here(occ, species = "Araucaria angustifolia", label = "record_id", cex = 4)

## ----show mapview, eval=TRUE, echo=FALSE, fig.align='center', out.width='80%'--------
# Embed the pre-rendered image (presumably a screenshot of the interactive map
# from the non-evaluated map_here() chunk -- confirm)
knitr::include_graphics(path = "vignettes_img/IMG02.jpeg")

## ----out.width = "80%"--------------------------------------------------------
# Static ggplot map for Araucaria angustifolia; unflagged records are not shown
ggmap_here(
  occ,
  species = "Araucaria angustifolia",
  show_no_flagged = FALSE
)

## ----out.width = "80%"--------------------------------------------------------
# Same species, now with 'facet_wrap = TRUE' (presumably one panel per flag)
ggmap_here(
  occ,
  species = "Araucaria angustifolia",
  facet_wrap = TRUE
)

## -----------------------------------------------------------------------------
# Combine the 'cultivated' and 'year' flags into one custom flag column named
# 'old_cultivated', using the "any_true" consensus rule
occ_consensus <- flag_consensus(
  occ,
  flags = c("cultivated", "year"),
  consensus_rule = "any_true",
  flag_name = "old_cultivated"
)

# Rows where the combined flag is FALSE: records flagged because they are
# cultivated and collected before 1980
occ_consensus_flagged <- occ_consensus[!occ_consensus[["old_cultivated"]], ]
occ_consensus_flagged[, c("species", "cultivated_flag",
                          "year_flag", "old_cultivated")]

## ----out.width = "80%"--------------------------------------------------------
# Map the individual flags together with the custom consensus flag:
#   flags                  - specific flags to show
#   additional_flags       - column name of the custom flag
#   names_additional_flags - label used in the legend
#   col_additional_flags   - color for the custom flag
#   show_no_flagged        - FALSE, so unflagged records are not shown
ggmap_here(
  occ_consensus,
  flags = c("year", "cultivated"),
  additional_flags = "old_cultivated",
  names_additional_flags = "Old & cultivated",
  col_additional_flags = "red",
  show_no_flagged = FALSE
)

## -----------------------------------------------------------------------------
# Create the directory where the removed records will be saved.
# tempdir() is the same folder for the whole R session, so when this chunk is
# re-run the directory already exists: 'showWarnings = FALSE' silences the
# resulting "already exists" warning, and 'recursive = TRUE' also creates any
# missing parent directory.
path_to_save <- file.path(tempdir(), "removed_records")
dir.create(path_to_save, showWarnings = FALSE, recursive = TRUE)

# Record IDs to keep or to remove regardless of their flags
to_keep <- c("gbif_17175", "gbif_6108")
to_remove <- c("gbif_5516", "specieslink_1091")

# Remove the flagged records (all flags), applying the manual overrides above,
# and save the removed records in the 'path_to_save' folder
occ_cleaned <- remove_flagged(
  occ = occ,
  flags = "all",
  column_id = "record_id",
  force_keep = to_keep,
  force_remove = to_remove,
  save_flagged = TRUE,
  output_dir = path_to_save
)

# Record counts, in order: total, kept after cleaning, and removed
nrow(occ)
nrow(occ_cleaned)
nrow(occ) - nrow(occ_cleaned)

## ----eval=FALSE---------------------------------------------------------------
# fs::dir_tree(path_to_save)
# #> Temp/removed_records
# #> ├── Biodiversity Institution.gz
# #> ├── Capital centroid.gz
# #> ├── Country-Province centroid.gz
# #> ├── Cultivated.gz
# #> ├── Equal lat-long.gz
# #> └── Zero lat-long.gz

## -----------------------------------------------------------------------------
# Summarize the flags stored in 'occ'; the result is read below through its
# 'df_summary' and 'plot_summary' elements
flag_summary <- summarize_flags(occ)

## -----------------------------------------------------------------------------
# Data frame summarizing the number of records per flag
flag_summary$df_summary

## ----out.width = "80%"--------------------------------------------------------
# Bar plot of the flag summary
flag_summary$plot_summary

## ----out.width = "80%"--------------------------------------------------------
# Summarize the removed records from the data saved in 'path_to_save'
flag_summary_dir <- summarize_flags(
  flagged_dir = path_to_save,
  show_unflagged = FALSE, # Do not show unflagged records
  fill = "firebrick"      # Change color of bars
)
flag_summary_dir$plot_summary

## ----eval=FALSE---------------------------------------------------------------
# ggplot2::ggsave(filename = file.path(path_to_save, "Summary.png"),
#                 plot = flag_summary_dir$plot_summary, width = 8, height = 5,
#                 dpi = 600)

## ----out.width = "80%"--------------------------------------------------------
# Grid of species richness (summary = "species") at resolution 2
r_richness <- richness_here(
  occ,
  summary = "species",
  res = 2
)

# Grid of record density, i.e. the total number of occurrences
# (summary = "records"), at the same resolution
r_records <- richness_here(
  occ,
  summary = "records",
  res = 2
)

# Plot both grids, richness first
ggrid_here(r_richness)

ggrid_here(r_records)

## ----out.width = "80%"--------------------------------------------------------
# Numeric version of the cultivated flag for plotting. The logic is inverted so
# that flagged records (FALSE) become 1 and clean records (TRUE) become 0
occ[["cultivated_flag_num"]] <- as.numeric(!occ[["cultivated_flag"]])

# Create the grid from the numeric flag, aggregated with sum()
r_flagged <- richness_here(
  occ,
  summary = "records",
  field = "cultivated_flag_num",
  field_name = "Cultivated records",
  fun = sum,
  res = 2
)

# Plot with ggrid_here using a white / orange / firebrick color scale
ggrid_here(
  r_flagged,
  low_color = "white",
  mid_color = "orange",
  high_color = "firebrick"
)

