library(rangeModelMetadata)
library(sp)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.4
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:raster':
##
## intersect, select, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(spocc)
## Warning: package 'spocc' was built under R version 3.4.4
A simple rmm
object is a list object that is structured to contain metadata pertaining to species range models. Here we make an empty rmm
object containing only the obligate set of fields.
# Build a template holding only the 'obligate' family of fields, then inspect it.
rmm1 <- rmmTemplate(family = "obligate")
str(rmm1)
## List of 7
## $ authorship :List of 3
## ..$ rmmName: NULL
## ..$ names : NULL
## ..$ contact: NULL
## $ data :List of 4
## ..$ occurrence :List of 5
## .. ..$ taxon : NULL
## .. ..$ dataType: NULL
## .. ..$ yearMin : NULL
## .. ..$ yearMax : NULL
## .. ..$ sources : NULL
## ..$ environment:List of 7
## .. ..$ variableNames: NULL
## .. ..$ yearMin : NULL
## .. ..$ yearMax : NULL
## .. ..$ extentSet : NULL
## .. ..$ resolution : NULL
## .. ..$ projection : NULL
## .. ..$ sources : NULL
## ..$ observation:List of 3
## .. ..$ variableNames: NULL
## .. ..$ minVal : NULL
## .. ..$ maxVal : NULL
## ..$ dataNotes : NULL
## $ dataPrep :List of 1
## ..$ dataPrepNotes: NULL
## $ model :List of 5
## ..$ algorithm : NULL
## ..$ algorithmCitation: NULL
## ..$ speciesNumber : NULL
## ..$ objective :List of 1
## .. ..$ rangeType: NULL
## ..$ references : NULL
## $ output : Named list()
## $ performance: Named list()
## $ code :List of 1
## ..$ software:List of 2
## .. ..$ platform: NULL
## .. ..$ packages: NULL
## - attr(*, "class")= chr [1:2] "list" "RMM"
A more complex rmm
object with all predefined fields. It may seem like a lot at first, but we’re trying to keep you all happy, and many of these won’t be needed. As we explore the hierarchy below, it’ll seem simpler…
# family = NULL yields the template with all predefined fields; print its hierarchy.
rmm2 <- rmmTemplate(family = NULL)
str(rmm2)
## List of 7
## $ authorship :List of 9
## ..$ rmmName : NULL
## ..$ names : NULL
## ..$ ownership : NULL
## ..$ license : NULL
## ..$ contact : NULL
## ..$ relatedReferences: NULL
## ..$ authorNotes : NULL
## ..$ miscNotes : NULL
## ..$ doi : NULL
## $ data :List of 5
## ..$ occurrence :List of 9
## .. ..$ taxon : NULL
## .. ..$ dataType : NULL
## .. ..$ yearMin : NULL
## .. ..$ yearMax : NULL
## .. ..$ sources : NULL
## .. ..$ presenceSampleSize : NULL
## .. ..$ absenceSampleSize : NULL
## .. ..$ backgroundSampleSizeSet : NULL
## .. ..$ backgroundSampleSizeRule: NULL
## ..$ environment:List of 10
## .. ..$ variableNames: NULL
## .. ..$ minVal : NULL
## .. ..$ maxVal : NULL
## .. ..$ yearMin : NULL
## .. ..$ yearMax : NULL
## .. ..$ extentSet : NULL
## .. ..$ extentRule : NULL
## .. ..$ resolution : NULL
## .. ..$ projection : NULL
## .. ..$ sources : NULL
## ..$ observation:List of 3
## .. ..$ variableNames: NULL
## .. ..$ minVal : NULL
## .. ..$ maxVal : NULL
## ..$ transfer :List of 1
## .. ..$ environment1:List of 9
## .. .. ..$ minVal : NULL
## .. .. ..$ maxVal : NULL
## .. .. ..$ yearMin : NULL
## .. .. ..$ yearMax : NULL
## .. .. ..$ sources : NULL
## .. .. ..$ extentSet : NULL
## .. .. ..$ extentRule: NULL
## .. .. ..$ resolution: NULL
## .. .. ..$ notes : NULL
## ..$ dataNotes : NULL
## $ dataPrep :List of 5
## ..$ errors :List of 2
## .. ..$ duplicateRemoval :List of 2
## .. .. ..$ rule : NULL
## .. .. ..$ notes: NULL
## .. ..$ questionablePointRemoval:List of 2
## .. .. ..$ rule : NULL
## .. .. ..$ notes: NULL
## ..$ geographic :List of 6
## .. ..$ geographicStandardization :List of 2
## .. .. ..$ rule : NULL
## .. .. ..$ notes: NULL
## .. ..$ geographicalOutlierRemoval:List of 2
## .. .. ..$ rule : NULL
## .. .. ..$ notes: NULL
## .. ..$ centroidRemoval :List of 2
## .. .. ..$ rule : NULL
## .. .. ..$ notes: NULL
## .. ..$ pointInPolygon :List of 2
## .. .. ..$ rule : NULL
## .. .. ..$ notes: NULL
## .. ..$ altitudeRemoval :List of 1
## .. .. ..$ rule: NULL
## .. ..$ spatialThin :List of 2
## .. .. ..$ rule : NULL
## .. .. ..$ notes: NULL
## ..$ biological :List of 3
## .. ..$ taxonomicHarmonization:List of 2
## .. .. ..$ rule : NULL
## .. .. ..$ notes: NULL
## .. ..$ cultivatedRemoval :List of 2
## .. .. ..$ rule : NULL
## .. .. ..$ notes: NULL
## .. ..$ nonNativeRemoval :List of 2
## .. .. ..$ rule : NULL
## .. .. ..$ notes: NULL
## ..$ environmental:List of 3
## .. ..$ environmentalOutlierRemoval:List of 2
## .. .. ..$ rule : NULL
## .. .. ..$ notes: NULL
## .. ..$ environmentalThin :List of 2
## .. .. ..$ rule : NULL
## .. .. ..$ notes: NULL
## .. ..$ notes : NULL
## ..$ dataPrepNotes: NULL
## $ model :List of 23
## ..$ algorithm : NULL
## ..$ algorithmCitation : NULL
## ..$ speciesNumber : NULL
## ..$ covariateScaling : NULL
## ..$ occurrenceTreatedAs: NULL
## ..$ selectionRules : NULL
## ..$ finalModelSettings : NULL
## ..$ notes : NULL
## ..$ objective :List of 4
## .. ..$ rangeType : NULL
## .. ..$ invasion : NULL
## .. ..$ transfer : NULL
## .. ..$ populationType: NULL
## ..$ partition :List of 5
## .. ..$ occurrenceSubsampling: NULL
## .. ..$ numberFolds : NULL
## .. ..$ partitionSet : NULL
## .. ..$ partitionRule : NULL
## .. ..$ notes : NULL
## ..$ maxent :List of 19
## .. ..$ featureSet : NULL
## .. ..$ featureRule : NULL
## .. ..$ regularizationMultiplierSet: NULL
## .. ..$ regularizationRule : NULL
## .. ..$ convergenceThresholdSet : NULL
## .. ..$ samplingBiasRule : NULL
## .. ..$ samplingBiasNotes : NULL
## .. ..$ targetGroupSampleSize : NULL
## .. ..$ offsetSet : NULL
## .. ..$ offsetRule : NULL
## .. ..$ expertMapProbSet : NULL
## .. ..$ expertMapProbRule : NULL
## .. ..$ expertMapRateSet : NULL
## .. ..$ expertMapRateRule : NULL
## .. ..$ expertMapSkewSet : NULL
## .. ..$ expertMapSkewRule : NULL
## .. ..$ expertMapShiftSet : NULL
## .. ..$ expertMapShiftRule : NULL
## .. ..$ notes : NULL
## ..$ ppm :List of 3
## .. ..$ formula: NULL
## .. ..$ fitting: NULL
## .. ..$ notes : NULL
## ..$ ensemble :List of 2
## .. ..$ weighting: NULL
## .. ..$ notes : NULL
## ..$ glm :List of 3
## .. ..$ family : NULL
## .. ..$ formula: NULL
## .. ..$ notes : NULL
## ..$ mars :List of 1
## .. ..$ notes: NULL
## ..$ brt :List of 6
## .. ..$ nTrees : NULL
## .. ..$ interactionDepth: NULL
## .. ..$ shrinkage : NULL
## .. ..$ bagFraction : NULL
## .. ..$ trainFraction : NULL
## .. ..$ notes : NULL
## ..$ bioclim :List of 1
## .. ..$ notes: NULL
## ..$ ann :List of 1
## .. ..$ notes: NULL
## ..$ gam :List of 3
## .. ..$ family : NULL
## .. ..$ formula: NULL
## .. ..$ notes : NULL
## ..$ randomForest :List of 1
## .. ..$ notes: NULL
## ..$ rangeBagging :List of 4
## .. ..$ votes : NULL
## .. ..$ nDimensions : NULL
## .. ..$ proportionSubset: NULL
## .. ..$ notes : NULL
## ..$ occupancy :List of 3
## .. ..$ formula : NULL
## .. ..$ observationFormula: NULL
## .. ..$ notes : NULL
## ..$ references : NULL
## $ output :List of 4
## ..$ prediction :List of 7
## .. ..$ units : NULL
## .. ..$ minVal : NULL
## .. ..$ maxVal : NULL
## .. ..$ thresholdSet : NULL
## .. ..$ thresholdRule: NULL
## .. ..$ notes : NULL
## .. ..$ extrapolation: NULL
## ..$ transfer :List of 3
## .. ..$ environment1 :List of 7
## .. .. ..$ units : NULL
## .. .. ..$ minVal : NULL
## .. .. ..$ maxVal : NULL
## .. .. ..$ thresholdSet : NULL
## .. .. ..$ thresholdRule: NULL
## .. .. ..$ extrapolation: NULL
## .. .. ..$ notes : NULL
## .. ..$ notes : NULL
## .. ..$ extrapolation: NULL
## ..$ ensembleRule: NULL
## ..$ uncertainty :List of 4
## .. ..$ units : NULL
## .. ..$ minVal: NULL
## .. ..$ maxVal: NULL
## .. ..$ notes : NULL
## $ performance:List of 5
## ..$ trainingDataStats :List of 15
## .. ..$ AUC : NULL
## .. ..$ pearsonCor : NULL
## .. ..$ cohensKappa : NULL
## .. ..$ trueSkillStatistic: NULL
## .. ..$ truePositiveRate : NULL
## .. ..$ trueNegativeRate : NULL
## .. ..$ falsePositiveRate : NULL
## .. ..$ falseNegativeRate : NULL
## .. ..$ boyce : NULL
## .. ..$ pAUC : NULL
## .. ..$ pAUCLoThreshold : NULL
## .. ..$ pAUCHiThreshold : NULL
## .. ..$ AIC : NULL
## .. ..$ BIC : NULL
## .. ..$ DIC : NULL
## ..$ testingDataStats :List of 12
## .. ..$ AUC : NULL
## .. ..$ AUCDiff : NULL
## .. ..$ pearsonCor : NULL
## .. ..$ cohensKappa : NULL
## .. ..$ trueSkillStatistic: NULL
## .. ..$ truePositiveRate : NULL
## .. ..$ trueNegativeRate : NULL
## .. ..$ falsePositiveRate : NULL
## .. ..$ falseNegativeRate : NULL
## .. ..$ boyce : NULL
## .. ..$ omissionRate : NULL
## .. ..$ notes : NULL
## ..$ evaluationDataStats:List of 9
## .. ..$ AUC : NULL
## .. ..$ pearsonCor : NULL
## .. ..$ cohensKappa : NULL
## .. ..$ trueSkillStatistic: NULL
## .. ..$ truePositiveRate : NULL
## .. ..$ trueNegativeRate : NULL
## .. ..$ falsePositiveRate : NULL
## .. ..$ falseNegativeRate : NULL
## .. ..$ boyce : NULL
## ..$ references : NULL
## ..$ notes : NULL
## $ code :List of 9
## ..$ wallace :List of 9
## .. ..$ occsNum : NULL
## .. ..$ userCSV : NULL
## .. ..$ removedIDs : NULL
## .. ..$ occsCellPolyCoords: NULL
## .. ..$ userBgExt : NULL
## .. ..$ userBgPath : NULL
## .. ..$ userBgShpParams : NULL
## .. ..$ maxentEvalPlotCell: NULL
## .. ..$ bcPlotSettings : NULL
## ..$ software :List of 2
## .. ..$ platform: NULL
## .. ..$ packages: NULL
## ..$ demoCodeLink : NULL
## ..$ vignetteCodeLink: NULL
## ..$ fullCodeLink : NULL
## ..$ demoDataLink : NULL
## ..$ vignetteDataLink: NULL
## ..$ fullDataLink : NULL
## ..$ codeNotes : NULL
## - attr(*, "class")= chr [1:2] "list" "RMM"
rmm
objects can be populated manually by entering data directly into the fields, or through the use of several helper functions. Although the rmm
object template already contains a number of fields that depend on the specified family
, users can also add new fields as needed. We provide suggestions of both common fields to add and common values for many fields.
Not sure what to enter in different fields? We can suggest options.
# List only the immediate children of the 'dataPrep' field.
rmmSuggest("dataPrep", fullFieldDepth = FALSE)
## $type
## [1] "field2"
##
## $suggestions
## [1] "dataPrep$errors" "dataPrep$geographic"
## [3] "dataPrep$biological" "dataPrep$environmental"
## [5] "dataPrep$dataPrepNotes"
# List all fields below 'dataPrep', expanded to full depth.
rmmSuggest("dataPrep", fullFieldDepth = TRUE)
## $type
## [1] "field2"
##
## $suggestions
## [1] "dataPrep$errors$duplicateRemoval$rule"
## [2] "dataPrep$errors$duplicateRemoval$notes"
## [3] "dataPrep$errors$questionablePointRemoval$rule"
## [4] "dataPrep$errors$questionablePointRemoval$notes"
## [5] "dataPrep$geographic$geographicStandardization$rule"
## [6] "dataPrep$geographic$geographicStandardization$notes"
## [7] "dataPrep$geographic$geographicalOutlierRemoval$rule"
## [8] "dataPrep$geographic$geographicalOutlierRemoval$notes"
## [9] "dataPrep$geographic$centroidRemoval$rule"
## [10] "dataPrep$geographic$centroidRemoval$notes"
## [11] "dataPrep$geographic$pointInPolygon$rule"
## [12] "dataPrep$geographic$pointInPolygon$notes"
## [13] "dataPrep$geographic$altitudeRemoval$rule"
## [14] "dataPrep$geographic$spatialThin$rule"
## [15] "dataPrep$geographic$spatialThin$notes"
## [16] "dataPrep$biological$taxonomicHarmonization$rule"
## [17] "dataPrep$biological$taxonomicHarmonization$notes"
## [18] "dataPrep$biological$cultivatedRemoval$rule"
## [19] "dataPrep$biological$cultivatedRemoval$notes"
## [20] "dataPrep$biological$nonNativeRemoval$rule"
## [21] "dataPrep$biological$nonNativeRemoval$notes"
## [22] "dataPrep$environmental$environmentalOutlierRemoval$rule"
## [23] "dataPrep$environmental$environmentalOutlierRemoval$notes"
## [24] "dataPrep$environmental$environmentalThin$rule"
## [25] "dataPrep$environmental$environmentalThin$notes"
## [26] "dataPrep$environmental$notes"
## [27] "dataPrep$dataPrepNotes"
# Ask which entities sit under one specific data-preparation step.
rmmSuggest("dataPrep$errors$duplicateRemoval")
## $type
## [1] "entity"
##
## $suggestions
## [1] "dataPrep$errors$duplicateRemoval$rule"
## [2] "dataPrep$errors$duplicateRemoval$notes"
# For a terminal entity, the suggestions are candidate values rather than names.
rmmSuggest("dataPrep$errors$duplicateRemoval$rule")
## $suggestions
## [1] "Environmental duplicates" "coordinate duplicates"
## [3] "other (specify in Notes)" "NA"
Another more complex example:
# List the fields available directly under 'model'.
rmmSuggest("model")
## $type
## [1] "field2"
##
## $suggestions
## [1] "model$algorithm" "model$algorithmCitation"
## [3] "model$speciesNumber" "model$covariateScaling"
## [5] "model$occurrenceTreatedAs" "model$selectionRules"
## [7] "model$finalModelSettings" "model$notes"
## [9] "model$objective" "model$partition"
## [11] "model$maxent" "model$ppm"
## [13] "model$ensemble" "model$glm"
## [15] "model$mars" "model$brt"
## [17] "model$bioclim" "model$ann"
## [19] "model$gam" "model$randomForest"
## [21] "model$rangeBagging" "model$occupancy"
## [23] "model$references"
# List the Maxent-specific fields. Note that the trailing `$` in the query is
# carried into the returned suggestions, which therefore contain `$$`.
rmmSuggest("model$maxent$")
## $type
## [1] "field3"
##
## $suggestions
## [1] "model$maxent$$featureSet"
## [2] "model$maxent$$featureRule"
## [3] "model$maxent$$regularizationMultiplierSet"
## [4] "model$maxent$$regularizationRule"
## [5] "model$maxent$$convergenceThresholdSet"
## [6] "model$maxent$$samplingBiasRule"
## [7] "model$maxent$$samplingBiasNotes"
## [8] "model$maxent$$targetGroupSampleSize"
## [9] "model$maxent$$offsetSet"
## [10] "model$maxent$$offsetRule"
## [11] "model$maxent$$expertMapProbSet"
## [12] "model$maxent$$expertMapProbRule"
## [13] "model$maxent$$expertMapRateSet"
## [14] "model$maxent$$expertMapRateRule"
## [15] "model$maxent$$expertMapSkewSet"
## [16] "model$maxent$$expertMapSkewRule"
## [17] "model$maxent$$expertMapShiftSet"
## [18] "model$maxent$$expertMapShiftRule"
## [19] "model$maxent$$notes"
# Suggested values for the Maxent feature set (queried here with a leading `$`).
rmmSuggest("$model$maxent$featureSet")
## $suggestions
## [1] "L" "LQ" "LQP" "LQPT" "LQPTH" " H" "HT"
To make it easier to fill some rmm
fields, we provide autofill functions that extract relevant information from common R objects used in a range modeling workflow. Also see vignette('rmm_workflow')
for a demonstration of how to fill in metadata during an SDM workflow.
# Start from a full template and record citations for the packages in use.
rmm <- rmmTemplate()
rmm <- rmmAutofillPackageCitation(rmm, c("raster", "sp"))

# Search GBIF for occurrence data to demonstrate the occurrence autofill.
bv <- spocc::occ("Bradypus variegatus", "gbif", limit = 50, has_coords = TRUE)
rmm <- rmmAutofillspocc(rmm, bv$gbif)

# Get some example environmental layers shipped with 'dismo'. system.file()
# resolves the 'ex' subdirectory directly (no manual path pasting), and the
# anchored pattern keeps only files whose extension is exactly '.grd' rather
# than any file name that merely contains "grd".
rasterFiles <- list.files(
  path = system.file("ex", package = "dismo"),
  pattern = "\\.grd$",
  full.names = TRUE
)
# Make a stack of the rasters.
env <- raster::stack(rasterFiles)

# transfer = 0 fills in the fitting environment.
rmm <- rmmAutofillEnvironment(rmm, env, transfer = 0)
# Just using the same rasters for demonstration; in practice these are different.
rmm <- rmmAutofillEnvironment(rmm, env, transfer = 1) # transfer environment 1
rmm <- rmmAutofillEnvironment(rmm, env, transfer = 2) # transfer environment 2
To see what fields you might’ve missed…
# Report the fields that are still empty in the autofilled object and keep the result.
empties <- rmmCheckEmpty(rmm)
## ===================================
## There are 33 empty obligate fields:
## $authorship$ownership
## $authorship$license
## $authorship$relatedReferences
## $authorship$authorNotes
## $authorship$miscNotes
## $authorship$doi
## $data$environment$extentRule
## $model$selectionRules
## $model$finalModelSettings
## $model$notes
## $model$objective$invasion
## $model$objective$transfer
## $model$partition$partitionSet
## $model$partition$partitionRule
## $model$partition$notes
## $output$prediction$thresholdSet
## $output$prediction$thresholdRule
## $output$prediction$extrapolation
## $output$transfer$environment1$extrapolation
## $output$transfer$notes
## $output$uncertainty$units
## $output$uncertainty$minVal
## $output$uncertainty$maxVal
## $output$uncertainty$notes
## $performance$references
## $performance$notes
## $code$demoCodeLink
## $code$vignetteCodeLink
## $code$fullCodeLink
## $code$demoDataLink
## $code$vignetteDataLink
## $code$fullDataLink
## $code$codeNotes
## ===================================
## ===================================
To check the field names in your object, use the function rmmCheckName
# Start again from an empty template.
rmm1 <- rmmTemplate()
# Attach a new field that is not part of the standard set.
rmm1$dataPrep$biological$taxonomicHarmonization$taxonomy_source <- "The Plant List"
# Checking the names identifies the non-standard field added above ("taxonomy_source").
rmm1 <- rmmCheckName(rmm1)
## The following names are not similar to any suggested names, please verify that these are accurate:
## $dataPrep$biological$taxonomicHarmonization$taxonomy_source
##
To check the field values in your rmm object, use the function rmmCheckValue
# Begin with an empty rmm template.
rmm1 <- rmmTemplate()
# Record three bioclim layers, one of them with a spelling error (an extra
# space in 'bio 2'), plus a word that is clearly not a climate layer, 'cromulent'.
rmm1$data$environment$variableNames <- c("bio1", "bio 2", "bio3", "cromulent")
# Checking the values reports 'bio1' and 'bio3' as exact matches, flags 'bio 2'
# as a partial match with the suggested value 'bio2', and flags 'cromulent' as
# not matched at all.
rmmCheckValue(rmm = rmm1)
##
## ==========================================
## For the field rmm$data$environment$variableNames
## The following entries appear accurate:
##
## bio1; bio3
## The following entries are similar to suggested values, please verify:
## bio 2
##
## Suggested alternatives include:
## bio2
## The following entries are not similar to any suggested values, please verify that these are accurate:
## cromulent
##
# Setting returnData = TRUE also returns this information as a data frame,
# which may be a more useful format.
rmmCheckValueOutput <- rmmCheckValue(rmm = rmm1, returnData = TRUE)
##
## ==========================================
## For the field rmm$data$environment$variableNames
## The following entries appear accurate:
##
## bio1; bio3
## The following entries are similar to suggested values, please verify:
## bio 2
##
## Suggested alternatives include:
## bio2
## The following entries are not similar to any suggested values, please verify that these are accurate:
## cromulent
##
These ‘check’ functions work by comparing the values or names within an rmm
object to those in a data dictionary. These functions are designed to check for non-standard values and names, and DO NOT necessarily identify correct vs. incorrect values/names. Non-standard values may be perfectly valid, or they may be erroneous, and the user will have to make this distinction.
To run all the available checks at once, we’ll check the object that we filled in a few chunks back.
# rmmCheckFinalize(rmm, family=c('base','obligate'))
rmm
object
To make rmm
objects portable to other interfaces, they are readily written to csv format.
# Write an empty template to a CSV file on the Desktop.
outFile <- "~/Desktop/demo_rmmToCSV.csv"
rmmObj <- rmmTemplate()
rmmToCSV(rmmObj, filename = outFile)
# `open -a "Microsoft Excel"` is a macOS command, so only attempt to launch
# Excel there; on other platforms the CSV is simply left on disk.
if (identical(Sys.info()[["sysname"]], "Darwin")) {
  system(paste0('open ', outFile, ' -a "Microsoft Excel"'))
}
It can be helpful to simply view the data dictionary:
# Load the data dictionary (a data frame) and summarise its columns.
dd <- rmmDataDictionary()
str(dd)
## 'data.frame': 210 obs. of 10 variables:
## $ field1 : chr "authorship" "authorship" "authorship" "authorship" ...
## $ field2 : chr NA NA NA NA ...
## $ field3 : chr NA NA NA NA ...
## $ entity : chr "rmmName" "names" "ownership" "license" ...
## $ class : chr "character" "character vector" "character" "character" ...
## $ taxonSpecific : chr "no" "no" "no" "no" ...
## $ constrainedValues: chr "NULL" "NULL" "NULL" "CC; CC BY; CC BY-SA; CC BY-ND; CC BY-NC; CC BY-NC-SA; CC BY-NC-ND; other" ...
## $ family : chr "obligate, base" "obligate, base" "base" "base" ...
## $ example : chr "MerowMaitnerOwensKassEnquistGuralnick_2018_Acer_Maxent_b3" "Merow, Cory and Maitner, Brian and Owens, Hannah and Kass, Jamie and Enquist, Brian and Guralnick, Rob" "Merow, Cory;" "CC; CC BY; CC BY-SA; CC BY-ND; CC BY-NC; CC BY-NC-SA; CC BY-NC-ND" ...
## $ description : chr "Use the format Author_Year_Taxa_Model_fw, where the last two characters (here, fw) are alphanumeric and random." "The names of those who created this model." "The names of the person(s) who own this model." "The license under which this model has been produced. See https://creativecommons.org/licenses/ for common options." ...
# rmmDataDictionary(excel=TRUE) # try this if you have excel