The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

oai introduction

A general purpose client to work with any ‘OAI-PMH’ service.

The ‘OAI-PMH’ protocol is described at http://www.openarchives.org/OAI/openarchivesprotocol.html.

The main functions follow the OAI-PMH verbs:

Get oai

Install from CRAN

install.packages("oai")

Or install the development version from GitHub

devtools::install_github("ropensci/oai")

Load oai

library("oai")

Identify

id("http://oai.datacite.org/oai")
#>   repositoryName                      baseURL protocolVersion
#> 1   DataCite MDS https://oai.datacite.org/oai             2.0
#>             adminEmail    earliestDatestamp deletedRecord
#> 1 support@datacite.org 2011-01-01T00:00:00Z    persistent
#>            granularity compression compression.1
#> 1 YYYY-MM-DDThh:mm:ssZ        gzip       deflate
#>                                      description
#> 1 oaioai.datacite.org:oai:oai.datacite.org:12425

ListIdentifiers

list_identifiers(from = '2018-05-01T', until = '2018-09-01T')
#> # A tibble: 2,904 x 5
#>    identifier        datestamp    setSpec           setSpec.1     setSpec.2
#>    <chr>             <chr>        <chr>             <chr>         <chr>    
#>  1 cbe4cdda-2045-41… 2018-08-30T… installation:842… dataset_type… country:…
#>  2 3fb7ddd8-07c0-49… 2018-08-28T… installation:842… dataset_type… country:…
#>  3 34585f24-1ffe-47… 2018-08-28T… installation:842… dataset_type… country:…
#>  4 8ac24ec8-1e7b-40… 2018-08-28T… installation:842… dataset_type… country:…
#>  5 e381b970-9b62-46… 2018-08-28T… installation:842… dataset_type… country:…
#>  6 4496b1b1-1ea9-4e… 2018-08-28T… installation:842… dataset_type… country:…
#>  7 fc5d68a6-b511-4c… 2018-08-27T… installation:73e… dataset_type… country:…
#>  8 5e0073bc-de2a-4b… 2018-08-27T… installation:688… dataset_type… country:…
#>  9 ab2684cf-62e5-4f… 2018-08-27T… installation:804… dataset_type… country:…
#> 10 34fbcf59-d9bb-47… 2018-08-27T… installation:7c5… dataset_type… country:…
#> # … with 2,894 more rows

Count Identifiers

count_identifiers()

ListRecords

list_records(from = '2018-05-01T', until = '2018-05-15T')
#> # A tibble: 44 x 26
#>    identifier datestamp setSpec setSpec.1 setSpec.2 title publisher
#>    <chr>      <chr>     <chr>   <chr>     <chr>     <chr> <chr>    
#>  1 18799ce9-… 2018-05-… instal… dataset_… country:… Bird… Sokoine …
#>  2 79f51633-… 2018-05-… instal… dataset_… country:… Impl… Aïgos SAS
#>  3 f83746ee-… 2018-05-… instal… dataset_… country:… NDFF… Dutch Na…
#>  4 a3533a61-… 2018-05-… instal… dataset_… country:… EDP … EDP - En…
#>  5 ba9b66a3-… 2018-05-… instal… dataset_… country:… Ende… Sokoine …
#>  6 78b696d9-… 2018-05-… instal… dataset_… country:… Ende… Sokoine …
#>  7 c791b255-… 2018-05-… instal… dataset_… country:… Ende… Sokoine …
#>  8 b929ccda-… 2018-05-… instal… dataset_… country:… List… Sokoine …
#>  9 da285c2a-… 2018-05-… instal… dataset_… country:… Moni… Corporac…
#> 10 87372877-… 2018-05-… instal… dataset_… country:… Moni… Corporac…
#> # … with 34 more rows, and 19 more variables: identifier.1 <chr>,
#> #   subject <chr>, source <chr>, description <chr>, description.1 <chr>,
#> #   type <chr>, creator <chr>, date <chr>, language <chr>, coverage <chr>,
#> #   coverage.1 <chr>, format <chr>, source.1 <chr>, subject.1 <chr>,
#> #   coverage.2 <chr>, creator.1 <chr>, description.2 <chr>,
#> #   creator.2 <chr>, subject.2 <chr>

GetRecords

ids <- c("87832186-00ea-44dd-a6bf-c2896c4d09b4", "d981c07d-bc43-40a2-be1f-e786e25106ac")
get_records(ids)
#> $`87832186-00ea-44dd-a6bf-c2896c4d09b4`
#> $`87832186-00ea-44dd-a6bf-c2896c4d09b4`$header
#> # A tibble: 1 x 3
#>   identifier             datestamp      setSpec                            
#>   <chr>                  <chr>          <chr>                              
#> 1 87832186-00ea-44dd-a6… 2018-06-29T12… installation:729a7375-b120-4e4f-bb…
#> 
#> $`87832186-00ea-44dd-a6bf-c2896c4d09b4`$metadata
#> # A tibble: 0 x 0
#> 
#> 
#> $`d981c07d-bc43-40a2-be1f-e786e25106ac`
#> $`d981c07d-bc43-40a2-be1f-e786e25106ac`$header
#> # A tibble: 1 x 3
#>   identifier             datestamp      setSpec                            
#>   <chr>                  <chr>          <chr>                              
#> 1 d981c07d-bc43-40a2-be… 2018-01-21T21… installation:804b8dd0-07ac-4a30-bf…
#> 
#> $`d981c07d-bc43-40a2-be1f-e786e25106ac`$metadata
#> # A tibble: 1 x 12
#>   title publisher identifier subject source description type  creator date 
#>   <chr> <chr>     <chr>      <chr>   <chr>  <chr>       <chr> <chr>   <chr>
#> 1 Pece… Institut… https://w… peces,… http:… Caracteriz… Data… Fernan… 2018…
#> # … with 3 more variables: language <chr>, coverage <chr>, format <chr>

List MetadataFormats

list_metadataformats(id = "87832186-00ea-44dd-a6bf-c2896c4d09b4")
#> $`87832186-00ea-44dd-a6bf-c2896c4d09b4`
#>   metadataPrefix                                                   schema
#> 1         oai_dc           http://www.openarchives.org/OAI/2.0/oai_dc.xsd
#> 2            eml http://rs.gbif.org/schema/eml-gbif-profile/1.0.2/eml.xsd
#>                             metadataNamespace
#> 1 http://www.openarchives.org/OAI/2.0/oai_dc/
#> 2          eml://ecoinformatics.org/eml-2.1.1

List Sets

list_sets("http://api.gbif.org/v1/oai-pmh/registry")
#> # A tibble: 572 x 2
#>    setSpec                     setName         
#>    <chr>                       <chr>           
#>  1 dataset_type                per dataset type
#>  2 dataset_type:OCCURRENCE     occurrence      
#>  3 dataset_type:CHECKLIST      checklist       
#>  4 dataset_type:METADATA       metadata        
#>  5 dataset_type:SAMPLING_EVENT sampling_event  
#>  6 country                     per country     
#>  7 country:AD                  Andorra         
#>  8 country:AO                  Angola          
#>  9 country:AR                  Argentina       
#> 10 country:AT                  Austria         
#> # … with 562 more rows

Biodiversity Heritage Library

Identify

id("http://www.biodiversitylibrary.org/oai")
#>                                 repositoryName
#> 1 Biodiversity Heritage Library OAI Repository
#>                                   baseURL protocolVersion
#> 1 https://www.biodiversitylibrary.org/oai             2.0
#>                    adminEmail earliestDatestamp deletedRecord granularity
#> 1 oai@biodiversitylibrary.org        2006-01-01            no  YYYY-MM-DD
#>                                                        description
#> 1 oaibiodiversitylibrary.org:oai:biodiversitylibrary.org:item/1000

Get records

get_records(c("oai:biodiversitylibrary.org:item/7", "oai:biodiversitylibrary.org:item/9"),
            url = "http://www.biodiversitylibrary.org/oai")
#> $`oai:biodiversitylibrary.org:item/7`
#> $`oai:biodiversitylibrary.org:item/7`$header
#> # A tibble: 1 x 3
#>   identifier                         datestamp            setSpec
#>   <chr>                              <chr>                <chr>  
#> 1 oai:biodiversitylibrary.org:item/7 2016-07-13T09:13:41Z item   
#> 
#> $`oai:biodiversitylibrary.org:item/7`$metadata
#> # A tibble: 1 x 11
#>   title creator subject description publisher contributor date  type 
#>   <chr> <chr>   <chr>   <chr>       <chr>     <chr>       <chr> <chr>
#> 1 Die … Fleisc… Bogor;… pt.5:v.1 (… Leiden :… Missouri B… 1900… text…
#> # … with 3 more variables: identifier <chr>, language <chr>, rights <chr>
#> 
#> 
#> $`oai:biodiversitylibrary.org:item/9`
#> $`oai:biodiversitylibrary.org:item/9`$header
#> # A tibble: 1 x 3
#>   identifier                         datestamp            setSpec
#>   <chr>                              <chr>                <chr>  
#> 1 oai:biodiversitylibrary.org:item/9 2016-07-13T09:13:41Z item   
#> 
#> $`oai:biodiversitylibrary.org:item/9`$metadata
#> # A tibble: 1 x 11
#>   title creator subject description publisher contributor date  type 
#>   <chr> <chr>   <chr>   <chr>       <chr>     <chr>       <chr> <chr>
#> 1 Die … Fleisc… Bogor;… pt.5:v.3 (… Leiden :… Missouri B… 1906… text…
#> # … with 3 more variables: identifier <chr>, language <chr>, rights <chr>

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.