rif is an R wrapper for the Neuroscience Information Framework (NIF) APIs.
Install the stable version of rif from CRAN
install.packages("rif")
Or install the development version from GitHub
devtools::install_github("ropensci/rif")
library("rif")
out <- rif_summary("cellular")
head(out$result$federationSummary$results)
#> parentCategory category db
#> 1 Entity Type Molecule PeptideAtlas
#> 2 Type of Data Phenotype IMPC
#> 3 Type of Data Cell line Coriell Cell Repositories
#> 4 Entity Type Cell Coriell Cell Repositories
#> 5 Type of Data Biospecimen Coriell Cell Repositories
#> 6 Type of Data Expression GEO
#> indexable nifId count totalCount snippets
#> 1 MassSpec nif-0000-03266-2 4 76 NULL
#> 2 KnockoutPhenotypes nlx_151660-1 3 22344 NULL
#> 3 NIGMS nif-0000-00182-2 11 12986 NULL
#> 4 NIGMS nif-0000-00182-2 11 12986 NULL
#> 5 NIGMS nif-0000-00182-2 11 12986 NULL
#> 6 MicroarrayExperiment nif-0000-00142-1 4340 68725 NULL
#> summaryString
#> 1 PeptideAtlas: MassSpec (4)[]
#> 2 IMPC: KnockoutPhenotypes (3)[]
#> 3 Coriell Cell Repositories: NIGMS (11)[]
#> 4 Coriell Cell Repositories: NIGMS (11)[]
#> 5 Coriell Cell Repositories: NIGMS (11)[]
#> 6 GEO: MicroarrayExperiment (4340)[]
List NIF query categories
rif_query_categories()
#> [1] "anatomy" "antibody" "catalognumber"
#> [4] "cell" "coordinate" "disease"
#> [7] "environment" "fulltext" "function"
#> [10] "gene" "genetargetreagent" "genomiclocus"
#> [13] "genomiclocusvariant" "genotype" "identifier"
#> [16] "interaction" "interactiontype" "moleculardomain"
#> [19] "molecule" "organism" "pathway"
#> [22] "phenotype" "protocol" "publication"
#> [25] "quality" "resource" "sequence"
#> [28] "sequencefeature" "specimen" "stage"
#> [31] "strain" "subcellularanatomy"
Search for vocabulary terms
vocabulary_search("cell", limit = 3)
#> uuid term id category
#> 1 c2aad6c6-a2b8-4818-b2c8-5271dd5f431c cell NEMO_9559000 cell
#> 2 439b50ba-d1a6-4b4c-bbfd-d1d61d74ba69 cell GO_0005623 cell
#> 3 8ddd9932-4e66-4a25-84af-237c0784cef4 On cell nifext_32 Cell
#> provider inferred acronym abbreviation synonyms definition
#> 1 NIFSTD FALSE FALSE FALSE NA NA
#> 2 NIFSTD FALSE FALSE FALSE NA NA
#> 3 NIFSTD FALSE FALSE FALSE NA NA
Break up text into sentences, chunks, and entities
text <- "Lorem ipsum inceptos dolor nisi torquent porttitor donec, blandit scelerisque
mattis cras quis mi, aliquam sagittis. Convallis placerat commodo imperdiet varius nunc
tempus urna vitae ultrices tristique eu mi ornare velit donec, posuere laoreet pretium
vitae porta augue porta feugiat in sapien egestas. Quam odio nullam pulvinar litora
curabitur amet lacus sociosqu gravida ligula molestie dui netus fusce bibendum
scelerisque, dictum malesuada proin elit etiam aliquam, mattis euismod donec nisl facilisis."
text <- gsub("\n", "", text)
lexical_sentences(text)
#> $sentence
#> [1] "Lorem ipsum inceptos dolor nisi torquent porttitor donec, blandit scelerisquemattis cras quis mi, aliquam sagittis."
#> [2] "Convallis placerat commodo imperdiet varius nunctempus urna vitae ultrices tristique eu mi ornare velit donec, posuere laoreet pretiumvitae porta augue porta feugiat in sapien egestas."
#> [3] "Quam odio nullam pulvinar litoracurabitur amet lacus sociosqu gravida ligula molestie dui netus fusce bibendumscelerisque, dictum malesuada proin elit etiam aliquam, mattis euismod donec nisl facilisis."
head(lexical_chunks(text))
#> token start end
#> 1 Lorem ipsum inceptos 0 20
#> 2 dolor nisi torquent porttitor donec 21 56
#> 3 blandit scelerisquemattis 58 83
#> 4 cras 84 88
#> 5 quis mi 89 96
#> 6 aliquam sagittis 98 114
head(lexical_entities(text))
#> token start end
#> 1 Lorem ipsum inceptos dolor 0 26
#> 2 torquent porttitor donec 32 56
#> 3 blandit scelerisquemattis 58 83
#> 4 cras 84 88
#> 5 quis mi 89 96
#> 6 aliquam sagittis . 98 115
Search the literature
literature_search(query = "cellular", count = 5)
#> $result
#> $result$publications
#> # A tibble: 5 × 18
#> pmid pmcid
#> * <chr> <lgl>
#> 1 12944235 NA
#> 2 27417120 NA
#> 3 8789268 NA
#> 4 2987169 NA
#> 5 15088773 NA
#> # ... with 16 more variables: title <chr>, authors <list>,
#> # grantIds <list>, grantAgencies <list>, retractions <list>,
#> # journal <chr>, journalShort <chr>, day <int>, month <int>, year <int>,
#> # meshHeadings <list>, hasAbstract <lgl>, score <dbl>, snippets <list>,
#> # abstract <chr>, affiliation <list>
#>
#> $result$facets
#> list()
#>
#> $result$debugInfo
#> [1] "grantId:cellular^20.0 abstract:cellular^5.0 journal:cellular author:cellular^3.0 title:cellular^10.0 abstract_exact:cellular^10.0 year_search:cellular^10.0 pmid_search:cellular^10.0 pmcid_search:cellular^10.0 title_exact:cellular^20.0 meshHeading:cellular^7.0"
#>
#> $result$offset
#> [1] 0
#>
#> $result$resultCount
#> [1] 797252
#>
#>
#> $query
#> $query$clauses
#> category property query expansion id quoted require forbid
#> 1 NA NA cellular NULL NA FALSE FALSE FALSE
#>
#> $query$operator
#> [1] "AND"
#>
#>
#> $messages
#> list()
Get retractions
out <- literature_retractions()
out[1:20]
#> [1] "21386829" "21967191" "16519442" "23675629" "15968000" "16934686"
#> [7] "10318977" "16373573" "22360771" "9593639" "19723695" "11108151"
#> [13] "18410446" "17251587" "23551690" "26389933" "27163758" "22992046"
#> [19] "12554767" "11146662"
Then look up details for some of the retracted articles by their PMIDs
arts <- literature_pmid(pmid = out[1:2])
lapply(arts, "[[", "title")
#> [[1]]
#> [1] "Treatment of articulatory dysfunction in Parkinson's disease using repetitive transcranial magnetic stimulation."
#>
#> [[2]]
#> [1] "Molecular pathways underlying IBD-associated colorectal neoplasia: therapeutic implications."
Search for data
out <- federation_search("cellular")
out$query
#> $clauses
#> category property query expansion id quoted require forbid
#> 1 NA NA cellular NULL NA FALSE FALSE FALSE
#>
#> $operator
#> [1] "AND"
head(out$result$results)
#> parentCategory category db
#> 1 Entity Type Molecule PeptideAtlas
#> 2 Type of Data Phenotype IMPC
#> 3 Type of Data Biospecimen Coriell Cell Repositories
#> 4 Entity Type Cell Coriell Cell Repositories
#> 5 Type of Data Cell line Coriell Cell Repositories
#> 6 Type of Data Expression GEO
#> indexable nifId count totalCount snippets
#> 1 MassSpec nif-0000-03266-2 4 76 NULL
#> 2 KnockoutPhenotypes nlx_151660-1 3 22344 NULL
#> 3 NIGMS nif-0000-00182-2 11 12986 NULL
#> 4 NIGMS nif-0000-00182-2 11 12986 NULL
#> 5 NIGMS nif-0000-00182-2 11 12986 NULL
#> 6 MicroarrayExperiment nif-0000-00142-1 4340 68725 NULL
#> summaryString
#> 1 PeptideAtlas: MassSpec (4)[]
#> 2 IMPC: KnockoutPhenotypes (3)[]
#> 3 Coriell Cell Repositories: NIGMS (11)[]
#> 4 Coriell Cell Repositories: NIGMS (11)[]
#> 5 Coriell Cell Repositories: NIGMS (11)[]
#> 6 GEO: MicroarrayExperiment (4340)[]
Get data from a single source by its id
out <- federation_data(id = "nlx_152871-2")
out$result$result
#> # A tibble: 20 × 11
#> Gene
#> <chr>
#> 1 B cell CLL/lymphoma 6, member B
#> 2 cyclin-dependent kinase 4
#> 3 calcium channel, voltage-dependent, beta 2 subunit
#> 4 clarin 3
#> 5 afamin
#> 6 Fas death domain-associated protein
#> 7 ring finger protein 14
#> 8 DnaJ (Hsp40) homolog, subfamily C, member 21
#> 9 protein inhibitor of activated STAT 1
#> 10 erythropoietin receptor
#> 11 COP9 (constitutive photomorphogenic) homolog, subunit 3 (Arabidopsis thalia
#> 12 proteasome (prosome, macropain) 26S subunit, ATPase 2
#> 13 speedy homolog A (Xenopus laevis)
#> 14 apolipoprotein M
#> 15 zinc finger protein 623
#> 16 P450 (cytochrome) oxidoreductase
#> 17 erythropoietin receptor
#> 18 zinc and ring finger 1
#> 19 solute carrier family 13 (sodium/sulfate symporters), member 1
#> 20 snail homolog 2 (Drosophila)
#> # ... with 10 more variables: Anatomical.Component <chr>,
#> # Assay.Type <chr>, Theiler.Stage <chr>, Tissue <chr>,
#> # Expression.Strength <chr>, Expression.Pattern <chr>,
#> # Expression.Pattern.Location <chr>, Authors <chr>, Notes <chr>,
#> # Source <chr>