Faceted and highlighted searches

In addition to searchplos() and related searching functions, there are a few slightly different ways to search: faceting and highlighted searches. Faceting allows you to ask, e.g., how many articles are published in each of the PLOS journals. Highlighting allows you to ask, e.g., highlight terms that I search for in the text results given back, which can make downstream processing easier, and help visualize search results (see highbrow() below).

Load package from CRAN

install.packages("rplos")
library('rplos')

Faceted search

Facet by journal

facetplos(q='*:*', facet.field='journal')
#> $facet_queries
#> NULL
#> 
#> $facet_fields
#> $facet_fields$journal
#>                                  X1     X2
#> 1                          plos one 955046
#> 2                     plos genetics  41615
#> 3                    plos pathogens  36230
#> 4        plos computational biology  30544
#> 5                      plos biology  26456
#> 6  plos neglected tropical diseases  26120
#> 7                     plos medicine  18672
#> 8              plos clinical trials    521
#> 9                  plos collections     15
#> 10                     plos medicin      9
#> 
#> 
#> $facet_dates
#> NULL
#> 
#> $facet_ranges
#> NULL

Using facet.query to get counts

facetplos(q='*:*', facet.field='journal', facet.query='cell,bird')
#> $facet_queries
#>        term value
#> 1 cell,bird    18
#> 
#> $facet_fields
#> $facet_fields$journal
#>                                  X1     X2
#> 1                          plos one 955046
#> 2                     plos genetics  41615
#> 3                    plos pathogens  36230
#> 4        plos computational biology  30544
#> 5                      plos biology  26456
#> 6  plos neglected tropical diseases  26120
#> 7                     plos medicine  18672
#> 8              plos clinical trials    521
#> 9                  plos collections     15
#> 10                     plos medicin      9
#> 
#> 
#> $facet_dates
#> NULL
#> 
#> $facet_ranges
#> NULL

Date faceting

facetplos(q='*:*', url=url, facet.date='publication_date',
  facet.date.start='NOW/DAY-5DAYS', facet.date.end='NOW', facet.date.gap='+1DAY')
#> $facet_queries
#> NULL
#> 
#> $facet_fields
#> NULL
#> 
#> $facet_dates
#> $facet_dates$publication_date
#>                   date value
#> 1 2015-01-17T00:00:00Z     0
#> 2 2015-01-18T00:00:00Z     0
#> 3 2015-01-19T00:00:00Z   406
#> 4 2015-01-20T00:00:00Z   930
#> 5 2015-01-21T00:00:00Z   754
#> 6 2015-01-22T00:00:00Z   230
#> 
#> 
#> $facet_ranges
#> NULL

Highlighted search

Search for the term alcohol in the abstracts of articles, return only 10 results

highplos(q='alcohol', hl.fl = 'abstract', rows=2)
#> $`10.1371/journal.pmed.0040151`
#> $`10.1371/journal.pmed.0040151`$abstract
#> [1] "Background: <em>Alcohol</em> consumption causes an estimated 4% of the global disease burden, prompting"
#> 
#> 
#> $`10.1371/journal.pone.0027752`
#> $`10.1371/journal.pone.0027752`$abstract
#> [1] "Background: The negative influences of <em>alcohol</em> on TB management with regard to delays in seeking"

Search for the term alcohol in the abstracts of articles, and return fragment size of 20 characters, return only 5 results

highplos(q='alcohol', hl.fl='abstract', hl.fragsize=20, rows=2)
#> $`10.1371/journal.pmed.0040151`
#> $`10.1371/journal.pmed.0040151`$abstract
#> [1] "Background: <em>Alcohol</em>"
#> 
#> 
#> $`10.1371/journal.pone.0027752`
#> $`10.1371/journal.pone.0027752`$abstract
#> [1] " of <em>alcohol</em> on TB management"

Search for the term experiment across all sections of an article, return id (DOI) and title fl only, search in full articles only (via fq='doc_type:full'), and return only 10 results

highplos(q='everything:"experiment"', fl='id,title', fq='doc_type:full',
   rows=2)
#> $`10.1371/journal.pone.0039681`
#> $`10.1371/journal.pone.0039681`$everything
#> [1] " Selection of Transcriptomics <em>Experiments</em> Improves Guilt-by-Association Analyses Transcriptomics <em>Experiment</em>"
#> 
#> 
#> $`10.1371/annotation/9b8741e2-0f5f-49f9-9eaa-1b0cb9b8d25f`
#> $`10.1371/annotation/9b8741e2-0f5f-49f9-9eaa-1b0cb9b8d25f`$everything
#> [1] " in the labels under the bars. The labels should read <em>Experiment</em> 3 / <em>Experiment</em> 4 instead of <em>Experiment</em>"

Visualize highligted searches

Browse highlighted fragments in your default browser

This first examle, we only looko at 10 results

out <- highplos(q='alcohol', hl.fl = 'abstract', rows=10)
highbrow(out)

highbrow1

But it works quickly with lots of results too

out <- highplos(q='alcohol', hl.fl = 'abstract', rows=1200)
highbrow(out)

highbrow2