On most platforms, starting the MongoDB server is as easy as:
mongod
To install the latest development version:
# Install the development versions from GitHub; jsonlite is installed
# before mongolite.
library(devtools)
invisible(lapply(
  c("jeroenooms/jsonlite", "jeroenooms/mongolite"),
  install_github
))
# Connect to the "diamonds" collection on the local mongod
library(mongolite)
m <- mongo(collection = "diamonds")

# Load the diamonds data set shipped with ggplot2 and insert it
data(diamonds, package = "ggplot2")
m$insert(diamonds)

# Record count in the collection vs. row count of the data frame
m$count()
nrow(diamonds)

# Query: documents with cut "Premium" and price below 1000
out <- m$find(
  query = '{"cut" : "Premium", "price" : { "$lt" : 1000 } }'
)

# Compare the query result with the equivalent in-memory subset
nrow(out)
nrow(subset(diamonds, cut == "Premium" & price < 1000))
Some example queries from the dplyr tutorials.
# Insert some data
data(flights, package = "nycflights13")
m <- mongo(collection = "nycflights")
m$insert(flights)

# Basic queries
m$count('{"month":1, "day":1}')
jan1 <- m$find('{"month":1, "day":1}')

# Sorting: pass the order through find()'s `sort` argument instead of
# wrapping the filter in the deprecated "$query"/"$orderby" modifiers.
jan1 <- m$find('{"month":1,"day":1}', sort = '{"distance":-1}')
head(jan1)

# Select columns
jan1 <- m$find(
  '{"month":1,"day":1}',
  fields = '{"_id":0, "distance":1, "carrier":1}'
)

# Tabulate: document count and average distance per carrier
m$aggregate(
  '[{"$group":{"_id":"$carrier", "count": {"$sum":1}, "average":{"$avg":"$distance"}}}]'
)
Example data with zipcodes from the mongolite tutorial. This dataset has an _id
column, so you cannot insert it more than once.
library(jsonlite)
library(mongolite)

# Stream the remote JSON into the "zips" collection; each page that
# stream_in() hands to the handler is inserted as it arrives.
m <- mongo("zips")
stream_in(
  con = url("http://media.mongodb.org/zips.json"),
  handler = function(page) {
    m$insert(page, verbose = FALSE)
  }
)

# Number of records now in the collection
m$count()

# Read everything back. Note the 'location' column is actually an array!
zips <- m$find()
Stream large bulk samples from openweathermap with deeply nested data (takes a while).
# Stream the gzipped sample into the "weather" collection, 50 records per
# page, inserting each page as it arrives.
m <- mongo("weather")
stream_in(
  con = gzcon(url("http://78.46.48.103/sample/daily_14.json.gz")),
  handler = function(page) {
    m$insert(page, verbose = FALSE)
  },
  pagesize = 50
)

# Query on a nested field, then print the nested `data` column
berlin <- m$find('{"city.name" : "Berlin"}')
print(berlin$data)