Make sure to configure the library at the beginning of every new R session. To do so, invoke dwapi::configure(),
passing the data.world authentication token obtained at https://data.world/settings/advanced.
Use dwapi::create_dataset() to create a new dataset. The library includes a number of constructor functions to facilitate the preparation of complex requests like this one. The example here uses dwapi::dataset_create_request().
create_cars_dataset = dwapi::dataset_create_request(
title = sprintf("My cars dataset %s", runif(1)),
visibility = "PRIVATE",
license_string = "Other"
)
cars_dataset = dwapi::create_dataset(Sys.getenv("DW_USER"), create_cars_dataset)
cars_dataset
#> $uri
#> [1] "https://data.world/rflprr/my-cars-dataset-0-446630908409134"
#>
#> $message
#> [1] "Dataset created successfully."
#>
#> attr(,"class")
#> [1] "create_dataset_response"
Additional information can be added over time, with dataset updates.
update_cars_dataset = dwapi::dataset_update_request(
description = "This is a dataset created from R's cars dataset."
)
dwapi::update_dataset(cars_dataset$uri, update_cars_dataset)
#> https://api.data.world/v0/datasets/rflprr/my-cars-dataset-0-446630908409134
#> $message
#> [1] "Dataset updated successfully."
#>
#> attr(,"class")
#> [1] "success_message"
Files can be added via URL, from the local file system, or directly as a data frame.
upload_response <- dwapi::upload_data_frame(cars_dataset$uri, cars, "cars.csv")
#> tmp file /var/folders/vq/lcbclw1d7hg68t717fz0_ggr0000gn/T//RtmplCY4Pj/filee1ed3187d15dcsv created.
#>
|
| | 0%
|
|=================================================================| 100%
Sys.sleep(10) # Files are processed asynchronously.
upload_response
#> $message
#> [1] "File uploaded."
#>
#> attr(,"class")
#> [1] "success_message"
data.world extracts tabular data from various tabular data formats. Tables are a logical representation of tabular data that has been extracted and normalized.
tables = dwapi::list_tables(cars_dataset$uri)
tables
#> [1] "cars"
At this point, it is possible to review the schema of dataset tables.
dwapi::get_table_schema(cars_dataset$uri, tables[[1]])
#> $fields
#> $fields[[1]]
#> $fields[[1]]$name
#> [1] "speed"
#>
#> $fields[[1]]$title
#> [1] "speed"
#>
#> $fields[[1]]$description
#> NULL
#>
#> $fields[[1]]$rdf_type
#> [1] "http://www.w3.org/2001/XMLSchema#integer"
#>
#>
#> $fields[[2]]
#> $fields[[2]]$name
#> [1] "dist"
#>
#> $fields[[2]]$title
#> [1] "dist"
#>
#> $fields[[2]]$description
#> NULL
#>
#> $fields[[2]]$rdf_type
#> [1] "http://www.w3.org/2001/XMLSchema#integer"
#>
#>
#>
#> attr(,"class")
#> [1] "table_schema_response"
It is also possible to annotate fields, providing textual descriptions that make datasets easier to understand and work with.
update_cars_schema = dwapi::table_schema_update_request(
fields = list(dwapi::table_schema_field_update_request(name = "speed", description = "Top speed"))
)
dwapi::update_table_schema(cars_dataset$uri, tables[[1]], update_cars_schema)
#> $category
#> [1] "Success"
#>
#> $reason
#> [1] "OK"
#>
#> $message
#> [1] "Success: (200) OK"
dwapi::get_table_schema(cars_dataset$uri, tables[[1]])
#> $fields
#> $fields[[1]]
#> $fields[[1]]$name
#> [1] "speed"
#>
#> $fields[[1]]$title
#> [1] "speed"
#>
#> $fields[[1]]$description
#> [1] "Top speed"
#>
#> $fields[[1]]$rdf_type
#> [1] "http://www.w3.org/2001/XMLSchema#integer"
#>
#>
#> $fields[[2]]
#> $fields[[2]]$name
#> [1] "dist"
#>
#> $fields[[2]]$title
#> [1] "dist"
#>
#> $fields[[2]]$description
#> NULL
#>
#> $fields[[2]]$rdf_type
#> [1] "http://www.w3.org/2001/XMLSchema#integer"
#>
#>
#>
#> attr(,"class")
#> [1] "table_schema_response"
Datasets can be queried using SQL and SPARQL. Once again, it’s important to keep the concept of tables and their names in mind.
sql_query = "SELECT * FROM cars"
dwapi::sql(cars_dataset$uri, sql_query)
#> # A tibble: 50 × 2
#> speed dist
#> <int> <int>
#> 1 4 2
#> 2 4 10
#> 3 11 28
#> 4 12 14
#> 5 12 20
#> 6 12 24
#> 7 12 28
#> 8 13 26
#> 9 13 34
#> 10 13 34
#> # ... with 40 more rows
These are simple examples of the power of data.world’s REST API and of how this library makes using it convenient. To learn more, review the complete documentation, starting with ?dwapi.