`rquery` re-maps a number of symbols during `SQL` translation.
During expression parsing the internal `rquery` function `tokenize_call_for_SQL()` implements the following re-mappings from `R` idioms to `SQL` notation.
library("rquery")
library("wrapr")
# Render each R expression string as the SQL token text rquery produces for it.
#
# strings: character vector of R expressions, one expression per element.
# Returns a character vector with one formatted translation per input element
# (vapply() carries the input strings over as names).
show_translation <- function(strings) {
  translate_one <- function(expr_text) {
    # Parse the text into a language object, then tokenize it without any
    # known column names (colnames = NULL).
    tokenized <- rquery::tokenize_for_SQL(str2lang(expr_text), colnames = NULL)
    format(tokenized$parsed_toks)
  }
  vapply(strings, translate_one, FUN.VALUE = character(1))
}
# One representative R expression per re-mapping we want to display.
mapping_table <- data.frame(
  example = c(
    "!x",
    "is.na(x)",
    "ifelse(a, b, c)",
    "a^b",
    "a%%b",
    "a==b",
    "a&&b",
    "a&b",
    "a||b",
    "a|b",
    "pmin(a, b)",
    "pmax(a, b)"
  ),
  stringsAsFactors = FALSE
)

# Add the SQL rendering of each example, then show both columns as a table.
mapping_table[["translation"]] <- show_translation(mapping_table[["example"]])
knitr::kable(mapping_table)
example | translation |
---|---|
!x | ( NOT ( x ) ) |
is.na(x) | ( ( x ) IS NULL ) |
ifelse(a, b, c) | ( CASE WHEN ( a ) THEN ( b ) WHEN NOT ( a ) THEN ( c ) ELSE NULL END ) |
a^b | POWER ( a , b ) |
a%%b | MOD ( a , b ) |
a==b | a = b |
a&&b | a AND b |
a&b | a AND b |
a||b | a OR b |
a|b | a OR b |
pmin(a, b) | ( CASE WHEN ( a ) IS NULL THEN ( b ) WHEN ( b ) IS NULL THEN ( a ) WHEN ( a ) <= ( b ) THEN ( a ) ELSE ( b ) END ) |
pmax(a, b) | ( CASE WHEN ( a ) IS NULL THEN ( b ) WHEN ( b ) IS NULL THEN ( a ) WHEN ( a ) >= ( b ) THEN ( a ) ELSE ( b ) END ) |
Note: not all possible mappings are implemented. For example, we currently do not re-map `%in%`, preferring that the user work explicitly with `set_indicator()` directly.
In addition, the database connectors can specify further re-mappings. These can be found by building a formal connector and inspecting its re-mappings.
# Open an in-memory SQLite database and initialise the RSQLite extensions
# on that connection.
raw_connection <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
RSQLite::initExtension(raw_connection)

# Wrap the raw DBI connection in an rquery database description, using the
# options returned by rq_connection_tests() for this connection.
db <- rquery_db_info(
  connection = raw_connection,
  is_dbi = TRUE,
  connection_options = rq_connection_tests(raw_connection)
)

# RSQLite has a non-standard modulo operator, so MOD is rewritten as an
# infix "%" expression wrapped in parentheses.
# NOTE(review): 3 and 5 look like positions of the two operand tokens in the
# parsed MOD call (the mapping table later renders this entry as
# "( .(3) % .(5) )") -- confirm against the rquery documentation.
db$expr_map[["MOD"]] <- list(
  pre_sql_token("("),
  3,
  pre_sql_token("%"),
  5,
  pre_sql_token(")")
)

# Fetch the connector's function-name map from its options; the option key is
# "rquery.<connection name>.fn_name_map".
fn_name_map <- db$connection_options[[
  paste0("rquery.", rq_connection_name(db), ".fn_name_map")
]]
fn_name_map
## mean
## "avg"
We see above that “`mean`” is re-mapped to “`avg`”.
In all cases we can see what re-mappings happen by examining a query.
# Small example data set, written row-wise with wrapr::build_frame();
# the trailing "|" marks the end of each row.
d_local <- build_frame(
  "subjectID", "surveyCategory"     , "assessmentTotal", "irrelevantCol1", "irrelevantCol2" |
  1L         , "withdrawal behavior", 5                , "irrel1"        , "irrel2"         |
  1L         , "positive re-framing", 2                , "irrel1"        , "irrel2"         |
  3L         , "withdrawal behavior", 3                , "irrel1"        , "irrel2"         |
  3L         , "positive re-framing", 4                , "irrel1"        , "irrel2"         )

# Copy the local frame into the database as table "d" and keep the handle that
# rq_copy_to() returns for building queries.
table_handle <- rq_copy_to(
  db,
  "d",
  d_local,
  overwrite = TRUE,
  temporary = TRUE
)

print(table_handle)
## [1] "table(`d`; subjectID, surveyCategory, assessmentTotal, irrelevantCol1, irrelevantCol2)"
# Grouped projection: for each subjectID, average pmax(0, assessmentTotal).
ops <- table_handle %.>%
  project(
    .,
    avg_total := avg(pmax(0, assessmentTotal)),
    groupby = "subjectID"
  )

# Show the SQL generated for this pipeline against the db connector, so the
# re-mappings are visible.
cat(to_sql(ops, db))
## SELECT `subjectID`, avg ( ( CASE WHEN ( 0 ) IS NULL THEN ( `assessmentTotal` ) WHEN ( `assessmentTotal` ) IS NULL THEN ( 0 ) WHEN ( 0 ) >= ( `assessmentTotal` ) THEN ( 0 ) ELSE ( `assessmentTotal` ) END ) ) AS `avg_total` FROM (
## SELECT
## `subjectID`,
## `assessmentTotal`
## FROM
## `d`
## ) tsql_35868337402762883376_0000000000
## GROUP BY
## `subjectID`
subjectID | avg_total |
---|---|
1 | 3.5 |
3 | 3.5 |
Additional function re-mappings can be specified by user code. One such example is re-writing `MOD` as `%` for `RSQLite`.
fn_name | sql_mapping | simple_name_mapping |
---|---|---|
mean | avg | TRUE |
as.Date | to_date ( .(3) , ‘YYYY-MM-DD’ ) | FALSE |
MOD | ( .(3) % .(5) ) | FALSE |
## SELECT
## `subjectID`,
## `surveyCategory`,
## `assessmentTotal`,
## `irrelevantCol1`,
## `irrelevantCol2`,
## ( `subjectID` % 3 ) AS `z`
## FROM (
## SELECT
## `subjectID`,
## `surveyCategory`,
## `assessmentTotal`,
## `irrelevantCol1`,
## `irrelevantCol2`
## FROM
## `d`
## ) tsql_14279028501846047745_0000000000
## subjectID surveyCategory assessmentTotal irrelevantCol1
## 1 1 withdrawal behavior 5 irrel1
## 2 1 positive re-framing 2 irrel1
## 3 3 withdrawal behavior 3 irrel1
## 4 3 positive re-framing 4 irrel1
## irrelevantCol2 z
## 1 irrel2 1
## 2 irrel2 1
## 3 irrel2 0
## 4 irrel2 0