The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

R mapping

John Mount

2023-08-19

rquery re-maps a number of symbols during SQL translation.

During expression parsing the internal rquery function tokenize_call_for_SQL() implements the following re-mappings from R idioms to SQL notation.

library("rquery")
library("wrapr")

show_translation <- function(strings) {
  vapply(strings,
         function(si) {
           format(rquery::tokenize_for_SQL(parse(text = si, keep.source = FALSE)[[1]], colnames = NULL)$parsed_toks)
         }, character(1))
}

mapping_table <- data.frame(
  example = c('!x', 'is.na(x)', 'ifelse(a, b, c)', 'a^b', 'a%%b', 
               'a==b', 'a&&b', 'a&b', 'a||b', 'a|b', 
              'pmin(a, b)', 'pmax(a, b)'),
  stringsAsFactors = FALSE)
mapping_table$translation <- show_translation(mapping_table$example)
knitr::kable(mapping_table)

example	translation
!x	( NOT ( x ) )
is.na(x)	( ( x ) IS NULL )
ifelse(a, b, c)	( CASE WHEN ( a ) THEN ( b ) WHEN NOT ( a ) THEN ( c ) ELSE NULL END )
a^b	POWER ( a , b )
a%%b	MOD ( a , b )
a==b	a = b
a&&b	a AND b
a&b	a AND b
a\|\|b	a OR b
a\|b	a OR b
pmin(a, b)	( CASE WHEN ( a ) IS NULL THEN ( b ) WHEN ( b ) IS NULL THEN ( a ) WHEN ( a ) <= ( b ) THEN ( a ) ELSE ( b ) END )
pmax(a, b)	( CASE WHEN ( a ) IS NULL THEN ( b ) WHEN ( b ) IS NULL THEN ( a ) WHEN ( a ) >= ( b ) THEN ( a ) ELSE ( b ) END )

Note: not all possible mappings are implemented. For example we currently do not re-map %in%, preferring the user to explicitly work with set_indicator() directly.

In addition to this the database connectors can specify additional re-mappings. This can be found by building a formal connector and inspecting the re-mappings.

have_RSQLite <- requireNamespace("RSQLite", quietly = TRUE)

raw_RSQLite_connection <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
RSQLite::initExtension(raw_RSQLite_connection)
db <- rquery_db_info(
  connection = raw_RSQLite_connection,
  is_dbi = TRUE,
  connection_options = rq_connection_tests(raw_RSQLite_connection))


fn_name_map <- db$connection_options[[paste0("rquery.", rq_connection_name(db), ".", "fn_name_map")]]
fn_name_map

## NULL

We see above that “mean” is re-mapped to “avg”.

In all cases we can see what re-mappings happen by examining a query.

d_local <- build_frame(
   "subjectID", "surveyCategory"     , "assessmentTotal", "irrelevantCol1", "irrelevantCol2" |
   1L         , "withdrawal behavior", 5                , "irrel1"        , "irrel2"         |
   1L         , "positive re-framing", 2                , "irrel1"        , "irrel2"         |
   3L         , "withdrawal behavior", 3                , "irrel1"        , "irrel2"         |
   3L         , "positive re-framing", 2                , "irrel1"        , "irrel2"         |
   3L         , "other"              , 1                , "irrel1"        , "irrel2"         )
table_handle <- rq_copy_to(db, 'd',
            d_local,
            temporary = TRUE, 
            overwrite = TRUE)
print(table_handle)

## [1] "mk_td(\"d\", c( \"subjectID\", \"surveyCategory\", \"assessmentTotal\", \"irrelevantCol1\", \"irrelevantCol2\"))"
##   subjectID      surveyCategory assessmentTotal irrelevantCol1 irrelevantCol2
## 1         1 withdrawal behavior               5         irrel1         irrel2
## 2         1 positive re-framing               2         irrel1         irrel2
## 3         3 withdrawal behavior               3         irrel1         irrel2
## 4         3 positive re-framing               2         irrel1         irrel2
## 5         3               other               1         irrel1         irrel2

ops <- table_handle %.>% 
  project(., 
          avg_total := avg(pmax(0, assessmentTotal)),
          groupby = "subjectID")

cat(to_sql(ops, db))

## SELECT `subjectID`, avg ( ( CASE WHEN ( 0 ) IS NULL THEN ( `assessmentTotal` ) WHEN ( `assessmentTotal` ) IS NULL THEN ( 0 ) WHEN ( 0 ) >= ( `assessmentTotal` ) THEN ( 0 ) ELSE ( `assessmentTotal` ) END ) ) AS `avg_total` FROM (
##  SELECT
##   `subjectID`,
##   `assessmentTotal`
##  FROM
##   `d`
##  ) tsql_47208226594582856777_0000000000
## GROUP BY
##  `subjectID`

ops %.>%
  execute(db, .) %.>%
  knitr::kable(.)

subjectID	avg_total
1	3.5
3	2.0

The basic mappings are stored in database option structures, and depend on the database. For example MOD is re-mapped back to % for RSQLite.

rquery::rq_function_mappings(db) %.>%
  knitr::kable(.)

fn_name	sql_mapping
as.Date	to_date ( .(3) , ‘YYYY-MM-DD’ )
n	COUNT ( 1 )
mean	AVG ( .(3) )
cumsum	SUM ( .(3) )
shift	LAG ( .(3) )
MOD	( .(3) % .(5) )
rand	ABS ( ( RANDOM ( ) % 268435456 ) / 268435455.0 )

ops <- table_handle %.>% 
  project(., groupby = "subjectID",
          n := 5, 
          count := n(),
          mean := mean(assessmentTotal)) %.>% 
  extend(., was_n := n)
                 
cat(to_sql(ops, db))

## SELECT
##  `subjectID`,
##  `n`,
##  `count`,
##  `mean`,
##  `n`  AS `was_n`
## FROM (
##  SELECT `subjectID`, 5 AS `n`, COUNT ( 1 ) AS `count`, AVG ( `assessmentTotal` ) AS `mean` FROM (
##   SELECT
##    `subjectID`,
##    `assessmentTotal`
##   FROM
##    `d`
##   ) tsql_15438122330756525169_0000000000
##  GROUP BY
##   `subjectID`
##  ) tsql_15438122330756525169_0000000001

ops %.>%
  execute(db, .) %.>%
  knitr::kable(.)

subjectID	n	count	mean	was_n
1	5	2	3.5	5
3	5	3	2.0	5

Additional function re-mappings can be specified by user code. One such example is re-writing MOD as % for RSQLite.

ops <- table_handle %.>% 
  extend(., z := 1 + subjectID %% 3) %.>%
  select_columns(., c("subjectID", "z"))
                 
cat(to_sql(ops, db))

## SELECT
##  `subjectID`,
##  `z`
## FROM (
##  SELECT
##   `subjectID`,
##   1 + ( `subjectID` % 3 )  AS `z`
##  FROM (
##   SELECT
##    `subjectID`
##   FROM
##    `d`
##   ) tsql_93440808473188090527_0000000000
## ) tsql_93440808473188090527_0000000001

ops %.>%
  execute(db, .) %.>%
  knitr::kable(.)

subjectID	z
1	2
1	2
3	1
3	1
3	1

rqdatatable also supplies some re-mappings (described here). This can allow us to use a uniform notation for tasks such as random number generation to allow portable pipelines.

DBI::dbDisconnect(raw_RSQLite_connection)

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.