dmtools_intro

Installation

library(dmtools)

Overview

dmtools is a package for checking datasets exported from an EDC system in clinical trials. Note that the variable names in your dataset should have a postfix (e.g. _v1) or a prefix (e.g. v1_). Column names should be unique.

Usage

laboratory

For the laboratory check, you need to create an Excel table like the one in the example.

lab reference ranges
AGELOW AGEHIGH SEX LBTEST LBORRES LBNDIND LBORNRLO LBORNRHI
18 45 f|m gluc gluc gluc_res 3.9 5.9
18 45 m ast ast ast_res 0 42
18 45 f ast ast ast_res 0 39
dataset
id age sex gluc_v1 gluc_res_v1 ast_v2 ast_res_v2
01 19 f 5.5 norm 30 norm
02 20 m 4.1 NA 48 norm
03 22 m 9.7 norm 31 norm
# "norm" and "no" are only examples: pass the values that your dataset uses for the estimate of the result
refs <- system.file("labs_refer.xlsx", package = "dmtools")
obj_lab <- lab(refs, id, age, sex, "norm", "no")
obj_lab <- obj_lab %>% check(df)

# ok - analyses that have a correct estimate of the result
obj_lab %>% choose_test("ok")
#>   id age sex LBTEST LBTESCD VISIT LBORNRLO LBORNRHI LBORRES LBNRIND
#> 1 01  19   f   gluc    gluc   _v1      3.9      5.9     5.5    norm
#> 2 01  19   f    ast     ast   _v2      0.0     39.0      30    norm
#> 3 03  22   m    ast     ast   _v2      0.0     42.0      31    norm
#>   RES_TYPE_NUM IND_EXPECTED
#> 1          5.5         norm
#> 2         30.0         norm
#> 3         31.0         norm

# mis - analyses that have an incorrect estimate of the result
obj_lab %>% choose_test("mis")
#>   id age sex LBTEST LBTESCD VISIT LBORNRLO LBORNRHI LBORRES LBNRIND
#> 1 02  20   m    ast     ast   _v2      0.0     42.0      48    norm
#> 2 03  22   m   gluc    gluc   _v1      3.9      5.9     9.7    norm
#>   RES_TYPE_NUM IND_EXPECTED
#> 1         48.0           no
#> 2          9.7           no

# skip - analyses that have an empty value of the estimate
obj_lab %>% choose_test("skip")
#>   id age sex LBTEST LBTESCD VISIT LBORNRLO LBORNRHI LBORRES LBNRIND
#> 1 02  20   m   gluc    gluc   _v1      3.9      5.9     4.1    <NA>
#>   RES_TYPE_NUM IND_EXPECTED
#> 1          4.1         <NA>

# all analyses
obj_lab %>% get_result()
#>   id age sex LBTEST LBTESCD VISIT LBORNRLO LBORNRHI LBORRES LBNRIND
#> 1 01  19   f   gluc    gluc   _v1      3.9      5.9     5.5    norm
#> 2 01  19   f    ast     ast   _v2      0.0     39.0      30    norm
#> 3 02  20   m   gluc    gluc   _v1      3.9      5.9     4.1    <NA>
#> 4 02  20   m    ast     ast   _v2      0.0     42.0      48    norm
#> 5 03  22   m   gluc    gluc   _v1      3.9      5.9     9.7    norm
#> 6 03  22   m    ast     ast   _v2      0.0     42.0      31    norm
#>   RES_TYPE_NUM IND_EXPECTED IS_RIGHT
#> 1          5.5         norm     TRUE
#> 2         30.0         norm     TRUE
#> 3          4.1         <NA>       NA
#> 4         48.0           no    FALSE
#> 5          9.7           no    FALSE
#> 6         31.0         norm     TRUE

dates

For the dates check, you need to create an Excel table like the one in the example.

timeline
VISITNUM VISIT MINUS PLUS VISITDY STARTDAT STARTVISIT IS_EQUAL EQUALDAT
E1 screening 0 3 0 screen_date_E1 date of screening F NA
E2 rand 0 0 0 rand_date_E2 date of randomization T rand_date_E2
E3 visit 2 1 1 5 rand_date_E2 date of randomization T ph_date_E3
dataset
id screen_date_E1 rand_date_E2 ph_date_E3 bio_date_E3
01 1991-03-13 1991-03-15 1991-03-21 1991-03-23
02 1991-03-07 1991-03-11 1991-03-16 1991-03-16
03 1991-03-08 1991-03-10 1991-03-16 1991-03-16
# use the str_date parameter to search for columns with dates, default: "DAT"
dates <- system.file("dates.xlsx", package = "dmtools")
obj_date <- date(dates, id, dplyr::contains, dplyr::matches)
obj_date <- obj_date %>% check(df)

# out - dates, which are out of the protocol's timeline
obj_date %>% choose_test("out")
#>   id            STARTVISIT   STARTDAT   VISIT        TERM     VISDAT
#> 1 01 date of randomization 1991-03-15 visit 2 bio_date_E3 1991-03-23
#>                          PLANDAT DAYS_OUT
#> 1 1991-03-19 UTC--1991-03-21 UTC        2

# uneq - dates, which are unequal
obj_date %>% choose_test("uneq")
#>   id   VISIT        TERM     VISDAT   EQUALDAT IS_TIMELINE
#> 1 01 visit 2 bio_date_E3 1991-03-23 1991-03-21       FALSE

# ok - correct dates
obj_date %>% choose_test("ok")
#>    id            STARTVISIT   STARTDAT     VISIT           TERM     VISDAT
#> 1  01     date of screening 1991-03-13 screening screen_date_E1 1991-03-13
#> 2  01 date of randomization 1991-03-15      rand   rand_date_E2 1991-03-15
#> 3  01 date of randomization 1991-03-15   visit 2     ph_date_E3 1991-03-21
#> 4  02     date of screening 1991-03-07 screening screen_date_E1 1991-03-07
#> 5  02 date of randomization 1991-03-11      rand   rand_date_E2 1991-03-11
#> 6  02 date of randomization 1991-03-11   visit 2     ph_date_E3 1991-03-16
#> 7  02 date of randomization 1991-03-11   visit 2    bio_date_E3 1991-03-16
#> 8  03     date of screening 1991-03-08 screening screen_date_E1 1991-03-08
#> 9  03 date of randomization 1991-03-10      rand   rand_date_E2 1991-03-10
#> 10 03 date of randomization 1991-03-10   visit 2     ph_date_E3 1991-03-16
#> 11 03 date of randomization 1991-03-10   visit 2    bio_date_E3 1991-03-16
#>                           PLANDAT   EQUALDAT
#> 1  1991-03-13 UTC--1991-03-16 UTC 1991-03-13
#> 2  1991-03-15 UTC--1991-03-15 UTC 1991-03-15
#> 3  1991-03-19 UTC--1991-03-21 UTC 1991-03-21
#> 4  1991-03-07 UTC--1991-03-10 UTC 1991-03-07
#> 5  1991-03-11 UTC--1991-03-11 UTC 1991-03-11
#> 6  1991-03-15 UTC--1991-03-17 UTC 1991-03-16
#> 7  1991-03-15 UTC--1991-03-17 UTC 1991-03-16
#> 8  1991-03-08 UTC--1991-03-11 UTC 1991-03-08
#> 9  1991-03-10 UTC--1991-03-10 UTC 1991-03-10
#> 10 1991-03-14 UTC--1991-03-16 UTC 1991-03-16
#> 11 1991-03-14 UTC--1991-03-16 UTC 1991-03-16

# all dates
obj_date %>% get_result()
#>    id            STARTVISIT   STARTDAT     VISIT           TERM     VISDAT
#> 1  01     date of screening 1991-03-13 screening screen_date_E1 1991-03-13
#> 2  01 date of randomization 1991-03-15      rand   rand_date_E2 1991-03-15
#> 3  01 date of randomization 1991-03-15   visit 2     ph_date_E3 1991-03-21
#> 4  01 date of randomization 1991-03-15   visit 2    bio_date_E3 1991-03-23
#> 5  02     date of screening 1991-03-07 screening screen_date_E1 1991-03-07
#> 6  02 date of randomization 1991-03-11      rand   rand_date_E2 1991-03-11
#> 7  02 date of randomization 1991-03-11   visit 2     ph_date_E3 1991-03-16
#> 8  02 date of randomization 1991-03-11   visit 2    bio_date_E3 1991-03-16
#> 9  03     date of screening 1991-03-08 screening screen_date_E1 1991-03-08
#> 10 03 date of randomization 1991-03-10      rand   rand_date_E2 1991-03-10
#> 11 03 date of randomization 1991-03-10   visit 2     ph_date_E3 1991-03-16
#> 12 03 date of randomization 1991-03-10   visit 2    bio_date_E3 1991-03-16
#>                           PLANDAT   EQUALDAT IS_TIMELINE IS_EQUAL DAYS_OUT
#> 1  1991-03-13 UTC--1991-03-16 UTC 1991-03-13        TRUE     TRUE        0
#> 2  1991-03-15 UTC--1991-03-15 UTC 1991-03-15        TRUE     TRUE        0
#> 3  1991-03-19 UTC--1991-03-21 UTC 1991-03-21        TRUE     TRUE        0
#> 4  1991-03-19 UTC--1991-03-21 UTC 1991-03-21       FALSE    FALSE        2
#> 5  1991-03-07 UTC--1991-03-10 UTC 1991-03-07        TRUE     TRUE        0
#> 6  1991-03-11 UTC--1991-03-11 UTC 1991-03-11        TRUE     TRUE        0
#> 7  1991-03-15 UTC--1991-03-17 UTC 1991-03-16        TRUE     TRUE        0
#> 8  1991-03-15 UTC--1991-03-17 UTC 1991-03-16        TRUE     TRUE        0
#> 9  1991-03-08 UTC--1991-03-11 UTC 1991-03-08        TRUE     TRUE        0
#> 10 1991-03-10 UTC--1991-03-10 UTC 1991-03-10        TRUE     TRUE        0
#> 11 1991-03-14 UTC--1991-03-16 UTC 1991-03-16        TRUE     TRUE        0
#> 12 1991-03-14 UTC--1991-03-16 UTC 1991-03-16        TRUE     TRUE        0

dplyr::contains - a function that selects the necessary visit or event, e.g. dplyr::starts_with or dplyr::contains. It works like df %>% select(contains("E1")). You can also use dplyr::starts_with, which works like df %>% select(starts_with("V1")).

dplyr::matches - a function that selects the date columns of the necessary visit, e.g. dplyr::matches or dplyr::contains. It works like visit_one %>% select(contains("DAT")); the default is dplyr::contains().

sites

Use this approach if the clinical trial has several sites with different lab reference ranges.

lab reference ranges s01
AGELOW AGEHIGH SEX LBTEST LBORRES LBNDIND LBORNRLO LBORNRHI
18 45 f|m gluc gluc gluc_res 4.0 5.9
18 40 m ast ast ast_res 0 41
18 39 f ast ast ast_res 0 43
lab reference ranges s02
AGELOW AGEHIGH SEX LBTEST LBORRES LBNDIND LBORNRLO LBORNRHI
18 45 f|m gluc gluc gluc_res 4.2 6.1
18 40 m ast ast ast_res 0 35
19 41 f ast ast ast_res 0 41
dataset
site id age sex gluc_v1 gluc_res_v1 ast_v2 ast_res_v2
site 01 01 19 f 5.5 norm 30 NA
site 02 02 20 m 4.1 no 48 norm
refs_s01 <- system.file("labs_refer_s01.xlsx", package = "dmtools")
refs_s02 <- system.file("labs_refer_s02.xlsx", package = "dmtools")

s01_lab <- lab(refs_s01, id, age, sex, "norm", "no", site = "site 01")
s02_lab <- lab(refs_s02, id, age, sex, "norm", "no", site = "site 02")

labs <- list(s01_lab, s02_lab)
labs <- labs %>% check_sites(df, site)

# mis - analyses that have an incorrect estimate of the result
labs %>% test_sites(function (lab) choose_test(lab, "mis"))
#>   id age sex LBTEST LBTESCD VISIT LBORNRLO LBORNRHI LBORRES LBNRIND
#> 1 02  20   m    ast     ast   _v2        0       35      48    norm
#>   RES_TYPE_NUM IND_EXPECTED num_site
#> 1           48           no  site 02

# ok - analyses that have a correct estimate of the result
labs %>% test_sites(function (lab) choose_test(lab, "ok")) 
#>   id age sex LBTEST LBTESCD VISIT LBORNRLO LBORNRHI LBORRES LBNRIND
#> 1 01  19   f   gluc    gluc   _v1      4.0      5.9     5.5    norm
#> 2 02  20   m   gluc    gluc   _v1      4.2      6.1     4.1      no
#>   RES_TYPE_NUM IND_EXPECTED num_site
#> 1          5.5         norm  site 01
#> 2          4.1           no  site 02

# skip - analyses that have an empty value of the estimate
labs %>% test_sites(function (lab) choose_test(lab, "skip"))
#>   id age sex LBTEST LBTESCD VISIT LBORNRLO LBORNRHI LBORRES LBNRIND
#> 1 01  19   f    ast     ast   _v2        0       43      30    <NA>
#>   RES_TYPE_NUM IND_EXPECTED num_site
#> 1           30         <NA>  site 01

# all analyses
labs %>% test_sites(function (lab) get_result(lab))
#>   id age sex LBTEST LBTESCD VISIT LBORNRLO LBORNRHI LBORRES LBNRIND
#> 1 01  19   f   gluc    gluc   _v1      4.0      5.9     5.5    norm
#> 2 01  19   f    ast     ast   _v2      0.0     43.0      30    <NA>
#> 3 02  20   m   gluc    gluc   _v1      4.2      6.1     4.1      no
#> 4 02  20   m    ast     ast   _v2      0.0     35.0      48    norm
#>   RES_TYPE_NUM IND_EXPECTED IS_RIGHT num_site
#> 1          5.5         norm     TRUE  site 01
#> 2         30.0         <NA>       NA  site 01
#> 3          4.1           no     TRUE  site 02
#> 4         48.0           no    FALSE  site 02

# you can combine sites that share reference ranges by separating them with |
comb_lab <- lab(refs_s01, id, age, sex, "norm", "no", site = "site 01|site 02")
comb_labs <- list(comb_lab)

comb_labs <- comb_labs %>% check_sites(df, site)
comb_labs %>% test_sites(function (lab) choose_test(lab, "mis"))
#>   id age sex LBTEST LBTESCD VISIT LBORNRLO LBORNRHI LBORRES LBNRIND
#> 1 02  20   m   gluc    gluc   _v1        4      5.9     4.1      no
#> 2 02  20   m    ast     ast   _v2        0     41.0      48    norm
#>   RES_TYPE_NUM IND_EXPECTED        num_site
#> 1          4.1         norm site 01|site 02
#> 2         48.0           no site 01|site 02

rename

A function to rename the dataset using CRFs.

rename_dataset("./crfs", "old_name", "new_name", 2)