Title: | Preprocessing Tools for Clinical Trial Data |
---|---|
Description: | A collection of functions to preprocess data and organize them in a format amenable to use by chevron. |
Authors: | Liming Li [aut, cre], Benoit Falquet [aut], Xiaoli Duan [ctb], Pawel Rucki [ctb], F. Hoffmann-La Roche AG [cph, fnd] |
Maintainer: | Liming Li <[email protected]> |
License: | Apache License 2.0 |
Version: | 0.1.8 |
Built: | 2024-10-24 12:24:12 UTC |
Source: | https://github.com/insightsengineering/dunlin |
Dunlin Package
A collection of functions to preprocess data and organize them in a format amenable to use by chevron.
Maintainer: Liming Li [email protected]
Authors:
Benoit Falquet [email protected]
Other contributors:
Xiaoli Duan [email protected] [contributor]
Pawel Rucki [email protected] [contributor]
F. Hoffmann-La Roche AG [copyright holder, funder]
Useful links:
Report bugs at https://github.com/insightsengineering/dunlin/issues
Add whisker values
add_whisker(x)
add_whisker(x)
x |
Named ( |
The names of the character vector give the strings to be replaced and the values give the new strings.
invisible NULL
. Assign the key-value pair provided as argument in the whisker environment.
my_whiskers <- c(Placeholder = "Replacement", Placeholder2 = "Replacement2") add_whisker(my_whiskers)
my_whiskers <- c(Placeholder = "Replacement", Placeholder2 = "Replacement2") add_whisker(my_whiskers)
Convert Rule to List
## S3 method for class 'rule' as.list(x, ...)
## S3 method for class 'rule' as.list(x, ...)
x |
( |
... |
not used. |
an object of class list
.
x <- rule("a" = c("a", "b"), "X" = "x", .to_NA = c("v", "w")) as.list(x)
x <- rule("a" = c("a", "b"), "X" = "x", .to_NA = c("v", "w")) as.list(x)
Assert that all names are among names of a list of data.frame.
assert_all_tablenames(db, tab, null_ok = TRUE, qualifier = NULL)
assert_all_tablenames(db, tab, null_ok = TRUE, qualifier = NULL)
db |
( |
tab |
( |
null_ok |
( |
qualifier |
( |
invisible TRUE
or an error message if the criteria are not fulfilled.
lsd <- list( mtcars = mtcars, iris = iris ) assert_all_tablenames(lsd, c("mtcars", "iris"), qualifier = "first test:")
lsd <- list( mtcars = mtcars, iris = iris ) assert_all_tablenames(lsd, c("mtcars", "iris"), qualifier = "first test:")
Assert Nested List can be used as Format Argument in Reformat.
assert_valid_format(object)
assert_valid_format(object)
object |
( |
invisible TRUE
or an error message if the criteria are not fulfilled.
format <- list( df1 = list( var1 = rule("X" = "x", "N" = c(NA, "")) ), df2 = list( var1 = rule(), var2 = rule("f11" = "F11", "NN" = NA) ), df3 = list() ) assert_valid_format(format)
format <- list( df1 = list( var1 = rule("X" = "x", "N" = c(NA, "")) ), df2 = list( var1 = rule(), var2 = rule("f11" = "F11", "NN" = NA) ), df3 = list() ) assert_valid_format(format)
Assert List can be Converted into a Nested List Compatible with the Format Argument of Reformat.
assert_valid_list_format(object)
assert_valid_list_format(object)
object |
( |
invisible TRUE
or an error message if the criteria are not fulfilled.
format <- list( df1 = list( var1 = list("X" = "x", "N" = c(NA, "")) ), df2 = list( var1 = list(), var2 = list("f11" = "F11", "NN" = NA) ), df3 = list() ) assert_valid_list_format(format)
format <- list( df1 = list( var1 = list("X" = "x", "N" = c(NA, "")) ), df2 = list( var1 = list(), var2 = list("f11" = "F11", "NN" = NA) ), df3 = list() ) assert_valid_list_format(format)
Setting the Label Attribute
attr_label(var, label)
attr_label(var, label)
var |
( |
label |
( |
object
with label attribute.
x <- c(1:10) attr(x, "label") y <- attr_label(x, "my_label") attr(y, "label")
x <- c(1:10) attr(x, "label") y <- attr_label(x, "my_label") attr(y, "label")
Setting the Label Attribute to Data Frame Columns
attr_label_df(df, label)
attr_label_df(df, label)
df |
( |
label |
( |
data.frame
with label attributes.
res <- attr_label_df(mtcars, letters[1:11]) res lapply(res, attr, "label")
res <- attr_label_df(mtcars, letters[1:11]) res lapply(res, attr, "label")
Reorder Two Columns Levels Simultaneously
co_relevels(df, primary, secondary, levels_primary)
co_relevels(df, primary, secondary, levels_primary)
df |
( |
primary |
( |
secondary |
( |
levels_primary |
( |
The function expects a 1:1 matching between the elements of the two selected columns.
a data.frame
with the secondary
column converted to factor with reordered levels.
df <- data.frame( SUBJID = 1:3, PARAMCD = factor(c("A", "B", "C")), PARAM = factor(paste("letter", LETTERS[1:3])) ) co_relevels(df, "PARAMCD", "PARAM", levels_primary = c("C", "A", "B"))
df <- data.frame( SUBJID = 1:3, PARAMCD = factor(c("A", "B", "C")), PARAM = factor(paste("letter", LETTERS[1:3])) ) co_relevels(df, "PARAMCD", "PARAM", levels_primary = c("C", "A", "B"))
Combine Rules Found in Lists of Rules.
combine_list_rules(x, val, ...)
combine_list_rules(x, val, ...)
x |
( |
val |
( |
... |
passed to |
a list
of rule
objects.
l1 <- list( r1 = rule( "first" = c("overwritten", "OVERWRITTEN"), "almost first" = c(NA, "almost") ), r2 = rule( ANYTHING = "anything" ) ) l2 <- list( r1 = rule( "first" = c("F", "f"), "second" = c("S", "s"), "third" = c("T", "t"), .to_NA = "something" ), r3 = rule( SOMETHING = "something" ) ) combine_list_rules(l1, l2)
l1 <- list( r1 = rule( "first" = c("overwritten", "OVERWRITTEN"), "almost first" = c(NA, "almost") ), r2 = rule( ANYTHING = "anything" ) ) l2 <- list( r1 = rule( "first" = c("F", "f"), "second" = c("S", "s"), "third" = c("T", "t"), .to_NA = "something" ), r3 = rule( SOMETHING = "something" ) ) combine_list_rules(l1, l2)
Combine Two Rules
combine_rules(x, y, ...)
combine_rules(x, y, ...)
x |
( |
y |
( |
... |
not used. |
a rule
.
The order of the mappings in the resulting rule corresponds to the order of the mappings in x
followed by the
mappings that are only present in y
.
r1 <- rule( "first" = c("from ori rule", "FROM ORI RULE"), "last" = c(NA, "last"), .to_NA = "X", .drop = TRUE ) r2 <- rule( "first" = c("F", "f"), "second" = c("S", "s"), "third" = c("T", "t"), .to_NA = "something" ) combine_rules(r1, r2)
r1 <- rule( "first" = c("from ori rule", "FROM ORI RULE"), "last" = c(NA, "last"), .to_NA = "X", .drop = TRUE ) r2 <- rule( "first" = c("F", "f"), "second" = c("S", "s"), "third" = c("T", "t"), .to_NA = "something" ) combine_rules(r1, r2)
Cutting data by group
cut_by_group(df, col_data, col_group, group, cat_col)
cut_by_group(df, col_data, col_group, group, cat_col)
df |
( |
col_data |
( |
col_group |
( |
group |
( |
cat_col |
( |
Function used to categorize numeric data stored in long format depending on their group. Intervals are closed on the right (and open on the left).
data.frame
with a column containing categorical values.
group <- list( list( "Height", c(-Inf, 150, 170, Inf), c("=<150", "150-170", ">170") ), list( "Weight", c(-Inf, 65, Inf), c("=<65", ">65") ), list( "Age", c(-Inf, 31, Inf), c("=<31", ">31") ), list( "PreCondition", c(-Inf, 1, Inf), c("=<1", "<1") ) ) data <- data.frame( SUBJECT = rep(letters[1:10], 4), PARAM = rep(c("Height", "Weight", "Age", "other"), each = 10), AVAL = c(rnorm(10, 165, 15), rnorm(10, 65, 5), runif(10, 18, 65), rnorm(10, 0, 1)), index = 1:40 ) cut_by_group(data, "AVAL", "PARAM", group, "my_new_categories")
group <- list( list( "Height", c(-Inf, 150, 170, Inf), c("=<150", "150-170", ">170") ), list( "Weight", c(-Inf, 65, Inf), c("=<65", ">65") ), list( "Age", c(-Inf, 31, Inf), c("=<31", ">31") ), list( "PreCondition", c(-Inf, 1, Inf), c("=<1", "<1") ) ) data <- data.frame( SUBJECT = rep(letters[1:10], 4), PARAM = rep(c("Height", "Weight", "Age", "other"), each = 10), AVAL = c(rnorm(10, 165, 15), rnorm(10, 65, 5), runif(10, 18, 65), rnorm(10, 0, 1)), index = 1:40 ) cut_by_group(data, "AVAL", "PARAM", group, "my_new_categories")
Getting Argument From System, Option or Default
get_arg(opt = NULL, sys = NULL, default = NULL, split = ";")
get_arg(opt = NULL, sys = NULL, default = NULL, split = ";")
opt |
( |
sys |
( |
default |
value to return if neither the environment variable nor the option are set. |
split |
( |
if defined, the value of the option (opt
), a character
from the environment variable (sys
) or the
default
in this order of priority.
get_arg("my.option", "MY_ARG", "default") withr::with_envvar(c(MY_ARG = "x;y"), get_arg("my.option", "MY_ARG", "default")) withr::with_options(c(my.option = "y"), get_arg("my.option", "MY_ARG", "default"))
get_arg("my.option", "MY_ARG", "default") withr::with_envvar(c(MY_ARG = "x;y"), get_arg("my.option", "MY_ARG", "default")) withr::with_options(c(my.option = "y"), get_arg("my.option", "MY_ARG", "default"))
Get Log
get_log(data, incl, incl.adsl) ## S3 method for class 'data.frame' get_log(data, incl = TRUE, incl.adsl = TRUE) ## S3 method for class 'list' get_log(data, incl = TRUE, incl.adsl = TRUE)
get_log(data, incl, incl.adsl) ## S3 method for class 'data.frame' get_log(data, incl = TRUE, incl.adsl = TRUE) ## S3 method for class 'list' get_log(data, incl = TRUE, incl.adsl = TRUE)
data |
( |
incl |
( |
incl.adsl |
( |
character
or list of character
describing the filtering applied to data
.
data <- log_filter(iris, Sepal.Length >= 7, "xx") data <- log_filter(data, Sepal.Length < 2) data <- log_filter(data, Sepal.Length >= 2, "yy") get_log(data) data <- log_filter( list(iris1 = iris, iris2 = iris), Sepal.Length >= 7, "iris1", character(0), "Sep" ) get_log(data)
data <- log_filter(iris, Sepal.Length >= 7, "xx") data <- log_filter(data, Sepal.Length < 2) data <- log_filter(data, Sepal.Length >= 2, "yy") get_log(data) data <- log_filter( list(iris1 = iris, iris2 = iris), Sepal.Length >= 7, "iris1", character(0), "Sep" ) get_log(data)
Join adsub to adsl
join_adsub_adsl( adam_db, keys, continuous_var, categorical_var, continuous_suffix, categorical_suffix, drop_na = TRUE, drop_lvl = TRUE ) ## S3 method for class 'list' join_adsub_adsl( adam_db, keys = c("USUBJID", "STUDYID"), continuous_var = "all", categorical_var = "all", continuous_suffix = "", categorical_suffix = "_CAT", drop_na = TRUE, drop_lvl = FALSE )
join_adsub_adsl( adam_db, keys, continuous_var, categorical_var, continuous_suffix, categorical_suffix, drop_na = TRUE, drop_lvl = TRUE ) ## S3 method for class 'list' join_adsub_adsl( adam_db, keys = c("USUBJID", "STUDYID"), continuous_var = "all", categorical_var = "all", continuous_suffix = "", categorical_suffix = "_CAT", drop_na = TRUE, drop_lvl = FALSE )
adam_db |
( |
keys |
( |
continuous_var |
( |
categorical_var |
( |
continuous_suffix |
( |
categorical_suffix |
( |
drop_na |
( |
drop_lvl |
( |
a list
of data.frame
with new columns in the adsl
table.
adsl <- data.frame( USUBJID = c("S1", "S2", "S3", "S4"), STUDYID = "My_study", AGE = c(60, 44, 23, 31) ) adsub <- data.frame( USUBJID = c("S1", "S2", "S3", "S4", "S1", "S2", "S3"), STUDYID = "My_study", PARAM = c("weight", "weight", "weight", "weight", "height", "height", "height"), PARAMCD = c("w", "w", "w", "w", "h", "h", "h"), AVAL = c(98, 75, 70, 71, 182, 155, 152), AVALC = c(">80", "<=80", "<=80", "<=80", ">180", "<=180", "<=180") ) db <- list(adsl = adsl, adsub = adsub) x <- join_adsub_adsl(adam_db = db) x <- join_adsub_adsl(adam_db = db, continuous_var = c("w", "h"), categorical_var = "h")
adsl <- data.frame( USUBJID = c("S1", "S2", "S3", "S4"), STUDYID = "My_study", AGE = c(60, 44, 23, 31) ) adsub <- data.frame( USUBJID = c("S1", "S2", "S3", "S4", "S1", "S2", "S3"), STUDYID = "My_study", PARAM = c("weight", "weight", "weight", "weight", "height", "height", "height"), PARAMCD = c("w", "w", "w", "w", "h", "h", "h"), AVAL = c(98, 75, 70, 71, 182, 155, 152), AVALC = c(">80", "<=80", "<=80", "<=80", ">180", "<=180", "<=180") ) db <- list(adsl = adsl, adsub = adsub) x <- join_adsub_adsl(adam_db = db) x <- join_adsub_adsl(adam_db = db, continuous_var = c("w", "h"), categorical_var = "h")
Convert nested list into list of rule
list2rules(obj)
list2rules(obj)
obj |
( |
a list
of rule
objects.
obj <- list( rule1 = list("X" = c("a", "b"), "Z" = "c", .to_NA = "xxxx"), rule2 = list(Missing = c(NA, "")), rule3 = list(Missing = c(NA, ""), .drop = TRUE), rule4 = list(Absent = c(NA, ""), .drop = TRUE, .to_NA = "yyyy") ) list2rules(obj)
obj <- list( rule1 = list("X" = c("a", "b"), "Z" = "c", .to_NA = "xxxx"), rule2 = list(Missing = c(NA, "")), rule3 = list(Missing = c(NA, ""), .drop = TRUE), rule4 = list(Absent = c(NA, ""), .drop = TRUE, .to_NA = "yyyy") ) list2rules(obj)
Filter Data with Log
log_filter(data, condition, ...) ## S3 method for class 'data.frame' log_filter(data, condition, suffix = NULL, ...) ## S3 method for class 'list' log_filter( data, condition, table, by = c("USUBJID", "STUDYID"), suffix = NULL, verbose = FALSE, ... )
log_filter(data, condition, ...) ## S3 method for class 'data.frame' log_filter(data, condition, suffix = NULL, ...) ## S3 method for class 'list' log_filter( data, condition, table, by = c("USUBJID", "STUDYID"), suffix = NULL, verbose = FALSE, ... )
data |
( |
condition |
( |
... |
further arguments to be passed to or from other methods. |
suffix |
( |
table |
( |
by |
( |
verbose |
( |
log_filter
will filter the data/named list of data according to the condition
.
All the variables in condition
must exist in the data (as variables) or in the parent
frame (e.g., in the global environment).
For named list of data, if ADSL
is available, log_filter
will also try to subset all
other datasets with USUBJID
.
a data.frame
or list
of data.frame
filtered for the provided conditions.
data <- iris attr(data$Sepal.Length, "label") <- "cm" log_filter(data, Sepal.Length >= 7) log_filter(list(iris = iris), Sepal.Length >= 7, "iris", character(0))
data <- iris attr(data$Sepal.Length, "label") <- "cm" log_filter(data, Sepal.Length >= 7) log_filter(list(iris = iris), Sepal.Length >= 7, "iris", character(0))
Encode Categorical Missing Values in a list of data.frame
ls_explicit_na( data, omit_tables = NULL, omit_columns = NULL, char_as_factor = TRUE, na_level = "<Missing>" )
ls_explicit_na( data, omit_tables = NULL, omit_columns = NULL, char_as_factor = TRUE, na_level = "<Missing>" )
data |
( |
omit_tables |
( |
omit_columns |
( |
char_as_factor |
( |
na_level |
( |
This is a helper function to encode missing values (i.e., NA
and empty string
) of every character
and
factor
variable found in a list
of data.frame
. The label
attribute of the columns is preserved.
list
of data.frame
object with explicit missing levels.
df1 <- data.frame( "char" = c("a", "b", NA, "a", "k", "x"), "char2" = c("A", "B", NA, "A", "K", "X"), "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")), "logi" = c(NA, FALSE, TRUE, NA, FALSE, NA) ) df2 <- data.frame( "char" = c("a", "b", NA, "a", "k", "x"), "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")), "num" = c(1:5, NA) ) df3 <- data.frame( "char" = c(NA, NA, "A") ) db <- list(df1 = df1, df2 = df2, df3 = df3) ls_explicit_na(db) ls_explicit_na(db, omit_tables = "df3", omit_columns = "char2")
df1 <- data.frame( "char" = c("a", "b", NA, "a", "k", "x"), "char2" = c("A", "B", NA, "A", "K", "X"), "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")), "logi" = c(NA, FALSE, TRUE, NA, FALSE, NA) ) df2 <- data.frame( "char" = c("a", "b", NA, "a", "k", "x"), "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")), "num" = c(1:5, NA) ) df3 <- data.frame( "char" = c(NA, NA, "A") ) db <- list(df1 = df1, df2 = df2, df3 = df3) ls_explicit_na(db) ls_explicit_na(db, omit_tables = "df3", omit_columns = "char2")
Unite Columns of a Table in a list of data.frame.
ls_unite(adam_db, tab, cols, sep = ".", new = NULL)
ls_unite(adam_db, tab, cols, sep = ".", new = NULL)
adam_db |
( |
tab |
( |
cols |
( |
sep |
( |
new |
( |
list
of data.frames
object with a united column.
db <- list(mtcars = mtcars, iris = iris) x <- ls_unite(db, "mtcars", c("mpg", "hp"), new = "FUSION") x$mtcars
db <- list(mtcars = mtcars, iris = iris) x <- ls_unite(db, "mtcars", c("mpg", "hp"), new = "FUSION") x$mtcars
Transforming data.frame with Multiple Identifying columns into Wide Format
multi_id_pivot_wider( data, id, param_from, value_from, drop_na = FALSE, drop_lvl = FALSE )
multi_id_pivot_wider( data, id, param_from, value_from, drop_na = FALSE, drop_lvl = FALSE )
data |
( |
id |
( |
param_from |
( |
value_from |
( |
drop_na |
( |
drop_lvl |
( |
This function identifies observations on the basis of several columns. Warning: Instead of nesting duplicated values, the function will throw an error if the same parameter is provided twice for the same observation.
data.frame
in a wide format.
test_data <- data.frame( the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"), the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"), the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"), the_val = c(65, 165, "M", 66, "F", 166, 155, TRUE) ) multi_id_pivot_wider(test_data, c("the_obs", "the_obs2"), "the_param", "the_val") multi_id_pivot_wider(test_data, "the_obs2", "the_param", "the_val")
test_data <- data.frame( the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"), the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"), the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"), the_val = c(65, 165, "M", 66, "F", 166, 155, TRUE) ) multi_id_pivot_wider(test_data, c("the_obs", "the_obs2"), "the_param", "the_val") multi_id_pivot_wider(test_data, "the_obs2", "the_param", "the_val")
Transforming data.frame with Multiple Data Columns into Wide Format
poly_pivot_wider( data, id, param_from, value_from, labels_from = NULL, drop_na = TRUE, drop_lvl = FALSE )
poly_pivot_wider( data, id, param_from, value_from, labels_from = NULL, drop_na = TRUE, drop_lvl = FALSE )
data |
( |
id |
( |
param_from |
( |
value_from |
( |
labels_from |
( |
drop_na |
( |
drop_lvl |
( |
This function is adapted to cases where the data are distributed in several columns while the name of the
parameter is in one. Typical example is adsub
where numeric data are stored in AVAL
while categorical data are
in AVALC
.
list
of data.frame
in a wide format with a label attribute attached to each column.
test_data <- data.frame( the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"), the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"), the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"), the_label = c( "Weight (Kg)", "Height (cm)", "Gender", "Weight (Kg)", "Gender", "Height (cm)", "Height (cm)", "Pre-condition" ), the_val = c(65, 165, NA, 66, NA, 166, 155, NA), the_val2 = c(65, 165, "M", 66, "F", 166, 155, TRUE) ) x <- poly_pivot_wider( test_data, c("the_obs", "the_obs2"), "the_param", c("the_val", "the_val2"), "the_label" ) x Reduce(function(u, v) merge(u, v, all = TRUE), x)
test_data <- data.frame( the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"), the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"), the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"), the_label = c( "Weight (Kg)", "Height (cm)", "Gender", "Weight (Kg)", "Gender", "Height (cm)", "Height (cm)", "Pre-condition" ), the_val = c(65, 165, NA, 66, NA, 166, 155, NA), the_val2 = c(65, 165, "M", 66, "F", 166, 155, TRUE) ) x <- poly_pivot_wider( test_data, c("the_obs", "the_obs2"), "the_param", c("the_val", "the_val2"), "the_label" ) x Reduce(function(u, v) merge(u, v, all = TRUE), x)
Print Log
print_log(data, incl, incl.adsl) ## S3 method for class 'data.frame' print_log(data, incl = TRUE, incl.adsl = TRUE) ## S3 method for class 'list' print_log(data, incl = TRUE, incl.adsl = TRUE)
print_log(data, incl, incl.adsl) ## S3 method for class 'data.frame' print_log(data, incl = TRUE, incl.adsl = TRUE) ## S3 method for class 'list' print_log(data, incl = TRUE, incl.adsl = TRUE)
data |
( |
incl |
( |
incl.adsl |
( |
NULL
. Print a description of the filtering applied to data
.
data <- log_filter(iris, Sepal.Length >= 7, "Sep") print_log(data) data <- log_filter( list( adsl = iris, iris2 = iris, mtcars = mtcars, iris3 = iris ), Sepal.Length >= 7, "adsl", character(0), "adsl filter" ) data <- log_filter(data, Sepal.Length >= 7, "iris2", character(0), "iris2 filter") print_log(data) print_log(data, incl = FALSE) print_log(data, incl.adsl = FALSE, incl = FALSE)
data <- log_filter(iris, Sepal.Length >= 7, "Sep") print_log(data) data <- log_filter( list( adsl = iris, iris2 = iris, mtcars = mtcars, iris3 = iris ), Sepal.Length >= 7, "adsl", character(0), "adsl filter" ) data <- log_filter(data, Sepal.Length >= 7, "iris2", character(0), "iris2 filter") print_log(data) print_log(data, incl = FALSE) print_log(data, incl.adsl = FALSE, incl = FALSE)
propagate copies columns from a given table of a list of data.frame to all tables based on other
common columns. If several rows are associated with the same key, the rows will be duplicated in the receiving
tables. In safe mode, the key must be unique in the original table.
propagate(db, from, add, by, safe = TRUE) ## S3 method for class 'list' propagate(db, from, add, by, safe = TRUE)
propagate(db, from, add, by, safe = TRUE) ## S3 method for class 'list' propagate(db, from, add, by, safe = TRUE)
db |
( |
from |
( |
add |
( |
by |
( |
safe |
( |
updated list
of data.frame
.
df1 <- data.frame( id1 = c("a", "a", "c", "d", "e", "f"), id2 = c("A", "B", "A", "A", "A", "A"), int = c(1, 2, 3, 4, 5, 6), bool = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE) ) df2 <- data.frame( id1 = c("a", "a", "d", "e", "f", "g"), id2 = c("A", "B", "A", "A", "A", "A") ) df3 <- data.frame( id1 = c("a", "c", "d", "e", "f", "x"), id2 = c("A", "A", "A", "A", "B", "A"), int = c(11, 22, 33, 44, 55, 66) ) db <- list(df1 = df1, fd2 = df2, df3 = df3) propagate(db, from = "df1", add = c("int", "bool"), by = c("id1", "id2"))
df1 <- data.frame( id1 = c("a", "a", "c", "d", "e", "f"), id2 = c("A", "B", "A", "A", "A", "A"), int = c(1, 2, 3, 4, 5, 6), bool = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE) ) df2 <- data.frame( id1 = c("a", "a", "d", "e", "f", "g"), id2 = c("A", "B", "A", "A", "A", "A") ) df3 <- data.frame( id1 = c("a", "c", "d", "e", "f", "x"), id2 = c("A", "A", "A", "A", "B", "A"), int = c(11, 22, 33, 44, 55, 66) ) db <- list(df1 = df1, fd2 = df2, df3 = df3) propagate(db, from = "df1", add = c("int", "bool"), by = c("id1", "id2"))
Reformat Values
reformat(obj, ...) ## Default S3 method: reformat(obj, format, ...) ## S3 method for class 'character' reformat(obj, format, ..., verbose = FALSE) ## S3 method for class 'factor' reformat(obj, format, ..., verbose = FALSE) ## S3 method for class 'list' reformat( obj, format, ..., verbose = get_arg("dunlin.reformat.verbose", "R_DUNLIN_REFORMAT_VERBOSE", FALSE) )
reformat(obj, ...) ## Default S3 method: reformat(obj, format, ...) ## S3 method for class 'character' reformat(obj, format, ..., verbose = FALSE) ## S3 method for class 'factor' reformat(obj, format, ..., verbose = FALSE) ## S3 method for class 'list' reformat( obj, format, ..., verbose = get_arg("dunlin.reformat.verbose", "R_DUNLIN_REFORMAT_VERBOSE", FALSE) )
obj |
( |
... |
for compatibility between methods and pass additional special mapping to transform rules.
|
format |
( |
verbose |
( |
(character
, factor
or list of data.frame
) with remapped values.
When the rule is an empty rule or when values subject to reformatting are absent from the object, no error is
raised. The conversion to factor (if .string_as_fct = TRUE) is still carried out. The conversion of the levels
declared in .to_NA
to NA
values occurs after the remapping. NA
values created this way are not affected by a
rule declaring a remapping of NA
values. For factors, level dropping is the last step, hence, levels converted to
NA
by the .to_NA
argument, will be removed if .drop
is TRUE
. Arguments passed via reformat
override the
ones defined during rule creation.
The variables listed under the all_datasets
keyword will be reformatted with the corresponding rule in every
data set except where another rule is specified for the same variable under a specific data set name.
# Reformatting of character. obj <- c("a", "b", "x", NA, "") attr(obj, "label") <- "my label" format <- rule("A" = "a", "NN" = NA) reformat(obj, format) reformat(obj, format, .string_as_fct = FALSE, .to_NA = NULL) # Reformatting of factor. obj <- factor(c("first", "a", "aa", "b", "x", NA), levels = c("first", "x", "b", "aa", "a", "z")) attr(obj, "label") <- "my label" format <- rule("A" = c("a", "aa"), "NN" = c(NA, "x"), "Not_present" = "z", "Not_a_level" = "P") reformat(obj, format) reformat(obj, format, .na_last = FALSE, .to_NA = "b", .drop = FALSE) # Reformatting of list of data.frame. df1 <- data.frame( var1 = c("a", "b", NA), var2 = factor(c("F1", "F2", NA)) ) df2 <- data.frame( var1 = c("x", NA, "y"), var2 = factor(c("F11", NA, "F22")) ) db <- list(df1 = df1, df2 = df2) format <- list( df1 = list( var1 = rule("X" = "x", "N" = NA, .to_NA = "b") ), df2 = list( var2 = rule("f11" = "F11", "NN" = NA) ), all_datasets = list( var1 = rule("xx" = "x", "aa" = "a") ) ) reformat(db, format)
# Reformatting of character. obj <- c("a", "b", "x", NA, "") attr(obj, "label") <- "my label" format <- rule("A" = "a", "NN" = NA) reformat(obj, format) reformat(obj, format, .string_as_fct = FALSE, .to_NA = NULL) # Reformatting of factor. obj <- factor(c("first", "a", "aa", "b", "x", NA), levels = c("first", "x", "b", "aa", "a", "z")) attr(obj, "label") <- "my label" format <- rule("A" = c("a", "aa"), "NN" = c(NA, "x"), "Not_present" = "z", "Not_a_level" = "P") reformat(obj, format) reformat(obj, format, .na_last = FALSE, .to_NA = "b", .drop = FALSE) # Reformatting of list of data.frame. df1 <- data.frame( var1 = c("a", "b", NA), var2 = factor(c("F1", "F2", NA)) ) df2 <- data.frame( var1 = c("x", NA, "y"), var2 = factor(c("F11", NA, "F22")) ) db <- list(df1 = df1, df2 = df2) format <- list( df1 = list( var1 = rule("X" = "x", "N" = NA, .to_NA = "b") ), df2 = list( var2 = rule("f11" = "F11", "NN" = NA) ), all_datasets = list( var1 = rule("xx" = "x", "aa" = "a") ) ) reformat(db, format)
Remove whisker values
remove_whisker(x)
remove_whisker(x)
x |
Named ( |
invisible NULL
. Removes x
from the whisker environment.
Render whiskers safely
render_safe(x)
render_safe(x)
x |
( |
character
with substituted placeholders.
The strings enclosed in {} are substituted using the key-value pairs set with add_whisker.
render_safe("Name of {Patient_label}")
render_safe("Name of {Patient_label}")
Create rule based on mappings
rule( ..., .lst = list(...), .string_as_fct = TRUE, .na_last = TRUE, .drop = FALSE, .to_NA = "" )
rule( ..., .lst = list(...), .string_as_fct = TRUE, .na_last = TRUE, .drop = FALSE, .to_NA = "" )
... |
Mapping pairs: the argument name is the transformed value, while its values are the original values. |
.lst |
( |
.string_as_fct |
( |
.na_last |
( |
.drop |
( |
.to_NA |
( |
a rule
object.
Conversion to NA
is the last step of the remapping process.
rule("X" = "x", "Y" = c("y", "z")) rule("X" = "x", "Y" = c("y", "z"), .drop = TRUE, .to_NA = c("a", "b"), .na_last = FALSE)
rule("X" = "x", "Y" = c("y", "z")) rule("X" = "x", "Y" = c("y", "z"), .drop = TRUE, .to_NA = c("a", "b"), .na_last = FALSE)
Show Whisker Values
show_whisker()
show_whisker()
invisible NULL
. Prints the values stored in the whisker environment.
show_whisker()
show_whisker()