diff --git a/.Rbuildignore b/.Rbuildignore index 5fa08928..18ae4b93 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -23,3 +23,4 @@ ^\.circleci$ ^\.circleci/config\.yml$ ^\.github$ +^REFACTOR_GUIDE\.md$ diff --git a/DESCRIPTION b/DESCRIPTION index ad8e28a6..ffe8e8e3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -69,7 +69,8 @@ Imports: gridExtra, data.table, methods, - dplyr + dplyr, + cli Suggests: lme4, httr, diff --git a/NAMESPACE b/NAMESPACE index 8bdab82e..bd539a11 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -119,6 +119,8 @@ export(ds.var) export(ds.vectorCalc) import(DSI) import(data.table) +importFrom(DSI,datashield.connections_find) +importFrom(cli,cli_abort) importFrom(stats,as.formula) importFrom(stats,na.omit) importFrom(stats,ts) diff --git a/R/ds.Boole.R b/R/ds.Boole.R index 252346bf..c435e4c6 100644 --- a/R/ds.Boole.R +++ b/R/ds.Boole.R @@ -37,11 +37,8 @@ #' @param datasources a list of \code{\link[DSI]{DSConnection-class}} #' objects obtained after login. If the \code{datasources} argument is not specified #' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}. -#' @return \code{ds.Boole} returns the object specified by the \code{newobj} argument -#' which is written to the server-side. Also, two validity messages are returned -#' to the client-side indicating the name of the \code{newobj} which -#' has been created in each data source and if -#' it is in a valid form. +#' @return \code{ds.Boole} returns the object specified by the \code{newobj} argument +#' which is written to the server-side. 
#' @examples #' #' \dontrun{ @@ -102,19 +99,12 @@ #' } #' #' @author DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @export #' ds.Boole<-function(V1=NULL, V2=NULL, Boolean.operator=NULL, numeric.output=TRUE, na.assign="NA",newobj=NULL, datasources=NULL){ - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) # check if user has provided the name of the column or scalar that holds V1 if(is.null(V1)){ @@ -178,88 +168,8 @@ ds.Boole<-function(V1=NULL, V2=NULL, Boolean.operator=NULL, numeric.output=TRUE, } # CALL THE MAIN SERVER SIDE FUNCTION - calltext <- call("BooleDS", V1, V2, BO.n, na.assign,numeric.output) + calltext <- call("BooleDS", V1, V2, BO.n, na.assign, numeric.output) DSI::datashield.assign(datasources, newobj, calltext) -############################################################################################################# -#DataSHIELD CLIENTSIDE MODULE: CHECK KEY DATA OBJECTS SUCCESSFULLY CREATED # - # -#SET APPROPRIATE PARAMETERS FOR THIS PARTICULAR FUNCTION # -test.obj.name<-newobj # - # -#TRACER # -#return(test.obj.name) # -#} # - # - # -# CALL SEVERSIDE FUNCTION # -calltext <- call("testObjExistsDS", test.obj.name) # - # -object.info<-DSI::datashield.aggregate(datasources, calltext) # - # -# CHECK IN EACH SOURCE WHETHER OBJECT NAME EXISTS # -# AND WHETHER OBJECT PHYSICALLY EXISTS WITH A NON-NULL CLASS # -num.datasources<-length(object.info) # - # - # -obj.name.exists.in.all.sources<-TRUE # -obj.non.null.in.all.sources<-TRUE # - # -for(j in 1:num.datasources){ # - if(!object.info[[j]]$test.obj.exists){ # - 
obj.name.exists.in.all.sources<-FALSE # - } # - if(is.null(object.info[[j]]$test.obj.class) || ("ABSENT" %in% object.info[[j]]$test.obj.class)){ # - obj.non.null.in.all.sources<-FALSE # - } # - } # - # -if(obj.name.exists.in.all.sources && obj.non.null.in.all.sources){ # - # - return.message<- # - paste0("A data object <", test.obj.name, "> has been created in all specified data sources") # - # - # - }else{ # - # - return.message.1<- # - paste0("Error: A valid data object <", test.obj.name, "> does NOT exist in ALL specified data sources") # - # - return.message.2<- # - paste0("It is either ABSENT and/or has no valid content/class,see return.info above") # - # - return.message.3<- # - paste0("Please use ds.ls() to identify where missing") # - # - # - return.message<-list(return.message.1,return.message.2,return.message.3) # - # - } # - # - calltext <- call("messageDS", test.obj.name) # - studyside.message<-DSI::datashield.aggregate(datasources, calltext) # - # - no.errors<-TRUE # - for(nd in 1:num.datasources){ # - if(studyside.message[[nd]]!="ALL OK: there are no studysideMessage(s) on this datasource"){ # - no.errors<-FALSE # - } # - } # - # - # - if(no.errors){ # - validity.check<-paste0("<",test.obj.name, "> appears valid in all sources") # - return(list(is.object.created=return.message,validity.check=validity.check)) # - } # - # -if(!no.errors){ # - validity.check<-paste0("<",test.obj.name,"> invalid in at least one source. See studyside.messages:") # - return(list(is.object.created=return.message,validity.check=validity.check, # - studyside.messages=studyside.message)) # - } # - # -#END OF CHECK OBJECT CREATED CORECTLY MODULE # -############################################################################################################# - } #ds.Boole diff --git a/R/ds.c.R b/R/ds.c.R index 2093ac01..7be360a4 100644 --- a/R/ds.c.R +++ b/R/ds.c.R @@ -10,7 +10,9 @@ #' @param x a vector of character string providing the names of the objects to be combined. 
#' @param newobj a character string that provides the name for the output object #' that is stored on the data servers. Default \code{c.newobj}. -#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} +#' @param classConsistencyCheck logical. If TRUE, verifies that each input object has +#' the same class across all studies before concatenation. Default TRUE. +#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} #' objects obtained after login. If the \code{datasources} argument is not specified #' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}. #' @return \code{ds.c} returns the vector of concatenating R @@ -53,19 +55,12 @@ #' #' } #' @author DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @export -#' -ds.c <- function(x=NULL, newobj=NULL, datasources=NULL){ +#' +ds.c <- function(x=NULL, newobj=NULL, datasources=NULL, classConsistencyCheck=TRUE){ - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) if(is.null(x)){ stop("x=NULL. Please provide the names of the objects to concatenate!", call.=FALSE) @@ -76,19 +71,14 @@ ds.c <- function(x=NULL, newobj=NULL, datasources=NULL){ newobj <- "c.newobj" } - # check if the input object(s) is(are) defined in all the studies - lapply(x, function(k){isDefined(datasources, obj=k)}) - - # call the internal function that checks the input object(s) is(are) of the same class in all studies. 
- for(i in 1:length(x)){ - typ <- checkClass(datasources, x[i]) + if(classConsistencyCheck){ + for(i in seq_along(x)){ + checkClass(datasources, x[i]) + } } # call the server side function that does the job - cally <- paste0("cDS(list(",paste(x,collapse=","),"))") - DSI::datashield.assign(datasources, newobj, as.symbol(cally)) - - # check that the new object has been created and display a message accordingly - finalcheck <- isAssigned(datasources, newobj) + cally <- call("cDS", x) + DSI::datashield.assign(datasources, newobj, cally) } diff --git a/R/ds.cbind.R b/R/ds.cbind.R index e21cb961..0f85ca99 100644 --- a/R/ds.cbind.R +++ b/R/ds.cbind.R @@ -30,17 +30,16 @@ #' @param force.colnames can be NULL (recommended) or a vector of characters that specifies #' column names of the output object. If it is not NULL the user should take some caution. #' For more information see \strong{Details}. -#' @param newobj a character string that provides the name for the output variable -#' that is stored on the data servers. Defaults \code{cbind.newobj}. -#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. +#' @param classConsistencyCheck logical. If TRUE, verifies that each input object has the same class across all studies. Default TRUE. +#' @param newobj a character string that provides the name for the output variable +#' that is stored on the data servers. Defaults \code{cbind.newobj}. +#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. #' If the \code{datasources} argument is not specified #' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}. #' @param notify.of.progress specifies if console output should be produced to indicate #' progress. Default FALSE. -#' @return \code{ds.cbind} returns a data frame combining the columns of the R -#' objects specified in the function which is written to the server-side. 
-#' It also returns to the client-side two messages with the name of \code{newobj} -#' that has been created in each data source and \code{DataSHIELD.checks} result. +#' @return \code{ds.cbind} returns a data frame combining the columns of the R +#' objects specified in the function which is written to the server-side. #' @examples #' #' \dontrun{ @@ -113,37 +112,29 @@ #' } #' #' @author DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @export -#' -ds.cbind <- function(x=NULL, DataSHIELD.checks=FALSE, force.colnames=NULL, newobj=NULL, datasources=NULL, notify.of.progress=FALSE){ - - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } +#' +ds.cbind <- function(x=NULL, DataSHIELD.checks=FALSE, force.colnames=NULL, newobj=NULL, datasources=NULL, notify.of.progress=FALSE, classConsistencyCheck=TRUE){ - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) if(is.null(x)){ stop("Please provide a vector of character strings holding the name of the input elements!", call.=FALSE) } if(DataSHIELD.checks){ - - # check if the input object(s) is(are) defined in all the studies - lapply(x, function(k){isDefined(datasources, obj=k)}) - + # call the internal function that checks the input object(s) is(are) of the same legal class in all studies. 
+ if(classConsistencyCheck){ for(i in 1:length(x)){ typ <- checkClass(datasources, x[i]) if(!('data.frame' %in% typ) & !('matrix' %in% typ) & !('factor' %in% typ) & !('character' %in% typ) & !('numeric' %in% typ) & !('integer' %in% typ) & !('logical' %in% typ)){ stop("Only objects of type 'data.frame', 'matrix', 'numeric', 'integer', 'character', 'factor' and 'logical' are allowed.", call.=FALSE) } } - + } + # check that there are no duplicated column names in the input components for(j in 1:length(datasources)){ colNames <- list() @@ -158,10 +149,10 @@ ds.cbind <- function(x=NULL, DataSHIELD.checks=FALSE, force.colnames=NULL, newob colNames <- unlist(colNames) if(anyDuplicated(colNames) != 0){ message("\n Warning: Some column names in study", j, "are duplicated and a suffix '.k' will be added to the kth replicate \n") - } - } - } - + } + } + } + # check that the number of rows is the same in all componets to be cbind for(j in 1:length(datasources)){ nrows <- list() @@ -178,8 +169,8 @@ ds.cbind <- function(x=NULL, DataSHIELD.checks=FALSE, force.colnames=NULL, newob if(any(nrows != nrows[1])){ stop("The number of rows is not the same in all of the components to be cbind", call.=FALSE) } - } - + } + } # check newobj not actively declared as null @@ -238,63 +229,6 @@ ds.cbind <- function(x=NULL, DataSHIELD.checks=FALSE, force.colnames=NULL, newob DSI::datashield.assign(datasources[std], newobj, calltext) } - ############################################################################################################# - # DataSHIELD CLIENTSIDE MODULE: CHECK KEY DATA OBJECTS SUCCESSFULLY CREATED - - # SET APPROPRIATE PARAMETERS FOR THIS PARTICULAR FUNCTION - test.obj.name <- newobj - - # CALL SEVERSIDE FUNCTION - calltext <- call("testObjExistsDS", test.obj.name) - object.info <- DSI::datashield.aggregate(datasources, calltext) - - # CHECK IN EACH SOURCE WHETHER OBJECT NAME EXISTS - # AND WHETHER OBJECT PHYSICALLY EXISTS WITH A NON-NULL CLASS - num.datasources <- 
length(object.info) - - obj.name.exists.in.all.sources <- TRUE - obj.non.null.in.all.sources <- TRUE - - for(j in 1:num.datasources){ - if(!object.info[[j]]$test.obj.exists){ - obj.name.exists.in.all.sources <- FALSE - } - if(is.null(object.info[[j]]$test.obj.class) || ("ABSENT" %in% object.info[[j]]$test.obj.class)){ - obj.non.null.in.all.sources <- FALSE - } - } - - if(obj.name.exists.in.all.sources && obj.non.null.in.all.sources){ - return.message <- paste0("A data object <", test.obj.name, "> has been created in all specified data sources") - }else{ - return.message.1 <- paste0("Error: A valid data object <", test.obj.name, "> does NOT exist in ALL specified data sources") - return.message.2 <- paste0("It is either ABSENT and/or has no valid content/class,see return.info above") - return.message.3 <- paste0("Please use ds.ls() to identify where missing") - return.message <- list(return.message.1,return.message.2,return.message.3) - } - - calltext <- call("messageDS", test.obj.name) - studyside.message <- DSI::datashield.aggregate(datasources, calltext) - no.errors <- TRUE - for(nd in 1:num.datasources){ - if(studyside.message[[nd]]!="ALL OK: there are no studysideMessage(s) on this datasource"){ - no.errors <- FALSE - } - } - - if(no.errors){ - validity.check <- paste0("<",test.obj.name, "> appears valid in all sources") - return(list(is.object.created=return.message,validity.check=validity.check)) - } - - if(!no.errors){ - validity.check <- paste0("<",test.obj.name,"> invalid in at least one source. See studyside.messages:") - return(list(is.object.created=return.message,validity.check=validity.check, - studyside.messages=studyside.message)) - } - - # END OF CHECK OBJECT CREATED CORECTLY MODULE - ####################################################################################################### } #ds.cbind diff --git a/R/ds.dataFrame.R b/R/ds.dataFrame.R index eeddcdd9..ccd671c4 100644 --- a/R/ds.dataFrame.R +++ b/R/ds.dataFrame.R @@ -30,17 +30,16 @@ #' 3. 
if there are any duplicated column names in the input objects in each study\cr #' 4. the number of rows of the data frames or matrices and the length of all component variables #' are the same -#' @param newobj a character string that provides the name for the output data frame -#' that is stored on the data servers. Default \code{dataframe.newobj}. -#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. +#' @param classConsistencyCheck logical. If TRUE, verifies that each input object has the same class across all studies. Default TRUE. +#' @param newobj a character string that provides the name for the output data frame +#' that is stored on the data servers. Default \code{dataframe.newobj}. +#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. #' If the \code{datasources} argument is not specified #' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}. #' @param notify.of.progress specifies if console output should be produced to indicate #' progress. Default is FALSE. #' @return \code{ds.dataFrame} returns the object specified by the \code{newobj} argument -#' which is written to the serverside. Also, two validity messages are returned to the -#' client-side indicating the name of the \code{newobj} that has been created in each data source -#' and if it is in a valid form. +#' which is written to the server-side. 
#' @examples #' #' \dontrun{ @@ -89,18 +88,11 @@ #' datashield.logout(connections) #' } #' @author DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @export -ds.dataFrame <- function(x=NULL, row.names=NULL, check.rows=FALSE, check.names=TRUE, stringsAsFactors=TRUE, completeCases=FALSE, DataSHIELD.checks=FALSE, newobj=NULL, datasources=NULL, notify.of.progress=FALSE){ +ds.dataFrame <- function(x=NULL, row.names=NULL, check.rows=FALSE, check.names=TRUE, stringsAsFactors=TRUE, completeCases=FALSE, DataSHIELD.checks=FALSE, newobj=NULL, datasources=NULL, notify.of.progress=FALSE, classConsistencyCheck=TRUE){ - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) if(is.null(x)){ stop("Please provide the name of the list that holds the input vectors!", call.=FALSE) @@ -112,17 +104,16 @@ ds.dataFrame <- function(x=NULL, row.names=NULL, check.rows=FALSE, check.names=T } if(DataSHIELD.checks){ - - # check if the input object(s) is(are) defined in all the studies - lapply(x, function(k){isDefined(datasources, obj=k)}) - + # call the internal function that checks the input object(s) is(are) of the same legal class in all studies. 
+ if(classConsistencyCheck){ for(i in 1:length(x)){ typ <- checkClass(datasources, x[i]) if(!('data.frame' %in% typ) & !('matrix' %in% typ) & !('factor' %in% typ) & !('character' %in% typ) & !('numeric' %in% typ) & !('integer' %in% typ) & !('logical' %in% typ)){ stop("Only objects of type 'data.frame', 'matrix', 'numeric', 'integer', 'character', 'factor' and 'logical' are allowed.", call.=FALSE) } } + } # check that there are no duplicated column names in the input components for(j in 1:length(datasources)){ @@ -229,85 +220,5 @@ ds.dataFrame <- function(x=NULL, row.names=NULL, check.rows=FALSE, check.names=T DSI::datashield.assign(datasources[std], newobj, cally) } -############################################################################################################# -#DataSHIELD CLIENTSIDE MODULE: CHECK KEY DATA OBJECTS SUCCESSFULLY CREATED # - # -#SET APPROPRIATE PARAMETERS FOR THIS PARTICULAR FUNCTION # -test.obj.name<-newobj # - # -#TRACER # -#return(test.obj.name) # -#} # - # - # -# CALL SEVERSIDE FUNCTION # -calltext <- call("testObjExistsDS", test.obj.name) # - # -object.info<-DSI::datashield.aggregate(datasources, calltext) # - # -# CHECK IN EACH SOURCE WHETHER OBJECT NAME EXISTS # -# AND WHETHER OBJECT PHYSICALLY EXISTS WITH A NON-NULL CLASS # -num.datasources<-length(object.info) # - # - # -obj.name.exists.in.all.sources<-TRUE # -obj.non.null.in.all.sources<-TRUE # - # -for(j in 1:num.datasources){ # - if(!object.info[[j]]$test.obj.exists){ # - obj.name.exists.in.all.sources<-FALSE # - } # - if(is.null(object.info[[j]]$test.obj.class) || ("ABSENT" %in% object.info[[j]]$test.obj.class)){ # - obj.non.null.in.all.sources<-FALSE # - } # - } # - # -if(obj.name.exists.in.all.sources && obj.non.null.in.all.sources){ # - # - return.message<- # - paste0("A data object <", test.obj.name, "> has been created in all specified data sources") # - # - # - }else{ # - # - return.message.1<- # - paste0("Error: A valid data object <", test.obj.name, "> does NOT 
exist in ALL specified data sources") # - # - return.message.2<- # - paste0("It is either ABSENT and/or has no valid content/class,see return.info above") # - # - return.message.3<- # - paste0("Please use ds.ls() to identify where missing") # - # - # - return.message<-list(return.message.1,return.message.2,return.message.3) # - # - } # - # - calltext <- call("messageDS", test.obj.name) # - studyside.message<-DSI::datashield.aggregate(datasources, calltext) # - # - no.errors<-TRUE # - for(nd in 1:num.datasources){ # - if(studyside.message[[nd]]!="ALL OK: there are no studysideMessage(s) on this datasource"){ # - no.errors<-FALSE # - } # - } # - # - # - if(no.errors){ # - validity.check<-paste0("<",test.obj.name, "> appears valid in all sources") # - return(list(is.object.created=return.message,validity.check=validity.check)) # - } # - # -if(!no.errors){ # - validity.check<-paste0("<",test.obj.name,"> invalid in at least one source. See studyside.messages:") # - return(list(is.object.created=return.message,validity.check=validity.check, # - studyside.messages=studyside.message)) # - } # - # -#END OF CHECK OBJECT CREATED CORECTLY MODULE # -############################################################################################################# - } #ds.dataFrame diff --git a/R/ds.dataFrameFill.R b/R/ds.dataFrameFill.R index 3de389b7..79c8b68f 100644 --- a/R/ds.dataFrameFill.R +++ b/R/ds.dataFrameFill.R @@ -13,7 +13,8 @@ #' filled with extra columns of missing values. #' @param newobj a character string that provides the name for the output data frame #' that is stored on the data servers. Default value is "dataframefill.newobj". -#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. +#' @param classConsistencyCheck logical. If TRUE, verifies that each input object has the same class across all studies. Default TRUE. +#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. 
#' If the \code{datasources} argument is not specified #' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}. #' @return \code{ds.dataFrameFill} returns the object specified by the \code{newobj} argument which @@ -21,7 +22,8 @@ #' client-side indicating the name of the \code{newobj} that has been created in each data source #' and if it is in a valid form. #' @author Demetris Avraam for DataSHIELD Development Team -#' +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands +#' #' @examples #' \dontrun{ #' @@ -74,17 +76,9 @@ #' } #' @export #' -ds.dataFrameFill <- function(df.name=NULL, newobj=NULL, datasources=NULL){ +ds.dataFrameFill <- function(df.name=NULL, newobj=NULL, datasources=NULL, classConsistencyCheck=TRUE){ - # if no connections details are provided look for 'connection' objects in the environment - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) # check if user has provided the name of the data.frame to be subsetted if(is.null(df.name)){ @@ -96,9 +90,6 @@ ds.dataFrameFill <- function(df.name=NULL, newobj=NULL, datasources=NULL){ newobj <- "dataframefill.newobj" } - # check if the input dataframe is defined in all the studies - defined <- isDefined(datasources, df.name) - # call the internal function that checks the input object is of the same class in all studies. 
typ <- checkClass(datasources, df.name) @@ -172,63 +163,6 @@ ds.dataFrameFill <- function(df.name=NULL, newobj=NULL, datasources=NULL){ calltext <- call("dataFrameFillDS", df.name, allNames.transmit, class.vect.transmit, levels.vec.transmit) DSI::datashield.assign(datasources, newobj, calltext) - ############################################################################################################# - # DataSHIELD CLIENTSIDE MODULE: CHECK KEY DATA OBJECTS SUCCESSFULLY CREATED - - # SET APPROPRIATE PARAMETERS FOR THIS PARTICULAR FUNCTION - test.obj.name <- newobj - - # CALL SEVERSIDE FUNCTION - calltext <- call("testObjExistsDS", test.obj.name) - object.info <- DSI::datashield.aggregate(datasources, calltext) - - # CHECK IN EACH SOURCE WHETHER OBJECT NAME EXISTS - # AND WHETHER OBJECT PHYSICALLY EXISTS WITH A NON-NULL CLASS - num.datasources <- length(object.info) - - obj.name.exists.in.all.sources <- TRUE - obj.non.null.in.all.sources <- TRUE - - for(j in 1:num.datasources){ - if(!object.info[[j]]$test.obj.exists){ - obj.name.exists.in.all.sources <- FALSE - } - if(is.null(object.info[[j]]$test.obj.class) || ("ABSENT" %in% object.info[[j]]$test.obj.class)){ - obj.non.null.in.all.sources <- FALSE - } - } - - if(obj.name.exists.in.all.sources && obj.non.null.in.all.sources){ - return.message <- paste0("A data object <", test.obj.name, "> has been created in all specified data sources") - }else{ - return.message.1 <- paste0("Error: A valid data object <", test.obj.name, "> does NOT exist in ALL specified data sources") - return.message.2 <- paste0("It is either ABSENT and/or has no valid content/class, see return.info above") - return.message.3 <- paste0("Please use ds.ls() to identify where missing") - return.message <- list(return.message.1, return.message.2, return.message.3) - } - - calltext <- call("messageDS", test.obj.name) - studyside.message <- DSI::datashield.aggregate(datasources, calltext) - - no.errors <- TRUE - for(nd in 1:num.datasources){ - 
if(studyside.message[[nd]]!="ALL OK: there are no studysideMessage(s) on this datasource"){ - no.errors <- FALSE - } - } - - if(no.errors){ - validity.check <- paste0("<",test.obj.name, "> appears valid in all sources") - return(list(is.object.created=return.message, validity.check=validity.check)) - } - - if(!no.errors){ - validity.check <- paste0("<",test.obj.name,"> invalid in at least one source. See studyside.messages:") - return(list(is.object.created=return.message, validity.check=validity.check, studyside.messages=studyside.message)) - } - - # END OF CHECK OBJECT CREATED CORRECTLY MODULE - ############################################################################################################# } # ds.dataFrameFill diff --git a/R/ds.dataFrameSort.R b/R/ds.dataFrameSort.R index de59d61e..359fabb6 100644 --- a/R/ds.dataFrameSort.R +++ b/R/ds.dataFrameSort.R @@ -81,20 +81,13 @@ #' #' } #' @author DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @export -#' +#' ds.dataFrameSort<-function(df.name=NULL, sort.key.name=NULL, sort.descending=FALSE, sort.method="default", newobj=NULL, datasources=NULL){ - - # if no opal login details are provided look for 'opal' objects in the environment - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) if(is.null(sort.method)){ sort.method <- "default" @@ -123,84 +116,6 @@ ds.dataFrameSort<-function(df.name=NULL, sort.key.name=NULL, sort.descending=FAL # Call to assign function calltext <- call("dataFrameSortDS", df.name, sort.key.name, sort.descending, sort.method) datashield.assign(datasources, newobj, calltext) - - 
########################################################################################################### - #DataSHIELD CLIENTSIDE MODULE: CHECK KEY DATA OBJECTS SUCCESSFULLY CREATED # - # # - #SET APPROPRIATE PARAMETERS FOR THIS PARTICULAR FUNCTION # - test.obj.name<-newobj # - # # - # # - # CALL SEVERSIDE FUNCTION # - calltext <- call("testObjExistsDS", test.obj.name) # - # # - # object.info<-opal::datashield.aggregate(datasources, calltext) # - object.info<-datashield.aggregate(datasources, calltext) # - # # - # CHECK IN EACH SOURCE WHETHER OBJECT NAME EXISTS # - # AND WHETHER OBJECT PHYSICALLY EXISTS WITH A NON-NULL CLASS # - num.datasources<-length(object.info) # - # # - # # - obj.name.exists.in.all.sources<-TRUE # - obj.non.null.in.all.sources<-TRUE # - # # - for(j in 1:num.datasources){ # - if(!object.info[[j]]$test.obj.exists){ # - obj.name.exists.in.all.sources<-FALSE # - } # - if(is.null(object.info[[j]]$test.obj.class) || ("ABSENT" %in% object.info[[j]]$test.obj.class)){ # # - obj.non.null.in.all.sources<-FALSE # - } # - } # - # # - if(obj.name.exists.in.all.sources && obj.non.null.in.all.sources){ # - # # - return.message<- # - paste0("A data object <", test.obj.name, "> has been created in all specified data sources") # - # # - # # - }else{ # - # # - return.message.1<- # - paste0("Error: A valid data object <", test.obj.name, "> does NOT exist in ALL specified data sources") # - # # - return.message.2<- # - paste0("It is either ABSENT and/or has no valid content/class,see return.info above") # - # # - return.message.3<- # - paste0("Please use ds.ls() to identify where missing") # - # # - # # - return.message<-list(return.message.1,return.message.2,return.message.3) # - # # - } # - # # - calltext <- call("messageDS", test.obj.name) # - # studyside.message<-opal::datashield.aggregate(datasources, calltext) # - studyside.message<-datashield.aggregate(datasources, calltext) # - # # - no.errors<-TRUE # - for(nd in 1:num.datasources){ # - 
if(studyside.message[[nd]]!="ALL OK: there are no studysideMessage(s) on this datasource"){ # - no.errors<-FALSE # - } # - } # - # # - # # - if(no.errors){ # - validity.check<-paste0("<",test.obj.name, "> appears valid in all sources") # - return(list(is.object.created=return.message,validity.check=validity.check, # - studyside.messages=studyside.message)) # - } # - # - if(!no.errors){ # - validity.check<-paste0("<",test.obj.name,"> invalid in at least one source. See studyside.messages:") # - return(list(is.object.created=return.message,validity.check=validity.check, # - studyside.messages=studyside.message)) # - } # - # - #END OF CHECK OBJECT CREATED CORRECTLY MODULE # - ########################################################################################################### + } #ds.dataFrameSort diff --git a/R/ds.dataFrameSubset.R b/R/ds.dataFrameSubset.R index 1ae6278d..b09a900f 100644 --- a/R/ds.dataFrameSubset.R +++ b/R/ds.dataFrameSubset.R @@ -105,19 +105,12 @@ #' #' } #' @author DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @export ds.dataFrameSubset<-function(df.name=NULL, V1.name=NULL, V2.name=NULL, Boolean.operator=NULL, keep.cols=NULL, rm.cols=NULL, keep.NAs=NULL, newobj=NULL, datasources=NULL, notify.of.progress=FALSE){ - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) # check if user has provided the name of the data.frame to be subsetted if(is.null(df.name)){ @@ -242,81 +235,5 @@ if(!is.null(rm.cols)){ } } -############################################################################################################# 
-#DataSHIELD CLIENTSIDE MODULE: CHECK KEY DATA OBJECTS SUCCESSFULLY CREATED # - # -#SET APPROPRIATE PARAMETERS FOR THIS PARTICULAR FUNCTION # -test.obj.name<-newobj # - # # - # -# CALL SEVERSIDE FUNCTION # -calltext <- call("testObjExistsDS", test.obj.name) # - # -object.info<-DSI::datashield.aggregate(datasources, calltext) # - # -# CHECK IN EACH SOURCE WHETHER OBJECT NAME EXISTS # -# AND WHETHER OBJECT PHYSICALLY EXISTS WITH A NON-NULL CLASS # -num.datasources<-length(object.info) # - # - # -obj.name.exists.in.all.sources<-TRUE # -obj.non.null.in.all.sources<-TRUE # - # -for(j in 1:num.datasources){ # - if(!object.info[[j]]$test.obj.exists){ # - obj.name.exists.in.all.sources<-FALSE # - } # - if(is.null(object.info[[j]]$test.obj.class) || ("ABSENT" %in% object.info[[j]]$test.obj.class)){ # - obj.non.null.in.all.sources<-FALSE # - } # - } # - # -if(obj.name.exists.in.all.sources && obj.non.null.in.all.sources){ # - # - return.message<- # - paste0("A data object <", test.obj.name, "> has been created in all specified data sources") # - # - # - }else{ # - # - return.message.1<- # - paste0("Error: A valid data object <", test.obj.name, "> does NOT exist in ALL specified data sources") # - # - return.message.2<- # - paste0("It is either ABSENT and/or has no valid content/class,see return.info above") # - # - return.message.3<- # - paste0("Please use ds.ls() to identify where missing") # - # - # - return.message<-list(return.message.1,return.message.2,return.message.3) # - # - } # - # - calltext <- call("messageDS", test.obj.name) # - studyside.message<-DSI::datashield.aggregate(datasources, calltext) # - # - no.errors<-TRUE # - for(nd in 1:num.datasources){ # - if(studyside.message[[nd]]!="ALL OK: there are no studysideMessage(s) on this datasource"){ # - no.errors<-FALSE # - } # - } # - # - # - if(no.errors){ # - validity.check<-paste0("<",test.obj.name, "> appears valid in all sources") # - return(list(is.object.created=return.message,validity.check=validity.check)) 
# - } # - # -if(!no.errors){ # - validity.check<-paste0("<",test.obj.name,"> invalid in at least one source. See studyside.messages:") # - return(list(is.object.created=return.message,validity.check=validity.check, # - studyside.messages=studyside.message)) # - } # - # -#END OF CHECK OBJECT CREATED CORECTLY MODULE # -############################################################################################################# - } #ds.dataFrameSubset diff --git a/R/ds.list.R b/R/ds.list.R index b0847703..ebbf5749 100644 --- a/R/ds.list.R +++ b/R/ds.list.R @@ -8,11 +8,14 @@ #' @param x a character string specifying the names of the objects to coerce into a list. #' @param newobj a character string that provides the name for the output variable #' that is stored on the data servers. Default \code{list.newobj}. +#' @param classConsistencyCheck logical. If TRUE, verifies that each input object has +#' the same class across all studies before coercion. Default TRUE. #' @param datasources a list of \code{\link[DSI]{DSConnection-class}} #' objects obtained after login. If the \code{datasources} argument is not specified #' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}. #' @return \code{ds.list} returns a list of objects for each study that is stored on the server-side. 
#' @author DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @export #' @examples #' \dontrun{ @@ -51,49 +54,31 @@ #' datashield.logout(connections) #' } #' -ds.list <- function(x=NULL, newobj=NULL, datasources=NULL){ +ds.list <- function(x=NULL, newobj=NULL, datasources=NULL, classConsistencyCheck=TRUE){ - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) if(is.null(x)){ stop("x=NULL. Please provide the names of the objects to coerce into a list!", call.=FALSE) } - # check if the input object is defined in all the studies - isDefined(datasources, x) - - # call the internal function that checks the input object(s) is(are) of the same class in all studies. 
- for(i in 1:length(x)){ - typ <- checkClass(datasources, x[i]) - } - # create a name by default if user did not provide a name for the new variable if(is.null(newobj)){ newobj <- "list.newobj" } - + # get the variable names xnames <- extract(x) varnames <- xnames$elements - # get the names of the list elements if the user has not specified any - if(is.null(names)){ - names <- varnames + if(classConsistencyCheck){ + for(i in seq_along(x)){ + checkClass(datasources, x[i]) + } } # call the server side function that does the job - cally <- paste0("listDS(list(",paste(x,collapse=","),"), list('",paste(varnames,collapse="','"),"'))") - DSI::datashield.assign(datasources, newobj, as.symbol(cally)) - - # check that the new object has been created and display a message accordingly - finalcheck <- isAssigned(datasources, newobj) + cally <- call("listDS", x, as.list(varnames)) + DSI::datashield.assign(datasources, newobj, cally) } diff --git a/R/ds.merge.R b/R/ds.merge.R index 4ac436fb..358b81d7 100644 --- a/R/ds.merge.R +++ b/R/ds.merge.R @@ -53,6 +53,7 @@ #' indicating whether the new object has been created in each data source and if so whether #' it is in a valid form. 
#' @author DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @examples #' \dontrun{ #' @@ -116,15 +117,7 @@ ds.merge <- function(x.name=NULL,y.name=NULL, by.x.names=NULL, by.y.names=NULL,all.x=FALSE,all.y=FALSE, sort=TRUE, suffixes = c(".x",".y"), no.dups=TRUE, incomparables=NULL, newobj=NULL, datasources=NULL){ - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) # dataframe names if(is.null(x.name)){ @@ -134,10 +127,6 @@ ds.merge <- function(x.name=NULL,y.name=NULL, by.x.names=NULL, by.y.names=NULL,a if(is.null(y.name)){ stop("Please provide the name (eg 'name2') of second dataframe to be merged (called y) ", call.=FALSE) } - - # check if the input objects are defined in all the studies - isDefined(datasources, x.name) - isDefined(datasources, y.name) # names of columns to merge on (may be more than one) if(is.null(by.x.names)){ @@ -171,82 +160,5 @@ ds.merge <- function(x.name=NULL,y.name=NULL, by.x.names=NULL, by.y.names=NULL,a DSI::datashield.assign(datasources, newobj, calltext) -############################################################################################################# -#DataSHIELD CLIENTSIDE MODULE: CHECK KEY DATA OBJECTS SUCCESSFULLY CREATED # - # -#SET APPROPRIATE PARAMETERS FOR THIS PARTICULAR FUNCTION # -test.obj.name<-newobj # - # # - # -# CALL SEVERSIDE FUNCTION # -calltext <- call("testObjExistsDS", test.obj.name) # - # -object.info<-DSI::datashield.aggregate(datasources, calltext) # - # -# CHECK IN EACH SOURCE WHETHER OBJECT NAME EXISTS # -# AND WHETHER OBJECT PHYSICALLY EXISTS WITH A NON-NULL CLASS # 
-num.datasources<-length(object.info) # - # - # -obj.name.exists.in.all.sources<-TRUE # -obj.non.null.in.all.sources<-TRUE # - # -for(j in 1:num.datasources){ # - if(!object.info[[j]]$test.obj.exists){ # - obj.name.exists.in.all.sources<-FALSE # - } # - if(is.null(object.info[[j]]$test.obj.class) || ("ABSENT" %in% object.info[[j]]$test.obj.class)){ # - obj.non.null.in.all.sources<-FALSE # - } # - } # - # -if(obj.name.exists.in.all.sources && obj.non.null.in.all.sources){ # - # - return.message<- # - paste0("A data object <", test.obj.name, "> has been created in all specified data sources") # - # - # - }else{ # - # - return.message.1<- # - paste0("Error: A valid data object <", test.obj.name, "> does NOT exist in ALL specified data sources") # - # - return.message.2<- # - paste0("It is either ABSENT and/or has no valid content/class,see return.info above") # - # - return.message.3<- # - paste0("Please use ds.ls() to identify where missing") # - # - # - return.message<-list(return.message.1,return.message.2,return.message.3) # - # - } # - # - calltext <- call("messageDS", test.obj.name) # - studyside.message<-DSI::datashield.aggregate(datasources, calltext) # - # - no.errors<-TRUE # - for(nd in 1:num.datasources){ # - if(studyside.message[[nd]]!="ALL OK: there are no studysideMessage(s) on this datasource"){ # - no.errors<-FALSE # - } # - } # - # - # - if(no.errors){ # - validity.check<-paste0("<",test.obj.name, "> appears valid in all sources") # - return(list(is.object.created=return.message,validity.check=validity.check)) # - } # - # -if(!no.errors){ # - validity.check<-paste0("<",test.obj.name,"> invalid in at least one source. 
See studyside.messages:") # - return(list(is.object.created=return.message,validity.check=validity.check, # - studyside.messages=studyside.message)) # - } # - # -#END OF CHECK OBJECT CREATED CORECTLY MODULE # -############################################################################################################# - - } # ds.merge diff --git a/R/ds.rbind.R b/R/ds.rbind.R index d0aca96a..2943312d 100644 --- a/R/ds.rbind.R +++ b/R/ds.rbind.R @@ -20,18 +20,17 @@ #' input objects exist and are of an appropriate class. #' @param force.colnames can be NULL or a vector of characters that #' specifies column names of the output object. -#' @param newobj a character string that provides the name for the output variable -#' that is stored on the data servers. Defaults \code{rbind.newobj}. +#' @param classConsistencyCheck logical. If TRUE, verifies that each input object has the same class across all studies. Default TRUE. +#' @param newobj a character string that provides the name for the output variable +#' that is stored on the data servers. Defaults \code{rbind.newobj}. #' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. #' If the \code{datasources} argument is not specified #' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}. #' @param notify.of.progress specifies if console output should be produced to indicate #' progress. Default FALSE. -#' @return \code{ds.rbind} returns a matrix combining the rows of the +#' @return \code{ds.rbind} returns a matrix combining the rows of the #' R objects specified in the function -#' which is written to the server-side. -#' It also returns two messages to the client-side with the name of \code{newobj} -#' that has been created in each data source and \code{DataSHIELD.checks} result. +#' which is written to the server-side. 
#' @examples #' #' \dontrun{ @@ -78,20 +77,13 @@ #' } #' #' @author DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @export -#' -ds.rbind<-function(x=NULL, DataSHIELD.checks=FALSE, force.colnames=NULL, newobj=NULL, - datasources=NULL, notify.of.progress=FALSE){ - - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } +#' +ds.rbind<-function(x=NULL, DataSHIELD.checks=FALSE, force.colnames=NULL, newobj=NULL, + datasources=NULL, notify.of.progress=FALSE, classConsistencyCheck=TRUE){ - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) if(is.null(x)){ stop("Please provide a vector of character strings holding the name of the input elements!", call.=FALSE) @@ -99,17 +91,16 @@ ds.rbind<-function(x=NULL, DataSHIELD.checks=FALSE, force.colnames=NULL, newobj= if(DataSHIELD.checks){ - - # check if the input object(s) is(are) defined in all the studies - lapply(x, function(k){isDefined(datasources, obj=k)}) # call the internal function that checks the input object(s) is(are) of the same legal class in all studies. 
+ if(classConsistencyCheck){ for(i in 1:length(x)){ typ <- checkClass(datasources, x[i]) if(!('data.frame' %in% typ) & !('matrix' %in% typ) & !('factor' %in% typ) & !('character' %in% typ) & !('numeric' %in% typ) & !('integer' %in% typ) & !('logical' %in% typ)){ stop(" Only objects of type 'data.frame', 'matrix', 'numeric', 'integer', 'character', 'factor' and 'logical' are allowed.", call.=FALSE) } } + } } # check newobj not actively declared as null @@ -195,84 +186,5 @@ for(j in length(colname.vector):2) calltext <- call("rbindDS", x.names.transmit, colnames.transmit) DSI::datashield.assign(datasources, newobj, calltext) - -############################################################################################################# -#DataSHIELD CLIENTSIDE MODULE: CHECK KEY DATA OBJECTS SUCCESSFULLY CREATED # - # -#SET APPROPRIATE PARAMETERS FOR THIS PARTICULAR FUNCTION # -test.obj.name<-newobj # - # - # - # - # -# CALL SEVERSIDE FUNCTION # -calltext <- call("testObjExistsDS", test.obj.name) # - # -object.info<-DSI::datashield.aggregate(datasources, calltext) # - # -# CHECK IN EACH SOURCE WHETHER OBJECT NAME EXISTS # -# AND WHETHER OBJECT PHYSICALLY EXISTS WITH A NON-NULL CLASS # -num.datasources<-length(object.info) # - # - # -obj.name.exists.in.all.sources<-TRUE # -obj.non.null.in.all.sources<-TRUE # - # -for(j in 1:num.datasources){ # - if(!object.info[[j]]$test.obj.exists){ # - obj.name.exists.in.all.sources<-FALSE # - } # - if(is.null(object.info[[j]]$test.obj.class) || ("ABSENT" %in% object.info[[j]]$test.obj.class)){ # - obj.non.null.in.all.sources<-FALSE # - } # - } # - # -if(obj.name.exists.in.all.sources && obj.non.null.in.all.sources){ # - # - return.message<- # - paste0("A data object <", test.obj.name, "> has been created in all specified data sources") # - # - # - }else{ # - # - return.message.1<- # - paste0("Error: A valid data object <", test.obj.name, "> does NOT exist in ALL specified data sources") # - # - return.message.2<- # - paste0("It is 
either ABSENT and/or has no valid content/class,see return.info above") # - # - return.message.3<- # - paste0("Please use ds.ls() to identify where missing") # - # - # - return.message<-list(return.message.1,return.message.2,return.message.3) # - # - } # - # - calltext <- call("messageDS", test.obj.name) # - studyside.message<-DSI::datashield.aggregate(datasources, calltext) # - # - no.errors<-TRUE # - for(nd in 1:num.datasources){ # - if(studyside.message[[nd]]!="ALL OK: there are no studysideMessage(s) on this datasource"){ # - no.errors<-FALSE # - } # - } # - # - # - if(no.errors){ # - validity.check<-paste0("<",test.obj.name, "> appears valid in all sources") # - return(list(is.object.created=return.message,validity.check=validity.check)) # - } # - # -if(!no.errors){ # - validity.check<-paste0("<",test.obj.name,"> invalid in at least one source. See studyside.messages:") # - return(list(is.object.created=return.message,validity.check=validity.check, # - studyside.messages=studyside.message)) # - } # - # -#END OF CHECK OBJECT CREATED CORECTLY MODULE # -############################################################################################################# - } #ds.rbind diff --git a/R/ds.recodeValues.R b/R/ds.recodeValues.R index 184ccea2..a37ebc54 100644 --- a/R/ds.recodeValues.R +++ b/R/ds.recodeValues.R @@ -29,6 +29,7 @@ #' indicating whether the new object has been created in each data source and if so whether #' it is in a valid form. 
#' @author DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @examples #' \dontrun{ #' @@ -81,24 +82,13 @@ ds.recodeValues <- function(var.name=NULL, values2replace.vector=NULL, new.values.vector=NULL, missing=NULL, newobj=NULL, datasources=NULL, notify.of.progress=FALSE){ - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) # check user has provided the name of the variable to be recoded if(is.null(var.name)){ stop("Please provide the name of the variable to be recoded: eg 'xxx'", call.=FALSE) } - - # check if the input object is defined in all the studies - isDefined(datasources, var.name) - + # check user has provided the vector specifying the set of values to be replaced if(is.null(values2replace.vector)){ stop("Please provide a vector in the 'values2replace.vector' argument specifying @@ -140,81 +130,5 @@ ds.recodeValues <- function(var.name=NULL, values2replace.vector=NULL, new.value calltext <- call("recodeValuesDS", var.name, values2replace.transmit, new.values.transmit, missing) DSI::datashield.assign(datasources, newobj, calltext) -############################################################################################################# -#DataSHIELD CLIENTSIDE MODULE: CHECK KEY DATA OBJECTS SUCCESSFULLY CREATED # - # -#SET APPROPRIATE PARAMETERS FOR THIS PARTICULAR FUNCTION # -test.obj.name<-newobj # - # # - # -# CALL SEVERSIDE FUNCTION # -calltext <- call("testObjExistsDS", test.obj.name) # - # -object.info<-DSI::datashield.aggregate(datasources, calltext) # - # -# CHECK IN EACH SOURCE WHETHER OBJECT NAME EXISTS # -# AND WHETHER 
OBJECT PHYSICALLY EXISTS WITH A NON-NULL CLASS # -num.datasources<-length(object.info) # - # - # -obj.name.exists.in.all.sources<-TRUE # -obj.non.null.in.all.sources<-TRUE # - # -for(j in 1:num.datasources){ # - if(!object.info[[j]]$test.obj.exists){ # - obj.name.exists.in.all.sources<-FALSE # - } # - if(is.null(object.info[[j]]$test.obj.class) || ("ABSENT" %in% object.info[[j]]$test.obj.class)){ # - obj.non.null.in.all.sources<-FALSE # - } # - } # - # -if(obj.name.exists.in.all.sources && obj.non.null.in.all.sources){ # - # - return.message<- # - paste0("A data object <", test.obj.name, "> has been created in all specified data sources") # - # - # - }else{ # - # - return.message.1<- # - paste0("Error: A valid data object <", test.obj.name, "> does NOT exist in ALL specified data sources") # - # - return.message.2<- # - paste0("It is either ABSENT and/or has no valid content/class,see return.info above") # - # - return.message.3<- # - paste0("Please use ds.ls() to identify where missing") # - # - # - return.message<-list(return.message.1,return.message.2,return.message.3) # - # - } # - # - calltext <- call("messageDS", test.obj.name) # - studyside.message<-DSI::datashield.aggregate(datasources, calltext) # - # - no.errors<-TRUE # - for(nd in 1:num.datasources){ # - if(studyside.message[[nd]]!="ALL OK: there are no studysideMessage(s) on this datasource"){ # - no.errors<-FALSE # - } # - } # - # - # - if(no.errors){ # - validity.check<-paste0("<",test.obj.name, "> appears valid in all sources") # - return(list(is.object.created=return.message,validity.check=validity.check)) # - } # - # -if(!no.errors){ # - validity.check<-paste0("<",test.obj.name,"> invalid in at least one source. 
See studyside.messages:") # - return(list(is.object.created=return.message,validity.check=validity.check, # - studyside.messages=studyside.message)) # - } # - # -#END OF CHECK OBJECT CREATED CORECTLY MODULE # -############################################################################################################# - } #ds.recodeValues diff --git a/R/ds.rep.R b/R/ds.rep.R index 2f3e0301..0eb62f52 100644 --- a/R/ds.rep.R +++ b/R/ds.rep.R @@ -91,22 +91,15 @@ #' } #' #' @author DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @export #' -ds.rep<-function(x1=NULL, times=NA, length.out=NA, each=1, +ds.rep<-function(x1=NULL, times=NA, length.out=NA, each=1, source.x1='clientside', source.times=NULL, source.length.out=NULL,source.each=NULL, x1.includes.characters=FALSE,newobj=NULL,datasources=NULL){ - - # if no connection login details are provided look for 'connection' objects in the environment - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) # check if a value has been provided for x1 if(is.null(x1)){ @@ -282,85 +275,6 @@ if(source.each=='s')source.each<-'serverside' DSI::datashield.assign(datasources, newobj, calltext) -############################################################################################################# -#DataSHIELD CLIENTSIDE MODULE: CHECK KEY DATA OBJECTS SUCCESSFULLY CREATED # - # -#SET APPROPRIATE PARAMETERS FOR THIS PARTICULAR FUNCTION # -test.obj.name<-newobj # - # -#TRACER # -#return(test.obj.name) # -#} # - # - # -# CALL SEVERSIDE FUNCTION # -calltext <- call("testObjExistsDS", test.obj.name) # - # 
-object.info<-DSI::datashield.aggregate(datasources, calltext) # - # -# CHECK IN EACH SOURCE WHETHER OBJECT NAME EXISTS # -# AND WHETHER OBJECT PHYSICALLY EXISTS WITH A NON-NULL CLASS # -num.datasources<-length(object.info) # - # - # -obj.name.exists.in.all.sources<-TRUE # -obj.non.null.in.all.sources<-TRUE # - # -for(j in 1:num.datasources){ # - if(!object.info[[j]]$test.obj.exists){ # - obj.name.exists.in.all.sources<-FALSE # - } # - if(is.null(object.info[[j]]$test.obj.class) || ("ABSENT" %in% object.info[[j]]$test.obj.class)){ # - obj.non.null.in.all.sources<-FALSE # - } # - } # - # -if(obj.name.exists.in.all.sources && obj.non.null.in.all.sources){ # - # - return.message<- # - paste0("A data object <", test.obj.name, "> has been created in all specified data sources") # - # - # - }else{ # - # - return.message.1<- # - paste0("Error: A valid data object <", test.obj.name, "> does NOT exist in ALL specified data sources") # - # - return.message.2<- # - paste0("It is either ABSENT and/or has no valid content/class,see return.info above") # - # - return.message.3<- # - paste0("Please use ds.ls() to identify where missing") # - # - # - return.message<-list(return.message.1,return.message.2,return.message.3) # - # - } # - # - calltext <- call("messageDS", test.obj.name) # - studyside.message<-DSI::datashield.aggregate(datasources, calltext) # - # - no.errors<-TRUE # - for(nd in 1:num.datasources){ # - if(studyside.message[[nd]]!="ALL OK: there are no studysideMessage(s) on this datasource"){ # - no.errors<-FALSE # - } # - } # - # - # - if(no.errors){ # - validity.check<-paste0("<",test.obj.name, "> appears valid in all sources") # - return(list(is.object.created=return.message,validity.check=validity.check)) # - } # - # -if(!no.errors){ # - validity.check<-paste0("<",test.obj.name,"> invalid in at least one source. 
See studyside.messages:") # - return(list(is.object.created=return.message,validity.check=validity.check, # - studyside.messages=studyside.message)) # - } # - # -#END OF CHECK OBJECT CREATED CORECTLY MODULE # -############################################################################################################# } #ds.rep diff --git a/R/ds.replaceNA.R b/R/ds.replaceNA.R index 28a51adb..8a4666c6 100644 --- a/R/ds.replaceNA.R +++ b/R/ds.replaceNA.R @@ -26,6 +26,7 @@ #' with the missing values replaced by the specified values. #' The class of the vector is the same as the initial vector. #' @author DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @export #' @examples #' \dontrun{ @@ -91,22 +92,11 @@ #' ds.replaceNA <- function(x=NULL, forNA=NULL, newobj=NULL, datasources=NULL){ - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) if(is.null(x)){ stop("Please provide the name of a vector!", call.=FALSE) } - - # check if the input object is defined in all the studies - isDefined(datasources, x) # check if replacement values have been provided if(is.null(forNA)){ @@ -136,11 +126,8 @@ ds.replaceNA <- function(x=NULL, forNA=NULL, newobj=NULL, datasources=NULL){ # call the server side function and doo the replacement for each server for(i in 1:length(datasources)){ message(paste0("--Processing ", names(datasources)[i], "...")) - cally <- paste0("replaceNaDS(", x, paste0(", vectorDS(",paste(forNA[[i]],collapse=","),")"), ")") - DSI::datashield.assign(datasources[i], newobj, as.symbol(cally)) - - # check that the new object has been created and display 
a message accordingly - finalcheck <- isAssigned(datasources[i], newobj) + cally <- call("replaceNaDS", x, forNA[[i]]) + DSI::datashield.assign(datasources[i], newobj, cally) # if the input vector is within a table structure append the new vector to that table inputElts <- extract(x) diff --git a/R/ds.seq.R b/R/ds.seq.R index d21522a0..91b10978 100644 --- a/R/ds.seq.R +++ b/R/ds.seq.R @@ -63,7 +63,8 @@ #' indicating whether the new object has been created in each data source and if so whether #' it is in a valid form. #' @author DataSHIELD Development Team -#' @examples +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands +#' @examples #' \dontrun{ #' #' ## Version 6, for version 5 see the Wiki @@ -119,15 +120,7 @@ ds.seq<-function(FROM.value.char = "1", BY.value.char = "1", TO.value.char=NULL, LENGTH.OUT.value.char = NULL, ALONG.WITH.name=NULL, newobj="newObj", datasources=NULL) { ###datasources - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) ###FROM.value.char # check FROM.value.char is valid @@ -191,82 +184,5 @@ if(is.null(TO.value.char)&&is.null(LENGTH.OUT.value.char)&&is.null(ALONG.WITH.na calltext <- call("seqDS", FROM.value.char,TO.value.char,BY.value.char,LENGTH.OUT.value.char,ALONG.WITH.name) DSI::datashield.assign(datasources, newobj, calltext) - -############################################################################################################# -#DataSHIELD CLIENTSIDE MODULE: CHECK KEY DATA OBJECTS SUCCESSFULLY CREATED # - # -#SET APPROPRIATE PARAMETERS FOR THIS PARTICULAR FUNCTION # -test.obj.name<-newobj # - # # - # -# CALL SEVERSIDE FUNCTION # -calltext 
<- call("testObjExistsDS", test.obj.name) # - # -object.info<-DSI::datashield.aggregate(datasources, calltext) # - # -# CHECK IN EACH SOURCE WHETHER OBJECT NAME EXISTS # -# AND WHETHER OBJECT PHYSICALLY EXISTS WITH A NON-NULL CLASS # -num.datasources<-length(object.info) # - # - # -obj.name.exists.in.all.sources<-TRUE # -obj.non.null.in.all.sources<-TRUE # - # -for(j in 1:num.datasources){ # - if(!object.info[[j]]$test.obj.exists){ # - obj.name.exists.in.all.sources<-FALSE # - } # - if(is.null(object.info[[j]]$test.obj.class) || ("ABSENT" %in% object.info[[j]]$test.obj.class)){ # - obj.non.null.in.all.sources<-FALSE # - } # - } # - # -if(obj.name.exists.in.all.sources && obj.non.null.in.all.sources){ # - # - return.message<- # - paste0("A data object <", test.obj.name, "> has been created in all specified data sources") # - # - # - }else{ # - # - return.message.1<- # - paste0("Error: A valid data object <", test.obj.name, "> does NOT exist in ALL specified data sources") # - # - return.message.2<- # - paste0("It is either ABSENT and/or has no valid content/class,see return.info above") # - # - return.message.3<- # - paste0("Please use ds.ls() to identify where missing") # - # - # - return.message<-list(return.message.1,return.message.2,return.message.3) # - # - } # - # - calltext <- call("messageDS", test.obj.name) # - studyside.message<-DSI::datashield.aggregate(datasources, calltext) # - # - no.errors<-TRUE # - for(nd in 1:num.datasources){ # - if(studyside.message[[nd]]!="ALL OK: there are no studysideMessage(s) on this datasource"){ # - no.errors<-FALSE # - } # - } # - # - # - if(no.errors){ # - validity.check<-paste0("<",test.obj.name, "> appears valid in all sources") # - return(list(is.object.created=return.message,validity.check=validity.check)) # - } # - # -if(!no.errors){ # - validity.check<-paste0("<",test.obj.name,"> invalid in at least one source. 
See studyside.messages:") # - return(list(is.object.created=return.message,validity.check=validity.check, # - studyside.messages=studyside.message)) # - } # - # -#END OF CHECK OBJECT CREATED CORECTLY MODULE # -############################################################################################################# - } #ds.seq diff --git a/R/ds.unList.R b/R/ds.unList.R index fa14a4f2..35ebbe27 100644 --- a/R/ds.unList.R +++ b/R/ds.unList.R @@ -26,7 +26,8 @@ #' indicating whether the new object has been created in each data source and if so whether #' it is in a valid form. #' @author DataSHIELD Development Team -#' @examples +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands +#' @examples #' \dontrun{ #' ## Version 6, for version 5 see the Wiki #' @@ -72,15 +73,7 @@ #' @export ds.unList <- function(x.name=NULL, newobj=NULL, datasources=NULL){ - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) if(is.null(x.name)){ stop("Please provide the name of the input vector!", call.=FALSE) @@ -96,82 +89,6 @@ ds.unList <- function(x.name=NULL, newobj=NULL, datasources=NULL){ calltext <- call("unListDS", x.name) DSI::datashield.assign(datasources, newobj, calltext) - -############################################################################################################# -#DataSHIELD CLIENTSIDE MODULE: CHECK KEY DATA OBJECTS SUCCESSFULLY CREATED # - # -#SET APPROPRIATE PARAMETERS FOR THIS PARTICULAR FUNCTION # -test.obj.name<-newobj # # # - # -# CALL SEVERSIDE FUNCTION # -calltext <- call("testObjExistsDS", test.obj.name) # - # -object.info<-DSI::datashield.aggregate(datasources, 
calltext) # - # -# CHECK IN EACH SOURCE WHETHER OBJECT NAME EXISTS # -# AND WHETHER OBJECT PHYSICALLY EXISTS WITH A NON-NULL CLASS # -num.datasources<-length(object.info) # - # - # -obj.name.exists.in.all.sources<-TRUE # -obj.non.null.in.all.sources<-TRUE # - # -for(j in 1:num.datasources){ # - if(!object.info[[j]]$test.obj.exists){ # - obj.name.exists.in.all.sources<-FALSE # - } # - if(is.null(object.info[[j]]$test.obj.class) || ("ABSENT" %in% object.info[[j]]$test.obj.class)){ # - obj.non.null.in.all.sources<-FALSE # - } # - } # - # -if(obj.name.exists.in.all.sources && obj.non.null.in.all.sources){ # - # - return.message<- # - paste0("A data object <", test.obj.name, "> has been created in all specified data sources") # - # - # - }else{ # - # - return.message.1<- # - paste0("Error: A valid data object <", test.obj.name, "> does NOT exist in ALL specified data sources") # - # - return.message.2<- # - paste0("It is either ABSENT and/or has no valid content/class,see return.info above") # - # - return.message.3<- # - paste0("Please use ds.ls() to identify where missing") # - # - # - return.message<-list(return.message.1,return.message.2,return.message.3) # - # - } # - # - calltext <- call("messageDS", test.obj.name) # - studyside.message<-DSI::datashield.aggregate(datasources, calltext) # - # - no.errors<-TRUE # - for(nd in 1:num.datasources){ # - if(studyside.message[[nd]]!="ALL OK: there are no studysideMessage(s) on this datasource"){ # - no.errors<-FALSE # - } # - } # - # - # - if(no.errors){ # - validity.check<-paste0("<",test.obj.name, "> appears valid in all sources") # - return(list(is.object.created=return.message,validity.check=validity.check)) # - } # - # -if(!no.errors){ # - validity.check<-paste0("<",test.obj.name,"> invalid in at least one source. 
See studyside.messages:") # - return(list(is.object.created=return.message,validity.check=validity.check, # - studyside.messages=studyside.message)) # - } # - # -#END OF CHECK OBJECT CREATED CORECTLY MODULE # -############################################################################################################# - } #ds.unList diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 00000000..51ef63e2 --- /dev/null +++ b/R/utils.R @@ -0,0 +1,69 @@ +#' Retrieve datasources if not specified +#' +#' @param datasources An optional list of data sources. If not provided, the function will attempt +#' to find available data sources. +#' @importFrom DSI datashield.connections_find +#' @return A list of data sources. +#' @noRd +.get_datasources <- function(datasources) { + if (is.null(datasources)) { + datasources <- datashield.connections_find() + } + return(datasources) +} + +#' Verify that the provided data sources are of class 'DSConnection'. +#' +#' @param datasources A list of data sources. +#' @importFrom cli cli_abort +#' @noRd +.verify_datasources <- function(datasources) { + is_connection_class <- sapply(datasources, function(x) inherits(unlist(x), "DSConnection")) + if (!all(is_connection_class)) { + cli_abort("The 'datasources' were expected to be a list of DSConnection-class objects") + } +} + +#' Set and verify data sources. +#' +#' @param datasources An optional list of data sources. If not provided, the function will attempt +#' to find available data sources. +#' @return A list of verified data sources. +#' @noRd +.set_datasources <- function(datasources) { + datasources <- .get_datasources(datasources) + .verify_datasources(datasources) + return(datasources) +} + +#' Check cross-study class consistency from a list of server aggregate results +#' +#' Batch-refactored server functions return a list per study that includes a +#' `class` field. This helper verifies that the class field is identical across +#' all studies and aborts if not. 
+#' +#' @param results A named list of server-side aggregate results, one per study, +#' each containing a `class` element. +#' @importFrom cli cli_abort +#' @return Invisibly returns `NULL`. Called for its side effect (error checking). +#' @noRd +.checkClassConsistency <- function(results) { + classes <- lapply(results, function(r) r$class) + if (length(unique(lapply(classes, sort))) > 1) { + cli_abort("The input object is not of the same class in all studies!") + } +} + +#' Check That a Data Frame Name Is Provided +#' +#' Internal helper that checks whether a data frame or matrix object +#' has been provided. If `NULL`, it aborts with a user-friendly error. +#' +#' @param df A data.frame or matrix. +#' @return Invisibly returns `NULL`. Called for its side effect (error checking). +#' @noRd +.check_df_name_provided <- function(df) { + if(is.null(df)){ + cli_abort("Please provide the name of a data.frame or matrix!", call.=FALSE) + } +} diff --git a/REFACTOR_GUIDE.md b/REFACTOR_GUIDE.md new file mode 100644 index 00000000..f7226e83 --- /dev/null +++ b/REFACTOR_GUIDE.md @@ -0,0 +1,449 @@ +# Refactoring Plan: dsBase & dsBaseClient Function Pairs + +> **Action:** Replace `/Users/tcadman/github-repos/ds-core/dsBaseClient/REFACTOR_GUIDE.md` with this plan content so it's accessible across branches. + +## Context + +The `ds.colnames` / `colnamesDS` pair has been refactored as a reference implementation. The pattern shifts server-state validation (object existence, type checking) from client to server, reducing network round trips and centralizing validation where data lives. This needs to be applied across all remaining function pairs in both packages. 
+ +The refactored `ds.colnames` branch (`v7.0-dev-colnames`) also introduces shared helpers: +- **Client:** `R/utils.R` with `.set_datasources()`, `.check_df_name_provided()` +- **Server:** `R/utils.R` with `.loadServersideObject()`, `.checkClass()` + +## Relationship Between Packages + +- **dsBaseClient** (`/Users/tcadman/github-repos/ds-core/dsBaseClient/R/`) — Client functions (`ds.functionName`) that validate inputs and dispatch calls to server +- **dsBase** (`/Users/tcadman/github-repos/ds-core/dsBase/R/`) — Server functions (`functionNameDS`) that execute on the data + +## What Changes Per Function Pair + +### Client-side (dsBaseClient) + +1. **Replace datasource boilerplate** with `datasources <- .set_datasources(datasources)` + - Removes: `datashield.connections_find()` + DSConnection class check (~8 lines) + +2. **Remove `isDefined()` calls** — server handles via `.loadServersideObject()` + +3. **Remove `checkClass()` calls and subsequent type guards** — server handles via `.checkClass()` + +4. **Add `classConsistencyCheck` parameter** — For any function where the input accepts more than one permitted class, add a `classConsistencyCheck` parameter. The server function returns `class = class(obj)` in its result list; the client checks consistency via `.checkClassConsistency()` when the parameter is TRUE, then strips the `class` field before returning to the user. Rules for the default value: + - **TRUE** when permitted classes include genuinely different types (e.g. data.frame + matrix, factor + character + integer) + - **FALSE** when permitted classes are only `numeric` and `integer` (these are effectively interchangeable) + - **No parameter** when only one class is permitted (e.g. 
`ds.levels` only permits factor — consistency is guaranteed by `.checkClass()`) + + **Verification:** after refactoring, inspect every `return()` in the client function — `class` (or `class.x` / `class.index` for multi-input functions) must not appear as a named element of the returned list. Stripping can be explicit (`r$class <- NULL`) or implicit (building the return from a specific subset of fields), but the absence of `class` in the final returned value must be visible at the `return()` site. + +5. **Remove `ValidityMessage`** — Server functions that returned `ValidityMessage = "VALID ANALYSIS"` should remove it. Failures should call `stop()` instead of returning a failure message. Remove `ValidityMessage` from client returns too. This is a major release so API changes are acceptable. + +6. **Remove `isAssigned()` calls** — no longer verify object creation client-side + +7. **Remove MODULE 5 boilerplate** — the ~40-80 line "CHECK KEY DATA OBJECTS SUCCESSFULLY CREATED" block + +8. **Remove `checks` parameter** — functions like `ds.dim` and `ds.length` have a `checks` parameter that gates `isDefined()`/`checkClass()` calls. Once those calls are removed, the parameter serves no purpose. Remove it from the function signature and delete the associated conditional block. + +9. **Replace per-study loops with single aggregate calls** — some functions (e.g. `ds.isNA`) loop over datasources one at a time (`datashield.aggregate(datasources[i], ...)`). Since `datashield.aggregate` already supports multiple datasources and returns a named list, replace these loops with a single call and process results client-side. This collapses N sequential round trips into 1 parallel call. + +10. **Keep**: null-input checks (or replace with `.check_df_name_provided()`), default `newobj` naming, the actual server call dispatch, any pure client-side logic + +### Server-side (dsBase) + +Two refactor patterns, depending on how the server function currently receives its input. 
Pick the one that matches. + +**Pattern A — function already receives a string name and uses `eval(parse())` internally.** + +1. Replace `eval(parse(text=x), envir=parent.frame())` with `.loadServersideObject(x)`. +2. Add `.checkClass(obj = x.val, obj_name = x, permitted_classes = …)` right after loading, where the client previously enforced type constraints. +3. Keep all computation, disclosure controls, privacy checks untouched. + +**Pattern B — function currently receives a resolved R object via dispatch-layer evaluation (`as.symbol()` or `call()` in the client).** + +1. Rename the function parameter from its descriptive body-variable name (e.g. `xvect`, `X`) to a simple string-name parameter (e.g. `x`). Do **not** rename the body-variable usages. +2. At the top of the body, load into the original body-variable name: `xvect <- .loadServersideObject(x)`. +3. Add `.checkClass(obj = xvect, obj_name = x, permitted_classes = …)`. +4. On the client, switch dispatch from `as.symbol(paste0("funcDS(", x, ")"))` to `call("funcDS", x)` so the string is passed through instead of being evaluated. +5. Update the `@param` roxygen line to describe the string-name form. + +Both patterns leave the function body untouched — **no renaming inside the body, no restyling.** Minimise diff. + +### Returning class from the server + +Some client functions previously called `checkClass()` purely to drive **client-side routing** (e.g. decide which server function to dispatch, or which output format to use, or to warn about an argument being ignored). That's a separate network round trip solely to discover the class of the input object — redundant, because the server that runs the aggregate already has the object in hand. + +The batch 2 precedent (`dimDS`, `lengthDS`) is: **return the class as a field of the aggregate result**. 
For example: + +```r +lengthDS <- function(x){ + x.val <- .loadServersideObject(x) + .checkClass(obj = x.val, obj_name = x, permitted_classes = c(...)) + list(length = length(x.val), class = class(x.val)) +} +``` + +The client then reads `result$class` for any post-hoc routing, consistency check, or warning — no extra `checkClass()` call needed. + +Cross-study class consistency is checked via the shared helper `.checkClassConsistency(results)` (in `dsBaseClient/R/utils.R`), which aborts if the `class` field differs across studies. Always use this helper instead of inlining the check. + +**When to apply this:** +- The client was previously calling `checkClass()` to select output format, format warnings, or check class consistency across studies. +- The class information can be derived from the aggregate's input. + +**When not to apply this:** +- The client needs the class *before* choosing which server function to call (true composite dispatchers like `ds.summary`, which branch to completely different server functions per class). These still need a pre-call class lookup, for example via `call("classDS", x)` as a single lightweight aggregate. +- The class is genuinely irrelevant to the client after the call. + +Prefer this pattern over keeping a client-side `checkClass()` call whenever a client function currently does both a `checkClass()` and an aggregate call on the same object. + +### Minimal-diff rule + +Refactors should change as little as possible. Do not rename variables, restyle comments, reformat whitespace, or bundle unrelated cleanups. If a variable rename is stylistically tempting but not strictly required by the refactor, skip it. If the user pushes back on a change, stop and ask — do not iterate with more edits on the same file. + +### Do not change existing behaviour + +The refactor must not alter which input types a function accepts or what it returns (beyond adding `class` to the return list). 
If the original function accepted data.frames, the refactored version must too. Check previous behaviour before setting permitted classes in `.checkClass()`. + +This extends to **test coverage**. If a refactor removes output fields that tests asserted on, the refactor is not done until equivalent coverage is added. Do not merely delete assertions to make tests pass — that silently reduces what the suite verifies. The change is reviewable by diffing `test-smk-*.R` against the base branch: every removed `expect_*` must be either (a) redundant because the same behaviour is covered elsewhere in the same file, or (b) replaced with an assertion covering the same server-side behaviour. + +### Adding new parameters to existing exported functions + +When adding a parameter to an already-released function (e.g. `classConsistencyCheck`, a new behaviour flag), **place it after all existing named parameters** — never in the middle of the signature. Inserting a parameter mid-signature silently breaks every caller that used positional argument order for anything to its right. Append it to the end (after `datasources=NULL` is acceptable even though `datasources` is conventionally last), and document the default value in `@param`. 
+
+### Tests
+
+**Server-side unit tests** (new `test-smk-functionNameDS.R` in dsBase):
+- Happy path: call with valid input, assert correct output
+- Unhappy: nonexistent object → `expect_error(..., "does not exist")`
+- Unhappy: wrong type → `expect_error(..., "must be of type")` (only where `.checkClass()` is used)
+
+**Client-side end-to-end tests** (update existing `test-smk-ds.functionName.R` in dsBaseClient):
+- Happy path: existing tests should still pass
+- Unhappy: nonexistent object → `expect_error(..., "DataSHIELD errors")`
+- Unhappy: wrong type → `expect_error(..., "DataSHIELD errors")` (where type was previously checked client-side)
+- Update any tests that expected client-side error messages to expect server-originated errors
+- **When MODULE 5 assertions are removed, add comparable replacements.** The old MODULE 5 block returned `$is.object.created` and `$validity.check` messages asserting that `newobj` existed on every server. When those assertions are stripped, add equivalent checks inside the same `test_that` block that verify the object was created on all sources — e.g. `ds_expect_variables(c("newobj"))` or `expect_no_error(ds.class("newobj"))`. Relying on the shutdown-block `ds_expect_variables()` alone is not sufficient because it can't pinpoint which test created the missing object.
+
+**Client-side smoke tests** (new `test-smk-ds.functionName.R` if none exists):
+- If no smoke test file exists for a refactored client function, create one. Every refactored function must have at least a basic happy-path smoke test that exercises the server call and verifies the result.
+- Follow the existing test pattern: `connect.studies.dataset.cnsim(...)`, `test_that("setup", ...)`, main test block, `test_that("shutdown", ...)`, `disconnect.studies.dataset.cnsim()`.
+
+**Client-side performance tests** (new `test-perf-ds.functionName.R` in dsBaseClient):
+- Add a performance test for each refactored client function.
Follow the pattern in `test-perf-ds.class.R`: call the function in a timed loop, compare against a reference rate from the perf profile CSV. +- Run with `PERF_DURATION_SEC=2 devtools::test(filter = "perf-")` during development; the default 30-second duration is for CI. +- **Do not** include Arjuna Technologies copyright headers in new test files. The existing headers in pre-refactor files should be left as-is, but new files we create should not carry third-party copyright. +- **The perf test must replicate the smoke test.** Before writing a perf test, read the corresponding `test-smk-ds.functionName.R` and copy: + 1. The `connect.studies.dataset.*()` line (same dataset, same columns) + 2. The `disconnect.studies.dataset.*()` line + 3. The function call (same parameters, same column names, same argument names) + + The perf test should exercise the same code path as the smoke test's happy-path call. Do not use generic placeholder calls or different datasets. + +**Design decisions:** +- Functions accepting any class: use `.loadServersideObject()` only, no `.checkClass()` +- Client tests must include unhappy paths testing server error propagation +- Start with Batch 1 (simple coercions) + +### Authorship + +After the refactor commits for a batch have landed, add `Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands` as a new `@author` roxygen line in every R/ file touched on each branch (dsBase and dsBaseClient), matching the existing `@author` line(s) below: + +``` +#' @author +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands +``` + +Skip files that have no existing `@author` line (e.g. `R/utils.R`). Do this as a separate trailing commit per repo with message `docs: updated authorship`, not bundled with the refactor commits. + +**Only add the tag to files you actually refactored** (replaced `eval(parse())`, added `.loadServersideObject` / `.checkClass`, replaced MODULE 5, converted dispatch to `call()`, etc.). 
If a file in a batch's function list turns out not to need any substantive change — for example a server function whose inputs are client-transmitted literal data rather than object names (`dmtC2SDS` is one such case) — leave its author line as-is. Adding `@author Tim Cadman` to an untouched file is incorrect authorship attribution. + +## Excluded Functions + +**Deprecated (12):** ds.look, ds.meanByClass, ds.message, ds.recodeLevels, ds.setDefaultOpals, ds.subset, ds.subsetByClass, ds.table1D, ds.table2D, ds.vectorCalc, ds.listOpals, ds.listServersideFunctions + +**Already done:** ds.colnames / colnamesDS + +**Client-only (no server pair):** checkClass.R, isDefined.R, isAssigned.R, extract.R, glmChecks.R, getPooledMean.R, getPooledVar.R, helpers (meanByClassHelper*, subsetHelper, logical2int, colPercent, rowPercent) + +## Batches + +### Batch 1 — Simple Type Coercions (11 pairs) +Single input → single output, straightforward `eval(parse())` replacement. + +**TODO:** Add `classConsistencyCheck` parameter to batch-1 functions with >1 permitted class. For numeric/integer-only functions (ds.abs, ds.exp, ds.log, ds.sqrt) default to FALSE; for others (ds.asDataMatrix: data.frame/matrix, ds.asLogical: numeric/integer/character/matrix) default to TRUE. Server functions need to return class in results to support this. dsBase batch-1 is already merged — server changes need a new branch or inclusion in a later batch. + +**TODO:** `test-smk-asLogicalDS.R` in dsBase is missing a wrong-type test case (e.g. passing a list). Other `.checkClass()` functions (absDS, expDS, logDS, sqrtDS) have this test. dsBase batch-1 is merged — fix in a follow-up. 
+ +| Client | Server | Permitted classes | Notes | +|--------|--------|-------------------|-------| +| ds.abs | absDS | numeric, integer | | +| ds.asCharacter | asCharacterDS | * | | +| ds.asDataMatrix | asDataMatrixDS | data.frame, matrix | | +| ds.asInteger | asIntegerDS | * | | +| ds.asList | asListDS | * | **AGGREGATE** (not assign); server takes 2 params (x.name, newobj) | +| ds.asLogical | asLogicalDS | * | Server has existing type validation (numeric/integer/character/matrix) — preserve as `.checkClass()` | +| ds.asMatrix | asMatrixDS | * | | +| ds.asNumeric | asNumericDS | * | Server has complex factor/character conversion logic — preserve | +| ds.exp | **NEW: expDS** | numeric, integer | No server DS function exists — client currently calls native `exp()` via `as.symbol()`. Must create `expDS.R` | +| ds.log | **NEW: logDS** | numeric, integer | No server DS function exists — client currently calls native `log()` via `as.symbol()`. Must create `logDS.R`. Has `base` parameter | +| ds.sqrt | sqrtDS | numeric, integer | | + +`*` = accept any class — only use `.loadServersideObject()`, no `.checkClass()` needed + +**Batch 1 sub-patterns discovered:** +- **Math ops (abs, exp, log, sqrt):** Client uses `checkClass()` + `isAssigned()`, no MODULE 5 +- **Type conversions (asCharacter, asDataMatrix, asInteger, asLogical, asMatrix, asNumeric):** Client uses `isDefined()` + MODULE 5 block (except asList which has neither) +- **asList is unique:** Uses `datashield.aggregate` instead of `datashield.assign` + +### Batch 2 — Simple Aggregations (10 pairs) +Return results to client, no server-side assignment. 
+ +| Client | Server | Permitted classes | +|--------|--------|-------------------| +| ds.class | classDS | * | +| ds.dim | dimDS | data.frame, matrix | +| ds.length | lengthDS | character, factor, integer, logical, numeric, list | +| ds.names | namesDS | * | +| ds.isNA | isNaDS | character, factor, integer, logical, numeric, data.frame, matrix | +| ds.numNA | numNaDS | * | +| ds.ls | lsDS | (no object input) | +| ds.completeCases | completeCasesDS | * (no .checkClass — server handles via own branching) | +| ds.levels | levelsDS | factor | +| ds.unique | uniqueDS | * | + +**Deferred from Batch 2:** ds.isValid / isValidDS — `isValidDS` is used as an internal disclosure-control helper by `replaceNaDS` (Batch 4), `quantileMeanDS` (Batch 3), and `rowColCalcDS` (Batch 10), all passing objects directly. Cannot change `isValidDS` signature until those callers are refactored. Refactor ds.isValid/isValidDS when the last internal caller is refactored (see Batch 10 notes). + +**Batch 2 sub-patterns:** +- **Standard eval(parse()) functions (classDS, dimDS, lengthDS, namesDS, lsDS, completeCasesDS, uniqueDS):** Server uses `eval(parse(text=x), envir=parent.frame())` — replace with `.loadServersideObject()` +- **Dispatch-layer resolution functions (isNaDS, numNaDS, levelsDS):** Server receives resolved R objects via client `as.symbol()`/`call()` dispatch — change server to accept string name + `.loadServersideObject()`, change client to `call("funcDS", x)` +- **Assign functions (completeCases, unique):** Use `datashield.assign` not `datashield.aggregate` — still remove MODULE 5 / isAssigned +- **Client-side processing to preserve:** ds.dim and ds.length have `type` parameter with alias normalization and cross-study pooling; ds.isNA has per-study loop with conditional messaging; ds.ls has wildcard `*` → `_:A:_` escaping +- **Pooling functions (dimDS, lengthDS):** Return `list(dim=..., class=...)` / `list(length=..., class=...)` so client can check cross-study class consistency 
before pooling results + +### Batch 3 — Statistics (10 pairs) +Aggregate functions returning computed values. Some have multi-step server calls. + +| Client | Server | Notes | +|--------|--------|-------| +| ds.mean | meanDS | has disclosure controls | +| ds.var | varDS | has disclosure controls | +| ds.cor | corDS | two inputs | +| ds.corTest | corTestDS | two inputs | +| ds.cov | covDS | two inputs | +| ds.kurtosis | kurtosisDS1/DS2 | multi-step | +| ds.skewness | skewnessDS1/DS2 | multi-step | +| ds.quantileMean | quantileMeanDS | aggregate | +| ds.meanSdGp | meanSdGpDS | aggregate | +| ds.summary | (check server) | aggregate | + +### Batch 4 — Data Manipulation / Assign (15 pairs) +Create/modify server objects. Many have MODULE 5 blocks. + +| Client | Server | Notes | +|--------|--------|-------| +| ds.Boole | BooleDS | assign, MODULE 5 | +| ds.c | cDS | multi-input assign | +| ds.cbind | cbindDS | multi-input, permissive check | +| ds.rbind | rbindDS | multi-input | +| ds.dataFrame | dataFrameDS | multi-input, complex | +| ds.dataFrameSort | dataFrameSortDS | assign, MODULE 5 | +| ds.dataFrameSubset | dataFrameSubsetDS1/DS2 | multi-step | +| ds.dataFrameFill | dataFrameFillDS | assign | +| ds.list | listDS | assign | +| ds.unList | unListDS | assign | +| ds.merge | mergeDS | assign, MODULE 5 | +| ds.rep | repDS | assign | +| ds.seq | seqDS | assign | +| ds.replaceNA | replaceNaDS | assign, per-source loop | +| ds.recodeValues | recodeValuesDS | assign | + +### Batch 5 — Matrix Operations (8 pairs) + +| Client | Server | +|--------|--------| +| ds.matrix | matrixDS | +| ds.matrixDet | matrixDetDS1/DS2 | +| ds.matrixDet.report | matrixDetDS2 | +| ds.matrixDiag | matrixDiagDS | +| ds.matrixDimnames | matrixDimnamesDS | +| ds.matrixInvert | matrixInvertDS | +| ds.matrixMult | matrixMultDS | +| ds.matrixTranspose | matrixTransposeDS | + +### Batch 6 — Factor & Recoding (5 pairs) + +| Client | Server | +|--------|--------| +| ds.asFactor | asFactorDS1/DS2 | +| 
ds.asFactorSimple | asFactorSimpleDS | +| ds.changeRefGroup | changeRefGroupDS | +| ds.reShape | reShapeDS | +| ds.dmtC2S | dmtC2SDS | + +### Batch 7 — Modelling (8 pairs) +Most complex. Multiple server calls, complex validation logic. + +| Client | Server | +|--------|--------| +| ds.glm | glmDS1/DS2 | +| ds.glmSLMA | glmSLMADS1/DS2/assign | +| ds.glmPredict | glmPredictDS.ag/as | +| ds.glmSummary | glmSummaryDS.ag/as | +| ds.glmerSLMA | glmerSLMADS2/assign | +| ds.lmerSLMA | lmerSLMADS2/assign | +| ds.gamlss | gamlssDS | +| ds.mice | miceDS | + +### Batch 8 — Random Generation & Sampling (6 pairs) + +| Client | Server | +|--------|--------| +| ds.rBinom | rBinomDS | +| ds.rNorm | rNormDS | +| ds.rPois | rPoisDS | +| ds.rUnif | rUnifDS | +| ds.sample | sampleDS | +| ds.setSeed | setSeedDS | + +### Batch 9 — Plotting & Visualization (7 pairs) + +| Client | Server | +|--------|--------| +| ds.histogram | histogramDS1/DS2 | +| ds.heatmapPlot | heatmapPlotDS | +| ds.contourPlot | (check server name) | +| ds.densityGrid | densityGridDS | +| ds.scatterPlot | scatterPlotDS | +| ds.boxPlot | (check server) | +| ds.boxPlotGG | boxPlotGGDS | + +**Batch 9 note:** `ds.heatmapPlot`, `ds.contourPlot`, and `ds.densityGrid` call `rangeDS` which has **not** been refactored. These calls still use `as.symbol(paste0("rangeDS(", x, ")"))`. Once `rangeDS` is refactored (batch 10 or later), go back and update these three client functions to use `call("rangeDS", x=x)`. 
+ +### Batch 10 — Splines, Tables, Misc (14 pairs) + +| Client | Server | +|--------|--------| +| ds.elspline | elsplineDS | +| ds.lspline | lsplineDS | +| ds.ns | nsDS | +| ds.qlspline | qlsplineDS | +| ds.table | tableDS/tableDS.assign/tableDS2 | +| ds.tapply | tapplyDS | +| ds.tapply.assign | tapplyDS.assign | +| ds.rowColCalc | rowColCalcDS | +| ds.make | (check server) | +| ds.assign | (check server) | +| ds.metadata | metadataDS | +| ds.getWGSR | getWGSRDS | +| ds.lexis | lexisDS1/DS2/DS3 | +| ds.hetcor | hetcorDS | + +**Batch 10 dependency:** `rowColCalcDS` calls `isValidDS(result)` internally as a disclosure check. When refactoring `rowColCalcDS`, replace this with direct disclosure logic or `.loadServersideObject()` + `.checkClass()`. Once done, also refactor `ds.isValid` / `isValidDS` (deferred from Batch 2). Similarly, `replaceNaDS` (Batch 4) and `quantileMeanDS` (Batch 3) call `isValidDS()` internally — refactor those callers first before changing `isValidDS`'s signature. + +## Known Issues + +**Batch 4:** `ds.dataFrameFill` perf test cannot run — function requires columns to differ across studies, which is hard to set up in a perf loop. + +**Batch 6:** `ds.asFactor` and `ds.changeRefGroup` perf tests fail with server-side errors. The `asFactorDS1` aggregate call errors out. `ds.changeRefGroup` may have a known pre-existing issue. Both need investigation of the batch-6 server refactoring. + +**Batch 7:** `ds.gamlss` perf test fails with server-side error. May be a batch-7 refactoring issue in `gamlssDS` or a dataset availability issue (gamlss dataset may not be configured on all Armadillo instances). + +**Batch 8:** `ds.sample` smoke test fails at the `ds.length("newobj.sample")` call — this is because the batch-2 client PR has not been merged to v7.0-dev yet, so the old `ds.length` client code cannot handle the new `list(length=..., class=...)` return from the refactored `lengthDS`. 
+ +**Batch 9:** `rangeDS` has not been refactored, so `ds.heatmapPlot`, `ds.contourPlot`, and `ds.densityGrid` still use `as.symbol(paste0("rangeDS(", x, ")"))` for `rangeDS` calls. Once `rangeDS` is refactored, update these to use `call("rangeDS", x=x)`. + +## Per-Batch Workflow + +**Important:** dsBase and dsBaseClient are separate git repos. Changes must be committed and tested in the correct order since the client depends on the server package being installed. + +### Step 0 — Branch bootstrap + +When creating a new batch branch (in either repo) from `origin/v7.0-dev`: + +1. **dsBaseClient:** copy `R/utils.R` from the most recently refactored client branch (e.g. `origin/refactor/perf-batch-4`). `origin/v7.0-dev` on the client does not yet contain it — it only enters `v7.0-dev` once the batch-1 or batch-2 client PR merges. +2. **dsBaseClient:** copy `REFACTOR_GUIDE.md` from the same branch, and add `^REFACTOR_GUIDE\.md$` to `.Rbuildignore` if not already there. This keeps the guide alongside the code being refactored so rules added in later batches are visible to everyone. +3. **dsBase:** no bootstrap copy needed — `R/utils.R` with `.loadServersideObject` / `.checkClass` is already in `origin/v7.0-dev` (merged with batch-1). + +Commit the bootstrap separately (message: `chore: bootstrap batch-N from batch-M`) before starting the refactor work. + +### Step 1 — Server-side (dsBase repo) +1. Create feature branch from `v7.0-dev` in dsBase +2. Refactor server functions: + - Replace `eval(parse())` → `.loadServersideObject()` + - Add `.checkClass()` where the client had type guards +3. Write server-side unit tests (`test-smk-functionNameDS.R`) with happy + unhappy paths +4. Run `devtools::check(args = '--no-tests')` and `devtools::test()` in dsBase +5. Build package: `devtools::build()` + +### Step 2 — Install refactored dsBase on Armadillo +6. Ensure `inst/DATASHIELD` has `default.datashield.privacyControlLevel="permissive"` before building. 
**Must be the literal string `"permissive"`** — other values like `"banana"` will not work for all functions (e.g. `levelsDS` checks for `'permissive'` explicitly).
+7. Build package: `devtools::build()` in dsBase
+8. Copy the built tar to dsBaseClient as `dsBase_7.0.0-permissive.tar.gz` (this is the filename the CI pipeline references in `armadillo_azure-pipelines.yml`)
+9. Install on local Armadillo: `armadillo.login("http://localhost:8080")` then `armadillo.install_packages(paths = "dsBase_7.0.0-permissive.tar.gz", profile = "default")`
+
+### Step 3 — Client-side (dsBaseClient repo)
+10. Create feature branch from `v7.0-dev` in dsBaseClient
+11. Ensure `R/utils.R` exists (copy from `v7.0-dev-colnames` branch if needed)
+12. Refactor client functions:
+    - Replace datasource boilerplate → `.set_datasources()`
+    - Remove `isDefined()`, `checkClass()`, `isAssigned()` calls
+    - Remove MODULE 5 blocks
+    - Replace null-input checks with `.check_df_name_provided()` where applicable
+13. Update/add client end-to-end tests with happy + unhappy paths
+14. Run `devtools::check(args = '--no-tests')` in dsBaseClient
+15. Run tests against Armadillo, **not DSLite**. Set the driver to `"ArmadilloDriver"` in `tests/testthat/connection_to_datasets/login_details.R` (default is `"DSLiteDriver"`). DSLite uses whatever dsBase is installed locally in R, which may not match the refactored version on Armadillo. Run `devtools::test(filter = "smk-|disc|arg")` for affected functions (requires refactored dsBase to be installed on Armadillo)
+
+### Step 4 — Verify
+16. Run full test suite to check no regressions
+17. Run perf tests at 30 seconds (default): `devtools::test(filter = "perf-")`
+18. Compare perf results against the v7.0-dev branch baseline to detect any regressions from the refactoring
+
+### Step 5 — Pre-merge audit (mandatory)
+
+Before marking a batch complete:
+
+1.
**Diff every touched `test-smk-*.R` / `test-arg-*.R` / `test-disc-*.R` against the branch base.** For each removed `expect_*` assertion, confirm it falls into one of: + - (a) redundant — the same behaviour is covered by another assertion still present in the same file; + - (b) replaced — a new assertion was added that covers the same server-side behaviour (e.g. `ds_expect_variables` replacing `$is.object.created`, or `ds.summary`/`ds.class` on the newobj). + + Any removed assertion that doesn't fall into (a) or (b) is a coverage loss that must be restored before merge. + +2. **Inspect every `test_that(…)` block touched by the refactor.** Each block must still contain at least one `expect_*` assertion after the refactor. Blocks stripped to just the function call are not acceptable — add `ds_expect_variables`, `expect_no_error`, or a downstream property check. + +3. **Diff every signature of every exported function touched.** Confirm no parameter was added in the middle of the signature; new parameters must be at the end (see "Adding new parameters to existing exported functions" above). + +4. **Confirm docs match signature.** `@param` blocks present for every parameter, no stale `@param` for removed arguments, `@return` not promising MODULE 5 output fields. + +5. **Grep for residual patterns that should have been removed:** `isDefined(`, `isAssigned(`, `CLIENTSIDE MODULE`, `testObjExistsDS`, `is.object.created`, `validity.check`, `studyside.messages` in source files (acceptable in test files only if the MODULE 5 pattern is being deliberately preserved with a replacement). 
+ +## Key Files + +### Reference implementation +- Client refactored: `git show v7.0-dev-colnames:R/ds.colnames.R` +- Server refactored: `/Users/tcadman/github-repos/ds-core/dsBase/R/colnamesDS.R` +- Client utils: `git show v7.0-dev-colnames:R/utils.R` +- Server utils: `/Users/tcadman/github-repos/ds-core/dsBase/R/utils.R` +- Server tests: `/Users/tcadman/github-repos/ds-core/dsBase/tests/testthat/test-smk-colnamesDS.R` +- Client tests: `/Users/tcadman/github-repos/ds-core/dsBaseClient/tests/testthat/test-smk-ds.colnames.R` + +### Guides +- `/Users/tcadman/github-repos/ds-core/dsBaseClient/REFACTOR_GUIDE.md` +- `/Users/tcadman/github-repos/ds-core/dsBase/.github/pull_request_template` + +## Verification + +For each batch: +1. Run server-side unit tests: `cd dsBase && devtools::test(filter = "functionNameDS")` +2. Run client-side smoke tests: `cd dsBaseClient && devtools::test(filter = "smk-ds.functionName")` +3. Run `devtools::check(args = '--no-tests')` on both packages +4. Run full test suite: `devtools::test(filter = "smk-|disc|arg")` to check no regressions +5. Run perf tests: `PERF_DURATION_SEC=2 devtools::test(filter = "perf-")` to verify no performance regression + +## Follow-up: refactor client-side `checkClass` + +The client-side helper `checkClass()` currently does two jobs in one: (a) fetches the class of a server-side object and (b) checks the class is consistent across studies. After this refactor, the first job is redundant (the server returns `class` in aggregate results), but the second is still needed by composite dispatchers (`ds.summary` and similar). + +Planned cleanup (defer to a dedicated branch): + +- Rename `checkClass` → `.checkClass` to mark it internal (matches `.checkClassConsistency`, `.set_datasources`). +- Split its responsibilities: one helper that fetches class for pre-call routing, one that checks cross-study consistency on a set of classes. 
+- Update the remaining callers (`ds.summary` and any others still holding a client-side class pre-fetch). + +Not done as part of any single batch because the rename touches callers outside that batch's function set and would break functions not yet refactored. Schedule once all batches are merged — the rename then becomes one small, isolated commit. diff --git a/dsBase_7.0.0-permissive.tar.gz b/dsBase_7.0.0-permissive.tar.gz index ab4b862e..9d1c8dbf 100644 Binary files a/dsBase_7.0.0-permissive.tar.gz and b/dsBase_7.0.0-permissive.tar.gz differ diff --git a/man/ds.Boole.Rd b/man/ds.Boole.Rd index 46d27e4f..af451932 100644 --- a/man/ds.Boole.Rd +++ b/man/ds.Boole.Rd @@ -38,11 +38,8 @@ objects obtained after login. If the \code{datasources} argument is not specifie the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}.} } \value{ -\code{ds.Boole} returns the object specified by the \code{newobj} argument -which is written to the server-side. Also, two validity messages are returned -to the client-side indicating the name of the \code{newobj} which -has been created in each data source and if -it is in a valid form. +\code{ds.Boole} returns the object specified by the \code{newobj} argument +which is written to the server-side. 
} \description{ It compares R objects using the standard set of @@ -129,4 +126,6 @@ Server function called: \code{BooleDS} } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.c.Rd b/man/ds.c.Rd index bc899891..c0046d46 100644 --- a/man/ds.c.Rd +++ b/man/ds.c.Rd @@ -4,7 +4,7 @@ \alias{ds.c} \title{Combines values into a vector or list in the server-side} \usage{ -ds.c(x = NULL, newobj = NULL, datasources = NULL) +ds.c(x = NULL, newobj = NULL, datasources = NULL, classConsistencyCheck = TRUE) } \arguments{ \item{x}{a vector of character string providing the names of the objects to be combined.} @@ -12,9 +12,12 @@ ds.c(x = NULL, newobj = NULL, datasources = NULL) \item{newobj}{a character string that provides the name for the output object that is stored on the data servers. Default \code{c.newobj}.} -\item{datasources}{a list of \code{\link[DSI]{DSConnection-class}} +\item{datasources}{a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. If the \code{datasources} argument is not specified the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}.} + +\item{classConsistencyCheck}{logical. If TRUE, verifies that each input object has +the same class across all studies before concatenation. Default TRUE.} } \value{ \code{ds.c} returns the vector of concatenating R @@ -71,4 +74,6 @@ Server function called: \code{cDS} } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.cbind.Rd b/man/ds.cbind.Rd index ca20b9e8..82ddbd88 100644 --- a/man/ds.cbind.Rd +++ b/man/ds.cbind.Rd @@ -10,7 +10,8 @@ ds.cbind( force.colnames = NULL, newobj = NULL, datasources = NULL, - notify.of.progress = FALSE + notify.of.progress = FALSE, + classConsistencyCheck = TRUE ) } \arguments{ @@ -27,21 +28,21 @@ Default FALSE.} column names of the output object. 
If it is not NULL the user should take some caution. For more information see \strong{Details}.} -\item{newobj}{a character string that provides the name for the output variable +\item{newobj}{a character string that provides the name for the output variable that is stored on the data servers. Defaults \code{cbind.newobj}.} -\item{datasources}{a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. +\item{datasources}{a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. If the \code{datasources} argument is not specified the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}.} \item{notify.of.progress}{specifies if console output should be produced to indicate progress. Default FALSE.} + +\item{classConsistencyCheck}{logical. If TRUE, verifies that each input object has the same class across all studies. Default TRUE.} } \value{ -\code{ds.cbind} returns a data frame combining the columns of the R -objects specified in the function which is written to the server-side. -It also returns to the client-side two messages with the name of \code{newobj} -that has been created in each data source and \code{DataSHIELD.checks} result. +\code{ds.cbind} returns a data frame combining the columns of the R +objects specified in the function which is written to the server-side. 
} \description{ Takes a sequence of vector, matrix or data-frame arguments @@ -141,4 +142,6 @@ Server function called: \code{cbindDS} } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.dataFrame.Rd b/man/ds.dataFrame.Rd index 5c7d82d7..918a5044 100644 --- a/man/ds.dataFrame.Rd +++ b/man/ds.dataFrame.Rd @@ -14,7 +14,8 @@ ds.dataFrame( DataSHIELD.checks = FALSE, newobj = NULL, datasources = NULL, - notify.of.progress = FALSE + notify.of.progress = FALSE, + classConsistencyCheck = TRUE ) } \arguments{ @@ -48,18 +49,18 @@ are the same} \item{newobj}{a character string that provides the name for the output data frame that is stored on the data servers. Default \code{dataframe.newobj}.} -\item{datasources}{a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. +\item{datasources}{a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. If the \code{datasources} argument is not specified the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}.} \item{notify.of.progress}{specifies if console output should be produced to indicate progress. Default is FALSE.} + +\item{classConsistencyCheck}{logical. If TRUE, verifies that each input object has the same class across all studies. Default TRUE.} } \value{ \code{ds.dataFrame} returns the object specified by the \code{newobj} argument -which is written to the serverside. Also, two validity messages are returned to the -client-side indicating the name of the \code{newobj} that has been created in each data source -and if it is in a valid form. +which is written to the server-side. 
} \description{ Creates a data frame from its elemental components: @@ -124,4 +125,6 @@ Server functions called: \code{classDS}, \code{colnamesDS}, \code{dataFrameDS} } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.dataFrameFill.Rd b/man/ds.dataFrameFill.Rd index 44eef9e5..8c18d805 100644 --- a/man/ds.dataFrameFill.Rd +++ b/man/ds.dataFrameFill.Rd @@ -4,7 +4,12 @@ \alias{ds.dataFrameFill} \title{Creates missing values columns in the server-side} \usage{ -ds.dataFrameFill(df.name = NULL, newobj = NULL, datasources = NULL) +ds.dataFrameFill( + df.name = NULL, + newobj = NULL, + datasources = NULL, + classConsistencyCheck = TRUE +) } \arguments{ \item{df.name}{a character string representing the name of the input data frame that will be @@ -13,9 +18,11 @@ filled with extra columns of missing values.} \item{newobj}{a character string that provides the name for the output data frame that is stored on the data servers. Default value is "dataframefill.newobj".} -\item{datasources}{a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. +\item{datasources}{a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. If the \code{datasources} argument is not specified the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}.} + +\item{classConsistencyCheck}{logical. If TRUE, verifies that each input object has the same class across all studies. 
Default TRUE.} } \value{ \code{ds.dataFrameFill} returns the object specified by the \code{newobj} argument which @@ -89,4 +96,6 @@ Server function called: \code{dataFrameFillDS} } \author{ Demetris Avraam for DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.dataFrameSort.Rd b/man/ds.dataFrameSort.Rd index 252227e5..6a7f2e9b 100644 --- a/man/ds.dataFrameSort.Rd +++ b/man/ds.dataFrameSort.Rd @@ -108,4 +108,6 @@ Server function called: \code{dataFrameSortDS}. } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.dataFrameSubset.Rd b/man/ds.dataFrameSubset.Rd index 4d2afc18..36c9eb23 100644 --- a/man/ds.dataFrameSubset.Rd +++ b/man/ds.dataFrameSubset.Rd @@ -141,4 +141,6 @@ Server functions called: \code{dataFrameSubsetDS1} and \code{dataFrameSubsetDS2} } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.list.Rd b/man/ds.list.Rd index d6ebd00e..424d1023 100644 --- a/man/ds.list.Rd +++ b/man/ds.list.Rd @@ -4,7 +4,12 @@ \alias{ds.list} \title{Constructs a list of objects in the server-side} \usage{ -ds.list(x = NULL, newobj = NULL, datasources = NULL) +ds.list( + x = NULL, + newobj = NULL, + datasources = NULL, + classConsistencyCheck = TRUE +) } \arguments{ \item{x}{a character string specifying the names of the objects to coerce into a list.} @@ -15,6 +20,9 @@ that is stored on the data servers. Default \code{list.newobj}.} \item{datasources}{a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. If the \code{datasources} argument is not specified the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}.} + +\item{classConsistencyCheck}{logical. If TRUE, verifies that each input object has +the same class across all studies before coercion. 
Default TRUE.} } \value{ \code{ds.list} returns a list of objects for each study that is stored on the server-side. @@ -68,4 +76,6 @@ Server function called: \code{listDS} } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.merge.Rd b/man/ds.merge.Rd index d6e18558..51533260 100644 --- a/man/ds.merge.Rd +++ b/man/ds.merge.Rd @@ -152,4 +152,6 @@ Server function called: \code{mergeDS} } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.rbind.Rd b/man/ds.rbind.Rd index 75c2a03f..48315a37 100644 --- a/man/ds.rbind.Rd +++ b/man/ds.rbind.Rd @@ -10,7 +10,8 @@ ds.rbind( force.colnames = NULL, newobj = NULL, datasources = NULL, - notify.of.progress = FALSE + notify.of.progress = FALSE, + classConsistencyCheck = TRUE ) } \arguments{ @@ -22,7 +23,7 @@ input objects exist and are of an appropriate class.} \item{force.colnames}{can be NULL or a vector of characters that specifies column names of the output object.} -\item{newobj}{a character string that provides the name for the output variable +\item{newobj}{a character string that provides the name for the output variable that is stored on the data servers. Defaults \code{rbind.newobj}.} \item{datasources}{a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. @@ -31,13 +32,13 @@ the default set of connections will be used: see \code{\link[DSI]{datashield.con \item{notify.of.progress}{specifies if console output should be produced to indicate progress. Default FALSE.} + +\item{classConsistencyCheck}{logical. If TRUE, verifies that each input object has the same class across all studies. Default TRUE.} } \value{ -\code{ds.rbind} returns a matrix combining the rows of the +\code{ds.rbind} returns a matrix combining the rows of the R objects specified in the function -which is written to the server-side. 
-It also returns two messages to the client-side with the name of \code{newobj} -that has been created in each data source and \code{DataSHIELD.checks} result. +which is written to the server-side. } \description{ It takes a sequence of vector, matrix or data-frame arguments @@ -105,4 +106,6 @@ Server functions called: \code{rbindDS}. } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.recodeValues.Rd b/man/ds.recodeValues.Rd index 6b775bc9..69087634 100644 --- a/man/ds.recodeValues.Rd +++ b/man/ds.recodeValues.Rd @@ -107,4 +107,6 @@ Server function called: \code{recodeValuesDS} } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.rep.Rd b/man/ds.rep.Rd index b552ff43..9fb44b51 100644 --- a/man/ds.rep.Rd +++ b/man/ds.rep.Rd @@ -128,4 +128,6 @@ Server function called: \code{repDS}. } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.replaceNA.Rd b/man/ds.replaceNA.Rd index 3b8a4ec0..f73a3ab5 100644 --- a/man/ds.replaceNA.Rd +++ b/man/ds.replaceNA.Rd @@ -107,4 +107,6 @@ Server function called: \code{replaceNaDS} } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.seq.Rd b/man/ds.seq.Rd index e8b7365f..e34edfb2 100644 --- a/man/ds.seq.Rd +++ b/man/ds.seq.Rd @@ -146,4 +146,6 @@ Server function called: \code{seqDS} } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.unList.Rd b/man/ds.unList.Rd index cb8ef0a1..658a6865 100644 --- a/man/ds.unList.Rd +++ b/man/ds.unList.Rd @@ -88,4 +88,6 @@ Server function called: \code{unListDS} } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/tests/testthat/test-arg-ds.dataFrameFill.R 
b/tests/testthat/test-arg-ds.dataFrameFill.R index 2eb77e4f..2862145c 100644 --- a/tests/testthat/test-arg-ds.dataFrameFill.R +++ b/tests/testthat/test-arg-ds.dataFrameFill.R @@ -22,7 +22,7 @@ connect.studies.dataset.cnsim(list("LAB_TSC")) # context("ds.dataFrameFill::arg::test errors") test_that("dataFrameFill_erros", { expect_error(ds.dataFrameFill(), "Please provide the name of the data.frame to be filled as a character string: eg 'xxx'", fixed=TRUE) - expect_error(ds.dataFrameFill("NonDF"), "The input object NonDF is not defined in sim1, sim2, sim3!", fixed=TRUE) + expect_error(ds.dataFrameFill("NonDF"), "DataSHIELD errors") expect_error(ds.dataFrameFill("D$LAB_TSC"), "The input vector must be of type 'data.frame' or a 'matrix'!", fixed=TRUE) }) diff --git a/tests/testthat/test-arg-ds.dataFrameSubset.R b/tests/testthat/test-arg-ds.dataFrameSubset.R index 53ec07e3..0799acf9 100644 --- a/tests/testthat/test-arg-ds.dataFrameSubset.R +++ b/tests/testthat/test-arg-ds.dataFrameSubset.R @@ -53,9 +53,9 @@ test_that("dataFrameSubset_erros", { res.errors <- DSI::datashield.errors() expect_length(res.errors, 3) - expect_match(res.errors$sim1, "* object 'M' not found*") - expect_match(res.errors$sim2, "* object 'M' not found*") - expect_match(res.errors$sim3, "* object 'M' not found*") + expect_match(res.errors$sim1, "The server-side object 'M' does not exist") + expect_match(res.errors$sim2, "The server-side object 'M' does not exist") + expect_match(res.errors$sim3, "The server-side object 'M' does not exist") }) # context("ds.dataFrameSubset::arg::missing 'V1' value server-side") @@ -65,9 +65,9 @@ test_that("dataFrameSubset_erros", { res.errors <- DSI::datashield.errors() expect_length(res.errors, 3) - expect_match(res.errors$sim1, "* Error in eval\\(parse\\(text = V1.name\\), envir = parent.frame\\(\\)\\) : \\n object 'A' not found*") - expect_match(res.errors$sim2, "* Error in eval\\(parse\\(text = V1.name\\), envir = parent.frame\\(\\)\\) : \\n object 'A' not 
found*") - expect_match(res.errors$sim3, "* Error in eval\\(parse\\(text = V1.name\\), envir = parent.frame\\(\\)\\) : \\n object 'A' not found*") + expect_match(res.errors$sim1, "The server-side object 'A' does not exist") + expect_match(res.errors$sim2, "The server-side object 'A' does not exist") + expect_match(res.errors$sim3, "The server-side object 'A' does not exist") }) # context("ds.dataFrameSubset::arg::missing 'V2' value server-side") @@ -77,9 +77,9 @@ test_that("dataFrameSubset_erros", { res.errors <- DSI::datashield.errors() expect_length(res.errors, 3) - expect_match(res.errors$sim1, "* Error in eval\\(parse\\(text = V2.name\\), envir = parent.frame\\(\\)\\) : \\n object 'B' not found*") - expect_match(res.errors$sim2, "* Error in eval\\(parse\\(text = V2.name\\), envir = parent.frame\\(\\)\\) : \\n object 'B' not found*") - expect_match(res.errors$sim3, "* Error in eval\\(parse\\(text = V2.name\\), envir = parent.frame\\(\\)\\) : \\n object 'B' not found*") + expect_match(res.errors$sim1, "The server-side object 'B' does not exist") + expect_match(res.errors$sim2, "The server-side object 'B' does not exist") + expect_match(res.errors$sim3, "The server-side object 'B' does not exist") }) # context("ds.dataFrameSubset::arg::invalid 'Boolean.operator' value server-side") diff --git a/tests/testthat/test-arg-ds.merge.R b/tests/testthat/test-arg-ds.merge.R index 4bb0bb8b..03a6967a 100644 --- a/tests/testthat/test-arg-ds.merge.R +++ b/tests/testthat/test-arg-ds.merge.R @@ -23,7 +23,7 @@ connect.studies.dataset.cnsim(list("LAB_TSC")) test_that("merge_erros", { expect_error(ds.merge(), "Please provide the name (eg 'name1') of first dataframe to be merged (called x) ", fixed=TRUE) expect_error(ds.merge(x.name="A"), "Please provide the name (eg 'name2') of second dataframe to be merged (called y) ", fixed=TRUE) - expect_error(ds.merge(x.name="D$LAB_TSC", y.name="B"), "The input object B is not defined in sim1, sim2, sim3!", fixed=TRUE) + 
expect_error(ds.merge(x.name="D$LAB_TSC", y.name="B", by.x.names="LAB_TSC", by.y.names="LAB_TSC"), "DataSHIELD errors") expect_error(ds.merge(x.name="D$LAB_TSC", y.name="D$LAB_TSC", by.x.names="C"), "Please provide the names of columns in y dataframe on which to merge (eg c('id', 'time'))", fixed=TRUE) expect_error(ds.merge(x.name="D$LAB_TSC", y.name="D$LAB_TSC", by.x.names="D$LAB_TSC", by.y.names="D", suffixes=NULL), "Please provide the suffixes to append to disambiguate duplicate column names (default = c('.x','.y'))", fixed=TRUE) }) diff --git a/tests/testthat/test-arg-ds.seq.R b/tests/testthat/test-arg-ds.seq.R index 3d047e7e..12714769 100644 --- a/tests/testthat/test-arg-ds.seq.R +++ b/tests/testthat/test-arg-ds.seq.R @@ -21,7 +21,7 @@ connect.studies.dataset.cnsim(list("LAB_TSC")) # context("ds.seq::arg::test errors") test_that("seq_erros", { - expect_error(ds.seq(FROM.value.char="Test"), "object 'Test' not found", fixed=TRUE) + expect_error(ds.seq(FROM.value.char="Test"), "'Test'") }) # diff --git a/tests/testthat/test-perf-ds.Boole.R b/tests/testthat/test-perf-ds.Boole.R new file mode 100644 index 00000000..5e8d198c --- /dev/null +++ b/tests/testthat/test-perf-ds.Boole.R @@ -0,0 +1,50 @@ + +# +# Set up +# + +# context("ds.Boole::perf::setup") +connect.studies.dataset.cnsim(list("LAB_TSC", "LAB_TRIG")) + +# +# Tests +# + +# context("ds.Boole::perf::0") +test_that("performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.Boole("D$LAB_TSC", "D$LAB_TRIG", Boolean.operator="==", newobj="boole.newobj") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.Boole::perf::0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + 
print(paste("ds.Boole::perf::0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.Boole::perf::0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.Boole::perf::0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- perf.reference.rate("ds.Boole::perf::0") + .reference.tolerance.lower <- perf.reference.tolerance.lower("ds.Boole::perf::0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.Boole::perf::0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.Boole::perf::shutdown") +disconnect.studies.dataset.cnsim() +# context("ds.Boole::perf::done") diff --git a/tests/testthat/test-perf-ds.c.R b/tests/testthat/test-perf-ds.c.R new file mode 100644 index 00000000..6ab2964d --- /dev/null +++ b/tests/testthat/test-perf-ds.c.R @@ -0,0 +1,50 @@ + +# +# Set up +# + +# context("ds.c::perf::setup") +connect.studies.dataset.cnsim(list("LAB_TSC", "LAB_TRIG")) + +# +# Tests +# + +# context("ds.c::perf::0") +test_that("performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.c(x=c("D$LAB_TSC", "D$LAB_TRIG"), newobj="c.newobj") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.c::perf::0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.c::perf::0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.c::perf::0", .current.rate, 0.5, 2.0) + } else { + 
print(paste("ds.c::perf::0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- perf.reference.rate("ds.c::perf::0") + .reference.tolerance.lower <- perf.reference.tolerance.lower("ds.c::perf::0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.c::perf::0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.c::perf::shutdown") +disconnect.studies.dataset.cnsim() +# context("ds.c::perf::done") diff --git a/tests/testthat/test-perf-ds.cbind.R b/tests/testthat/test-perf-ds.cbind.R new file mode 100644 index 00000000..897fdb59 --- /dev/null +++ b/tests/testthat/test-perf-ds.cbind.R @@ -0,0 +1,50 @@ + +# +# Set up +# + +# context("ds.cbind::perf::setup") +connect.studies.dataset.survival(list("survtime", "time.id", "female", "age.60")) + +# +# Tests +# + +# context("ds.cbind::perf::0") +test_that("performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.cbind(x=c("D$survtime", "D$female"), newobj="cbind.newobj") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.cbind::perf::0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.cbind::perf::0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.cbind::perf::0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.cbind::perf::0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / 
.reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- perf.reference.rate("ds.cbind::perf::0") + .reference.tolerance.lower <- perf.reference.tolerance.lower("ds.cbind::perf::0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.cbind::perf::0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.cbind::perf::shutdown") +disconnect.studies.dataset.survival() +# context("ds.cbind::perf::done") diff --git a/tests/testthat/test-perf-ds.dataFrame.R b/tests/testthat/test-perf-ds.dataFrame.R new file mode 100644 index 00000000..7db2ae87 --- /dev/null +++ b/tests/testthat/test-perf-ds.dataFrame.R @@ -0,0 +1,50 @@ + +# +# Set up +# + +# context("ds.dataFrame::perf::setup") +connect.studies.dataset.cnsim(list("LAB_TSC", "LAB_HDL")) + +# +# Tests +# + +# context("ds.dataFrame::perf::0") +test_that("performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.dataFrame(x=c("D$LAB_TSC", "D$LAB_HDL"), newobj="df.newobj") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.dataFrame::perf::0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.dataFrame::perf::0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.dataFrame::perf::0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.dataFrame::perf::0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + 
.reference.rate <- perf.reference.rate("ds.dataFrame::perf::0") + .reference.tolerance.lower <- perf.reference.tolerance.lower("ds.dataFrame::perf::0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.dataFrame::perf::0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.dataFrame::perf::shutdown") +disconnect.studies.dataset.cnsim() +# context("ds.dataFrame::perf::done") diff --git a/tests/testthat/test-perf-ds.dataFrameFill.R b/tests/testthat/test-perf-ds.dataFrameFill.R new file mode 100644 index 00000000..c12bcac4 --- /dev/null +++ b/tests/testthat/test-perf-ds.dataFrameFill.R @@ -0,0 +1,50 @@ + +# +# Set up +# + +# context("ds.dataFrameFill::perf::setup") +connect.studies.dataset.cnsim(list("LAB_TSC", "LAB_HDL")) + +# +# Tests +# + +# context("ds.dataFrameFill::perf::0") +test_that("performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.dataFrameFill("D", newobj="fill.newobj") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.dataFrameFill::perf::0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.dataFrameFill::perf::0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.dataFrameFill::perf::0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.dataFrameFill::perf::0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- perf.reference.rate("ds.dataFrameFill::perf::0") + 
.reference.tolerance.lower <- perf.reference.tolerance.lower("ds.dataFrameFill::perf::0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.dataFrameFill::perf::0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.dataFrameFill::perf::shutdown") +disconnect.studies.dataset.cnsim() +# context("ds.dataFrameFill::perf::done") diff --git a/tests/testthat/test-perf-ds.dataFrameSort.R b/tests/testthat/test-perf-ds.dataFrameSort.R new file mode 100644 index 00000000..9f8a4775 --- /dev/null +++ b/tests/testthat/test-perf-ds.dataFrameSort.R @@ -0,0 +1,50 @@ + +# +# Set up +# + +# context("ds.dataFrameSort::perf::setup") +connect.studies.dataset.cnsim(list("LAB_TSC", "LAB_HDL")) + +# +# Tests +# + +# context("ds.dataFrameSort::perf::0") +test_that("performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.dataFrameSort(df.name="D", sort.key.name="D$LAB_TSC", newobj="sorted.newobj") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.dataFrameSort::perf::0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.dataFrameSort::perf::0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.dataFrameSort::perf::0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.dataFrameSort::perf::0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- 
perf.reference.rate("ds.dataFrameSort::perf::0") + .reference.tolerance.lower <- perf.reference.tolerance.lower("ds.dataFrameSort::perf::0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.dataFrameSort::perf::0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.dataFrameSort::perf::shutdown") +disconnect.studies.dataset.cnsim() +# context("ds.dataFrameSort::perf::done") diff --git a/tests/testthat/test-perf-ds.dataFrameSubset.R b/tests/testthat/test-perf-ds.dataFrameSubset.R new file mode 100644 index 00000000..90be64b5 --- /dev/null +++ b/tests/testthat/test-perf-ds.dataFrameSubset.R @@ -0,0 +1,50 @@ + +# +# Set up +# + +# context("ds.dataFrameSubset::perf::setup") +connect.studies.dataset.cnsim(list("LAB_TSC", "LAB_HDL", "PM_BMI_CATEGORICAL")) + +# +# Tests +# + +# context("ds.dataFrameSubset::perf::0") +test_that("performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.dataFrameSubset(df.name="D", V1.name="D$LAB_TSC", V2.name="D$LAB_HDL", Boolean.operator=">=", newobj="subset.newobj") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.dataFrameSubset::perf::0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.dataFrameSubset::perf::0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.dataFrameSubset::perf::0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.dataFrameSubset::perf::0 ", format(.current.rate, digits = 8), ", ", 
format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- perf.reference.rate("ds.dataFrameSubset::perf::0") + .reference.tolerance.lower <- perf.reference.tolerance.lower("ds.dataFrameSubset::perf::0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.dataFrameSubset::perf::0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.dataFrameSubset::perf::shutdown") +disconnect.studies.dataset.cnsim() +# context("ds.dataFrameSubset::perf::done") diff --git a/tests/testthat/test-perf-ds.list.R b/tests/testthat/test-perf-ds.list.R new file mode 100644 index 00000000..5b6fa417 --- /dev/null +++ b/tests/testthat/test-perf-ds.list.R @@ -0,0 +1,50 @@ + +# +# Set up +# + +# context("ds.list::perf::setup") +connect.studies.dataset.cnsim(list("LAB_TSC", "LAB_HDL")) + +# +# Tests +# + +# context("ds.list::perf::0") +test_that("performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.list("D$LAB_TSC", newobj="list.newobj") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.list::perf::0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.list::perf::0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.list::perf::0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.list::perf::0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + 
.reference.rate <- perf.reference.rate("ds.list::perf::0") + .reference.tolerance.lower <- perf.reference.tolerance.lower("ds.list::perf::0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.list::perf::0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.list::perf::shutdown") +disconnect.studies.dataset.cnsim() +# context("ds.list::perf::done") diff --git a/tests/testthat/test-perf-ds.merge.R b/tests/testthat/test-perf-ds.merge.R new file mode 100644 index 00000000..c0f77437 --- /dev/null +++ b/tests/testthat/test-perf-ds.merge.R @@ -0,0 +1,50 @@ + +# +# Set up +# + +# context("ds.merge::perf::setup") +connect.studies.dataset.cnsim(list("LAB_TSC", "LAB_TRIG", "LAB_HDL", "DIS_CVA", "DIS_AMI")) + +# +# Tests +# + +# context("ds.merge::perf::0") +test_that("performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.merge(x.name="D", y.name="D", by.x.names="LAB_TSC", by.y.names="LAB_TSC", newobj="merge.newobj") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.merge::perf::0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.merge::perf::0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.merge::perf::0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.merge::perf::0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- 
perf.reference.rate("ds.merge::perf::0") + .reference.tolerance.lower <- perf.reference.tolerance.lower("ds.merge::perf::0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.merge::perf::0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.merge::perf::shutdown") +disconnect.studies.dataset.cnsim() +# context("ds.merge::perf::done") diff --git a/tests/testthat/test-perf-ds.rbind.R b/tests/testthat/test-perf-ds.rbind.R new file mode 100644 index 00000000..9ad0f843 --- /dev/null +++ b/tests/testthat/test-perf-ds.rbind.R @@ -0,0 +1,50 @@ + +# +# Set up +# + +# context("ds.rbind::perf::setup") +connect.studies.dataset.survival(list("survtime", "time.id", "female", "age.60")) + +# +# Tests +# + +# context("ds.rbind::perf::0") +test_that("performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.rbind(x=c("D", "D"), newobj="rbind.newobj") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.rbind::perf::0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.rbind::perf::0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.rbind::perf::0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.rbind::perf::0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- perf.reference.rate("ds.rbind::perf::0") + .reference.tolerance.lower <- 
perf.reference.tolerance.lower("ds.rbind::perf::0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.rbind::perf::0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.rbind::perf::shutdown") +disconnect.studies.dataset.survival() +# context("ds.rbind::perf::done") diff --git a/tests/testthat/test-perf-ds.recodeValues.R b/tests/testthat/test-perf-ds.recodeValues.R new file mode 100644 index 00000000..0eaf36c4 --- /dev/null +++ b/tests/testthat/test-perf-ds.recodeValues.R @@ -0,0 +1,50 @@ + +# +# Set up +# + +# context("ds.recodeValues::perf::setup") +connect.studies.dataset.survival(list("survtime", "time.id", "female", "age.60")) + +# +# Tests +# + +# context("ds.recodeValues::perf::0") +test_that("performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.recodeValues(var.name="D$LAB_TSC", values2replace.vector=0, new.values.vector=NA, newobj="recode.newobj") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.recodeValues::perf::0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.recodeValues::perf::0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.recodeValues::perf::0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.recodeValues::perf::0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- 
perf.reference.rate("ds.recodeValues::perf::0") + .reference.tolerance.lower <- perf.reference.tolerance.lower("ds.recodeValues::perf::0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.recodeValues::perf::0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.recodeValues::perf::shutdown") +disconnect.studies.dataset.survival() +# context("ds.recodeValues::perf::done") diff --git a/tests/testthat/test-perf-ds.rep.R b/tests/testthat/test-perf-ds.rep.R new file mode 100644 index 00000000..bb1108ff --- /dev/null +++ b/tests/testthat/test-perf-ds.rep.R @@ -0,0 +1,50 @@ + +# +# Set up +# + +# context("ds.rep::perf::setup") +connect.studies.dataset.survival(list("survtime", "time.id", "female")) + +# +# Tests +# + +# context("ds.rep::perf::0") +test_that("performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.rep(x1=5, times=10, newobj="rep.newobj") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.rep::perf::0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.rep::perf::0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.rep::perf::0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.rep::perf::0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- perf.reference.rate("ds.rep::perf::0") + .reference.tolerance.lower <- 
perf.reference.tolerance.lower("ds.rep::perf::0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.rep::perf::0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.rep::perf::shutdown") +disconnect.studies.dataset.survival() +# context("ds.rep::perf::done") diff --git a/tests/testthat/test-perf-ds.replaceNA.R b/tests/testthat/test-perf-ds.replaceNA.R new file mode 100644 index 00000000..e6935222 --- /dev/null +++ b/tests/testthat/test-perf-ds.replaceNA.R @@ -0,0 +1,50 @@ + +# +# Set up +# + +# context("ds.replaceNA::perf::setup") +connect.studies.dataset.cnsim(list("LAB_HDL", "LAB_TRIG", "DIS_CVA")) + +# +# Tests +# + +# context("ds.replaceNA::perf::0") +test_that("performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.replaceNA(x="D$LAB_TSC", forNA=c(999, 999, 999), newobj="replace.newobj") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.replaceNA::perf::0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.replaceNA::perf::0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.replaceNA::perf::0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.replaceNA::perf::0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- perf.reference.rate("ds.replaceNA::perf::0") + .reference.tolerance.lower <- 
perf.reference.tolerance.lower("ds.replaceNA::perf::0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.replaceNA::perf::0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.replaceNA::perf::shutdown") +disconnect.studies.dataset.cnsim() +# context("ds.replaceNA::perf::done") diff --git a/tests/testthat/test-perf-ds.seq.R b/tests/testthat/test-perf-ds.seq.R new file mode 100644 index 00000000..d1285857 --- /dev/null +++ b/tests/testthat/test-perf-ds.seq.R @@ -0,0 +1,50 @@ + +# +# Set up +# + +# context("ds.seq::perf::setup") +connect.studies.dataset.cnsim(list("LAB_TSC")) + +# +# Tests +# + +# context("ds.seq::perf::0") +test_that("performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.seq(FROM.value.char="1", BY.value.char="1", LENGTH.OUT.value.char="10", newobj="seq.newobj") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.seq::perf::0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.seq::perf::0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.seq::perf::0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.seq::perf::0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- perf.reference.rate("ds.seq::perf::0") + .reference.tolerance.lower <- perf.reference.tolerance.lower("ds.seq::perf::0") + .reference.tolerance.upper <- 
perf.reference.tolerance.upper("ds.seq::perf::0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.seq::perf::shutdown") +disconnect.studies.dataset.cnsim() +# context("ds.seq::perf::done") diff --git a/tests/testthat/test-perf-ds.unList.R b/tests/testthat/test-perf-ds.unList.R new file mode 100644 index 00000000..2e0e47fd --- /dev/null +++ b/tests/testthat/test-perf-ds.unList.R @@ -0,0 +1,51 @@ + +# +# Set up +# + +# context("ds.unList::perf::setup") +connect.studies.dataset.cnsim(list("GENDER")) + +# +# Tests +# + +# context("ds.unList::perf::0") +test_that("performance", { + ds.asList("D$LAB_TSC", newobj="list.newobj") + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.unList(x.name="list.newobj", newobj="unlist.newobj") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.unList::perf::0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.unList::perf::0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.unList::perf::0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.unList::perf::0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- perf.reference.rate("ds.unList::perf::0") + .reference.tolerance.lower <- perf.reference.tolerance.lower("ds.unList::perf::0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.unList::perf::0") + + 
expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.unList::perf::shutdown") +disconnect.studies.dataset.cnsim() +# context("ds.unList::perf::done") diff --git a/tests/testthat/test-smk-ds.Boole.R b/tests/testthat/test-smk-ds.Boole.R index 56983c6e..c5bbd76c 100644 --- a/tests/testthat/test-smk-ds.Boole.R +++ b/tests/testthat/test-smk-ds.Boole.R @@ -29,9 +29,6 @@ test_that("setup", { test_that("simple boole, variable", { res <- ds.Boole("D$LAB_TSC", "D$LAB_TRIG", "==") - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") res.boole <- ds.summary("boole.newobj") @@ -78,9 +75,6 @@ test_that("simple boole, variable", { test_that("simple boole, small neg constant V2", { res <- ds.Boole("D$LAB_TRIG", "-1", "<", numeric.output = TRUE, newobj = "boole01.obj") - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") res.boole <- ds.summary("boole01.obj") @@ -126,9 +120,6 @@ test_that("simple boole, small neg constant V2", { test_that("simple boole, big neg constant V2", { res <- ds.Boole("D$LAB_TRIG", "-10", "<", numeric.output = TRUE, newobj = "boole02.obj") - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") res.boole <- ds.summary("boole02.obj") diff --git a/tests/testthat/test-smk-ds.cbind.R b/tests/testthat/test-smk-ds.cbind.R index 720d5d94..b3aac867 100644 --- 
a/tests/testthat/test-smk-ds.cbind.R +++ b/tests/testthat/test-smk-ds.cbind.R @@ -29,8 +29,6 @@ test_that("setup", { test_that("simple test, from dataframe variables", { res <- ds.cbind(c("D$survtime", "D$time.id", "D$female", "D$age.60"), newobj="cbind1_newobj") - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") res1 <- ds.class("cbind1_newobj") expect_true("data.frame" %in% res1$survival1) @@ -82,8 +80,6 @@ test_that("simple test, from root variables", { res <- ds.cbind(c("survtime", "time.id", "female", "age.60"), newobj="cbind2_newobj") - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") ds.rm("survtime") ds.rm("time.id") @@ -135,8 +131,6 @@ test_that("simple test, from root variables", { test_that("simple test, from dataframe variables, DataSHIELD.check=TRUE", { res <- ds.cbind(c("D$survtime", "D$time.id", "D$female", "D$age.60"), DataSHIELD.check=TRUE, newobj="cbind3_newobj") - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") res1 <- ds.class("cbind3_newobj") expect_true("data.frame" %in% res1$survival1) @@ -188,8 +182,6 @@ test_that("simple test, from root variables, DataSHIELD.check=TRUE", { res <- ds.cbind(c("survtime", "time.id", "female", "age.60"), DataSHIELD.check=TRUE, newobj="cbind4_newobj") - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") ds.rm("survtime") ds.rm("time.id") diff --git a/tests/testthat/test-smk-ds.dataFrame.R b/tests/testthat/test-smk-ds.dataFrame.R index 4740efd0..8bfb2d7c 100644 --- a/tests/testthat/test-smk-ds.dataFrame.R +++ 
b/tests/testthat/test-smk-ds.dataFrame.R @@ -33,9 +33,6 @@ test_that("dataframe_exists", { res <- ds.dataFrame(x=vectors) - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") ds.rm("LAB_TSC") ds.rm("LAB_HDL") @@ -77,9 +74,6 @@ test_that("dataframe_exists, with DataSHIELD.checks", { res <- ds.dataFrame(x=vectors, DataSHIELD.checks=TRUE, newobj="dataframe1") - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") ds.rm("LAB_TSC") ds.rm("LAB_HDL") @@ -113,9 +107,6 @@ test_that("dataframe_exists, from dataframe variables", { vectors <- c('D$LAB_TSC', 'D$LAB_HDL') res <- ds.dataFrame(x=vectors) - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") res.ls <- ds.ls() expect_length(res.ls, 3) @@ -151,9 +142,6 @@ test_that("dataframe_exists, with DataSHIELD.checks, from dataframe variables", vectors <- c('D$LAB_TSC', 'D$LAB_HDL') res <- ds.dataFrame(x=vectors, DataSHIELD.checks=TRUE, newobj="dataframe1") - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") res.ls <- ds.ls() expect_length(res.ls, 3) diff --git a/tests/testthat/test-smk-ds.dataFrameFill-factor.R b/tests/testthat/test-smk-ds.dataFrameFill-factor.R index bc5464dc..7d2c9510 100644 --- a/tests/testthat/test-smk-ds.dataFrameFill-factor.R +++ b/tests/testthat/test-smk-ds.dataFrameFill-factor.R @@ -70,9 +70,6 @@ test_that("setup", { test_that("dataFrameFill_exists", { res <- ds.dataFrameFill(df.name="DD", newobj="filled_df") - expect_length(res, 2) - 
expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") colnamesFilled <- ds.colnames('filled_df') diff --git a/tests/testthat/test-smk-ds.dataFrameFill.R b/tests/testthat/test-smk-ds.dataFrameFill.R index 74bc1c1e..c05064fa 100644 --- a/tests/testthat/test-smk-ds.dataFrameFill.R +++ b/tests/testthat/test-smk-ds.dataFrameFill.R @@ -42,9 +42,6 @@ test_that("dataFrameFill_exists", { res <- ds.dataFrameFill(df.name="D", newobj="filled_df") - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") colnamesFilled <- ds.colnames('filled_df') diff --git a/tests/testthat/test-smk-ds.dataFrameSort.R b/tests/testthat/test-smk-ds.dataFrameSort.R index 805c2b2d..73ca599b 100644 --- a/tests/testthat/test-smk-ds.dataFrameSort.R +++ b/tests/testthat/test-smk-ds.dataFrameSort.R @@ -30,18 +30,9 @@ test_that("dataFrameSort_exists", { myvectors <- c('D$LAB_TSC', 'D$LAB_HDL') ds.dataFrame(x=myvectors, newobj="unsorted_df") - res <- ds.dataFrameSort(df.name="unsorted_df", sort.key.name="D$LAB_TSC", newobj="sorted_df") - - expect_length(res, 3) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") - expect_length(res$studyside.messages, 3) - expect_length(res$studyside.messages$sim1, 1) - expect_equal(res$studyside.messages$sim1, "ALL OK: there are no studysideMessage(s) on this datasource") - expect_length(res$studyside.messages$sim2, 1) - expect_equal(res$studyside.messages$sim2, "ALL OK: there are no studysideMessage(s) on this datasource") - expect_length(res$studyside.messages$sim3, 1) - expect_equal(res$studyside.messages$sim3, "ALL OK: there are no studysideMessage(s) on this datasource") + 
ds.dataFrameSort(df.name="unsorted_df", sort.key.name="D$LAB_TSC", newobj="sorted_df") + + ds_expect_variables(c("D", "unsorted_df", "sorted_df")) }) # diff --git a/tests/testthat/test-smk-ds.dataFrameSubset.R b/tests/testthat/test-smk-ds.dataFrameSubset.R index 0d3fbe68..b5f60145 100644 --- a/tests/testthat/test-smk-ds.dataFrameSubset.R +++ b/tests/testthat/test-smk-ds.dataFrameSubset.R @@ -56,9 +56,6 @@ test_that("dataFrameSubset_exists", { expect_equal(res.dim2$`dimensions of subset_df in sim3`[1], 3472) expect_equal(res.dim2$`dimensions of subset_df in sim3`[2], 2) - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") }) # context("ds.dataFrameSubset::smk::create a subset dataframe, based on scalar") @@ -92,9 +89,6 @@ test_that("dataFrameSubset_exists scalar", { expect_equal(res.dim2$`dimensions of subset_df in sim3`[1], 2769) expect_equal(res.dim2$`dimensions of subset_df in sim3`[2], 3) - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") }) # diff --git a/tests/testthat/test-smk-ds.merge.R b/tests/testthat/test-smk-ds.merge.R index fb58dcf5..31b6c806 100644 --- a/tests/testthat/test-smk-ds.merge.R +++ b/tests/testthat/test-smk-ds.merge.R @@ -32,11 +32,9 @@ test_that("simple test", { ds.dataFrame(x=spec_vectors_1, newobj="test_1_df") ds.dataFrame(x=spec_vectors_2, newobj="test_2_df") - res <- ds.merge(x.name="test_1_df", y.name="test_2_df", by.x.names="LAB_TSC", by.y.names="LAB_TSC", newobj="merge_newobj") + ds.merge(x.name="test_1_df", y.name="test_2_df", by.x.names="LAB_TSC", by.y.names="LAB_TSC", newobj="merge_newobj") - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " 
appears valid in all sources") + ds_expect_variables(c("D", "test_1_df", "test_2_df", "merge_newobj")) class.res <- ds.class("merge_newobj") diff --git a/tests/testthat/test-smk-ds.rbind.R b/tests/testthat/test-smk-ds.rbind.R index af01b50e..627b5ffa 100644 --- a/tests/testthat/test-smk-ds.rbind.R +++ b/tests/testthat/test-smk-ds.rbind.R @@ -27,10 +27,9 @@ test_that("setup", { # context("ds.rbind::smk") test_that("simple test", { - res <- ds.rbind(c("D$survtime", "D$time.id", "D$female", "D$age.60"), newobj="rbind_newobj") + ds.rbind(c("D$survtime", "D$time.id", "D$female", "D$age.60"), newobj="rbind_newobj") - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") + ds_expect_variables(c("D", "rbind_newobj")) res1 <- ds.class("rbind_newobj") diff --git a/tests/testthat/test-smk-ds.recodeValues.R b/tests/testthat/test-smk-ds.recodeValues.R index a20d4db4..a9ebd893 100644 --- a/tests/testthat/test-smk-ds.recodeValues.R +++ b/tests/testthat/test-smk-ds.recodeValues.R @@ -27,11 +27,9 @@ test_that("setup", { # context("ds.recodeValues::smk") test_that("simple test", { - res <- ds.recodeValues("D$survtime", values2replace.vector=c(0,1), new.values.vector=c(-10,10), newobj="recodevalues_newobj") + ds.recodeValues("D$survtime", values2replace.vector=c(0,1), new.values.vector=c(-10,10), newobj="recodevalues_newobj") - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") + ds_expect_variables(c("D", "recodevalues_newobj")) new.res <- ds.class("recodevalues_newobj") diff --git a/tests/testthat/test-smk-ds.rep-complex.R b/tests/testthat/test-smk-ds.rep-complex.R index 7d86a826..83000f50 100644 --- a/tests/testthat/test-smk-ds.rep-complex.R +++ b/tests/testthat/test-smk-ds.rep-complex.R @@ -28,17 +28,13 @@ test_that("setup", { # 
context("ds.rep::smk::complex") test_that("complex test", { - res1 <- ds.rep(x1 = 4, times = 6, length.out = NA, each = 1, source.x1 = "clientside", source.times = "c", source.length.out = NULL, source.each = "c", x1.includes.characters = FALSE, newobj = "rep1.seq") - - expect_length(res1, 2) - expect_equal(res1$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res1$validity.check, " appears valid in all sources") + ds.rep(x1 = 4, times = 6, length.out = NA, each = 1, source.x1 = "clientside", source.times = "c", source.length.out = NULL, source.each = "c", x1.includes.characters = FALSE, newobj = "rep1.seq") - res2 <- ds.rep(x1 = "lung", times = 6, length.out = 7, each = 1, source.x1 = "clientside", source.times = "c", source.length.out = "c", source.each = "c", x1.includes.characters = TRUE, newobj = "rep2.seq") + ds_expect_variables(c("D", "rep1.seq")) - expect_length(res2, 2) - expect_equal(res2$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res2$validity.check, " appears valid in all sources") + ds.rep(x1 = "lung", times = 6, length.out = 7, each = 1, source.x1 = "clientside", source.times = "c", source.length.out = "c", source.each = "c", x1.includes.characters = TRUE, newobj = "rep2.seq") + + ds_expect_variables(c("D", "rep1.seq", "rep2.seq")) }) # diff --git a/tests/testthat/test-smk-ds.rep.R b/tests/testthat/test-smk-ds.rep.R index e3819cc7..076619b2 100644 --- a/tests/testthat/test-smk-ds.rep.R +++ b/tests/testthat/test-smk-ds.rep.R @@ -27,11 +27,9 @@ test_that("setup", { # context("ds.rep::smk") test_that("simple test", { - res <- ds.rep("D$survtime") + ds.rep("D$survtime") - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") + ds_expect_variables(c("D", "seq.vect")) }) # diff --git a/tests/testthat/test-smk-ds.seq.R 
b/tests/testthat/test-smk-ds.seq.R index 7da50458..e0fb65a0 100644 --- a/tests/testthat/test-smk-ds.seq.R +++ b/tests/testthat/test-smk-ds.seq.R @@ -27,19 +27,15 @@ test_that("setup", { # context("ds.seq::smk") test_that("simplest ds.seq", { - seq.res <- ds.seq(FROM.value.char="1", BY.value.char="1", LENGTH.OUT.value.char="10", ALONG.WITH.name=NULL, newobj="obj1") + ds.seq(FROM.value.char="1", BY.value.char="1", LENGTH.OUT.value.char="10", ALONG.WITH.name=NULL, newobj="obj1") - expect_true(length(seq.res) == 2) - expect_equal(seq.res[[1]], "A data object has been created in all specified data sources") - expect_equal(seq.res[[2]], " appears valid in all sources") + ds_expect_variables(c("D", "obj1")) }) test_that("simplest ds.seq", { - seq.res <- ds.seq(FROM.value.char="1", BY.value.char="1", LENGTH.OUT.value.char=NULL, ALONG.WITH.name="D$LAB_TSC", newobj="obj2") + ds.seq(FROM.value.char="1", BY.value.char="1", LENGTH.OUT.value.char=NULL, ALONG.WITH.name="D$LAB_TSC", newobj="obj2") - expect_true(length(seq.res) == 2) - expect_equal(seq.res[[1]], "A data object has been created in all specified data sources") - expect_equal(seq.res[[2]], " appears valid in all sources") + ds_expect_variables(c("D", "obj1", "obj2")) }) # diff --git a/tests/testthat/test-smk-ds.unList.R b/tests/testthat/test-smk-ds.unList.R index 806b1331..c8fdcd69 100644 --- a/tests/testthat/test-smk-ds.unList.R +++ b/tests/testthat/test-smk-ds.unList.R @@ -29,11 +29,9 @@ test_that("setup", { test_that("simple test", { ds.asList(x.name="D$GENDER", newobj="GENDER.list") - res <- ds.unList("GENDER.list") + ds.unList("GENDER.list") - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") + ds_expect_variables(c("D", "GENDER.list", "unlist.newobj")) }) # diff --git a/tests/testthat/test-smk_dgr-ds.recodeValues-factor.R 
b/tests/testthat/test-smk_dgr-ds.recodeValues-factor.R index 180278c9..ae3defa6 100644 --- a/tests/testthat/test-smk_dgr-ds.recodeValues-factor.R +++ b/tests/testthat/test-smk_dgr-ds.recodeValues-factor.R @@ -29,9 +29,6 @@ test_that("setup", { test_that("simple factor 1", { res <- ds.recodeValues("D$GENDER", values2replace.vector=c('0'), new.values.vector=c('2'), newobj="GENDER") - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") ds.dataFrame(c('D$GENDER'), newobj='odf_1') odf <- ds.DANGERdfEXTRACT('odf_1') @@ -84,9 +81,6 @@ test_that("simple factor 1", { test_that("simple factor 2", { res <- ds.recodeValues("D$GENDER", values2replace.vector=c(0), new.values.vector=c(2), newobj="GENDER") - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") ds.dataFrame(c('D$GENDER'), newobj='odf_2') odf <- ds.DANGERdfEXTRACT('odf_2') @@ -139,9 +133,6 @@ test_that("simple factor 2", { test_that("simple factor 3", { res <- ds.recodeValues("D$GENDER", values2replace.vector=c(0), new.values.vector=c(2), missing='3', newobj="GENDER") - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") ds.dataFrame(c('D$GENDER'), newobj='odf_3') odf <- ds.DANGERdfEXTRACT('odf_3') @@ -200,9 +191,6 @@ test_that("simple factor 3", { test_that("simple factor 4", { res <- ds.recodeValues("D$GENDER", values2replace.vector=c(0,1), new.values.vector=c(10,20), newobj="GENDER") - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") ds.dataFrame(c('D$GENDER'), 
newobj='odf_4') odf <- ds.DANGERdfEXTRACT('odf_4') diff --git a/tests/testthat/test-smk_dgr-ds.recodeValues-factor_missing.R b/tests/testthat/test-smk_dgr-ds.recodeValues-factor_missing.R index 2172c27f..e091080e 100644 --- a/tests/testthat/test-smk_dgr-ds.recodeValues-factor_missing.R +++ b/tests/testthat/test-smk_dgr-ds.recodeValues-factor_missing.R @@ -29,9 +29,6 @@ test_that("setup", { test_that("simple missing factor 1", { res <- ds.recodeValues("D$PM_BMI_CATEGORICAL", values2replace.vector=c(1,2,3), new.values.vector=c(10,20,30), missing='999', newobj="PM_BMI_CATEGORICAL") - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") ds.dataFrame(c('D$PM_BMI_CATEGORICAL'), newobj='odf_1') odf <- ds.DANGERdfEXTRACT('odf_1') @@ -114,9 +111,6 @@ test_that("simple missing factor 1", { test_that("simple missing factor 2", { res <- ds.recodeValues("D$PM_BMI_CATEGORICAL", values2replace.vector=c(1,2), new.values.vector=c(1,2), missing=99, newobj="PM_BMI_CATEGORICAL") - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") ds.dataFrame(c('D$PM_BMI_CATEGORICAL'), newobj='odf_2') odf <- ds.DANGERdfEXTRACT('odf_2') @@ -181,9 +175,6 @@ test_that("simple missing factor 2", { test_that("simple missing factor 3", { res <- ds.recodeValues("D$PM_BMI_CATEGORICAL", values2replace.vector=c(1,2,3), new.values.vector=c('low','medium','high'), newobj="PM_BMI_CATEGORICAL") - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") ds.dataFrame(c('D$PM_BMI_CATEGORICAL'), newobj='odf_3') odf <- ds.DANGERdfEXTRACT('odf_3') diff --git a/tests/testthat/test-smk_dgr-ds.recodeValues.R 
b/tests/testthat/test-smk_dgr-ds.recodeValues.R index a7bf7880..0828712f 100644 --- a/tests/testthat/test-smk_dgr-ds.recodeValues.R +++ b/tests/testthat/test-smk_dgr-ds.recodeValues.R @@ -29,9 +29,6 @@ test_that("setup", { test_that("simple test", { res <- ds.recodeValues("D$time.id", values2replace.vector=c(1,2), new.values.vector=c(10,20), newobj="time.id") - expect_length(res, 2) - expect_equal(res$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res$validity.check, " appears valid in all sources") ds.dataFrame(c('D$time.id'), newobj='odf') odf <- ds.DANGERdfEXTRACT('odf')