diff --git a/NAMESPACE b/NAMESPACE index 8bdab82e..bd539a11 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -119,6 +119,8 @@ export(ds.var) export(ds.vectorCalc) import(DSI) import(data.table) +importFrom(DSI,datashield.connections_find) +importFrom(cli,cli_abort) importFrom(stats,as.formula) importFrom(stats,na.omit) importFrom(stats,ts) diff --git a/R/ds.class.R b/R/ds.class.R index 036848ad..ab6e8937 100644 --- a/R/ds.class.R +++ b/R/ds.class.R @@ -11,6 +11,7 @@ #' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}. #' @return \code{ds.class} returns the type of the R object. #' @author DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @seealso \code{\link{ds.exists}} to verify if an object is defined (exists) on the server-side. #' @examples #' \dontrun{ @@ -54,23 +55,12 @@ #' ds.class <- function(x=NULL, datasources=NULL) { - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) if(is.null(x)){ stop("Please provide the name of the input object!", call.=FALSE) } - # check if the input object is defined in all the studies - defined <- isDefined(datasources, x) - cally <- call('classDS', x) output <- DSI::datashield.aggregate(datasources, cally) diff --git a/R/ds.completeCases.R b/R/ds.completeCases.R index ed95bf6d..107f70de 100644 --- a/R/ds.completeCases.R +++ b/R/ds.completeCases.R @@ -68,123 +68,22 @@ #' } #' #' @author DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @export #' ds.completeCases <- function(x1=NULL, newobj=NULL, datasources=NULL){ - 
- # if no connection login details are provided look for 'connection' objects in the environment - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) - # check if a value has been provided for x1 if(is.null(x1)){ return("Error: x1 must be a character string naming a serverside data.frame, matrix or vector") } - - # check if the input object is defined in all the studies - isDefined(datasources, x1) - - # rename target object for transfer (not strictly necessary as string will pass parser anyway) - # but maintains consistency with other functions - x1.transmit <- x1 - # if no value specified for output object, then specify a default if(is.null(newobj)){ newobj <- paste0(x1,"_complete.cases") } - # CALL THE MAIN SERVER SIDE FUNCTION - calltext <- call("completeCasesDS", x1.transmit) + calltext <- call("completeCasesDS", x1) DSI::datashield.assign(datasources, newobj, calltext) - -############################################################################################################# -#DataSHIELD CLIENTSIDE MODULE: CHECK KEY DATA OBJECTS SUCCESSFULLY CREATED # - # -#SET APPROPRIATE PARAMETERS FOR THIS PARTICULAR FUNCTION # -test.obj.name<-newobj # - # -#TRACER # -#return(test.obj.name) # -#} # - # - # -# CALL SEVERSIDE FUNCTION # -calltext <- call("testObjExistsDS", test.obj.name) # - # -object.info<-DSI::datashield.aggregate(datasources, calltext) # - # -# CHECK IN EACH SOURCE WHETHER OBJECT NAME EXISTS # -# AND WHETHER OBJECT PHYSICALLY EXISTS WITH A NON-NULL CLASS # -num.datasources<-length(object.info) # - # - # -obj.name.exists.in.all.sources<-TRUE # -obj.non.null.in.all.sources<-TRUE # - # -for(j in 1:num.datasources){ # 
- if(!object.info[[j]]$test.obj.exists){ # - obj.name.exists.in.all.sources<-FALSE # - } # - if(is.null(object.info[[j]]$test.obj.class) || ("ABSENT" %in% object.info[[j]]$test.obj.class)){ # - obj.non.null.in.all.sources<-FALSE # - } # - } # - # -if(obj.name.exists.in.all.sources && obj.non.null.in.all.sources){ # - # - return.message<- # - paste0("A data object <", test.obj.name, "> has been created in all specified data sources") # - # - # - }else{ # - # - return.message.1<- # - paste0("Error: A valid data object <", test.obj.name, "> does NOT exist in ALL specified data sources") # - # - return.message.2<- # - paste0("It is either ABSENT and/or has no valid content/class,see return.info above") # - # - return.message.3<- # - paste0("Please use ds.ls() to identify where missing") # - # - # - return.message<-list(return.message.1,return.message.2,return.message.3) # - # - } # - # - calltext <- call("messageDS", test.obj.name) # - studyside.message<-DSI::datashield.aggregate(datasources, calltext) # - # - no.errors<-TRUE # - for(nd in 1:num.datasources){ # - if(studyside.message[[nd]]!="ALL OK: there are no studysideMessage(s) on this datasource"){ # - no.errors<-FALSE # - } # - } # - # - # - if(no.errors){ # - validity.check<-paste0("<",test.obj.name, "> appears valid in all sources") # - return(list(is.object.created=return.message,validity.check=validity.check)) # - } # - # -if(!no.errors){ # - validity.check<-paste0("<",test.obj.name,"> invalid in at least one source. 
See studyside.messages:") # - return(list(is.object.created=return.message,validity.check=validity.check, # - studyside.messages=studyside.message)) # - } # - # -#END OF CHECK OBJECT CREATED CORECTLY MODULE # -############################################################################################################# - } -#ds.completeCases - - diff --git a/R/ds.dim.R b/R/ds.dim.R index 4a6cd3a7..0031037b 100644 --- a/R/ds.dim.R +++ b/R/ds.dim.R @@ -7,21 +7,18 @@ #' from every single study and the pooled dimension of the object by summing up the individual #' dimensions returned from each study. #' -#' In \code{checks} parameter is suggested that checks should only be undertaken once the -#' function call has failed. -#' #' Server function called: \code{dimDS} -#' -#' @param x a character string providing the name of the input object. -#' @param type a character string that represents the type of analysis to carry out. +#' +#' @param x a character string providing the name of the input object. +#' @param type a character string that represents the type of analysis to carry out. #' If \code{type} is set to \code{'combine'}, \code{'combined'}, \code{'combines'} or \code{'c'}, -#' the global dimension is returned. -#' If \code{type} is set to \code{'split'}, \code{'splits'} or \code{'s'}, +#' the global dimension is returned. +#' If \code{type} is set to \code{'split'}, \code{'splits'} or \code{'s'}, #' the dimension is returned separately for each study. #' If \code{type} is set to \code{'both'} or \code{'b'}, both sets of outputs are produced. -#' Default \code{'both'}. -#' @param checks logical. If TRUE undertakes all DataSHIELD checks (time-consuming). -#' Default FALSE. +#' Default \code{'both'}. +#' @param classConsistencyCheck logical. If TRUE, checks that the input object has the same +#' class across all studies. Default TRUE. #' @param datasources a list of \code{\link[DSI]{DSConnection-class}} #' objects obtained after login. 
If the \code{datasources} argument is not specified #' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}. @@ -29,6 +26,7 @@ #' in the form of a vector where the first #' element indicates the number of rows and the second element indicates the number of columns. #' @author DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @seealso \code{\link{ds.dataFrame}} to generate a table of the type data frame. #' @seealso \code{\link{ds.changeRefGroup}} to change the reference level of a factor. #' @seealso \code{\link{ds.colnames}} to obtain the column names of a matrix or a data frame @@ -67,68 +65,44 @@ #' # Calculate the dimension #' ds.dim(x="D", #' type="combine", #global dimension -#' checks = FALSE, -#' datasources = connections)#all opal servers are used +#' datasources = connections)#all opal servers are used #' ds.dim(x="D", #' type = "both",#separate dimension for each study #' #and the pooled dimension (default) -#' checks = FALSE, -#' datasources = connections)#all opal servers are used +#' datasources = connections)#all opal servers are used #' ds.dim(x="D", #' type="split", #separate dimension for each study -#' checks = FALSE, -#' datasources = connections[1])#only the first opal server is used ("study1") +#' datasources = connections[1])#only the first opal server is used ("study1") #' #' # clear the Datashield R sessions and logout #' datashield.logout(connections) #' #' } #' -ds.dim <- function(x=NULL, type='both', checks=FALSE, datasources=NULL) { +ds.dim <- function(x=NULL, type='both', classConsistencyCheck=TRUE, datasources=NULL) { - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to 
be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) if(is.null(x)){ stop("Please provide the name of a data.frame or matrix!", call.=FALSE) } - ######################################################################################################## - # MODULE: GENERIC OPTIONAL CHECKS TO ENSURE CONSISTENT STRUCTURE OF KEY VARIABLES IN DIFFERENT SOURCES # - # beginning of optional checks - the process stops and reports as soon as one check fails # - # # - if(checks){ # - message(" -- Verifying the variables in the model") # - # check if the input object(s) is(are) defined in all the studies # - defined <- isDefined(datasources, x) # # - # call the internal function that checks the input object is suitable in all studies # - typ <- checkClass(datasources, x) # - # throw a message and stop if input is not table structure # - if(!('data.frame' %in% typ) & !('matrix' %in% typ)){ # - stop("The input object must be a table structure!", call.=FALSE) # - } # - } # - ######################################################################################################## - - ################################################################################################### #MODULE: EXTEND "type" argument to include "both" and enable valid aliases # if(type == 'combine' | type == 'combined' | type == 'combines' | type == 'c') type <- 'combine' # if(type == 'split' | type == 'splits' | type == 's') type <- 'split' # if(type == 'both' | type == 'b' ) type <- 'both' # - # - #MODIFY FUNCTION CODE TO DEAL WITH ALL THREE TYPES # ################################################################################################### cally <- call("dimDS", x) - dimensions <- DSI::datashield.aggregate(datasources, cally) + results <- DSI::datashield.aggregate(datasources, cally) + + if(classConsistencyCheck){ + .checkClassConsistency(results) + } + + # extract dimensions from results + dimensions <- lapply(results, function(r) r$dim) # 
names of the studies to be used in the output stdnames <- names(datasources) diff --git a/R/ds.isNA.R b/R/ds.isNA.R index 1d84577f..16bbe058 100644 --- a/R/ds.isNA.R +++ b/R/ds.isNA.R @@ -14,6 +14,7 @@ #' @return \code{ds.isNA} returns a boolean. If it is TRUE the vector is empty #' (all values are NA), FALSE otherwise. #' @author DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @export #' @examples #' \dontrun{ @@ -55,48 +56,29 @@ #' #' } #' -ds.isNA <- function(x=NULL, datasources=NULL){ +ds.isNA <- function(x=NULL, classConsistencyCheck=TRUE, datasources=NULL){ - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) if(is.null(x)){ stop("Please provide the name of the input vector!", call.=FALSE) } - # check if the input object is defined in all the studies - isDefined(datasources, x) - - # call the internal function that checks the input object is of the same class in all studies. 
- typ <- checkClass(datasources, x) - - # the input object must be a vector - if(!('character' %in% typ) & !('factor' %in% typ) & !('integer' %in% typ) & !('logical' %in% typ) & !('numeric' %in% typ) & !('data.frame' %in% typ) & !('matrix' %in% typ)){ - stop("The input object must be a character, factor, integer, logical or numeric vector.", call.=FALSE) - } - - # name of the studies to be used in the plots' titles stdnames <- names(datasources) - - # name of the variable xnames <- extract(x) varname <- xnames$elements - # keep of the results of the checks for each study - track <- list() + cally <- call("isNaDS", x) + results <- DSI::datashield.aggregate(datasources, cally) - # call server side function 'isNaDS' to check, in each study, if the vector is empty - for(i in 1: length(datasources)){ - cally <- call("isNaDS", x) - out <- DSI::datashield.aggregate(datasources[i], cally) - if(out[[1]]){ + if(classConsistencyCheck){ + .checkClassConsistency(results) + } + + # report per-study if all NA + track <- list() + for(i in 1:length(results)){ + if(results[[i]]$is.na){ track[[i]] <- TRUE message("The variable ", varname, " in ", stdnames[i], " is missing at complete (all values are 'NA').") }else{ diff --git a/R/ds.length.R b/R/ds.length.R index 83cb5cae..fdb1dad8 100644 --- a/R/ds.length.R +++ b/R/ds.length.R @@ -14,15 +14,15 @@ #' if \code{type} is set to \code{'both'} or \code{'b'}, #' both sets of outputs are produced. #' Default \code{'both'}. -#' @param checks logical. If TRUE the model components are checked. -#' Default FALSE to save time. It is suggested that checks -#' should only be undertaken once the function call has failed. +#' @param classConsistencyCheck logical. If TRUE, checks that the input object has the same +#' class across all studies. Default TRUE. #' @param datasources a list of \code{\link[DSI]{DSConnection-class}} #' objects obtained after login. 
If the \code{datasources} argument is not specified #' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}. #' @return \code{ds.length} returns to the client-side the pooled length of a vector or a list, #' or the length of a vector or a list for each study separately. #' @author DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @export #' @examples #' \dontrun{ @@ -74,50 +74,33 @@ #' datashield.logout(connections) #' } #' -ds.length <- function(x=NULL, type='both', checks='FALSE', datasources=NULL){ +ds.length <- function(x=NULL, type='both', classConsistencyCheck=TRUE, datasources=NULL){ + + datasources <- .set_datasources(datasources) - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } - if(is.null(x)){ stop("Please provide the name of the input object!", call.=FALSE) - } - - # beginning of optional checks - the process stops and reports as soon as one check fails - if(checks){ - - # check if the input object is defined in all the studies - isDefined(datasources, x) - - # call the internal function that checks the input object is suitable in all studies - typ <- checkClass(datasources, x) - - # the input object must be a vector or a list - if(!('character' %in% typ) & !('factor' %in% typ) & !('integer' %in% typ) & !('logical' %in% typ) & !('numeric' %in% typ) & !('list' %in% typ)){ - stop("The input object must be a character, factor, integer, logical or numeric vector or a list.", call.=FALSE) - } - - } + } ################################################################################################### - # MODULE: EXTEND "type" argument to 
include "both" and enable valid alisases # + # MODULE: EXTEND "type" argument to include "both" and enable valid aliases # if(type == 'combine' | type == 'combined' | type == 'combines' | type == 'c') type <- 'combine' # if(type == 'split' | type == 'splits' | type == 's') type <- 'split' # if(type == 'both' | type == 'b' ) type <- 'both' # if(type != 'combine' & type != 'split' & type != 'both'){ # stop('Function argument "type" has to be either "both", "combine" or "split"', call.=FALSE) # } - + # call the server-side function cally <- call("lengthDS", x) - lengths <- DSI::datashield.aggregate(datasources, cally) + results <- DSI::datashield.aggregate(datasources, cally) + + if(classConsistencyCheck){ + .checkClassConsistency(results) + } + + # extract lengths from results + lengths <- lapply(results, function(r) r$length) # names of the studies to be used in the output stdnames <- names(datasources) diff --git a/R/ds.levels.R b/R/ds.levels.R index b32a5d1c..5dc650b4 100644 --- a/R/ds.levels.R +++ b/R/ds.levels.R @@ -12,6 +12,7 @@ #' @return \code{ds.levels} returns to the client-side the levels of a factor #' class variable stored in the server-side. 
#' @author DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @export #' @examples #' \dontrun{ @@ -58,35 +59,16 @@ #' ds.levels <- function(x=NULL, datasources=NULL){ - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) if(is.null(x)){ stop("Please provide the name of the input vector!", call.=FALSE) } - # check if the input object is defined in all the studies - isDefined(datasources, x) - - # call the internal function that checks the input object is of the same class in all studies. - typ <- checkClass(datasources, x) - - # the input object must be a factor - if(!('factor' %in% typ)){ - stop("The input object must be a factor.", call.=FALSE) - } - - # call the server-side function - cally <- paste0("levelsDS(", x, ")") - output <- DSI::datashield.aggregate(datasources, as.symbol(cally)) + cally <- call("levelsDS", x) + results <- DSI::datashield.aggregate(datasources, cally) + output <- lapply(results, function(r) list(Levels = r$Levels)) return(output) - + } diff --git a/R/ds.ls.R b/R/ds.ls.R index 2f65a3c8..ce96c901 100644 --- a/R/ds.ls.R +++ b/R/ds.ls.R @@ -61,6 +61,7 @@ #' specified R server-side environment;\cr #' (3) the nature of the search filter string as it was applied. 
#' @author DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @examples #' \dontrun{ #' @@ -117,15 +118,8 @@ #' #' @export ds.ls <- function(search.filter=NULL, env.to.search=1L, search.GlobalEnv=TRUE, datasources=NULL){ - - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) # make default to .GlobalEnv unambiguous if(search.GlobalEnv||is.null(env.to.search)){ @@ -191,7 +185,7 @@ if(!is.null(transmit.object)) # call the server side function calltext <- call("lsDS", search.filter=transmit.object.final, env.to.search) - output <- datashield.aggregate(datasources, calltext) + output <- DSI::datashield.aggregate(datasources, calltext) return(output) diff --git a/R/ds.names.R b/R/ds.names.R index 97ebbdfd..e348f002 100644 --- a/R/ds.names.R +++ b/R/ds.names.R @@ -20,6 +20,7 @@ #' of a list object stored on the server-side. 
#' @author Amadou Gaye, updated by Paul Burton for DataSHIELD development #' team 25/06/2020 +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @export #' @examples #' \dontrun{ @@ -68,25 +69,14 @@ #' ds.names <- function(xname=NULL, datasources=NULL){ - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) if(is.null(xname)){ stop("Please provide the name of the input list!", call.=FALSE) } - - # check if the input object is defined in all the studies - isDefined(datasources, xname) calltext <- call("namesDS", xname) - output <- datashield.aggregate(datasources, calltext) + output <- DSI::datashield.aggregate(datasources, calltext) return(output) } #ds.names diff --git a/R/ds.numNA.R b/R/ds.numNA.R index 0bd75185..bae9c1d7 100644 --- a/R/ds.numNA.R +++ b/R/ds.numNA.R @@ -13,6 +13,7 @@ #' @return \code{ds.numNA} returns to the client-side the number of missing values #' on a server-side vector. 
#' @author DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @export #' @examples #' \dontrun{ @@ -52,31 +53,21 @@ #' #' } #' -ds.numNA <- function(x=NULL, datasources=NULL){ +ds.numNA <- function(x=NULL, classConsistencyCheck=TRUE, datasources=NULL){ - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } + datasources <- .set_datasources(datasources) if(is.null(x)){ stop("Please provide the name of a vector!", call.=FALSE) } - # check if the input object is defined in all the studies - isDefined(datasources, x) - - # call the internal function that checks the input object is of the same class in all studies. - typ <- checkClass(datasources, x) + cally <- call("numNaDS", x) + results <- DSI::datashield.aggregate(datasources, cally) - # call the server side function - cally <- paste0("numNaDS(", x, ")") - numNAs <- DSI::datashield.aggregate(datasources, as.symbol(cally)) + if(classConsistencyCheck){ + .checkClassConsistency(results) + } + numNAs <- lapply(results, function(r) r$numNA) return(numNAs) } diff --git a/R/ds.unique.R b/R/ds.unique.R index 8f271705..dd8e5e53 100644 --- a/R/ds.unique.R +++ b/R/ds.unique.R @@ -43,32 +43,22 @@ #' datashield.logout(connections) #' } #' @author Stuart Wheater, DataSHIELD Development Team +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @export #' ds.unique <- function(x.name = NULL, newobj = NULL, datasources = NULL) { - # look for DS connections - if (is.null(datasources)) { - datasources <- datashield.connections_find() - } - # ensure datasources is a list of DSConnection-class - if (!(is.list(datasources) && 
all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))) { - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call. = FALSE) - } + datasources <- .set_datasources(datasources) if (is.null(x.name)) { stop("x.name=NULL. Please provide the names of the objects to de-duplicated!", call. = FALSE) } - # create a name by default if user did not provide a name for the new variable if (is.null(newobj)) { newobj <- "unique.newobj" } - # call the server side function that does the job cally <- call('uniqueDS', x.name) DSI::datashield.assign(datasources, newobj, cally) - # check that the new object has been created and display a message accordingly - finalcheck <- isAssigned(datasources, newobj) } diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 00000000..51ef63e2 --- /dev/null +++ b/R/utils.R @@ -0,0 +1,69 @@ +#' Retrieve datasources if not specified +#' +#' @param datasources An optional list of data sources. If not provided, the function will attempt +#' to find available data sources. +#' @importFrom DSI datashield.connections_find +#' @return A list of data sources. +#' @noRd +.get_datasources <- function(datasources) { + if (is.null(datasources)) { + datasources <- datashield.connections_find() + } + return(datasources) +} + +#' Verify that the provided data sources are of class 'DSConnection'. +#' +#' @param datasources A list of data sources. +#' @importFrom cli cli_abort +#' @noRd +.verify_datasources <- function(datasources) { + is_connection_class <- sapply(datasources, function(x) inherits(unlist(x), "DSConnection")) + if (!all(is_connection_class)) { + cli_abort("The 'datasources' were expected to be a list of DSConnection-class objects") + } +} + +#' Set and verify data sources. +#' +#' @param datasources An optional list of data sources. If not provided, the function will attempt +#' to find available data sources. +#' @return A list of verified data sources. 
+#' @noRd +.set_datasources <- function(datasources) { + datasources <- .get_datasources(datasources) + .verify_datasources(datasources) + return(datasources) +} + +#' Check cross-study class consistency from a list of server aggregate results +#' +#' Batch-refactored server functions return a list per study that includes a +#' `class` field. This helper verifies that the class field is identical across +#' all studies and aborts if not. +#' +#' @param results A named list of server-side aggregate results, one per study, +#' each containing a `class` element. +#' @importFrom cli cli_abort +#' @return Invisibly returns `NULL`. Called for its side effect (error checking). +#' @noRd +.checkClassConsistency <- function(results) { + classes <- lapply(results, function(r) r$class) + if (length(unique(lapply(classes, sort))) > 1) { + cli_abort("The input object is not of the same class in all studies!") + } +} + +#' Check That a Data Frame Name Is Provided +#' +#' Internal helper that checks whether a data frame or matrix object +#' has been provided. If `NULL`, it aborts with a user-friendly error. +#' +#' @param df A data.frame or matrix. +#' @return Invisibly returns `NULL`. Called for its side effect (error checking). 
+#' @noRd +.check_df_name_provided <- function(df) { + if(is.null(df)){ + cli_abort("Please provide the name of a data.frame or matrix!") + } +} diff --git a/dsBase_7.0.0-permissive.tar.gz b/dsBase_7.0.0-permissive.tar.gz index ab4b862e..8a19cee6 100644 Binary files a/dsBase_7.0.0-permissive.tar.gz and b/dsBase_7.0.0-permissive.tar.gz differ diff --git a/man/ds.class.Rd b/man/ds.class.Rd index b2fc0f07..861eeddc 100644 --- a/man/ds.class.Rd +++ b/man/ds.class.Rd @@ -69,4 +69,6 @@ Server function called: \code{classDS} } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.completeCases.Rd b/man/ds.completeCases.Rd index f5df7658..8a8f4ea4 100644 --- a/man/ds.completeCases.Rd +++ b/man/ds.completeCases.Rd @@ -85,4 +85,6 @@ Server function called: \code{completeCasesDS} } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.dim.Rd b/man/ds.dim.Rd index ea3aaa6d..338ee25f 100644 --- a/man/ds.dim.Rd +++ b/man/ds.dim.Rd @@ -4,21 +4,26 @@ \alias{ds.dim} \title{Retrieves the dimension of a server-side R object} \usage{ -ds.dim(x = NULL, type = "both", checks = FALSE, datasources = NULL) +ds.dim( + x = NULL, + type = "both", + classConsistencyCheck = TRUE, + datasources = NULL +) } \arguments{ \item{x}{a character string providing the name of the input object.} -\item{type}{a character string that represents the type of analysis to carry out. +\item{type}{a character string that represents the type of analysis to carry out. If \code{type} is set to \code{'combine'}, \code{'combined'}, \code{'combines'} or \code{'c'}, - the global dimension is returned. -If \code{type} is set to \code{'split'}, \code{'splits'} or \code{'s'}, + the global dimension is returned. +If \code{type} is set to \code{'split'}, \code{'splits'} or \code{'s'}, the dimension is returned separately for each study. 
If \code{type} is set to \code{'both'} or \code{'b'}, both sets of outputs are produced. Default \code{'both'}.} -\item{checks}{logical. If TRUE undertakes all DataSHIELD checks (time-consuming). -Default FALSE.} +\item{classConsistencyCheck}{logical. If TRUE, checks that the input object has the same +class across all studies. Default TRUE.} \item{datasources}{a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. If the \code{datasources} argument is not specified @@ -39,9 +44,6 @@ input object (e.g. array, matrix or data frame) from every single study and the pooled dimension of the object by summing up the individual dimensions returned from each study. -In \code{checks} parameter is suggested that checks should only be undertaken once the -function call has failed. - Server function called: \code{dimDS} } \examples{ @@ -76,17 +78,14 @@ # Calculate the dimension ds.dim(x="D", type="combine", #global dimension - checks = FALSE, - datasources = connections)#all opal servers are used + datasources = connections)#all opal servers are used ds.dim(x="D", type = "both",#separate dimension for each study #and the pooled dimension (default) - checks = FALSE, - datasources = connections)#all opal servers are used + datasources = connections)#all opal servers are used ds.dim(x="D", type="split", #separate dimension for each study - checks = FALSE, - datasources = connections[1])#only the first opal server is used ("study1") + datasources = connections[1])#only the first opal server is used ("study1") # clear the Datashield R sessions and logout datashield.logout(connections) @@ -107,4 +106,6 @@ Server function called: \code{dimDS} } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.isNA.Rd b/man/ds.isNA.Rd index ec6b2f6f..7e82f700 100644 --- a/man/ds.isNA.Rd +++ b/man/ds.isNA.Rd @@ -4,7 +4,7 @@ \alias{ds.isNA} \title{Checks if a 
server-side vector is empty} \usage{ -ds.isNA(x = NULL, datasources = NULL) +ds.isNA(x = NULL, classConsistencyCheck = TRUE, datasources = NULL) } \arguments{ \item{x}{a character string specifying the name of the vector to check.} @@ -71,4 +71,6 @@ Server function called: \code{isNaDS} } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.length.Rd b/man/ds.length.Rd index 27e105bc..da61ec87 100644 --- a/man/ds.length.Rd +++ b/man/ds.length.Rd @@ -4,7 +4,12 @@ \alias{ds.length} \title{Gets the length of an object in the server-side} \usage{ -ds.length(x = NULL, type = "both", checks = "FALSE", datasources = NULL) +ds.length( + x = NULL, + type = "both", + classConsistencyCheck = TRUE, + datasources = NULL +) } \arguments{ \item{x}{a character string specifying the name of a vector or list.} @@ -18,9 +23,8 @@ if \code{type} is set to \code{'both'} or \code{'b'}, both sets of outputs are produced. Default \code{'both'}.} -\item{checks}{logical. If TRUE the model components are checked. -Default FALSE to save time. It is suggested that checks -should only be undertaken once the function call has failed.} +\item{classConsistencyCheck}{logical. If TRUE, checks that the input object has the same +class across all studies. Default TRUE.} \item{datasources}{a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. 
If the \code{datasources} argument is not specified @@ -91,4 +95,6 @@ Server function called: \code{lengthDS} } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.levels.Rd b/man/ds.levels.Rd index fbdab0c4..da714bf5 100644 --- a/man/ds.levels.Rd +++ b/man/ds.levels.Rd @@ -71,4 +71,6 @@ Server function called: \code{levelsDS} } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.ls.Rd b/man/ds.ls.Rd index 207af854..ae54bd5c 100644 --- a/man/ds.ls.Rd +++ b/man/ds.ls.Rd @@ -139,4 +139,6 @@ Server function called: \code{lsDS}. } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.names.Rd b/man/ds.names.Rd index 199b20d9..984e2596 100644 --- a/man/ds.names.Rd +++ b/man/ds.names.Rd @@ -82,4 +82,6 @@ is formally of class "glm" and "ls" but responds TRUE to is.list(), \author{ Amadou Gaye, updated by Paul Burton for DataSHIELD development team 25/06/2020 + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.numNA.Rd b/man/ds.numNA.Rd index 896c76ee..de4d4dbb 100644 --- a/man/ds.numNA.Rd +++ b/man/ds.numNA.Rd @@ -4,7 +4,7 @@ \alias{ds.numNA} \title{Gets the number of missing values in a server-side vector} \usage{ -ds.numNA(x = NULL, datasources = NULL) +ds.numNA(x = NULL, classConsistencyCheck = TRUE, datasources = NULL) } \arguments{ \item{x}{a character string specifying the name of the vector.} @@ -67,4 +67,6 @@ Server function called: \code{numNaDS} } \author{ DataSHIELD Development Team + +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands } diff --git a/man/ds.unique.Rd b/man/ds.unique.Rd index 61d6355b..18d77005 100644 --- a/man/ds.unique.Rd +++ b/man/ds.unique.Rd @@ -61,4 +61,6 @@ Server function called: \code{uniqueDS} } \author{ Stuart Wheater, DataSHIELD Development Team + +Tim Cadman, Genomics Coordination 
Centre, UMCG, Netherlands } diff --git a/tests/testthat/test-arg-ds.cor.R b/tests/testthat/test-arg-ds.cor.R index 47b5a5ab..4554e7ec 100644 --- a/tests/testthat/test-arg-ds.cor.R +++ b/tests/testthat/test-arg-ds.cor.R @@ -22,7 +22,11 @@ connect.studies.dataset.cnsim(list("LAB_TSC", "LAB_TRIG")) # context("ds.cor::arg::test errors") test_that("cor_erros", { expect_error(ds.cor(), 'x=NULL. Please provide the name of a matrix or dataframe or the names of two numeric vectors!', fixed=TRUE) - expect_error(ds.cor(x='D$LAB_TSC'), 'If x is a numeric vector, y must be a numeric vector!', fixed=TRUE) + + expect_error(ds.cor(x='D$LAB_TSC'), "There are some DataSHIELD errors, list them with datashield.errors()", fixed=TRUE) + res.errors <- DSI::datashield.errors() + expect_match(res.errors[[1]], "If x is a numeric vector, y must be a numeric vector!") + expect_error(ds.cor(x='D$LAB_TSC', y='D$LAB_TRIG', type='datashield'), 'Function argument "type" has to be either "combine" or "split"', fixed=TRUE) }) diff --git a/tests/testthat/test-arg-ds.cov.R b/tests/testthat/test-arg-ds.cov.R index 29330c9d..405f60ef 100644 --- a/tests/testthat/test-arg-ds.cov.R +++ b/tests/testthat/test-arg-ds.cov.R @@ -22,7 +22,11 @@ connect.studies.dataset.cnsim(list("LAB_TSC", "LAB_TRIG")) # context("ds.cov::arg::test errors") test_that("cov_erros", { expect_error(ds.cov(), 'x=NULL. 
Please provide the name of a matrix or dataframe or the names of two numeric vectors!', fixed=TRUE) - expect_error(ds.cov(x='D$LAB_TSC'), 'If x is a numeric vector, y must be a numeric vector!', fixed=TRUE) + + expect_error(ds.cov(x='D$LAB_TSC'), "There are some DataSHIELD errors, list them with datashield.errors()", fixed=TRUE) + res.errors <- DSI::datashield.errors() + expect_match(res.errors[[1]], "If x is a numeric vector, y must be a numeric vector!") + expect_error(ds.cov(x='D$LAB_TSC', y='D$LAB_TRIG', type='datashield'), 'Function argument "type" has to be either "combine" or "split"', fixed=TRUE) }) diff --git a/tests/testthat/test-arg-ds.mean.R b/tests/testthat/test-arg-ds.mean.R index 96ef958b..c0ed8dc1 100644 --- a/tests/testthat/test-arg-ds.mean.R +++ b/tests/testthat/test-arg-ds.mean.R @@ -25,7 +25,10 @@ test_that("mean_erros", { expect_error(ds.mean(), "Please provide the name of the input object!", fixed=TRUE) expect_error(ds.mean(x='D$LAB_TSC', type='datashield'), 'Function argument "type" has to be either "both", "combine" or "split"', fixed=TRUE) - expect_error(ds.mean(x='not_a_numeric', checks=TRUE), "The input object must be an integer or a numeric vector.", fixed=TRUE) + + expect_error(ds.mean(x='not_a_numeric'), "There are some DataSHIELD errors, list them with datashield.errors()", fixed=TRUE) + res.errors <- DSI::datashield.errors() + expect_match(res.errors[[1]], "must be of type numeric or integer") }) #context("ds.mean::arg::discordant errors") diff --git a/tests/testthat/test-arg-ds.quantileMean.R b/tests/testthat/test-arg-ds.quantileMean.R index 4d158762..76d51a61 100644 --- a/tests/testthat/test-arg-ds.quantileMean.R +++ b/tests/testthat/test-arg-ds.quantileMean.R @@ -24,7 +24,10 @@ ds.asCharacter(x='D$LAB_HDL', newobj="not_a_numeric") test_that("quantileMean_erros", { expect_error(ds.quantileMean(), "Please provide the name of the input vector!", fixed=TRUE) 
expect_error(ds.quantileMean(x='D$LAB_HDL', type='datashield'), 'Function argument "type" has to be either "combine" or "split"', fixed=TRUE) - expect_error(ds.quantileMean(x='not_a_numeric'), "The input object must be an integer or numeric vector.", fixed=TRUE) + + expect_error(ds.quantileMean(x='not_a_numeric'), "There are some DataSHIELD errors, list them with datashield.errors()", fixed=TRUE) + res.errors <- DSI::datashield.errors() + expect_match(res.errors[[1]], "must be of type numeric or integer") }) # diff --git a/tests/testthat/test-arg-ds.var.R b/tests/testthat/test-arg-ds.var.R index 8ececd0d..d20640d3 100644 --- a/tests/testthat/test-arg-ds.var.R +++ b/tests/testthat/test-arg-ds.var.R @@ -24,7 +24,10 @@ test_that("var_erros", { ds.asCharacter(x='D$LAB_TSC', newobj="not_a_numeric") expect_error(ds.var(), "Please provide the name of the input object!", fixed=TRUE) - expect_error(ds.var(x="not_a_numeric", checks=TRUE), "The input object must be an integer or a numeric vector.", fixed=TRUE) + + expect_error(ds.var(x="not_a_numeric"), "There are some DataSHIELD errors, list them with datashield.errors()", fixed=TRUE) + res.errors <- DSI::datashield.errors() + expect_match(res.errors[[1]], "must be of type numeric or integer") }) # diff --git a/tests/testthat/test-disc-ds.levels.R b/tests/testthat/test-disc-ds.levels.R index 95d0c60b..fdc7b075 100644 --- a/tests/testthat/test-disc-ds.levels.R +++ b/tests/testthat/test-disc-ds.levels.R @@ -26,26 +26,20 @@ test_that("setup", { # # context("ds.levels::disc") test_that("simple levels", { -# res <- ds.levels("D$GENDER") - -# expect_length(res, 3) -# expect_length(res$sim1, 2) -# expect_length(res$sim1$ValidityMessage, 1) -# expect_equal(res$sim1$ValidityMessage, "VALID ANALYSIS") -# expect_length(res$sim1$Levels, 2) -# expect_equal(res$sim1$Levels, NA) - -# expect_length(res$sim2, 2) -# expect_length(res$sim2$ValidityMessage, 1) -# expect_equal(res$sim2$ValidityMessage, "VALID ANALYSIS") -# 
expect_length(res$sim2$Levels, 2) -# expect_equal(res$sim2$Levels, NA) - -# expect_length(res$sim3, 2) -# expect_length(res$sim3$ValidityMessage, 1) -# expect_equal(res$sim3$ValidityMessage, "VALID ANALYSIS") -# expect_length(res$sim3$Levels, 2) -# expect_equal(res$sim3$Levels, NA) + res <- ds.levels("D$GENDER") + + expect_length(res, 3) + expect_length(res$sim1, 1) + expect_length(res$sim1$Levels, 2) + expect_equal(res$sim1$Levels, NA) + + expect_length(res$sim2, 1) + expect_length(res$sim2$Levels, 2) + expect_equal(res$sim2$Levels, NA) + + expect_length(res$sim3, 1) + expect_length(res$sim3$Levels, 2) + expect_equal(res$sim3$Levels, NA) }) # diff --git a/tests/testthat/test-perf-ds.completeCases.R b/tests/testthat/test-perf-ds.completeCases.R new file mode 100644 index 00000000..e2aa3667 --- /dev/null +++ b/tests/testthat/test-perf-ds.completeCases.R @@ -0,0 +1,59 @@ +#------------------------------------------------------------------------------- +# Copyright (c) 2024-2025 Arjuna Technologies, Newcastle upon Tyne. All rights reserved. +# +# This program and the accompanying materials +# are made available under the terms of the GNU Public License v3.0. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+#------------------------------------------------------------------------------- + +# +# Set up +# + +# context("ds.completeCases::perf::setup") +connect.studies.dataset.cnsim(list("LAB_TSC", "LAB_TRIG")) + +# +# Tests +# + +# context("ds.completeCases::perf::combine:0") +test_that("combine - performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.completeCases("D", newobj="D_complete") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.completeCases::perf::combine:0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.completeCases::perf::combine:0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.completeCases::perf::combine:0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.completeCases::perf::combine:0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- perf.reference.rate("ds.completeCases::perf::combine:0") + .reference.tolerance.lower <- perf.reference.tolerance.lower("ds.completeCases::perf::combine:0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.completeCases::perf::combine:0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.completeCases::perf::shutdown") +disconnect.studies.dataset.cnsim() +# context("ds.completeCases::perf::done") diff --git a/tests/testthat/test-perf-ds.dim.R b/tests/testthat/test-perf-ds.dim.R new 
file mode 100644 index 00000000..047dc453 --- /dev/null +++ b/tests/testthat/test-perf-ds.dim.R @@ -0,0 +1,59 @@ +#------------------------------------------------------------------------------- +# Copyright (c) 2024-2025 Arjuna Technologies, Newcastle upon Tyne. All rights reserved. +# +# This program and the accompanying materials +# are made available under the terms of the GNU Public License v3.0. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +#------------------------------------------------------------------------------- + +# +# Set up +# + +# context("ds.dim::perf::setup") +connect.studies.dataset.cnsim(list("LAB_TSC", "LAB_TRIG")) + +# +# Tests +# + +# context("ds.dim::perf::combine:0") +test_that("combine - performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.dim("D") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.dim::perf::combine:0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.dim::perf::combine:0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.dim::perf::combine:0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.dim::perf::combine:0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- perf.reference.rate("ds.dim::perf::combine:0") + .reference.tolerance.lower <- perf.reference.tolerance.lower("ds.dim::perf::combine:0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.dim::perf::combine:0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower 
threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.dim::perf::shutdown") +disconnect.studies.dataset.cnsim() +# context("ds.dim::perf::done") diff --git a/tests/testthat/test-perf-ds.isNA.R b/tests/testthat/test-perf-ds.isNA.R new file mode 100644 index 00000000..9b60c550 --- /dev/null +++ b/tests/testthat/test-perf-ds.isNA.R @@ -0,0 +1,59 @@ +#------------------------------------------------------------------------------- +# Copyright (c) 2024-2025 Arjuna Technologies, Newcastle upon Tyne. All rights reserved. +# +# This program and the accompanying materials +# are made available under the terms of the GNU Public License v3.0. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +#------------------------------------------------------------------------------- + +# +# Set up +# + +# context("ds.isNA::perf::setup") +connect.studies.dataset.cnsim(list("LAB_TSC", "LAB_TRIG")) + +# +# Tests +# + +# context("ds.isNA::perf::combine:0") +test_that("combine - performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.isNA("D$LAB_TSC") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.isNA::perf::combine:0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.isNA::perf::combine:0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.isNA::perf::combine:0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.isNA::perf::combine:0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), 
"%", sep = '')) + } + + .reference.rate <- perf.reference.rate("ds.isNA::perf::combine:0") + .reference.tolerance.lower <- perf.reference.tolerance.lower("ds.isNA::perf::combine:0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.isNA::perf::combine:0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.isNA::perf::shutdown") +disconnect.studies.dataset.cnsim() +# context("ds.isNA::perf::done") diff --git a/tests/testthat/test-perf-ds.levels.R b/tests/testthat/test-perf-ds.levels.R new file mode 100644 index 00000000..4936a975 --- /dev/null +++ b/tests/testthat/test-perf-ds.levels.R @@ -0,0 +1,59 @@ +#------------------------------------------------------------------------------- +# Copyright (c) 2024-2025 Arjuna Technologies, Newcastle upon Tyne. All rights reserved. +# +# This program and the accompanying materials +# are made available under the terms of the GNU Public License v3.0. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+#------------------------------------------------------------------------------- + +# +# Set up +# + +# context("ds.levels::perf::setup") +connect.studies.dataset.cnsim(list("LAB_TSC", "PM_BMI_CATEGORICAL")) + +# +# Tests +# + +# context("ds.levels::perf::combine:0") +test_that("combine - performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.levels("D$PM_BMI_CATEGORICAL") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.levels::perf::combine:0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.levels::perf::combine:0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.levels::perf::combine:0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.levels::perf::combine:0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- perf.reference.rate("ds.levels::perf::combine:0") + .reference.tolerance.lower <- perf.reference.tolerance.lower("ds.levels::perf::combine:0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.levels::perf::combine:0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.levels::perf::shutdown") +disconnect.studies.dataset.cnsim() +# context("ds.levels::perf::done") diff --git a/tests/testthat/test-perf-ds.ls.R b/tests/testthat/test-perf-ds.ls.R new file mode 100644 index 00000000..e9ad009c --- /dev/null +++ 
b/tests/testthat/test-perf-ds.ls.R @@ -0,0 +1,59 @@ +#------------------------------------------------------------------------------- +# Copyright (c) 2024-2025 Arjuna Technologies, Newcastle upon Tyne. All rights reserved. +# +# This program and the accompanying materials +# are made available under the terms of the GNU Public License v3.0. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +#------------------------------------------------------------------------------- + +# +# Set up +# + +# context("ds.ls::perf::setup") +connect.studies.dataset.cnsim(list("LAB_TSC", "LAB_TRIG")) + +# +# Tests +# + +# context("ds.ls::perf::combine:0") +test_that("combine - performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.ls() + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.ls::perf::combine:0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.ls::perf::combine:0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.ls::perf::combine:0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.ls::perf::combine:0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- perf.reference.rate("ds.ls::perf::combine:0") + .reference.tolerance.lower <- perf.reference.tolerance.lower("ds.ls::perf::combine:0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.ls::perf::combine:0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * 
.reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.ls::perf::shutdown") +disconnect.studies.dataset.cnsim() +# context("ds.ls::perf::done") diff --git a/tests/testthat/test-perf-ds.names.R b/tests/testthat/test-perf-ds.names.R new file mode 100644 index 00000000..bd39e6af --- /dev/null +++ b/tests/testthat/test-perf-ds.names.R @@ -0,0 +1,59 @@ +#------------------------------------------------------------------------------- +# Copyright (c) 2024-2025 Arjuna Technologies, Newcastle upon Tyne. All rights reserved. +# +# This program and the accompanying materials +# are made available under the terms of the GNU Public License v3.0. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +#------------------------------------------------------------------------------- + +# +# Set up +# + +# context("ds.names::perf::setup") +connect.studies.dataset.cnsim(list("LAB_TSC", "LAB_TRIG")) + +# +# Tests +# + +# context("ds.names::perf::combine:0") +test_that("combine - performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.names("D") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.names::perf::combine:0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.names::perf::combine:0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.names::perf::combine:0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.names::perf::combine:0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- 
perf.reference.rate("ds.names::perf::combine:0") + .reference.tolerance.lower <- perf.reference.tolerance.lower("ds.names::perf::combine:0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.names::perf::combine:0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.names::perf::shutdown") +disconnect.studies.dataset.cnsim() +# context("ds.names::perf::done") diff --git a/tests/testthat/test-perf-ds.numNA.R b/tests/testthat/test-perf-ds.numNA.R new file mode 100644 index 00000000..682f5c71 --- /dev/null +++ b/tests/testthat/test-perf-ds.numNA.R @@ -0,0 +1,59 @@ +#------------------------------------------------------------------------------- +# Copyright (c) 2024-2025 Arjuna Technologies, Newcastle upon Tyne. All rights reserved. +# +# This program and the accompanying materials +# are made available under the terms of the GNU Public License v3.0. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+#------------------------------------------------------------------------------- + +# +# Set up +# + +# context("ds.numNA::perf::setup") +connect.studies.dataset.cnsim(list("LAB_TSC", "LAB_TRIG")) + +# +# Tests +# + +# context("ds.numNA::perf::combine:0") +test_that("combine - performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.numNA("D$LAB_TSC") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.numNA::perf::combine:0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.numNA::perf::combine:0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.numNA::perf::combine:0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.numNA::perf::combine:0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- perf.reference.rate("ds.numNA::perf::combine:0") + .reference.tolerance.lower <- perf.reference.tolerance.lower("ds.numNA::perf::combine:0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.numNA::perf::combine:0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.numNA::perf::shutdown") +disconnect.studies.dataset.cnsim() +# context("ds.numNA::perf::done") diff --git a/tests/testthat/test-perf-ds.unique.R b/tests/testthat/test-perf-ds.unique.R new file mode 100644 index 00000000..cc4f54d2 --- /dev/null +++ b/tests/testthat/test-perf-ds.unique.R @@ 
-0,0 +1,59 @@ +#------------------------------------------------------------------------------- +# Copyright (c) 2024-2025 Arjuna Technologies, Newcastle upon Tyne. All rights reserved. +# +# This program and the accompanying materials +# are made available under the terms of the GNU Public License v3.0. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +#------------------------------------------------------------------------------- + +# +# Set up +# + +# context("ds.unique::perf::setup") +connect.studies.dataset.cnsim(list("LAB_TSC", "LAB_TRIG")) + +# +# Tests +# + +# context("ds.unique::perf::combine:0") +test_that("combine - performance", { + .durationSec <- 30 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + ds.unique("D$LAB_TSC", newobj="unique_TSC") + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.unique::perf::combine:0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.unique::perf::combine:0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.unique::perf::combine:0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.unique::perf::combine:0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- perf.reference.rate("ds.unique::perf::combine:0") + .reference.tolerance.lower <- perf.reference.tolerance.lower("ds.unique::perf::combine:0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.unique::perf::combine:0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + 
expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.unique::perf::shutdown") +disconnect.studies.dataset.cnsim() +# context("ds.unique::perf::done") diff --git a/tests/testthat/test-smk-ds.completeCases-vectors.R b/tests/testthat/test-smk-ds.completeCases-vectors.R index 86ba71eb..6f46df18 100644 --- a/tests/testthat/test-smk-ds.completeCases-vectors.R +++ b/tests/testthat/test-smk-ds.completeCases-vectors.R @@ -29,11 +29,7 @@ test_that("setup", { test_that("completeCases vector", { ds.c("D$survtime", newobj="vec_n") - res.completeCases <- ds.completeCases("vec_n", "vec_n_new") - - expect_length(res.completeCases, 2) - expect_equal(res.completeCases$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res.completeCases$validity.check, " appears valid in all sources") + ds.completeCases("vec_n", "vec_n_new") res.vec.class <- ds.class("vec_n") @@ -84,11 +80,7 @@ test_that("completeCases vector", { test_that("completeCases vector", { ds.asInteger("D$age.60", newobj="vec_i") - res.completeCases <- ds.completeCases("vec_i", "vec_i_new") - - expect_length(res.completeCases, 2) - expect_equal(res.completeCases$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res.completeCases$validity.check, " appears valid in all sources") + ds.completeCases("vec_i", "vec_i_new") res.vec.class <- ds.class("vec_i") @@ -139,11 +131,7 @@ test_that("completeCases vector", { test_that("completeCases vector", { ds.asCharacter("D$age.60", newobj="vec_c") - res.completeCases <- ds.completeCases("vec_c", "vec_c_new") - - expect_length(res.completeCases, 2) - expect_equal(res.completeCases$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res.completeCases$validity.check, " appears valid in all sources") + ds.completeCases("vec_c", 
"vec_c_new") res.vec.class <- ds.class("vec_c") @@ -194,11 +182,7 @@ test_that("completeCases vector", { test_that("completeCases vector", { ds.asLogical("D$age.60", newobj="vec_l") - res.completeCases <- ds.completeCases("vec_l", "vec_l_new") - - expect_length(res.completeCases, 2) - expect_equal(res.completeCases$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res.completeCases$validity.check, " appears valid in all sources") + ds.completeCases("vec_l", "vec_l_new") res.vec.class <- ds.class("vec_l") @@ -249,11 +233,7 @@ test_that("completeCases vector", { test_that("completeCases vector", { ds.c("D$female", newobj="vec_f") - res.completeCases <- ds.completeCases("vec_f", "vec_f_new") - - expect_length(res.completeCases, 2) - expect_equal(res.completeCases$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res.completeCases$validity.check, " appears valid in all sources") + ds.completeCases("vec_f", "vec_f_new") res.vec.class <- ds.class("vec_f") diff --git a/tests/testthat/test-smk-ds.completeCases.R b/tests/testthat/test-smk-ds.completeCases.R index 3be25b85..63f6918c 100644 --- a/tests/testthat/test-smk-ds.completeCases.R +++ b/tests/testthat/test-smk-ds.completeCases.R @@ -29,11 +29,7 @@ test_that("setup", { test_that("completeCases data.frame", { ds.dataFrame(c("D$LAB_TSC", "D$LAB_TRIG", "D$LAB_HDL", "D$LAB_GLUC_ADJUSTED", "D$PM_BMI_CONTINUOUS", "D$DIS_CVA", "D$MEDI_LPD", "D$DIS_DIAB", "D$DIS_AMI", "D$GENDER", "D$PM_BMI_CATEGORICAL"), newobj="df") - res.completeCases <- ds.completeCases("df", "df_new") - - expect_length(res.completeCases, 2) - expect_equal(res.completeCases$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res.completeCases$validity.check, " appears valid in all sources") + ds.completeCases("df", "df_new") res.df.class <- ds.class("df") @@ -86,11 +82,7 @@ test_that("completeCases data.frame", { 
test_that("completeCases matrix", { ds.asDataMatrix("D", newobj="mat") - res.completeCases <- ds.completeCases("mat", "mat_new") - - expect_length(res.completeCases, 2) - expect_equal(res.completeCases$is.object.created, "A data object has been created in all specified data sources") - expect_equal(res.completeCases$validity.check, " appears valid in all sources") + ds.completeCases("mat", "mat_new") res.mat.class <- ds.class("mat") diff --git a/tests/testthat/test-smk-ds.length.R b/tests/testthat/test-smk-ds.length.R index b7c9bd76..5df9be59 100644 --- a/tests/testthat/test-smk-ds.length.R +++ b/tests/testthat/test-smk-ds.length.R @@ -53,7 +53,7 @@ test_that("basic length, combine", { }) test_that("basic length, both", { - res.length <- ds.length('D$LAB_TSC', type='both', check=TRUE) + res.length <- ds.length('D$LAB_TSC', type='both') expect_length(res.length, 4) expect_equal(res.length$`length of D$LAB_TSC in sim1`, 2163) @@ -63,7 +63,7 @@ test_that("basic length, both", { }) test_that("basic length, split", { - res.length <- ds.length('D$LAB_TSC', type='split', check=TRUE) + res.length <- ds.length('D$LAB_TSC', type='split') expect_length(res.length, 3) expect_equal(res.length$`length of D$LAB_TSC in sim1`, 2163) @@ -72,7 +72,7 @@ test_that("basic length, split", { }) test_that("basic length, combine", { - res.length <- ds.length('D$LAB_TSC', type='combine', check=TRUE) + res.length <- ds.length('D$LAB_TSC', type='combine') expect_length(res.length, 1) expect_equal(res.length$`total length of D$LAB_TSC in all studies combined`, 9379) diff --git a/tests/testthat/test-smk-ds.levels.R b/tests/testthat/test-smk-ds.levels.R index 02275893..db50d089 100644 --- a/tests/testthat/test-smk-ds.levels.R +++ b/tests/testthat/test-smk-ds.levels.R @@ -32,21 +32,15 @@ test_that("simple levels", { res <- ds.levels("gender") expect_length(res, 3) - expect_length(res$sim1, 2) - expect_length(res$sim1$ValidityMessage, 1) - expect_equal(res$sim1$ValidityMessage, "VALID ANALYSIS") + 
expect_length(res$sim1, 1) expect_length(res$sim1$Levels, 2) expect_equal(res$sim1$Levels[1], "0") expect_equal(res$sim1$Levels[2], "1") - expect_length(res$sim2, 2) - expect_length(res$sim2$ValidityMessage, 1) - expect_equal(res$sim2$ValidityMessage, "VALID ANALYSIS") + expect_length(res$sim2, 1) expect_length(res$sim2$Levels, 2) expect_equal(res$sim2$Levels[1], "0") expect_equal(res$sim2$Levels[2], "1") - expect_length(res$sim3, 2) - expect_length(res$sim3$ValidityMessage, 1) - expect_equal(res$sim3$ValidityMessage, "VALID ANALYSIS") + expect_length(res$sim3, 1) expect_length(res$sim3$Levels, 2) expect_equal(res$sim3$Levels[1], "0") expect_equal(res$sim3$Levels[2], "1") @@ -59,23 +53,17 @@ test_that("simple levels", { res <- ds.levels("pm_bmi_categorical") expect_length(res, 3) - expect_length(res$sim1, 2) - expect_length(res$sim1$ValidityMessage, 1) - expect_equal(res$sim1$ValidityMessage, "VALID ANALYSIS") + expect_length(res$sim1, 1) expect_length(res$sim1$Levels, 3) expect_equal(res$sim1$Levels[1], "1") expect_equal(res$sim1$Levels[2], "2") expect_equal(res$sim1$Levels[3], "3") - expect_length(res$sim2, 2) - expect_length(res$sim2$ValidityMessage, 1) - expect_equal(res$sim2$ValidityMessage, "VALID ANALYSIS") + expect_length(res$sim2, 1) expect_length(res$sim2$Levels, 3) expect_equal(res$sim2$Levels[1], "1") expect_equal(res$sim2$Levels[2], "2") expect_equal(res$sim2$Levels[3], "3") - expect_length(res$sim3, 2) - expect_length(res$sim3$ValidityMessage, 1) - expect_equal(res$sim3$ValidityMessage, "VALID ANALYSIS") + expect_length(res$sim3, 1) expect_length(res$sim3$Levels, 3) expect_equal(res$sim3$Levels[1], "1") expect_equal(res$sim3$Levels[2], "2")