Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
3e28629
refactor: moved checks to serverside
timcadman Apr 13, 2026
a6e7957
refactor: add class consistency checks, remove ValidityMessage
timcadman Apr 13, 2026
0bb5699
test: update kurtosis and skewness expectations for new return structure
timcadman Apr 13, 2026
00f1002
chore: update dsBase tar and refactor guide
timcadman Apr 13, 2026
a041d44
refactor: add classConsistencyCheck parameter, remove ValidityMessage
timcadman Apr 13, 2026
85007d4
test: update mean and var expectations for new return structure
timcadman Apr 13, 2026
9f27cbb
chore: update dsBase tar
timcadman Apr 13, 2026
4d4ea1a
test: add performance tests for batch 3 functions
timcadman Apr 13, 2026
2e6019b
docs: updated authorship
timcadman Apr 13, 2026
ae07a65
docs: regenerate man pages
timcadman Apr 13, 2026
13cb8a4
docs: update TODOs for batch 1 outstanding issues
timcadman Apr 13, 2026
c24575f
docs: update refactor guide with classConsistencyCheck and ValidityMe…
timcadman Apr 13, 2026
15287d1
test: update batch-3 expectations and refactor guide
timcadman Apr 14, 2026
30976f1
chore: regenerate ds.mean fixtures and update dsBase tar
timcadman Apr 14, 2026
f0cae98
fix: unwrap new list returns in ds.summary
timcadman Apr 14, 2026
67992ca
docs: note checkClass follow-up refactor
timcadman Apr 14, 2026
141ba18
test: add performance test for ds.summary
timcadman Apr 16, 2026
3efab38
fix: remove third-party copyright from new test
timcadman Apr 16, 2026
59e1eff
docs: sync REFACTOR_GUIDE.md from batch-9
timcadman Apr 16, 2026
504d003
docs: updated authorship
timcadman Apr 16, 2026
dac736c
docs: sync REFACTOR_GUIDE.md from batch-10
timcadman Apr 16, 2026
c92d01a
fix: align perf test datasets with smoke tests
timcadman Apr 19, 2026
af8960d
docs: sync REFACTOR_GUIDE.md
timcadman Apr 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ export(ds.var)
export(ds.vectorCalc)
import(DSI)
import(data.table)
importFrom(DSI,datashield.connections_find)
importFrom(cli,cli_abort)
importFrom(stats,as.formula)
importFrom(stats,na.omit)
importFrom(stats,ts)
Expand Down
48 changes: 9 additions & 39 deletions R/ds.cor.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#' percentage is pre-specified by the 'nfilter.glm'). The second disclosure control checks that none of them is dichotomous
#' with a level having fewer counts than the pre-specified 'nfilter.tab' threshold.
#' @author DataSHIELD Development Team
#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands
#' @examples
#' \dontrun{
#'
Expand Down Expand Up @@ -79,56 +80,25 @@
#' }
#' @export
#'
ds.cor <- function(x=NULL, y=NULL, type="split", datasources=NULL){
ds.cor <- function(x=NULL, y=NULL, type="split", classConsistencyCheck=TRUE, datasources=NULL){

# look for DS connections
if(is.null(datasources)){
datasources <- datashield.connections_find()
}

# ensure datasources is a list of DSConnection-class
if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){
stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE)
}
datasources <- .set_datasources(datasources)

if(is.null(x)){
stop("x=NULL. Please provide the name of a matrix or dataframe or the names of two numeric vectors!", call.=FALSE)
}else{
isDefined(datasources, x)
}

# check the type of the input objects
typ <- checkClass(datasources, x)

if(('numeric' %in% typ) | ('integer' %in% typ) | ('factor' %in% typ)){
if(is.null(y)){
stop("If x is a numeric vector, y must be a numeric vector!", call.=FALSE)
}else{
isDefined(datasources, y)
typ2 <- checkClass(datasources, y)
}
}

if(('matrix' %in% typ) | ('data.frame' %in% typ) & !(is.null(y))){
y <- NULL
warning("x is a matrix or a dataframe; y will be ignored and a correlation matrix computed for x!")
}

# name of the studies to be used in the output
stdnames <- names(datasources)

# call the server side function
if(('matrix' %in% typ) | ('data.frame' %in% typ)){
calltext <- call("corDS", x, NULL)
}else{
if(!(is.null(y))){
calltext <- call("corDS", x, y)
}else{
calltext <- call("corDS", x, NULL)
}
}
calltext <- call("corDS", x, y)
output <- DSI::datashield.aggregate(datasources, calltext)


if(classConsistencyCheck){
.checkClassConsistency(output)
}

if (type=="split"){
covariance <- list()
sqrt.diag <- list()
Expand Down
28 changes: 10 additions & 18 deletions R/ds.corTest.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}.
#' @return \code{ds.corTest} returns to the client-side the results of the correlation test.
#' @author DataSHIELD Development Team
#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands
#' @export
#' @examples
#' \dontrun{
Expand Down Expand Up @@ -63,17 +64,9 @@
#'
#' }
#'
ds.corTest <- function(x=NULL, y=NULL, method="pearson", exact=NULL, conf.level=0.95, type='split', datasources=NULL){
ds.corTest <- function(x=NULL, y=NULL, method="pearson", exact=NULL, conf.level=0.95, type='split', classConsistencyCheck=FALSE, datasources=NULL){

# look for DS connections
if(is.null(datasources)){
datasources <- datashield.connections_find()
}

# ensure datasources is a list of DSConnection-class
if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){
stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE)
}
datasources <- .set_datasources(datasources)

if(is.null(x)){
stop("x=NULL. Please provide the names of the 1st numeric vector!", call.=FALSE)
Expand All @@ -85,19 +78,18 @@ ds.corTest <- function(x=NULL, y=NULL, method="pearson", exact=NULL, conf.level=
if(!(method %in% c("pearson", "kendall", "spearman"))){
stop('Function argument "method" has to be either "pearson", "kendall" or "spearman"', call.=FALSE)
}

# check if the input objects are defined in all the studies
isDefined(datasources, x)
isDefined(datasources, y)

# call the internal function that checks the input objects are of the same class in all studies.
typ <- checkClass(datasources, x)
typ <- checkClass(datasources, y)

# call the server side function
cally <- call("corTestDS", x, y, method, exact, conf.level)
out <- DSI::datashield.aggregate(datasources, cally)

if(classConsistencyCheck){
.checkClassConsistency(out)
}

# strip class field from results before returning
out <- lapply(out, function(r) { r$class <- NULL; r })

if(type=="split"){
return(out)
}else{
Expand Down
48 changes: 9 additions & 39 deletions R/ds.cov.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
#' the disclosure controls then all the output values are replaced with NAs. If all the variables are valid and pass
#' the controls, then the output matrices are returned and also an error message is returned but it is replaced by NA.
#' @author DataSHIELD Development Team
#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands
#' @examples
#' \dontrun{
#'
Expand Down Expand Up @@ -96,56 +97,25 @@
#' }
#' @export
#'
ds.cov <- function(x=NULL, y=NULL, naAction='pairwise.complete', type="split", datasources=NULL){
ds.cov <- function(x=NULL, y=NULL, naAction='pairwise.complete', type="split", classConsistencyCheck=TRUE, datasources=NULL){

# look for DS connections
if(is.null(datasources)){
datasources <- datashield.connections_find()
}

# ensure datasources is a list of DSConnection-class
if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){
stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE)
}
datasources <- .set_datasources(datasources)

if(is.null(x)){
stop("x=NULL. Please provide the name of a matrix or dataframe or the names of two numeric vectors!", call.=FALSE)
}else{
isDefined(datasources, x)
}

# check the type of the input objects
typ <- checkClass(datasources, x)

if(('numeric' %in% typ) | ('integer' %in% typ) | ('factor' %in% typ)){
if(is.null(y)){
stop("If x is a numeric vector, y must be a numeric vector!", call.=FALSE)
}else{
isDefined(datasources, y)
typ2 <- checkClass(datasources, y)
}
}

if(('matrix' %in% typ) | ('data.frame' %in% typ) & !(is.null(y))){
y <- NULL
warning("x is a matrix or a dataframe; y will be ignored and a covariance matrix computed for x!")
}

# name of the studies to be used in the output
stdnames <- names(datasources)

# call the server side function
if(('matrix' %in% typ) | ('data.frame' %in% typ)){
calltext <- call("covDS", x, NULL, naAction)
}else{
if(!(is.null(y))){
calltext <- call("covDS", x, y, naAction)
}else{
calltext <- call("covDS", x, NULL, naAction)
}
}
calltext <- call("covDS", x, y, naAction)
output <- DSI::datashield.aggregate(datasources, calltext)


if(classConsistencyCheck){
.checkClassConsistency(output)
}

if (type=="split"){
covariance <- list()
results <- list()
Expand Down
63 changes: 23 additions & 40 deletions R/ds.kurtosis.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,56 +23,40 @@
#' @return a matrix showing the kurtosis of the input numeric variable, the number of valid observations and
#' the validity message.
#' @author Demetris Avraam, for DataSHIELD Development Team
#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands
#' @export
#'
ds.kurtosis <- function(x=NULL, method=1, type='both', datasources=NULL){

# if no opal login details are provided look for 'opal' objects in the environment
if(is.null(datasources)){
datasources <- datashield.connections_find()
}
ds.kurtosis <- function(x=NULL, method=1, type='both', classConsistencyCheck=FALSE, datasources=NULL){

# ensure datasources is a list of DSConnection-class
if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){
stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE)
}
datasources <- .set_datasources(datasources)

if(is.null(x)){
stop("Please provide the name of the input vector!", call.=FALSE)
}

if(!all(method %in% c(1,2,3))){
stop("method must be an integer between 1 and 3", call.=FALSE)
}
# enable valid aliases for "type" argument

# enable valid aliases for "type" argument
if(type == 'combine' | type == 'combined' | type == 'combines' | type == 'c') type <- 'combine'
if(type == 'split' | type == 'splits' | type == 's') type <- 'split'
if(type == 'both' | type == 'b' ) type <- 'both'
if(type != 'combine' & type != 'split' & type != 'both'){
stop('Function argument "type" has to be either "both", "combine" or "split"', call.=FALSE)
}

# check if the input object is defined in all the studies
isDefined(datasources, x)

# call the internal function that checks the input object is of the same class in all studies.
typ <- checkClass(datasources, x)

# the input object must be a numeric or an integer vector
if(typ != 'integer' & typ != 'numeric'){
message(paste0(x, " is of type ", typ, "!"))
stop("The input object must be an integer or numeric vector.", call.=FALSE)
}


if (type=='split' | type=='both'){
calltext.split <- call("kurtosisDS1", x, method)
output.split <- DSI::datashield.aggregate(datasources, calltext.split)
mat.split <- matrix(as.numeric(matrix(unlist(output.split), nrow=length(datasources), byrow=TRUE)[,1:2]),nrow=length(datasources))
validity <- matrix(unlist(output.split), nrow=length(datasources), byrow=TRUE)[,3]
mat.split <- data.frame(cbind(mat.split, validity))
output.split <- DSI::datashield.aggregate(datasources, calltext.split)
if(classConsistencyCheck){
.checkClassConsistency(output.split)
}
mat.split <- data.frame(
Kurtosis = sapply(output.split, function(r) r$Kurtosis),
Nvalid = sapply(output.split, function(r) r$Nvalid)
)
rownames(mat.split) <- names(output.split)
colnames(mat.split) <- c('Kurtosis', 'Nvalid', 'ValidityMessage')
}

if (type=='combine' | type=='both'){
Expand All @@ -83,8 +67,11 @@ ds.kurtosis <- function(x=NULL, method=1, type='both', datasources=NULL){
stop("FAILED: The number of valid observations in one or more studies is less than nfilter.tab. \n Check that by using the argument type=='split'", call.=FALSE)
}else{
calltext.combined <- call("kurtosisDS2", x, global.mean)
output.combined <- DSI::datashield.aggregate(datasources, calltext.combined)

output.combined <- DSI::datashield.aggregate(datasources, calltext.combined)
if(classConsistencyCheck){
.checkClassConsistency(output.combined)
}

Global.sum.quartics <- 0
Global.sum.squares <- 0
Global.Nvalid <- 0
Expand All @@ -98,19 +85,15 @@ ds.kurtosis <- function(x=NULL, method=1, type='both', datasources=NULL){

if(method==1){
Global.kurtosis <- g2.global
combinedMessage <- "VALID ANALYSIS"
}
}
if(method==2){
Global.kurtosis <- ((Global.Nvalid + 1) * g2.global + 6) * (Global.Nvalid - 1)/((Global.Nvalid - 2) * (Global.Nvalid - 3))
combinedMessage <- "VALID ANALYSIS"
}
}
if(method==3){
Global.kurtosis <- (g2.global + 3) * (1 - 1/Global.Nvalid)^2 - 3
combinedMessage <- "VALID ANALYSIS"
}
mat.combined <- data.frame(cbind(Global.kurtosis, Global.Nvalid, combinedMessage))
mat.combined <- data.frame(Kurtosis = Global.kurtosis, Nvalid = Global.Nvalid)
rownames(mat.combined) <- 'studiesCombined'
colnames(mat.combined) <- c('Kurtosis', 'Nvalid', 'ValidityMessage')

}
}
Expand Down
Loading
Loading