4 Tests for main functions in cssr
Now that the helper functions have been defined, we move on to tests for the main functions in the package.
Tests for createSubsamples()
:
::test_that("createSubsamples works", {
testthat<- createSubsamples(n=20L, p=5L, B=11L, sampling_type="SS",
res prop_feats_remove=0)
::expect_true(is.list(res))
testthat::expect_equal(length(res), 2*11)
testthat::expect_true(all(lengths(res) == 20/2))
testthat::expect_equal(length(unique(res[[13]])), 20/2)
testthat
<- res[[4]]
set <- res[[4 + 11]]
comp_set
::expect_equal(length(intersect(set, comp_set)), 0)
testthat::expect_equal(length(union(set, comp_set)), length(c(set, comp_set)))
testthat::expect_equal(20, length(c(set, comp_set)))
testthat
# Try odd n
<- createSubsamples(n=19L, p=23L, B=13L, sampling_type="SS",
res_odd prop_feats_remove=0)
::expect_true(is.list(res_odd))
testthat::expect_equal(length(res_odd), 2*13)
testthat::expect_true(all(lengths(res_odd) == floor(19/2)))
testthat::expect_equal(length(unique(res_odd[[3]])), floor(19/2))
testthat
<- res_odd[[2]]
set_odd <- res_odd[[2 + 13]]
comp_set_odd
::expect_equal(length(intersect(set_odd, comp_set_odd)), 0)
testthat::expect_equal(length(union(set_odd, comp_set_odd)),
testthatlength(c(set_odd, comp_set_odd)))
::expect_equal(19 - 1, length(c(set_odd, comp_set_odd)))
testthat
::expect_error(createSubsamples(n=20L, p=5L, B=11L, sampling_type="MB",
testthatprop_feats_remove=0),
"sampling_type MB is not yet supported (and isn't recommended anyway)",
fixed=TRUE)
# misspecified sampling_type (not specifying error because contains quotation
# marks)
::expect_error(createSubsamples(n=20L, p=5L, B=11L, sampling_type="S",
testthatprop_feats_remove=0))
::expect_error(createSubsamples(n=11.1, p=5L, B=11L, sampling_type="SS",
testthatprop_feats_remove=0),
"n == round(n) is not TRUE",
fixed=TRUE)
::expect_error(createSubsamples(n=-20L, p=5L, B=11L, sampling_type="SS",
testthatprop_feats_remove=0),
"n > 0 is not TRUE",
fixed=TRUE)
::expect_error(createSubsamples(n=20L, p=5L, B=25.6, sampling_type="SS",
testthatprop_feats_remove=0),
"length(subsamples) == B is not TRUE",
fixed=TRUE)
})
## Test passed 😸
Tests for getSubsamps()
:
::test_that("getSubsamps works", {
testthat<- getSubsamps(n=18L, B=21L, sampling_type="SS")
res
::expect_true(is.list(res))
testthat::expect_equal(length(res), 2*21)
testthat::expect_true(all(lengths(res) == 18/2))
testthat::expect_equal(length(unique(res[[7]])), 18/2)
testthat
<- res[[3]]
set <- res[[3 + 21]]
comp_set
::expect_equal(length(intersect(set, comp_set)), 0)
testthat::expect_equal(length(union(set, comp_set)), length(c(set, comp_set)))
testthat::expect_equal(18, length(c(set, comp_set)))
testthat })
## Test passed 😸
Tests for getSelMatrix()
:
::test_that("getSelMatrix works", {
testthatset.seed(98623)
<- matrix(stats::rnorm(25*6), nrow=25, ncol=6)
x <- stats::rnorm(25)
y <- createSubsamples(n=25, p=6, B=12, sampling_type="SS",
subsamps_object prop_feats_remove=0)
<- getSelMatrix(x=x, y=y, lambda=0.01, B=12, sampling_type="SS",
res subsamps_object=subsamps_object, num_cores=1,
fitfun=cssLasso)
::expect_true(is.matrix(res))
testthat::expect_equal(nrow(res), 2*12)
testthat::expect_equal(ncol(res), 6)
testthat::expect_true(all(res %in% c(0, 1)))
testthat::expect_true(all(is.integer(res)))
testthat
# Try a different fitfun
testFitfun <- function(X, y, lambda){
<- ncol(X)
p stopifnot(p >= 2)
# Choose p/2 features randomly
<- sample.int(p, size=floor(p/2))
selected return(selected)
}
# Note that value of lambda doesn't matter
<- getSelMatrix(x=x, y=y, lambda="foo", B=12, sampling_type="SS",
res2 subsamps_object=subsamps_object, num_cores=1,
fitfun=testFitfun)
::expect_true(is.matrix(res2))
testthat::expect_equal(nrow(res2), 2*12)
testthat::expect_equal(ncol(res2), 6)
testthat::expect_true(all(res2 %in% c(0, 1)))
testthat::expect_true(all(is.integer(res2)))
testthat
::expect_error(getSelMatrix(x=x, y=y, lambda="0.02", B=12, sampling_type="SS",
testthatsubsamps_object="subsamps_object", num_cores=1,
fitfun=testFitfun),
"is.integer(subsample) is not TRUE",
fixed=TRUE)
::expect_error(getSelMatrix(x=x[1:8, ], y=y, lambda="foo", B=12, sampling_type="SS",
testthatsubsamps_object=subsamps_object, num_cores=1,
fitfun=testFitfun),
"length(y) == n is not TRUE",
fixed=TRUE)
::expect_error(getSelMatrix(x=x, y=y, lambda=-0.02, B=12, sampling_type="SS",
testthatsubsamps_object=subsamps_object, num_cores=1,
fitfun=cssLasso),
"For method cssLasso, lambda must be nonnegative.",
fixed=TRUE)
# Wrong B
::expect_error(getSelMatrix(x=x, y=y, lambda=0.02, B=37, sampling_type="SS",
testthatsubsamps_object=subsamps_object, num_cores=1,
fitfun=cssLasso),
"length(res_list) == nrow(res) is not TRUE",
fixed=TRUE)
})
## Test passed 🎉
Tests for cssLasso()
:
::test_that("cssLasso works", {
testthatset.seed(24509)
<- matrix(stats::rnorm(15*4), nrow=15, ncol=4)
x <- stats::rnorm(15)
y
<- cssLasso(X=x, y=y, lambda=0.01)
res
::expect_true(is.integer(res))
testthat::expect_true(length(res) <= 4)
testthat::expect_true(length(res) >= 0)
testthat::expect_true(length(res) == length(unique(res)))
testthat::expect_true(all(res <= 4))
testthat::expect_true(all(res >= 1))
testthat
::expect_error(cssLasso(X=x[1:13, ], y=y, lambda=0.01),
testthat"For method cssLasso, y must be a vector of length equal to nrow(X).",
fixed=TRUE)
::expect_error(cssLasso(X=x, y=y, lambda=-0.01),
testthat"For method cssLasso, lambda must be nonnegative.",
fixed=TRUE)
})
## Test passed 😀
Tests for getClusterSelMatrix()
:
::test_that("getClusterSelMatrix works", {
testthat<- list(red_cluster=1L:5L,
good_clusters green_cluster=6L:8L, blue_clust=9L)
<- 14
B <- 9
p
<- as.integer(sample(c(0, 1), size=2*B*p, replace=TRUE))
res_entries
<- matrix(res_entries, nrow=2*B, ncol=p)
good_res
<- getClusterSelMatrix(good_clusters, good_res)
res
::expect_true(is.matrix(res))
testthat::expect_equal(nrow(res), 2*B)
testthat# 3 clusters
::expect_equal(ncol(res), 3)
testthat::expect_identical(colnames(res), c("red_cluster", "green_cluster",
testthat"blue_clust"))
::expect_true(all(is.integer(res)))
testthat::expect_true(all(res %in% c(0, 1)))
testthat
<- good_clusters[[2]]
clust_2
<- rowSums(good_res[, clust_2]) > 0
any_one if(any(any_one)){
::expect_true(all(res[any_one, 2] == 1))
testthat
}
<- rowSums(good_res[, clust_2]) == 0
all_zeros if(any(all_zeros)){
::expect_true(all(res[all_zeros, 2] == 0))
testthat
}
# Not all features in a cluster
<- list(red_cluster=1L:5L, green_cluster=6L:7L, blue_clust=9L)
bad_clusters
::expect_error(getClusterSelMatrix(bad_clusters, good_res),
testthat"length(all_clustered_feats) == p is not TRUE",
fixed=TRUE)
<- as.integer(sample(c(0, 1, 2), size=2*B*p, replace=TRUE))
bad_res_entries
<- matrix(bad_res_entries, nrow=2*B, ncol=p)
bad_res
::expect_error(getClusterSelMatrix(good_clusters, bad_res),
testthat"all(res %in% c(0, 1)) is not TRUE", fixed=TRUE)
})
## Test passed 🌈
Finally, tests for css()
itself!
::test_that("css works", {
testthatset.seed(8712)
<- matrix(stats::rnorm(15*11), nrow=15, ncol=11)
x <- stats::rnorm(15)
y
# Intentionally don't provide clusters for all feature, mix up formatting,
# etc.
<- list(red_cluster=1L:5L,
good_clusters green_cluster=6L:8L,
c4=10:11)
<- css(X=x, y=y, lambda=0.01, clusters=good_clusters, fitfun = cssLasso,
res sampling_type = "SS", B = 13,
prop_feats_remove = 0, train_inds = integer(), num_cores = 1L)
# Basic output
::expect_true(is.list(res))
testthat::expect_identical(class(res), "cssr")
testthat::expect_identical(names(res), c("feat_sel_mat", "clus_sel_mat", "X",
testthat"y", "clusters", "train_inds"))
# feat_sel mat
::expect_true(is.integer(res$feat_sel_mat))
testthat::expect_true(is.matrix(res$feat_sel_mat))
testthat::expect_true(all(res$feat_sel_mat %in% c(0, 1)))
testthat::expect_equal(ncol(res$feat_sel_mat), 11)
testthat::expect_null(colnames(res$feat_sel_mat))
testthat
# clus_sel_mat
::expect_true(is.integer(res$clus_sel_mat))
testthat::expect_true(is.matrix(res$clus_sel_mat))
testthat::expect_true(all(res$clus_sel_mat %in% c(0, 1)))
testthat# 4 clusters
::expect_equal(ncol(res$clus_sel_mat), 4)
testthat::expect_identical(colnames(res$clus_sel_mat), names(res$clusters))
testthat::expect_equal(length(colnames(res$clus_sel_mat)), 4)
testthat::expect_equal(length(unique(colnames(res$clus_sel_mat))), 4)
testthat::expect_true(all(!is.na(colnames(res$clus_sel_mat))))
testthat::expect_true(all(!is.null(colnames(res$clus_sel_mat))))
testthat
# X
::expect_true(is.matrix(res$X))
testthat::expect_true(all(!is.na(res$X)))
testthat::expect_true(is.numeric(res$X))
testthat::expect_equal(ncol(res$X), 11)
testthat::expect_equal(nrow(res$X), 15)
testthat
# y
::expect_true(is.numeric(res$y))
testthat::expect_equal(length(res$y), 15)
testthat
# clusters
::expect_true(is.list(res$clusters))
testthat::expect_equal(length(res$clusters), length(names(res$clusters)))
testthat::expect_equal(length(res$clusters),
testthatlength(unique(names(res$clusters))))
::expect_true(all(!is.na(names(res$clusters))))
testthat::expect_true(all(!is.null(names(res$clusters))))
testthat<- integer()
clust_feats for(i in 1:length(res$clusters)){
<- c(clust_feats, res$clusters[[i]])
clust_feats
}::expect_equal(length(clust_feats), length(unique(clust_feats)))
testthat::expect_equal(length(clust_feats), length(intersect(clust_feats,
testthat1:11)))
# train_inds
::expect_identical(res$train_inds, integer())
testthat
## Trying other inputs
# X as a data.frame
<- datasets::mtcars
X_df <- css(X=X_df, y=stats::rnorm(nrow(X_df)), lambda=0.01, B = 10)
res_fitfun ::expect_identical(class(res_fitfun), "cssr")
testthat
# X as a dataframe with factors (number of columns of final design matrix
# after one-hot encoding factors won't match number of columns of df2)
# cyl, gear, and carb are factors with more than 2 levels
<- X_df
df2 $cyl <- as.factor(df2$cyl)
df2$vs <- as.factor(df2$vs)
df2$am <- as.factor(df2$am)
df2$gear <- as.factor(df2$gear)
df2$carb <- as.factor(df2$carb)
df2
<- css(X=df2, y=stats::rnorm(nrow(X_df)), lambda=0.01, B = 10)
res_fitfun ::expect_identical(class(res_fitfun), "cssr")
testthat
# Should get error if I try to use clusters in this data.frame that contains
# factors with more than two levels
::expect_error(css(X=df2, y=stats::rnorm(nrow(X_df)), lambda=0.01,
testthatB = 10, clusters=1:3), "When stats::model.matrix converted the provided data.frame X to a matrix, the number of columns changed (probably because the provided data.frame contained a factor variable with at least three levels). Please convert X to a matrix yourself using model.matrix and provide cluster assignments according to the columns of the new matrix.",
fixed=TRUE)
# X as a matrix with column names
<- x
x2 colnames(x2) <- LETTERS[1:11]
<- css(X=x2, y=y, lambda=0.01, clusters=good_clusters, B = 13)
res_names ::expect_identical(class(res_names), "cssr")
testthat::expect_identical(colnames(x2), colnames(res_names$X))
testthat::expect_identical(colnames(x2), colnames(res_names$feat_sel_mat))
testthat
# Custom fitfun with nonsense lambda (which will be ignored by fitfun, and
# shouldn't throw any error, because the acceptable input for lambda should be
# enforced only by fitfun)
testFitfun <- function(X, y, lambda){
<- ncol(X)
p stopifnot(p >= 2)
# Choose p/2 features randomly
<- sample.int(p, size=floor(p/2))
selected return(selected)
}
<- css(X=x, y=y, lambda=c("foo", as.character(NA), "bar"),
res_fitfun clusters=1:3, B = 10, fitfun=testFitfun)
::expect_identical(class(res_fitfun), "cssr")
testthat
# Bad lambda
::expect_error(css(X=x, y=y, lambda=-0.01, B = 10),
testthat"For method cssLasso, lambda must be nonnegative.",
fixed=TRUE)
::expect_error(css(X=x, y=y, lambda="foo", B = 10),
testthat"For method cssLasso, lambda must be a numeric.",
fixed=TRUE)
# Single cluster
<- css(X=x, y=y, lambda=0.01, clusters=1:3, B = 10)
res_sing_clust ::expect_identical(class(res_sing_clust), "cssr")
testthat::expect_equal(length(res_sing_clust$clusters), 11 - 3 + 1)
testthat::expect_true(length(unique(names(res_sing_clust$clusters))) == 11 -
testthat3 + 1)
::expect_true(all(!is.na(names(res_sing_clust$clusters))))
testthat::expect_true(all(!is.null(names(res_sing_clust$clusters))))
testthat
# No cluster
::expect_identical(class(css(X=x, y=y, lambda=0.01, B = 10)), "cssr")
testthat
# All clusters named
::expect_identical(class(css(X=x, y=y, clusters=list("a"=1:5,
testthat"b"=6:10,
"c"=11),
lambda=0.01, B=10)), "cssr")
# Other sampling types
::expect_error(css(X=x, y=y, lambda=1, sampling_type="MB"),
testthat"sampling_type MB is not yet supported (and isn't recommended anyway)",
fixed=TRUE)
# Error has quotation marks in it
::expect_error(css(X=x, y=y, lambda=1, sampling_type="S"))
testthat
::expect_error(css(X=x, y=y, lambda=1, sampling_type=1),
testthat"is.character(sampling_type) is not TRUE",
fixed=TRUE)
# B
::expect_warning(css(X=x, y=y, lambda=1, B=5),
testthat"Small values of B may lead to poor results.",
fixed=TRUE)
::expect_error(css(X=x, y=y, lambda=1, B=list(10)),
testthat"is.numeric(B) | is.integer(B) is not TRUE",
fixed=TRUE)
# Clusters
::expect_error(css(X=x, y=y, lambda=1, clusters="red"),
testthat"is.numeric(clusters) | is.integer(clusters) is not TRUE",
fixed=TRUE)
# prop_feats_remove
::expect_identical(class(css(X=x, y=y, lambda=0.01, B = 10,
testthatprop_feats_remove=0.3)), "cssr")
# Weirdly high, but still valid, value of prop_feats_remove
::expect_identical(class(css(X=x, y=y, lambda=0.01, B = 10,
testthatprop_feats_remove=0.9999999999)), "cssr")
# Use train_inds argument
<- css(X=x, y=y, lambda=0.01, B = 10, train_inds=11:15)
res_train ::expect_equal(res_train$train_inds, 11:15)
testthat
})
## Test passed 🥳