8 Tests for the core css() functions
Now that all of the package’s functions have been defined, we collect their tests here, grouped to mirror the definition chapters above. Keeping the tests together at the end lets those chapters read as continuous narrative; because every function is defined before this part, each test runs only after all of its dependencies have been defined.
Tests for coerceDataFrameToMatrix():
testthat::test_that("coerceDataFrameToMatrix keeps a single-column data.frame as a matrix (#43)", {
# A 1-column data.frame must coerce to a 1-column matrix, not collapse to a
# vector (which used to crash with the cryptic "argument is of length zero",
# both with and without clusters).
num_df <- data.frame(a = as.numeric(1:8))
res_clust <- coerceDataFrameToMatrix(num_df, clusters = list(1))
testthat::expect_true(is.matrix(res_clust))
testthat::expect_equal(dim(res_clust), c(8L, 1L))
res_noclust <- coerceDataFrameToMatrix(num_df, clusters = list())
testthat::expect_true(is.matrix(res_noclust))
testthat::expect_equal(dim(res_noclust), c(8L, 1L))
# A matrix passes through unchanged.
m <- matrix(as.numeric(1:8), nrow = 8, ncol = 1)
testthat::expect_identical(coerceDataFrameToMatrix(m, clusters = list()), m)
# The factor-expansion guard still fires (a >= 3-level factor changes ncol).
fac_df <- data.frame(a = factor(c("x", "y", "z", "x", "y", "z", "x", "y")))
testthat::expect_error(coerceDataFrameToMatrix(fac_df, clusters = list(1)),
"the number of columns changed", fixed = TRUE)
})## Test passed with 6 successes.
Tests for checkNoNAs():
testthat::test_that("checkNoNAs flags NA/NaN/Inf in a matrix or data.frame and passes clean input", {
m_ok <- matrix(as.numeric(1:8), nrow = 4, ncol = 2)
testthat::expect_identical(checkNoNAs(m_ok, "X"), m_ok) # returns input invisibly
m_na <- m_ok; m_na[2, 1] <- NA
testthat::expect_error(checkNoNAs(m_na, "X"),
"must not contain missing", fixed = TRUE)
# data.frame with a numeric NA is caught too (before any coercion).
df_na <- data.frame(a = c(1, NA, 3), b = c(4, 5, 6))
testthat::expect_error(checkNoNAs(df_na, "X"),
"must not contain missing", fixed = TRUE)
# arg_name appears in the message.
testthat::expect_error(checkNoNAs(m_na, "newx"), "newx", fixed = TRUE)
# Non-finite values are rejected too (#99): is.na(Inf) is FALSE, so the
# previous bare is.na() check let Inf/-Inf slip through into glmnet and the
# cluster-representative averaging, silently corrupting results.
m_inf <- m_ok; m_inf[2, 1] <- Inf
testthat::expect_error(checkNoNAs(m_inf, "X"), "non-finite", fixed = TRUE)
m_neginf <- m_ok; m_neginf[3, 2] <- -Inf
testthat::expect_error(checkNoNAs(m_neginf, "X"), "non-finite", fixed = TRUE)
# NaN is caught by is.na() (is.na(NaN) is TRUE).
m_nan <- m_ok; m_nan[1, 1] <- NaN
testthat::expect_error(checkNoNAs(m_nan, "X"),
"must not contain missing", fixed = TRUE)
# data.frame with Inf in a numeric column is rejected too.
df_inf <- data.frame(a = c(1, Inf, 3), b = c(4, 5, 6))
testthat::expect_error(checkNoNAs(df_inf, "X"), "non-finite", fixed = TRUE)
# A finite data.frame passes through unchanged.
df_ok <- data.frame(a = c(1, 2, 3), b = c(4, 5, 6))
testthat::expect_identical(checkNoNAs(df_ok, "X"), df_ok)
# A data.frame with non-numeric (factor/character) columns plus clean numeric
# columns still passes: the is.infinite() check is per-numeric-column, so it
# must not false-positive on non-numeric columns.
df_mixed <- data.frame(
f = factor(c("x", "y", "z")),
s = c("a", "b", "c"),
n = c(1, 2, 3),
stringsAsFactors = FALSE)
testthat::expect_identical(checkNoNAs(df_mixed, "X"), df_mixed)
})## Test passed with 10 successes.
Tests for checkFiniteY():
testthat::test_that("checkFiniteY accepts finite numeric/integer y and rejects non-finite or non-numeric y (#100)", {
# Finite numeric and integer y pass, returning the input invisibly.
y_num <- as.numeric(1:8) * 0.5
testthat::expect_identical(checkFiniteY(y_num, "y"), y_num)
y_int <- 1L:8L
testthat::expect_identical(checkFiniteY(y_int, "y"), y_int)
testthat::expect_invisible(checkFiniteY(y_num, "y"))
# NA, NaN, Inf, -Inf each error with the non-finite message.
testthat::expect_error(checkFiniteY(c(1, NA, 3), "y"),
"must not contain missing (NA) or non-finite (Inf) values", fixed = TRUE)
testthat::expect_error(checkFiniteY(c(1, NaN, 3), "y"),
"must not contain missing (NA) or non-finite (Inf) values", fixed = TRUE)
testthat::expect_error(checkFiniteY(c(1, Inf, 3), "y"),
"must not contain missing (NA) or non-finite (Inf) values", fixed = TRUE)
testthat::expect_error(checkFiniteY(c(1, -Inf, 3), "y"),
"must not contain missing (NA) or non-finite (Inf) values", fixed = TRUE)
# A non-numeric (logical/character) y errors with the numeric message.
testthat::expect_error(checkFiniteY(c(TRUE, FALSE, TRUE), "y"),
"must be a numeric (real-valued) vector", fixed = TRUE)
testthat::expect_error(checkFiniteY(c("a", "b"), "y"),
"must be a numeric (real-valued) vector", fixed = TRUE)
# arg_name is interpolated into both messages.
testthat::expect_error(checkFiniteY(c(1, Inf), "y_train_selec"),
"The provided y_train_selec must not contain missing", fixed = TRUE)
testthat::expect_error(checkFiniteY(c(TRUE, FALSE), "y_train_selec"),
"The provided y_train_selec must be a numeric", fixed = TRUE)
})## Test passed with 11 successes.
tests for checkCssClustersInput:
testthat::test_that("checkCssClustersInput works", {
# Intentionally don't provide clusters for all feature, mix up formatting,
# etc.
good_clusters <- list(red_cluster=1L:4L, green_cluster=5L:8L)
res <- checkCssClustersInput(good_clusters)
# clusters
testthat::expect_true(is.list(res))
testthat::expect_equal(length(res), length(names(res)))
testthat::expect_equal(length(res), length(unique(names(res))))
testthat::expect_true(all(!is.na(names(res))))
testthat::expect_true(all(!is.null(names(res))))
clust_feats <- integer()
for(i in 1:length(res)){
clust_feats <- c(clust_feats, res[[i]])
}
testthat::expect_equal(length(clust_feats), length(unique(clust_feats)))
testthat::expect_equal(length(clust_feats), length(intersect(clust_feats,
1:8)))
## Trying other inputs
unnamed_clusters <- list(1L:3L, 5L:8L)
res <- checkCssClustersInput(unnamed_clusters)
# clusters
testthat::expect_true(is.list(res))
clust_feats <- integer()
for(i in 1:length(res)){
clust_feats <- c(clust_feats, res[[i]])
}
testthat::expect_equal(length(clust_feats), length(unique(clust_feats)))
testthat::expect_equal(length(clust_feats), length(intersect(clust_feats,
1:8)))
testthat::expect_error(checkCssClustersInput(list(1:4, 4:6)),
"Overlapping clusters detected; clusters must be non-overlapping. Overlapping clusters: 1, 2.",
fixed=TRUE)
testthat::expect_error(checkCssClustersInput(list(2:3, 2:3)),
"length(clusters) == length(unique(clusters)) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkCssClustersInput(list(2:3, as.integer(NA))),
"!is.na(clusters) are not all TRUE",
fixed=TRUE)
testthat::expect_error(checkCssClustersInput(list(2:3, c(4, 4, 5))),
"length(clusters[[i]]) == length(unique(clusters[[i]])) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkCssClustersInput(list(2:3, -1)),
"all(clusters[[i]] >= 1) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkCssClustersInput(c(0.4, 0.6)),
"all(clusters == round(clusters)) is not TRUE",
fixed=TRUE)
# Single cluster
res_sing_clust <- checkCssClustersInput(2:5)
testthat::expect_equal(length(res_sing_clust), 4)
})## Test passed with 17 successes.
Tests for checkY():
testthat::test_that("checkY works", {
testthat::expect_null(checkY(as.numeric(1:20)*.1, 20))
testthat::expect_null(checkY(1L:15L, 15))
testthat::expect_error(checkY(1:7, 8), "n == length(y) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkY(1:7, -7), "n > 0 is not TRUE", fixed=TRUE)
testthat::expect_error(checkY(rep(as.numeric(NA), 13), 13),
"all(is.finite(y)) is not TRUE", fixed=TRUE)
# Inf is now rejected too (previously all(!is.na(y)) missed it) (#100)
testthat::expect_error(checkY(c(1, Inf, 3), 3),
"all(is.finite(y)) is not TRUE", fixed=TRUE)
testthat::expect_error(checkY(rep(5.2, 9), 9),
"length(unique(y)) > 1 is not TRUE", fixed=TRUE)
testthat::expect_error(checkY(c(TRUE, FALSE, TRUE), 3),
"is.numeric(y) | is.integer(y) is not TRUE",
fixed=TRUE)
})## Test passed with 8 successes.
Tests for checkFormatClustersInput():
testthat::test_that("checkFormatClustersInput works", {
# Intentionally don't provide clusters for all feature, mix up formatting,
# etc.
good_clusters <- list(red_cluster=1L:4L, green_cluster=5L:8L)
res <- checkFormatClustersInput(good_clusters, p=10,
clust_names=c("red_cluster", "green_cluster"),
get_prototypes=FALSE, x=NA, y=NA, R=NA)
testthat::expect_true(is.list(res))
clust_feats <- integer()
for(i in 1:length(res)){
clust_feats <- c(clust_feats, res[[i]])
}
testthat::expect_equal(length(clust_feats), length(unique(clust_feats)))
testthat::expect_equal(length(clust_feats), length(intersect(clust_feats,
1:8)))
## Trying other inputs
unnamed_clusters <- list(1L:3L, 5L:8L)
res <- checkFormatClustersInput(unnamed_clusters, p=10, clust_names=NA,
get_prototypes=FALSE, x=NA, y=NA, R=NA)
# clusters
testthat::expect_true(is.list(res))
clust_feats <- integer()
for(i in 1:length(res)){
clust_feats <- c(clust_feats, res[[i]])
}
testthat::expect_equal(length(clust_feats), length(unique(clust_feats)))
testthat::expect_equal(length(clust_feats), length(intersect(clust_feats,
1:8)))
testthat::expect_error(checkFormatClustersInput(list(1:4, 4:6), p=10,
clust_names=NA,
get_prototypes=FALSE, x=NA,
y=NA, R=NA),
"length(intersect(clusters[[i]], clusters[[j]])) == 0 is not TRUE",
fixed=TRUE)
testthat::expect_error(checkFormatClustersInput(list(2:3, 2:3), p=10,
clust_names=NA, get_prototypes=FALSE, x=NA,
y=NA, R=NA),
"length(clusters) == length(unique(clusters)) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkFormatClustersInput(list(2:3, as.integer(NA)),
p=10,
clust_names=NA,
get_prototypes=FALSE, x=NA,
y=NA, R=NA),
"Must specify one of clusters or R (or does one of these provided inputs contain NA?)",
fixed=TRUE)
testthat::expect_error(checkFormatClustersInput(list(2:3, c(4, 4, 5)),
p=10,
clust_names=NA,
get_prototypes=FALSE, x=NA,
y=NA, R=NA),
"length(clusters[[i]]) == length(unique(clusters[[i]])) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkFormatClustersInput(list(1:4, -1),
p=10,
clust_names=NA,
get_prototypes=FALSE, x=NA,
y=NA, R=NA),
"all(clusters[[i]] >= 1) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkFormatClustersInput(list(1:4, c(2.3, 1.2)),
p=10,
clust_names=NA,
get_prototypes=FALSE, x=NA,
y=NA, R=NA),
"is.integer(clusters[[i]]) is not TRUE",
fixed=TRUE)
# Single cluster
testthat::expect_true(is.list(checkFormatClustersInput(c(1:5), p=10,
clust_names=NA,
get_prototypes=FALSE,
x=NA, y=NA, R=NA)))
})## Test passed with 13 successes.
Tests for checkClusters():
testthat::test_that("checkClusters works", {
good_clusters <- list(c1=1L:5L, c2=6L:8L, c3=9L)
testthat::expect_null(checkClusters(good_clusters, 9))
testthat::expect_error(checkClusters(good_clusters, 10),
"length(all_clustered_feats) == p is not TRUE",
fixed=TRUE)
testthat::expect_error(checkClusters(1L:10L, 10),
"is.list(clusters) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkClusters(list(c1=1L:5L, c2=6L:8L, c3=9L,
c4=integer()), 9),
"all(lengths(clusters) >= 1) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkClusters(list(c1=1L:5L, c2=6L:8L, c3=9L,
c4=as.integer(NA)), 9),
"all(!is.na(clusters)) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkClusters(list(c1=1L:5L, c2=6L:8L, c3=9L,
c2=6L:8L), 9),
"n_clusters == length(unique(clusters)) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkClusters(list(c1=1L:5L, c2=6L:8L, c3=10L), 9),
"all(all_clustered_feats <= p) is not TRUE",
fixed=TRUE)
})## Test passed with 7 successes.
Tests for identifyPrototype():
testthat::test_that("identifyPrototype works", {
testthat::expect_identical(identifyPrototype(10L, "a", 5), 10L)
n <- 10
p <- 5
set.seed(9834)
X <- matrix(stats::rnorm(n*p), nrow=n, ncol=p)
y <- X[, p]
testthat::expect_equal(identifyPrototype(as.integer(p), X, y), p)
testthat::expect_equal(identifyPrototype(2L, X, y), 2)
testthat::expect_equal(identifyPrototype(as.integer(2:p), X, y), p)
testthat::expect_error(identifyPrototype(as.integer(2:p), y, X),
"incorrect number of dimensions",
fixed=TRUE)
y2 <- rnorm(n)
res <- identifyPrototype(c(2L, 3L), X, y2)
testthat::expect_true(is.integer(res))
testthat::expect_equal(length(res), 1)
testthat::expect_true(res %in% c(2L, 3L))
})
testthat::test_that("identifyPrototype handles a constant cluster member silently (#59)", {
# A constant column has undefined correlation; the vectorized cor() path
# returns 0 for it -- and SILENTLY (base cor() warns "the standard deviation
# is zero" on a constant column; identifyPrototype now suppresses that to
# preserve the silent-0 contract).
n <- 12
set.seed(7321)
varying <- stats::rnorm(n)
# Column 1 is constant; column 2 equals y (perfectly correlated).
x <- cbind(rep(1.5, n), varying)
y <- varying
# The prototype must be the varying member (2), never the constant column (1).
testthat::expect_identical(identifyPrototype(c(1L, 2L), x, y), 2L)
# And no "standard deviation is zero" warning may leak.
testthat::expect_silent(identifyPrototype(c(1L, 2L), x, y))
})
testthat::test_that("identifyPrototype warns on a constant response y (#67)", {
# Moved from the removed helper's test: a constant y makes every
# cluster-member correlation undefined, and identifyPrototype warns.
set.seed(5417)
X <- matrix(stats::rnorm(8 * 2), nrow = 8, ncol = 2)
y_const <- rep(2.5, 8)
testthat::expect_warning(
identifyPrototype(c(1L, 2L), X, y_const),
"identifyPrototype: the response y has only one unique value",
fixed = TRUE)
})
testthat::test_that("identifyPrototype selects by absolute correlation (#67)", {
# The prototype is the member with the largest ABSOLUTE correlation with y, so
# a near-perfectly NEGATIVELY correlated member must beat a weak positive one.
set.seed(6708)
n <- 20
y <- stats::rnorm(n)
X <- cbind(-y, y + stats::rnorm(n, sd = 4)) # col 1: cor=-1; col 2: weak +cor
# abs() => col 1 (|cor|=1) wins despite its negative sign.
testthat::expect_identical(identifyPrototype(c(1L, 2L), X, y), 1L)
})## Test passed with 8 successes.
## Test passed with 2 successes.
## Test passed with 1 success.
## Test passed with 1 success.
Tests for getPrototypes()
testthat::test_that("getPrototypes works", {
n <- 10
p <- 5
set.seed(902689)
X <- matrix(stats::rnorm(n*p), nrow=n, ncol=p)
y <- X[, p]
testthat::expect_identical(getPrototypes(list(1L, 2L, 3L, 4L, 5L), X, y), 1:5)
testthat::expect_identical(getPrototypes(list(1L:5L), X, y), 5L)
testthat::expect_identical(getPrototypes(list(1L, 2L:5L), X, y), c(1L, 5L))
testthat::expect_identical(getPrototypes(list(3L:5L), X, y), 5L)
y2 <- rnorm(n)
res <- getPrototypes(list(1L, c(2L, 3L), c(4L, 5L)), X, y2)
testthat::expect_true(is.integer(res))
testthat::expect_equal(length(res), 3)
testthat::expect_identical(res[1], 1L)
testthat::expect_true(res[2] %in% c(2L, 3L))
testthat::expect_true(res[3] %in% c(4L, 5L))
testthat::expect_error(getPrototypes(list(1L, 2L, 3L, 4L, 5L), y, X),
"is.matrix(x) is not TRUE",
fixed=TRUE)
testthat::expect_error(getPrototypes(list(1L, 2L, 3L, 4L, 5L), X, y[1:9]),
"n == length(y) is not TRUE",
fixed=TRUE)
})## Test passed with 11 successes.
Finally, tests for formatClusters():
testthat::test_that("formatClusters works", {
# Intentionally don't provide clusters for all feature, mix up formatting,
# etc.
good_clusters <- list(red_cluster=1:3, 5:8)
res <- formatClusters(good_clusters, p=10)
testthat::expect_true(is.list(res))
testthat::expect_identical(names(res), c("clusters", "multiple"))
# Clusters
testthat::expect_true(is.list(res$clusters))
testthat::expect_equal(length(res$clusters), 5)
testthat::expect_equal(5, length(names(res$clusters)))
testthat::expect_equal(5, length(unique(names(res$clusters))))
testthat::expect_true("red_cluster" %in% names(res$clusters))
testthat::expect_true(all(!is.na(names(res$clusters))))
testthat::expect_true(all(!is.null(names(res$clusters))))
testthat::expect_true(all(names(res$clusters) != ""))
clust_feats <- integer()
true_list <- list(1:3, 5:8, 4, 9, 10)
for(i in 1:length(res$clusters)){
testthat::expect_true(is.integer(res$clusters[[i]]))
testthat::expect_equal(length(intersect(clust_feats, res$clusters[[i]])), 0)
testthat::expect_true(all(res$clusters[[i]] %in% 1:10))
testthat::expect_equal(length(res$clusters[[i]]),
length(unique(res$clusters[[i]])))
testthat::expect_true(all(res$clusters[[i]] == true_list[[i]]))
clust_feats <- c(clust_feats, res$clusters[[i]])
}
testthat::expect_equal(length(clust_feats), 10)
testthat::expect_equal(10, length(unique(clust_feats)))
testthat::expect_equal(10, length(intersect(clust_feats, 1:10)))
# Multiple
testthat::expect_true(res$multiple)
testthat::expect_false(formatClusters(3:5, p=10)$multiple)
## Trying other inputs
testthat::expect_error(formatClusters(list(3:7, 7:10), p=15),
"length(intersect(clusters[[i]], clusters[[j]])) == 0 is not TRUE",
fixed=TRUE)
testthat::expect_error(formatClusters(list(5:8, 5:8), p=9),
"length(clusters) == length(unique(clusters)) is not TRUE",
fixed=TRUE)
# Out-of-range cluster index is now caught up front by checkFormatClustersInput
# (#104, L3) with a message naming the offending index and p, instead of the
# downstream cryptic length(all_clustered_feats) == p stopifnot.
testthat::expect_error(formatClusters(list(5:8), p=7),
"Cluster index 8 exceeds the number of features (p = 7).",
fixed=TRUE)
testthat::expect_error(formatClusters(list(2:3, as.integer(NA)), p=10),
"Must specify one of clusters or R (or does one of these provided inputs contain NA?)",
fixed=TRUE)
testthat::expect_error(formatClusters(list(2:3, c(4, 4, 5)), p=8),
"length(clusters[[i]]) == length(unique(clusters[[i]])) is not TRUE",
fixed=TRUE)
testthat::expect_error(formatClusters(list(1:4, -1), p=10),
"all(clusters[[i]] >= 1) is not TRUE",
fixed=TRUE)
testthat::expect_error(formatClusters(list(1:4, c(2.3, 1.2))),
"is.integer(clusters[[i]]) is not TRUE",
fixed=TRUE)
### Test prototypes feature
n <- 8
p <- 6
set.seed(690289)
X <- matrix(stats::rnorm(n*p), nrow=n, ncol=p)
y <- X[, p]
res <- formatClusters(clusters=list(), p=p, get_prototypes=TRUE, x=X, y=y)
testthat::expect_true(is.list(res))
testthat::expect_identical(names(res), c("clusters", "multiple",
"prototypes"))
testthat::expect_true(is.integer(res$prototypes))
testthat::expect_identical(res$prototypes, 1:p)
testthat::expect_equal(formatClusters(clusters=1:p, p=p,
get_prototypes=TRUE, x=X,
y=y)$prototypes, p)
testthat::expect_identical(formatClusters(clusters=list(1L, 2L:p), p=p,
get_prototypes=TRUE, x=X,
y=y)$prototypes,
as.integer(c(1, p)))
testthat::expect_identical(formatClusters(clusters=3L:p, p=p,
get_prototypes=TRUE, x=X,
y=y)$prototypes,
as.integer(c(p, 1, 2)))
y2 <- rnorm(n)
res <- formatClusters(clusters=list(2:3, 4:5), p=p, get_prototypes=TRUE,
x=X, y=y2)$prototypes
testthat::expect_true(is.integer(res))
testthat::expect_equal(length(res), 4)
testthat::expect_true(res[1] %in% c(2L, 3L))
testthat::expect_true(res[2] %in% c(4L, 5L))
testthat::expect_equal(res[3], 1L)
testthat::expect_equal(res[4], p)
testthat::expect_error(formatClusters(clusters=list(2:3, 4:5), p=p,
get_prototypes=TRUE, x=y2, y=X),
"is.matrix(x) is not TRUE", fixed=TRUE)
testthat::expect_error(formatClusters(clusters=list(2:3, 4:5), p=p,
get_prototypes=TRUE, x=X,
y=y2[1:(n-1)]),
"n == length(y) is not TRUE", fixed=TRUE)
})## Test passed with 62 successes.
Tests for checkSamplingType():
testthat::test_that("checkSamplingType works", {
testthat::expect_null(checkSamplingType("SS"))
testthat::expect_error(checkSamplingType("MB"),
"sampling_type MB is not yet supported (and isn't recommended anyway)",
fixed=TRUE)
testthat::expect_error(checkSamplingType(c("SS", "SS")),
"length(sampling_type) == 1 is not TRUE", fixed=TRUE)
testthat::expect_error(checkSamplingType(1),
"is.character(sampling_type) is not TRUE", fixed=TRUE)
testthat::expect_error(checkSamplingType(as.character(NA)),
"!is.na(sampling_type) is not TRUE", fixed=TRUE)
})## Test passed with 5 successes.
Tests for checkB():
testthat::test_that("checkB works", {
testthat::expect_null(checkB(1500))
testthat::expect_null(checkB(15))
testthat::expect_error(checkB("B"),
"is.numeric(B) | is.integer(B) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkB(20:25), "length(B) == 1 is not TRUE",
fixed=TRUE)
testthat::expect_error(checkB(as.integer(NA)), "!is.na(B) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkB(1.2), "B == round(B) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkB(-100), "B > 0 is not TRUE",
fixed=TRUE)
testthat::expect_warning(checkB(5),
"Small values of B may lead to poor results.",
fixed=TRUE)
testthat::expect_warning(checkB(2200),
"Large values of B may require long computation times.",
fixed=TRUE)
})## Test passed with 9 successes.
Tests for checkAlpha():
testthat::test_that("checkAlpha validates alpha in (0, 1] (#72)", {
testthat::expect_null(checkAlpha(1))
testthat::expect_null(checkAlpha(0.5))
testthat::expect_error(checkAlpha("0.5"),
"is.numeric(alpha) | is.integer(alpha) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkAlpha(c(0.5, 0.6)),
"length(alpha) == 1 is not TRUE", fixed=TRUE)
testthat::expect_error(checkAlpha(NA_real_), "!is.na(alpha) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkAlpha(0), "alpha > 0 is not TRUE", fixed=TRUE)
testthat::expect_error(checkAlpha(1.5), "alpha <= 1 is not TRUE", fixed=TRUE)
})## Test passed with 7 successes.
Tests for checkPropFeatsRemove():
testthat::test_that("checkPropFeatsRemove works", {
testthat::expect_null(checkPropFeatsRemove(0, 5))
testthat::expect_null(checkPropFeatsRemove(.3, 10))
testthat::expect_error(checkPropFeatsRemove(1, 3),
"prop_feats_remove >= 0 & prop_feats_remove < 1 is not TRUE",
fixed=TRUE)
testthat::expect_error(checkPropFeatsRemove(c(.5, .6), 17),
"length(prop_feats_remove) == 1 is not TRUE",
fixed=TRUE)
testthat::expect_error(checkPropFeatsRemove(".3", 99),
"is.numeric(prop_feats_remove) | is.integer(prop_feats_remove) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkPropFeatsRemove(as.numeric(NA), 172),
"!is.na(prop_feats_remove) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkPropFeatsRemove(.1, 1),
"p >= 2 is not TRUE",
fixed=TRUE)
})## Test passed with 7 successes.
Finally, tests for checkCssInputs():
testthat::test_that("checkCssInputs works", {
set.seed(80526)
x <- matrix(stats::rnorm(15*11), nrow=15, ncol=11)
y <- stats::rnorm(15)
# Intentionally don't provide clusters for all feature, mix up formatting,
# etc.
good_clusters <- list(red_cluster=1L:5L,
green_cluster=6L:8L
# , c4=10:11
)
res <- checkCssInputs(X=x, y=y, lambda=0.01, clusters=good_clusters,
fitfun = cssLasso, sampling_type = "SS", B = 13,
prop_feats_remove = 0, train_inds = integer(),
num_cores = 1L)
# Basic output
testthat::expect_true(is.list(res))
testthat::expect_identical(names(res), c("feat_names", "X", "clusters"))
# feat_names
testthat::expect_true(is.character(res$feat_names))
testthat::expect_true(is.na(res$feat_names))
testthat::expect_equal(length(res$feat_names), 1)
# X
testthat::expect_true(is.matrix(res$X))
testthat::expect_true(all(!is.na(res$X)))
testthat::expect_true(is.numeric(res$X))
testthat::expect_equal(ncol(res$X), 11)
testthat::expect_equal(nrow(res$X), 15)
# clusters
testthat::expect_true(is.list(res$clusters))
testthat::expect_equal(length(res$clusters), length(names(res$clusters)))
testthat::expect_equal(length(res$clusters),
length(unique(names(res$clusters))))
testthat::expect_true(all(!is.na(names(res$clusters))))
testthat::expect_true(all(!is.null(names(res$clusters))))
clust_feats <- integer()
for(i in 1:length(res$clusters)){
clust_feats <- c(clust_feats, res$clusters[[i]])
}
testthat::expect_equal(length(clust_feats), length(unique(clust_feats)))
testthat::expect_equal(length(clust_feats), length(intersect(clust_feats,
1:11)))
# Regression test: train_inds with an out-of-range index (> n) must be
# rejected. Previously the upper-bound check was a no-op due to a misplaced
# parenthesis (all(train_inds) <= n instead of all(train_inds <= n)), so
# out-of-range indices were silently accepted. (n = 15 here; 16 is invalid.)
testthat::expect_error(checkCssInputs(X=x, y=y, lambda=0.01,
clusters=good_clusters,
fitfun = cssLasso, sampling_type = "SS",
B = 13, prop_feats_remove = 0,
train_inds = c(1L, 2L, 16L),
num_cores = 1L))
## Trying other inputs
# Custom fitfun with nonsense lambda (which will be ignored by fitfun, and
# shouldn't throw any error, because the acceptable input for lambda should be
# enforced only by fitfun)
testFitfun <- function(X, y, lambda){
p <- ncol(X)
stopifnot(p >= 2)
# Choose p/2 features randomly
selected <- sample.int(p, size=floor(p/2))
return(selected)
}
res_fitfun <- checkCssInputs(X=x, y=y, lambda=x, clusters=1:3,
fitfun = testFitfun, sampling_type = "SS",
B = 13, prop_feats_remove = 0,
train_inds = integer(), num_cores = 1L)
testthat::expect_true(is.list(res_fitfun))
# Single cluster
res_sing_clust <- checkCssInputs(X=x, y=y,
lambda=c("foo", as.character(NA), "bar"),
clusters=1:3, fitfun = testFitfun,
sampling_type = "SS", B = 13,
prop_feats_remove = 0,
train_inds = integer(), num_cores = 1L)
testthat::expect_true(is.list(res_sing_clust))
testthat::expect_equal(length(res_sing_clust$clusters), 11 - 3 + 1)
testthat::expect_true(length(unique(names(res_sing_clust$clusters))) == 11 -
3 + 1)
testthat::expect_true(all(!is.na(names(res_sing_clust$clusters))))
testthat::expect_true(all(!is.null(names(res_sing_clust$clusters))))
# Other sampling types
testthat::expect_error(checkCssInputs(X=x, y=y, lambda=c("foo",
as.character(NA),
"bar"), clusters=1:3,
fitfun = testFitfun,
sampling_type = "MB", B = 13,
prop_feats_remove = 0,
train_inds = integer(), num_cores = 1L),
"sampling_type MB is not yet supported (and isn't recommended anyway)",
fixed=TRUE)
# Error has quotation marks in it
testthat::expect_error(checkCssInputs(X=x, y=y, lambda=c("foo",
as.character(NA),
"bar"), clusters=1:3,
fitfun = testFitfun,
sampling_type = "S", B = 13,
prop_feats_remove = 0,
train_inds = integer(), num_cores = 1L))
testthat::expect_error(checkCssInputs(X=x, y=y, lambda=c("foo", "bar",
as.character(NA)),
clusters=1:3, fitfun = testFitfun,
sampling_type = 2, B = 13,
prop_feats_remove = 0,
train_inds = integer(), num_cores = 1L),
"is.character(sampling_type) is not TRUE",
fixed=TRUE)
# B
testthat::expect_warning(checkCssInputs(X=x, y=y, lambda=c("foo", "bar",
as.character(NA)),
clusters=1:3, fitfun = testFitfun,
sampling_type = "SS", B = 5,
prop_feats_remove = 0,
train_inds = integer(), num_cores = 1L),
"Small values of B may lead to poor results.",
fixed=TRUE)
testthat::expect_error(checkCssInputs(X=x, y=y, lambda=c("foo", "bar",
as.character(NA)),
clusters=1:3, fitfun = testFitfun,
sampling_type = "SS", B = "foo",
prop_feats_remove = 0,
train_inds = integer(), num_cores = 1L),
"is.numeric(B) | is.integer(B) is not TRUE",
fixed=TRUE)
# prop_feats_remove
testthat::expect_true(is.list(checkCssInputs(X=x, y=y,
lambda=c("foo", "bar",
as.character(NA)),
clusters=1:3, fitfun=testFitfun,
sampling_type = "SS", B = 12,
prop_feats_remove = 0.3,
train_inds = integer(),
num_cores = 1L)))
# Use train_inds argument
testthat::expect_true(is.list(checkCssInputs(X=x, y=y,
lambda=c("foo", "bar",
as.character(NA)),
clusters=1:3, fitfun=testFitfun,
sampling_type = "SS", B = 12,
prop_feats_remove = 0.3,
train_inds = 11:15,
num_cores = 1L)))
})## Test passed with 31 successes.
Tests for checkCssLoopOutput():
testthat::test_that("checkCssLoopOutput works", {
testthat::expect_null(checkCssLoopOutput(selected=1:5, p=6,
feats_on_subsamp=1:6))
testthat::expect_error(checkCssLoopOutput(selected=1:5, p=4,
feats_on_subsamp=1:6),
"The provided feature selection method fitfun returned a vector of selected features longer than p on (at least) one subsample",
fixed=TRUE)
testthat::expect_error(checkCssLoopOutput(selected=1:5, p=7,
feats_on_subsamp=1:4),
"The provided feature selection method somehow selected features that were not provided for it to consider.",
fixed=TRUE)
testthat::expect_error(checkCssLoopOutput(selected=c(1, 2, 3, 4.4, 5), p=7,
feats_on_subsamp=1:7),
"The provided feature selection method fitfun failed to return a vector of valid (integer) indices on (at least) one subsample",
fixed=TRUE)
testthat::expect_error(checkCssLoopOutput(selected=rep(1, 3), p=7,
feats_on_subsamp=1:7),
"The provided feature selection method fitfun returned a vector of selected features containing repeated indices on (at least) one subsample",
fixed=TRUE)
testthat::expect_error(checkCssLoopOutput(selected=c(-1, 5), p=7,
feats_on_subsamp=1:7),
"The provided feature selection method fitfun returned a vector of selected features containing a non-positive index on (at least) one subsample",
fixed=TRUE)
testthat::expect_error(checkCssLoopOutput(selected=c(0, 5), p=7,
feats_on_subsamp=1:7),
"The provided feature selection method fitfun returned a vector of selected features containing a non-positive index on (at least) one subsample",
fixed=TRUE)
testthat::expect_error(checkCssLoopOutput(selected=as.integer(NA), p=7,
feats_on_subsamp=1:7),
"The provided feature selection method fitfun returned a vector containing NA values on (at least) one subsample",
fixed=TRUE)
testthat::expect_error(checkCssLoopOutput(selected=c("1", "2", "3"), p=7,
feats_on_subsamp=1:7),
"The provided feature selection method fitfun failed to return an integer or numeric vector on (at least) one subsample",
fixed=TRUE)
})## Test passed with 9 successes.
Tests for fitfunFailureMessage():
testthat::test_that("fitfunFailureMessage surfaces the cause and subsample index (#73)", {
# try-error path: the wrapped condition message is surfaced (this is what
# getSelMatrix actually receives from mclapply under forking).
te <- structure("Error\n", class = "try-error",
condition = simpleError("fitfun returned a character vector"))
msg <- fitfunFailureMessage(te, 4L)
testthat::expect_match(msg, "subsample 4", fixed = TRUE)
testthat::expect_match(msg, "fitfun returned a character vector", fixed = TRUE)
# defensive non-try-error fallback: the class is reported.
msg2 <- fitfunFailureMessage("not an integer vector", 2L)
testthat::expect_match(msg2, "class character", fixed = TRUE)
testthat::expect_match(msg2, "subsample 2", fixed = TRUE)
testthat::expect_match(msg2, "integer vector of selected feature indices",
fixed = TRUE)
})## Test passed with 5 successes.
Tests for checkCssLassoInputs():
testthat::test_that("checkCssLassoInputs works", {
set.seed(761)
x <- matrix(stats::rnorm(15*4), nrow=15, ncol=4)
y <- stats::rnorm(15)
testthat::expect_null(checkCssLassoInputs(X=x, y=y, lambda=0.01))
testthat::expect_error(checkCssLassoInputs(X=x, y=logical(15), lambda=0.05),
"For method cssLasso, y must be a numeric or integer vector.",
fixed=TRUE)
# Integer y is accepted (is.numeric(1L) is TRUE; the | !is.integer guard
# makes the validator read uniformly with the other entry points). Regression
# test for issue #13 -- this is NOT a behavior change, integer y always worked.
testthat::expect_null(checkCssLassoInputs(X=x, y=as.integer(round(y * 10)),
lambda=0.01))
testthat::expect_error(checkCssLassoInputs(X=x[1:13, ], y=y, lambda=0.01),
"For method cssLasso, y must be a vector of length equal to nrow(X).",
fixed=TRUE)
testthat::expect_error(checkCssLassoInputs(X=x, y=rep(1.2, 15), lambda=0.05),
"Subsample with only one unique value of y detected--for method cssLasso, all subsamples of y of size floor(n/2) must have more than one unique value.",
fixed=TRUE)
testthat::expect_error(checkCssLassoInputs(X=x, y=y, lambda=TRUE),
"For method cssLasso, lambda must be a numeric.",
fixed=TRUE)
testthat::expect_error(checkCssLassoInputs(X=x, y=y, lambda=as.numeric(NA)),
"NA detected in provided lambda input to cssLasso",
fixed=TRUE)
testthat::expect_error(checkCssLassoInputs(X=x, y=y, lambda=-0.01),
"For method cssLasso, lambda must be nonnegative.",
fixed=TRUE)
testthat::expect_error(checkCssLassoInputs(X=x, y=y, lambda=x),
"For method cssLasso, lambda must be either a single nonnegative numeric or a named length-2 numeric vector c(lambda=<value>, alpha=<value>).",
fixed=TRUE)
testthat::expect_error(checkCssLassoInputs(X=x, y=y, lambda=numeric()),
"For method cssLasso, lambda must be either a single nonnegative numeric or a named length-2 numeric vector c(lambda=<value>, alpha=<value>).",
fixed=TRUE)
testthat::expect_error(checkCssLassoInputs(X=x, y=y, lambda=-0.01),
"For method cssLasso, lambda must be nonnegative.",
fixed=TRUE)
# Bundled c(lambda=, alpha=) form (elastic net)
testthat::expect_null(checkCssLassoInputs(X=x, y=y,
lambda=c(lambda=0.1, alpha=0.5)))
# Order-insensitive (setequal on names)
testthat::expect_null(checkCssLassoInputs(X=x, y=y,
lambda=c(alpha=0.5, lambda=0.1)))
# alpha at the boundary 1 is allowed
testthat::expect_null(checkCssLassoInputs(X=x, y=y,
lambda=c(lambda=0.1, alpha=1)))
# Unnamed length-2 vector is rejected
testthat::expect_error(checkCssLassoInputs(X=x, y=y, lambda=c(0.1, 0.5)),
"For method cssLasso, lambda must be either a single nonnegative numeric or a named length-2 numeric vector c(lambda=<value>, alpha=<value>).",
fixed=TRUE)
# alpha = 0 (degenerate ridge) is rejected
testthat::expect_error(checkCssLassoInputs(X=x, y=y,
lambda=c(lambda=0.1, alpha=0)),
"For method cssLasso, the alpha component of lambda must be in (0, 1].",
fixed=TRUE)
# alpha > 1 is rejected
testthat::expect_error(checkCssLassoInputs(X=x, y=y,
lambda=c(lambda=0.1, alpha=1.2)),
"For method cssLasso, the alpha component of lambda must be in (0, 1].",
fixed=TRUE)
# Length-3 vector is rejected
testthat::expect_error(checkCssLassoInputs(X=x, y=y,
lambda=c(lambda=0.1, alpha=0.5,
extra=0.3)),
"For method cssLasso, lambda must be either a single nonnegative numeric or a named length-2 numeric vector c(lambda=<value>, alpha=<value>).",
fixed=TRUE)
})## Test passed with 18 successes.
Tests for cssLasso() confirming that the elastic net mixing parameter alpha,
bundled into lambda as c(lambda=, alpha=), actually changes which features
are selected (and that the default pure-lasso path is unchanged). On a design
with a correlated block of relevant features, a smaller alpha (more ridge-like)
pulls in the correlated block partners that the pure lasso drops, so the
elastic-net selected set is a strict superset of the lasso selected set:
testthat::test_that("cssLasso alpha (bundled in lambda) drives selection", {
# Correlated-block design: features 1-4 are relevant and share a common
# latent factor at correlation rho; features 5-20 are irrelevant noise.
set.seed(1)
n <- 100
p <- 20
rho <- 0.9
block <- 4
Z <- matrix(stats::rnorm(n*p), n, p)
common <- stats::rnorm(n)
for(j in 1:block){
Z[, j] <- sqrt(rho)*common + sqrt(1 - rho)*Z[, j]
}
y <- as.numeric(Z %*% c(rep(1, block), rep(0, p - block)) + stats::rnorm(n))
L <- 3.72251
s_lasso <- cssLasso(Z, y, lambda=c(lambda=L, alpha=1))
s_enet <- cssLasso(Z, y, lambda=c(lambda=L, alpha=0.2))
# Pure lasso selects only the two strongest block members; the elastic net
# pulls in the full correlated block.
testthat::expect_identical(s_lasso, c(3L, 4L))
testthat::expect_identical(s_enet, c(1L, 2L, 3L, 4L))
# The elastic-net set is a strict superset of the lasso set (the round-2
# cosmetic: robust to L within +/-10%).
testthat::expect_true(all(s_lasso %in% s_enet))
testthat::expect_true(length(setdiff(s_enet, s_lasso)) > 0)
# Back-compatibility: a scalar lambda is byte-identical to the bundled form
# with alpha = 1 (the default pure-lasso path is unchanged).
testthat::expect_identical(cssLasso(Z, y, lambda=L),
cssLasso(Z, y, lambda=c(lambda=L, alpha=1)))
})## Test passed with 5 successes.
Tests for cssLoop() (had to define checkCssLassoInputs() first):
testthat::test_that("cssLoop works", {
set.seed(89134)
x <- matrix(stats::rnorm(9*8), nrow=9, ncol=8)
y <- stats::rnorm(9)
output <- cssLoop(input=1L:4L, x=x, y=y, lambda=0.05,
fitfun=cssLasso)
testthat::expect_true(is.integer(output))
testthat::expect_equal(length(output), length(unique(output)))
testthat::expect_true(length(output) <= 8)
testthat::expect_true(all(output >= 1))
testthat::expect_true(all(output <= 8))
testthat::expect_error(cssLoop(input=1L:6L, x=x, y=y, lambda=0.05,
fitfun=cssLasso),
"floor(n/2) == length(subsample) is not TRUE",
fixed=TRUE)
testthat::expect_error(cssLoop(input=1L:4L, x=x, y=y[1:8],
lambda=0.05, fitfun=cssLasso),
"length(y) == n is not TRUE",
fixed=TRUE)
testthat::expect_error(cssLoop(input=1L:4L, x=x, y=logical(9),
lambda=0.05, fitfun=cssLasso),
"For method cssLasso, y must be a numeric or integer vector.",
fixed=TRUE)
testthat::expect_error(cssLoop(input=1L:4L, x=x, y=y,
lambda=x, fitfun=cssLasso),
"For method cssLasso, lambda must be either a single nonnegative numeric or a named length-2 numeric vector c(lambda=<value>, alpha=<value>).",
fixed=TRUE)
# Test other input format
alt_input <- list("subsample"=2:5, "feats_to_keep"=c(FALSE, rep(TRUE, 4),
rep(FALSE, 2), TRUE))
output2 <- cssLoop(input=alt_input, x=x, y=y, lambda=0.08, fitfun=cssLasso)
testthat::expect_true(is.integer(output2))
testthat::expect_equal(length(output2), length(unique(output2)))
testthat::expect_true(length(output2) <= 8)
testthat::expect_true(all(output2 %in% c(2, 3, 4, 5, 8)))
testthat::expect_error(cssLoop(input= list("subsample"=2:5,
"feats_to_keep"=c(FALSE,
rep(TRUE, 4),
rep(FALSE, 2))),
x=x, y=y, lambda=0.08, fitfun=cssLasso),
"length(feats_to_keep) == p is not TRUE",
fixed=TRUE)
# Custom fitfun with nonsense lambda (which will be ignored by fitfun, and
# shouldn't throw any error, because the acceptable input for lambda should be
# enforced only by fitfun) and nonsense y
testFitfun <- function(X, y, lambda){
p <- ncol(X)
stopifnot(p >= 2)
# Choose p/2 features randomly
selected <- sample.int(p, size=floor(p/2))
return(selected)
}
testthat::expect_true(is.integer(cssLoop(input=1L:4L, x=x, y=y,
lambda=TRUE, fitfun=testFitfun)))
testthat::expect_true(is.integer(cssLoop(input=1L:4L, x=x,
y=character(9), lambda=.05,
fitfun=testFitfun)))
})## Test passed with 16 successes.
Tests for checkGetClusterSelMatrixInput():
testthat::test_that("checkGetClusterSelMatrixInput works", {
good_clusters <- list(happy=1L:8L, sad=9L:10L, med=11L)
res <- matrix(sample(c(0, 1), size=6*11, replace=TRUE), nrow=6, ncol=11)
testthat::expect_null(checkGetClusterSelMatrixInput(good_clusters, res))
testthat::expect_error(checkGetClusterSelMatrixInput(list(happy=1L:8L,
med=11L), res),
"length(all_clustered_feats) == p is not TRUE",
fixed=TRUE)
testthat::expect_error(checkGetClusterSelMatrixInput(good_clusters, 1:9),
"is.matrix(res) is not TRUE", fixed=TRUE)
testthat::expect_error(checkGetClusterSelMatrixInput(good_clusters, res + .3),
"all(res %in% c(0, 1)) is not TRUE", fixed=TRUE)
testthat::expect_error(checkGetClusterSelMatrixInput(good_clusters,
res[, 1:9]),
"length(all_clustered_feats) == p is not TRUE",
fixed=TRUE)
testthat::expect_error(checkGetClusterSelMatrixInput(1L:10L, res),
"is.list(clusters) is not TRUE", fixed=TRUE)
testthat::expect_error(checkGetClusterSelMatrixInput(list(c1=1L:5L,
c2=6L:8L,
c3=9L,
c4=integer()), res),
"all(lengths(clusters) >= 1) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkGetClusterSelMatrixInput(list(c1=1L:5L,
c2=6L:8L, c3=9L,
c4=as.integer(NA)), res),
"all(!is.na(clusters)) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkGetClusterSelMatrixInput(list(c1=1L:5L,
c2=6L:8L, c3=9L,
c2=6L:8L), res),
"n_clusters == length(unique(clusters)) is not TRUE",
fixed=TRUE)
testthat::expect_error(checkGetClusterSelMatrixInput(list(c1=1L:5L,
c2=6L:8L, c3=14L), res),
"length(all_clustered_feats) == p is not TRUE",
fixed=TRUE)
})## Test passed with 10 successes.
Tests for createSubsamples():
testthat::test_that("createSubsamples returns properly-named elements with feature removal (#69)", {
set.seed(692)
res <- createSubsamples(n = 30, p = 8, B = 5, sampling_type = "SS",
prop_feats_remove = 0.3)
testthat::expect_equal(length(res), 2L * 5L) # SS -> 2B elements
# Every element is a named list(subsample, feats_to_keep) -- the invariant the
# (now-meaningful) output stopifnot guards.
testthat::expect_true(all(vapply(res,
function(s) identical(names(s), c("subsample", "feats_to_keep")),
logical(1))))
})
testthat::test_that("createSubsamples works", {
res <- createSubsamples(n=20L, p=5L, B=11L, sampling_type="SS",
prop_feats_remove=0)
testthat::expect_true(is.list(res))
testthat::expect_equal(length(res), 2*11)
testthat::expect_true(all(lengths(res) == 20/2))
testthat::expect_equal(length(unique(res[[13]])), 20/2)
set <- res[[4]]
comp_set <- res[[4 + 11]]
testthat::expect_equal(length(intersect(set, comp_set)), 0)
testthat::expect_equal(length(union(set, comp_set)), length(c(set, comp_set)))
testthat::expect_equal(20, length(c(set, comp_set)))
# Try odd n
res_odd <- createSubsamples(n=19L, p=23L, B=13L, sampling_type="SS",
prop_feats_remove=0)
testthat::expect_true(is.list(res_odd))
testthat::expect_equal(length(res_odd), 2*13)
testthat::expect_true(all(lengths(res_odd) == floor(19/2)))
testthat::expect_equal(length(unique(res_odd[[3]])), floor(19/2))
set_odd <- res_odd[[2]]
comp_set_odd <- res_odd[[2 + 13]]
testthat::expect_equal(length(intersect(set_odd, comp_set_odd)), 0)
testthat::expect_equal(length(union(set_odd, comp_set_odd)),
length(c(set_odd, comp_set_odd)))
testthat::expect_equal(19 - 1, length(c(set_odd, comp_set_odd)))
testthat::expect_error(createSubsamples(n=20L, p=5L, B=11L, sampling_type="MB",
prop_feats_remove=0),
"sampling_type MB is not yet supported (and isn't recommended anyway)",
fixed=TRUE)
# misspecified sampling_type (not specifying error because contains quotation
# marks)
testthat::expect_error(createSubsamples(n=20L, p=5L, B=11L, sampling_type="S",
prop_feats_remove=0))
testthat::expect_error(createSubsamples(n=11.1, p=5L, B=11L, sampling_type="SS",
prop_feats_remove=0),
"n == round(n) is not TRUE",
fixed=TRUE)
testthat::expect_error(createSubsamples(n=-20L, p=5L, B=11L, sampling_type="SS",
prop_feats_remove=0),
"n > 0 is not TRUE",
fixed=TRUE)
testthat::expect_error(createSubsamples(n=20L, p=5L, B=25.6, sampling_type="SS",
prop_feats_remove=0),
"length(subsamples) == B is not TRUE",
fixed=TRUE)
})## Test passed with 2 successes.
## Test passed with 19 successes.
Tests for getSubsamps():
testthat::test_that("getSubsamps works", {
res <- getSubsamps(n=18L, B=21L, sampling_type="SS")
testthat::expect_true(is.list(res))
testthat::expect_equal(length(res), 2*21)
testthat::expect_true(all(lengths(res) == 18/2))
testthat::expect_equal(length(unique(res[[7]])), 18/2)
set <- res[[3]]
comp_set <- res[[3 + 21]]
testthat::expect_equal(length(intersect(set, comp_set)), 0)
testthat::expect_equal(length(union(set, comp_set)), length(c(set, comp_set)))
testthat::expect_equal(18, length(c(set, comp_set)))
})## Test passed with 7 successes.
Tests for getSelMatrix():
testthat::test_that("getSelMatrix works", {
set.seed(98623)
x <- matrix(stats::rnorm(25*6), nrow=25, ncol=6)
y <- stats::rnorm(25)
subsamps_object <- createSubsamples(n=25, p=6, B=12, sampling_type="SS",
prop_feats_remove=0)
res <- getSelMatrix(x=x, y=y, lambda=0.01, B=12, sampling_type="SS",
subsamps_object=subsamps_object, num_cores=1,
fitfun=cssLasso)
testthat::expect_true(is.matrix(res))
testthat::expect_equal(nrow(res), 2*12)
testthat::expect_equal(ncol(res), 6)
testthat::expect_true(all(res %in% c(0, 1)))
testthat::expect_true(all(is.integer(res)))
# Try a different fitfun
testFitfun <- function(X, y, lambda){
p <- ncol(X)
stopifnot(p >= 2)
# Choose p/2 features randomly
selected <- sample.int(p, size=floor(p/2))
return(selected)
}
# Note that value of lambda doesn't matter
res2 <- getSelMatrix(x=x, y=y, lambda="foo", B=12, sampling_type="SS",
subsamps_object=subsamps_object, num_cores=1,
fitfun=testFitfun)
testthat::expect_true(is.matrix(res2))
testthat::expect_equal(nrow(res2), 2*12)
testthat::expect_equal(ncol(res2), 6)
testthat::expect_true(all(res2 %in% c(0, 1)))
testthat::expect_true(all(is.integer(res2)))
testthat::expect_error(getSelMatrix(x=x, y=y, lambda="0.02", B=12, sampling_type="SS",
subsamps_object="subsamps_object", num_cores=1,
fitfun=testFitfun),
"is.integer(subsample) is not TRUE",
fixed=TRUE)
testthat::expect_error(getSelMatrix(x=x[1:8, ], y=y, lambda="foo", B=12, sampling_type="SS",
subsamps_object=subsamps_object, num_cores=1,
fitfun=testFitfun),
"length(y) == n is not TRUE",
fixed=TRUE)
testthat::expect_error(getSelMatrix(x=x, y=y, lambda=-0.02, B=12, sampling_type="SS",
subsamps_object=subsamps_object, num_cores=1,
fitfun=cssLasso),
"For method cssLasso, lambda must be nonnegative.",
fixed=TRUE)
# Wrong B
testthat::expect_error(getSelMatrix(x=x, y=y, lambda=0.02, B=37, sampling_type="SS",
subsamps_object=subsamps_object, num_cores=1,
fitfun=cssLasso),
"length(res_list) == nrow(res) is not TRUE",
fixed=TRUE)
})## Test passed with 14 successes.
A killed parallel worker makes parallel::mclapply() return NULL for that
subsample. That NULL is distinct from a legitimate integer(0) (“selected
nothing”) result, so getSelMatrix() must error rather than silently record a
zero row (which would bias the selection proportions) (#104, L8):
testthat::test_that("getSelMatrix detects a killed parallel worker (#104)", {
set.seed(98623)
x <- matrix(stats::rnorm(25*6), nrow=25, ncol=6)
y <- stats::rnorm(25)
subsamps_object <- createSubsamples(n=25, p=6, B=12, sampling_type="SS",
prop_feats_remove=0)
# Simulate a worker that was killed: mclapply returns a list containing NULL.
testthat::local_mocked_bindings(
mclapply = function(...) list(NULL),
.package = "parallel")
testthat::expect_error(
getSelMatrix(x=x, y=y, lambda=0.01, B=12, sampling_type="SS",
subsamps_object=subsamps_object, num_cores=1,
fitfun=cssLasso),
"parallel worker failed")
})## Test passed with 1 success.
Tests for cssLasso():
testthat::test_that("cssLasso works", {
set.seed(24509)
x <- matrix(stats::rnorm(15*4), nrow=15, ncol=4)
y <- stats::rnorm(15)
res <- cssLasso(X=x, y=y, lambda=0.01)
testthat::expect_true(is.integer(res))
testthat::expect_true(length(res) <= 4)
testthat::expect_true(length(res) >= 0)
testthat::expect_true(length(res) == length(unique(res)))
testthat::expect_true(all(res <= 4))
testthat::expect_true(all(res >= 1))
testthat::expect_error(cssLasso(X=x[1:13, ], y=y, lambda=0.01),
"For method cssLasso, y must be a vector of length equal to nrow(X).",
fixed=TRUE)
testthat::expect_error(cssLasso(X=x, y=y, lambda=-0.01),
"For method cssLasso, lambda must be nonnegative.",
fixed=TRUE)
})## Test passed with 8 successes.
Tests for getClusterSelMatrix():
testthat::test_that("getClusterSelMatrix works", {
good_clusters <- list(red_cluster=1L:5L,
green_cluster=6L:8L, blue_clust=9L)
B <- 14
p <- 9
res_entries <- as.integer(sample(c(0, 1), size=2*B*p, replace=TRUE))
good_res <- matrix(res_entries, nrow=2*B, ncol=p)
res <- getClusterSelMatrix(good_clusters, good_res)
testthat::expect_true(is.matrix(res))
testthat::expect_equal(nrow(res), 2*B)
# 3 clusters
testthat::expect_equal(ncol(res), 3)
testthat::expect_identical(colnames(res), c("red_cluster", "green_cluster",
"blue_clust"))
testthat::expect_true(all(is.integer(res)))
testthat::expect_true(all(res %in% c(0, 1)))
# (#105, L12) Value-pin EVERY cluster column (not just cluster 2): each column
# must equal the "at least one member of that cluster was selected" indicator.
# This pins the full feat2clus mapping/ordering that the #57 rowsum refactor
# relies on, including the singleton cluster (drop=FALSE keeps it a matrix).
for(j in seq_along(good_clusters)){
testthat::expect_equal(res[, j],
as.integer(rowSums(good_res[, good_clusters[[j]], drop=FALSE]) > 0))
}
# Single-cluster case (#57): t(clust_counts > 0L) collapses to one column
res1 <- getClusterSelMatrix(list(only = 1L:9L), good_res)
testthat::expect_equal(ncol(res1), 1)
testthat::expect_true(all(is.integer(res1)))
# Not all features in a cluster
bad_clusters <- list(red_cluster=1L:5L, green_cluster=6L:7L, blue_clust=9L)
testthat::expect_error(getClusterSelMatrix(bad_clusters, good_res),
"length(all_clustered_feats) == p is not TRUE",
fixed=TRUE)
bad_res_entries <- as.integer(sample(c(0, 1, 2), size=2*B*p, replace=TRUE))
bad_res <- matrix(bad_res_entries, nrow=2*B, ncol=p)
testthat::expect_error(getClusterSelMatrix(good_clusters, bad_res),
"all(res %in% c(0, 1)) is not TRUE", fixed=TRUE)
})## Test passed with 13 successes.
Finally, tests for css() itself!
testthat::test_that("css works", {
set.seed(8712)
x <- matrix(stats::rnorm(15*11), nrow=15, ncol=11)
y <- stats::rnorm(15)
# Intentionally don't provide clusters for all feature, mix up formatting,
# etc.
good_clusters <- list(red_cluster=1L:5L,
green_cluster=6L:8L,
c4=10:11)
res <- css(X=x, y=y, lambda=0.01, clusters=good_clusters, fitfun = cssLasso,
sampling_type = "SS", B = 13,
prop_feats_remove = 0, train_inds = integer(), num_cores = 1L)
# Basic output
testthat::expect_true(is.list(res))
testthat::expect_identical(class(res), "cssr")
testthat::expect_identical(names(res), c("feat_sel_mat", "clus_sel_mat", "X",
"y", "clusters", "train_inds"))
# feat_sel mat
testthat::expect_true(is.integer(res$feat_sel_mat))
testthat::expect_true(is.matrix(res$feat_sel_mat))
testthat::expect_true(all(res$feat_sel_mat %in% c(0, 1)))
testthat::expect_equal(ncol(res$feat_sel_mat), 11)
testthat::expect_null(colnames(res$feat_sel_mat))
# clus_sel_mat
testthat::expect_true(is.integer(res$clus_sel_mat))
testthat::expect_true(is.matrix(res$clus_sel_mat))
testthat::expect_true(all(res$clus_sel_mat %in% c(0, 1)))
# 4 clusters
testthat::expect_equal(ncol(res$clus_sel_mat), 4)
testthat::expect_identical(colnames(res$clus_sel_mat), names(res$clusters))
testthat::expect_equal(length(colnames(res$clus_sel_mat)), 4)
testthat::expect_equal(length(unique(colnames(res$clus_sel_mat))), 4)
testthat::expect_true(all(!is.na(colnames(res$clus_sel_mat))))
testthat::expect_true(all(!is.null(colnames(res$clus_sel_mat))))
# X
testthat::expect_true(is.matrix(res$X))
testthat::expect_true(all(!is.na(res$X)))
testthat::expect_true(is.numeric(res$X))
testthat::expect_equal(ncol(res$X), 11)
testthat::expect_equal(nrow(res$X), 15)
# y
testthat::expect_true(is.numeric(res$y))
testthat::expect_equal(length(res$y), 15)
# clusters
testthat::expect_true(is.list(res$clusters))
testthat::expect_equal(length(res$clusters), length(names(res$clusters)))
testthat::expect_equal(length(res$clusters),
length(unique(names(res$clusters))))
testthat::expect_true(all(!is.na(names(res$clusters))))
testthat::expect_true(all(!is.null(names(res$clusters))))
clust_feats <- integer()
for(i in 1:length(res$clusters)){
clust_feats <- c(clust_feats, res$clusters[[i]])
}
testthat::expect_equal(length(clust_feats), length(unique(clust_feats)))
testthat::expect_equal(length(clust_feats), length(intersect(clust_feats,
1:11)))
# train_inds
testthat::expect_identical(res$train_inds, integer())
## Trying other inputs
# X as a data.frame
X_df <- datasets::mtcars
res_fitfun <- css(X=X_df, y=stats::rnorm(nrow(X_df)), lambda=0.01, B = 10)
testthat::expect_identical(class(res_fitfun), "cssr")
# X as a dataframe with factors (number of columns of final design matrix
# after one-hot encoding factors won't match number of columns of df2)
# cyl, gear, and carb are factors with more than 2 levels
df2 <- X_df
df2$cyl <- as.factor(df2$cyl)
df2$vs <- as.factor(df2$vs)
df2$am <- as.factor(df2$am)
df2$gear <- as.factor(df2$gear)
df2$carb <- as.factor(df2$carb)
res_fitfun <- css(X=df2, y=stats::rnorm(nrow(X_df)), lambda=0.01, B = 10)
testthat::expect_identical(class(res_fitfun), "cssr")
# Should get error if I try to use clusters in this data.frame that contains
# factors with more than two levels
testthat::expect_error(css(X=df2, y=stats::rnorm(nrow(X_df)), lambda=0.01,
B = 10, clusters=1:3), "When stats::model.matrix converted the provided data.frame X to a matrix, the number of columns changed (probably because the provided data.frame contained a factor variable with at least three levels). Please convert X to a matrix yourself using model.matrix and provide cluster assignments according to the columns of the new matrix.",
fixed=TRUE)
# X as a matrix with column names
x2 <- x
colnames(x2) <- LETTERS[1:11]
res_names <- css(X=x2, y=y, lambda=0.01, clusters=good_clusters, B = 13)
testthat::expect_identical(class(res_names), "cssr")
testthat::expect_identical(colnames(x2), colnames(res_names$X))
testthat::expect_identical(colnames(x2), colnames(res_names$feat_sel_mat))
# Custom fitfun with nonsense lambda (which will be ignored by fitfun, and
# shouldn't throw any error, because the acceptable input for lambda should be
# enforced only by fitfun)
testFitfun <- function(X, y, lambda){
p <- ncol(X)
stopifnot(p >= 2)
# Choose p/2 features randomly
selected <- sample.int(p, size=floor(p/2))
return(selected)
}
res_fitfun <- css(X=x, y=y, lambda=c("foo", as.character(NA), "bar"),
clusters=1:3, B = 10, fitfun=testFitfun)
testthat::expect_identical(class(res_fitfun), "cssr")
# Bad lambda
testthat::expect_error(css(X=x, y=y, lambda=-0.01, B = 10),
"For method cssLasso, lambda must be nonnegative.",
fixed=TRUE)
testthat::expect_error(css(X=x, y=y, lambda="foo", B = 10),
"For method cssLasso, lambda must be a numeric.",
fixed=TRUE)
# Single cluster
res_sing_clust <- css(X=x, y=y, lambda=0.01, clusters=1:3, B = 10)
testthat::expect_identical(class(res_sing_clust), "cssr")
testthat::expect_equal(length(res_sing_clust$clusters), 11 - 3 + 1)
testthat::expect_true(length(unique(names(res_sing_clust$clusters))) == 11 -
3 + 1)
testthat::expect_true(all(!is.na(names(res_sing_clust$clusters))))
testthat::expect_true(all(!is.null(names(res_sing_clust$clusters))))
# No cluster
testthat::expect_identical(class(css(X=x, y=y, lambda=0.01, B = 10)), "cssr")
# All clusters named
testthat::expect_identical(class(css(X=x, y=y, clusters=list("a"=1:5,
"b"=6:10,
"c"=11),
lambda=0.01, B=10)), "cssr")
# Other sampling types
testthat::expect_error(css(X=x, y=y, lambda=1, sampling_type="MB"),
"sampling_type MB is not yet supported (and isn't recommended anyway)",
fixed=TRUE)
# Error has quotation marks in it
testthat::expect_error(css(X=x, y=y, lambda=1, sampling_type="S"))
testthat::expect_error(css(X=x, y=y, lambda=1, sampling_type=1),
"is.character(sampling_type) is not TRUE",
fixed=TRUE)
# B
testthat::expect_warning(css(X=x, y=y, lambda=1, B=5),
"Small values of B may lead to poor results.",
fixed=TRUE)
testthat::expect_error(css(X=x, y=y, lambda=1, B=list(10)),
"is.numeric(B) | is.integer(B) is not TRUE",
fixed=TRUE)
# Clusters
testthat::expect_error(css(X=x, y=y, lambda=1, clusters="red"),
"is.numeric(clusters) | is.integer(clusters) is not TRUE",
fixed=TRUE)
# prop_feats_remove
testthat::expect_identical(class(css(X=x, y=y, lambda=0.01, B = 10,
prop_feats_remove=0.3)), "cssr")
# Weirdly high, but still valid, value of prop_feats_remove
testthat::expect_identical(class(css(X=x, y=y, lambda=0.01, B = 10,
prop_feats_remove=0.9999999999)), "cssr")
# Use train_inds argument
res_train <- css(X=x, y=y, lambda=0.01, B = 10, train_inds=11:15)
testthat::expect_equal(res_train$train_inds, 11:15)
})
testthat::test_that("css errors on an X containing NA (#74)", {
set.seed(8642)
X <- matrix(stats::rnorm(20 * 4), nrow = 20, ncol = 4)
X[3, 2] <- NA
y <- stats::rnorm(20)
testthat::expect_error(
css(X = X, y = y, lambda = 0.01),
"must not contain missing",
fixed = TRUE)
})## Test passed with 57 successes.
## Test passed with 1 success.
We also confirm the parallel feature-selection loop’s RNG is isolated per subsample (issue #12): cssLoop seeds the RNG immediately before fitfun, so a stochastic fitfun is reproducible and identical whether run serially or in parallel, given a seed set before css(). (The parallel checks require real forking, so they are guarded and skipped where forking is unavailable – Windows, CRAN, or a fully-loaded machine.)
testthat::test_that("cssLoop seeds fitfun reproducibly; css parallel RNG is isolated (#12)", {
set.seed(99)
x_rng <- matrix(stats::rnorm(40 * 8), nrow=40, ncol=8)
y_rng <- stats::rnorm(40)
# A stochastic fitfun: randomly selects 2 features (uses the RNG), so its
# output depends on the seed in effect when it runs.
rfit <- function(X, y, lambda){ sort(sample.int(ncol(X), 2)) }
sub <- sort(sample.int(40L, 20L))
# cssLoop's seed argument makes a stochastic fitfun reproducible: the same
# seed gives the same selection. (On main, cssLoop has no seed argument, so
# this behavior does not exist there.) This needs no parallelism.
r1 <- cssLoop(input=sub, x=x_rng, y=y_rng, lambda=0.1, fitfun=rfit, seed=123L)
r2 <- cssLoop(input=sub, x=x_rng, y=y_rng, lambda=0.1, fitfun=rfit, seed=123L)
testthat::expect_identical(r1, r2)
# ...and the seed genuinely drives the (stochastic) selection -- different
# seeds give more than one distinct result (so the check above is not vacuous).
outs <- lapply(1:8, function(s) cssLoop(input=sub, x=x_rng, y=y_rng,
lambda=0.1, fitfun=rfit, seed=s))
testthat::expect_true(length(unique(outs)) > 1)
# End-to-end, serial css is reproducible across re-runs.
set.seed(1)
d1 <- css(x_rng, y_rng, lambda=0.1, fitfun=rfit, B=10L, num_cores=1L)$feat_sel_mat
set.seed(1)
d2 <- css(x_rng, y_rng, lambda=0.1, fitfun=rfit, B=10L, num_cores=1L)$feat_sel_mat
testthat::expect_identical(d1, d2)
# Parallel checks need real forking; run the parallel css calls defensively
# and skip (rather than fail) when forking is unavailable -- Windows, CRAN, or
# a fully-loaded machine. A genuine reproducibility regression would still
# surface as a failed expectation below, not a skip.
par_runs <- tryCatch({
set.seed(1)
a <- css(x_rng, y_rng, lambda=0.1, fitfun=rfit, B=10L, num_cores=2L)$feat_sel_mat
set.seed(1)
b <- css(x_rng, y_rng, lambda=0.1, fitfun=rfit, B=10L, num_cores=2L)$feat_sel_mat
list(a=a, b=b)
}, error=function(e) NULL)
if(is.null(par_runs)){
testthat::skip("parallel forking unavailable in this environment")
}
testthat::expect_identical(par_runs$a, par_runs$b) # parallel reproducible
testthat::expect_identical(par_runs$a, d1) # serial == parallel
})## Test passed with 5 successes.