8 Tests for the core `css()` functions

Now that all of the package’s functions have been defined, we collect their tests here, grouped to mirror the definition chapters above. Keeping the tests together at the end lets those chapters read as continuous narrative; because every function is defined before this part, each test runs only after all of its dependencies have been defined.

Tests for coerceDataFrameToMatrix():

testthat::test_that("coerceDataFrameToMatrix keeps a single-column data.frame as a matrix (#43)", {
  # A 1-column data.frame must coerce to a 1-column matrix, not collapse to a
  # vector (which used to crash with the cryptic "argument is of length zero",
  # both with and without clusters).
  num_df <- data.frame(a = as.numeric(1:8))
  res_clust <- coerceDataFrameToMatrix(num_df, clusters = list(1))
  testthat::expect_true(is.matrix(res_clust))
  testthat::expect_equal(dim(res_clust), c(8L, 1L))
  res_noclust <- coerceDataFrameToMatrix(num_df, clusters = list())
  testthat::expect_true(is.matrix(res_noclust))
  testthat::expect_equal(dim(res_noclust), c(8L, 1L))

  # A matrix passes through unchanged.
  m <- matrix(as.numeric(1:8), nrow = 8, ncol = 1)
  testthat::expect_identical(coerceDataFrameToMatrix(m, clusters = list()), m)

  # The factor-expansion guard still fires (a >= 3-level factor changes ncol).
  fac_df <- data.frame(a = factor(c("x", "y", "z", "x", "y", "z", "x", "y")))
  testthat::expect_error(coerceDataFrameToMatrix(fac_df, clusters = list(1)),
    "the number of columns changed", fixed = TRUE)

  # A non-numeric (character) matrix is rejected (#152).
  testthat::expect_error(
    coerceDataFrameToMatrix(matrix(letters[1:6], nrow = 3, ncol = 2), clusters = list()),
    "must be a numeric matrix", fixed = TRUE)
})

## Test passed with 7 successes.

Tests that a non-numeric matrix X is rejected at every X entry point (#152). All seven entry points funnel through coerceDataFrameToMatrix(), so a character matrix errors with the same clear message everywhere instead of silently producing all-zero selections (css) or crashing later (protolasso):

testthat::test_that("non-numeric matrix X is rejected at every entry point (#152)", {
    set.seed(1)
    cx <- matrix(sample(letters, 20, TRUE), 10, 2)   # character matrix
    testthat::expect_error(
        css(X = cx, y = stats::rnorm(10), lambda = 0.01, B = 5),
        "must be a numeric matrix", fixed = TRUE)
    testthat::expect_error(
        protolasso(matrix(sample(letters, 40, TRUE), 20, 2), stats::rnorm(20)),
        "must be a numeric matrix", fixed = TRUE)
    testthat::expect_error(
        clusterRepLasso(matrix(sample(letters, 40, TRUE), 20, 2), stats::rnorm(20)),
        "must be a numeric matrix", fixed = TRUE)
    # getCssPreds(testX = <character matrix>): build a valid css result on
    # numeric data, then pass a character testX of the SAME ncol so it
    # reaches coerceDataFrameToMatrix (not an ncol-mismatch check first).
    nx <- matrix(stats::rnorm(60), 30, 2)
    res <- css(X = nx[1:20, ], y = stats::rnorm(20), lambda = 0.01, B = 5)
    testthat::expect_error(
        getCssPreds(res, testX = matrix(sample(letters, 20, TRUE), 10, 2),
            trainX = nx[21:30, ], trainY = stats::rnorm(10)),
        "must be a numeric matrix", fixed = TRUE)
})

## -- Warning: non-numeric matrix X is rejected at every entry point (#152) -------
## Small values of B may lead to poor results.
## Backtrace:
##     x
##  1. \-litr (local) css(X = nx[1:20, ], y = stats::rnorm(20), lambda = 0.01, B = 5)
##  2.   \-litr (local) checkCssInputs(...)
##  3.     \-litr (local) checkB(B)
## Test passed with 4 successes.

Tests for checkNoNAs():

testthat::test_that("checkNoNAs flags NA/NaN/Inf in a matrix or data.frame and passes clean input", {
  m_ok <- matrix(as.numeric(1:8), nrow = 4, ncol = 2)
  testthat::expect_identical(checkNoNAs(m_ok, "X"), m_ok)   # returns input invisibly
  m_na <- m_ok; m_na[2, 1] <- NA
  testthat::expect_error(checkNoNAs(m_na, "X"),
    "must not contain missing", fixed = TRUE)
  # data.frame with a numeric NA is caught too (before any coercion).
  df_na <- data.frame(a = c(1, NA, 3), b = c(4, 5, 6))
  testthat::expect_error(checkNoNAs(df_na, "X"),
    "must not contain missing", fixed = TRUE)
  # arg_name appears in the message.
  testthat::expect_error(checkNoNAs(m_na, "newx"), "newx", fixed = TRUE)

  # Non-finite values are rejected too (#99): is.na(Inf) is FALSE, so the
  # previous bare is.na() check let Inf/-Inf slip through into glmnet and the
  # cluster-representative averaging, silently corrupting results.
  m_inf <- m_ok; m_inf[2, 1] <- Inf
  testthat::expect_error(checkNoNAs(m_inf, "X"), "non-finite", fixed = TRUE)
  m_neginf <- m_ok; m_neginf[3, 2] <- -Inf
  testthat::expect_error(checkNoNAs(m_neginf, "X"), "non-finite", fixed = TRUE)
  # NaN is caught by is.na() (is.na(NaN) is TRUE).
  m_nan <- m_ok; m_nan[1, 1] <- NaN
  testthat::expect_error(checkNoNAs(m_nan, "X"),
    "must not contain missing", fixed = TRUE)
  # data.frame with Inf in a numeric column is rejected too.
  df_inf <- data.frame(a = c(1, Inf, 3), b = c(4, 5, 6))
  testthat::expect_error(checkNoNAs(df_inf, "X"), "non-finite", fixed = TRUE)

  # A finite data.frame passes through unchanged.
  df_ok <- data.frame(a = c(1, 2, 3), b = c(4, 5, 6))
  testthat::expect_identical(checkNoNAs(df_ok, "X"), df_ok)
  # A data.frame with non-numeric (factor/character) columns plus clean numeric
  # columns still passes: the is.infinite() check is per-numeric-column, so it
  # must not false-positive on non-numeric columns.
  df_mixed <- data.frame(
    f = factor(c("x", "y", "z")),
    s = c("a", "b", "c"),
    n = c(1, 2, 3),
    stringsAsFactors = FALSE)
  testthat::expect_identical(checkNoNAs(df_mixed, "X"), df_mixed)
})

## Test passed with 10 successes.

Tests for checkFiniteY():

testthat::test_that("checkFiniteY accepts finite numeric/integer y and rejects non-finite or non-numeric y (#100)", {
  # Finite numeric and integer y pass, returning the input invisibly.
  y_num <- as.numeric(1:8) * 0.5
  testthat::expect_identical(checkFiniteY(y_num, "y"), y_num)
  y_int <- 1L:8L
  testthat::expect_identical(checkFiniteY(y_int, "y"), y_int)
  testthat::expect_invisible(checkFiniteY(y_num, "y"))

  # NA, NaN, Inf, -Inf each error with the non-finite message.
  testthat::expect_error(checkFiniteY(c(1, NA, 3), "y"),
    "must not contain missing (NA) or non-finite (Inf) values", fixed = TRUE)
  testthat::expect_error(checkFiniteY(c(1, NaN, 3), "y"),
    "must not contain missing (NA) or non-finite (Inf) values", fixed = TRUE)
  testthat::expect_error(checkFiniteY(c(1, Inf, 3), "y"),
    "must not contain missing (NA) or non-finite (Inf) values", fixed = TRUE)
  testthat::expect_error(checkFiniteY(c(1, -Inf, 3), "y"),
    "must not contain missing (NA) or non-finite (Inf) values", fixed = TRUE)

  # A non-numeric (logical/character) y errors with the numeric message.
  testthat::expect_error(checkFiniteY(c(TRUE, FALSE, TRUE), "y"),
    "must be a numeric (real-valued) vector", fixed = TRUE)
  testthat::expect_error(checkFiniteY(c("a", "b"), "y"),
    "must be a numeric (real-valued) vector", fixed = TRUE)

  # arg_name is interpolated into both messages.
  testthat::expect_error(checkFiniteY(c(1, Inf), "y_train_selec"),
    "The provided y_train_selec must not contain missing", fixed = TRUE)
  testthat::expect_error(checkFiniteY(c(TRUE, FALSE), "y_train_selec"),
    "The provided y_train_selec must be a numeric", fixed = TRUE)
})

## Test passed with 11 successes.

tests for checkCssClustersInput:

testthat::test_that("checkCssClustersInput works", {
  
  # Intentionally don't provide clusters for all feature, mix up formatting,
  # etc.
  good_clusters <- list(red_cluster=1L:4L, green_cluster=5L:8L)
  
  res <- checkCssClustersInput(good_clusters)
  
  # clusters
  testthat::expect_true(is.list(res))
  testthat::expect_equal(length(res), length(names(res)))
  testthat::expect_equal(length(res), length(unique(names(res))))
  testthat::expect_true(all(!is.na(names(res))))
  testthat::expect_true(all(!is.null(names(res))))
  clust_feats <- integer()
  for(i in 1:length(res)){
    clust_feats <- c(clust_feats, res[[i]])
  }
  testthat::expect_equal(length(clust_feats), length(unique(clust_feats)))
  testthat::expect_equal(length(clust_feats), length(intersect(clust_feats,
                                                               1:8)))

  ## Trying other inputs
  
  unnamed_clusters <- list(1L:3L, 5L:8L)
  
  res <- checkCssClustersInput(unnamed_clusters)
  
  # clusters
  testthat::expect_true(is.list(res))
  clust_feats <- integer()
  for(i in 1:length(res)){
    clust_feats <- c(clust_feats, res[[i]])
  }
  testthat::expect_equal(length(clust_feats), length(unique(clust_feats)))
  testthat::expect_equal(length(clust_feats), length(intersect(clust_feats,
                                                               1:8)))
  
  testthat::expect_error(checkCssClustersInput(list(1:4, 4:6)),
                         "Overlapping clusters detected; clusters must be non-overlapping. Overlapping clusters: 1, 2.",
                         fixed=TRUE)
  
  # Duplicate clusters are now silently removed (#156): the result is identical
  # to passing the cluster only once.
  testthat::expect_identical(checkCssClustersInput(list(2:3, 2:3)), list(2:3))
  
  testthat::expect_error(checkCssClustersInput(list(2:3, as.integer(NA))),
                         "!is.na(clusters) are not all TRUE",
                         fixed=TRUE)
  
  testthat::expect_error(checkCssClustersInput(list(2:3, c(4, 4, 5))),
                         "length(clusters[[i]]) == length(unique(clusters[[i]])) is not TRUE",
                         fixed=TRUE)
  
  testthat::expect_error(checkCssClustersInput(list(2:3, -1)),
                         "all(clusters[[i]] >= 1) is not TRUE",
                         fixed=TRUE)
  
  testthat::expect_error(checkCssClustersInput(c(0.4, 0.6)),
                         "all(clusters == round(clusters)) is not TRUE",
                         fixed=TRUE)
  
  # Single cluster
  res_sing_clust <- checkCssClustersInput(2:5)
  testthat::expect_equal(length(res_sing_clust), 4)


})

## Test passed with 17 successes.

Tests for checkY():

testthat::test_that("checkY works", {
  testthat::expect_null(checkY(as.numeric(1:20)*.1, 20))
  testthat::expect_null(checkY(1L:15L, 15))
  testthat::expect_error(checkY(1:7, 8), "n == length(y) is not TRUE",
                         fixed=TRUE)
  testthat::expect_error(checkY(1:7, -7), "n > 0 is not TRUE", fixed=TRUE)
  testthat::expect_error(checkY(rep(as.numeric(NA), 13), 13),
                         "all(is.finite(y)) is not TRUE", fixed=TRUE)
  # Inf is now rejected too (previously all(!is.na(y)) missed it) (#100)
  testthat::expect_error(checkY(c(1, Inf, 3), 3),
                         "all(is.finite(y)) is not TRUE", fixed=TRUE)
  testthat::expect_error(checkY(rep(5.2, 9), 9),
                         "length(unique(y)) > 1 is not TRUE", fixed=TRUE)
  testthat::expect_error(checkY(c(TRUE, FALSE, TRUE), 3),
                         "is.numeric(y) | is.integer(y) is not TRUE",
                         fixed=TRUE)
})

## Test passed with 8 successes.

Tests for checkFormatClustersInput():

testthat::test_that("checkFormatClustersInput works", {
  
  # Intentionally don't provide clusters for all feature, mix up formatting,
  # etc.
  good_clusters <- list(red_cluster=1L:4L, green_cluster=5L:8L)
  
  res <- checkFormatClustersInput(good_clusters, p=10,
                                  clust_names=c("red_cluster", "green_cluster"),
                                  get_prototypes=FALSE, x=NA, y=NA, R=NA)
  
  testthat::expect_true(is.list(res))
  clust_feats <- integer()
  for(i in 1:length(res)){
    clust_feats <- c(clust_feats, res[[i]])
  }
  testthat::expect_equal(length(clust_feats), length(unique(clust_feats)))
  testthat::expect_equal(length(clust_feats), length(intersect(clust_feats,
                                                               1:8)))

  ## Trying other inputs
  
  unnamed_clusters <- list(1L:3L, 5L:8L)

  res <- checkFormatClustersInput(unnamed_clusters, p=10, clust_names=NA,
                                  get_prototypes=FALSE, x=NA, y=NA, R=NA)

  # clusters
  testthat::expect_true(is.list(res))
  clust_feats <- integer()
  for(i in 1:length(res)){
    clust_feats <- c(clust_feats, res[[i]])
  }
  testthat::expect_equal(length(clust_feats), length(unique(clust_feats)))
  testthat::expect_equal(length(clust_feats), length(intersect(clust_feats,
                                                               1:8)))

  testthat::expect_error(checkFormatClustersInput(list(1:4, 4:6), p=10,
                                                  clust_names=NA,
                                                  get_prototypes=FALSE, x=NA,
                                                  y=NA, R=NA),
                         "length(intersect(clusters[[i]], clusters[[j]])) == 0 is not TRUE",
                         fixed=TRUE)
  # Duplicate clusters are now silently removed (#156).
  testthat::expect_identical(checkFormatClustersInput(list(2:3, 2:3), p=10,
                                  clust_names=NA, get_prototypes=FALSE, x=NA,
                                  y=NA, R=NA),
                             list(2:3))
  
  testthat::expect_error(checkFormatClustersInput(list(2:3, as.integer(NA)),
                                                  p=10,
                                                  clust_names=NA,
                                                  get_prototypes=FALSE, x=NA,
                                                  y=NA, R=NA),
                         "Must specify one of clusters or R (or does one of these provided inputs contain NA?)",
                         fixed=TRUE)
  
  testthat::expect_error(checkFormatClustersInput(list(2:3, c(4, 4, 5)),
                                                  p=10,
                                                  clust_names=NA,
                                                  get_prototypes=FALSE, x=NA,
                                                  y=NA, R=NA),
                         "length(clusters[[i]]) == length(unique(clusters[[i]])) is not TRUE",
                         fixed=TRUE)
  
   testthat::expect_error(checkFormatClustersInput(list(1:4, -1),
                                                  p=10,
                                                  clust_names=NA,
                                                  get_prototypes=FALSE, x=NA,
                                                  y=NA, R=NA),
                         "all(clusters[[i]] >= 1) is not TRUE",
                         fixed=TRUE)
   
   testthat::expect_error(checkFormatClustersInput(list(1:4, c(2.3, 1.2)),
                                                  p=10,
                                                  clust_names=NA,
                                                  get_prototypes=FALSE, x=NA,
                                                  y=NA, R=NA),
                         "is.integer(clusters[[i]]) is not TRUE",
                         fixed=TRUE)

  # Single cluster
   testthat::expect_true(is.list(checkFormatClustersInput(c(1:5), p=10,
                                                          clust_names=NA,
                                                          get_prototypes=FALSE,
                                                          x=NA, y=NA, R=NA)))
})

## Test passed with 13 successes.

Tests for checkClusters():

testthat::test_that("checkClusters works", {
  good_clusters <- list(c1=1L:5L, c2=6L:8L, c3=9L)
  
  testthat::expect_null(checkClusters(good_clusters, 9))
  testthat::expect_error(checkClusters(good_clusters, 10),
                         "length(all_clustered_feats) == p is not TRUE",
                         fixed=TRUE)
  testthat::expect_error(checkClusters(1L:10L, 10),
                         "is.list(clusters) is not TRUE",
                         fixed=TRUE)
  testthat::expect_error(checkClusters(list(c1=1L:5L, c2=6L:8L, c3=9L,
                                            c4=integer()), 9),
                         "all(lengths(clusters) >= 1) is not TRUE",
                         fixed=TRUE)
  testthat::expect_error(checkClusters(list(c1=1L:5L, c2=6L:8L, c3=9L,
                                            c4=as.integer(NA)), 9),
                         "all(!is.na(clusters)) is not TRUE",
                         fixed=TRUE)
  testthat::expect_error(checkClusters(list(c1=1L:5L, c2=6L:8L, c3=9L,
                                            c2=6L:8L), 9),
                         "n_clusters == length(unique(clusters)) is not TRUE",
                         fixed=TRUE)
  testthat::expect_error(checkClusters(list(c1=1L:5L, c2=6L:8L, c3=10L), 9),
                         "all(all_clustered_feats <= p) is not TRUE",
                         fixed=TRUE)
})

## Test passed with 7 successes.

Tests for identifyPrototype():

testthat::test_that("identifyPrototype works", {
  testthat::expect_identical(identifyPrototype(10L, "a", 5), 10L)
  n <- 10
  p <- 5
  
  set.seed(9834)
  
  X <- matrix(stats::rnorm(n*p), nrow=n, ncol=p)
  y <- X[, p]
  testthat::expect_equal(identifyPrototype(as.integer(p), X, y), p)
  testthat::expect_equal(identifyPrototype(2L, X, y), 2)
  testthat::expect_equal(identifyPrototype(as.integer(2:p), X, y), p)
  testthat::expect_error(identifyPrototype(as.integer(2:p), y, X),
                         "incorrect number of dimensions",
                         fixed=TRUE)
  
  y2 <- rnorm(n)

  res <- identifyPrototype(c(2L, 3L), X, y2)

  testthat::expect_true(is.integer(res))

  testthat::expect_equal(length(res), 1)

  testthat::expect_true(res %in% c(2L, 3L))

})

testthat::test_that("identifyPrototype handles a constant cluster member silently (#59)", {
  # A constant column has undefined correlation; the vectorized cor() path
  # returns 0 for it -- and SILENTLY (base cor() warns "the standard deviation
  # is zero" on a constant column; identifyPrototype now suppresses that to
  # preserve the silent-0 contract).
  n <- 12
  set.seed(7321)
  varying <- stats::rnorm(n)
  # Column 1 is constant; column 2 equals y (perfectly correlated).
  x <- cbind(rep(1.5, n), varying)
  y <- varying
  # The prototype must be the varying member (2), never the constant column (1).
  testthat::expect_identical(identifyPrototype(c(1L, 2L), x, y), 2L)
  # And no "standard deviation is zero" warning may leak.
  testthat::expect_silent(identifyPrototype(c(1L, 2L), x, y))
})

testthat::test_that("identifyPrototype warns on a constant response y (#67)", {
  # Moved from the removed helper's test: a constant y makes every
  # cluster-member correlation undefined, and identifyPrototype warns.
  set.seed(5417)
  X <- matrix(stats::rnorm(8 * 2), nrow = 8, ncol = 2)
  y_const <- rep(2.5, 8)
  testthat::expect_warning(
    identifyPrototype(c(1L, 2L), X, y_const),
    "identifyPrototype: the response y has only one unique value",
    fixed = TRUE)
})

testthat::test_that("identifyPrototype selects by absolute correlation (#67)", {
  # The prototype is the member with the largest ABSOLUTE correlation with y, so
  # a near-perfectly NEGATIVELY correlated member must beat a weak positive one.
  set.seed(6708)
  n <- 20
  y <- stats::rnorm(n)
  X <- cbind(-y, y + stats::rnorm(n, sd = 4))  # col 1: cor=-1; col 2: weak +cor
  # abs() => col 1 (|cor|=1) wins despite its negative sign.
  testthat::expect_identical(identifyPrototype(c(1L, 2L), X, y), 1L)
})

## Test passed with 8 successes.

## Test passed with 2 successes.

## Test passed with 1 success.

## Test passed with 1 success.

Tests for getPrototypes()

testthat::test_that("getPrototypes works", {
  n <- 10
  p <- 5
  
  set.seed(902689)
  
  X <- matrix(stats::rnorm(n*p), nrow=n, ncol=p)
  y <- X[, p]

  testthat::expect_identical(getPrototypes(list(1L, 2L, 3L, 4L, 5L), X, y), 1:5)

  testthat::expect_identical(getPrototypes(list(1L:5L), X, y), 5L)

  testthat::expect_identical(getPrototypes(list(1L, 2L:5L), X, y), c(1L, 5L))

  testthat::expect_identical(getPrototypes(list(3L:5L), X, y), 5L)

  y2 <- rnorm(n)

  res <- getPrototypes(list(1L, c(2L, 3L), c(4L, 5L)), X, y2)

  testthat::expect_true(is.integer(res))

  testthat::expect_equal(length(res), 3)

  testthat::expect_identical(res[1], 1L)

  testthat::expect_true(res[2] %in% c(2L, 3L))

  testthat::expect_true(res[3] %in% c(4L, 5L))

  testthat::expect_error(getPrototypes(list(1L, 2L, 3L, 4L, 5L), y, X),
                          "is.matrix(x) is not TRUE",
                          fixed=TRUE)

  testthat::expect_error(getPrototypes(list(1L, 2L, 3L, 4L, 5L), X, y[1:9]),
                         "n == length(y) is not TRUE",
                         fixed=TRUE)

})

## Test passed with 11 successes.

Finally, tests for formatClusters():

testthat::test_that("formatClusters works", {
  
  # Intentionally don't provide clusters for all feature, mix up formatting,
  # etc.
  good_clusters <- list(red_cluster=1:3, 5:8)
  
  res <- formatClusters(good_clusters, p=10)
  
  testthat::expect_true(is.list(res))
  testthat::expect_identical(names(res), c("clusters", "multiple"))
  
  # Clusters
  testthat::expect_true(is.list(res$clusters))
  testthat::expect_equal(length(res$clusters), 5)
  testthat::expect_equal(5, length(names(res$clusters)))
  testthat::expect_equal(5, length(unique(names(res$clusters))))
  testthat::expect_true("red_cluster" %in% names(res$clusters))
  testthat::expect_true(all(!is.na(names(res$clusters))))
  testthat::expect_true(all(!is.null(names(res$clusters))))
  testthat::expect_true(all(names(res$clusters) != ""))

  clust_feats <- integer()
  true_list <- list(1:3, 5:8, 4, 9, 10)
  for(i in 1:length(res$clusters)){
    testthat::expect_true(is.integer(res$clusters[[i]]))
    testthat::expect_equal(length(intersect(clust_feats, res$clusters[[i]])), 0)
    testthat::expect_true(all(res$clusters[[i]] %in% 1:10))
    testthat::expect_equal(length(res$clusters[[i]]),
                           length(unique(res$clusters[[i]])))
    testthat::expect_true(all(res$clusters[[i]] == true_list[[i]]))
    clust_feats <- c(clust_feats, res$clusters[[i]])
  }

  testthat::expect_equal(length(clust_feats), 10)
  testthat::expect_equal(10, length(unique(clust_feats)))
  testthat::expect_equal(10, length(intersect(clust_feats, 1:10)))
  
  # Multiple
  testthat::expect_true(res$multiple)
  testthat::expect_false(formatClusters(3:5, p=10)$multiple)

  ## Trying other inputs

  testthat::expect_error(formatClusters(list(3:7, 7:10), p=15),
                         "length(intersect(clusters[[i]], clusters[[j]])) == 0 is not TRUE",
                         fixed=TRUE)
  # Duplicate clusters are now silently removed (#156): dropping the duplicate
  # 5:8 leaves the same 6 formatted clusters (5:8 plus five singletons) as
  # passing 5:8 once, with multiple = FALSE and 5:8 appearing exactly once.
  dedup_fc <- formatClusters(list(5:8, 5:8), p=9)
  testthat::expect_identical(dedup_fc, formatClusters(list(5:8), p=9))
  testthat::expect_equal(length(dedup_fc$clusters), 6)
  testthat::expect_false(dedup_fc$multiple)
  testthat::expect_equal(sum(vapply(dedup_fc$clusters,
                                    function(z) identical(as.integer(z), 5:8),
                                    logical(1))), 1)

  # Out-of-range cluster index is now caught up front by checkFormatClustersInput
  # (#104, L3) with a message naming the offending index and p, instead of the
  # downstream cryptic length(all_clustered_feats) == p stopifnot.
  testthat::expect_error(formatClusters(list(5:8), p=7),
                         "Cluster index 8 exceeds the number of features (p = 7).",
                         fixed=TRUE)

  testthat::expect_error(formatClusters(list(2:3, as.integer(NA)), p=10),
                         "Must specify one of clusters or R (or does one of these provided inputs contain NA?)",
                         fixed=TRUE)

  testthat::expect_error(formatClusters(list(2:3, c(4, 4, 5)), p=8),
                         "length(clusters[[i]]) == length(unique(clusters[[i]])) is not TRUE",
                         fixed=TRUE)

   testthat::expect_error(formatClusters(list(1:4, -1), p=10),
                         "all(clusters[[i]] >= 1) is not TRUE",
                         fixed=TRUE)

   testthat::expect_error(formatClusters(list(1:4, c(2.3, 1.2))),
                         "is.integer(clusters[[i]]) is not TRUE",
                         fixed=TRUE)
   
   ### Test prototypes feature
   
    n <- 8
    p <- 6
    
    set.seed(690289)
    
    X <- matrix(stats::rnorm(n*p), nrow=n, ncol=p)
    y <- X[, p]
    
    res <- formatClusters(clusters=list(), p=p, get_prototypes=TRUE, x=X, y=y)
    
    testthat::expect_true(is.list(res))
    testthat::expect_identical(names(res), c("clusters", "multiple",
                                             "prototypes"))
    testthat::expect_true(is.integer(res$prototypes))
    testthat::expect_identical(res$prototypes, 1:p)

    testthat::expect_equal(formatClusters(clusters=1:p, p=p,
                                              get_prototypes=TRUE, x=X,
                                              y=y)$prototypes, p)
    
    testthat::expect_identical(formatClusters(clusters=list(1L, 2L:p), p=p,
                                              get_prototypes=TRUE, x=X,
                                              y=y)$prototypes,
                               as.integer(c(1, p)))
    
    testthat::expect_identical(formatClusters(clusters=3L:p, p=p,
                                              get_prototypes=TRUE, x=X,
                                              y=y)$prototypes,
                               as.integer(c(p, 1, 2)))
    
    y2 <- rnorm(n)

    res <- formatClusters(clusters=list(2:3, 4:5), p=p, get_prototypes=TRUE,
                          x=X, y=y2)$prototypes

    testthat::expect_true(is.integer(res))

    testthat::expect_equal(length(res), 4)

    testthat::expect_true(res[1] %in% c(2L, 3L))

    testthat::expect_true(res[2] %in% c(4L, 5L))
    
    testthat::expect_equal(res[3], 1L)
    
    testthat::expect_equal(res[4], p)

    testthat::expect_error(formatClusters(clusters=list(2:3, 4:5), p=p,
                                          get_prototypes=TRUE, x=y2, y=X),
                           "is.matrix(x) is not TRUE", fixed=TRUE)

    testthat::expect_error(formatClusters(clusters=list(2:3, 4:5), p=p,
                                          get_prototypes=TRUE, x=X,
                                          y=y2[1:(n-1)]),
                           "n == length(y) is not TRUE", fixed=TRUE)
})

## Test passed with 65 successes.

Tests for checkSamplingType():

testthat::test_that("checkSamplingType works", {
  testthat::expect_null(checkSamplingType("SS"))
  testthat::expect_error(checkSamplingType("MB"),
                         "sampling_type MB is not yet supported (and isn't recommended anyway)",
                         fixed=TRUE)
  testthat::expect_error(checkSamplingType(c("SS", "SS")),
                         "length(sampling_type) == 1 is not TRUE", fixed=TRUE)
  testthat::expect_error(checkSamplingType(1),
                         "is.character(sampling_type) is not TRUE", fixed=TRUE)
  testthat::expect_error(checkSamplingType(as.character(NA)),
                         "!is.na(sampling_type) is not TRUE", fixed=TRUE)
})

## Test passed with 5 successes.

Tests for checkB():

testthat::test_that("checkB works", {
  testthat::expect_null(checkB(1500))
  testthat::expect_null(checkB(15))
  testthat::expect_error(checkB("B"),
                         "is.numeric(B) | is.integer(B) is not TRUE",
                         fixed=TRUE)
  testthat::expect_error(checkB(20:25), "length(B) == 1 is not TRUE",
                         fixed=TRUE)
  testthat::expect_error(checkB(as.integer(NA)), "!is.na(B) is not TRUE",
                         fixed=TRUE)
  testthat::expect_error(checkB(1.2), "B == round(B) is not TRUE",
                         fixed=TRUE)
  testthat::expect_error(checkB(-100), "B > 0 is not TRUE",
                         fixed=TRUE)
  testthat::expect_warning(checkB(5),
                           "Small values of B may lead to poor results.",
                           fixed=TRUE)
  testthat::expect_warning(checkB(2200),
                           "Large values of B may require long computation times.",
                           fixed=TRUE)
})

## Test passed with 9 successes.

Tests for checkAlpha():

testthat::test_that("checkAlpha validates alpha in (0, 1] (#72)", {
  testthat::expect_null(checkAlpha(1))
  testthat::expect_null(checkAlpha(0.5))
  testthat::expect_error(checkAlpha("0.5"),
                         "is.numeric(alpha) | is.integer(alpha) is not TRUE",
                         fixed=TRUE)
  testthat::expect_error(checkAlpha(c(0.5, 0.6)),
                         "length(alpha) == 1 is not TRUE", fixed=TRUE)
  testthat::expect_error(checkAlpha(NA_real_), "!is.na(alpha) is not TRUE",
                         fixed=TRUE)
  testthat::expect_error(checkAlpha(0), "alpha > 0 is not TRUE", fixed=TRUE)
  testthat::expect_error(checkAlpha(1.5), "alpha <= 1 is not TRUE", fixed=TRUE)
})

## Test passed with 7 successes.

Tests for checkPropFeatsRemove():

testthat::test_that("checkPropFeatsRemove works", {
  testthat::expect_null(checkPropFeatsRemove(0, 5))
  testthat::expect_null(checkPropFeatsRemove(.3, 10))
  testthat::expect_error(checkPropFeatsRemove(1, 3),
                         "prop_feats_remove >= 0 & prop_feats_remove < 1 is not TRUE",
                         fixed=TRUE)
  testthat::expect_error(checkPropFeatsRemove(c(.5, .6), 17),
                         "length(prop_feats_remove) == 1 is not TRUE",
                         fixed=TRUE)
  testthat::expect_error(checkPropFeatsRemove(".3", 99),
                         "is.numeric(prop_feats_remove) | is.integer(prop_feats_remove) is not TRUE",
                         fixed=TRUE)
  testthat::expect_error(checkPropFeatsRemove(as.numeric(NA), 172),
                         "!is.na(prop_feats_remove) is not TRUE",
                         fixed=TRUE)
  testthat::expect_error(checkPropFeatsRemove(.1, 1),
                         "p >= 2 is not TRUE",
                         fixed=TRUE)
})

## Test passed with 7 successes.

Finally, tests for checkCssInputs():

testthat::test_that("checkCssInputs works", {
  set.seed(80526)
  
  x <- matrix(stats::rnorm(15*11), nrow=15, ncol=11)
  y <- stats::rnorm(15)
  
  # Intentionally don't provide clusters for all feature, mix up formatting,
  # etc.
  good_clusters <- list(red_cluster=1L:5L,
                        green_cluster=6L:8L
                        # , c4=10:11
                        )
  
  res <- checkCssInputs(X=x, y=y, lambda=0.01, clusters=good_clusters,
                        fitfun = cssLasso, sampling_type = "SS", B = 13,
                        prop_feats_remove = 0, train_inds = integer(),
                        num_cores = 1L)
  
  # Basic output
  testthat::expect_true(is.list(res))
  testthat::expect_identical(names(res), c("feat_names", "X", "clusters"))
  
  # feat_names
  testthat::expect_true(is.character(res$feat_names))
  testthat::expect_true(is.na(res$feat_names))
  testthat::expect_equal(length(res$feat_names), 1)

  # X
  testthat::expect_true(is.matrix(res$X))
  testthat::expect_true(all(!is.na(res$X)))
  testthat::expect_true(is.numeric(res$X))
  testthat::expect_equal(ncol(res$X), 11)
  testthat::expect_equal(nrow(res$X), 15)

  # clusters
  testthat::expect_true(is.list(res$clusters))
  testthat::expect_equal(length(res$clusters), length(names(res$clusters)))
  testthat::expect_equal(length(res$clusters),
                         length(unique(names(res$clusters))))
  testthat::expect_true(all(!is.na(names(res$clusters))))
  testthat::expect_true(all(!is.null(names(res$clusters))))
  clust_feats <- integer()
  for(i in 1:length(res$clusters)){
    clust_feats <- c(clust_feats, res$clusters[[i]])
  }
  testthat::expect_equal(length(clust_feats), length(unique(clust_feats)))
  testthat::expect_equal(length(clust_feats), length(intersect(clust_feats,
                                                               1:11)))

  # Regression test: train_inds with an out-of-range index (> n) must be
  # rejected. Previously the upper-bound check was a no-op due to a misplaced
  # parenthesis (all(train_inds) <= n instead of all(train_inds <= n)), so
  # out-of-range indices were silently accepted. (n = 15 here; 16 is invalid.)
  testthat::expect_error(checkCssInputs(X=x, y=y, lambda=0.01,
                                        clusters=good_clusters,
                                        fitfun = cssLasso, sampling_type = "SS",
                                        B = 13, prop_feats_remove = 0,
                                        train_inds = c(1L, 2L, 16L),
                                        num_cores = 1L))

  ## Trying other inputs

  # Custom fitfun with nonsense lambda (which will be ignored by fitfun, and
  # shouldn't throw any error, because the acceptable input for lambda should be
  # enforced only by fitfun)

  testFitfun <- function(X, y, lambda){
    p <- ncol(X)
    stopifnot(p >= 2)
    # Choose p/2 features randomly
    selected <- sample.int(p, size=floor(p/2))
    return(selected)
  }

  res_fitfun <- checkCssInputs(X=x, y=y, lambda=x, clusters=1:3,
                               fitfun = testFitfun, sampling_type = "SS",
                               B = 13, prop_feats_remove = 0,
                               train_inds = integer(), num_cores = 1L)
  testthat::expect_true(is.list(res_fitfun))

  # Single cluster
  res_sing_clust <- checkCssInputs(X=x, y=y,
                                   lambda=c("foo", as.character(NA), "bar"),
                                   clusters=1:3, fitfun = testFitfun,
                                   sampling_type = "SS", B = 13,
                                   prop_feats_remove = 0,
                                   train_inds = integer(), num_cores = 1L)
  testthat::expect_true(is.list(res_sing_clust))
  testthat::expect_equal(length(res_sing_clust$clusters), 11 - 3 + 1)
  testthat::expect_true(length(unique(names(res_sing_clust$clusters))) == 11 -
                          3 + 1)
  testthat::expect_true(all(!is.na(names(res_sing_clust$clusters))))
  testthat::expect_true(all(!is.null(names(res_sing_clust$clusters))))

  # Other sampling types
  testthat::expect_error(checkCssInputs(X=x, y=y, lambda=c("foo",
                                                           as.character(NA),
                                                           "bar"), clusters=1:3,
                                        fitfun = testFitfun,
                                        sampling_type = "MB", B = 13,
                                        prop_feats_remove = 0,
                                        train_inds = integer(), num_cores = 1L),
                         "sampling_type MB is not yet supported (and isn't recommended anyway)",
                         fixed=TRUE)

  # Error has quotation marks in it
  testthat::expect_error(checkCssInputs(X=x, y=y, lambda=c("foo",
                                                           as.character(NA),
                                                           "bar"), clusters=1:3,
                                        fitfun = testFitfun,
                                        sampling_type = "S", B = 13,
                                        prop_feats_remove = 0,
                                        train_inds = integer(), num_cores = 1L))
  
  testthat::expect_error(checkCssInputs(X=x, y=y, lambda=c("foo", "bar",
                                                           as.character(NA)),
                                        clusters=1:3, fitfun = testFitfun,
                                        sampling_type = 2, B = 13,
                                        prop_feats_remove = 0,
                                        train_inds = integer(), num_cores = 1L),
                         "is.character(sampling_type) is not TRUE",
                         fixed=TRUE)

  # B
  testthat::expect_warning(checkCssInputs(X=x, y=y, lambda=c("foo", "bar",
                                                           as.character(NA)),
                                        clusters=1:3, fitfun = testFitfun,
                                        sampling_type = "SS", B = 5,
                                        prop_feats_remove = 0,
                                        train_inds = integer(), num_cores = 1L),
                           "Small values of B may lead to poor results.",
                           fixed=TRUE)

  testthat::expect_error(checkCssInputs(X=x, y=y, lambda=c("foo", "bar",
                                                           as.character(NA)),
                                        clusters=1:3, fitfun = testFitfun,
                                        sampling_type = "SS", B = "foo",
                                        prop_feats_remove = 0,
                                        train_inds = integer(), num_cores = 1L),
                           "is.numeric(B) | is.integer(B) is not TRUE",
                           fixed=TRUE)

  # prop_feats_remove
  testthat::expect_true(is.list(checkCssInputs(X=x, y=y,
                                               lambda=c("foo", "bar",
                                                        as.character(NA)),
                                               clusters=1:3, fitfun=testFitfun,
                                               sampling_type = "SS", B = 12,
                                               prop_feats_remove = 0.3,
                                               train_inds = integer(),
                                               num_cores = 1L)))

  # Use train_inds argument
  testthat::expect_true(is.list(checkCssInputs(X=x, y=y,
                                               lambda=c("foo", "bar",
                                                        as.character(NA)),
                                               clusters=1:3, fitfun=testFitfun,
                                               sampling_type = "SS", B = 12,
                                               prop_feats_remove = 0.3,
                                               train_inds = 11:15,
                                               num_cores = 1L)))

})

## Test passed with 31 successes.

Tests for checkCssLoopOutput():

testthat::test_that("checkCssLoopOutput works", {
  testthat::expect_null(checkCssLoopOutput(selected=1:5, p=6,
                                           feats_on_subsamp=1:6))
  
  testthat::expect_error(checkCssLoopOutput(selected=1:5, p=4,
                                            feats_on_subsamp=1:6),
                         "The provided feature selection method fitfun returned a vector of selected features longer than p on (at least) one subsample",
                         fixed=TRUE)
  
  testthat::expect_error(checkCssLoopOutput(selected=1:5, p=7,
                                            feats_on_subsamp=1:4),
                         "The provided feature selection method somehow selected features that were not provided for it to consider.",
                         fixed=TRUE)
  
  testthat::expect_error(checkCssLoopOutput(selected=c(1, 2, 3, 4.4, 5), p=7,
                                            feats_on_subsamp=1:7),
                         "The provided feature selection method fitfun failed to return a vector of valid (integer) indices on (at least) one subsample",
                         fixed=TRUE)
  
  testthat::expect_error(checkCssLoopOutput(selected=rep(1, 3), p=7,
                                            feats_on_subsamp=1:7),
                         "The provided feature selection method fitfun returned a vector of selected features containing repeated indices on (at least) one subsample",
                         fixed=TRUE)
  
  testthat::expect_error(checkCssLoopOutput(selected=c(-1, 5), p=7,
                                            feats_on_subsamp=1:7),
                         "The provided feature selection method fitfun returned a vector of selected features containing a non-positive index on (at least) one subsample",
                         fixed=TRUE)
  
  testthat::expect_error(checkCssLoopOutput(selected=c(0, 5), p=7,
                                            feats_on_subsamp=1:7),
                         "The provided feature selection method fitfun returned a vector of selected features containing a non-positive index on (at least) one subsample",
                         fixed=TRUE)
  
  testthat::expect_error(checkCssLoopOutput(selected=as.integer(NA), p=7,
                                            feats_on_subsamp=1:7),
                         "The provided feature selection method fitfun returned a vector containing NA values on (at least) one subsample",
                         fixed=TRUE)
  
  testthat::expect_error(checkCssLoopOutput(selected=c("1", "2", "3"), p=7,
                                            feats_on_subsamp=1:7),
                         "The provided feature selection method fitfun failed to return an integer or numeric vector on (at least) one subsample",
                         fixed=TRUE)

  testthat::expect_error(
    checkCssLoopOutput(selected=c(TRUE, FALSE), p=7, feats_on_subsamp=1:7),
    "returned a logical vector", fixed=TRUE)

  testthat::expect_error(
    checkCssLoopOutput(selected=NULL, p=7, feats_on_subsamp=1:7),
    "returned NULL", fixed=TRUE)

})

## Test passed with 11 successes.

Tests for fitfunFailureMessage():

testthat::test_that("fitfunFailureMessage surfaces the cause and subsample index (#73)", {
  # try-error path: the wrapped condition message is surfaced (this is what
  # getSelMatrix actually receives from mclapply under forking).
  te <- structure("Error\n", class = "try-error",
    condition = simpleError("fitfun returned a character vector"))
  msg <- fitfunFailureMessage(te, 4L)
  testthat::expect_match(msg, "subsample 4", fixed = TRUE)
  testthat::expect_match(msg, "fitfun returned a character vector", fixed = TRUE)
  # defensive non-try-error fallback: the class is reported.
  msg2 <- fitfunFailureMessage("not an integer vector", 2L)
  testthat::expect_match(msg2, "class character", fixed = TRUE)
  testthat::expect_match(msg2, "subsample 2", fixed = TRUE)
  testthat::expect_match(msg2, "integer vector of selected feature indices",
    fixed = TRUE)
})

## Test passed with 5 successes.

Tests for checkCssLassoInputs():

testthat::test_that("checkCssLassoInputs works", {
  set.seed(761)
  
  x <- matrix(stats::rnorm(15*4), nrow=15, ncol=4)
  y <- stats::rnorm(15)
  
  testthat::expect_null(checkCssLassoInputs(X=x, y=y, lambda=0.01))
  
  testthat::expect_error(checkCssLassoInputs(X=x, y=logical(15), lambda=0.05),
                         "For method cssLasso, y must be a numeric or integer vector.",
                         fixed=TRUE)

  # Integer y is accepted (is.numeric(1L) is TRUE; the | !is.integer guard
  # makes the validator read uniformly with the other entry points). Regression
  # test for issue #13 -- this is NOT a behavior change, integer y always worked.
  testthat::expect_null(checkCssLassoInputs(X=x, y=as.integer(round(y * 10)),
                                            lambda=0.01))

  testthat::expect_error(checkCssLassoInputs(X=x[1:13, ], y=y, lambda=0.01),
                         "For method cssLasso, y must be a vector of length equal to nrow(X).",
                         fixed=TRUE)
  
  testthat::expect_error(checkCssLassoInputs(X=x, y=rep(1.2, 15), lambda=0.05),
                         "Subsample with only one unique value of y detected: for the default cssLasso, every subsample of y (of size floor(n/2)) must have more than one unique value. css draws random subsamples, so this abort is seed-dependent and becomes more likely as B grows. It indicates that y is too discrete for the default cssLasso--supply a less discrete (more continuous) response, or pass a custom fitfun that tolerates a constant-y subsample.",
                         fixed=TRUE)
  
  testthat::expect_error(checkCssLassoInputs(X=x, y=y, lambda=TRUE),
                         "For method cssLasso, lambda must be a numeric.",
                         fixed=TRUE)
  
  testthat::expect_error(checkCssLassoInputs(X=x, y=y, lambda=as.numeric(NA)),
                         "NA detected in provided lambda input to cssLasso",
                         fixed=TRUE)
  
  testthat::expect_error(checkCssLassoInputs(X=x, y=y, lambda=-0.01),
                         "For method cssLasso, lambda must be nonnegative.",
                         fixed=TRUE)

  testthat::expect_error(checkCssLassoInputs(X=x, y=y, lambda=x),
                         "For method cssLasso, lambda must be either a single nonnegative numeric or a named length-2 numeric vector c(lambda=<value>, alpha=<value>).",
                         fixed=TRUE)

  testthat::expect_error(checkCssLassoInputs(X=x, y=y, lambda=numeric()),
                         "For method cssLasso, lambda must be either a single nonnegative numeric or a named length-2 numeric vector c(lambda=<value>, alpha=<value>).",
                         fixed=TRUE)

  testthat::expect_error(checkCssLassoInputs(X=x, y=y, lambda=-0.01),
                         "For method cssLasso, lambda must be nonnegative.",
                         fixed=TRUE)

  # Bundled c(lambda=, alpha=) form (elastic net)
  testthat::expect_null(checkCssLassoInputs(X=x, y=y,
                                            lambda=c(lambda=0.1, alpha=0.5)))

  # Order-insensitive (setequal on names)
  testthat::expect_null(checkCssLassoInputs(X=x, y=y,
                                            lambda=c(alpha=0.5, lambda=0.1)))

  # alpha at the boundary 1 is allowed
  testthat::expect_null(checkCssLassoInputs(X=x, y=y,
                                            lambda=c(lambda=0.1, alpha=1)))

  # Unnamed length-2 vector is rejected
  testthat::expect_error(checkCssLassoInputs(X=x, y=y, lambda=c(0.1, 0.5)),
                         "For method cssLasso, lambda must be either a single nonnegative numeric or a named length-2 numeric vector c(lambda=<value>, alpha=<value>).",
                         fixed=TRUE)

  # alpha = 0 (degenerate ridge) is rejected
  testthat::expect_error(checkCssLassoInputs(X=x, y=y,
                                             lambda=c(lambda=0.1, alpha=0)),
                         "For method cssLasso, the alpha component of lambda must be in (0, 1].",
                         fixed=TRUE)

  # alpha > 1 is rejected
  testthat::expect_error(checkCssLassoInputs(X=x, y=y,
                                             lambda=c(lambda=0.1, alpha=1.2)),
                         "For method cssLasso, the alpha component of lambda must be in (0, 1].",
                         fixed=TRUE)

  # Length-3 vector is rejected
  testthat::expect_error(checkCssLassoInputs(X=x, y=y,
                                             lambda=c(lambda=0.1, alpha=0.5,
                                                      extra=0.3)),
                         "For method cssLasso, lambda must be either a single nonnegative numeric or a named length-2 numeric vector c(lambda=<value>, alpha=<value>).",
                         fixed=TRUE)

})

## Test passed with 18 successes.

Tests for cssLasso() confirming that the elastic net mixing parameter alpha, bundled into lambda as c(lambda=, alpha=), actually changes which features are selected (and that the default pure-lasso path is unchanged). On a design with a correlated block of relevant features, a smaller alpha (more ridge-like) pulls in the correlated block partners that the pure lasso drops, so the elastic-net selected set is a strict superset of the lasso selected set:

testthat::test_that("cssLasso alpha (bundled in lambda) drives selection", {
  # Correlated-block design: features 1-4 are relevant and share a common
  # latent factor at correlation rho; features 5-20 are irrelevant noise.
  set.seed(1)
  n <- 100
  p <- 20
  rho <- 0.9
  block <- 4
  Z <- matrix(stats::rnorm(n*p), n, p)
  common <- stats::rnorm(n)
  for(j in 1:block){
    Z[, j] <- sqrt(rho)*common + sqrt(1 - rho)*Z[, j]
  }
  y <- as.numeric(Z %*% c(rep(1, block), rep(0, p - block)) + stats::rnorm(n))
  L <- 3.72251

  s_lasso <- cssLasso(Z, y, lambda=c(lambda=L, alpha=1))
  s_enet <- cssLasso(Z, y, lambda=c(lambda=L, alpha=0.2))

  # Pure lasso selects only the two strongest block members; the elastic net
  # pulls in the full correlated block.
  testthat::expect_identical(s_lasso, c(3L, 4L))
  testthat::expect_identical(s_enet, c(1L, 2L, 3L, 4L))

  # The elastic-net set is a strict superset of the lasso set (the round-2
  # cosmetic: robust to L within +/-10%).
  testthat::expect_true(all(s_lasso %in% s_enet))
  testthat::expect_true(length(setdiff(s_enet, s_lasso)) > 0)

  # Back-compatibility: a scalar lambda is byte-identical to the bundled form
  # with alpha = 1 (the default pure-lasso path is unchanged).
  testthat::expect_identical(cssLasso(Z, y, lambda=L),
                             cssLasso(Z, y, lambda=c(lambda=L, alpha=1)))
})

## Test passed with 5 successes.

Tests for cssLoop() (had to define checkCssLassoInputs() first):

testthat::test_that("cssLoop works", {
  set.seed(89134)

  x <- matrix(stats::rnorm(9*8), nrow=9, ncol=8)
  y <- stats::rnorm(9)

  output <- cssLoop(input=1L:4L, x=x, y=y, lambda=0.05,
                    fitfun=cssLasso)

  testthat::expect_true(is.integer(output))

  testthat::expect_equal(length(output), length(unique(output)))

  testthat::expect_true(length(output) <= 8)

  testthat::expect_true(all(output >= 1))

  testthat::expect_true(all(output <= 8))

  testthat::expect_error(cssLoop(input=1L:6L, x=x, y=y, lambda=0.05,
                                 fitfun=cssLasso),
                         "floor(n/2) == length(subsample) is not TRUE",
                         fixed=TRUE)

  testthat::expect_error(cssLoop(input=1L:4L, x=x, y=y[1:8],
                                 lambda=0.05, fitfun=cssLasso),
                         "length(y) == n is not TRUE",
                         fixed=TRUE)

  testthat::expect_error(cssLoop(input=1L:4L, x=x, y=logical(9),
                                 lambda=0.05, fitfun=cssLasso),
                         "For method cssLasso, y must be a numeric or integer vector.",
                         fixed=TRUE)

  testthat::expect_error(cssLoop(input=1L:4L, x=x, y=y,
                                 lambda=x, fitfun=cssLasso),
                         "For method cssLasso, lambda must be either a single nonnegative numeric or a named length-2 numeric vector c(lambda=<value>, alpha=<value>).",
                         fixed=TRUE)

  # Test other input format

  alt_input <- list("subsample"=2:5, "feats_to_keep"=c(FALSE, rep(TRUE, 4),
                                                       rep(FALSE, 2), TRUE))

  output2 <- cssLoop(input=alt_input, x=x, y=y, lambda=0.08, fitfun=cssLasso)

  testthat::expect_true(is.integer(output2))

  testthat::expect_equal(length(output2), length(unique(output2)))

  testthat::expect_true(length(output2) <= 8)

  testthat::expect_true(all(output2 %in% c(2, 3, 4, 5, 8)))

  testthat::expect_error(cssLoop(input= list("subsample"=2:5,
                                             "feats_to_keep"=c(FALSE,
                                                               rep(TRUE, 4),
                                                               rep(FALSE, 2))),
                                 x=x, y=y, lambda=0.08, fitfun=cssLasso),
                         "length(feats_to_keep) == p is not TRUE",
                         fixed=TRUE)

  # Custom fitfun with nonsense lambda (which will be ignored by fitfun, and
  # shouldn't throw any error, because the acceptable input for lambda should be
  # enforced only by fitfun) and nonsense y

  testFitfun <- function(X, y, lambda){
    p <- ncol(X)
    stopifnot(p >= 2)
    # Choose p/2 features randomly
    selected <- sample.int(p, size=floor(p/2))
    return(selected)
  }

  testthat::expect_true(is.integer(cssLoop(input=1L:4L, x=x, y=y,
                                           lambda=TRUE, fitfun=testFitfun)))

  testthat::expect_true(is.integer(cssLoop(input=1L:4L, x=x,
                                           y=character(9), lambda=.05,
                                           fitfun=testFitfun)))

})

## Test passed with 16 successes.

cssLoop() must validate the raw fitfun return against its documented contract before remapping to original feature indices, so that natural contract violations error loudly instead of silently corrupting the selection matrix (#151):

testthat::test_that("cssLoop validates the raw fitfun return before remapping (#151)", {
  set.seed(1)
  n <- 40
  p <- 6
  x <- matrix(stats::rnorm(n * p), n, p)
  y <- stats::rnorm(n)
  subsample <- as.integer(1:(n %/% 2))
  feats_to_keep <- c(TRUE, FALSE, TRUE, TRUE, FALSE, TRUE)  # p_sub = 4
  input <- list(subsample = subsample, feats_to_keep = feats_to_keep)

  mk <- function(val) function(X, y, lambda) val   # fitfun returning a fixed value

  # Each invalid raw return must error (was silently wrong on main #151):
  testthat::expect_error(cssLoop(input, x, y, lambda = 0.1,
                                 fitfun = mk(-1L)))                # non-positive
  testthat::expect_error(cssLoop(input, x, y, lambda = 0.1,
                                 fitfun = mk(c(TRUE, FALSE))),     # logical
                         "logical vector", fixed = TRUE)
  testthat::expect_error(cssLoop(input, x, y, lambda = 0.1,
                                 fitfun = mk(1.5)))                # fractional
  testthat::expect_error(cssLoop(input, x, y, lambda = 0.1,
                                 fitfun = mk(NULL)),               # NULL
                         "NULL", fixed = TRUE)
  # 5 > p_sub = 4; on main this errors but MIS-reports "NA values" (post-remap
  # NA). Asserting the message makes this a real regression test AND locks in
  # the cosmetic fix.
  testthat::expect_error(cssLoop(input, x, y, lambda = 0.1,
                                 fitfun = mk(5L)),
                         "index greater than ncol(X)", fixed = TRUE)

  # Valid raw returns still work and remap correctly:
  testthat::expect_equal(
    cssLoop(input, x, y, lambda = 0.1, fitfun = mk(integer(0))),  # select nothing
    integer(0))
  testthat::expect_equal(
    cssLoop(input, x, y, lambda = 0.1, fitfun = mk(c(1L, 3L))),   # raw cols 1,3 ...
    as.integer(which(feats_to_keep)[c(1L, 3L)]))                  # ... remap to 1,4
})

## Test passed with 7 successes.

Tests for checkGetClusterSelMatrixInput():

testthat::test_that("checkGetClusterSelMatrixInput works", {
  
  good_clusters <- list(happy=1L:8L, sad=9L:10L, med=11L)
  
  res <- matrix(sample(c(0, 1), size=6*11, replace=TRUE), nrow=6, ncol=11)
  
  testthat::expect_null(checkGetClusterSelMatrixInput(good_clusters, res))
  
  testthat::expect_error(checkGetClusterSelMatrixInput(list(happy=1L:8L,
                                                            med=11L), res),
                         "length(all_clustered_feats) == p is not TRUE",
                         fixed=TRUE)
  
  testthat::expect_error(checkGetClusterSelMatrixInput(good_clusters, 1:9),
                         "is.matrix(res) is not TRUE", fixed=TRUE)
  
  testthat::expect_error(checkGetClusterSelMatrixInput(good_clusters, res + .3),
                         "all(res %in% c(0, 1)) is not TRUE", fixed=TRUE)
  
  testthat::expect_error(checkGetClusterSelMatrixInput(good_clusters,
                                                       res[, 1:9]),
                         "length(all_clustered_feats) == p is not TRUE",
                         fixed=TRUE)
  
  testthat::expect_error(checkGetClusterSelMatrixInput(1L:10L, res),
                         "is.list(clusters) is not TRUE", fixed=TRUE)
  
  testthat::expect_error(checkGetClusterSelMatrixInput(list(c1=1L:5L,
                                                            c2=6L:8L,
                                                            c3=9L,
                                                            c4=integer()), res),
                         "all(lengths(clusters) >= 1) is not TRUE",
                         fixed=TRUE)
                         
  testthat::expect_error(checkGetClusterSelMatrixInput(list(c1=1L:5L,
                                            c2=6L:8L, c3=9L,
                                            c4=as.integer(NA)), res),
                         "all(!is.na(clusters)) is not TRUE",
                         fixed=TRUE)
  
  testthat::expect_error(checkGetClusterSelMatrixInput(list(c1=1L:5L,
                                            c2=6L:8L, c3=9L,
                                            c2=6L:8L), res),
                         "n_clusters == length(unique(clusters)) is not TRUE",
                         fixed=TRUE)
  
  testthat::expect_error(checkGetClusterSelMatrixInput(list(c1=1L:5L,
                                            c2=6L:8L, c3=14L), res),
                         "length(all_clustered_feats) == p is not TRUE",
                         fixed=TRUE)
  
})

## Test passed with 10 successes.

Tests for createSubsamples():

testthat::test_that("createSubsamples returns properly-named elements with feature removal (#69)", {
  set.seed(692)
  res <- createSubsamples(n = 30, p = 8, B = 5, sampling_type = "SS",
                          prop_feats_remove = 0.3)
  testthat::expect_equal(length(res), 2L * 5L)   # SS -> 2B elements
  # Every element is a named list(subsample, feats_to_keep) -- the invariant the
  # (now-meaningful) output stopifnot guards.
  testthat::expect_true(all(vapply(res,
      function(s) identical(names(s), c("subsample", "feats_to_keep")),
      logical(1))))
})

testthat::test_that("createSubsamples copies feats_to_keep onto each SS complementary pair (#128)", {
  set.seed(128)
  res <- createSubsamples(n = 30, p = 20, B = 5, sampling_type = "SS",
                          prop_feats_remove = 0.5)
  # SS with feature removal -> 2B elements.
  testthat::expect_equal(length(res), 10L)
  # Each complementary pair (i, B + i) must share the SAME feats_to_keep -- the
  # Shah-Samworth pairing guarantee (createSubsamples copies subsample i's mask
  # onto its pair B + i). With p = 20, two independent rbinom draws would match
  # with probability ~2^-20 (~1e-6), so the identity below genuinely exercises
  # the copy rather than passing by coincidence. Every subsample must also retain
  # >= 2 features (the while-loop guarantee).
  for(i in 1:5){
    testthat::expect_identical(res[[5L + i]]$feats_to_keep,
                               res[[i]]$feats_to_keep)
    testthat::expect_gte(sum(res[[i]]$feats_to_keep), 2)
    testthat::expect_gte(sum(res[[5L + i]]$feats_to_keep), 2)
  }
})

testthat::test_that("createSubsamples works", {
  res <- createSubsamples(n=20L, p=5L, B=11L, sampling_type="SS",
                          prop_feats_remove=0)
  testthat::expect_true(is.list(res))
  testthat::expect_equal(length(res), 2*11)
  testthat::expect_true(all(lengths(res) == 20/2))
  testthat::expect_equal(length(unique(res[[13]])), 20/2)

  set <- res[[4]]
  comp_set <- res[[4 + 11]]

  testthat::expect_equal(length(intersect(set, comp_set)), 0)
  testthat::expect_equal(length(union(set, comp_set)), length(c(set, comp_set)))
  testthat::expect_equal(20, length(c(set, comp_set)))

  # Try odd n

  res_odd <- createSubsamples(n=19L, p=23L, B=13L, sampling_type="SS",
                              prop_feats_remove=0)
  testthat::expect_true(is.list(res_odd))
  testthat::expect_equal(length(res_odd), 2*13)
  testthat::expect_true(all(lengths(res_odd) == floor(19/2)))
  testthat::expect_equal(length(unique(res_odd[[3]])), floor(19/2))

  set_odd <- res_odd[[2]]
  comp_set_odd <- res_odd[[2 + 13]]

  testthat::expect_equal(length(intersect(set_odd, comp_set_odd)), 0)
  testthat::expect_equal(length(union(set_odd, comp_set_odd)),
                         length(c(set_odd, comp_set_odd)))
  testthat::expect_equal(19 - 1, length(c(set_odd, comp_set_odd)))

  testthat::expect_error(createSubsamples(n=20L, p=5L, B=11L, sampling_type="MB",
                          prop_feats_remove=0),
                         "sampling_type MB is not yet supported (and isn't recommended anyway)",
                         fixed=TRUE)
  # misspecified sampling_type (not specifying error because contains quotation
  # marks)
  testthat::expect_error(createSubsamples(n=20L, p=5L, B=11L, sampling_type="S",
                          prop_feats_remove=0))
  testthat::expect_error(createSubsamples(n=11.1, p=5L, B=11L, sampling_type="SS",
                          prop_feats_remove=0),
                         "n == round(n) is not TRUE",
                         fixed=TRUE)
  testthat::expect_error(createSubsamples(n=-20L, p=5L, B=11L, sampling_type="SS",
                          prop_feats_remove=0),
                         "n > 0 is not TRUE",
                         fixed=TRUE)
  testthat::expect_error(createSubsamples(n=20L, p=5L, B=25.6, sampling_type="SS",
                          prop_feats_remove=0),
                         "length(subsamples) == B is not TRUE",
                         fixed=TRUE)
})

## Test passed with 2 successes.

## Test passed with 16 successes.

## Test passed with 19 successes.

Tests for getSubsamps():

testthat::test_that("getSubsamps works", {
  res <- getSubsamps(n=18L, B=21L, sampling_type="SS")
  
  testthat::expect_true(is.list(res))
  testthat::expect_equal(length(res), 2*21)
  testthat::expect_true(all(lengths(res) == 18/2))
  testthat::expect_equal(length(unique(res[[7]])), 18/2)

  set <- res[[3]]
  comp_set <- res[[3 + 21]]

  testthat::expect_equal(length(intersect(set, comp_set)), 0)
  testthat::expect_equal(length(union(set, comp_set)), length(c(set, comp_set)))
  testthat::expect_equal(18, length(c(set, comp_set)))
})

## Test passed with 7 successes.

Tests for getSelMatrix():

testthat::test_that("getSelMatrix works", {
  set.seed(98623)
  x <- matrix(stats::rnorm(25*6), nrow=25, ncol=6)
  y <- stats::rnorm(25)
  subsamps_object <- createSubsamples(n=25, p=6, B=12, sampling_type="SS",
                                      prop_feats_remove=0)
  res <- getSelMatrix(x=x, y=y, lambda=0.01, B=12, sampling_type="SS",
                      subsamps_object=subsamps_object, num_cores=1,
                      fitfun=cssLasso)

  testthat::expect_true(is.matrix(res))
  testthat::expect_equal(nrow(res), 2*12)
  testthat::expect_equal(ncol(res), 6)
  testthat::expect_true(all(res %in% c(0, 1)))
  testthat::expect_true(all(is.integer(res)))

  # Try a different fitfun

  testFitfun <- function(X, y, lambda){
    p <- ncol(X)
    stopifnot(p >= 2)
    # Choose p/2 features randomly
    selected <- sample.int(p, size=floor(p/2))
    return(selected)
  }
  
  # Note that value of lambda doesn't matter
  res2 <- getSelMatrix(x=x, y=y, lambda="foo", B=12, sampling_type="SS",
                      subsamps_object=subsamps_object, num_cores=1,
                      fitfun=testFitfun)

  testthat::expect_true(is.matrix(res2))
  testthat::expect_equal(nrow(res2), 2*12)
  testthat::expect_equal(ncol(res2), 6)
  testthat::expect_true(all(res2 %in% c(0, 1)))
  testthat::expect_true(all(is.integer(res2)))
  
  # With the per-subsample tryCatch (#155a), cssLoop's is.integer(subsample)
  # failure is caught and tagged, so the length(res_list) == nrow(res) check
  # (res_list length 1 vs 24 rows) now trips first.
  testthat::expect_error(getSelMatrix(x=x, y=y, lambda="0.02", B=12, sampling_type="SS",
                      subsamps_object="subsamps_object", num_cores=1,
                      fitfun=testFitfun),
                      "length(res_list) == nrow(res) is not TRUE",
                      fixed=TRUE)
  testthat::expect_error(getSelMatrix(x=x[1:8, ], y=y, lambda="foo", B=12, sampling_type="SS",
                      subsamps_object=subsamps_object, num_cores=1,
                      fitfun=testFitfun),
                      "length(y) == n is not TRUE",
                      fixed=TRUE)
  testthat::expect_error(getSelMatrix(x=x, y=y, lambda=-0.02, B=12, sampling_type="SS",
                      subsamps_object=subsamps_object, num_cores=1,
                      fitfun=cssLasso),
                      "For method cssLasso, lambda must be nonnegative.",
                      fixed=TRUE)
  # Wrong B
  testthat::expect_error(getSelMatrix(x=x, y=y, lambda=0.02, B=37, sampling_type="SS",
                      subsamps_object=subsamps_object, num_cores=1,
                      fitfun=cssLasso),
                      "length(res_list) == nrow(res) is not TRUE",
                      fixed=TRUE)
})

## Test passed with 14 successes.

A killed parallel worker makes parallel::mclapply() return NULL for that subsample. That NULL is distinct from a legitimate integer(0) (“selected nothing”) result, so getSelMatrix() must error rather than silently record a zero row (which would bias the selection proportions) (#104, L8):

testthat::test_that("getSelMatrix detects a killed parallel worker (#104)", {
  set.seed(98623)
  x <- matrix(stats::rnorm(25*6), nrow=25, ncol=6)
  y <- stats::rnorm(25)
  subsamps_object <- createSubsamples(n=25, p=6, B=12, sampling_type="SS",
                                      prop_feats_remove=0)

  # Simulate a worker that was killed: mclapply returns a list containing NULL.
  testthat::local_mocked_bindings(
    mclapply = function(...) list(NULL),
    .package = "parallel")

  testthat::expect_error(
    getSelMatrix(x=x, y=y, lambda=0.01, B=12, sampling_type="SS",
                 subsamps_object=subsamps_object, num_cores=1,
                 fitfun=cssLasso),
    "parallel worker failed")
})

## Test passed with 1 success.

We test the num_cores > 1 robustness fixes (issue #155). First (#155a), a fitfun failure is reported against its true subsample index rather than a prescheduling-misattributed one. fitfunFailureMessage() reads the tagged index carried by the cssr_loop_error, ignoring the (fallback) loop index it is passed:

testthat::test_that("fitfunFailureMessage reads the tagged subsample index (#155a)", {
  msg <- fitfunFailureMessage(
    structure(list(i=7L, msg="boom"), class="cssr_loop_error"), 999L)
  testthat::expect_true(grepl("subsample 7", msg, fixed=TRUE))
  testthat::expect_true(grepl("boom", msg, fixed=TRUE))
  testthat::expect_false(grepl("subsample 999", msg, fixed=TRUE))
})

## Test passed with 3 successes.

End-to-end (#155a), when a fitfun fails on exactly one subsample under num_cores = 2, getSelMatrix() must name that subsample – not the successful subsample 1 it shares a preschedule chunk with. (Needs real forking, so the parallel call is probed and the test is skipped where forking is unavailable.)

testthat::test_that("a parallel fitfun failure reports the true subsample (#155a)", {
  B <- 7L
  K <- 5L      # in 2..B: shares preschedule chunk 1 with the SUCCESSFUL subsample 1
  n <- 25L
  p <- 6L
  set.seed(408)
  x <- matrix(stats::rnorm(n * p), nrow=n, ncol=p)
  x[1, 1] <- -999    # sentinel "poison" marker in row 1
  y <- stats::rnorm(n)

  # floor(n/2) = 12 distinct row indices per subsample; ONLY subsample K holds row 1.
  half <- floor(n / 2)
  subsamps_object <- lapply(seq_len(2L * B),
                            function(j) if(j == K) 1:half else 2:(half + 1L))

  # fitfun stops iff its subsample contains the poison row (row 1), else selects.
  poison_fitfun <- function(X, y, lambda){
    if(any(X[, 1] == -999)) stop("poison") else c(1L, 2L)
  }

  # Probe real forking; skip (rather than fail) where unavailable.
  forked <- tryCatch({
    parallel::mclapply(1:2, function(i) i, mc.cores=2L)
    TRUE
  }, error=function(e) FALSE)
  if(!forked){
    testthat::skip("parallel forking unavailable in this environment")
  }

  err <- tryCatch(
    getSelMatrix(x=x, y=y, lambda=0.01, B=B, sampling_type="SS",
                 subsamps_object=subsamps_object, num_cores=2L,
                 fitfun=poison_fitfun),
    error=function(e) conditionMessage(e))
  testthat::expect_true(grepl("subsample 5", err, fixed=TRUE))
  testthat::expect_false(grepl("subsample 1", err, fixed=TRUE))
})

## Test passed with 2 successes.

And (#155b), fitfun warnings emitted inside forked workers must surface in the parent – the same count and messages as a serial run, rather than being silently swallowed:

testthat::test_that("fitfun warnings surface under num_cores > 1 (#155b)", {
  B <- 6L
  n <- 25L
  p <- 6L
  set.seed(511)
  x <- matrix(stats::rnorm(n * p), nrow=n, ncol=p)
  y <- stats::rnorm(n)
  subsamps_object <- createSubsamples(n=n, p=p, B=B, sampling_type="SS",
                                      prop_feats_remove=0)

  # One warning per fitfun call; returns a valid selection.
  warn_fitfun <- function(X, y, lambda){ warning("wfit"); c(1L, 2L) }

  serial_warns <- testthat::capture_warnings(
    getSelMatrix(x=x, y=y, lambda=0.01, B=B, sampling_type="SS",
                 subsamps_object=subsamps_object, num_cores=1L,
                 fitfun=warn_fitfun))
  # one warning per subsample = 2*B
  testthat::expect_equal(sum(serial_warns == "wfit"), 2L * B)

  # Probe real forking; skip where unavailable.
  forked <- tryCatch({
    parallel::mclapply(1:2, function(i) i, mc.cores=2L)
    TRUE
  }, error=function(e) FALSE)
  if(!forked){
    testthat::skip("parallel forking unavailable in this environment")
  }

  par_warns <- testthat::capture_warnings(
    getSelMatrix(x=x, y=y, lambda=0.01, B=B, sampling_type="SS",
                 subsamps_object=subsamps_object, num_cores=2L,
                 fitfun=warn_fitfun))
  # parallel no longer swallows them: same count AND same messages as serial.
  testthat::expect_equal(sum(par_warns == "wfit"), 2L * B)
  testthat::expect_identical(sort(par_warns), sort(serial_warns))
})

## Test passed with 3 successes.

Tests for cssLasso():

testthat::test_that("cssLasso works", {
  set.seed(24509)
  x <- matrix(stats::rnorm(15*4), nrow=15, ncol=4)
  y <- stats::rnorm(15)
  
  res <- cssLasso(X=x, y=y, lambda=0.01)
  
  testthat::expect_true(is.integer(res))
  testthat::expect_true(length(res) <= 4)
  testthat::expect_true(length(res) >= 0)
  testthat::expect_true(length(res) == length(unique(res)))
  testthat::expect_true(all(res <= 4))
  testthat::expect_true(all(res >= 1))
  
  testthat::expect_error(cssLasso(X=x[1:13, ], y=y, lambda=0.01),
                         "For method cssLasso, y must be a vector of length equal to nrow(X).",
                         fixed=TRUE)
  testthat::expect_error(cssLasso(X=x, y=y, lambda=-0.01),
                         "For method cssLasso, lambda must be nonnegative.",
                         fixed=TRUE)

})

## Test passed with 8 successes.

Tests for getClusterSelMatrix():

testthat::test_that("getClusterSelMatrix works", {
  good_clusters <- list(red_cluster=1L:5L,
                        green_cluster=6L:8L, blue_clust=9L)
  
  B <- 14
  p <- 9
  
  res_entries <- as.integer(sample(c(0, 1), size=2*B*p, replace=TRUE))
  
  good_res <- matrix(res_entries, nrow=2*B, ncol=p)
  
  res <- getClusterSelMatrix(good_clusters, good_res)
  
  testthat::expect_true(is.matrix(res))
  testthat::expect_equal(nrow(res), 2*B)
  # 3 clusters
  testthat::expect_equal(ncol(res), 3)
  testthat::expect_identical(colnames(res), c("red_cluster", "green_cluster",
                                              "blue_clust"))
  testthat::expect_true(all(is.integer(res)))
  testthat::expect_true(all(res %in% c(0, 1)))

  # (#105, L12) Value-pin EVERY cluster column (not just cluster 2): each column
  # must equal the "at least one member of that cluster was selected" indicator.
  # This pins the full feat2clus mapping/ordering that the #57 rowsum refactor
  # relies on, including the singleton cluster (drop=FALSE keeps it a matrix).
  for(j in seq_along(good_clusters)){
    testthat::expect_equal(res[, j],
        as.integer(rowSums(good_res[, good_clusters[[j]], drop=FALSE]) > 0))
  }

  # Single-cluster case (#57): t(clust_counts > 0L) collapses to one column
  res1 <- getClusterSelMatrix(list(only = 1L:9L), good_res)
  testthat::expect_equal(ncol(res1), 1)
  testthat::expect_true(all(is.integer(res1)))

  # Not all features in a cluster
  bad_clusters <- list(red_cluster=1L:5L, green_cluster=6L:7L, blue_clust=9L)

  testthat::expect_error(getClusterSelMatrix(bad_clusters, good_res),
                         "length(all_clustered_feats) == p is not TRUE",
                         fixed=TRUE)

  bad_res_entries <- as.integer(sample(c(0, 1, 2), size=2*B*p, replace=TRUE))

  bad_res <- matrix(bad_res_entries, nrow=2*B, ncol=p)

  testthat::expect_error(getClusterSelMatrix(good_clusters, bad_res),
                         "all(res %in% c(0, 1)) is not TRUE", fixed=TRUE)
})

## Test passed with 13 successes.

Finally, tests for css() itself!

testthat::test_that("css works", {
  set.seed(8712)
  
  x <- matrix(stats::rnorm(15*11), nrow=15, ncol=11)
  y <- stats::rnorm(15)
  
  # Intentionally don't provide clusters for all feature, mix up formatting,
  # etc.
  good_clusters <- list(red_cluster=1L:5L,
                        green_cluster=6L:8L,
                        c4=10:11)
  
  res <- css(X=x, y=y, lambda=0.01, clusters=good_clusters, fitfun = cssLasso,
    sampling_type = "SS", B = 13,
    prop_feats_remove = 0, train_inds = integer(), num_cores = 1L)
  
  # Basic output
  testthat::expect_true(is.list(res))
  testthat::expect_identical(class(res), "cssr")
  testthat::expect_identical(names(res), c("feat_sel_mat", "clus_sel_mat", "X",
                                           "y", "clusters", "train_inds"))
  
  # feat_sel mat
  testthat::expect_true(is.integer(res$feat_sel_mat))
  testthat::expect_true(is.matrix(res$feat_sel_mat))
  testthat::expect_true(all(res$feat_sel_mat %in% c(0, 1)))
  testthat::expect_equal(ncol(res$feat_sel_mat), 11)
  testthat::expect_null(colnames(res$feat_sel_mat))

  # clus_sel_mat
  testthat::expect_true(is.integer(res$clus_sel_mat))
  testthat::expect_true(is.matrix(res$clus_sel_mat))
  testthat::expect_true(all(res$clus_sel_mat %in% c(0, 1)))
  # 4 clusters
  testthat::expect_equal(ncol(res$clus_sel_mat), 4)
  testthat::expect_identical(colnames(res$clus_sel_mat), names(res$clusters))
  testthat::expect_equal(length(colnames(res$clus_sel_mat)), 4)
  testthat::expect_equal(length(unique(colnames(res$clus_sel_mat))), 4)
  testthat::expect_true(all(!is.na(colnames(res$clus_sel_mat))))
  testthat::expect_true(all(!is.null(colnames(res$clus_sel_mat))))

  # X
  testthat::expect_true(is.matrix(res$X))
  testthat::expect_true(all(!is.na(res$X)))
  testthat::expect_true(is.numeric(res$X))
  testthat::expect_equal(ncol(res$X), 11)
  testthat::expect_equal(nrow(res$X), 15)

  # y
  testthat::expect_true(is.numeric(res$y))
  testthat::expect_equal(length(res$y), 15)

  # clusters
  testthat::expect_true(is.list(res$clusters))
  testthat::expect_equal(length(res$clusters), length(names(res$clusters)))
  testthat::expect_equal(length(res$clusters),
                         length(unique(names(res$clusters))))
  testthat::expect_true(all(!is.na(names(res$clusters))))
  testthat::expect_true(all(!is.null(names(res$clusters))))
  clust_feats <- integer()
  for(i in 1:length(res$clusters)){
    clust_feats <- c(clust_feats, res$clusters[[i]])
  }
  testthat::expect_equal(length(clust_feats), length(unique(clust_feats)))
  testthat::expect_equal(length(clust_feats), length(intersect(clust_feats,
                                                               1:11)))
  # train_inds
  testthat::expect_identical(res$train_inds, integer())

  ## Trying other inputs
  
  # X as a data.frame
  X_df <- datasets::mtcars
  res_fitfun <- css(X=X_df, y=stats::rnorm(nrow(X_df)), lambda=0.01, B = 10)
  testthat::expect_identical(class(res_fitfun), "cssr")
  
  # X as a dataframe with factors (number of columns of final design matrix
  # after one-hot encoding factors won't match number of columns of df2)
  # cyl, gear, and carb are factors with more than 2 levels
  df2 <- X_df
  df2$cyl <- as.factor(df2$cyl)
  df2$vs <- as.factor(df2$vs)
  df2$am <- as.factor(df2$am)
  df2$gear <- as.factor(df2$gear)
  df2$carb <- as.factor(df2$carb)
  
  res_fitfun <- css(X=df2, y=stats::rnorm(nrow(X_df)), lambda=0.01, B = 10)
  testthat::expect_identical(class(res_fitfun), "cssr")
  
  # Should get error if I try to use clusters in this data.frame that contains
  # factors with more than two levels
  testthat::expect_error(css(X=df2, y=stats::rnorm(nrow(X_df)), lambda=0.01,
                             B = 10, clusters=1:3), "When stats::model.matrix converted the provided data.frame X to a matrix, the number of columns changed (probably because the provided data.frame contained a factor variable with at least three levels). Please convert X to a matrix yourself using model.matrix and provide cluster assignments according to the columns of the new matrix.",
                         fixed=TRUE)
  
  # X as a matrix with column names
  x2 <- x
  colnames(x2) <- LETTERS[1:11]
  res_names <- css(X=x2, y=y, lambda=0.01, clusters=good_clusters, B = 13)
  testthat::expect_identical(class(res_names), "cssr")
  testthat::expect_identical(colnames(x2), colnames(res_names$X))
  testthat::expect_identical(colnames(x2), colnames(res_names$feat_sel_mat))

  # Custom fitfun with nonsense lambda (which will be ignored by fitfun, and
  # shouldn't throw any error, because the acceptable input for lambda should be
  # enforced only by fitfun)

  testFitfun <- function(X, y, lambda){
    p <- ncol(X)
    stopifnot(p >= 2)
    # Choose p/2 features randomly
    selected <- sample.int(p, size=floor(p/2))
    return(selected)
  }

  res_fitfun <- css(X=x, y=y, lambda=c("foo", as.character(NA), "bar"),
                    clusters=1:3, B = 10, fitfun=testFitfun)
  testthat::expect_identical(class(res_fitfun), "cssr")

  # Bad lambda
  testthat::expect_error(css(X=x, y=y, lambda=-0.01, B = 10),
                         "For method cssLasso, lambda must be nonnegative.",
                         fixed=TRUE)

  testthat::expect_error(css(X=x, y=y, lambda="foo", B = 10),
                         "For method cssLasso, lambda must be a numeric.",
                         fixed=TRUE)

  # Single cluster
  res_sing_clust <- css(X=x, y=y, lambda=0.01, clusters=1:3, B = 10)
  testthat::expect_identical(class(res_sing_clust), "cssr")
  testthat::expect_equal(length(res_sing_clust$clusters), 11 - 3 + 1)
  testthat::expect_true(length(unique(names(res_sing_clust$clusters))) == 11 -
                          3 + 1)
  testthat::expect_true(all(!is.na(names(res_sing_clust$clusters))))
  testthat::expect_true(all(!is.null(names(res_sing_clust$clusters))))

  # No cluster
  testthat::expect_identical(class(css(X=x, y=y, lambda=0.01, B = 10)), "cssr")
  
  # All clusters named
  testthat::expect_identical(class(css(X=x, y=y, clusters=list("a"=1:5,
                                                               "b"=6:10,
                                                               "c"=11),
                                       lambda=0.01, B=10)), "cssr")

  # Other sampling types
  testthat::expect_error(css(X=x, y=y, lambda=1, sampling_type="MB"),
                         "sampling_type MB is not yet supported (and isn't recommended anyway)",
                         fixed=TRUE)

  # Error has quotation marks in it
  testthat::expect_error(css(X=x, y=y, lambda=1, sampling_type="S"))

  testthat::expect_error(css(X=x, y=y, lambda=1, sampling_type=1),
                         "is.character(sampling_type) is not TRUE",
                         fixed=TRUE)

  # B
  testthat::expect_warning(css(X=x, y=y, lambda=1, B=5),
                           "Small values of B may lead to poor results.",
                           fixed=TRUE)

  testthat::expect_error(css(X=x, y=y, lambda=1, B=list(10)),
                           "is.numeric(B) | is.integer(B) is not TRUE",
                           fixed=TRUE)
  
  # Clusters
  testthat::expect_error(css(X=x, y=y, lambda=1, clusters="red"),
                           "is.numeric(clusters) | is.integer(clusters) is not TRUE",
                           fixed=TRUE)

  # prop_feats_remove
  testthat::expect_identical(class(css(X=x, y=y, lambda=0.01, B = 10,
                                       prop_feats_remove=0.3)), "cssr")
  # Weirdly high, but still valid, value of prop_feats_remove
  testthat::expect_identical(class(css(X=x, y=y, lambda=0.01, B = 10,
                                       prop_feats_remove=0.9999999999)), "cssr")
  
  # Use train_inds argument
  res_train <- css(X=x, y=y, lambda=0.01, B = 10, train_inds=11:15)
  testthat::expect_equal(res_train$train_inds, 11:15)

})

testthat::test_that("duplicate clusters are removed to match the docs (#156)", {
  set.seed(9156)

  x <- matrix(stats::rnorm(15*11), nrow=15, ncol=11)
  y <- stats::rnorm(15)

  # (i) Unnamed duplicate cluster: css returns the same result as the
  # deduplicated input (css is stochastic, so seed both calls identically).
  set.seed(41)
  res_dup <- css(X=x, y=y, lambda=0.01, clusters=list(1:2, 1:2),
    fitfun=cssLasso, sampling_type="SS", B=13, prop_feats_remove=0,
    train_inds=integer(), num_cores=1L)
  set.seed(41)
  res_dedup <- css(X=x, y=y, lambda=0.01, clusters=list(1:2),
    fitfun=cssLasso, sampling_type="SS", B=13, prop_feats_remove=0,
    train_inds=integer(), num_cores=1L)
  testthat::expect_identical(res_dup, res_dedup)

  # (ii) First name wins, DIRECT helper path (checkFormatClustersInput, finding
  # 2): the length check now runs before the removal, so a named duplicate keeps
  # the first occurrence and its name.
  testthat::expect_identical(
    checkFormatClustersInput(list(a=2:3, b=2:3), p=10, clust_names=c("a", "b"),
                             get_prototypes=FALSE, x=NA, y=NA, R=NA),
    list(a=2:3))

  # (ii) First name wins, END-TO-END css path (checkCssInputs, finding 3): a
  # named duplicate runs without error and keeps the first occurrence's name.
  res_named <- css(X=x, y=y, lambda=0.01, clusters=list(myclust=2:3, dup=2:3),
    fitfun=cssLasso, sampling_type="SS", B=13, prop_feats_remove=0,
    train_inds=integer(), num_cores=1L)
  testthat::expect_true("myclust" %in% names(res_named$clusters))
  testthat::expect_false("dup" %in% names(res_named$clusters))

  # (iii) cssSelect and cssPredict route through css -> checkCssInputs, so they
  # inherit the fix: a named duplicate cluster runs without error.
  sel <- cssSelect(X=x, y=y, clusters=list(myclust=2:3, dup=2:3))
  testthat::expect_identical(names(sel), c("selected_clusts", "selected_feats",
                                           "weights"))

  test_x <- matrix(stats::rnorm(5*11), nrow=5, ncol=11)
  preds <- cssPredict(X_train_selec=x, y_train_selec=y, X_test=test_x,
                      clusters=list(myclust=2:3, dup=2:3))
  testthat::expect_true(is.numeric(preds))
  testthat::expect_equal(length(preds), 5)

})

testthat::test_that("css errors on an X containing NA (#74)", {
  set.seed(8642)
  X <- matrix(stats::rnorm(20 * 4), nrow = 20, ncol = 4)
  X[3, 2] <- NA
  y <- stats::rnorm(20)
  testthat::expect_error(
    css(X = X, y = y, lambda = 0.01),
    "must not contain missing",
    fixed = TRUE)
})

## Test passed with 57 successes.

## -- Warning: duplicate clusters are removed to match the docs (#156) ------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssSelect(X = x, y = y, clusters = list(myclust = 2:3, dup = 2:3))
##  2.   \-litr (local) getLassoLambda(X, y, alpha = alpha)
##  3.     \-glmnet::cv.glmnet(...)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: duplicate clusters are removed to match the docs (#156) ------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssSelect(X = x, y = y, clusters = list(myclust = 2:3, dup = 2:3))
##  2.   \-litr (local) getModelSize(X, y, css_results$clusters, alpha = alpha)
##  3.     \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: duplicate clusters are removed to match the docs (#156) ------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssPredict(...)
##  2.   \-litr (local) getLassoLambda(...)
##  3.     \-glmnet::cv.glmnet(...)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: duplicate clusters are removed to match the docs (#156) ------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssPredict(...)
##  2.   \-litr (local) getModelSize(...)
##  3.     \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## Test passed with 7 successes.

## Test passed with 1 success.

The fitfun extension point accepts any function whose arguments are exactly X, y, and lambda – in any order, with or without defaults – because cssLoop calls fitfun by keyword (do.call(fitfun, list(X=..., y=..., lambda=...))). A wrong or extra argument set is rejected with a message listing the arguments actually detected (issue #154).

testthat::test_that("css accepts any (X, y, lambda) fitfun signature and rejects wrong args (#154)", {
    set.seed(1)
    x <- matrix(stats::rnorm(60), 20, 3)
    y <- stats::rnorm(20)

    # (a) A default on lambda is accepted (rejected on the base by identical()).
    f_default <- function(X, y, lambda = 0.1) cssLasso(X, y, lambda)
    res_def <- css(x, y, lambda = 0.01, fitfun = f_default, B = 10)
    testthat::expect_identical(class(res_def), "cssr")

    # (b) Reordered args are accepted AND produce identical results
    #     (cssLoop calls fitfun by keyword, so order is irrelevant).
    f_reordered <- function(y, X, lambda) cssLasso(X, y, lambda)
    set.seed(7); res_re  <- css(x, y, lambda = 0.01, fitfun = f_reordered, B = 10)
    set.seed(7); res_ord <- css(x, y, lambda = 0.01, fitfun = cssLasso,    B = 10)
    testthat::expect_equal(res_re$feat_sel_mat, res_ord$feat_sel_mat)

    # (c) A wrong argument set is rejected, with the message listing the ACTUAL args.
    testthat::expect_error(
        css(x, y, lambda = 0.01, fitfun = function(X, y, z) 1L, B = 5),
        "must accept exactly the arguments X, y, and lambda (in any order, with or without defaults). Detected arguments to fitfun: X, y, z", fixed = TRUE)
    # (d) Extra args are rejected, with the new "exactly" wording.
    testthat::expect_error(
        css(x, y, lambda = 0.01, fitfun = function(X, y, lambda, extra) 1L, B = 5),
        "must accept exactly the arguments", fixed = TRUE)
})

## Test passed with 4 successes.

We also confirm the parallel feature-selection loop’s RNG is isolated per subsample (issue #12): cssLoop seeds the RNG immediately before fitfun, so a stochastic fitfun is reproducible and identical whether run serially or in parallel, given a seed set before css(). (The parallel checks require real forking, so they are guarded and skipped where forking is unavailable – Windows, CRAN, or a fully-loaded machine.)

testthat::test_that("cssLoop seeds fitfun reproducibly; css parallel RNG is isolated (#12)", {
  set.seed(99)
  x_rng <- matrix(stats::rnorm(40 * 8), nrow=40, ncol=8)
  y_rng <- stats::rnorm(40)

  # A stochastic fitfun: randomly selects 2 features (uses the RNG), so its
  # output depends on the seed in effect when it runs.
  rfit <- function(X, y, lambda){ sort(sample.int(ncol(X), 2)) }
  sub <- sort(sample.int(40L, 20L))

  # cssLoop's seed argument makes a stochastic fitfun reproducible: the same
  # seed gives the same selection. (On main, cssLoop has no seed argument, so
  # this behavior does not exist there.) This needs no parallelism.
  r1 <- cssLoop(input=sub, x=x_rng, y=y_rng, lambda=0.1, fitfun=rfit, seed=123L)
  r2 <- cssLoop(input=sub, x=x_rng, y=y_rng, lambda=0.1, fitfun=rfit, seed=123L)
  testthat::expect_identical(r1, r2)

  # ...and the seed genuinely drives the (stochastic) selection -- different
  # seeds give more than one distinct result (so the check above is not vacuous).
  outs <- lapply(1:8, function(s) cssLoop(input=sub, x=x_rng, y=y_rng,
                                          lambda=0.1, fitfun=rfit, seed=s))
  testthat::expect_true(length(unique(outs)) > 1)

  # End-to-end, serial css is reproducible across re-runs.
  set.seed(1)
  d1 <- css(x_rng, y_rng, lambda=0.1, fitfun=rfit, B=10L, num_cores=1L)$feat_sel_mat
  set.seed(1)
  d2 <- css(x_rng, y_rng, lambda=0.1, fitfun=rfit, B=10L, num_cores=1L)$feat_sel_mat
  testthat::expect_identical(d1, d2)

  # Parallel checks need real forking; run the parallel css calls defensively
  # and skip (rather than fail) when forking is unavailable -- Windows, CRAN, or
  # a fully-loaded machine. A genuine reproducibility regression would still
  # surface as a failed expectation below, not a skip.
  par_runs <- tryCatch({
    set.seed(1)
    a <- css(x_rng, y_rng, lambda=0.1, fitfun=rfit, B=10L, num_cores=2L)$feat_sel_mat
    set.seed(1)
    b <- css(x_rng, y_rng, lambda=0.1, fitfun=rfit, B=10L, num_cores=2L)$feat_sel_mat
    list(a=a, b=b)
  }, error=function(e) NULL)
  if(is.null(par_runs)){
    testthat::skip("parallel forking unavailable in this environment")
  }
  testthat::expect_identical(par_runs$a, par_runs$b)   # parallel reproducible
  testthat::expect_identical(par_runs$a, d1)           # serial == parallel
})

## Test passed with 5 successes.

We test the remaining num_cores robustness fixes (issue #155). parallel::detectCores() returns NA when the core count is indeterminable; that must not brick css() (#155c). We mock it and confirm the default num_cores = 1 still runs:

testthat::test_that("detectCores() = NA does not brick css (#155c)", {
  set.seed(72)
  data <- genClusteredData(n = 50, p = 11, k_unclustered = 2,
                           cluster_size = 4, n_clusters = 1, snr = 3)
  testthat::local_mocked_bindings(
    detectCores = function(...) NA_integer_, .package = "parallel")
  res <- css(X = data$X, y = data$y, lambda = 0.01, B = 10, num_cores = 1L)
  testthat::expect_s3_class(res, "cssr")
})

## Test passed with 1 success.

…but when the core count IS known, the upper-bound guard must still fire, so an over-large num_cores is still rejected (#155c):

testthat::test_that("num_cores beyond a known core count still errors (#155c)", {
  set.seed(72)
  data <- genClusteredData(n = 50, p = 11, k_unclustered = 2,
                           cluster_size = 4, n_clusters = 1, snr = 3)
  testthat::local_mocked_bindings(
    detectCores = function(...) 2L, .package = "parallel")
  testthat::expect_error(
    css(X = data$X, y = data$y, lambda = 0.01, B = 10, num_cores = 5L),
    "num_cores <= n_cores_avail is not TRUE", fixed = TRUE)
})

## Test passed with 1 success.

Finally (#155d), on Windows – where parallel::mclapply cannot fork – num_cores > 1 must downgrade to serial with a warning rather than hard-error. We mock the platform check so the branch is exercised off-Windows (it runs serially, so no real forking is needed):

testthat::test_that("num_cores > 1 on Windows warns and runs serially (#155d)", {
  # Mocking a package-internal function needs a package context: local_mocked_bindings
  # resolves the package via testthat::testing_package(), which is set under
  # devtools::test and R CMD check but is empty during the bare litr weave (the cssr
  # functions live in the knit global env with no namespace to mock). Skip there; the
  # Windows branch is still exercised under devtools::test / R CMD check.
  if(testthat::testing_package() == ""){
    testthat::skip("runningOnWindows mock needs a package context (unavailable during the litr weave)")
  }
  set.seed(84)
  data <- genClusteredData(n = 50, p = 11, k_unclustered = 2,
                           cluster_size = 4, n_clusters = 1, snr = 3)
  testthat::local_mocked_bindings(runningOnWindows = function() TRUE)
  testthat::expect_warning(
    res <- css(X = data$X, y = data$y, lambda = 0.01, B = 10, num_cores = 2L),
    "not supported on Windows", fixed = TRUE)
  testthat::expect_s3_class(res, "cssr")
})

## -- Skip: num_cores > 1 on Windows warns and runs serially (#155d) --------------
## Reason: runningOnWindows mock needs a package context (unavailable during the litr weave)

We test the highly-discrete-response diagnostic (issue #131): the @noRd helper discreteYAbortProb() that estimates the abort probability, and the up-front css() warning it drives (which fires only for the default cssLasso, and only when the estimated probability exceeds 1%).

testthat::test_that("discreteYAbortProb estimates the constant-subsample probability (#131)", {
  # Continuous y: every value is distinct, so no size-floor(n/2) subsample can
  # be constant. The aggregate underflows to exactly 0 -- assert the inequality,
  # NOT expect_equal to a tiny value.
  set.seed(131)
  y_cont <- stats::rnorm(100)
  testthat::expect_true(discreteYAbortProb(y_cont, 50L, "SS") < 1e-6)

  # Balanced binary at n = 100: each value's count (50) equals m = 50, so
  # p_const = 2 * C(50,50)/C(100,50) ~ 2e-29 and the aggregate underflows to 0.
  # This is the KEY false positive that the naive max(table(y)) >= floor(n/2)
  # trigger gets wrong (there count == m would fire).
  y_bin <- rep(0:1, each = 50)
  testthat::expect_true(discreteYAbortProb(y_bin, 50L, "SS") < 1e-6)

  # Skewed y (95/5): 0 is a strong majority (count 95 >> m = 50), so some
  # subsample is very likely all-zeros (~94% at B = 50, SS).
  y_skew <- c(rep(0, 95), rep(1, 5))
  testthat::expect_true(discreteYAbortProb(y_skew, 50L, "SS") > 0.5)

  # Fully constant y: every subsample is constant -> probability exactly 1.
  testthat::expect_equal(discreteYAbortProb(rep(0, 100), 50L, "SS"), 1)

  # Monotone in n_sub: "SS" draws 2*B subsamples, a non-"SS" string draws B, so
  # the SS probability is at least the B-count probability for the same y. (Pure
  # helper call exercising the n_sub branch; not a css() run.)
  testthat::expect_true(
    discreteYAbortProb(y_skew, 50L, "SS") >=
      discreteYAbortProb(y_skew, 50L, "MB"))
})

testthat::test_that("css() warns up front on a highly discrete y for the default cssLasso (#131)", {
  set.seed(2718)
  n <- 40L
  x_disc <- matrix(stats::rnorm(n * 6), nrow = n, ncol = 6)
  # 0 is a strong majority (count 38 >> floor(40/2) = 20), so the estimated
  # abort probability is ~99% -- well over the 1% threshold. y has 3 unique
  # values, so it passes input checks and reaches the warning.
  y_disc <- c(rep(0, 38), 1, 2)

  # (a) discrete y + default cssLasso -> the up-front warning fires. Such a run
  # also aborts later in the loop (a constant-y subsample), so wrap the call in
  # try(): the warning is computed before any subsampling/RNG and fires either
  # way (B = 10L so the small-B checkB warning does not also fire).
  testthat::expect_warning(
    try(css(X = x_disc, y = y_disc, lambda = 0.01, B = 10L), silent = TRUE),
    "highly discrete", fixed = TRUE)

  # The warning message reports a plausible percentage and names cssLasso.
  disc_warns <- testthat::capture_warnings(
    try(css(X = x_disc, y = y_disc, lambda = 0.01, B = 10L), silent = TRUE))
  disc_msg <- disc_warns[grepl("highly discrete", disc_warns, fixed = TRUE)]
  testthat::expect_length(disc_msg, 1L)
  testthat::expect_match(disc_msg, "%", fixed = TRUE)
  testthat::expect_match(disc_msg, "cssLasso", fixed = TRUE)

  # (b) continuous y -> no warning at all (a clean, complete run).
  set.seed(2719)
  x_cont <- matrix(stats::rnorm(n * 6), nrow = n, ncol = 6)
  testthat::expect_no_warning(
    css(X = x_cont, y = stats::rnorm(n), lambda = 0.01, B = 10L))

  # (c) balanced binary at n = 100 -> NO "highly discrete" warning (the KEY
  # false-positive guard). At small n balanced binary can legitimately warn, so
  # keep n large. Capture all warnings and assert none match.
  set.seed(2720)
  x_bin <- matrix(stats::rnorm(100 * 6), nrow = 100, ncol = 6)
  y_bin <- rep(0:1, each = 50)
  bin_warns <- testthat::capture_warnings(
    css(X = x_bin, y = y_bin, lambda = 0.01, B = 10L))
  testthat::expect_false(any(grepl("highly discrete", bin_warns, fixed = TRUE)))

  # (d) custom fitfun with the SAME discrete y -> NO warning (proves the
  # identical(fitfun, cssLasso) gate). The stub ignores y (so it tolerates a
  # constant-y subsample) and returns a fixed, valid selected set.
  tol_fitfun <- function(X, y, lambda){ c(1L, 2L) }
  testthat::expect_no_warning(
    css(X = x_disc, y = y_disc, lambda = 0.01, B = 10L, fitfun = tol_fitfun))

  # (e) non-finite numeric y (majority Inf) + default cssLasso -> NO "highly
  # discrete" warning (proves the all(is.finite(sel_y)) guard). Without the
  # guard, table() would miscount the 38 Inf cells as a dominating value and
  # warn (~100%); with it, the diagnostic is skipped and cssLasso instead aborts
  # on the non-finite value (checkCssLassoInputs). Every size-floor(40/2) = 20
  # subsample must include an Inf (only 2 finite cells), so the abort is
  # deterministic; wrap the warning-capture call in try().
  y_nonfin <- c(rep(Inf, 38), 0, 1)
  nonfin_warns <- testthat::capture_warnings(
    try(css(X = x_disc, y = y_nonfin, lambda = 0.01, B = 10L), silent = TRUE))
  testthat::expect_false(any(grepl("highly discrete", nonfin_warns, fixed = TRUE)))
  # ...and cssLasso still rejects the non-finite y (no warning to suppress now).
  testthat::expect_error(
    css(X = x_disc, y = y_nonfin, lambda = 0.01, B = 10L),
    "non-finite", fixed = TRUE)
})

## Test passed with 5 successes.

## Test passed with 9 successes.

8 Tests for the core css() functions

8 Tests for the core `css()` functions