10 Tests for other useful functions

Tests for checkGenClusteredDataInputs()

testthat::test_that("checkGenClusteredDataInputs works", {
  set.seed(7612)

  # Should get no error
  checkGenClusteredDataInputs(p=19, k_unclustered=2, cluster_size=5, n_clusters=3,
                           sig_clusters=2, rho=.8, beta_latent=1.5,
                           beta_unclustered=-2, snr=NA, sigma_eps_sq=.5)
  
  checkGenClusteredDataInputs(p=19, k_unclustered=2, cluster_size=5, n_clusters=3,
                           sig_clusters=2, rho=.8, beta_latent=1.5,
                           beta_unclustered=-2, snr=1, sigma_eps_sq=NA)
  
  # sig_clusters
  testthat::expect_error(checkGenClusteredDataInputs(p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters="2", rho=.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "is.numeric(sig_clusters) | is.integer(sig_clusters) is not TRUE",
                         fixed=TRUE)
  
  testthat::expect_error(checkGenClusteredDataInputs(p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=4, rho=.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "sig_clusters <= n_clusters is not TRUE", fixed=TRUE)
  
  testthat::expect_error(checkGenClusteredDataInputs(p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=-1, rho=.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "sig_clusters >= 0 is not TRUE", fixed=TRUE)
  
  testthat::expect_error(checkGenClusteredDataInputs(p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=.6, rho=.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "sig_clusters == round(sig_clusters) is not TRUE",
                         fixed=TRUE)
  
  
  # n_clusters
  testthat::expect_error(checkGenClusteredDataInputs(p=19, k_unclustered=2,
                                                  cluster_size=5,
                                                  n_clusters="3",
                                                  sig_clusters=2, rho=.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "is.numeric(n_clusters) | is.integer(n_clusters) is not TRUE",
                         fixed=TRUE)
  
  testthat::expect_error(checkGenClusteredDataInputs(p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3.2,
                                                  sig_clusters=2, rho=.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "n_clusters == round(n_clusters) is not TRUE",
                         fixed=TRUE)
  
  testthat::expect_error(checkGenClusteredDataInputs(p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=0,
                                                  sig_clusters=0, rho=.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "n_clusters >= 1 is not TRUE", fixed=TRUE)
  
  testthat::expect_error(checkGenClusteredDataInputs(p=19, k_unclustered=2,
                                                  cluster_size=1, n_clusters=3,
                                                  sig_clusters=2, rho=.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "cluster_size >= 2 is not TRUE", fixed=TRUE)
  
  testthat::expect_error(checkGenClusteredDataInputs(p=16, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=2, rho=.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "p >= n_clusters * cluster_size + k_unclustered is not TRUE",
                         fixed=TRUE)
  
  testthat::expect_error(checkGenClusteredDataInputs(p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=2, rho=0,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "rho > 0 is not TRUE", fixed=TRUE)
  
  testthat::expect_error(checkGenClusteredDataInputs(p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=2, rho=0.8,
                                                  beta_latent=0,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "beta_latent != 0 is not TRUE", fixed=TRUE)
  
  testthat::expect_error(checkGenClusteredDataInputs(p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=2, rho=0.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=0, snr=1,
                                                  sigma_eps_sq=NA),
                         "beta_unclustered != 0 is not TRUE", fixed=TRUE)
  
  # k_unclustered
  testthat::expect_error(checkGenClusteredDataInputs(p=19, k_unclustered="2",
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=2, rho=0.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "is.numeric(k_unclustered) | is.integer(k_unclustered) is not TRUE",
                         fixed=TRUE)

  testthat::expect_error(checkGenClusteredDataInputs(p=19, k_unclustered=0,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=2, rho=0.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "k_unclustered >= 1 is not TRUE", fixed=TRUE)

  testthat::expect_error(checkGenClusteredDataInputs(p=19, k_unclustered=2.2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=2, rho=0.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "k_unclustered == round(k_unclustered) is not TRUE",
                         fixed=TRUE)
  
  testthat::expect_error(checkGenClusteredDataInputs(p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=2, rho=0.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=NA,
                                                  sigma_eps_sq=NA),
                         "Must specify one of snr or sigma_eps_sq", fixed=TRUE)

  testthat::expect_error(checkGenClusteredDataInputs(p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=2, rho=0.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=-.2,
                                                  sigma_eps_sq=NA),
                         "snr > 0 is not TRUE", fixed=TRUE)

  testthat::expect_error(checkGenClusteredDataInputs(p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=2, rho=0.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=NA,
                                                  sigma_eps_sq=-.3),
                         "sigma_eps_sq >= 0 is not TRUE", fixed=TRUE)
  
})

## Test passed with 18 successes.

Tests for getNoiseVar():

testthat::test_that("getNoiseVar works", {
  # getNoiseVar(cor) returns the noise variance v such that a proxy Z + N(0, v)
  # has correlation cor with Z, i.e. v = 1/cor^2 - 1 (vectorized; cor in (0, 1]).
  testthat::expect_equal(getNoiseVar(1), 0)
  testthat::expect_equal(getNoiseVar(0.5), 3)
  testthat::expect_equal(getNoiseVar(0.9), 1/0.81 - 1)
  testthat::expect_equal(getNoiseVar(c(0.5, 1, 0.9)), c(3, 0, 1/0.81 - 1))
  # The defining property: a proxy with this noise variance has correlation cor.
  testthat::expect_equal(1/sqrt(1 + getNoiseVar(0.8)), 0.8)

  # cor must be numeric, non-NA, non-empty, and in (0, 1] (checks are vectorized).
  testthat::expect_error(getNoiseVar(0), "all(cor > 0) is not TRUE", fixed=TRUE)
  testthat::expect_error(getNoiseVar(-0.5), "all(cor > 0) is not TRUE",
    fixed=TRUE)
  testthat::expect_error(getNoiseVar(c(0.5, 0)), "all(cor > 0) is not TRUE",
    fixed=TRUE)
  testthat::expect_error(getNoiseVar(1.5), "all(cor <= 1) is not TRUE",
    fixed=TRUE)
  testthat::expect_error(getNoiseVar(NA_real_), "all(!is.na(cor)) is not TRUE",
    fixed=TRUE)
  testthat::expect_error(getNoiseVar("0.5"),
    "is.numeric(cor) | is.integer(cor) is not TRUE", fixed=TRUE)
  testthat::expect_error(getNoiseVar(numeric(0)), "length(cor) >= 1 is not TRUE",
    fixed=TRUE)
})

## Test passed with 12 successes.

Finally, tests for genClusteredData()

testthat::test_that("genClusteredData works", {
  set.seed(23478)

  ret <- genClusteredData(n=25, p=19, k_unclustered=2, cluster_size=5,
                          n_clusters=3, sig_clusters=2, rho=.99,
                          beta_latent=1.5, beta_unclustered=-2, snr=NA,
                          sigma_eps_sq=.5)
  
  testthat::expect_true(is.list(ret))
  testthat::expect_identical(names(ret), c("X", "y", "Z", "mu"))
  
  testthat::expect_true(is.numeric(ret$X))
  testthat::expect_true(is.matrix(ret$X))
  testthat::expect_equal(ncol(ret$X), 19)
  testthat::expect_equal(nrow(ret$X), 25)
  # X is Gaussian with mean 0 and variance 4; expect all observations to lie
  # within 5 standard deviations of mean
  testthat::expect_true(all(abs(ret$X) < 5*2))
  # Test that clusters are correlated--within-cluster correlation should be
  # high, correlation with other features should be low
  testthat::expect_true(min(cor(ret$X[, 1:5])) > .9)
  testthat::expect_true(max(abs(cor(ret$X[, 1:5], ret$X[, 6:19]))) < .6)
  
  testthat::expect_true(min(cor(ret$X[, 6:10])) > .9)
  testthat::expect_true(max(abs(cor(ret$X[, 6:10],
                                    ret$X[, c(1:5, 11:19)]))) < .6)
  
  testthat::expect_true(min(cor(ret$X[, 11:15])) > .9)
  testthat::expect_true(max(abs(cor(ret$X[, 11:15],
                                    ret$X[, c(1:10, 16:19)]))) < .6)

  cor_indeps <- cor(ret$X[, 16:19])
  testthat::expect_true(max(abs(cor_indeps[lower.tri(cor_indeps)])) < .6)

  testthat::expect_true(is.numeric(ret$y))
  testthat::expect_equal(length(ret$y), 25)

  testthat::expect_true(is.numeric(ret$Z))
  testthat::expect_true(is.matrix(ret$Z))
  testthat::expect_equal(nrow(ret$Z), 25)
  testthat::expect_equal(ncol(ret$Z), 3)

  testthat::expect_true(is.numeric(ret$mu))
  testthat::expect_equal(length(ret$mu), 25)
  # Because y is Gaussian with mean mu and standard deviation .5 conditional on
  # mu, expect all observations to lie within 5 sds of mu
  testthat::expect_true(all(abs(ret$y - ret$mu) < 5*.5))

  # Specify SNR instead of sigma_eps_sq
  ret <- genClusteredData(n=5, p=19, k_unclustered=2, cluster_size=5,
                          n_clusters=3, sig_clusters=2, rho=.8, beta_latent=1.5,
                          beta_unclustered=-2, snr=1, sigma_eps_sq=NA)
  
  testthat::expect_true(is.list(ret))
  testthat::expect_identical(names(ret), c("X", "y", "Z", "mu"))
  
  # Specifying both snr and sigma_eps_sq is an error (issue #13): only one of
  # the two may be given.
  testthat::expect_error(
    genClusteredData(n=5, p=19, k_unclustered=2, cluster_size=5, n_clusters=3,
                     sig_clusters=2, rho=.8, beta_latent=1.5,
                     beta_unclustered=-2, snr=.01, sigma_eps_sq=.25),
    "Only one of snr and sigma_eps_sq may be specified", fixed=TRUE)
  
  # Try a single latent variable (z should be a one-column matrix)
  ret <- genClusteredData(n=5, p=19, k_unclustered=2, cluster_size=5,
                          n_clusters=1, sig_clusters=1, rho=.8, beta_latent=1.5,
                          beta_unclustered=-2, snr=NA, sigma_eps_sq=.5)
  
  testthat::expect_true(is.list(ret))
  testthat::expect_identical(names(ret), c("X", "y", "Z", "mu"))

  testthat::expect_true(is.numeric(ret$Z))
  testthat::expect_true(is.matrix(ret$Z))
  testthat::expect_equal(nrow(ret$Z), 5)
  testthat::expect_equal(ncol(ret$Z), 1)
  
  # Bad inputs
  testthat::expect_error(genClusteredData(n=5, p=19, k_unclustered=2,
                                          cluster_size=5, n_clusters=3,
                                          sig_clusters="2", rho=.8,
                                          beta_latent=1.5, beta_unclustered=-2,
                                          snr=1, sigma_eps_sq=NA),
                         "is.numeric(sig_clusters) | is.integer(sig_clusters) is not TRUE",
                         fixed=TRUE)
  
  testthat::expect_error(genClusteredData(n=5, p=19, k_unclustered=2,
                                          cluster_size=5, n_clusters=3,
                                          sig_clusters=4, rho=.8,
                                          beta_latent=1.5, beta_unclustered=-2,
                                          snr=1, sigma_eps_sq=NA),
                         "sig_clusters <= n_clusters is not TRUE", fixed=TRUE)

  testthat::expect_error(genClusteredData(n=5, p=19, k_unclustered=2,
                                          cluster_size=5, n_clusters=3,
                                          sig_clusters=-1, rho=.8,
                                          beta_latent=1.5, beta_unclustered=-2,
                                          snr=1, sigma_eps_sq=NA),
                         "sig_clusters >= 0 is not TRUE", fixed=TRUE)

  testthat::expect_error(genClusteredData(n=5, p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=.6, rho=.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "sig_clusters == round(sig_clusters) is not TRUE",
                         fixed=TRUE)


  # n_clusters
  testthat::expect_error(genClusteredData(n=5, p=19, k_unclustered=2,
                                                  cluster_size=5,
                                                  n_clusters="3",
                                                  sig_clusters=2, rho=.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "is.numeric(n_clusters) | is.integer(n_clusters) is not TRUE",
                         fixed=TRUE)

  testthat::expect_error(genClusteredData(n=5, p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3.2,
                                                  sig_clusters=2, rho=.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "n_clusters == round(n_clusters) is not TRUE",
                         fixed=TRUE)

  testthat::expect_error(genClusteredData(n=5, p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=0,
                                                  sig_clusters=0, rho=.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "n_clusters >= 1 is not TRUE", fixed=TRUE)

  testthat::expect_error(genClusteredData(n=5, p=19, k_unclustered=2,
                                                  cluster_size=1, n_clusters=3,
                                                  sig_clusters=2, rho=.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "cluster_size >= 2 is not TRUE", fixed=TRUE)

  testthat::expect_error(genClusteredData(p=16, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=2, rho=.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "p >= n_clusters * cluster_size + k_unclustered is not TRUE",
                         fixed=TRUE)

  testthat::expect_error(genClusteredData(n=5, p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=2, rho=0,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "rho > 0 is not TRUE", fixed=TRUE)

  testthat::expect_error(genClusteredData(n=5, p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=2, rho=0.8,
                                                  beta_latent=0,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "beta_latent != 0 is not TRUE", fixed=TRUE)

  testthat::expect_error(genClusteredData(n=5, p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=2, rho=0.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=0, snr=1,
                                                  sigma_eps_sq=NA),
                         "beta_unclustered != 0 is not TRUE", fixed=TRUE)

  # k_unclustered
  testthat::expect_error(genClusteredData(n=5, p=19, k_unclustered="2",
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=2, rho=0.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "is.numeric(k_unclustered) | is.integer(k_unclustered) is not TRUE",
                         fixed=TRUE)

  testthat::expect_error(genClusteredData(n=5, p=19, k_unclustered=0,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=2, rho=0.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "k_unclustered >= 1 is not TRUE", fixed=TRUE)

  testthat::expect_error(genClusteredData(n=5, p=19, k_unclustered=2.2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=2, rho=0.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=1,
                                                  sigma_eps_sq=NA),
                         "k_unclustered == round(k_unclustered) is not TRUE",
                         fixed=TRUE)

  testthat::expect_error(genClusteredData(n=5, p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=2, rho=0.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=NA,
                                                  sigma_eps_sq=NA),
                         "Must specify one of snr or sigma_eps_sq", fixed=TRUE)

  testthat::expect_error(genClusteredData(n=5, p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=2, rho=0.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=-.2,
                                                  sigma_eps_sq=NA),
                         "snr > 0 is not TRUE", fixed=TRUE)

  testthat::expect_error(genClusteredData(n=5, p=19, k_unclustered=2,
                                                  cluster_size=5, n_clusters=3,
                                                  sig_clusters=2, rho=0.8,
                                                  beta_latent=1.5,
                                                  beta_unclustered=-2, snr=NA,
                                                  sigma_eps_sq=-.3),
                         "sigma_eps_sq >= 0 is not TRUE", fixed=TRUE)

  # sigma_eps_sq = 0 is allowed (issue #13) and yields noiseless data (y == mu)
  ret <- genClusteredData(n=5, p=19, k_unclustered=2, cluster_size=5,
                          n_clusters=3, sig_clusters=2, rho=.8, beta_latent=1.5,
                          beta_unclustered=-2, snr=NA, sigma_eps_sq=0)
  testthat::expect_true(is.list(ret))
  testthat::expect_identical(names(ret), c("X", "y", "Z", "mu"))
  testthat::expect_equal(ret$y, ret$mu)

  # k_unclustered = 1 is allowed (issue #13)
  ret <- genClusteredData(n=5, p=19, k_unclustered=1, cluster_size=5,
                          n_clusters=3, sig_clusters=2, rho=.8, beta_latent=1.5,
                          beta_unclustered=-2, snr=NA, sigma_eps_sq=.5)
  testthat::expect_true(is.list(ret))
  testthat::expect_identical(names(ret), c("X", "y", "Z", "mu"))
  testthat::expect_equal(nrow(ret$X), 5)
  testthat::expect_equal(ncol(ret$X), 19)

})

## Test passed with 57 successes.

Tests for genClusteredDataWeighted()

testthat::test_that("genClusteredDataWeighted works", {
  set.seed(23478)

  ret <- genClusteredDataWeighted(n=25, p=19, k_unclustered=2, cluster_size=5,
                                  n_strong_cluster_vars=3, n_clusters=3,
                                  sig_clusters=2, rho_high=.99, rho_low=.5,
                                  beta_latent=1.5, beta_unclustered=-2,
                                  snr=NA, sigma_eps_sq=.5)

  testthat::expect_true(is.list(ret))
  testthat::expect_identical(names(ret), c("X", "y", "Z", "mu"))

  testthat::expect_true(is.numeric(ret$X))
  testthat::expect_true(is.matrix(ret$X))
  testthat::expect_equal(ncol(ret$X), 19)
  testthat::expect_equal(nrow(ret$X), 25)
  # X is Gaussian with mean 0 and variance 4; expect all observations to lie
  # within 5 standard deviations of mean
  testthat::expect_true(all(abs(ret$X) < 5*2))
  # Test that clusters are correlated--within-cluster correlation should be
  # high, correlation with other features should be low
  testthat::expect_true(min(cor(ret$X[, 1:3])) > .9)
  testthat::expect_true(max(abs(cor(ret$X[, 1:5], ret$X[, 6:19]))) < .6)

  testthat::expect_true(min(cor(ret$X[, 6:8])) > .9)
  testthat::expect_true(max(abs(cor(ret$X[, 6:10],
                                    ret$X[, c(1:5, 11:19)]))) < .6)

  testthat::expect_true(min(cor(ret$X[, 11:13])) > .9)
  testthat::expect_true(max(abs(cor(ret$X[, 11:15],
                                    ret$X[, c(1:10, 16:19)]))) < .7)

  cor_indeps <- cor(ret$X[, 16:19])
  testthat::expect_true(max(abs(cor_indeps[lower.tri(cor_indeps)])) < .6)

  testthat::expect_true(is.numeric(ret$y))
  testthat::expect_equal(length(ret$y), 25)

  testthat::expect_true(is.numeric(ret$Z))
  testthat::expect_true(is.matrix(ret$Z))
  testthat::expect_equal(nrow(ret$Z), 25)
  testthat::expect_equal(ncol(ret$Z), 3)

  testthat::expect_true(is.numeric(ret$mu))
  testthat::expect_equal(length(ret$mu), 25)
  # Because y is Gaussian with mean mu and standard deviation .5 conditional on
  # mu, expect all observations to lie within 5 sds of mu
  testthat::expect_true(all(abs(ret$y - ret$mu) < 5*.5))

  # Specify SNR instead of sigma_eps_sq
  ret <- genClusteredDataWeighted(n=25, p=19, k_unclustered=2, cluster_size=5,
                                  n_strong_cluster_vars=3, n_clusters=3,
                                  sig_clusters=2, rho_high=.99, rho_low=.5,
                                  beta_latent=1.5, beta_unclustered=-2,
                                  snr=1, sigma_eps_sq=NA)

  testthat::expect_true(is.list(ret))
  testthat::expect_identical(names(ret), c("X", "y", "Z", "mu"))

  # Specifying both snr and sigma_eps_sq is an error (issue #13): only one of
  # the two may be given.
  testthat::expect_error(
    genClusteredDataWeighted(n=25, p=19, k_unclustered=2, cluster_size=5,
                             n_strong_cluster_vars=3, n_clusters=3,
                             sig_clusters=2, rho_high=.99, rho_low=.5,
                             beta_latent=1.5, beta_unclustered=-2,
                             snr=.01, sigma_eps_sq=.25),
    "Only one of snr and sigma_eps_sq may be specified", fixed=TRUE)

  # Try a single latent variable (z should be a one-column matrix)
  ret <- genClusteredDataWeighted(n=25, p=19, k_unclustered=2, cluster_size=5,
                                  n_strong_cluster_vars=3, n_clusters=1,
                                  sig_clusters=1, rho_high=.99, rho_low=.5,
                                  beta_latent=1.5, beta_unclustered=-2,
                                  snr=NA, sigma_eps_sq=.5)

  testthat::expect_true(is.list(ret))
  testthat::expect_identical(names(ret), c("X", "y", "Z", "mu"))

  testthat::expect_true(is.numeric(ret$Z))
  testthat::expect_true(is.matrix(ret$Z))
  testthat::expect_equal(nrow(ret$Z), 25)
  testthat::expect_equal(ncol(ret$Z), 1)

  # sigma_eps_sq = 0 is allowed (issue #13) and yields noiseless data (y == mu)
  ret <- genClusteredDataWeighted(n=25, p=19, k_unclustered=2, cluster_size=5,
                                  n_strong_cluster_vars=3, n_clusters=3,
                                  sig_clusters=2, rho_high=.99, rho_low=.5,
                                  beta_latent=1.5, beta_unclustered=-2,
                                  snr=NA, sigma_eps_sq=0)
  testthat::expect_true(is.list(ret))
  testthat::expect_identical(names(ret), c("X", "y", "Z", "mu"))
  testthat::expect_equal(ret$y, ret$mu)

  # k_unclustered = 1 is allowed (issue #13)
  ret <- genClusteredDataWeighted(n=25, p=19, k_unclustered=1, cluster_size=5,
                                  n_strong_cluster_vars=3, n_clusters=3,
                                  sig_clusters=2, rho_high=.99, rho_low=.5,
                                  beta_latent=1.5, beta_unclustered=-2,
                                  snr=NA, sigma_eps_sq=.5)
  testthat::expect_true(is.list(ret))
  testthat::expect_identical(names(ret), c("X", "y", "Z", "mu"))
  testthat::expect_equal(ncol(ret$X), 19)

})

## Test passed with 38 successes.

Tests for genClusteredDataWeightedRandom()

testthat::test_that("genClusteredDataWeightedRandom works", {
  set.seed(23478)

  ret <- genClusteredDataWeightedRandom(n=25, p=19, k_unclustered=2, cluster_size=5,
                                  n_clusters=3,
                                  sig_clusters=2, rho_high=1, rho_low=.5,
                                  beta_latent=1.5, beta_unclustered=-2,
                                  snr=NA, sigma_eps_sq=.5)

  testthat::expect_true(is.list(ret))
  testthat::expect_identical(names(ret), c("X", "y", "Z", "mu"))

  testthat::expect_true(is.numeric(ret$X))
  testthat::expect_true(is.matrix(ret$X))
  testthat::expect_equal(ncol(ret$X), 19)
  testthat::expect_equal(nrow(ret$X), 25)
  # X is Gaussian with mean 0 and variance 4; expect all observations to lie
  # within 5 standard deviations of mean
  testthat::expect_true(all(abs(ret$X) < 5*2))
  # Test that clusters are correlated--within-cluster correlation should be
  # high, correlation with other features should be low
  testthat::expect_true(min(cor(ret$X[, 1:3])) > .2)
  testthat::expect_true(max(abs(cor(ret$X[, 1:5], ret$X[, 6:19]))) < .6)

  testthat::expect_true(min(cor(ret$X[, 6:8])) > .2)
  testthat::expect_true(max(abs(cor(ret$X[, 6:10],
                                    ret$X[, c(1:5, 11:19)]))) < .6)

  testthat::expect_true(min(cor(ret$X[, 11:13])) > .2)
  testthat::expect_true(max(abs(cor(ret$X[, 11:15],
                                    ret$X[, c(1:10, 16:19)]))) < .7)

  cor_indeps <- cor(ret$X[, 16:19])
  testthat::expect_true(max(abs(cor_indeps[lower.tri(cor_indeps)])) < .6)

  testthat::expect_true(is.numeric(ret$y))
  testthat::expect_equal(length(ret$y), 25)

  testthat::expect_true(is.numeric(ret$Z))
  testthat::expect_true(is.matrix(ret$Z))
  testthat::expect_equal(nrow(ret$Z), 25)
  testthat::expect_equal(ncol(ret$Z), 3)

  testthat::expect_true(is.numeric(ret$mu))
  testthat::expect_equal(length(ret$mu), 25)
  # Because y is Gaussian with mean mu and standard deviation .5 conditional on
  # mu, expect all observations to lie within 5 sds of mu
  testthat::expect_true(all(abs(ret$y - ret$mu) < 5*.5))

  # Specify SNR instead of sigma_eps_sq
  ret <- genClusteredDataWeightedRandom(n=25, p=19, k_unclustered=2, cluster_size=5,
                                  n_clusters=3,
                                  sig_clusters=2, rho_high=1, rho_low=.5,
                                  beta_latent=1.5, beta_unclustered=-2,
                                  snr=1, sigma_eps_sq=NA)

  testthat::expect_true(is.list(ret))
  testthat::expect_identical(names(ret), c("X", "y", "Z", "mu"))

  # Specifying both snr and sigma_eps_sq is an error (issue #13): only one of
  # the two may be given.
  testthat::expect_error(
    genClusteredDataWeightedRandom(n=25, p=19, k_unclustered=2, cluster_size=5,
                                   n_clusters=3,
                                   sig_clusters=2, rho_high=.99, rho_low=.5,
                                   beta_latent=1.5, beta_unclustered=-2,
                                   snr=.01, sigma_eps_sq=.25),
    "Only one of snr and sigma_eps_sq may be specified", fixed=TRUE)

  # Try a single latent variable (z should be a one-column matrix)
  ret <- genClusteredDataWeightedRandom(n=25, p=19, k_unclustered=2, cluster_size=5,
                                  n_clusters=1,
                                  sig_clusters=1, rho_high=1, rho_low=.5,
                                  beta_latent=1.5, beta_unclustered=-2,
                                  snr=NA, sigma_eps_sq=.5)

  testthat::expect_true(is.list(ret))
  testthat::expect_identical(names(ret), c("X", "y", "Z", "mu"))

  testthat::expect_true(is.numeric(ret$Z))
  testthat::expect_true(is.matrix(ret$Z))
  testthat::expect_equal(nrow(ret$Z), 25)
  testthat::expect_equal(ncol(ret$Z), 1)

  # sigma_eps_sq = 0 is allowed (issue #13) and yields noiseless data (y == mu)
  ret <- genClusteredDataWeightedRandom(n=25, p=19, k_unclustered=2, cluster_size=5,
                                  n_clusters=3,
                                  sig_clusters=2, rho_high=1, rho_low=.5,
                                  beta_latent=1.5, beta_unclustered=-2,
                                  snr=NA, sigma_eps_sq=0)
  testthat::expect_true(is.list(ret))
  testthat::expect_identical(names(ret), c("X", "y", "Z", "mu"))
  testthat::expect_equal(ret$y, ret$mu)

  # k_unclustered = 1 is allowed (issue #13)
  ret <- genClusteredDataWeightedRandom(n=25, p=19, k_unclustered=1, cluster_size=5,
                                  n_clusters=3,
                                  sig_clusters=2, rho_high=1, rho_low=.5,
                                  beta_latent=1.5, beta_unclustered=-2,
                                  snr=NA, sigma_eps_sq=.5)
  testthat::expect_true(is.list(ret))
  testthat::expect_identical(names(ret), c("X", "y", "Z", "mu"))
  testthat::expect_equal(ncol(ret$X), 19)

})

## Test passed with 38 successes.

Finally, characterization snapshots that pin the exact generated output of all three genClusteredData* generators. The “… works” tests above only check distributional properties (correlations, ranges), not the exact RNG draw order (which differs across the three: bulk rnorm vs per-column vs random per column). These snapshots make the #16 consolidation of the generators and their validators provably behavior-preserving; regenerate only if the data-generating algorithm intentionally changes.

# Characterization snapshots: pin the EXACT generated values (X / y / Z / mu)
# under fixed seeds so the #16 consolidation of the genClusteredData* functions
# and their validators is provably behavior-preserving. The existing "... works"
# tests only check distributional properties (correlations, ranges), NOT the
# exact RNG draw order, which differs across the three generators (bulk vs
# per-column vs random-per-column). Regenerate ONLY if the data-generating
# algorithm intentionally changes.

testthat::test_that("genClusteredData output is byte-stable (characterization snapshot, #16)", {
  set.seed(23478)
  ret <- genClusteredData(n=25, p=19, k_unclustered=2, cluster_size=5,
    n_clusters=3, sig_clusters=2, rho=.99, beta_latent=1.5, beta_unclustered=-2,
    snr=NA, sigma_eps_sq=.5)
  testthat::expect_equal(ret$X[1:4, 1:6], structure(c(-1.02641989570528,
    -0.113749750828501, -0.0540798505887575, -0.719018251689331,
    -1.02002920513898, -0.133167075802704, -0.228504275273568,
    -0.594853445315136, -1.3743321671407, -0.691923420713347, -0.11653157345392,
    -0.949501763500117, -1.09911307059764, -0.234373142554013,
    -0.0610136281766905, -0.614831158775111, -0.875588846521461,
    -0.267096146835881, 0.243882464912086, -0.525573959766865, 0.38889466127034,
    -0.196999108158916, -0.548580993298435, 1.29708342622908), dim = c(4L, 6L)))
  testthat::expect_equal(ret$y[1:4], c(1.78818155412284, 2.55825580753328,
    0.0881535457958548, 1.21771079504409))
  testthat::expect_equal(ret$Z[1:4, , drop = FALSE], structure(c(-1.13051559031852,
    -0.338373896594486, 0.00224652036037424, -0.638671932314538,
    0.402628944106856, -0.0568963388614887, -0.533358544824988, 1.23055042537552,
    0.495888208134907, 2.19477907424054, 0.357725198858587, -0.863633909554439),
    dim = 4:3))
  testthat::expect_equal(ret$mu[1:4], c(1.12211695300006, 3.29507281236772,
    -0.106753094432726, 1.52394871772745))
  testthat::expect_equal(
    c(sumX=sum(ret$X), sumY=sum(ret$y), sumZ=sum(ret$Z), sumMu=sum(ret$mu)),
    c(sumX = 7.85857128981734, sumY = 13.5810460445817, sumZ = 3.15823002771053,
      sumMu = 9.93005749962466))

  # single latent variable (n_clusters = 1): Z is a one-column matrix, and the
  # proxy block adds bulk noise to the raw vector Z.
  set.seed(23478)
  ret <- genClusteredData(n=5, p=19, k_unclustered=2, cluster_size=5,
    n_clusters=1, sig_clusters=1, rho=.8, beta_latent=1.5, beta_unclustered=-2,
    snr=NA, sigma_eps_sq=.5)
  testthat::expect_equal(ret$X[1:4, 1:6], structure(c(-0.00161764234876749,
    -0.557794858227907, -0.827386446973581, -0.460377784885637, -2.67619233265038,
    0.671711827459298, 1.65236925524076, -0.600115109545631, -0.280741265470752,
    -1.67026187144442, -0.0473803806232191, 0.0826508004462609, 0.170200984778884,
    0.976190994120857, -0.499749723868518, -0.474295388720649, -1.17346153974724,
    -0.574655697546479, -0.120791507647279, -0.567678976454219, -1.3596277359555,
    -0.0619416164679036, 0.538496824968004, 0.51591446765952), dim = c(4L, 6L)))
  testthat::expect_equal(ret$y[1:4], c(-0.558336409902388, -2.73369761842788,
    -0.884087327686163, -3.17343267752372))
  testthat::expect_equal(ret$Z[1:4, , drop = FALSE], structure(c(-1.13051559031852,
    -0.338373896594486, 0.00224652036037424, -0.638671932314538),
    dim = c(4L, 1L)))
  testthat::expect_equal(ret$mu[1:4], c(0.195781398124046, -1.51661095607912,
    -0.722190912632797, -2.90119777610212))
  testthat::expect_equal(
    c(sumX=sum(ret$X), sumY=sum(ret$y), sumZ=sum(ret$Z), sumMu=sum(ret$mu)),
    c(sumX = -5.88971766152033, sumY = -7.36101068003228, sumZ = -2.4782435456402,
      sumMu = -5.08843939501818))
})

testthat::test_that("genClusteredDataWeighted output is byte-stable (characterization snapshot, #16)", {
  set.seed(23478)
  ret <- genClusteredDataWeighted(n=25, p=19, k_unclustered=2, cluster_size=5,
    n_strong_cluster_vars=3, n_clusters=3, sig_clusters=2, rho_high=.99,
    rho_low=.5, beta_latent=1.5, beta_unclustered=-2, snr=NA, sigma_eps_sq=.5)
  testthat::expect_equal(ret$X[1:4, 1:6], structure(c(-1.02641989570528,
    -0.113749750828501, -0.0540798505887575, -0.719018251689331,
    -1.02002920513898, -0.133167075802704, -0.228504275273568,
    -0.594853445315136, -1.3743321671407, -0.691923420713347, -0.11653157345392,
    -0.949501763500117, -0.748805376677026, 0.925796953277494, -0.766705940388196,
    -0.348877774329742, 1.96822115764394, 0.528035720534176, 2.93942827641455,
    0.736079254765787, 0.38889466127034, -0.196999108158916, -0.548580993298435,
    1.29708342622908), dim = c(4L, 6L)))
  testthat::expect_equal(ret$y[1:4], c(1.78818155412284, 2.55825580753328,
    0.0881535457958548, 1.21771079504409))
  testthat::expect_equal(ret$Z[1:4, , drop = FALSE], structure(c(-1.13051559031852,
    -0.338373896594486, 0.00224652036037424, -0.638671932314538,
    0.402628944106856, -0.0568963388614887, -0.533358544824988, 1.23055042537552,
    0.495888208134907, 2.19477907424054, 0.357725198858587, -0.863633909554439),
    dim = 4:3))
  testthat::expect_equal(ret$mu[1:4], c(1.12211695300006, 3.29507281236772,
    -0.106753094432726, 1.52394871772745))
  testthat::expect_equal(
    c(sumX=sum(ret$X), sumY=sum(ret$y), sumZ=sum(ret$Z), sumMu=sum(ret$mu)),
    c(sumX = 15.3829830134111, sumY = 13.5810460445817, sumZ = 3.15823002771053,
      sumMu = 9.93005749962466))

  # single latent variable (n_clusters = 1)
  set.seed(23478)
  ret <- genClusteredDataWeighted(n=25, p=19, k_unclustered=2, cluster_size=5,
    n_strong_cluster_vars=3, n_clusters=1, sig_clusters=1, rho_high=.99,
    rho_low=.5, beta_latent=1.5, beta_unclustered=-2, snr=NA, sigma_eps_sq=.5)
  testthat::expect_equal(ret$X[1:4, 1:6], structure(c(-1.07114417787482,
    -0.368013007285506, 0.014438919713251, -0.845503147886515, -1.23739163512048,
    -0.174811089926991, -0.0829364724064908, -0.479069797377207, -1.12138274604786,
    -0.402828108682171, 0.104291325515473, -0.844159398768831, 0.352430473911601,
    -1.4960796572646, -1.23544829374631, 0.148162490964714, -1.03021388828778,
    0.576416731940481, -0.513340292522171, -1.14008022670496, 0.402628944106856,
    -0.0568963388614887, -0.533358544824988, 1.23055042537552), dim = c(4L, 6L)))
  testthat::expect_equal(ret$y[1:4], c(-3.91149964768012, -4.73964429926795,
    0.513991031913566, -2.95972727928473))
  testthat::expect_equal(ret$Z[1:4, , drop = FALSE], structure(c(-1.13051559031852,
    -0.338373896594486, 0.00224652036037424, -0.638671932314538),
    dim = c(4L, 1L)))
  testthat::expect_equal(ret$mu[1:4], c(-3.20232310305676, -3.49765450037239,
    0.564187042362111, -2.19774596140566))
  testthat::expect_equal(
    c(sumX=sum(ret$X), sumY=sum(ret$y), sumZ=sum(ret$Z), sumMu=sum(ret$mu)),
    c(sumX = -24.0234160115409, sumY = -10.2198990332918, sumZ = 0.586596841562748,
      sumMu = -5.15679097838167))
})

testthat::test_that("genClusteredDataWeightedRandom output is byte-stable (characterization snapshot, #16)", {
  set.seed(23478)
  ret <- genClusteredDataWeightedRandom(n=25, p=19, k_unclustered=2,
    cluster_size=5, n_clusters=3, sig_clusters=2, rho_high=1, rho_low=.5,
    beta_latent=1.5, beta_unclustered=-2, snr=NA, sigma_eps_sq=.5)
  testthat::expect_equal(ret$X[1:4, 1:6], structure(c(-1.1897691657824,
    -0.00982041549935259, 0.665172439447775, -0.756543570417209, 1.15949678203449,
    -2.91344510959381, 0.491240389956153, -1.30476423524191, -0.485131132932798,
    0.707045727376943, 3.2222522503854, -3.27350445562509, -1.85428176680484,
    -0.0656090373877866, -0.551130785327929, -1.90090513408521, -1.29616909361208,
    -0.144918159176611, 0.544180897409365, -0.50148178532334, 0.627200376817289,
    0.171599744439457, -1.07507438640901, 1.5693792481444), dim = c(4L, 6L)))
  testthat::expect_equal(ret$y[1:4], c(1.78818155412284, 2.55825580753328,
    0.0881535457958548, 1.21771079504409))
  testthat::expect_equal(ret$Z[1:4, , drop = FALSE], structure(c(-1.13051559031852,
    -0.338373896594486, 0.00224652036037424, -0.638671932314538,
    0.402628944106856, -0.0568963388614887, -0.533358544824988, 1.23055042537552,
    0.495888208134907, 2.19477907424054, 0.357725198858587, -0.863633909554439),
    dim = 4:3))
  testthat::expect_equal(ret$mu[1:4], c(1.12211695300006, 3.29507281236772,
    -0.106753094432726, 1.52394871772745))
  testthat::expect_equal(
    c(sumX=sum(ret$X), sumY=sum(ret$y), sumZ=sum(ret$Z), sumMu=sum(ret$mu)),
    c(sumX = 3.23025166163967, sumY = 13.5810460445817, sumZ = 3.15823002771053,
      sumMu = 9.93005749962466))

  # single latent variable (n_clusters = 1)
  set.seed(23478)
  ret <- genClusteredDataWeightedRandom(n=25, p=19, k_unclustered=2,
    cluster_size=5, n_clusters=1, sig_clusters=1, rho_high=1, rho_low=.5,
    beta_latent=1.5, beta_unclustered=-2, snr=NA, sigma_eps_sq=.5)
  testthat::expect_equal(ret$X[1:4, 1:6], structure(c(-1.36526166060881,
    0.586409931283558, 1.2227469255493, -2.17655371414979, 0.326418806550987,
    -1.09714072579561, 1.4239012653582, -0.760011975131564, -0.227256340192126,
    -1.84845832529723, 0.184388770399019, -0.394752299747441, -1.47062212216842,
    -0.12215942773548, -0.0749451055463801, -0.842364831584477, -1.41223753691949,
    -1.68487949598676, 0.247717311839652, -1.86068883953814, 0.402628944106856,
    -0.0568963388614887, -0.533358544824988, 1.23055042537552), dim = c(4L, 6L)))
  testthat::expect_equal(ret$y[1:4], c(-3.91149964768012, -4.73964429926795,
    0.513991031913566, -2.95972727928473))
  testthat::expect_equal(ret$Z[1:4, , drop = FALSE], structure(c(-1.13051559031852,
    -0.338373896594486, 0.00224652036037424, -0.638671932314538),
    dim = c(4L, 1L)))
  testthat::expect_equal(ret$mu[1:4], c(-3.20232310305676, -3.49765450037239,
    0.564187042362111, -2.19774596140566))
  testthat::expect_equal(
    c(sumX=sum(ret$X), sumY=sum(ret$y), sumZ=sum(ret$Z), sumMu=sum(ret$mu)),
    c(sumX = -28.8273582900822, sumY = -10.2198990332918, sumZ = 0.586596841562748,
      sumMu = -5.15679097838167))
})

## Test passed with 10 successes.

## Test passed with 10 successes.

## Test passed with 10 successes.

The two weighted generators now run the same snr / sigma_eps_sq type checks as genClusteredData() (#35): a non-numeric snr / sigma_eps_sq is rejected up front rather than silently passing the old lean check and failing later inside genZmuY().

testthat::test_that("genClusteredData* reject n < 2 and non-integer cluster_size / p (#70)", {
  ok <- list(n = 10, p = 8, k_unclustered = 2, cluster_size = 3, n_clusters = 1,
             sig_clusters = 1, sigma_eps_sq = 1)
  testthat::expect_error(do.call(genClusteredData, utils::modifyList(ok, list(n = 1L))),
                         "n >= 2", fixed = TRUE)
  testthat::expect_error(do.call(genClusteredData, utils::modifyList(ok, list(cluster_size = 2.5))),
                         "cluster_size == round(cluster_size)", fixed = TRUE)
  testthat::expect_error(do.call(genClusteredData, utils::modifyList(ok, list(p = 8.5))),
                         "p == round(p)", fixed = TRUE)
  # n = 2 is the tight boundary -- it must still WORK (locks the threshold
  # against a future "n >= 3" / "> 2" regression).
  res2 <- do.call(genClusteredData, utils::modifyList(ok, list(n = 2L)))
  testthat::expect_equal(nrow(res2$X), 2L)
  # The shared genZmuY n-guard and the shared Pre cluster_size check cover the
  # weighted generators too:
  wok <- list(n = 1L, p = 8, k_unclustered = 2, cluster_size = 3, n_clusters = 1,
              n_strong_cluster_vars = 2, sig_clusters = 1, rho_high = 0.8,
              rho_low = 0.2, sigma_eps_sq = 1)
  testthat::expect_error(do.call(genClusteredDataWeighted, wok),
                         "n >= 2", fixed = TRUE)
  testthat::expect_error(
    do.call(genClusteredDataWeighted, utils::modifyList(wok, list(n = 10L, cluster_size = 2.5))),
    "cluster_size == round(cluster_size)", fixed = TRUE)
})

testthat::test_that("genClusteredDataWeighted/Random validate snr/sigma_eps_sq type (#35)", {
  # Newly rejected (these previously slipped past the weighted validators' lean
  # checks and only failed later, cryptically, inside genZmuY).
  testthat::expect_error(
    genClusteredDataWeighted(n=25, p=19, k_unclustered=2, cluster_size=5,
      n_strong_cluster_vars=3, n_clusters=3, sig_clusters=2, rho_high=.99,
      rho_low=.5, beta_latent=1.5, beta_unclustered=-2, snr="1",
      sigma_eps_sq=NA),
    "is.numeric(snr) | is.integer(snr) is not TRUE", fixed=TRUE)
  testthat::expect_error(
    genClusteredDataWeighted(n=25, p=19, k_unclustered=2, cluster_size=5,
      n_strong_cluster_vars=3, n_clusters=3, sig_clusters=2, rho_high=.99,
      rho_low=.5, beta_latent=1.5, beta_unclustered=-2, snr=NA,
      sigma_eps_sq="0.5"),
    "is.numeric(sigma_eps_sq) | is.integer(sigma_eps_sq) is not TRUE", fixed=TRUE)
  testthat::expect_error(
    genClusteredDataWeightedRandom(n=25, p=19, k_unclustered=2, cluster_size=5,
      n_clusters=3, sig_clusters=2, rho_high=1, rho_low=.5, beta_latent=1.5,
      beta_unclustered=-2, snr="1", sigma_eps_sq=NA),
    "is.numeric(snr) | is.integer(snr) is not TRUE", fixed=TRUE)
  testthat::expect_error(
    genClusteredDataWeightedRandom(n=25, p=19, k_unclustered=2, cluster_size=5,
      n_clusters=3, sig_clusters=2, rho_high=1, rho_low=.5, beta_latent=1.5,
      beta_unclustered=-2, snr=NA, sigma_eps_sq="0.5"),
    "is.numeric(sigma_eps_sq) | is.integer(sigma_eps_sq) is not TRUE", fixed=TRUE)

  # Range checks (snr > 0, sigma_eps_sq >= 0) are unchanged by #35.
  testthat::expect_error(
    genClusteredDataWeighted(n=25, p=19, k_unclustered=2, cluster_size=5,
      n_strong_cluster_vars=3, n_clusters=3, sig_clusters=2, rho_high=.99,
      rho_low=.5, beta_latent=1.5, beta_unclustered=-2, snr=-.2,
      sigma_eps_sq=NA),
    "snr > 0 is not TRUE", fixed=TRUE)
  testthat::expect_error(
    genClusteredDataWeightedRandom(n=25, p=19, k_unclustered=2, cluster_size=5,
      n_clusters=3, sig_clusters=2, rho_high=1, rho_low=.5, beta_latent=1.5,
      beta_unclustered=-2, snr=NA, sigma_eps_sq=-.3),
    "sigma_eps_sq >= 0 is not TRUE", fixed=TRUE)

  # A valid scalar numeric snr / sigma_eps_sq is still accepted.
  ret <- genClusteredDataWeighted(n=25, p=19, k_unclustered=2, cluster_size=5,
    n_strong_cluster_vars=3, n_clusters=3, sig_clusters=2, rho_high=.99,
    rho_low=.5, beta_latent=1.5, beta_unclustered=-2, snr=NA, sigma_eps_sq=.5)
  testthat::expect_identical(names(ret), c("X", "y", "Z", "mu"))
})

## Test passed with 6 successes.

## Test passed with 7 successes.

Edge-case regression tests for genZmuY() (the shared core all three generators funnel through), covering the two bugs fixed in #124: a missing drop = FALSE that crashed the exactly-one-weak-signal/noise-feature configuration, and a sig_clusters == 0 case that silently kept a latent cluster in the true model for y instead of yielding a null latent model. The fix consumes NO randomness (only deterministic reshaping of mu), so every generator test and characterization snapshot above stays byte-identical.

testthat::test_that("genZmuY one-weak-signal config no longer crashes, all 3 generators (#124 bug 1)", {
  # p == n_clusters*cluster_size + 1 leaves exactly ONE weak-signal/noise
  # feature, so other_X had a single column; without drop = FALSE it collapsed
  # to a vector and other_X[, j] errored "incorrect number of dimensions". The
  # pre-check only requires p_orig_feat_mat >= k_unclustered + n_clusters
  # (here 2 >= 2), so it passed straight into the crash.
  ret <- genClusteredData(n = 20, p = 3, k_unclustered = 1, cluster_size = 2,
                          n_clusters = 1, snr = 2)
  testthat::expect_true(is.list(ret))
  testthat::expect_identical(names(ret), c("X", "y", "Z", "mu"))
  testthat::expect_true(is.matrix(ret$X))
  testthat::expect_equal(ncol(ret$X), 3)
  testthat::expect_equal(nrow(ret$X), 20)

  # genClusteredDataWeighted: n_strong_cluster_vars is REQUIRED and its checker
  # enforces >= 1 AND < cluster_size, so with cluster_size = 2 the only valid
  # value is 1.
  retW <- genClusteredDataWeighted(n = 20, p = 3, k_unclustered = 1,
                                   cluster_size = 2, n_clusters = 1,
                                   n_strong_cluster_vars = 1, snr = 2)
  testthat::expect_true(is.list(retW))
  testthat::expect_identical(names(retW), c("X", "y", "Z", "mu"))
  testthat::expect_equal(ncol(retW$X), 3)

  retR <- genClusteredDataWeightedRandom(n = 20, p = 3, k_unclustered = 1,
                                         cluster_size = 2, n_clusters = 1,
                                         snr = 2)
  testthat::expect_true(is.list(retR))
  testthat::expect_identical(names(retR), c("X", "y", "Z", "mu"))
  testthat::expect_equal(ncol(retR$X), 3)
})

testthat::test_that("genZmuY sig_clusters == 0 yields a null latent model (#124 bug 2)", {
  # The zero-beta case MUST bypass genClusteredData: its validator rejects
  # beta_unclustered == 0 / beta_latent == 0, so exercise the internal helper
  # directly. sig_clusters == 0 AND beta_unclustered == 0 => mu is exactly 0
  # (no latent term, no weak-signal term).
  set.seed(53)
  g0 <- genZmuY(n = 20, p = 6, k_unclustered = 1, cluster_size = 2,
                       n_clusters = 2, sig_clusters = 0, beta_latent = 1.5,
                       beta_unclustered = 0, snr = NA, sigma_eps_sq = 0.5)
  testthat::expect_true(all(g0$mu == 0))

  # Contrast, n_clusters > 1: reset the seed before EACH call so the SAME
  # orig_feat_mat (hence the same Z and weak-signal block) is drawn; then
  # g1$mu - g0$mu isolates the pure latent term Z[, 1] * beta_latent.
  cfgA <- list(n = 20, p = 6, k_unclustered = 1, cluster_size = 2,
               n_clusters = 2, beta_latent = 1.5, beta_unclustered = 1,
               snr = NA, sigma_eps_sq = 0.5)
  set.seed(51)
  g0A <- do.call(genZmuY, c(cfgA, sig_clusters = 0))
  set.seed(51)
  g1A <- do.call(genZmuY, c(cfgA, sig_clusters = 1))
  testthat::expect_equal(g1A$Z, g0A$Z)              # same orig_feat_mat drawn
  testthat::expect_false(all(g0A$mu == 0))          # weak signal still retained
  testthat::expect_equal(g1A$mu - g0A$mu, as.numeric(g1A$Z[, 1] * 1.5))
  # Lock the n_clusters > 1 & sig_clusters == 1 branch with an explicit
  # expected value (no characterization snapshot covers it).
  testthat::expect_equal(g1A$mu[1:4], c(1.71187484261165, -1.8816097377914,
    -0.434623721956133, 0.201053072650191))

  # Contrast, n_clusters == 1 (Z is a bare vector; the else branch).
  cfgB <- list(n = 20, p = 4, k_unclustered = 1, cluster_size = 2,
               n_clusters = 1, beta_latent = 1.5, beta_unclustered = 1,
               snr = NA, sigma_eps_sq = 0.5)
  set.seed(52)
  g0B <- do.call(genZmuY, c(cfgB, sig_clusters = 0))
  set.seed(52)
  g1B <- do.call(genZmuY, c(cfgB, sig_clusters = 1))
  testthat::expect_equal(g1B$mu - g0B$mu, as.numeric(g1B$Z * 1.5))
  testthat::expect_equal(g1B$mu[1:4], c(-1.35778032140589, 0.59232273049822,
    2.44822804281879, -0.29256382836018))

  # End-to-end: genClusteredData with sig_clusters = 0 and NONZERO default
  # betas returns valid data (no error; the weak-signal features still
  # legitimately contribute, only the latent part is zeroed).
  ret <- genClusteredData(n = 20, p = 6, k_unclustered = 1, cluster_size = 2,
                          n_clusters = 2, sig_clusters = 0, sigma_eps_sq = 0.5)
  testthat::expect_true(is.list(ret))
  testthat::expect_identical(names(ret), c("X", "y", "Z", "mu"))
  testthat::expect_equal(ncol(ret$X), 6)
})

## Test passed with 11 successes.

## Test passed with 10 successes.

Tests for getLassoLambda():

testthat::test_that("getLassoLambda works", {
  set.seed(7252)
  
  x <- matrix(stats::rnorm(10*6), nrow=10, ncol=6)
  y <- stats::rnorm(10)

  ret <- getLassoLambda(X=x, y=y, lambda_choice="1se", nfolds=4)
  
  testthat::expect_true(is.numeric(ret) | is.integer(ret))
  testthat::expect_true(!is.na(ret))
  testthat::expect_equal(length(ret), 1)
  testthat::expect_true(ret >= 0)
  
  ret <- getLassoLambda(X=x, y=y, lambda_choice="min", nfolds=5)
  
  testthat::expect_true(is.numeric(ret) | is.integer(ret))
  testthat::expect_true(!is.na(ret))
  testthat::expect_equal(length(ret), 1)
  testthat::expect_true(ret >= 0)
  
  # Different mixing parameter
  
   ret <- getLassoLambda(X=x, y=y, lambda_choice="min", nfolds=5, alpha=0.5)
  
  testthat::expect_true(is.numeric(ret) | is.integer(ret))
  testthat::expect_true(!is.na(ret))
  testthat::expect_equal(length(ret), 1)
  testthat::expect_true(ret >= 0)

  # data.frame X is now accepted (coerced internally), like the sibling exports (#51)
  ret_df <- getLassoLambda(X = as.data.frame(x), y = y, lambda_choice = "1se", nfolds = 4)
  testthat::expect_true(is.numeric(ret_df) | is.integer(ret_df))
  testthat::expect_true(!is.na(ret_df))
  testthat::expect_equal(length(ret_df), 1)
  testthat::expect_true(ret_df >= 0)

  # Bad inputs
  testthat::expect_error(getLassoLambda(X="x", y=y), "must be a numeric matrix",
                         fixed=TRUE)
  
  testthat::expect_error(getLassoLambda(X=x[1:9, ], y=y),
                         "n == length(y) is not TRUE", fixed=TRUE)
  
  testthat::expect_error(getLassoLambda(X=x, y=TRUE),
                         "must be a numeric (real-valued) vector",
                         fixed=TRUE)

  # Error has quotation marks in it
  testthat::expect_error(getLassoLambda(X=x, y=y, lambda_choice="mni"))

  testthat::expect_error(getLassoLambda(X=x, y=y,
                                        lambda_choice=c("min", "1se")),
                         "length(lambda_choice) == 1 is not TRUE",
                         fixed=TRUE)

  testthat::expect_error(getLassoLambda(X=x, y=y, lambda_choice=1),
                         "is.character(lambda_choice) is not TRUE",
                         fixed=TRUE)
  
  testthat::expect_error(getLassoLambda(X=x, y=y, nfolds="5"),
                         "is.numeric(nfolds) | is.integer(nfolds) is not TRUE",
                         fixed=TRUE)
  
  testthat::expect_error(getLassoLambda(X=x, y=y, nfolds=1:4),
                         "length(nfolds) == 1 is not TRUE", fixed=TRUE)
  
  testthat::expect_error(getLassoLambda(X=x, y=y, nfolds=3.2),
                         "nfolds == round(nfolds) is not TRUE", fixed=TRUE)
  
  # (#158e) nfolds=3 is cv.glmnet's minimum and is now accepted.
  ret <- getLassoLambda(X=x, y=y, nfolds=3)
  testthat::expect_true(is.numeric(ret) && length(ret) == 1 && ret >= 0)
  # nfolds=2 (below the minimum) is rejected at the new boundary.
  testthat::expect_error(getLassoLambda(X=x, y=y, nfolds=2),
                         "nfolds >= 3 is not TRUE", fixed=TRUE)
  
  testthat::expect_error(getLassoLambda(X=x, y=y, nfolds=4, alpha=1.2),
                         "alpha <= 1 is not TRUE", fixed=TRUE)

  # alpha = 0 (degenerate ridge) is now rejected (previously allowed)
  testthat::expect_error(getLassoLambda(X=x, y=y, nfolds=4, alpha=0),
                         "alpha > 0 is not TRUE", fixed=TRUE)

  # An NA in the design matrix now yields a friendly message (#56) rather than
  # falling through to a cryptic glmnet-level error.
  x_na <- x; x_na[2, 1] <- NA
  testthat::expect_error(getLassoLambda(X=x_na, y=y),
                         "must not contain missing", fixed=TRUE)
  # A data.frame with a numeric NA must error with the same message BEFORE
  # coercion: model.matrix's na.action=na.omit would otherwise silently drop
  # the NA row and the failure would surface downstream (length mismatch), not
  # as this message. This case pins the pre-coercion placement of checkNoNAs.
  df_na <- as.data.frame(x); df_na[2, 1] <- NA
  testthat::expect_error(getLassoLambda(X=df_na, y=y),
                         "must not contain missing", fixed=TRUE)

  # (#104, L7) When n is too small for even 3-fold cross-validation, error
  # clearly here instead of crashing inside cv.glmnet ("nfolds must be bigger
  # than 3"). With n = 4 and the default nfolds = 10, n_sample rounds to 2 (< 3).
  testthat::expect_error(
    getLassoLambda(X=matrix(stats::rnorm(4*5), nrow=4, ncol=5),
                   y=stats::rnorm(4)),
    "is too small to choose lambda", fixed=TRUE)

  # (#127) A single-column X now fires the same clean "p >= 2 is not TRUE"
  # message as css(), rather than crashing later inside cv.glmnet.
  testthat::expect_error(
    getLassoLambda(X=matrix(stats::rnorm(40), nrow=40, ncol=1),
                   y=stats::rnorm(40)),
    "p >= 2 is not TRUE", fixed=TRUE)

})

## -- Warning: getLassoLambda works -----------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) getLassoLambda(X = x, y = y, lambda_choice = "1se", nfolds = 4)
##  2.   \-glmnet::cv.glmnet(...)
##  3.     \-glmnet:::cv.glmnet.raw(...)
## -- Warning: getLassoLambda works -----------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) getLassoLambda(X = x, y = y, lambda_choice = "min", nfolds = 5)
##  2.   \-glmnet::cv.glmnet(...)
##  3.     \-glmnet:::cv.glmnet.raw(...)
## -- Warning: getLassoLambda works -----------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) getLassoLambda(...)
##  2.   \-glmnet::cv.glmnet(...)
##  3.     \-glmnet:::cv.glmnet.raw(...)
## -- Warning: getLassoLambda works -----------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) getLassoLambda(...)
##  2.   \-glmnet::cv.glmnet(...)
##  3.     \-glmnet:::cv.glmnet.raw(...)
## -- Warning: getLassoLambda works -----------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) getLassoLambda(X = x, y = y, nfolds = 3)
##  2.   \-glmnet::cv.glmnet(...)
##  3.     \-glmnet:::cv.glmnet.raw(...)
## Test passed with 33 successes.

Out-of-range cluster indices are caught up front (#104, L3) through the public css() and protolasso() entry points – covering a bare-vector clusters argument (which is wrapped into a list after the per-element validation loop) and a list clusters argument:

testthat::test_that("out-of-range cluster indices error via css/protolasso (#104)", {
  set.seed(104105)
  X <- matrix(stats::rnorm(20*8), nrow=20, ncol=8)
  y <- stats::rnorm(20)

  # Bare-vector clusters with an index beyond p (= 5 here).
  testthat::expect_error(
    css(X[, 1:5], y, lambda=0.01, clusters=c(1, 2, 99)),
    "Cluster index 99 exceeds the number of features (p = 5).", fixed=TRUE)

  # List clusters routed through protolasso.
  testthat::expect_error(
    protolasso(X[, 1:5], y, clusters=list(c(1, 2, 99))),
    "Cluster index 99 exceeds the number of features (p = 5).", fixed=TRUE)
})

## Test passed with 2 successes.

css() reports a too-small sample size clearly (#104, L4) instead of the misleading “Too many training indices provided” when no training indices were supplied:

testthat::test_that("css reports a too-small sample size clearly (#104)", {
  set.seed(104104)

  # No train_inds: the message must blame the sample size, not training indices.
  testthat::expect_error(
    css(matrix(stats::rnorm(15), nrow=3, ncol=5), stats::rnorm(3), lambda=0.01),
    "is too small", fixed=TRUE)

  # When train_inds leave fewer than 4 observations for selection, the original
  # "Too many training indices" branch still fires.
  testthat::expect_error(
    css(matrix(stats::rnorm(10*5), nrow=10, ncol=5), stats::rnorm(10),
        lambda=0.01, train_inds=1:7),
    "Too many training indices provided", fixed=TRUE)
})

## Test passed with 2 successes.

Tests for getModelSize():

testthat::test_that("getModelSize works", {
  set.seed(1723)
  
  data <- genClusteredData(n=15, p=11, k_unclustered=2, cluster_size=3,
                        n_clusters=2, sigma_eps_sq=10^(-6))
  
  x <- data$X
  y <- data$y
  
  good_clusters <- list(red_cluster=1L:3L, green_cluster=4L:6L)
  
  ret <- getModelSize(X=x, y=y, clusters=good_clusters)
  
  testthat::expect_true(is.numeric(ret) | is.integer(ret))
  testthat::expect_true(!is.na(ret))
  testthat::expect_equal(length(ret), 1)
  testthat::expect_equal(ret, round(ret))
  testthat::expect_true(ret >= 1)
  # 11 features, but two clusters of size 3, so maximum size should
  # be 11 - 2 - 2 = 7
  testthat::expect_true(ret <= 7)
  
  ## Trying other inputs

  unnamed_clusters <- list(1L:3L, 5L:8L)
  
  ret <- getModelSize(X=x, y=y, clusters=unnamed_clusters)
  
  testthat::expect_true(is.numeric(ret) | is.integer(ret))
  testthat::expect_true(!is.na(ret))
  testthat::expect_equal(length(ret), 1)
  testthat::expect_equal(ret, round(ret))
  testthat::expect_true(ret >= 1)
  # 11 features, but 3 in one cluster and 4 in another, so maximum size should
  # be 11 - 2 - 3 = 6
  testthat::expect_true(ret <= 6)
  
  # Single cluster
  ret <- getModelSize(X=x, y=y, clusters=2:5)
  
  testthat::expect_true(is.numeric(ret) | is.integer(ret))
  testthat::expect_true(!is.na(ret))
  testthat::expect_equal(length(ret), 1)
  testthat::expect_equal(ret, round(ret))
  testthat::expect_true(ret >= 1)
  # 11 features, but 4 in one cluster, so maximum size should be 11 - 3 = 8
  testthat::expect_true(ret <= 8)

  # Regression test: when prototype reduction leaves a single column (all
  # provided features in one cluster), getModelSize must return 1L rather than
  # crashing in cv.glmnet. Previously X_size[, -feats_to_drop] dropped to a
  # vector when one column remained.
  testthat::expect_equal(getModelSize(X=x[, 1:3, drop=FALSE], y=y,
                                      clusters=list(1:3)), 1L)


  # Intentionally don't provide clusters for all feature, mix up formatting,
  # etc.
  good_clusters <- list(red_cluster=1:3, 5:8)
  
  ret <- getModelSize(X=x, y=y, clusters=good_clusters)
  
  testthat::expect_true(is.numeric(ret) | is.integer(ret))
  testthat::expect_true(!is.na(ret))
  testthat::expect_equal(length(ret), 1)
  testthat::expect_equal(ret, round(ret))
  testthat::expect_true(ret >= 1)
  # 11 features, but 3 in one cluster and 4 in another, so maximum size should
  # be 11 - 2 - 3 = 6
  testthat::expect_true(ret <= 6)
  
  ## Trying bad inputs
  
  testthat::expect_error(getModelSize(X="x", y=y, clusters=good_clusters),
                         "is.matrix(X) | is.data.frame(X) is not TRUE", fixed=TRUE)
  
  testthat::expect_error(getModelSize(X=x[1:5, ], y=y, clusters=good_clusters),
                         "length(y) == n is not TRUE", fixed=TRUE)

  testthat::expect_error(getModelSize(X=x, y=FALSE, clusters=good_clusters),
                         "must be a numeric (real-valued) vector",
                         fixed=TRUE)

  testthat::expect_error(getModelSize(X=x, y=y, clusters=list(3:7, 7:10)),
                         "Overlapping clusters detected; clusters must be non-overlapping. Overlapping clusters: 1, 2.",
                         fixed=TRUE)
  
  # Duplicate clusters are now silently removed (#156): the model size for a
  # duplicated cluster equals that of the deduplicated input. getModelSize is
  # stochastic (cv.glmnet), so seed both calls identically.
  set.seed(15678)
  dup_ms <- getModelSize(X=x, y=y, clusters=list(5:8, 5:8))
  set.seed(15678)
  dedup_ms <- getModelSize(X=x, y=y, clusters=list(5:8))
  testthat::expect_equal(dup_ms, dedup_ms)

  # Out-of-range cluster index now caught up front by checkFormatClustersInput
  # (#104, L3); x has 11 columns, so index 50 is out of range.
  testthat::expect_error(getModelSize(X=x, y=y, clusters=6:50),
                         "Cluster index 50 exceeds the number of features (p = 11).",
                         fixed=TRUE)

  testthat::expect_error(getModelSize(X=x, y=y,
                                      clusters=list(2:3, as.integer(NA))),
                         "!is.na(clusters) are not all TRUE",
                         fixed=TRUE)

  testthat::expect_error(getModelSize(X=x, y=y, clusters=list(2:3, c(4, 4, 5))),
                         "length(clusters[[i]]) == length(unique(clusters[[i]])) is not TRUE",
                         fixed=TRUE)

   testthat::expect_error(getModelSize(X=x, y=y, clusters=list(1:4, -1)),
                         "all(clusters[[i]] >= 1) is not TRUE",
                         fixed=TRUE)

   testthat::expect_error(getModelSize(X=x, y=y, clusters=list(1:4,
                                                               c(2.3, 1.2))),
                         "all(clusters[[i]] == round(clusters[[i]])) is not TRUE",
                         fixed=TRUE)

  ## alpha (#21): the elastic-net mixing parameter now drives the model-size
  ## estimate (the internal cv.glmnet), keeping it consistent with elastic-net
  ## feature selection.

  # Validation, mirroring getLassoLambda: alpha must be in (0, 1].
  testthat::expect_error(getModelSize(X=x, y=y, clusters=good_clusters, alpha=0),
                         "alpha > 0 is not TRUE", fixed=TRUE)
  testthat::expect_error(getModelSize(X=x, y=y, clusters=good_clusters, alpha=1.5),
                         "alpha <= 1 is not TRUE", fixed=TRUE)
  testthat::expect_error(getModelSize(X=x, y=y, clusters=good_clusters, alpha=NA),
                         "is.numeric(alpha) | is.integer(alpha) is not TRUE",
                         fixed=TRUE)

  # Back-compat: default is pure lasso (alpha = 1). cv.glmnet uses random CV
  # folds, so seed before each call to compare apples-to-apples.
  set.seed(7); a_default <- getModelSize(X=x, y=y, clusters=good_clusters)
  set.seed(7); a_alpha1 <- getModelSize(X=x, y=y, clusters=good_clusters, alpha=1)
  testthat::expect_equal(a_default, a_alpha1)

  # Headline: on a design with a block of highly correlated relevant features
  # (each feature its own singleton cluster, so cv.glmnet sees the whole block
  # with no prototype reduction), a lower alpha (elastic net) keeps more of the
  # correlated features than pure lasso, giving a strictly larger estimated
  # size. On main, getModelSize has no alpha argument, so this cannot happen.
  set.seed(2718)
  n_h <- 120
  z_h <- stats::rnorm(n_h)
  X_block <- matrix(rep(z_h, 10), nrow=n_h) +
    matrix(stats::rnorm(n_h * 10, sd=0.05), nrow=n_h)
  X_noise <- matrix(stats::rnorm(n_h * 4), nrow=n_h)
  X_h <- cbind(X_block, X_noise)
  y_h <- 2 * z_h + stats::rnorm(n_h, sd=0.5)
  singletons_h <- as.list(1:ncol(X_h))
  set.seed(11)
  size_lasso <- getModelSize(X=X_h, y=y_h, clusters=singletons_h, alpha=1)
  set.seed(11)
  size_enet <- getModelSize(X=X_h, y=y_h, clusters=singletons_h, alpha=0.2)
  testthat::expect_true(size_enet >= size_lasso)
  testthat::expect_true(size_enet > size_lasso)

})

## -- Warning: getModelSize works -------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) getModelSize(X = x, y = y, clusters = good_clusters)
##  2.   \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  3.     \-glmnet:::cv.glmnet.raw(...)
## -- Warning: getModelSize works -------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) getModelSize(X = x, y = y, clusters = unnamed_clusters)
##  2.   \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  3.     \-glmnet:::cv.glmnet.raw(...)
## -- Warning: getModelSize works -------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) getModelSize(X = x, y = y, clusters = 2:5)
##  2.   \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  3.     \-glmnet:::cv.glmnet.raw(...)
## -- Warning: getModelSize works -------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) getModelSize(X = x, y = y, clusters = good_clusters)
##  2.   \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  3.     \-glmnet:::cv.glmnet.raw(...)
## -- Warning: getModelSize works -------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) getModelSize(X = x, y = y, clusters = list(5:8, 5:8))
##  2.   \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  3.     \-glmnet:::cv.glmnet.raw(...)
## -- Warning: getModelSize works -------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) getModelSize(X = x, y = y, clusters = list(5:8))
##  2.   \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  3.     \-glmnet:::cv.glmnet.raw(...)
## -- Warning: getModelSize works -------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) getModelSize(X = x, y = y, clusters = good_clusters)
##  2.   \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  3.     \-glmnet:::cv.glmnet.raw(...)
## -- Warning: getModelSize works -------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) getModelSize(X = x, y = y, clusters = good_clusters, alpha = 1)
##  2.   \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  3.     \-glmnet:::cv.glmnet.raw(...)
## Test passed with 41 successes.

Tests for getSelectionPrototypes():

testthat::test_that("getSelectionPrototypes works", {
  set.seed(67234)
  
  data <- genClusteredData(n=15, p=11, k_unclustered=2, cluster_size=3,
                        n_clusters=2, sig_clusters=1, sigma_eps_sq=10^(-6))
  
  x <- data$X
  y <- data$y
  
  good_clusters <- list(red_cluster=1L:3L, green_cluster=4L:6L)
  
  css_results <- css(X=x, y=y, lambda=0.01, clusters=good_clusters)
  
  ret <- getSelectionPrototypes(css_results, selected_clusts=good_clusters)
  
  testthat::expect_true(is.integer(ret))
  testthat::expect_true(all(!is.na(ret)))
  testthat::expect_equal(length(ret), length(good_clusters))
  testthat::expect_equal(length(ret), length(unique(ret)))
  for(i in 1:length(ret)){
    testthat::expect_true(ret[i] %in% good_clusters[[i]])
    # Find the largest selection proportion of any feature in cluster i
    max_prop <- max(colMeans(css_results$feat_sel_mat[, good_clusters[[i]]]))
    # Find the selection proportion of the identified prototype
    proto_prop <- colMeans(css_results$feat_sel_mat)[ret[i]]
    testthat::expect_equal(max_prop, proto_prop)
  }
  
  # Try with only one selected cluster (still should be in a list)

  ret <- getSelectionPrototypes(css_results,
                                selected_clusts=list(red_cluster=1L:3L))

  testthat::expect_true(is.integer(ret))
  testthat::expect_true(!is.na(ret))
  testthat::expect_equal(length(ret), 1)
  testthat::expect_true(ret %in% 1L:3L)
  # Find the largest selection proportion of any feature in the cluster
  max_prop <- max(colMeans(css_results$feat_sel_mat[, 1L:3L]))
  # Find the selection proportion of the identified prototype
  proto_prop <- colMeans(css_results$feat_sel_mat)[ret]
  testthat::expect_equal(max_prop, proto_prop)

  ## Trying bad inputs

  # Error contains quotation marks
  testthat::expect_error(getSelectionPrototypes(x, good_clusters))

  testthat::expect_error(getSelectionPrototypes(css_results, 1L:3L),
                         "is.list(selected_clusts) is not TRUE", fixed=TRUE)

  # An empty selection (empty list of selected clusters) is valid post-#107:
  # getSelectionPrototypes() returns an empty integer vector, no error (#120).
  testthat::expect_identical(getSelectionPrototypes(css_results, list()),
                             integer(0))

  testthat::expect_error(getSelectionPrototypes(css_results,
                                                list(red_cluster=1L:3L,
                                                     green_cluster=4L:6L,
                                                     bad_cluster=integer())),
                         "all(lengths(selected_clusts) >= 1) is not TRUE",
                         fixed=TRUE)

})

testthat::test_that("getSelectionPrototypes handles a constant tied column without crashing (#68)", {
  set.seed(680)
  n <- 20L; p <- 4L
  X <- matrix(stats::rnorm(n * p), nrow = n, ncol = p)
  X[, 2] <- 3.5                       # feature 2 is a CONSTANT column
  colnames(X) <- paste0("V", 1:p)
  y <- X[, 1] + stats::rnorm(n)       # y correlated with feature 1 (varying)
  # feat_sel_mat: features 1 and 2 tie at selection proportion 0.5 (both
  # "selected" in the same 5 of 10 subsamples), forcing the tie-break path.
  B <- 10L
  M <- matrix(0L, nrow = B, ncol = p)
  M[1:5, 1] <- 1L
  M[1:5, 2] <- 1L
  css_results <- structure(list(X = X, y = y, feat_sel_mat = M), class = "cssr")
  # Pre-fix: cor(X[, c(1,2)], y) -> c(<corr>, NA) -> max NA -> stopifnot abort.
  proto <- getSelectionPrototypes(css_results, selected_clusts = list(c(1L, 2L)))
  testthat::expect_false(is.na(proto))
  testthat::expect_identical(unname(proto), 1L)   # the varying, y-correlated member

  # All-constant tied cluster: must not crash, picks the first deterministically.
  # (suppressWarnings: the names-assignment emits a pre-existing "number of items
  # to replace..." warning when proto_i stays length>1; not introduced by this
  # fix, unrelated to #68.)
  X2 <- X; X2[, 3] <- 2.0; X2[, 4] <- 9.0
  M2 <- matrix(0L, nrow = B, ncol = p); M2[1:5, 3] <- 1L; M2[1:5, 4] <- 1L
  css2 <- structure(list(X = X2, y = y, feat_sel_mat = M2), class = "cssr")
  proto2 <- suppressWarnings(
      getSelectionPrototypes(css2, selected_clusts = list(c(3L, 4L))))
  testthat::expect_identical(unname(proto2), 3L)
})

testthat::test_that("getSelectionPrototypes skips the correlation tie-break for non-numeric y (#128)", {
  # css() accepts a non-numeric y (response validation is delegated to fitfun),
  # so getSelectionPrototypes must NOT call cor() -- which requires a numeric y --
  # when breaking a selection-proportion tie. Hand-build a cssr object with a
  # CHARACTER y and a feat_sel_mat in which members 1 and 2 tie (equal column
  # means) so the tie block IS entered (length(proto_i) > 1). Without the
  # is.numeric(y) guard, cor(X[, 1:2], <character y>) would error; the guard skips
  # it and the first tied index is returned deterministically.
  B <- 10L; p <- 2L; n <- 8L
  X <- matrix(stats::rnorm(n * p), nrow = n, ncol = p)
  colnames(X) <- paste0("V", 1:p)
  y <- letters[1:n]                    # non-numeric (character) response
  M <- matrix(0L, nrow = B, ncol = p)
  M[1:5, 1] <- 1L                      # features 1 and 2 tie at proportion 0.5,
  M[1:5, 2] <- 1L                      # forcing the tie-break path
  css <- structure(list(X = X, y = y, feat_sel_mat = M), class = "cssr")
  # suppressWarnings: an unbroken length-2 tie triggers a benign R "number of
  # items to replace" warning at the names-assignment (it fires for ANY persisted
  # tie, incl. the numeric #68 case) that would otherwise fail testthat edition 3.
  res <- suppressWarnings(getSelectionPrototypes(css, list(c(1L, 2L))))
  testthat::expect_identical(unname(res), 1L)   # first tied index, deterministic
})

testthat::test_that("getSelectionPrototypes emits no warning on an unbroken tie (#126)", {
  # Item-5 regression (#126): the name-assignment now indexes proto_i[1] (a single
  # element), not the possibly length>1 proto_i. On an UNBROKEN within-cluster tie
  # -- here a CHARACTER y, so the numeric correlation tie-break is skipped and
  # proto_i stays length 2 -- the pre-fix code raised R's "number of items to
  # replace is not a multiple of replacement length" warning at the names<-
  # assignment (surfacing through print.cssr / summary.cssr / printCssDf). After
  # the fix NO warning fires, and the returned prototype is still the first tied
  # index (value unchanged; only the spurious warning is removed).
  B <- 10L; p <- 2L; n <- 8L
  X <- matrix(stats::rnorm(n * p), nrow = n, ncol = p)
  colnames(X) <- paste0("V", 1:p)
  y <- letters[1:n]                    # non-numeric (character) response
  M <- matrix(0L, nrow = B, ncol = p)
  M[1:5, 1] <- 1L                      # features 1 and 2 tie at proportion 0.5,
  M[1:5, 2] <- 1L                      # forcing the (unbroken) tie-break path
  css <- structure(list(X = X, y = y, feat_sel_mat = M), class = "cssr")
  # No suppressWarnings here: the fix must make this call warning-free.
  testthat::expect_no_warning(
      res <- getSelectionPrototypes(css, list(c(1L, 2L))))
  testthat::expect_identical(unname(res), 1L)    # first tied index, deterministic
  testthat::expect_identical(names(res), "V1")   # name taken from proto_i[1] only
})

## Test passed with 17 successes.

## Test passed with 3 successes.

## Test passed with 1 success.

## Test passed with 3 successes.

Tests for printCssDf():

testthat::test_that("printCssDf works", {
  set.seed(67234)
  
  data <- genClusteredData(n=15, p=11, k_unclustered=2, cluster_size=3,
                        n_clusters=2, sig_clusters=1, sigma_eps_sq=10^(-6))
  
  x <- data$X
  y <- data$y
  
  good_clusters <- list(red_cluster=1L:3L, green_cluster=4L:6L)
  
  css_results <- css(X=x, y=y, lambda=0.01, clusters=good_clusters, B=10)
  
  ret <- printCssDf(css_results)
  
  testthat::expect_true(is.data.frame(ret))
  testthat::expect_identical(colnames(ret), c("ClustName", "ClustProtoNum",
                                              "ClustSelProp", "ClustSize"))
  
  # Total number of clusters is 11 - (3 - 1) - (3 - 1) = 7
  testthat::expect_equal(nrow(ret), 7)

  testthat::expect_true(is.character(ret$ClustName))
  testthat::expect_true(all(names(good_clusters) %in% ret$ClustName))
  testthat::expect_equal(length(ret$ClustName), length(unique(ret$ClustName)))
  
  testthat::expect_true(is.integer(ret$ClustProtoNum))
  testthat::expect_true(ret[ret$ClustName=="red_cluster",
                            "ClustProtoNum"] %in% 1L:3L)
  testthat::expect_true(ret[ret$ClustName=="green_cluster",
                            "ClustProtoNum"] %in% 4L:6L)
  other_rows <- !(ret$ClustName %in% c("red_cluster", "green_cluster"))
  testthat::expect_true(all(ret[other_rows, "ClustProtoNum"] %in% 7L:11L))
  testthat::expect_true(length(ret[other_rows, "ClustProtoNum"]) ==
                          length(unique(ret[other_rows, "ClustProtoNum"])))
  
  testthat::expect_true(is.numeric(ret$ClustSelProp))
  testthat::expect_identical(ret$ClustSelProp, sort(ret$ClustSelProp,
                                                    decreasing=TRUE))
  
  testthat::expect_true(is.integer(ret$ClustSize))
  testthat::expect_equal(ret[ret$ClustName=="red_cluster", "ClustSize"], 3)
  testthat::expect_equal(ret[ret$ClustName=="green_cluster", "ClustSize"], 3)
  testthat::expect_true(all(ret[other_rows, "ClustSize"] == 1))
  
  # Try again naming features
  
  x_named <- x
  colnames(x_named) <- LETTERS[1:11]
  
  css_results_name_feats <- css(X=x_named, y=y, lambda=0.01,
                                clusters=good_clusters, B=10)
  
  ret <- printCssDf(css_results_name_feats)
  
  testthat::expect_true(is.data.frame(ret))
  testthat::expect_identical(colnames(ret), c("ClustName", "ClustProtoName",
                                              "ClustProtoNum", "ClustSelProp",
                                              "ClustSize"))
  
  # Total number of clusters is 11 - (3 - 1) - (3 - 1) = 7
  testthat::expect_equal(nrow(ret), 7)

  testthat::expect_true(is.character(ret$ClustName))
  testthat::expect_true(all(names(good_clusters) %in% ret$ClustName))
  testthat::expect_equal(length(ret$ClustName), length(unique(ret$ClustName)))
  
  testthat::expect_true(is.character(ret$ClustProtoName))
  testthat::expect_true(ret[ret$ClustName=="red_cluster",
                            "ClustProtoName"] %in% LETTERS[1:3])
  testthat::expect_true(ret[ret$ClustName=="green_cluster",
                            "ClustProtoName"] %in% LETTERS[4:6])
  other_rows <- !(ret$ClustName %in% c("red_cluster", "green_cluster"))
  testthat::expect_true(all(ret[other_rows, "ClustProtoName"] %in% LETTERS[7:11]))
  testthat::expect_true(length(ret[other_rows, "ClustProtoName"]) ==
                          length(unique(ret[other_rows, "ClustProtoName"])))
  
  testthat::expect_true(is.integer(ret$ClustProtoNum))
  testthat::expect_true(ret[ret$ClustName=="red_cluster",
                            "ClustProtoNum"] %in% 1L:3L)
  testthat::expect_true(ret[ret$ClustName=="green_cluster",
                            "ClustProtoNum"] %in% 4L:6L)
  testthat::expect_true(all(ret[other_rows, "ClustProtoNum"] %in% 7L:11L))
  testthat::expect_true(length(ret[other_rows, "ClustProtoNum"]) ==
                          length(unique(ret[other_rows, "ClustProtoNum"])))
  
  testthat::expect_true(is.numeric(ret$ClustSelProp))
  testthat::expect_identical(ret$ClustSelProp, sort(ret$ClustSelProp,
                                                    decreasing=TRUE))
  
  testthat::expect_true(is.integer(ret$ClustSize))
  testthat::expect_equal(ret[ret$ClustName=="red_cluster", "ClustSize"], 3)
  testthat::expect_equal(ret[ret$ClustName=="green_cluster", "ClustSize"], 3)
  testthat::expect_true(all(ret[other_rows, "ClustSize"] == 1))
  
  # Unnamed clusters
  
  unnamed_clusters <- list(1:3, 4:6)
  
  css_results_unnamed <- css(X=x, y=y, lambda=0.01, clusters=unnamed_clusters,
                             B=10)
  
  ret <- printCssDf(css_results_unnamed)
  
  testthat::expect_true(is.data.frame(ret))
  testthat::expect_identical(colnames(ret), c("ClustName", "ClustProtoNum",
                                              "ClustSelProp", "ClustSize"))
  
  # Total number of clusters is 11 - (3 - 1) - (3 - 1) = 7
  testthat::expect_equal(nrow(ret), 7)

  testthat::expect_true(is.character(ret$ClustName))
  testthat::expect_equal(length(ret$ClustName), length(unique(ret$ClustName)))
  
  testthat::expect_true(is.integer(ret$ClustProtoNum))
  
  testthat::expect_true(is.numeric(ret$ClustSelProp))
  testthat::expect_identical(ret$ClustSelProp, sort(ret$ClustSelProp,
                                                    decreasing=TRUE))
  
  testthat::expect_true(is.integer(ret$ClustSize))
  
  # Try other settings for cutoff, min_num_clusts, max_num_clusts, etc.
  
  ret <- printCssDf(css_results, max_num_clusts=3)
  
  testthat::expect_true(is.data.frame(ret))
  testthat::expect_identical(colnames(ret), c("ClustName", "ClustProtoNum",
                                              "ClustSelProp", "ClustSize"))

  testthat::expect_true(nrow(ret) <= 3)

  testthat::expect_true(is.character(ret$ClustName))
  testthat::expect_equal(length(ret$ClustName), length(unique(ret$ClustName)))
  
  testthat::expect_true(is.integer(ret$ClustProtoNum))
  other_rows <- !(ret$ClustName %in% c("red_cluster", "green_cluster"))
  testthat::expect_true(all(ret[other_rows, "ClustProtoNum"] %in% 7L:11L))
  testthat::expect_true(length(ret[other_rows, "ClustProtoNum"]) ==
                          length(unique(ret[other_rows, "ClustProtoNum"])))
  
  testthat::expect_true(is.numeric(ret$ClustSelProp))
  testthat::expect_identical(ret$ClustSelProp, sort(ret$ClustSelProp,
                                                    decreasing=TRUE))
  
  testthat::expect_true(is.integer(ret$ClustSize))
  testthat::expect_true(all(ret[other_rows, "ClustSize"] == 1))
  
  if("red_cluster" %in% ret$ClustName){
    testthat::expect_true(ret[ret$ClustName=="red_cluster",
                              "ClustProtoNum"] %in% 1L:3L)
    testthat::expect_equal(ret[ret$ClustName=="red_cluster", "ClustSize"], 3)
  }
  
  if("green_cluster" %in% ret$ClustName){
    testthat::expect_true(ret[ret$ClustName=="green_cluster",
                              "ClustProtoNum"] %in% 4L:6L)
    testthat::expect_equal(ret[ret$ClustName=="green_cluster", "ClustSize"], 3)
  }
  
  ret <- printCssDf(css_results, min_num_clusts=2, cutoff=1)
  
  testthat::expect_true(is.data.frame(ret))
  testthat::expect_identical(colnames(ret), c("ClustName", "ClustProtoNum",
                                              "ClustSelProp", "ClustSize"))
  
  # Total number of clusters is 11 - (3 - 1) - (3 - 1) = 7
  testthat::expect_true(nrow(ret) >= 2)
  testthat::expect_true(nrow(ret) <= 7)

  testthat::expect_true(is.character(ret$ClustName))
  testthat::expect_equal(length(ret$ClustName), length(unique(ret$ClustName)))
  
  testthat::expect_true(is.integer(ret$ClustProtoNum))
  other_rows <- !(ret$ClustName %in% c("red_cluster", "green_cluster"))
  testthat::expect_true(all(ret[other_rows, "ClustProtoNum"] %in% 7L:11L))
  testthat::expect_true(length(ret[other_rows, "ClustProtoNum"]) ==
                          length(unique(ret[other_rows, "ClustProtoNum"])))
  
  testthat::expect_true(is.numeric(ret$ClustSelProp))
  testthat::expect_identical(ret$ClustSelProp, sort(ret$ClustSelProp,
                                                    decreasing=TRUE))
  
  testthat::expect_true(is.integer(ret$ClustSize))
  testthat::expect_true(all(ret[other_rows, "ClustSize"] == 1))
  
  if("red_cluster" %in% ret$ClustName){
    testthat::expect_true(ret[ret$ClustName=="red_cluster",
                              "ClustProtoNum"] %in% 1L:3L)
    testthat::expect_equal(ret[ret$ClustName=="red_cluster", "ClustSize"], 3)
  }
  
  if("green_cluster" %in% ret$ClustName){
    testthat::expect_true(ret[ret$ClustName=="green_cluster",
                              "ClustProtoNum"] %in% 4L:6L)
    testthat::expect_equal(ret[ret$ClustName=="green_cluster", "ClustSize"], 3)
  }
  
  #
  ret <- printCssDf(css_results, cutoff=1)

  testthat::expect_true(is.data.frame(ret))
  testthat::expect_identical(colnames(ret), c("ClustName", "ClustProtoNum",
                                              "ClustSelProp", "ClustSize"))

  testthat::expect_true(nrow(ret) >= 1)
  testthat::expect_true(nrow(ret) <= 7)

  testthat::expect_true(is.character(ret$ClustName))
  testthat::expect_equal(length(ret$ClustName), length(unique(ret$ClustName)))

  testthat::expect_true(is.integer(ret$ClustProtoNum))
  other_rows <- !(ret$ClustName %in% c("red_cluster", "green_cluster"))
  testthat::expect_true(all(ret[other_rows, "ClustProtoNum"] %in% 7L:11L))
  testthat::expect_true(length(ret[other_rows, "ClustProtoNum"]) ==
                          length(unique(ret[other_rows, "ClustProtoNum"])))

  testthat::expect_true(is.numeric(ret$ClustSelProp))
  testthat::expect_identical(ret$ClustSelProp, sort(ret$ClustSelProp,
                                                    decreasing=TRUE))

  testthat::expect_true(is.integer(ret$ClustSize))
  testthat::expect_true(all(ret[other_rows, "ClustSize"] == 1))

  if("red_cluster" %in% ret$ClustName){
    testthat::expect_true(ret[ret$ClustName=="red_cluster",
                              "ClustProtoNum"] %in% 1L:3L)
    testthat::expect_equal(ret[ret$ClustName=="red_cluster", "ClustSize"], 3)
  }

  if("green_cluster" %in% ret$ClustName){
    testthat::expect_true(ret[ret$ClustName=="green_cluster",
                              "ClustProtoNum"] %in% 4L:6L)
    testthat::expect_equal(ret[ret$ClustName=="green_cluster", "ClustSize"], 3)
  }

  ## Trying bad inputs

  # Error has quotation marks in it
  testthat::expect_error(printCssDf("css_results"))
  
  testthat::expect_error(printCssDf(css_results, cutoff=-.1),
                         "cutoff >= 0 is not TRUE", fixed=TRUE)
  
  testthat::expect_error(printCssDf(css_results, min_num_clusts=3.2),
                         "min_num_clusts == round(min_num_clusts) is not TRUE",
                         fixed=TRUE)
  
  testthat::expect_error(printCssDf(css_results, max_num_clusts="5"),
                         "is.numeric(max_num_clusts) | is.integer(max_num_clusts) is not TRUE",
                         fixed=TRUE)
})

## Test passed with 98 successes.

Tests for print.cssr() (and a minimal-B smoke test for css()):

testthat::test_that("print.cssr works", {
  set.seed(26717)
  x <- matrix(stats::rnorm(10*7), nrow=10, ncol=7)
  y <- stats::rnorm(10)
  good_clusters <- list("apple"=1:2, "banana"=3:4, "cantaloupe"=5)
  css_res <- css(X=x, y=y, lambda=0.01, clusters=good_clusters, B = 10)

  # print() dispatches to print.cssr, which returns its argument invisibly (the
  # standard print-method convention; see print.cssr's @return / #16).
  testthat::expect_invisible(print(css_res))
  ret <- print(css_res)
  testthat::expect_identical(ret, css_res)

  # It prints the printCssDf summary table (column headers included).
  testthat::expect_output(print(css_res), "ClustName")
  testthat::expect_output(print(css_res), "ClustSize")

  # max_num_clusts routes through to printCssDf (at most that many cluster rows).
  out_all <- utils::capture.output(print(css_res))
  out_one <- utils::capture.output(print(css_res, max_num_clusts=1))
  testthat::expect_true(length(out_one) <= length(out_all))
})

testthat::test_that("css runs with B = 1 (minimal subsampling)", {
  set.seed(26717)
  x <- matrix(stats::rnorm(10*7), nrow=10, ncol=7)
  y <- stats::rnorm(10)
  good_clusters <- list("apple"=1:2, "banana"=3:4, "cantaloupe"=5)

  # B = 1 is allowed (checkB requires only B > 0) but warns that small B is poor.
  testthat::expect_warning(
    css_b1 <- css(X=x, y=y, lambda=0.01, clusters=good_clusters, B = 1),
    "Small values of B may lead to poor results.", fixed=TRUE)
  testthat::expect_true(inherits(css_b1, "cssr"))

  # The minimal-B object is still well-formed downstream.
  sel <- getCssSelections(css_b1)
  testthat::expect_identical(names(sel),
    c("selected_clusts", "selected_feats", "weights"))
})

##    ClustName ClustProtoNum ClustSelProp ClustSize
## 1     banana             4         1.00         2
## 2 cantaloupe             5         0.80         1
## 3         c4             6         0.60         1
## 4      apple             1         0.55         2
## 5         c5             7         0.30         1
##    ClustName ClustProtoNum ClustSelProp ClustSize
## 1     banana             4         1.00         2
## 2 cantaloupe             5         0.80         1
## 3         c4             6         0.60         1
## 4      apple             1         0.55         2
## 5         c5             7         0.30         1
## Test passed with 5 successes.

## Test passed with 3 successes.

Tests for selected() / selected.cssr():

testthat::test_that("selected.cssr returns clusters and features", {
  set.seed(26717)
  x <- matrix(stats::rnorm(10*7), nrow=10, ncol=7)
  y <- stats::rnorm(10)
  good_clusters <- list("apple"=1:2, "banana"=3:4, "cantaloupe"=5)
  css_res <- css(X=x, y=y, lambda=0.01, clusters=good_clusters, B = 10)

  ref <- getCssSelections(css_res)

  # type = "clusters" (the default): a named list of integer vectors, identical
  # to getCssSelections()$selected_clusts at the same parameters.
  clusts <- selected(css_res)
  testthat::expect_true(is.list(clusts))
  testthat::expect_identical(clusts, ref$selected_clusts)
  testthat::expect_identical(selected(css_res, type="clusters"),
                             ref$selected_clusts)

  # type = "features": a flat integer vector, identical to
  # getCssSelections()$selected_feats.
  feats <- selected(css_res, type="features")
  testthat::expect_true(is.integer(feats))
  testthat::expect_identical(feats, ref$selected_feats)

  # S3 dispatch: selected(obj) reaches selected.cssr().
  testthat::expect_identical(selected(css_res), selected.cssr(css_res))

  # Consistency with getCssSelections() at a nondefault cutoff and weighting
  # (features path).
  ref2 <- getCssSelections(css_res, weighting="simple_avg", cutoff=0.3)
  testthat::expect_identical(
    selected(css_res, type="features", weighting="simple_avg", cutoff=0.3),
    ref2$selected_feats)

  # Invalid inputs are rejected via the reused checks (checkCutoff /
  # checkWeighting), same messages as getCssSelections().
  testthat::expect_error(selected(css_res, cutoff=-0.5),
                         "cutoff >= 0 is not TRUE", fixed=TRUE)
  testthat::expect_error(selected(css_res, weighting="spasre"),
    "Weighting must be a character and one of sparse, simple_avg, or weighted_avg",
    fixed=TRUE)
})

testthat::test_that("selected(type='clusters') is invariant to weighting", {
  set.seed(26717)
  x <- matrix(stats::rnorm(10*7), nrow=10, ncol=7)
  y <- stats::rnorm(10)
  good_clusters <- list("apple"=1:2, "banana"=3:4, "cantaloupe"=5)
  css_res <- css(X=x, y=y, lambda=0.01, clusters=good_clusters, B = 10)

  # weighting only affects individual-feature weights, never which clusters are
  # selected: the type = "clusters" output is identical across weighting values.
  sparse <- selected(css_res, type="clusters", weighting="sparse", cutoff=0.2)
  wavg <- selected(css_res, type="clusters", weighting="weighted_avg",
                   cutoff=0.2)
  savg <- selected(css_res, type="clusters", weighting="simple_avg",
                   cutoff=0.2)
  testthat::expect_identical(sparse, wavg)
  testthat::expect_identical(sparse, savg)

  # By contrast the features path CAN depend on weighting: "simple_avg" returns
  # every member of the selected clusters, so at least as many as "sparse".
  feats_sparse <- selected(css_res, type="features", weighting="sparse",
                           cutoff=0.2)
  feats_savg <- selected(css_res, type="features", weighting="simple_avg",
                         cutoff=0.2)
  testthat::expect_true(length(feats_savg) >= length(feats_sparse))
})

## Test passed with 9 successes.

## Test passed with 3 successes.

Tests for summary.cssr() / print.summary.cssr():

testthat::test_that("summary.cssr summarizes clusters, sizes, and header counts", {
  set.seed(67234)
  data <- genClusteredData(n=15, p=11, k_unclustered=2, cluster_size=3,
                           n_clusters=2, sig_clusters=1, sigma_eps_sq=10^(-6))
  x <- data$X
  y <- data$y
  good_clusters <- list(red_cluster=1L:3L, green_cluster=4L:6L)
  css_res <- css(X=x, y=y, lambda=0.01, clusters=good_clusters, B=10)

  smry <- summary(css_res)
  testthat::expect_true(inherits(smry, "summary.cssr"))

  ref <- getCssSelections(css_res)
  ref_df <- printCssDf(css_res)

  # Header counts are consistent with getCssSelections().
  testthat::expect_equal(smry$n_selected_clusts, length(ref$selected_clusts))
  testthat::expect_equal(smry$n_selected_feats, length(ref$selected_feats))
  testthat::expect_equal(smry$cutoff, 0)

  # The per-cluster table IS printCssDf()'s data.frame (Theta-hat = ClustSelProp,
  # plus ClustSize), so clusters / proportions / sizes all match.
  testthat::expect_identical(smry$table, ref_df)
  testthat::expect_equal(nrow(smry$table), 7)
  testthat::expect_equal(smry$n_selected_clusts, nrow(smry$table))
  testthat::expect_true(all(c("ClustName", "ClustSelProp", "ClustSize") %in%
                             colnames(smry$table)))
  testthat::expect_equal(smry$table[smry$table$ClustName=="red_cluster",
                                    "ClustSize"], 3)
  testthat::expect_equal(smry$table[smry$table$ClustName=="green_cluster",
                                    "ClustSize"], 3)

  # PFER placeholder present (to be filled in by #87).
  testthat::expect_true("pfer" %in% names(smry))
  testthat::expect_true(is.na(smry$pfer))

  # print.summary.cssr runs, prints the header + table, returns x invisibly.
  testthat::expect_invisible(print(smry))
  ret <- print(smry)
  testthat::expect_identical(ret, smry)
  testthat::expect_output(print(smry), "selected at cutoff")
  testthat::expect_output(print(smry), "ClustName")

  # S3 dispatch: summary(obj) reaches summary.cssr().
  testthat::expect_identical(summary(css_res), summary.cssr(css_res))

  # Invalid cutoff rejected via the reused check.
  testthat::expect_error(summary(css_res, cutoff=1.5),
                         "cutoff <= 1 is not TRUE", fixed=TRUE)
})

## Cluster stability selection summary
## 7 clusters / 7 features selected at cutoff 0
## 
##       ClustName ClustProtoNum ClustSelProp ClustSize
## 1   red_cluster             1         1.00         3
## 2            c3             7         1.00         1
## 3 green_cluster             6         0.80         3
## 4            c7            11         0.70         1
## 5            c6            10         0.65         1
## 6            c4             8         0.60         1
## 7            c5             9         0.40         1
## 
## PFER: NA (not yet implemented; see issue #87)
## Cluster stability selection summary
## 7 clusters / 7 features selected at cutoff 0
## 
##       ClustName ClustProtoNum ClustSelProp ClustSize
## 1   red_cluster             1         1.00         3
## 2            c3             7         1.00         1
## 3 green_cluster             6         0.80         3
## 4            c7            11         0.70         1
## 5            c6            10         0.65         1
## 6            c4             8         0.60         1
## 7            c5             9         0.40         1
## 
## PFER: NA (not yet implemented; see issue #87)
## Test passed with 18 successes.

Test for the empty-selection path of the print layer (issues #107 and #120). Post-#107 an empty selection is valid; #120 makes printCssDf(), print.cssr(), and getSelectionPrototypes() return clean empty output – a zero-row data.frame, a “(no clusters selected …)” message, and an empty integer vector, respectively – via an early return before the nrow >= 1 / n_selected_clusts >= 1 guards, rather than tripping those assertions. The block also covers the higher-level selected() and a zero-row summary.cssr().

testthat::test_that("selected() and summary() handle an empty selection", {
  # Same empty-producing fixture as the getCssSelections #107 test: fixed seed,
  # all-singleton clusters, and a large lambda so the base lasso selects almost
  # nothing; cutoff = 1 with min_num_clusts = 0 then selects no cluster.
  set.seed(8)
  B <- 10
  x <- matrix(stats::rnorm(30*8), nrow=30, ncol=8)
  y <- stats::rnorm(30)
  css_res <- css(X=x, y=y, lambda=0.5, clusters=list(), B=B)
  testthat::expect_true(max(colMeans(css_res$clus_sel_mat)) < 1 - 1/(2*B))

  # selected(): a clean empty list / empty integer vector, no error.
  empty_clusts <- selected(css_res, cutoff=1, min_num_clusts=0)
  testthat::expect_true(is.list(empty_clusts))
  testthat::expect_length(empty_clusts, 0)
  empty_feats <- selected(css_res, type="features", cutoff=1, min_num_clusts=0)
  testthat::expect_true(is.integer(empty_feats))
  testthat::expect_length(empty_feats, 0)

  # summary(): a well-formed zero-row summary (table NULL, "0 clusters" header),
  # NOT an opaque stopifnot() failure from printCssDf/getSelectionPrototypes.
  smry <- summary(css_res, cutoff=1, min_num_clusts=0)
  testthat::expect_true(inherits(smry, "summary.cssr"))
  testthat::expect_equal(smry$n_selected_clusts, 0)
  testthat::expect_equal(smry$n_selected_feats, 0)
  testthat::expect_true(is.null(smry$table))

  # print still works and reports 0 clusters / 0 features.
  testthat::expect_invisible(print(smry))
  testthat::expect_output(print(smry), "0 clusters / 0 features")

  # printCssDf() (#120): an empty selection returns a well-formed zero-row
  # data.frame with the SAME columns as a non-empty printCssDf() on the same
  # object, rather than tripping stopifnot(nrow >= 1). Unnamed X (this fixture)
  # -> 4 columns, no ClustProtoName.
  empty_df <- printCssDf(css_res, cutoff=1, min_num_clusts=0)
  testthat::expect_true(is.data.frame(empty_df))
  testthat::expect_equal(nrow(empty_df), 0)
  testthat::expect_equal(ncol(empty_df), 4)
  nonempty_df <- printCssDf(css_res, cutoff=0)
  testthat::expect_true(nrow(nonempty_df) >= 1)
  # The invariant that matters: empty and non-empty share the same column set.
  testthat::expect_identical(colnames(empty_df), colnames(nonempty_df))
  testthat::expect_identical(
    colnames(empty_df),
    c("ClustName", "ClustProtoNum", "ClustSelProp", "ClustSize"))
  # Correct 0-length column types.
  testthat::expect_true(is.character(empty_df$ClustName))
  testthat::expect_true(is.integer(empty_df$ClustProtoNum))
  testthat::expect_true(is.numeric(empty_df$ClustSelProp))
  testthat::expect_true(is.integer(empty_df$ClustSize))

  # Named X -> the 5-column path (adds ClustProtoName). Same data + column names,
  # so the selection is still empty at cutoff = 1 with min_num_clusts = 0.
  x_named <- x
  colnames(x_named) <- paste0("V", seq_len(ncol(x_named)))
  css_named <- css(X=x_named, y=y, lambda=0.5, clusters=list(), B=B)
  empty_named_df <- printCssDf(css_named, cutoff=1, min_num_clusts=0)
  testthat::expect_true(is.data.frame(empty_named_df))
  testthat::expect_equal(nrow(empty_named_df), 0)
  testthat::expect_equal(ncol(empty_named_df), 5)
  testthat::expect_true("ClustProtoName" %in% colnames(empty_named_df))
  testthat::expect_identical(colnames(empty_named_df),
                             colnames(printCssDf(css_named, cutoff=0)))

  # getSelectionPrototypes() on an empty list of clusters -> integer(0) (#120).
  testthat::expect_identical(getSelectionPrototypes(css_res, list()),
                             integer(0))

  # print.cssr() (#120): prints a "(no clusters selected ...)" message instead
  # of a table, does not error, and returns the cssr object invisibly unchanged.
  testthat::expect_output(
    ret <- print(css_res, cutoff=1, min_num_clusts=0),
    "no clusters selected")
  testthat::expect_identical(ret, css_res)
  testthat::expect_invisible(print(css_res, cutoff=1, min_num_clusts=0))
})

## Cluster stability selection summary
## 0 clusters / 0 features selected at cutoff 1
## (no clusters selected at this cutoff)
## 
## PFER: NA (not yet implemented; see issue #87)
## (no clusters selected at cutoff 1)
## Test passed with 30 successes.

Tests for plot.cssr(). All plotting is drawn to a null device (grDevices::pdf(NULL) / grDevices::dev.off()) so the tests produce no file or window. They cover the return value (the sorted selection proportions), the index-based feature highlight for an unnamed X (the B2 guard), ... passthrough to barplot() without a duplicate-argument crash (the B1 guard), and the empty-selection and single-cluster edge cases.

testthat::test_that("plot.cssr draws cluster proportions and returns them sorted", {
  # Named X so the feature matrix (tested below) carries feature names.
  set.seed(26717)
  x <- matrix(stats::rnorm(10*7), nrow=10, ncol=7)
  colnames(x) <- paste0("V", seq_len(ncol(x)))
  y <- stats::rnorm(10)
  good_clusters <- list("apple"=1:2, "banana"=3:4, "cantaloupe"=5)
  css_res <- css(X=x, y=y, lambda=0.01, clusters=good_clusters, B=10)

  grDevices::pdf(NULL)  # draw to a null device (no file, no window)

  # (a) Clusters (default): runs without error and invisibly returns the sorted
  # named cluster selection proportions (a deterministic proof of the return).
  testthat::expect_invisible(plot(css_res))
  ret <- plot(css_res)
  testthat::expect_equal(ret,
    sort(colMeans(css_res$clus_sel_mat), decreasing=TRUE))
  testthat::expect_false(is.null(names(ret)))  # cluster names guaranteed

  # (b) Features, NAMED X: returns the sorted NAMED feature selection
  # proportions, and the (index-based) highlight matches getCssSelections().
  ret_f <- plot(css_res, type="features")
  props_f <- colMeans(css_res$feat_sel_mat)
  testthat::expect_equal(ret_f, sort(props_f, decreasing=TRUE))
  testthat::expect_identical(names(ret_f),
    paste0("V", order(props_f, decreasing=TRUE)))  # named + correctly ordered
  sel_f <- getCssSelections(css_res, weighting="sparse")
  ord <- order(props_f, decreasing=TRUE)
  is_sel <- (seq_along(props_f) %in% as.integer(sel_f$selected_feats))[ord]
  testthat::expect_equal(sum(is_sel), length(sel_f$selected_feats))
  testthat::expect_setequal(ord[is_sel], as.integer(sel_f$selected_feats))

  grDevices::dev.off()
})

testthat::test_that("plot.cssr feature highlight is index-based for unnamed X (B2)", {
  # genClusteredData returns X with NULL colnames -> feat_sel_mat is UNNAMED.
  # A name-based feature highlight would silently be all-FALSE; the method uses
  # an INDEX-based highlight, which must still be non-empty. This is the B2
  # regression proof.
  set.seed(721)
  dat <- genClusteredData(n=60, p=11, k_unclustered=2, cluster_size=4,
    n_clusters=1, snr=3)
  clusters <- list(cluster1=1:4)
  css_res <- suppressWarnings(css(X=dat$X, y=dat$y, lambda=0.01,
    clusters=clusters, B=10))

  # Precondition for the B2 guard: the feature matrix really is unnamed.
  testthat::expect_null(colnames(css_res$feat_sel_mat))

  grDevices::pdf(NULL)

  props <- colMeans(css_res$feat_sel_mat)
  ret <- plot(css_res, type="features", cutoff=0.3)
  # Returns the plain (unnamed) sorted colMeans regardless of the cutoff.
  testthat::expect_equal(ret, sort(props, decreasing=TRUE))
  testthat::expect_null(names(ret))

  # Re-derive the highlight exactly as plot.cssr does: INDEX-based.
  sel <- getCssSelections(css_res, weighting="sparse", cutoff=0.3)
  testthat::expect_true(length(sel$selected_feats) > 0)  # there ARE selections
  ord <- order(props, decreasing=TRUE)
  is_sel <- (seq_along(props) %in% as.integer(sel$selected_feats))[ord]
  testthat::expect_true(any(is_sel))                     # NON-empty highlight
  testthat::expect_equal(sum(is_sel), length(sel$selected_feats))
  # The highlighted bars are exactly the selected features, by ORIGINAL index.
  testthat::expect_setequal(ord[is_sel], as.integer(sel$selected_feats))

  # Contrast: a NAME-based highlight WOULD have failed here (names are NULL, so
  # names(props) %in% ... is logical(0) -> no bar highlighted).
  testthat::expect_false(any(names(props) %in%
    as.character(sel$selected_feats)))

  grDevices::dev.off()
})

testthat::test_that("plot.cssr forwards ... to barplot without a duplicate-arg crash (B1)", {
  # The method builds a SINGLE merged barplot arg list, so graphical params on
  # ... cannot collide with col/ylim/height (which previously crashed with
  # "formal argument 'col' matched by multiple actual arguments").
  set.seed(26717)
  x <- matrix(stats::rnorm(10*7), nrow=10, ncol=7)
  colnames(x) <- paste0("V", seq_len(ncol(x)))
  y <- stats::rnorm(10)
  good_clusters <- list("apple"=1:2, "banana"=3:4, "cantaloupe"=5)
  css_res <- css(X=x, y=y, lambda=0.01, clusters=good_clusters, B=10)

  grDevices::pdf(NULL)
  testthat::expect_no_error(plot(css_res, col="red"))
  testthat::expect_no_error(plot(css_res, ylim=c(0, 0.5)))
  testthat::expect_no_error(plot(css_res, main="x"))
  testthat::expect_no_error(plot(css_res, type="features", col="red",
    main="feats"))
  grDevices::dev.off()
})

testthat::test_that("plot.cssr handles an empty selection and a single cluster", {
  # (e) Empty selection: min_num_clusts = 0 with a cutoff strictly above every
  # cluster proportion selects nothing (min_num_clusts = 1 is never empty). All
  # bars get unsel_col, there is no highlight, and there is no crash.
  set.seed(8)
  B <- 10
  x <- matrix(stats::rnorm(30*8), nrow=30, ncol=8)
  y <- stats::rnorm(30)
  css_res <- css(X=x, y=y, lambda=0.5, clusters=list(), B=B)
  max_prop <- max(colMeans(css_res$clus_sel_mat))
  testthat::expect_true(max_prop < 1)
  cut <- (max_prop + 1) / 2  # strictly between max_prop and 1 (and <= 1)
  sel <- getCssSelections(css_res, cutoff=cut, min_num_clusts=0)
  testthat::expect_length(sel$selected_clusts, 0)  # genuinely empty

  grDevices::pdf(NULL)
  # A bare call that errored would fail the test -> this asserts "no crash".
  ret <- plot(css_res, min_num_clusts=0, cutoff=cut)
  testthat::expect_equal(ret,
    sort(colMeans(css_res$clus_sel_mat), decreasing=TRUE))
  grDevices::dev.off()

  # (f) Single-cluster css: clus_sel_mat has exactly one column -> a length-1
  # barplot, no error.
  set.seed(1)
  x1 <- matrix(stats::rnorm(10*4), nrow=10, ncol=4)
  y1 <- stats::rnorm(10)
  css_1 <- css(X=x1, y=y1, lambda=0.01, clusters=list(all=1:4), B=10)
  testthat::expect_equal(ncol(css_1$clus_sel_mat), 1)

  grDevices::pdf(NULL)
  ret1 <- plot(css_1)
  testthat::expect_length(ret1, 1)
  grDevices::dev.off()
})

testthat::test_that("plot.cssr surfaces the tie-breach warning (#159c)", {
  # plot.cssr no longer wraps getCssSelections in suppressWarnings, so a
  # max_num_clusts breach reaches the user. Two clusters tied at proportion 1.0
  # with max_num_clusts = 1 cannot be reduced below 2 without emptying the
  # selection, so checkSelectedClusters warns. Pre-#159c the blanket
  # suppressWarnings swallowed it, so this test FAILS on the old code.
  B <- 10
  clusters <- list(c1 = 1L, c2 = 2L)
  clus_sel_mat <- cbind(c1 = rep(1L, B), c2 = rep(1L, B))
  feat_sel_mat <- cbind(f1 = rep(1L, B), f2 = rep(1L, B))
  obj <- structure(list(feat_sel_mat = feat_sel_mat, clus_sel_mat = clus_sel_mat,
                        clusters = clusters), class = "cssr")

  grDevices::pdf(NULL)  # draw to a null device (no file, no window)
  on.exit(grDevices::dev.off(), add = TRUE)
  testthat::expect_warning(plot(obj, max_num_clusts = 1),
    "Returning more than max_num_clusts", fixed = TRUE)
})

## Test passed with 7 successes.

## Test passed with 8 successes.

## Test passed with 4 successes.

## Test passed with 5 successes.

## Test passed with 1 success.

Two further tests pin down the issue #160 fixes. Both mock the graphics drawing primitives with testthat::with_mocked_bindings() so they can assert on the arguments the method actually passes to them. The first checks that a user-supplied col in ... now reaches barplot() verbatim – and that, with no col, the method’s own highlight colours still apply. The second checks that the dashed cutoff line is drawn only for type = "clusters" (whose axis matches the highlight) and never for type = "features".

testthat::test_that("plot.cssr honours a user col in ... and otherwise applies the highlight (#160a)", {
  set.seed(26717)
  x <- matrix(stats::rnorm(10*7), nrow=10, ncol=7)
  colnames(x) <- paste0("V", seq_len(ncol(x)))
  y <- stats::rnorm(10)
  good_clusters <- list("apple"=1:2, "banana"=3:4, "cantaloupe"=5)
  css_res <- css(X=x, y=y, lambda=0.01, clusters=good_clusters, B=10)

  # barplot is mocked (nothing is drawn) and cutoff = 0 (no abline), so no real
  # graphics call is made; the device is opened only defensively.
  grDevices::pdf(NULL)
  on.exit(grDevices::dev.off(), add=TRUE)

  # (a1) A user col in ... must reach barplot() verbatim. Before #160a the method
  # overwrote it with the highlight colours (this asserted "red" but got
  # steelblue/grey70).
  captured <- NULL
  testthat::with_mocked_bindings(
    barplot = function(...) captured <<- list(...),
    plot(css_res, col="red"),
    .package = "graphics"
  )
  testthat::expect_identical(captured$col, "red")

  # (a2) With no col the method's own highlight must STILL apply -- guards against
  # over-fixing (a1) by dropping the default. max_num_clusts=2 forces a genuine
  # mix (>=1 selected, >=1 unselected) so both ifelse branches are exercised.
  captured2 <- NULL
  testthat::with_mocked_bindings(
    barplot = function(...) captured2 <<- list(...),
    plot(css_res, max_num_clusts=2),
    .package = "graphics"
  )
  props <- colMeans(css_res$clus_sel_mat)
  sel <- suppressWarnings(getCssSelections(css_res, weighting="sparse",
    cutoff=0, min_num_clusts=1, max_num_clusts=2))
  is_sel <- (names(props) %in% names(sel$selected_clusts))[order(props,
    decreasing=TRUE)]
  testthat::expect_identical(captured2$col, ifelse(is_sel, "steelblue", "grey70"))
  testthat::expect_true("steelblue" %in% captured2$col)  # a genuine mix, so both
  testthat::expect_true("grey70" %in% captured2$col)     # branches are covered
})

testthat::test_that("plot.cssr draws the cutoff line only for type='clusters' (#160b)", {
  set.seed(26717)
  x <- matrix(stats::rnorm(10*7), nrow=10, ncol=7)
  colnames(x) <- paste0("V", seq_len(ncol(x)))
  y <- stats::rnorm(10)
  good_clusters <- list("apple"=1:2, "banana"=3:4, "cantaloupe"=5)
  css_res <- css(X=x, y=y, lambda=0.01, clusters=good_clusters, B=10)

  # Real barplot draws to a null device; only abline is mocked (a flag/capture).
  grDevices::pdf(NULL)
  on.exit(grDevices::dev.off(), add=TRUE)

  # (b1) type="features": the line would sit on the feature-proportion axis while
  # highlighting follows cluster selection, so no reference line is drawn (#160b).
  called <- FALSE
  testthat::with_mocked_bindings(
    abline = function(...) called <<- TRUE,
    plot(css_res, type="features", cutoff=0.6),
    .package = "graphics"
  )
  testthat::expect_false(called)

  # (b2) type="clusters": the line's axis matches the highlight, so it IS drawn at
  # h = cutoff.
  called <- FALSE
  ab_args <- NULL
  testthat::with_mocked_bindings(
    abline = function(...) { called <<- TRUE; ab_args <<- list(...) },
    plot(css_res, type="clusters", cutoff=0.6),
    .package = "graphics"
  )
  testthat::expect_true(called)
  testthat::expect_equal(ab_args$h, 0.6)

  # (b3) cutoff=0: no line in either mode (unchanged behaviour).
  called <- FALSE
  testthat::with_mocked_bindings(
    abline = function(...) called <<- TRUE,
    plot(css_res, type="clusters", cutoff=0),
    .package = "graphics"
  )
  testthat::expect_false(called)
})

## Test passed with 4 successes.

## Test passed with 4 successes.

Byte-identity tests for the issue #129 performance micro-optimizations. Each asserts that the de-duplicated / hoisted path returns a value identical to the recomputed path it replaced. Per issue #129 these compare RETURN VALUES (wrapped in suppressWarnings), never warning counts: Opts 2 and 3 intentionally collapse repeated getSelectedClusters() calls, which only de-duplicates a warning side-channel and leaves the returned objects unchanged.

testthat::test_that("Opt 2 (#129): formCssDesign weights= equals internal recompute; getCssPreds finite", {
  set.seed(721)
  dat <- genClusteredData(n = 60, p = 11, k_unclustered = 2, cluster_size = 4,
      n_clusters = 1, snr = 3)
  clusters <- list(cluster1 = 1:4)
  res <- suppressWarnings(css(X = dat$X, y = dat$y, lambda = 0.01,
      clusters = clusters, B = 15))

  # The hoist: passing weights= (as getCssPreds now does) must build a design
  # matrix byte-identical to letting formCssDesign() recompute the weights via
  # getSelectedClusters(), across every weighting rule.
  for(w in c("sparse", "weighted_avg", "simple_avg")){
    wts <- getSelectedClusters(res, w, 0, 1, NA)$weights
    d_recompute <- suppressWarnings(formCssDesign(res, weighting = w,
        newx = dat$X))
    d_passed <- suppressWarnings(formCssDesign(res, weighting = w,
        newx = dat$X, weights = wts))
    testthat::expect_identical(d_passed, d_recompute)
  }

  # End-to-end getCssPreds still returns finite predictions of the right length
  # for both the provided-trainX path and the train_inds path, across weightings.
  res_ti <- suppressWarnings(css(X = dat$X, y = dat$y, lambda = 0.01,
      clusters = clusters, B = 15, train_inds = 41:60))
  for(w in c("sparse", "weighted_avg", "simple_avg")){
    p_prov <- suppressWarnings(getCssPreds(res, testX = dat$X[1:10, ],
        trainX = dat$X[11:40, ], trainY = dat$y[11:40], weighting = w))
    testthat::expect_length(p_prov, 10)
    testthat::expect_true(all(is.finite(p_prov)))
    p_ti <- suppressWarnings(getCssPreds(res_ti, testX = dat$X[1:10, ],
        weighting = w))
    testthat::expect_length(p_ti, 10)
    testthat::expect_true(all(is.finite(p_ti)))
  }
})

testthat::test_that("Opt 3 (#129): buildCssDf equals printCssDf; summary$table reuses it", {
  set.seed(824)
  dat <- genClusteredData(n = 55, p = 11, k_unclustered = 2, cluster_size = 4,
      n_clusters = 1, snr = 3)
  clusters <- list(cluster1 = 1:4)
  res <- suppressWarnings(css(X = dat$X, y = dat$y, lambda = 0.01,
      clusters = clusters, B = 12))

  # buildCssDf(obj, getCssSelections(obj, ...)$selected_clusts) reproduces
  # printCssDf(obj, ...) exactly, across cutoffs (including the empty selection
  # at cutoff = 1 with min_num_clusts = 0).
  for(cut in c(0, 0.2, 0.5, 1)){
    sc <- suppressWarnings(getCssSelections(res, cutoff = cut,
        min_num_clusts = 0))$selected_clusts
    testthat::expect_identical(
        suppressWarnings(buildCssDf(res, sc)),
        suppressWarnings(printCssDf(res, cutoff = cut, min_num_clusts = 0)))
  }

  # summary.cssr()$table (now built via buildCssDf() on summary's own
  # sel$selected_clusts) equals printCssDf() on the same object across
  # weightings -- cluster selection is weighting-invariant, so reusing summary's
  # sel$selected_clusts is byte-identical to printCssDf's internal recompute.
  ref <- suppressWarnings(printCssDf(res))
  for(w in c("sparse", "weighted_avg", "simple_avg")){
    s <- suppressWarnings(summary(res, weighting = w))
    testthat::expect_identical(s$table, ref)
  }
})

## Test passed with 15 successes.

## Test passed with 7 successes.