11 Tests for wrapper functions

Tests for cssSelect():

testthat::test_that("cssSelect works", {
  set.seed(73212)
  
  data <- genClusteredData(n=15, p=11, k_unclustered=2, cluster_size=3,
                        n_clusters=2, sig_clusters=1, sigma_eps_sq=1)
  
  x <- data$X
  y <- data$y
  
  # Intentionally don't provide clusters for all features, mix up formatting,
  # etc.
  good_clusters <- list(red_cluster=1L:3L, 4:6)
  
  res <- cssSelect(X=x, y=y, clusters=good_clusters)
  
  testthat::expect_true(is.list(res))
  testthat::expect_equal(length(res), 3)
  testthat::expect_identical(names(res), c("selected_clusts", "selected_feats",
                                           "weights"))
  
  testthat::expect_true(!is.null(names(res$selected_clusts)))
  testthat::expect_true(is.character(names(res$selected_clusts)))
  testthat::expect_true(length(res$selected_clusts) <=
                          length(res$selected_feats))
  # Total of 11 - 2*(3 - 1) = 7 clusters
  testthat::expect_true(length(res$selected_clusts) <= 7)
  testthat::expect_true(length(res$selected_clusts) >= 1)

  testthat::expect_true(is.list(res$selected_clusts))
  testthat::expect_equal(length(names(res$selected_clusts)),
                           length(unique(names(res$selected_clusts))))
  already_used_feats <- integer()
  for(i in 1:length(res$selected_clusts)){
    sels_i <- res$selected_clusts[[i]]
    testthat::expect_true(length(sels_i) >= 1)
    testthat::expect_true(is.integer(sels_i))
    testthat::expect_true(all(sels_i %in% 1:11))
    testthat::expect_equal(length(sels_i), length(unique(sels_i)))
    testthat::expect_equal(length(intersect(already_used_feats, sels_i)), 0)
    already_used_feats <- c(already_used_feats, sels_i)
  }
  testthat::expect_true(length(already_used_feats) <= 11)
  testthat::expect_equal(length(already_used_feats),
                         length(unique(already_used_feats)))
  testthat::expect_true(all(already_used_feats %in% 1:11))
  
  testthat::expect_true(length(res$selected_feats) <= 11)
  testthat::expect_true(is.integer(res$selected_feats))
  testthat::expect_true(length(res$selected_feats) >= 1)
  testthat::expect_equal(length(names(res$selected_feats)),
                         length(unique(names(res$selected_feats))))
  testthat::expect_true(all(res$selected_feats >= 1))
  testthat::expect_true(all(res$selected_feats <= 11))
  testthat::expect_equal(length(res$selected_feats),
                             length(unique(res$selected_feats)))
  
  # No provided clusters
  
  res <- cssSelect(X=x, y=y)
  
  testthat::expect_true(is.list(res))
  testthat::expect_equal(length(res), 3)
  testthat::expect_identical(names(res), c("selected_clusts", "selected_feats",
                                           "weights"))
  
  testthat::expect_true(!is.null(names(res$selected_clusts)))
  testthat::expect_true(is.character(names(res$selected_clusts)))
  testthat::expect_true(length(res$selected_clusts) <=
                          length(res$selected_feats))

  testthat::expect_true(length(res$selected_clusts) <= 11)
  testthat::expect_true(length(res$selected_clusts) >= 1)

  testthat::expect_true(is.list(res$selected_clusts))
  testthat::expect_equal(length(names(res$selected_clusts)),
                           length(unique(names(res$selected_clusts))))
  already_used_feats <- integer()
  for(i in 1:length(res$selected_clusts)){
    sels_i <- res$selected_clusts[[i]]
    testthat::expect_true(length(sels_i) >= 1)
    testthat::expect_true(is.integer(sels_i))
    testthat::expect_true(all(sels_i %in% 1:11))
    testthat::expect_equal(length(sels_i), length(unique(sels_i)))
    testthat::expect_equal(length(intersect(already_used_feats, sels_i)), 0)
    already_used_feats <- c(already_used_feats, sels_i)
  }
  testthat::expect_true(length(already_used_feats) <= 11)
  testthat::expect_equal(length(already_used_feats),
                         length(unique(already_used_feats)))
  testthat::expect_true(all(already_used_feats %in% 1:11))
  
  testthat::expect_true(length(res$selected_feats) <= 11)
  testthat::expect_true(is.integer(res$selected_feats))
  testthat::expect_true(length(res$selected_feats) >= 1)
  testthat::expect_equal(length(names(res$selected_feats)),
                         length(unique(names(res$selected_feats))))
  testthat::expect_true(all(res$selected_feats >= 1))
  testthat::expect_true(all(res$selected_feats <= 11))
  testthat::expect_equal(length(res$selected_feats),
                             length(unique(res$selected_feats)))

  ## Trying other inputs

  # X as a data.frame
  X_df <- datasets::mtcars
  
  res <- cssSelect(X=X_df, y=stats::rnorm(nrow(X_df)), clusters=1:3)
  
  testthat::expect_true(is.list(res))
  testthat::expect_equal(length(res), 3)
  testthat::expect_identical(names(res), c("selected_clusts", "selected_feats",
                                           "weights"))
  
  testthat::expect_true(!is.null(names(res$selected_clusts)))
  testthat::expect_true(is.character(names(res$selected_clusts)))
  testthat::expect_true(length(res$selected_clusts) <=
                          length(res$selected_feats))
  # Total of ncol(X_df) - (3 - 1) clusters
  testthat::expect_true(length(res$selected_clusts) <= ncol(X_df) - 2)
  testthat::expect_true(length(res$selected_clusts) >= 1)

  testthat::expect_true(is.list(res$selected_clusts))
  testthat::expect_equal(length(names(res$selected_clusts)),
                           length(unique(names(res$selected_clusts))))
  already_used_feats <- integer()
  for(i in 1:length(res$selected_clusts)){
    sels_i <- res$selected_clusts[[i]]
    testthat::expect_true(length(sels_i) >= 1)
    testthat::expect_true(is.integer(sels_i))
    testthat::expect_true(all(sels_i %in% 1:ncol(X_df)))
    testthat::expect_equal(length(sels_i), length(unique(sels_i)))
    testthat::expect_equal(length(intersect(already_used_feats, sels_i)), 0)
    already_used_feats <- c(already_used_feats, sels_i)
  }
  testthat::expect_true(length(already_used_feats) <= ncol(X_df))
  testthat::expect_equal(length(already_used_feats),
                         length(unique(already_used_feats)))
  testthat::expect_true(all(already_used_feats %in% 1:ncol(X_df)))

  testthat::expect_true(length(res$selected_feats) <= ncol(X_df))
  testthat::expect_true(is.integer(res$selected_feats))
  testthat::expect_true(length(res$selected_feats) >= 1)
  testthat::expect_equal(length(names(res$selected_feats)),
                         length(unique(names(res$selected_feats))))
  testthat::expect_true(all(res$selected_feats >= 1))
  testthat::expect_true(all(res$selected_feats <= ncol(X_df)))
  testthat::expect_equal(length(res$selected_feats),
                             length(unique(res$selected_feats)))

  # X as a dataframe with factors (number of columns of final design matrix
  # after one-hot encoding factors won't match number of columns of df2)
  # cyl, gear, and carb are factors with more than 2 levels
  df2 <- X_df
  df2$cyl <- as.factor(df2$cyl)
  df2$vs <- as.factor(df2$vs)
  df2$am <- as.factor(df2$am)
  df2$gear <- as.factor(df2$gear)
  df2$carb <- as.factor(df2$carb)
  
  # Should get error if I try to use data.frame with clusters, since data.frame
  # has factors with more than two levels
  testthat::expect_error(cssSelect(X=df2, y=stats::rnorm(nrow(X_df)),
                                   clusters=1:3),
                         "When stats::model.matrix converted the provided data.frame X to a matrix, the number of columns changed (probably because the provided data.frame contained a factor variable with at least three levels). Please convert the data.frame X to a matrix yourself using model.matrix and provide cluster assignments according to the columns of the new matrix.", fixed=TRUE)
  
  # Should be fine if I don't use clusters
  res <- cssSelect(X=df2, y=stats::rnorm(nrow(X_df)))
  
  X_df_mat <- stats::model.matrix(~ ., df2)
  X_df_mat <- X_df_mat[, colnames(X_df_mat) != "(Intercept)"]
  p <- ncol(X_df_mat)
  
  testthat::expect_true(is.list(res))
  testthat::expect_equal(length(res), 3)
  testthat::expect_identical(names(res), c("selected_clusts", "selected_feats",
                                           "weights"))
  
  testthat::expect_true(!is.null(names(res$selected_clusts)))
  testthat::expect_true(is.character(names(res$selected_clusts)))
  testthat::expect_true(length(res$selected_clusts) <=
                          length(res$selected_feats))
  # Total of p - (3 - 1) clusters
  testthat::expect_true(length(res$selected_clusts) <= p - 2)
  testthat::expect_true(length(res$selected_clusts) >= 1)

  testthat::expect_true(is.list(res$selected_clusts))
  testthat::expect_equal(length(names(res$selected_clusts)),
                           length(unique(names(res$selected_clusts))))
  already_used_feats <- integer()
  for(i in 1:length(res$selected_clusts)){
    sels_i <- res$selected_clusts[[i]]
    testthat::expect_true(length(sels_i) >= 1)
    testthat::expect_true(is.integer(sels_i))
    testthat::expect_true(all(sels_i %in% 1:p))
    testthat::expect_equal(length(sels_i), length(unique(sels_i)))
    testthat::expect_equal(length(intersect(already_used_feats, sels_i)), 0)
    already_used_feats <- c(already_used_feats, sels_i)
  }
  testthat::expect_true(length(already_used_feats) <= p)
  testthat::expect_equal(length(already_used_feats),
                         length(unique(already_used_feats)))
  testthat::expect_true(all(already_used_feats %in% 1:p))

  testthat::expect_true(length(res$selected_feats) <= p)
  testthat::expect_true(is.integer(res$selected_feats))
  testthat::expect_true(length(res$selected_feats) >= 1)
  testthat::expect_equal(length(names(res$selected_feats)),
                         length(unique(names(res$selected_feats))))
  testthat::expect_true(all(res$selected_feats >= 1))
  testthat::expect_true(all(res$selected_feats <= p))
  testthat::expect_equal(length(res$selected_feats),
                             length(unique(res$selected_feats)))


  # X as a matrix with column names
  x2 <- x
  colnames(x2) <- LETTERS[1:11]
  
  res <- cssSelect(X=x2, y=y, clusters=good_clusters)
  
  testthat::expect_true(is.list(res))
  testthat::expect_equal(length(res), 3)
  testthat::expect_identical(names(res), c("selected_clusts", "selected_feats",
                                           "weights"))
  
  testthat::expect_true(!is.null(names(res$selected_clusts)))
  testthat::expect_true(is.character(names(res$selected_clusts)))
  testthat::expect_true(length(res$selected_clusts) <=
                          length(res$selected_feats))
  # Total of 11 - 2*(3 - 1) = 7 clusters
  testthat::expect_true(length(res$selected_clusts) <= 7)
  testthat::expect_true(length(res$selected_clusts) >= 1)

  testthat::expect_true(is.list(res$selected_clusts))
  testthat::expect_equal(length(names(res$selected_clusts)),
                           length(unique(names(res$selected_clusts))))
  already_used_feats <- integer()
  for(i in 1:length(res$selected_clusts)){
    sels_i <- res$selected_clusts[[i]]
    testthat::expect_true(length(sels_i) >= 1)
    testthat::expect_true(is.integer(sels_i))
    testthat::expect_true(all(sels_i %in% 1:11))
    testthat::expect_equal(length(sels_i), length(unique(sels_i)))
    testthat::expect_equal(length(intersect(already_used_feats, sels_i)), 0)
    already_used_feats <- c(already_used_feats, sels_i)
  }
  testthat::expect_true(length(already_used_feats) <= 11)
  testthat::expect_equal(length(already_used_feats),
                         length(unique(already_used_feats)))
  testthat::expect_true(all(already_used_feats %in% 1:11))
  
  testthat::expect_true(length(res$selected_feats) <= 11)
  testthat::expect_true(is.integer(res$selected_feats))
  testthat::expect_true(length(res$selected_feats) >= 1)
  testthat::expect_equal(length(names(res$selected_feats)),
                         length(unique(names(res$selected_feats))))
  testthat::expect_true(all(res$selected_feats >= 1))
  testthat::expect_true(all(res$selected_feats <= 11))
  testthat::expect_equal(length(res$selected_feats),
                             length(unique(res$selected_feats)))
  
  # Vary inputs
  res <- cssSelect(X=x, y=y, clusters=good_clusters, lambda=0.01)
  
  testthat::expect_true(is.list(res))
  testthat::expect_equal(length(res), 3)
  testthat::expect_identical(names(res), c("selected_clusts", "selected_feats",
                                           "weights"))
  
  res <- cssSelect(X=x, y=y, clusters=good_clusters, cutoff=0.6)
  
  testthat::expect_true(is.list(res))
  testthat::expect_equal(length(res), 3)
  testthat::expect_identical(names(res), c("selected_clusts", "selected_feats",
                                           "weights"))
  
  res <- cssSelect(X=x, y=y, clusters=good_clusters, max_num_clusts=6)
  
  testthat::expect_true(is.list(res))
  testthat::expect_equal(length(res), 3)
  testthat::expect_identical(names(res), c("selected_clusts", "selected_feats",
                                           "weights"))
  testthat::expect_true(length(res$selected_clusts) <= 6)
  
  res <- cssSelect(X=x, y=y, clusters=good_clusters, auto_select_size=FALSE)

  testthat::expect_true(is.list(res))
  testthat::expect_equal(length(res), 3)
  testthat::expect_identical(names(res), c("selected_clusts", "selected_feats",
                                           "weights"))
  # Total of 11 - 2*(3 - 1) = 7 clusters
  testthat::expect_equal(length(res$selected_clusts), 7)
  
  # Bad inputs
  testthat::expect_error(cssSelect(X=x[1:10, ], y=y),
                         "n == length(y) is not TRUE", fixed=TRUE)
  
  testthat::expect_error(cssSelect(X=character(5), y=y),
                         "is.matrix(X) | is.data.frame(X) is not TRUE",
                         fixed=TRUE)
  
  testthat::expect_error(cssSelect(X=x, y=matrix(1:15, 5, 3)),
                         "!is.matrix(y) is not TRUE", fixed=TRUE)
  
  testthat::expect_error(cssSelect(X=x, y=factor(rbinom(15, size=1, prob=.5))),
                         "The provided y must be real-valued, because cssSelect uses the lasso for feature selection. (In order to use a different form of response, use the css function and provide your own selection function accommodating your choice of y.)",
                         fixed=TRUE)
  
  testthat::expect_error(cssSelect(X=x, y=y, clusters="clusters"),
                         "is.numeric(clusters) | is.integer(clusters) is not TRUE",
                         fixed=TRUE)
  
  testthat::expect_error(cssSelect(X=x, y=y, lambda=-.1),
                         "For method cssLasso, lambda must be nonnegative.",
                         fixed=TRUE)
  
  testthat::expect_error(cssSelect(X=x, y=y, cutoff=1.1),
                         "cutoff <= 1 is not TRUE", fixed=TRUE)
  
  testthat::expect_error(cssSelect(X=x, y=y, max_num_clusts=1000),
                         "max_num_clusts <= p is not TRUE", fixed=TRUE)
  
  testthat::expect_error(cssSelect(X=x, y=y, auto_select_size=1),
                         "is.logical(auto_select_size) is not TRUE", fixed=TRUE)

  # alpha = 0 (degenerate ridge) is rejected
  testthat::expect_error(cssSelect(X=x, y=y, alpha=0),
                         "alpha > 0 is not TRUE", fixed=TRUE)

  # alpha > 1 is rejected
  testthat::expect_error(cssSelect(X=x, y=y, alpha=1.5),
                         "alpha <= 1 is not TRUE", fixed=TRUE)

  # alpha = NA is rejected with a clear message (rather than a cryptic error
  # deep inside the bundling/subsampling logic). A bare (logical) NA trips the
  # type check first; a numeric NA reaches the dedicated !is.na guard.
  testthat::expect_error(cssSelect(X=x, y=y, alpha=NA),
                         "is.numeric(alpha) | is.integer(alpha) is not TRUE",
                         fixed=TRUE)
  testthat::expect_error(cssSelect(X=x, y=y, alpha=NA_real_),
                         "!is.na(alpha) is not TRUE", fixed=TRUE)

  # An NA in the design matrix yields the shared friendly message (#56).
  x_na <- x; x_na[2, 1] <- NA
  testthat::expect_error(cssSelect(X=x_na, y=y),
                         "must not contain missing", fixed=TRUE)
})
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssSelect(X = x, y = y, clusters = good_clusters)
##  2.   \-litr (local) getLassoLambda(X, y, alpha = alpha)
##  3.     \-glmnet::cv.glmnet(...)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssSelect(X = x, y = y, clusters = good_clusters)
##  2.   \-litr (local) getModelSize(X, y, css_results$clusters, alpha = alpha)
##  3.     \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssSelect(X = x, y = y)
##  2.   \-litr (local) getLassoLambda(X, y, alpha = alpha)
##  3.     \-glmnet::cv.glmnet(...)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssSelect(X = x, y = y)
##  2.   \-litr (local) getModelSize(X, y, css_results$clusters, alpha = alpha)
##  3.     \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssSelect(X = X_df, y = stats::rnorm(nrow(X_df)), clusters = 1:3)
##  2.   \-litr (local) getLassoLambda(X, y, alpha = alpha)
##  3.     \-glmnet::cv.glmnet(...)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Returning more than max_num_clusts = 1 clusters because increasing the cutoff any further would require returning 0 clusters
## Backtrace:
##     x
##  1. \-litr (local) cssSelect(X = X_df, y = stats::rnorm(nrow(X_df)), clusters = 1:3)
##  2.   \-litr (local) getCssSelections(...)
##  3.     \-litr (local) getSelectedClusters(...)
##  4.       \-litr (local) checkSelectedClusters(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssSelect(X = df2, y = stats::rnorm(nrow(X_df)))
##  2.   \-litr (local) getLassoLambda(X, y, alpha = alpha)
##  3.     \-glmnet::cv.glmnet(...)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssSelect(X = x2, y = y, clusters = good_clusters)
##  2.   \-litr (local) getLassoLambda(X, y, alpha = alpha)
##  3.     \-glmnet::cv.glmnet(...)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssSelect(X = x2, y = y, clusters = good_clusters)
##  2.   \-litr (local) getModelSize(X, y, css_results$clusters, alpha = alpha)
##  3.     \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssSelect(X = x, y = y, clusters = good_clusters, lambda = 0.01)
##  2.   \-litr (local) getModelSize(X, y, css_results$clusters, alpha = alpha)
##  3.     \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssSelect(X = x, y = y, clusters = good_clusters, cutoff = 0.6)
##  2.   \-litr (local) getLassoLambda(X, y, alpha = alpha)
##  3.     \-glmnet::cv.glmnet(...)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssSelect(X = x, y = y, clusters = good_clusters, max_num_clusts = 6)
##  2.   \-litr (local) getLassoLambda(X, y, alpha = alpha)
##  3.     \-glmnet::cv.glmnet(...)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssSelect(X = x, y = y, clusters = good_clusters, auto_select_size = FALSE)
##  2.   \-litr (local) getLassoLambda(X, y, alpha = alpha)
##  3.     \-glmnet::cv.glmnet(...)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##      x
##   1. +-testthat::expect_error(...)
##   2. | \-testthat:::expect_condition_matching_(...)
##   3. |   \-testthat:::quasi_capture(...)
##   4. |     +-testthat (local) .capture(...)
##   5. |     | \-base::withCallingHandlers(...)
##   6. |     \-rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo))
##   7. \-litr (local) cssSelect(X = x, y = matrix(1:15, 5, 3))
##   8.   \-litr (local) getLassoLambda(X, y, alpha = alpha)
##   9.     \-glmnet::cv.glmnet(...)
##  10.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##      x
##   1. +-testthat::expect_error(...)
##   2. | \-testthat:::expect_condition_matching_(...)
##   3. |   \-testthat:::quasi_capture(...)
##   4. |     +-testthat (local) .capture(...)
##   5. |     | \-base::withCallingHandlers(...)
##   6. |     \-rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo))
##   7. \-litr (local) cssSelect(X = x, y = y, clusters = "clusters")
##   8.   \-litr (local) getLassoLambda(X, y, alpha = alpha)
##   9.     \-glmnet::cv.glmnet(...)
##  10.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##      x
##   1. +-testthat::expect_error(...)
##   2. | \-testthat:::expect_condition_matching_(...)
##   3. |   \-testthat:::quasi_capture(...)
##   4. |     +-testthat (local) .capture(...)
##   5. |     | \-base::withCallingHandlers(...)
##   6. |     \-rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo))
##   7. \-litr (local) cssSelect(X = x, y = y, cutoff = 1.1)
##   8.   \-litr (local) getLassoLambda(X, y, alpha = alpha)
##   9.     \-glmnet::cv.glmnet(...)
##  10.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##      x
##   1. +-testthat::expect_error(...)
##   2. | \-testthat:::expect_condition_matching_(...)
##   3. |   \-testthat:::quasi_capture(...)
##   4. |     +-testthat (local) .capture(...)
##   5. |     | \-base::withCallingHandlers(...)
##   6. |     \-rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo))
##   7. \-litr (local) cssSelect(X = x, y = y, max_num_clusts = 1000)
##   8.   \-litr (local) getLassoLambda(X, y, alpha = alpha)
##   9.     \-glmnet::cv.glmnet(...)
##  10.       \-glmnet:::cv.glmnet.raw(...)
## Test passed with 219 successes.

Tests confirming that a non-default alpha is threaded through cssSelect() end-to-end and changes the selected set on a correlated-block design (this is the wrapper-level analogue of the cssLasso headline test; it is new behavior absent on main, where alpha could not affect selection):

testthat::test_that("cssSelect threads alpha through to selection", {
  set.seed(1)
  n <- 100
  p <- 20
  rho <- 0.9
  block <- 4
  Z <- matrix(stats::rnorm(n*p), n, p)
  common <- stats::rnorm(n)
  for(j in 1:block){
    Z[, j] <- sqrt(rho)*common + sqrt(1 - rho)*Z[, j]
  }
  y <- as.numeric(Z %*% c(rep(1, block), rep(0, p - block)) + stats::rnorm(n))

  # Treat the correlated block as one cluster; the remaining features are
  # singletons. Fix lambda and the model size so the only thing that varies
  # between the two calls is alpha.
  clusters <- list(block=1:block)

  res_lasso <- cssSelect(X=Z, y=y, clusters=clusters, lambda=3.72251,
                         max_num_clusts=p - block + 1, alpha=1)
  res_enet <- cssSelect(X=Z, y=y, clusters=clusters, lambda=3.72251,
                        max_num_clusts=p - block + 1, alpha=0.5)

  # Both return a well-formed selection
  for(res in list(res_lasso, res_enet)){
    testthat::expect_true(is.list(res))
    testthat::expect_true(all(c("selected_clusts", "selected_feats") %in%
                                 names(res)))
    testthat::expect_true(is.integer(res$selected_feats))
    testthat::expect_true(all(res$selected_feats %in% 1:p))
  }

  # The elastic net selects (weakly) more features than the pure lasso on this
  # correlated design, and the two selected sets differ.
  testthat::expect_true(length(res_enet$selected_feats) >=
                          length(res_lasso$selected_feats))
  testthat::expect_false(setequal(res_enet$selected_feats,
                                  res_lasso$selected_feats))
})
## Test passed with 10 successes.

Tests for cssPredict():

testthat::test_that("cssPredict works", {
  set.seed(84231)
  
  train_data <- genClusteredData(n=30, p=11, k_unclustered=2, cluster_size=3,
                              n_clusters=2, sig_clusters=1, sigma_eps_sq=1)
  
  x <- train_data$X
  y <- train_data$y
  
  test_x <- genClusteredData(n=5, p=11, k_unclustered=2, cluster_size=3,
                          n_clusters=2, sig_clusters=1, sigma_eps_sq=1)$X
  
  # Intentionally don't provide clusters for all features, mix up formatting,
  # etc.
  good_clusters <- list(red_cluster=1L:3L, 4:6)
  
  res <- cssPredict(X_train_selec=x, y_train_selec=y, X_test=test_x,
                    clusters=good_clusters)
  
  testthat::expect_true(all(!is.na(res)))
  testthat::expect_true(is.numeric(res))
  testthat::expect_equal(length(res), 5)
  
  # No provided clusters

  res <- cssPredict(X_train_selec=x, y_train_selec=y, X_test=test_x)

  testthat::expect_true(all(!is.na(res)))
  testthat::expect_true(is.numeric(res))
  testthat::expect_equal(length(res), 5)
  
  # Provide training indices
  
  res <- cssPredict(X_train_selec=x, y_train_selec=y, X_test=test_x,
                    train_inds=13:28)
  
  testthat::expect_true(all(!is.na(res)))
  testthat::expect_true(is.numeric(res))
  testthat::expect_equal(length(res), 5)

  ## Trying other inputs

  # X as a data.frame
  X_df <- datasets::mtcars
  
  n <- nrow(X_df)
  test_inds <- 1:round(n/3)
  n_test <- length(test_inds)
  selec_train_inds <- setdiff(1:n, test_inds)
  n_selec_train <- length(selec_train_inds)

  res <- cssPredict(X_train_selec=X_df[selec_train_inds, ],
                    y_train_selec=stats::rnorm(n_selec_train),
                    X_test=X_df[test_inds, ])
  
  testthat::expect_true(all(!is.na(res)))
  testthat::expect_true(is.numeric(res))
  testthat::expect_equal(length(res), n_test)

  # X as a dataframe with factors (number of columns of final design matrix
  # after one-hot encoding factors won't match number of columns of df2)
  # cyl, gear, and carb are factors with more than 2 levels
  df2 <- X_df
  df2$cyl <- as.factor(df2$cyl)
  df2$vs <- as.factor(df2$vs)
  df2$am <- as.factor(df2$am)
  df2$gear <- as.factor(df2$gear)
  df2$carb <- as.factor(df2$carb)
  
  # Should get error if clusters are provided because df2 contains factors with
  # more than two levels
  testthat::expect_error(cssPredict(X_train_selec=df2[selec_train_inds, ],
                                    y_train_selec=stats::rnorm(n_selec_train),
                                    X_test=df2[test_inds, ], clusters=1:3),
                         "When stats::model.matrix converted the provided data.frame X_train_selec to a matrix, the number of columns changed (probably because the provided data.frame contained a factor variable with at least three levels). Please convert X_train_selec to a matrix yourself using model.matrix and provide cluster assignments according to the columns of the new matrix.",
                         fixed=TRUE)
  
  # Should be fine if no clusters are provided
  res <- cssPredict(X_train_selec=df2[selec_train_inds, ],
                    y_train_selec=stats::rnorm(n_selec_train),
                    X_test=df2[test_inds, ])
  
  testthat::expect_true(all(!is.na(res)))
  testthat::expect_true(is.numeric(res))
  testthat::expect_equal(length(res), n_test)

  # X as a matrix with column names
  x2 <- x
  colnames(x2) <- LETTERS[1:11]
  test_x2 <- test_x
  colnames(test_x2) <- LETTERS[1:11]

  res <- cssPredict(X_train_selec=x2, y_train_selec=y, X_test=test_x2,
                    clusters=good_clusters)
  
  testthat::expect_true(all(!is.na(res)))
  testthat::expect_true(is.numeric(res))
  testthat::expect_equal(length(res), 5)

  # Vary inputs
  res <- cssPredict(X_train_selec=x, y_train_selec=y, X_test=test_x,
                    lambda=0.01)
  
  testthat::expect_true(all(!is.na(res)))
  testthat::expect_true(is.numeric(res))
  testthat::expect_equal(length(res), 5)

  res <- cssPredict(X_train_selec=x, y_train_selec=y, X_test=test_x, cutoff=0.6)
  
  testthat::expect_true(all(!is.na(res)))
  testthat::expect_true(is.numeric(res))
  testthat::expect_equal(length(res), 5)

  res <- cssPredict(X_train_selec=x, y_train_selec=y, X_test=test_x,
                    max_num_clusts=6)
  
  testthat::expect_true(all(!is.na(res)))
  testthat::expect_true(is.numeric(res))
  testthat::expect_equal(length(res), 5)

  res <- cssPredict(X_train_selec=x, y_train_selec=y, X_test=test_x,
                    auto_select_size=FALSE)

  testthat::expect_true(all(!is.na(res)))
  testthat::expect_true(is.numeric(res))
  testthat::expect_equal(length(res), 5)

  # Non-default alpha (elastic net) threaded through end-to-end; smoke test
  # confirming it returns a well-formed prediction vector of the right length.
  res <- cssPredict(X_train_selec=x, y_train_selec=y, X_test=test_x,
                    alpha=0.5)

  testthat::expect_true(all(!is.na(res)))
  testthat::expect_true(is.numeric(res))
  testthat::expect_equal(length(res), 5)

  # Bad inputs
  testthat::expect_error(cssPredict(X_train_selec=x[1:10, ], y_train_selec=y,
                                    X_test=test_x),
                         "length(y) == n is not TRUE", fixed=TRUE)

  testthat::expect_error(cssPredict(X_train_selec=character(30),
                                    y_train_selec=y, X_test=test_x),
                         "is.matrix(X_train_selec) | is.data.frame(X_train_selec) is not TRUE",
                         fixed=TRUE)

  testthat::expect_error(cssPredict(X_train_selec=x,
                                    y_train_selec=matrix(1:30, 10, 3),
                                    X_test=test_x), "!is.matrix(y) is not TRUE",
                         fixed=TRUE)

  testthat::expect_error(cssPredict(X_train_selec=x,
                                    y_train_selec=factor(rbinom(30, size=1,
                                                                prob=.5)),
                                    X_test=test_x),
                         "The provided y_train_selec must be real-valued, because predictions will be generated by ordinary least squares regression.",
                         fixed=TRUE)

  testthat::expect_error(cssPredict(X_train_selec=x, y_train_selec=y,
                                    X_test=test_x, clusters="clusters"),
                         "is.numeric(clusters) | is.integer(clusters) is not TRUE",
                         fixed=TRUE)

  testthat::expect_error(cssPredict(X_train_selec=x, y_train_selec=y,
                                    X_test=test_x, lambda="lambda"),
                         "For method cssLasso, lambda must be a numeric.",
                         fixed=TRUE)

  testthat::expect_error(cssPredict(X_train_selec=x, y_train_selec=y,
                                    X_test=test_x, cutoff=-.1),
                         "cutoff >= 0 is not TRUE", fixed=TRUE)

  testthat::expect_error(cssPredict(X_train_selec=x, y_train_selec=y,
                                    X_test=test_x, max_num_clusts=0),
                         "max_num_clusts >= 1 is not TRUE", fixed=TRUE)

  testthat::expect_error(cssPredict(X_train_selec=x, y_train_selec=y,
                                    X_test=test_x, auto_select_size=c(TRUE,
                                                                      FALSE)),
                         "length(auto_select_size) == 1 is not TRUE",
                         fixed=TRUE)

  # alpha = 0 (degenerate ridge) is rejected
  testthat::expect_error(cssPredict(X_train_selec=x, y_train_selec=y,
                                    X_test=test_x, alpha=0),
                         "alpha > 0 is not TRUE", fixed=TRUE)

  # alpha > 1 is rejected
  testthat::expect_error(cssPredict(X_train_selec=x, y_train_selec=y,
                                    X_test=test_x, alpha=1.5),
                         "alpha <= 1 is not TRUE", fixed=TRUE)
})
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssPredict(...)
##  2.   \-litr (local) getLassoLambda(...)
##  3.     \-glmnet::cv.glmnet(...)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssPredict(...)
##  2.   \-litr (local) getModelSize(...)
##  3.     \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssPredict(X_train_selec = x, y_train_selec = y, X_test = test_x)
##  2.   \-litr (local) getLassoLambda(...)
##  3.     \-glmnet::cv.glmnet(...)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssPredict(X_train_selec = x, y_train_selec = y, X_test = test_x)
##  2.   \-litr (local) getModelSize(...)
##  3.     \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssPredict(...)
##  2.   \-litr (local) getLassoLambda(...)
##  3.     \-glmnet::cv.glmnet(...)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssPredict(...)
##  2.   \-litr (local) getModelSize(...)
##  3.     \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssPredict(...)
##  2.   \-litr (local) getLassoLambda(...)
##  3.     \-glmnet::cv.glmnet(...)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssPredict(...)
##  2.   \-litr (local) getModelSize(...)
##  3.     \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssPredict(...)
##  2.   \-litr (local) getLassoLambda(...)
##  3.     \-glmnet::cv.glmnet(...)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssPredict(...)
##  2.   \-litr (local) getModelSize(...)
##  3.     \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssPredict(...)
##  2.   \-litr (local) getLassoLambda(...)
##  3.     \-glmnet::cv.glmnet(...)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssPredict(...)
##  2.   \-litr (local) getModelSize(...)
##  3.     \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssPredict(...)
##  2.   \-litr (local) getModelSize(...)
##  3.     \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssPredict(...)
##  2.   \-litr (local) getLassoLambda(...)
##  3.     \-glmnet::cv.glmnet(...)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssPredict(...)
##  2.   \-litr (local) getLassoLambda(...)
##  3.     \-glmnet::cv.glmnet(...)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssPredict(...)
##  2.   \-litr (local) getLassoLambda(...)
##  3.     \-glmnet::cv.glmnet(...)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssPredict(...)
##  2.   \-litr (local) getLassoLambda(...)
##  3.     \-glmnet::cv.glmnet(...)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##     x
##  1. \-litr (local) cssPredict(...)
##  2.   \-litr (local) getModelSize(...)
##  3.     \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
##  4.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##      x
##   1. +-testthat::expect_error(...)
##   2. | \-testthat:::expect_condition_matching_(...)
##   3. |   \-testthat:::quasi_capture(...)
##   4. |     +-testthat (local) .capture(...)
##   5. |     | \-base::withCallingHandlers(...)
##   6. |     \-rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo))
##   7. \-litr (local) cssPredict(X_train_selec = x[1:10, ], y_train_selec = y, X_test = test_x)
##   8.   \-litr (local) getLassoLambda(...)
##   9.     \-glmnet::cv.glmnet(...)
##  10.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##      x
##   1. +-testthat::expect_error(...)
##   2. | \-testthat:::expect_condition_matching_(...)
##   3. |   \-testthat:::quasi_capture(...)
##   4. |     +-testthat (local) .capture(...)
##   5. |     | \-base::withCallingHandlers(...)
##   6. |     \-rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo))
##   7. \-litr (local) cssPredict(...)
##   8.   \-litr (local) getLassoLambda(...)
##   9.     \-glmnet::cv.glmnet(...)
##  10.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##      x
##   1. +-testthat::expect_error(...)
##   2. | \-testthat:::expect_condition_matching_(...)
##   3. |   \-testthat:::quasi_capture(...)
##   4. |     +-testthat (local) .capture(...)
##   5. |     | \-base::withCallingHandlers(...)
##   6. |     \-rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo))
##   7. \-litr (local) cssPredict(...)
##   8.   \-litr (local) getLassoLambda(...)
##   9.     \-glmnet::cv.glmnet(...)
##  10.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##      x
##   1. +-testthat::expect_error(...)
##   2. | \-testthat:::expect_condition_matching_(...)
##   3. |   \-testthat:::quasi_capture(...)
##   4. |     +-testthat (local) .capture(...)
##   5. |     | \-base::withCallingHandlers(...)
##   6. |     \-rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo))
##   7. \-litr (local) cssPredict(...)
##   8.   \-litr (local) getLassoLambda(...)
##   9.     \-glmnet::cv.glmnet(...)
##  10.       \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
##      x
##   1. +-testthat::expect_error(...)
##   2. | \-testthat:::expect_condition_matching_(...)
##   3. |   \-testthat:::quasi_capture(...)
##   4. |     +-testthat (local) .capture(...)
##   5. |     | \-base::withCallingHandlers(...)
##   6. |     \-rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo))
##   7. \-litr (local) cssPredict(...)
##   8.   \-litr (local) getLassoLambda(...)
##   9.     \-glmnet::cv.glmnet(...)
##  10.       \-glmnet:::cv.glmnet.raw(...)
## Test passed with 45 successes.