11 Tests for wrapper functions
Tests for cssSelect():
testthat::test_that("cssSelect works", {
set.seed(73212)
data <- genClusteredData(n=15, p=11, k_unclustered=2, cluster_size=3,
n_clusters=2, sig_clusters=1, sigma_eps_sq=1)
x <- data$X
y <- data$y
# Intentionally don't provide clusters for all features, mix up formatting,
# etc.
good_clusters <- list(red_cluster=1L:3L, 4:6)
res <- cssSelect(X=x, y=y, clusters=good_clusters)
testthat::expect_true(is.list(res))
testthat::expect_equal(length(res), 3)
testthat::expect_identical(names(res), c("selected_clusts", "selected_feats",
"weights"))
testthat::expect_true(!is.null(names(res$selected_clusts)))
testthat::expect_true(is.character(names(res$selected_clusts)))
testthat::expect_true(length(res$selected_clusts) <=
length(res$selected_feats))
# Total of 11 - 2*(3 - 1) = 7 clusters
testthat::expect_true(length(res$selected_clusts) <= 7)
testthat::expect_true(length(res$selected_clusts) >= 1)
testthat::expect_true(is.list(res$selected_clusts))
testthat::expect_equal(length(names(res$selected_clusts)),
length(unique(names(res$selected_clusts))))
already_used_feats <- integer()
for(i in 1:length(res$selected_clusts)){
sels_i <- res$selected_clusts[[i]]
testthat::expect_true(length(sels_i) >= 1)
testthat::expect_true(is.integer(sels_i))
testthat::expect_true(all(sels_i %in% 1:11))
testthat::expect_equal(length(sels_i), length(unique(sels_i)))
testthat::expect_equal(length(intersect(already_used_feats, sels_i)), 0)
already_used_feats <- c(already_used_feats, sels_i)
}
testthat::expect_true(length(already_used_feats) <= 11)
testthat::expect_equal(length(already_used_feats),
length(unique(already_used_feats)))
testthat::expect_true(all(already_used_feats %in% 1:11))
testthat::expect_true(length(res$selected_feats) <= 11)
testthat::expect_true(is.integer(res$selected_feats))
testthat::expect_true(length(res$selected_feats) >= 1)
testthat::expect_equal(length(names(res$selected_feats)),
length(unique(names(res$selected_feats))))
testthat::expect_true(all(res$selected_feats >= 1))
testthat::expect_true(all(res$selected_feats <= 11))
testthat::expect_equal(length(res$selected_feats),
length(unique(res$selected_feats)))
# No provided clusters
res <- cssSelect(X=x, y=y)
testthat::expect_true(is.list(res))
testthat::expect_equal(length(res), 3)
testthat::expect_identical(names(res), c("selected_clusts", "selected_feats",
"weights"))
testthat::expect_true(!is.null(names(res$selected_clusts)))
testthat::expect_true(is.character(names(res$selected_clusts)))
testthat::expect_true(length(res$selected_clusts) <=
length(res$selected_feats))
testthat::expect_true(length(res$selected_clusts) <= 11)
testthat::expect_true(length(res$selected_clusts) >= 1)
testthat::expect_true(is.list(res$selected_clusts))
testthat::expect_equal(length(names(res$selected_clusts)),
length(unique(names(res$selected_clusts))))
already_used_feats <- integer()
for(i in 1:length(res$selected_clusts)){
sels_i <- res$selected_clusts[[i]]
testthat::expect_true(length(sels_i) >= 1)
testthat::expect_true(is.integer(sels_i))
testthat::expect_true(all(sels_i %in% 1:11))
testthat::expect_equal(length(sels_i), length(unique(sels_i)))
testthat::expect_equal(length(intersect(already_used_feats, sels_i)), 0)
already_used_feats <- c(already_used_feats, sels_i)
}
testthat::expect_true(length(already_used_feats) <= 11)
testthat::expect_equal(length(already_used_feats),
length(unique(already_used_feats)))
testthat::expect_true(all(already_used_feats %in% 1:11))
testthat::expect_true(length(res$selected_feats) <= 11)
testthat::expect_true(is.integer(res$selected_feats))
testthat::expect_true(length(res$selected_feats) >= 1)
testthat::expect_equal(length(names(res$selected_feats)),
length(unique(names(res$selected_feats))))
testthat::expect_true(all(res$selected_feats >= 1))
testthat::expect_true(all(res$selected_feats <= 11))
testthat::expect_equal(length(res$selected_feats),
length(unique(res$selected_feats)))
## Trying other inputs
# X as a data.frame
X_df <- datasets::mtcars
res <- cssSelect(X=X_df, y=stats::rnorm(nrow(X_df)), clusters=1:3)
testthat::expect_true(is.list(res))
testthat::expect_equal(length(res), 3)
testthat::expect_identical(names(res), c("selected_clusts", "selected_feats",
"weights"))
testthat::expect_true(!is.null(names(res$selected_clusts)))
testthat::expect_true(is.character(names(res$selected_clusts)))
testthat::expect_true(length(res$selected_clusts) <=
length(res$selected_feats))
# Total of ncol(X_df) - (3 - 1) clusters
testthat::expect_true(length(res$selected_clusts) <= ncol(X_df) - 2)
testthat::expect_true(length(res$selected_clusts) >= 1)
testthat::expect_true(is.list(res$selected_clusts))
testthat::expect_equal(length(names(res$selected_clusts)),
length(unique(names(res$selected_clusts))))
already_used_feats <- integer()
for(i in 1:length(res$selected_clusts)){
sels_i <- res$selected_clusts[[i]]
testthat::expect_true(length(sels_i) >= 1)
testthat::expect_true(is.integer(sels_i))
testthat::expect_true(all(sels_i %in% 1:ncol(X_df)))
testthat::expect_equal(length(sels_i), length(unique(sels_i)))
testthat::expect_equal(length(intersect(already_used_feats, sels_i)), 0)
already_used_feats <- c(already_used_feats, sels_i)
}
testthat::expect_true(length(already_used_feats) <= ncol(X_df))
testthat::expect_equal(length(already_used_feats),
length(unique(already_used_feats)))
testthat::expect_true(all(already_used_feats %in% 1:ncol(X_df)))
testthat::expect_true(length(res$selected_feats) <= ncol(X_df))
testthat::expect_true(is.integer(res$selected_feats))
testthat::expect_true(length(res$selected_feats) >= 1)
testthat::expect_equal(length(names(res$selected_feats)),
length(unique(names(res$selected_feats))))
testthat::expect_true(all(res$selected_feats >= 1))
testthat::expect_true(all(res$selected_feats <= ncol(X_df)))
testthat::expect_equal(length(res$selected_feats),
length(unique(res$selected_feats)))
# X as a dataframe with factors (number of columns of final design matrix
# after one-hot encoding factors won't match number of columns of df2)
# cyl, gear, and carb are factors with more than 2 levels
df2 <- X_df
df2$cyl <- as.factor(df2$cyl)
df2$vs <- as.factor(df2$vs)
df2$am <- as.factor(df2$am)
df2$gear <- as.factor(df2$gear)
df2$carb <- as.factor(df2$carb)
# Should get error if I try to use data.frame with clusters, since data.frame
# has factors with more than two levels
testthat::expect_error(cssSelect(X=df2, y=stats::rnorm(nrow(X_df)),
clusters=1:3),
"When stats::model.matrix converted the provided data.frame X to a matrix, the number of columns changed (probably because the provided data.frame contained a factor variable with at least three levels). Please convert the data.frame X to a matrix yourself using model.matrix and provide cluster assignments according to the columns of the new matrix.", fixed=TRUE)
# Should be fine if I don't use clusters
res <- cssSelect(X=df2, y=stats::rnorm(nrow(X_df)))
X_df_mat <- stats::model.matrix(~ ., df2)
X_df_mat <- X_df_mat[, colnames(X_df_mat) != "(Intercept)"]
p <- ncol(X_df_mat)
testthat::expect_true(is.list(res))
testthat::expect_equal(length(res), 3)
testthat::expect_identical(names(res), c("selected_clusts", "selected_feats",
"weights"))
testthat::expect_true(!is.null(names(res$selected_clusts)))
testthat::expect_true(is.character(names(res$selected_clusts)))
testthat::expect_true(length(res$selected_clusts) <=
length(res$selected_feats))
# Total of p - (3 - 1) clusters
testthat::expect_true(length(res$selected_clusts) <= p - 2)
testthat::expect_true(length(res$selected_clusts) >= 1)
testthat::expect_true(is.list(res$selected_clusts))
testthat::expect_equal(length(names(res$selected_clusts)),
length(unique(names(res$selected_clusts))))
already_used_feats <- integer()
for(i in 1:length(res$selected_clusts)){
sels_i <- res$selected_clusts[[i]]
testthat::expect_true(length(sels_i) >= 1)
testthat::expect_true(is.integer(sels_i))
testthat::expect_true(all(sels_i %in% 1:p))
testthat::expect_equal(length(sels_i), length(unique(sels_i)))
testthat::expect_equal(length(intersect(already_used_feats, sels_i)), 0)
already_used_feats <- c(already_used_feats, sels_i)
}
testthat::expect_true(length(already_used_feats) <= p)
testthat::expect_equal(length(already_used_feats),
length(unique(already_used_feats)))
testthat::expect_true(all(already_used_feats %in% 1:p))
testthat::expect_true(length(res$selected_feats) <= p)
testthat::expect_true(is.integer(res$selected_feats))
testthat::expect_true(length(res$selected_feats) >= 1)
testthat::expect_equal(length(names(res$selected_feats)),
length(unique(names(res$selected_feats))))
testthat::expect_true(all(res$selected_feats >= 1))
testthat::expect_true(all(res$selected_feats <= p))
testthat::expect_equal(length(res$selected_feats),
length(unique(res$selected_feats)))
# X as a matrix with column names
x2 <- x
colnames(x2) <- LETTERS[1:11]
res <- cssSelect(X=x2, y=y, clusters=good_clusters)
testthat::expect_true(is.list(res))
testthat::expect_equal(length(res), 3)
testthat::expect_identical(names(res), c("selected_clusts", "selected_feats",
"weights"))
testthat::expect_true(!is.null(names(res$selected_clusts)))
testthat::expect_true(is.character(names(res$selected_clusts)))
testthat::expect_true(length(res$selected_clusts) <=
length(res$selected_feats))
# Total of 11 - 2*(3 - 1) = 7 clusters
testthat::expect_true(length(res$selected_clusts) <= 7)
testthat::expect_true(length(res$selected_clusts) >= 1)
testthat::expect_true(is.list(res$selected_clusts))
testthat::expect_equal(length(names(res$selected_clusts)),
length(unique(names(res$selected_clusts))))
already_used_feats <- integer()
for(i in 1:length(res$selected_clusts)){
sels_i <- res$selected_clusts[[i]]
testthat::expect_true(length(sels_i) >= 1)
testthat::expect_true(is.integer(sels_i))
testthat::expect_true(all(sels_i %in% 1:11))
testthat::expect_equal(length(sels_i), length(unique(sels_i)))
testthat::expect_equal(length(intersect(already_used_feats, sels_i)), 0)
already_used_feats <- c(already_used_feats, sels_i)
}
testthat::expect_true(length(already_used_feats) <= 11)
testthat::expect_equal(length(already_used_feats),
length(unique(already_used_feats)))
testthat::expect_true(all(already_used_feats %in% 1:11))
testthat::expect_true(length(res$selected_feats) <= 11)
testthat::expect_true(is.integer(res$selected_feats))
testthat::expect_true(length(res$selected_feats) >= 1)
testthat::expect_equal(length(names(res$selected_feats)),
length(unique(names(res$selected_feats))))
testthat::expect_true(all(res$selected_feats >= 1))
testthat::expect_true(all(res$selected_feats <= 11))
testthat::expect_equal(length(res$selected_feats),
length(unique(res$selected_feats)))
# Vary inputs
res <- cssSelect(X=x, y=y, clusters=good_clusters, lambda=0.01)
testthat::expect_true(is.list(res))
testthat::expect_equal(length(res), 3)
testthat::expect_identical(names(res), c("selected_clusts", "selected_feats",
"weights"))
res <- cssSelect(X=x, y=y, clusters=good_clusters, cutoff=0.6)
testthat::expect_true(is.list(res))
testthat::expect_equal(length(res), 3)
testthat::expect_identical(names(res), c("selected_clusts", "selected_feats",
"weights"))
res <- cssSelect(X=x, y=y, clusters=good_clusters, max_num_clusts=6)
testthat::expect_true(is.list(res))
testthat::expect_equal(length(res), 3)
testthat::expect_identical(names(res), c("selected_clusts", "selected_feats",
"weights"))
testthat::expect_true(length(res$selected_clusts) <= 6)
res <- cssSelect(X=x, y=y, clusters=good_clusters, auto_select_size=FALSE)
testthat::expect_true(is.list(res))
testthat::expect_equal(length(res), 3)
testthat::expect_identical(names(res), c("selected_clusts", "selected_feats",
"weights"))
# Total of 11 - 2*(3 - 1) = 7 clusters
testthat::expect_equal(length(res$selected_clusts), 7)
# Bad inputs
testthat::expect_error(cssSelect(X=x[1:10, ], y=y),
"n == length(y) is not TRUE", fixed=TRUE)
testthat::expect_error(cssSelect(X=character(5), y=y),
"is.matrix(X) | is.data.frame(X) is not TRUE",
fixed=TRUE)
testthat::expect_error(cssSelect(X=x, y=matrix(1:15, 5, 3)),
"!is.matrix(y) is not TRUE", fixed=TRUE)
testthat::expect_error(cssSelect(X=x, y=factor(rbinom(15, size=1, prob=.5))),
"The provided y must be real-valued, because cssSelect uses the lasso for feature selection. (In order to use a different form of response, use the css function and provide your own selection function accommodating your choice of y.)",
fixed=TRUE)
testthat::expect_error(cssSelect(X=x, y=y, clusters="clusters"),
"is.numeric(clusters) | is.integer(clusters) is not TRUE",
fixed=TRUE)
testthat::expect_error(cssSelect(X=x, y=y, lambda=-.1),
"For method cssLasso, lambda must be nonnegative.",
fixed=TRUE)
testthat::expect_error(cssSelect(X=x, y=y, cutoff=1.1),
"cutoff <= 1 is not TRUE", fixed=TRUE)
testthat::expect_error(cssSelect(X=x, y=y, max_num_clusts=1000),
"max_num_clusts <= p is not TRUE", fixed=TRUE)
testthat::expect_error(cssSelect(X=x, y=y, auto_select_size=1),
"is.logical(auto_select_size) is not TRUE", fixed=TRUE)
# alpha = 0 (degenerate ridge) is rejected
testthat::expect_error(cssSelect(X=x, y=y, alpha=0),
"alpha > 0 is not TRUE", fixed=TRUE)
# alpha > 1 is rejected
testthat::expect_error(cssSelect(X=x, y=y, alpha=1.5),
"alpha <= 1 is not TRUE", fixed=TRUE)
# alpha = NA is rejected with a clear message (rather than a cryptic error
# deep inside the bundling/subsampling logic). A bare (logical) NA trips the
# type check first; a numeric NA reaches the dedicated !is.na guard.
testthat::expect_error(cssSelect(X=x, y=y, alpha=NA),
"is.numeric(alpha) | is.integer(alpha) is not TRUE",
fixed=TRUE)
testthat::expect_error(cssSelect(X=x, y=y, alpha=NA_real_),
"!is.na(alpha) is not TRUE", fixed=TRUE)
# An NA in the design matrix yields the shared friendly message (#56).
x_na <- x; x_na[2, 1] <- NA
testthat::expect_error(cssSelect(X=x_na, y=y),
"must not contain missing", fixed=TRUE)
})## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssSelect(X = x, y = y, clusters = good_clusters)
## 2. \-litr (local) getLassoLambda(X, y, alpha = alpha)
## 3. \-glmnet::cv.glmnet(...)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssSelect(X = x, y = y, clusters = good_clusters)
## 2. \-litr (local) getModelSize(X, y, css_results$clusters, alpha = alpha)
## 3. \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssSelect(X = x, y = y)
## 2. \-litr (local) getLassoLambda(X, y, alpha = alpha)
## 3. \-glmnet::cv.glmnet(...)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssSelect(X = x, y = y)
## 2. \-litr (local) getModelSize(X, y, css_results$clusters, alpha = alpha)
## 3. \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssSelect(X = X_df, y = stats::rnorm(nrow(X_df)), clusters = 1:3)
## 2. \-litr (local) getLassoLambda(X, y, alpha = alpha)
## 3. \-glmnet::cv.glmnet(...)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Returning more than max_num_clusts = 1 clusters because increasing the cutoff any further would require returning 0 clusters
## Backtrace:
## x
## 1. \-litr (local) cssSelect(X = X_df, y = stats::rnorm(nrow(X_df)), clusters = 1:3)
## 2. \-litr (local) getCssSelections(...)
## 3. \-litr (local) getSelectedClusters(...)
## 4. \-litr (local) checkSelectedClusters(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssSelect(X = df2, y = stats::rnorm(nrow(X_df)))
## 2. \-litr (local) getLassoLambda(X, y, alpha = alpha)
## 3. \-glmnet::cv.glmnet(...)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssSelect(X = x2, y = y, clusters = good_clusters)
## 2. \-litr (local) getLassoLambda(X, y, alpha = alpha)
## 3. \-glmnet::cv.glmnet(...)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssSelect(X = x2, y = y, clusters = good_clusters)
## 2. \-litr (local) getModelSize(X, y, css_results$clusters, alpha = alpha)
## 3. \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssSelect(X = x, y = y, clusters = good_clusters, lambda = 0.01)
## 2. \-litr (local) getModelSize(X, y, css_results$clusters, alpha = alpha)
## 3. \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssSelect(X = x, y = y, clusters = good_clusters, cutoff = 0.6)
## 2. \-litr (local) getLassoLambda(X, y, alpha = alpha)
## 3. \-glmnet::cv.glmnet(...)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssSelect(X = x, y = y, clusters = good_clusters, max_num_clusts = 6)
## 2. \-litr (local) getLassoLambda(X, y, alpha = alpha)
## 3. \-glmnet::cv.glmnet(...)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssSelect(X = x, y = y, clusters = good_clusters, auto_select_size = FALSE)
## 2. \-litr (local) getLassoLambda(X, y, alpha = alpha)
## 3. \-glmnet::cv.glmnet(...)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. +-testthat::expect_error(...)
## 2. | \-testthat:::expect_condition_matching_(...)
## 3. | \-testthat:::quasi_capture(...)
## 4. | +-testthat (local) .capture(...)
## 5. | | \-base::withCallingHandlers(...)
## 6. | \-rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo))
## 7. \-litr (local) cssSelect(X = x, y = matrix(1:15, 5, 3))
## 8. \-litr (local) getLassoLambda(X, y, alpha = alpha)
## 9. \-glmnet::cv.glmnet(...)
## 10. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. +-testthat::expect_error(...)
## 2. | \-testthat:::expect_condition_matching_(...)
## 3. | \-testthat:::quasi_capture(...)
## 4. | +-testthat (local) .capture(...)
## 5. | | \-base::withCallingHandlers(...)
## 6. | \-rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo))
## 7. \-litr (local) cssSelect(X = x, y = y, clusters = "clusters")
## 8. \-litr (local) getLassoLambda(X, y, alpha = alpha)
## 9. \-glmnet::cv.glmnet(...)
## 10. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. +-testthat::expect_error(...)
## 2. | \-testthat:::expect_condition_matching_(...)
## 3. | \-testthat:::quasi_capture(...)
## 4. | +-testthat (local) .capture(...)
## 5. | | \-base::withCallingHandlers(...)
## 6. | \-rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo))
## 7. \-litr (local) cssSelect(X = x, y = y, cutoff = 1.1)
## 8. \-litr (local) getLassoLambda(X, y, alpha = alpha)
## 9. \-glmnet::cv.glmnet(...)
## 10. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssSelect works ----------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. +-testthat::expect_error(...)
## 2. | \-testthat:::expect_condition_matching_(...)
## 3. | \-testthat:::quasi_capture(...)
## 4. | +-testthat (local) .capture(...)
## 5. | | \-base::withCallingHandlers(...)
## 6. | \-rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo))
## 7. \-litr (local) cssSelect(X = x, y = y, max_num_clusts = 1000)
## 8. \-litr (local) getLassoLambda(X, y, alpha = alpha)
## 9. \-glmnet::cv.glmnet(...)
## 10. \-glmnet:::cv.glmnet.raw(...)
## Test passed with 219 successes.
Tests confirming that a non-default alpha is threaded through cssSelect()
end-to-end and changes the selected set on a correlated-block design (this is
the wrapper-level analogue of the cssLasso headline test; it is new behavior
absent on main, where alpha could not affect selection):
testthat::test_that("cssSelect threads alpha through to selection", {
set.seed(1)
n <- 100
p <- 20
rho <- 0.9
block <- 4
Z <- matrix(stats::rnorm(n*p), n, p)
common <- stats::rnorm(n)
for(j in 1:block){
Z[, j] <- sqrt(rho)*common + sqrt(1 - rho)*Z[, j]
}
y <- as.numeric(Z %*% c(rep(1, block), rep(0, p - block)) + stats::rnorm(n))
# Treat the correlated block as one cluster; the remaining features are
# singletons. Fix lambda and the model size so the only thing that varies
# between the two calls is alpha.
clusters <- list(block=1:block)
res_lasso <- cssSelect(X=Z, y=y, clusters=clusters, lambda=3.72251,
max_num_clusts=p - block + 1, alpha=1)
res_enet <- cssSelect(X=Z, y=y, clusters=clusters, lambda=3.72251,
max_num_clusts=p - block + 1, alpha=0.5)
# Both return a well-formed selection
for(res in list(res_lasso, res_enet)){
testthat::expect_true(is.list(res))
testthat::expect_true(all(c("selected_clusts", "selected_feats") %in%
names(res)))
testthat::expect_true(is.integer(res$selected_feats))
testthat::expect_true(all(res$selected_feats %in% 1:p))
}
# The elastic net selects (weakly) more features than the pure lasso on this
# correlated design, and the two selected sets differ.
testthat::expect_true(length(res_enet$selected_feats) >=
length(res_lasso$selected_feats))
testthat::expect_false(setequal(res_enet$selected_feats,
res_lasso$selected_feats))
})## Test passed with 10 successes.
Tests for cssPredict():
testthat::test_that("cssPredict works", {
set.seed(84231)
train_data <- genClusteredData(n=30, p=11, k_unclustered=2, cluster_size=3,
n_clusters=2, sig_clusters=1, sigma_eps_sq=1)
x <- train_data$X
y <- train_data$y
test_x <- genClusteredData(n=5, p=11, k_unclustered=2, cluster_size=3,
n_clusters=2, sig_clusters=1, sigma_eps_sq=1)$X
# Intentionally don't provide clusters for all features, mix up formatting,
# etc.
good_clusters <- list(red_cluster=1L:3L, 4:6)
res <- cssPredict(X_train_selec=x, y_train_selec=y, X_test=test_x,
clusters=good_clusters)
testthat::expect_true(all(!is.na(res)))
testthat::expect_true(is.numeric(res))
testthat::expect_equal(length(res), 5)
# No provided clusters
res <- cssPredict(X_train_selec=x, y_train_selec=y, X_test=test_x)
testthat::expect_true(all(!is.na(res)))
testthat::expect_true(is.numeric(res))
testthat::expect_equal(length(res), 5)
# Provide training indices
res <- cssPredict(X_train_selec=x, y_train_selec=y, X_test=test_x,
train_inds=13:28)
testthat::expect_true(all(!is.na(res)))
testthat::expect_true(is.numeric(res))
testthat::expect_equal(length(res), 5)
## Trying other inputs
# X as a data.frame
X_df <- datasets::mtcars
n <- nrow(X_df)
test_inds <- 1:round(n/3)
n_test <- length(test_inds)
selec_train_inds <- setdiff(1:n, test_inds)
n_selec_train <- length(selec_train_inds)
res <- cssPredict(X_train_selec=X_df[selec_train_inds, ],
y_train_selec=stats::rnorm(n_selec_train),
X_test=X_df[test_inds, ])
testthat::expect_true(all(!is.na(res)))
testthat::expect_true(is.numeric(res))
testthat::expect_equal(length(res), n_test)
# X as a dataframe with factors (number of columns of final design matrix
# after one-hot encoding factors won't match number of columns of df2)
# cyl, gear, and carb are factors with more than 2 levels
df2 <- X_df
df2$cyl <- as.factor(df2$cyl)
df2$vs <- as.factor(df2$vs)
df2$am <- as.factor(df2$am)
df2$gear <- as.factor(df2$gear)
df2$carb <- as.factor(df2$carb)
# Should get error if clusters are provided because df2 contains factors with
# more than two levels
testthat::expect_error(cssPredict(X_train_selec=df2[selec_train_inds, ],
y_train_selec=stats::rnorm(n_selec_train),
X_test=df2[test_inds, ], clusters=1:3),
"When stats::model.matrix converted the provided data.frame X_train_selec to a matrix, the number of columns changed (probably because the provided data.frame contained a factor variable with at least three levels). Please convert X_train_selec to a matrix yourself using model.matrix and provide cluster assignments according to the columns of the new matrix.",
fixed=TRUE)
# Should be fine if no clusters are provided
res <- cssPredict(X_train_selec=df2[selec_train_inds, ],
y_train_selec=stats::rnorm(n_selec_train),
X_test=df2[test_inds, ])
testthat::expect_true(all(!is.na(res)))
testthat::expect_true(is.numeric(res))
testthat::expect_equal(length(res), n_test)
# X as a matrix with column names
x2 <- x
colnames(x2) <- LETTERS[1:11]
test_x2 <- test_x
colnames(test_x2) <- LETTERS[1:11]
res <- cssPredict(X_train_selec=x2, y_train_selec=y, X_test=test_x2,
clusters=good_clusters)
testthat::expect_true(all(!is.na(res)))
testthat::expect_true(is.numeric(res))
testthat::expect_equal(length(res), 5)
# Vary inputs
res <- cssPredict(X_train_selec=x, y_train_selec=y, X_test=test_x,
lambda=0.01)
testthat::expect_true(all(!is.na(res)))
testthat::expect_true(is.numeric(res))
testthat::expect_equal(length(res), 5)
res <- cssPredict(X_train_selec=x, y_train_selec=y, X_test=test_x, cutoff=0.6)
testthat::expect_true(all(!is.na(res)))
testthat::expect_true(is.numeric(res))
testthat::expect_equal(length(res), 5)
res <- cssPredict(X_train_selec=x, y_train_selec=y, X_test=test_x,
max_num_clusts=6)
testthat::expect_true(all(!is.na(res)))
testthat::expect_true(is.numeric(res))
testthat::expect_equal(length(res), 5)
res <- cssPredict(X_train_selec=x, y_train_selec=y, X_test=test_x,
auto_select_size=FALSE)
testthat::expect_true(all(!is.na(res)))
testthat::expect_true(is.numeric(res))
testthat::expect_equal(length(res), 5)
# Non-default alpha (elastic net) threaded through end-to-end; smoke test
# confirming it returns a well-formed prediction vector of the right length.
res <- cssPredict(X_train_selec=x, y_train_selec=y, X_test=test_x,
alpha=0.5)
testthat::expect_true(all(!is.na(res)))
testthat::expect_true(is.numeric(res))
testthat::expect_equal(length(res), 5)
# Bad inputs
testthat::expect_error(cssPredict(X_train_selec=x[1:10, ], y_train_selec=y,
X_test=test_x),
"length(y) == n is not TRUE", fixed=TRUE)
testthat::expect_error(cssPredict(X_train_selec=character(30),
y_train_selec=y, X_test=test_x),
"is.matrix(X_train_selec) | is.data.frame(X_train_selec) is not TRUE",
fixed=TRUE)
testthat::expect_error(cssPredict(X_train_selec=x,
y_train_selec=matrix(1:30, 10, 3),
X_test=test_x), "!is.matrix(y) is not TRUE",
fixed=TRUE)
testthat::expect_error(cssPredict(X_train_selec=x,
y_train_selec=factor(rbinom(30, size=1,
prob=.5)),
X_test=test_x),
"The provided y_train_selec must be real-valued, because predictions will be generated by ordinary least squares regression.",
fixed=TRUE)
testthat::expect_error(cssPredict(X_train_selec=x, y_train_selec=y,
X_test=test_x, clusters="clusters"),
"is.numeric(clusters) | is.integer(clusters) is not TRUE",
fixed=TRUE)
testthat::expect_error(cssPredict(X_train_selec=x, y_train_selec=y,
X_test=test_x, lambda="lambda"),
"For method cssLasso, lambda must be a numeric.",
fixed=TRUE)
testthat::expect_error(cssPredict(X_train_selec=x, y_train_selec=y,
X_test=test_x, cutoff=-.1),
"cutoff >= 0 is not TRUE", fixed=TRUE)
testthat::expect_error(cssPredict(X_train_selec=x, y_train_selec=y,
X_test=test_x, max_num_clusts=0),
"max_num_clusts >= 1 is not TRUE", fixed=TRUE)
testthat::expect_error(cssPredict(X_train_selec=x, y_train_selec=y,
X_test=test_x, auto_select_size=c(TRUE,
FALSE)),
"length(auto_select_size) == 1 is not TRUE",
fixed=TRUE)
# alpha = 0 (degenerate ridge) is rejected
testthat::expect_error(cssPredict(X_train_selec=x, y_train_selec=y,
X_test=test_x, alpha=0),
"alpha > 0 is not TRUE", fixed=TRUE)
# alpha > 1 is rejected
testthat::expect_error(cssPredict(X_train_selec=x, y_train_selec=y,
X_test=test_x, alpha=1.5),
"alpha <= 1 is not TRUE", fixed=TRUE)
})## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssPredict(...)
## 2. \-litr (local) getLassoLambda(...)
## 3. \-glmnet::cv.glmnet(...)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssPredict(...)
## 2. \-litr (local) getModelSize(...)
## 3. \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssPredict(X_train_selec = x, y_train_selec = y, X_test = test_x)
## 2. \-litr (local) getLassoLambda(...)
## 3. \-glmnet::cv.glmnet(...)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssPredict(X_train_selec = x, y_train_selec = y, X_test = test_x)
## 2. \-litr (local) getModelSize(...)
## 3. \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssPredict(...)
## 2. \-litr (local) getLassoLambda(...)
## 3. \-glmnet::cv.glmnet(...)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssPredict(...)
## 2. \-litr (local) getModelSize(...)
## 3. \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssPredict(...)
## 2. \-litr (local) getLassoLambda(...)
## 3. \-glmnet::cv.glmnet(...)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssPredict(...)
## 2. \-litr (local) getModelSize(...)
## 3. \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssPredict(...)
## 2. \-litr (local) getLassoLambda(...)
## 3. \-glmnet::cv.glmnet(...)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssPredict(...)
## 2. \-litr (local) getModelSize(...)
## 3. \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssPredict(...)
## 2. \-litr (local) getLassoLambda(...)
## 3. \-glmnet::cv.glmnet(...)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssPredict(...)
## 2. \-litr (local) getModelSize(...)
## 3. \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssPredict(...)
## 2. \-litr (local) getModelSize(...)
## 3. \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssPredict(...)
## 2. \-litr (local) getLassoLambda(...)
## 3. \-glmnet::cv.glmnet(...)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssPredict(...)
## 2. \-litr (local) getLassoLambda(...)
## 3. \-glmnet::cv.glmnet(...)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssPredict(...)
## 2. \-litr (local) getLassoLambda(...)
## 3. \-glmnet::cv.glmnet(...)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssPredict(...)
## 2. \-litr (local) getLassoLambda(...)
## 3. \-glmnet::cv.glmnet(...)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. \-litr (local) cssPredict(...)
## 2. \-litr (local) getModelSize(...)
## 3. \-glmnet::cv.glmnet(x = X_size, y = y, family = "gaussian", alpha = alpha)
## 4. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. +-testthat::expect_error(...)
## 2. | \-testthat:::expect_condition_matching_(...)
## 3. | \-testthat:::quasi_capture(...)
## 4. | +-testthat (local) .capture(...)
## 5. | | \-base::withCallingHandlers(...)
## 6. | \-rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo))
## 7. \-litr (local) cssPredict(X_train_selec = x[1:10, ], y_train_selec = y, X_test = test_x)
## 8. \-litr (local) getLassoLambda(...)
## 9. \-glmnet::cv.glmnet(...)
## 10. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. +-testthat::expect_error(...)
## 2. | \-testthat:::expect_condition_matching_(...)
## 3. | \-testthat:::quasi_capture(...)
## 4. | +-testthat (local) .capture(...)
## 5. | | \-base::withCallingHandlers(...)
## 6. | \-rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo))
## 7. \-litr (local) cssPredict(...)
## 8. \-litr (local) getLassoLambda(...)
## 9. \-glmnet::cv.glmnet(...)
## 10. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. +-testthat::expect_error(...)
## 2. | \-testthat:::expect_condition_matching_(...)
## 3. | \-testthat:::quasi_capture(...)
## 4. | +-testthat (local) .capture(...)
## 5. | | \-base::withCallingHandlers(...)
## 6. | \-rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo))
## 7. \-litr (local) cssPredict(...)
## 8. \-litr (local) getLassoLambda(...)
## 9. \-glmnet::cv.glmnet(...)
## 10. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. +-testthat::expect_error(...)
## 2. | \-testthat:::expect_condition_matching_(...)
## 3. | \-testthat:::quasi_capture(...)
## 4. | +-testthat (local) .capture(...)
## 5. | | \-base::withCallingHandlers(...)
## 6. | \-rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo))
## 7. \-litr (local) cssPredict(...)
## 8. \-litr (local) getLassoLambda(...)
## 9. \-glmnet::cv.glmnet(...)
## 10. \-glmnet:::cv.glmnet.raw(...)
## -- Warning: cssPredict works ---------------------------------------------------
## Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
## Backtrace:
## x
## 1. +-testthat::expect_error(...)
## 2. | \-testthat:::expect_condition_matching_(...)
## 3. | \-testthat:::quasi_capture(...)
## 4. | +-testthat (local) .capture(...)
## 5. | | \-base::withCallingHandlers(...)
## 6. | \-rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo))
## 7. \-litr (local) cssPredict(...)
## 8. \-litr (local) getLassoLambda(...)
## 9. \-glmnet::cv.glmnet(...)
## 10. \-glmnet:::cv.glmnet.raw(...)
## Test passed with 45 successes.