diff --git a/.github/workflows/dev-cmd-check.yml b/.github/workflows/dev-cmd-check.yml index a839b224..ac9760c3 100644 --- a/.github/workflows/dev-cmd-check.yml +++ b/.github/workflows/dev-cmd-check.yml @@ -35,6 +35,7 @@ jobs: matrix: config: - {os: ubuntu-latest, r: 'release', dev-package: 'mlr-org/paradox'} + - {os: ubuntu-latest, r: 'release', dev-package: 'mlr-org/mlr3@weights_reworked'} steps: - uses: actions/checkout@v4 diff --git a/.gitignore b/.gitignore index 78df5cc9..61e1e951 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ # Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,r # Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,linux,r +tests/testthat/Rplots.pdf + ### Linux ### *~ diff --git a/R/LearnerClustAffinityPropagation.R b/R/LearnerClustAffinityPropagation.R index f0afd1c2..fcc84be5 100644 --- a/R/LearnerClustAffinityPropagation.R +++ b/R/LearnerClustAffinityPropagation.R @@ -66,7 +66,7 @@ LearnerClustAP = R6Class("LearnerClustAP", }, .predict = function(task) { - pv = self$param_set$get_values() + pv = self$param_set$get_values(tags = "train") sim_func = pv$s exemplar_data = attributes(self$model)$exemplar_data diff --git a/R/LearnerClustAgnes.R b/R/LearnerClustAgnes.R index 5cdebea3..f58b8a98 100644 --- a/R/LearnerClustAgnes.R +++ b/R/LearnerClustAgnes.R @@ -32,7 +32,7 @@ LearnerClustAgnes = R6Class("LearnerClustAgnes", tags = "train" ), trace.lev = p_int(0L, default = 0L, tags = "train"), - k = p_int(1L, default = 2L, tags = "predict"), + k = p_int(1L, default = 2L, tags = c("train", "predict")), par.method = p_uty( tags = "train", depends = quote(method %in% c("flexible", "gaverage")), @@ -65,7 +65,7 @@ LearnerClustAgnes = R6Class("LearnerClustAgnes", ), private = list( .train = function(task) { - pv = self$param_set$get_values() + pv = self$param_set$get_values(tags = "train") m = invoke(cluster::agnes, x = task$data(), diss = FALSE, diff --git a/R/LearnerClustCMeans.R b/R/LearnerClustCMeans.R index 763e77f6..c24a2e9b 100644 --- a/R/LearnerClustCMeans.R +++ b/R/LearnerClustCMeans.R @@ -34,13 +34,7 @@ LearnerClustCMeans = R6Class("LearnerClustCMeans", method = p_fct(levels = c("cmeans", "ufcl"), default = "cmeans", tags = "train"), m = p_dbl(1, default = 2, tags = "train"), rate.par = p_dbl(0, 1, tags = "train", depends = quote(method == "ufcl")), - weights = p_uty(default = 1L, tags = "train", custom_check = crate(function(x) { - if (test_numeric(x) && all(x > 0) || check_count(x, positive = TRUE)) { - TRUE - } else { - "`weights` must be positive numeric vector or a single positive number" - } - })), + use_weights = p_lgl(default = FALSE, tags = "train"), control = p_uty(tags = "train") ) @@ -51,7 +45,7 @@ LearnerClustCMeans = R6Class("LearnerClustCMeans", feature_types = c("logical", "integer", "numeric"), predict_types = c("partition", "prob"), param_set = param_set, - properties = c("partitional", "fuzzy", "complete"), + properties = c("partitional", "fuzzy", "complete", "weights"), packages = "e1071", man = "mlr3cluster::mlr_learners_clust.cmeans", label = "Fuzzy C-Means Clustering Learner" @@ -63,6 +57,11 @@ LearnerClustCMeans = R6Class("LearnerClustCMeans", pv = self$param_set$get_values(tags = "train") assert_centers_param(pv$centers, task, test_data_frame, "centers") + if (isTRUE(pv$use_weights)) { + pv$weights = task$weights_learner$weight + } + pv$use_weights = NULL + m = invoke(e1071::cmeans, x = task$data(), .args = pv, .opts = allow_partial_matching) if (self$save_assignments) { self$assignments = m$cluster diff --git a/R/LearnerClustDBSCAN.R b/R/LearnerClustDBSCAN.R index 7386485b..b4a11333 100644 --- a/R/LearnerClustDBSCAN.R +++ b/R/LearnerClustDBSCAN.R @@ -25,7 +25,7 @@ LearnerClustDBSCAN = R6Class("LearnerClustDBSCAN", eps = p_dbl(0, tags = c("required", "train")), minPts = p_int(0L, default = 5L, tags = "train"), borderPoints = p_lgl(default = TRUE, tags = "train"), - weights = p_uty(tags = "train", custom_check = check_numeric), + use_weights = p_lgl(default = FALSE, tags = "train"), search = p_fct(levels = c("kdtree", "linear", "dist"), default = "kdtree", tags = "train"), bucketSize = p_int(1L, default = 10L, tags = "train", depends = quote(search == "kdtree")), splitRule = p_fct( @@ -42,7 +42,7 @@ LearnerClustDBSCAN = R6Class("LearnerClustDBSCAN", feature_types = c("logical", "integer", "numeric"), predict_types = "partition", param_set = param_set, - properties = c("density", "exclusive", "complete"), + properties = c("density", "exclusive", "complete", "weights"), packages = "dbscan", man = "mlr3cluster::mlr_learners_clust.dbscan", label = "Density-Based Clustering" @@ -52,11 +52,19 @@ LearnerClustDBSCAN = R6Class("LearnerClustDBSCAN", private = list( .train = function(task) { pv = self$param_set$get_values(tags = "train") - m = invoke(dbscan::dbscan, x = task$data(), .args = pv) - m = insert_named(m, list(data = task$data())) + data = task$data() + + if (isTRUE(pv$use_weights)) { + pv$weights = task$weights_learner$weight + } + pv$use_weights = NULL + + m = invoke(dbscan::dbscan, x = data, .args = pv) + m = insert_named(m, list(data = data)) if (self$save_assignments) { self$assignments = m$cluster } + m }, diff --git a/R/LearnerClustDBSCANfpc.R b/R/LearnerClustDBSCANfpc.R index 958acb5a..26ab2690 100644 --- a/R/LearnerClustDBSCANfpc.R +++ b/R/LearnerClustDBSCANfpc.R @@ -62,8 +62,9 @@ LearnerClustDBSCANfpc = R6Class("LearnerClustDBSCANfpc", private = list( .train = function(task) { pv = self$param_set$get_values(tags = "train") - m = invoke(fpc::dbscan, data = task$data(), .args = pv) - m = insert_named(m, list(data = task$data())) + data = task$data() + m = invoke(fpc::dbscan, data = data, .args = pv) + m = insert_named(m, list(data = data)) if (self$save_assignments) { self$assignments = m$cluster } diff --git a/R/LearnerClustDiana.R b/R/LearnerClustDiana.R index 235b7105..547ea338 100644 --- a/R/LearnerClustDiana.R +++ b/R/LearnerClustDiana.R @@ -27,7 +27,7 @@ LearnerClustDiana = R6Class("LearnerClustDiana", metric = p_fct(default = "euclidean", levels = c("euclidean", "manhattan"), tags = "train"), stand = p_lgl(default = FALSE, tags = "train"), trace.lev = p_int(0L, default = 0L, tags = "train"), - k = p_int(1L, default = 2L, tags = "predict") + k = p_int(1L, default = 2L, tags = c("train", "predict")) ) param_set$set_values(k = 2L) @@ -46,7 +46,7 @@ LearnerClustDiana = R6Class("LearnerClustDiana", ), private = list( .train = function(task) { - pv = self$param_set$get_values() + pv = self$param_set$get_values(tags = "train") m = invoke(cluster::diana, x = task$data(), diss = FALSE, diff --git a/R/LearnerClustHDBSCAN.R b/R/LearnerClustHDBSCAN.R index 37645301..c734d3da 100644 --- a/R/LearnerClustHDBSCAN.R +++ b/R/LearnerClustHDBSCAN.R @@ -43,8 +43,9 @@ LearnerClustHDBSCAN = R6Class("LearnerClustHDBSCAN", private = list( .train = function(task) { pv = self$param_set$get_values(tags = "train") - m = invoke(dbscan::hdbscan, x = task$data(), .args = pv) - m = insert_named(m, list(data = task$data())) + data = task$data() + m = invoke(dbscan::hdbscan, x = data, .args = pv) + m = insert_named(m, list(data = data)) if (self$save_assignments) { self$assignments = m$cluster diff --git a/R/LearnerClustHclust.R b/R/LearnerClustHclust.R index 482b64ea..db97aa45 100644 --- a/R/LearnerClustHclust.R +++ b/R/LearnerClustHclust.R @@ -35,7 +35,7 @@ LearnerClustHclust = R6Class("LearnerClustHclust", diag = p_lgl(default = FALSE, tags = c("train", "dist")), upper = p_lgl(default = FALSE, tags = c("train", "dist")), p = p_dbl(default = 2, tags = c("train", "dist"), depends = quote(distmethod == "minkowski")), - k = p_int(1L, default = 2L, tags = "predict") + k = p_int(1L, default = 2L, tags = c("train", "predict")) ) param_set$set_values(k = 2L, distmethod = "euclidean") @@ -54,7 +54,7 @@ LearnerClustHclust = R6Class("LearnerClustHclust", ), private = list( .train = function(task) { - pv = self$param_set$get_values() + pv = self$param_set$get_values(tags = "train") dist = invoke(stats::dist, x = task$data(), method = pv$d %??% "euclidean", diff --git a/R/LearnerClustKKMeans.R b/R/LearnerClustKKMeans.R index 12f894c2..7d5aaa02 100644 --- a/R/LearnerClustKKMeans.R +++ b/R/LearnerClustKKMeans.R @@ -78,12 +78,13 @@ LearnerClustKKMeans = R6Class("LearnerClustKKMeans", c = kernlab::centers(self$model) K = kernlab::kernelf(self$model) + data = task$data() # kernel product between each new datapoint and the centers - d_xc = matrix(kernlab::kernelMatrix(K, as.matrix(task$data()), c), ncol = nrow(c)) + d_xc = matrix(kernlab::kernelMatrix(K, as.matrix(data), c), ncol = nrow(c)) # kernel product between each new datapoint and itself: rows are identical d_xx = matrix( - rep(diag(kernlab::kernelMatrix(K, as.matrix(task$data()))), each = ncol(d_xc)), + rep(diag(kernlab::kernelMatrix(K, as.matrix(data))), each = ncol(d_xc)), ncol = ncol(d_xc), byrow = TRUE ) # kernel product between each center and itself: columns are identical diff --git a/R/LearnerClustMiniBatchKMeans.R b/R/LearnerClustMiniBatchKMeans.R index d6a9178a..8a5f9c70 100644 --- a/R/LearnerClustMiniBatchKMeans.R +++ b/R/LearnerClustMiniBatchKMeans.R @@ -66,18 +66,20 @@ LearnerClustMiniBatchKMeans = R6Class("LearnerClustMiniBatchKMeans", stopf("`CENTROIDS` must have same number of rows as `clusters`") } - m = invoke(ClusterR::MiniBatchKmeans, data = task$data(), .args = pv) + data = task$data() + m = invoke(ClusterR::MiniBatchKmeans, data = data, .args = pv) if (self$save_assignments) { - self$assignments = as.integer(invoke(predict, m, newdata = task$data())) + self$assignments = as.integer(invoke(predict, m, newdata = data)) } m }, .predict = function(task) { - partition = as.integer(invoke(predict, self$model, newdata = task$data())) + data = task$data() + partition = as.integer(invoke(predict, self$model, newdata = data)) prob = NULL if (self$predict_type == "prob") { - prob = invoke(predict, self$model, newdata = task$data(), fuzzy = TRUE) + prob = invoke(predict, self$model, newdata = data, fuzzy = TRUE) colnames(prob) = seq_len(ncol(prob)) } PredictionClust$new(task = task, partition = partition, prob = prob) diff --git a/R/LearnerClustOPTICS.R b/R/LearnerClustOPTICS.R index e07dda2d..ddcdb610 100644 --- a/R/LearnerClustOPTICS.R +++ b/R/LearnerClustOPTICS.R @@ -51,8 +51,9 @@ LearnerClustOPTICS = R6Class("LearnerClustOPTICS", private = list( .train = function(task) { pv = self$param_set$get_values(tags = "train") - m = invoke(dbscan::optics, x = task$data(), .args = remove_named(pv, "eps_cl")) - m = insert_named(m, list(data = task$data())) + data = task$data() + m = invoke(dbscan::optics, x = data, .args = remove_named(pv, "eps_cl")) + m = insert_named(m, list(data = data)) m = invoke(dbscan::extractDBSCAN, object = m, eps_cl = pv$eps_cl) if (self$save_assignments) { diff --git a/R/zzz.R b/R/zzz.R index 42c598e9..ab0a0659 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -24,7 +24,7 @@ register_mlr3 = function() { x$task_col_roles$clust = x$task_col_roles$regr x$task_properties$clust = x$task_properties$regr x$learner_properties$clust = c( - "missings", "partitional", "hierarchical", "exclusive", "overlapping", "fuzzy", "complete", "partial", "density" + "weights", "missings", "partitional", "hierarchical", "exclusive", "overlapping", "fuzzy", "complete", "partial", "density" ) # measure diff --git a/man/PredictionClust.Rd b/man/PredictionClust.Rd index 0b7291cb..54523b96 100644 --- a/man/PredictionClust.Rd +++ b/man/PredictionClust.Rd @@ -39,11 +39,12 @@ Access to the stored probabilities.} } } \if{html}{\out{ -
Inherited methods +
Inherited methods diff --git a/man/mlr_learners_clust.cmeans.Rd b/man/mlr_learners_clust.cmeans.Rd index 36dffa8c..e69735fb 100644 --- a/man/mlr_learners_clust.cmeans.Rd +++ b/man/mlr_learners_clust.cmeans.Rd @@ -40,7 +40,7 @@ lrn("clust.cmeans") method \tab character \tab cmeans \tab cmeans, ufcl \tab - \cr m \tab numeric \tab 2 \tab \tab \eqn{[1, \infty)}{[1, Inf)} \cr rate.par \tab numeric \tab - \tab \tab \eqn{[0, 1]}{[0, 1]} \cr - weights \tab untyped \tab 1L \tab \tab - \cr + use_weights \tab logical \tab FALSE \tab TRUE, FALSE \tab - \cr control \tab untyped \tab - \tab \tab - \cr } } diff --git a/man/mlr_learners_clust.dbscan.Rd b/man/mlr_learners_clust.dbscan.Rd index 751382e4..342e7977 100644 --- a/man/mlr_learners_clust.dbscan.Rd +++ b/man/mlr_learners_clust.dbscan.Rd @@ -33,7 +33,7 @@ lrn("clust.dbscan") eps \tab numeric \tab - \tab \tab \eqn{[0, \infty)}{[0, Inf)} \cr minPts \tab integer \tab 5 \tab \tab \eqn{[0, \infty)}{[0, Inf)} \cr borderPoints \tab logical \tab TRUE \tab TRUE, FALSE \tab - \cr - weights \tab untyped \tab - \tab \tab - \cr + use_weights \tab logical \tab FALSE \tab TRUE, FALSE \tab - \cr search \tab character \tab kdtree \tab kdtree, linear, dist \tab - \cr bucketSize \tab integer \tab 10 \tab \tab \eqn{[1, \infty)}{[1, Inf)} \cr splitRule \tab character \tab SUGGEST \tab STD, MIDPT, FAIR, SL_MIDPT, SL_FAIR, SUGGEST \tab - \cr diff --git a/tests/testthat/Rplots.pdf b/tests/testthat/Rplots.pdf deleted file mode 100644 index 5caa8db3..00000000 Binary files a/tests/testthat/Rplots.pdf and /dev/null differ