Generic function to calibrate binary classification models.
Details
The goal of calibration is to adjust the predicted probabilities of a binary classification model so that they better reflect the true probabilities (i.e. empirical risk) of the positive class.
Method-specific parameters
For Classification objects:
predicted_probabilities: Numeric vector of predicted probabilities. true_labels: Factor of true class labels.
For ClassificationRes objects:
resampler_config: ResamplerConfig object for calibration training. train_verbosity: Integer controlling calibration model training output.
Examples
# --- Calibrate Classification ---
dat <- iris[51:150, ]
res <- resample(dat)
#> 2026-02-22 18:59:13
#> Input contains more than one column; stratifying on last.
#> [resample]
#> 2026-02-22 18:59:13
#> Using max n bins possible = 2.
#> [kfold]
dat$Species <- factor(dat$Species)
dat_train <- dat[res[[1]], ]
dat_test <- dat[-res[[1]], ]
# Train GLM on a training/test split
mod_c_glm <- train(
x = dat_train,
dat_test = dat_test,
algorithm = "glm"
)
#> 2026-02-22 18:59:13
#> ▶
#> [train]
#> 2026-02-22 18:59:13
#> Training set: 90 cases x 4 features.
#> [summarize_supervised]
#> 2026-02-22 18:59:13
#> Test set: 10 cases x 4 features.
#> [summarize_supervised]
#> 2026-02-22 18:59:13
#> // Max workers: 7 => Algorithm: 1; Tuning: 1; Outer Resampling: 1
#> [get_n_workers]
#> 2026-02-22 18:59:13
#> Training GLM Classification...
#> [train]
#> 2026-02-22 18:59:13
#> Checking data is ready for training...
#>
#> ✓
#> [check_supervised]
#>
#>
#> <Classification>
#> GLM (Generalized Linear Model)
#>
#> <Training Classification Metrics>
#> Predicted
#> Reference virginica versicolor
#> virginica 44 1
#> versicolor 1 44
#>
#> Overall
#> Sensitivity 0.978
#> Specificity 0.978
#> Balanced_Accuracy 0.978
#> PPV 0.978
#> NPV 0.978
#> F1 0.978
#> Accuracy 0.978
#> AUC 0.998
#> Brier_Score 0.018
#>
#> Positive Class virginica
#>
#> <Test Classification Metrics>
#> Predicted
#> Reference virginica versicolor
#> virginica 4 1
#> versicolor 0 5
#>
#> Overall
#> Sensitivity 0.800
#> Specificity 1.000
#> Balanced_Accuracy 0.900
#> PPV 1.000
#> NPV 0.833
#> F1 0.889
#> Accuracy 0.900
#> AUC 1.000
#> Brier_Score 0.039
#>
#> Positive Class virginica
#>
#> 2026-02-22 18:59:13
#> ✓ Done in 0.20 seconds.
#> [train]
# Calibrate the `Classification` by defining `predicted_probabilities` and `true_labels`,
# in this case using the training data, but it could be a separate calibration dataset.
mod_c_glm_cal <- calibrate(
mod_c_glm,
predicted_probabilities = mod_c_glm$predicted_prob_training,
true_labels = mod_c_glm$y_training
)
#> 2026-02-22 18:59:13
#> <> Calibrating GLM classification...
#> [calibrate]
#> 2026-02-22 18:59:13
#> ▶
#> [train]
#> 2026-02-22 18:59:13
#> Training set: 90 cases x 1 features.
#> [summarize_supervised]
#> 2026-02-22 18:59:13
#> Test set: 10 cases x 1 features.
#> [summarize_supervised]
#> 2026-02-22 18:59:13
#> // Max workers: 7 => Algorithm: 1; Tuning: 1; Outer Resampling: 1
#> [get_n_workers]
#> 2026-02-22 18:59:13
#> Training Isotonic Classification...
#> [train]
#> 2026-02-22 18:59:13
#> Checking data is ready for training...
#>
#> ✓
#> [check_supervised]
#>
#>
#> <Classification>
#> Isotonic (Isotonic Regression)
#>
#> <Training Classification Metrics>
#> Predicted
#> Reference virginica versicolor
#> virginica 44 1
#> versicolor 1 44
#>
#> Overall
#> Sensitivity 0.978
#> Specificity 0.978
#> Balanced_Accuracy 0.978
#> PPV 0.978
#> NPV 0.978
#> F1 0.978
#> Accuracy 0.978
#> AUC 0.999
#> Brier_Score 0.015
#>
#> Positive Class virginica
#>
#> <Test Classification Metrics>
#> Predicted
#> Reference virginica versicolor
#> virginica 5 0
#> versicolor 0 5
#>
#> Overall
#> Sensitivity 1.000
#> Specificity 1.000
#> Balanced_Accuracy 1.000
#> PPV 1.000
#> NPV 1.000
#> F1 1.000
#> Accuracy 1.000
#> AUC 1.000
#> Brier_Score 0.022
#>
#> Positive Class virginica
#>
#> 2026-02-22 18:59:13
#> ✓ Done in 0.04 seconds.
#> [train]
#>
#> <Classification>
#> GLM (Generalized Linear Model)
#> ⟋ Calibrated using Isotonic Regression.
#>
#> <Training Classification Metrics (Pre => Post Calibration)>
#> Predicted
#> Reference virginica versicolor
#> virginica 44 => 44 1 => 1
#> versicolor 1 => 1 44 => 44
#>
#> Overall
#> Sensitivity 0.98 => 0.98
#> Specificity 0.98 => 0.98
#> Balanced_Accuracy 0.98 => 0.98
#> PPV 0.98 => 0.98
#> NPV 0.98 => 0.98
#> F1 0.98 => 0.98
#> Accuracy 0.98 => 0.98
#> AUC 1.00 => 1.00
#> Brier_Score 0.02 => 0.01
#>
#> Positive Class virginica
#>
#> <Test Classification Metrics (Pre => Post Calibration)>
#> Predicted
#> Reference virginica versicolor
#> virginica 4 => 5 1 => 0
#> versicolor 0 => 0 5 => 5
#>
#> Overall
#> Sensitivity 0.80 => 1.00
#> Specificity 1.00 => 1.00
#> Balanced_Accuracy 0.90 => 1.00
#> PPV 1.00 => 1.00
#> NPV 0.83 => 1.00
#> F1 0.89 => 1.00
#> Accuracy 0.90 => 1.00
#> AUC 1.00 => 1.00
#> Brier_Score 0.04 => 0.02
#>
#> Positive Class virginica
#>
#> 2026-02-22 18:59:13
#> </> Calibration done.
#> [calibrate]
mod_c_glm_cal
#> <Classification>
#> GLM (Generalized Linear Model)
#> ⟋ Calibrated using Isotonic Regression.
#>
#> <Training Classification Metrics (Pre => Post Calibration)>
#> Predicted
#> Reference virginica versicolor
#> virginica 44 => 44 1 => 1
#> versicolor 1 => 1 44 => 44
#>
#> Overall
#> Sensitivity 0.98 => 0.98
#> Specificity 0.98 => 0.98
#> Balanced_Accuracy 0.98 => 0.98
#> PPV 0.98 => 0.98
#> NPV 0.98 => 0.98
#> F1 0.98 => 0.98
#> Accuracy 0.98 => 0.98
#> AUC 1.00 => 1.00
#> Brier_Score 0.02 => 0.01
#>
#> Positive Class virginica
#>
#> <Test Classification Metrics (Pre => Post Calibration)>
#> Predicted
#> Reference virginica versicolor
#> virginica 4 => 5 1 => 0
#> versicolor 0 => 0 5 => 5
#>
#> Overall
#> Sensitivity 0.80 => 1.00
#> Specificity 1.00 => 1.00
#> Balanced_Accuracy 0.90 => 1.00
#> PPV 1.00 => 1.00
#> NPV 0.83 => 1.00
#> F1 0.89 => 1.00
#> Accuracy 0.90 => 1.00
#> AUC 1.00 => 1.00
#> Brier_Score 0.04 => 0.02
#>
#> Positive Class virginica
# --- Calibrate ClassificationRes ---
# Train GLM with cross-validation
resmod_c_glm <- train(
x = dat,
algorithm = "glm",
outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold")
)
#> 2026-02-22 18:59:13
#> ▶
#> [train]
#> 2026-02-22 18:59:13
#> Training set: 100 cases x 4 features.
#> [summarize_supervised]
#> 2026-02-22 18:59:13
#> // Max workers: 7 => Algorithm: 1; Tuning: 1; Outer Resampling: 7
#> [get_n_workers]
#> 2026-02-22 18:59:13
#> <> Training GLM Classification using 3 independent folds...
#> [train]
#> 2026-02-22 18:59:13
#> Input contains more than one column; stratifying on last.
#> [resample]
#> 2026-02-22 18:59:13
#> Using max n bins possible = 2.
#> [kfold]
#> Warning: glm.fit: algorithm did not converge
#> Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
#> Warning: glm.fit: algorithm did not converge
#> Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
#> 2026-02-22 18:59:13
#> </> Outer resampling done.
#> [train]
#>
#> <Resampled Classification Model>
#> GLM (Generalized Linear Model)
#> ⟳ Tested using 3 independent folds.
#>
#> <Resampled Classification Training Metrics>
#> Showing mean (sd) across resamples.
#> Sensitivity: 0.990 (0.017)
#> Specificity: 0.990 (0.017)
#> Balanced_Accuracy: 0.990 (0.017)
#> PPV: 0.990 (0.017)
#> NPV: 0.990 (0.017)
#> F1: 0.990 (0.017)
#> Accuracy: 0.990 (0.017)
#> AUC: 0.998 (2.7e-03)
#> Brier_Score: 0.008 (0.014)
#>
#> <Resampled Classification Test Metrics>
#> Showing mean (sd) across resamples.
#> Sensitivity: 0.918 (0.096)
#> Specificity: 0.922 (0.090)
#> Balanced_Accuracy: 0.920 (0.046)
#> PPV: 0.929 (0.080)
#> NPV: 0.925 (0.079)
#> F1: 0.919 (0.046)
#> Accuracy: 0.920 (0.046)
#> AUC: 0.934 (0.057)
#> Brier_Score: 0.076 (0.053)
#>
#> 2026-02-22 18:59:13
#> ✓ Done in 0.09 seconds.
#> [train]
# Calibrate the `ClassificationRes` using the same resampling configuration as used for training.
resmod_c_glm_cal <- calibrate(resmod_c_glm)
#> 2026-02-22 18:59:13
#> <> Calibrating GLM resampled classification...
#> [calibrate]
#>
#> <Resampled Classification Model>
#> GLM (Generalized Linear Model)
#> ⟳ Tested using 3 independent folds.
#> ⟋ Calibrated using Isotonic Regression with 5 independent folds.
#>
#> <Resampled Classification Training Metrics (Pre => Post Calibration)>
#> Showing mean (sd) across resamples, Pre => Post calibration.
#> Sensitivity: 0.99 (0.02) => 0.94 (0.09)
#> Specificity: 0.99 (0.02) => 0.94 (0.09)
#> Balanced_Accuracy: 0.99 (0.02) => 0.94 (0.05)
#> PPV: 0.99 (0.02) => 0.95 (0.07)
#> NPV: 0.99 (0.02) => 0.95 (0.08)
#> F1: 0.99 (0.02) => 0.94 (0.05)
#> Accuracy: 0.99 (0.02) => 0.94 (0.05)
#> AUC: 1.00 (2.7e-03) => 0.94 (0.05)
#> Brier_Score: 0.01 (0.01) => 0.05 (0.04)
#>
#> <Resampled Classification Test Metrics (Pre => Post Calibration)>
#> Showing mean (sd) across resamples, Pre => Post calibration.
#> Sensitivity: 0.92 (0.10) => 0.94 (0.13)
#> Specificity: 0.92 (0.09) => 0.92 (0.14)
#> Balanced_Accuracy: 0.92 (0.05) => 0.93 (0.08)
#> PPV: 0.93 (0.08) => 0.94 (0.11)
#> NPV: 0.93 (0.08) => 0.95 (0.10)
#> F1: 0.92 (0.05) => 0.93 (0.08)
#> Accuracy: 0.92 (0.05) => 0.93 (0.08)
#> AUC: 0.93 (0.06) => 0.93 (0.08)
#> Brier_Score: 0.08 (0.05) => 0.07 (0.06)
#>
#> 2026-02-22 18:59:14
#> </> Calibration done.
#> [calibrate]
resmod_c_glm_cal
#> <Resampled Classification Model>
#> GLM (Generalized Linear Model)
#> ⟳ Tested using 3 independent folds.
#> ⟋ Calibrated using Isotonic Regression with 5 independent folds.
#>
#> <Resampled Classification Training Metrics (Pre => Post Calibration)>
#> Showing mean (sd) across resamples, Pre => Post calibration.
#> Sensitivity: 0.99 (0.02) => 0.94 (0.09)
#> Specificity: 0.99 (0.02) => 0.94 (0.09)
#> Balanced_Accuracy: 0.99 (0.02) => 0.94 (0.05)
#> PPV: 0.99 (0.02) => 0.95 (0.07)
#> NPV: 0.99 (0.02) => 0.95 (0.08)
#> F1: 0.99 (0.02) => 0.94 (0.05)
#> Accuracy: 0.99 (0.02) => 0.94 (0.05)
#> AUC: 1.00 (2.7e-03) => 0.94 (0.05)
#> Brier_Score: 0.01 (0.01) => 0.05 (0.04)
#>
#> <Resampled Classification Test Metrics (Pre => Post Calibration)>
#> Showing mean (sd) across resamples, Pre => Post calibration.
#> Sensitivity: 0.92 (0.10) => 0.94 (0.13)
#> Specificity: 0.92 (0.09) => 0.92 (0.14)
#> Balanced_Accuracy: 0.92 (0.05) => 0.93 (0.08)
#> PPV: 0.93 (0.08) => 0.94 (0.11)
#> NPV: 0.93 (0.08) => 0.95 (0.10)
#> F1: 0.92 (0.05) => 0.93 (0.08)
#> Accuracy: 0.92 (0.05) => 0.93 (0.08)
#> AUC: 0.93 (0.06) => 0.93 (0.08)
#> Brier_Score: 0.08 (0.05) => 0.07 (0.06)