-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathbenchmark_internals.R
More file actions
176 lines (156 loc) · 7.05 KB
/
benchmark_internals.R
File metadata and controls
176 lines (156 loc) · 7.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# Command-line interface. All six positional arguments are required:
#   1. lib_path       library directory prepended to .libPaths()
#   2. result_csv     path the timing CSV is written to
#   3. plot_pdf       path of the PDF file that receives the plots
#   4. n_iter         number of timed iterations (positive integer)
#   5. version_label  label written to the CSV and used in plot titles
#   6. task           name of the benchmark task to run
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 6L) {
  # Fail here with a usage message; otherwise the missing values become NA
  # and only surface later as an unrelated-looking error.
  stop(
    "Expected 6 arguments: lib_path result_csv plot_pdf n_iter ",
    "version_label task (got ", length(args), ")",
    call. = FALSE
  )
}
lib_path <- args[1]
result_csv <- args[2]
plot_pdf <- args[3]
# as.integer() warns and yields NA on non-numeric text; the explicit check
# below reports that case, so the coercion warning itself is redundant.
n_iter <- suppressWarnings(as.integer(args[4]))
if (is.na(n_iter) || n_iter < 1L) {
  stop("n_iter must be a positive integer, got: ", args[4], call. = FALSE)
}
version_label <- args[5]
task <- args[6]

# Ensure we use the custom library: putting lib_path first makes packages
# installed there take precedence over the default library locations.
.libPaths(c(lib_path, .libPaths()))

# require() returns FALSE instead of erroring, so each result is checked and
# turned into an explicit failure (or a warning for the optional package).
if (!require("ICEbox", quietly = TRUE)) {
  stop("ICEbox not found in ", lib_path)
}
if (!require("randomForest", quietly = TRUE)) {
  stop("randomForest not found")
}
if (!require("MASS", quietly = TRUE)) {
  # MASS is needed for Boston data
  warning("MASS package not found, Boston tasks might fail")
}
# --- Setup Data and Model ---
# Build the white-wine benchmark fixture.
#
# Loads the WhiteWine data set shipped with ICEbox, draws a seeded sample of
# 500 rows and fits a 50-tree random forest of `quality` on all other columns.
#
# Returns a list with elements:
#   data       the sampled rows
#   model      the fitted random forest
#   predictor  name of the column to examine ("alcohol")
#   y          the `quality` values of the sampled rows
setup_wine <- function() {
  data("WhiteWine", package = "ICEbox")
  # Seed once, before both the row sample and the forest fit, so the fixture
  # is the same on every run.
  set.seed(42)
  sampled_rows <- sample(nrow(WhiteWine), 500)
  wine_sample <- WhiteWine[sampled_rows, ]
  forest <- randomForest(quality ~ ., data = wine_sample, ntree = 50)
  list(
    data = wine_sample,
    model = forest,
    predictor = "alcohol",
    y = wine_sample$quality
  )
}
# Build the Boston-housing benchmark fixture.
#
# Loads the Boston data set from MASS and fits a seeded 50-tree random forest
# of `medv` on all other columns. Every row is used; no subsample is drawn.
#
# Returns a list with elements:
#   data       the Boston data frame
#   model      the fitted random forest
#   predictor  name of the column to examine ("rm")
#   y          the `medv` column
setup_boston <- function() {
  data("Boston", package = "MASS")
  # Seed before the forest fit so the model is the same on every run.
  set.seed(42)
  housing <- Boston
  forest <- randomForest(medv ~ ., data = housing, ntree = 50)
  list(
    data = housing,
    model = forest,
    predictor = "rm",
    y = housing$medv
  )
}
# --- Run Task ---
# Time a single benchmark task and render its verification plots.
#
# Arguments:
#   task           one of the names in `known_tasks` below
#   n_iter         number of timed iterations
#   version_label  label used in plot titles; a label containing "New"
#                  selects the newer dice()/clusterICE() call signatures
#   plot_pdf       path of the PDF file that receives the plots
#
# Returns a numeric vector with the elapsed seconds of each timed call.
# Stops with "Unknown task: <task>" when the task name is not recognised;
# this happens before the PDF device is opened, so no stray file is created.
run_benchmark <- function(task, n_iter, version_label, plot_pdf) {
  known_tasks <- c(
    "wine_ice", "wine_dice", "wine_cluster", "boston_ice", "boston_dice"
  )
  if (!(task %in% known_tasks)) {
    stop("Unknown task: ", task)
  }

  is_wine <- startsWith(task, "wine_")
  dataset_label <- if (is_wine) "Wine" else "Boston"
  analysis <- sub("^(wine|boston)_", "", task)
  is_new_version <- grepl("New", version_label)

  # Titles have the form "<version_label> - <Dataset> <suffix>".
  plot_title <- function(suffix) {
    paste(version_label, "-", dataset_label, suffix)
  }
  seconds_since <- function(start) {
    as.numeric(difftime(Sys.time(), start, units = "secs"))
  }

  setup <- if (is_wine) setup_wine() else setup_boston()
  build_ice <- function() {
    ice(
      object = setup$model, X = setup$data, y = setup$y,
      predictor = setup$predictor, frac_to_build = 0.5, verbose = FALSE
    )
  }

  runtimes <- numeric(n_iter)
  pdf(plot_pdf)
  # Close the device even when a task fails part-way through.
  on.exit(dev.off(), add = TRUE)

  if (analysis == "ice") {
    # Benchmark ICE construction; plots are drawn once, outside the timing.
    for (i in seq_len(n_iter)) {
      start <- Sys.time()
      ice_obj <- build_ice()
      runtimes[i] <- seconds_since(start)
      if (i == 1) {
        plot(ice_obj, main = plot_title("ICE Uncentered"), centered = FALSE)
        plot(ice_obj, main = plot_title("ICE Centered"), centered = TRUE)
      }
    }
  } else if (analysis == "dice") {
    # The ICE object is pre-computed so only dice() is timed.
    ice_obj <- build_ice()
    for (i in seq_len(n_iter)) {
      start <- Sys.time()
      dice_obj <- if (is_new_version) {
        dice(ice_obj, verbose = FALSE, use_supsmu = TRUE)
      } else {
        dice(ice_obj)
      }
      runtimes[i] <- seconds_since(start)
      if (i == 1) {
        plot(dice_obj, main = plot_title("DICE Uncentered"), centered = FALSE)
        plot(dice_obj, main = plot_title("DICE Centered"), centered = TRUE)
      }
    }
  } else {
    # analysis == "cluster"
    ice_obj <- build_ice()
    for (i in seq_len(n_iter)) {
      # Every iteration times the same non-plotting call so that all samples
      # are comparable. The plotting path is still exercised once below, for
      # visual verification, but outside the timed region (as in the ICE and
      # DICE tasks).
      start <- Sys.time()
      clusterICE(ice_obj, nClusters = 3, plot = FALSE, centered = TRUE)
      runtimes[i] <- seconds_since(start)
      if (i == 1) {
        # Page 1: Uncentered, Page 2: Centered
        for (centered in c(FALSE, TRUE)) {
          page_title <- plot_title(
            if (centered) "ClusterICE (Centered)" else "ClusterICE (Uncentered)"
          )
          if (is_new_version) {
            clusterICE(
              ice_obj, nClusters = 3, plot = TRUE, centered = centered,
              main = page_title
            )
          } else {
            # This call passes no `main`; the title is added afterwards.
            clusterICE(ice_obj, nClusters = 3, plot = TRUE, centered = centered)
            title(page_title)
          }
        }
      }
    }
  }

  runtimes
}

runtimes <- run_benchmark(task, n_iter, version_label, plot_pdf)

# Save results
# The label columns are repeated explicitly so the frame is also valid when
# there are zero timing rows.
n_runs <- length(runtimes)
write.csv(
  data.frame(
    version = rep(version_label, n_runs),
    task = rep(task, n_runs),
    time = runtimes
  ),
  file = result_csv,
  row.names = FALSE
)