-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodels.py
More file actions
76 lines (69 loc) · 2.08 KB
/
models.py
File metadata and controls
76 lines (69 loc) · 2.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
def train_naive_bayes(X, y):
    """Fit a Gaussian naive Bayes classifier using its default settings.

    Args:
        X: Training features, forwarded unchanged to ``GaussianNB.fit``.
        y: Training targets, forwarded unchanged to ``GaussianNB.fit``.

    Returns:
        The fitted ``GaussianNB`` estimator.
    """
    # scikit-learn's ``fit`` returns the estimator itself, so the
    # construct/fit/return sequence collapses into one expression.
    return GaussianNB().fit(X, y)
def train_decision_tree(X, y, max_depth=None):
    """Fit a decision-tree classifier with a fixed random seed.

    Args:
        X: Training features, forwarded unchanged to ``fit``.
        y: Training targets, forwarded unchanged to ``fit``.
        max_depth: Passed through as the tree's ``max_depth``; the default
            ``None`` is handed to scikit-learn as-is.

    Returns:
        The fitted ``DecisionTreeClassifier``.
    """
    # random_state is pinned to 42 so repeated runs build the same tree.
    tree_options = {"max_depth": max_depth, "random_state": 42}
    classifier = DecisionTreeClassifier(**tree_options)
    classifier.fit(X, y)
    return classifier
def train_mlp(X, y,
              hidden_layers=(128,),
              max_iter=300,
              alpha=1e-4,
              learning_rate_init=1e-3):
    """Fit a ReLU multi-layer perceptron classifier with early stopping.

    Args:
        X: Training features, forwarded unchanged to ``MLPClassifier.fit``.
        y: Training targets, forwarded unchanged to ``MLPClassifier.fit``.
        hidden_layers: Passed as ``hidden_layer_sizes``.
        max_iter: Passed as ``max_iter``.
        alpha: Passed as ``alpha``.
        learning_rate_init: Passed as ``learning_rate_init``.

    Returns:
        The fitted ``MLPClassifier``.
    """
    model = MLPClassifier(
        hidden_layer_sizes=hidden_layers,
        activation='relu',
        solver='adam',
        alpha=alpha,
        batch_size='auto',
        # NOTE: per the scikit-learn docs the ``learning_rate`` schedule is
        # only used when solver='sgd'; it is kept to preserve the original
        # configuration.
        learning_rate='adaptive',
        learning_rate_init=learning_rate_init,
        early_stopping=True,  # stop if no improvement on a 10% val-split
        validation_fraction=0.1,
        n_iter_no_change=10,
        max_iter=max_iter,
        random_state=42
    )
    # The estimator was previously built but never trained or returned, so
    # callers received None; fit and return it like the other train_* helpers.
    model.fit(X, y)
    return model
def tune_nb(X, y):
    """Grid-search ``var_smoothing`` for Gaussian naive Bayes.

    The search uses ``cv=3``, accuracy scoring and ``n_jobs=-1``.

    Args:
        X: Training features, forwarded unchanged to ``GridSearchCV.fit``.
        y: Training targets, forwarded unchanged to ``GridSearchCV.fit``.

    Returns:
        The ``best_estimator_`` of the fitted search.
    """
    smoothing_grid = {"var_smoothing": [1e-9, 1e-8, 1e-7, 1e-6]}
    search = GridSearchCV(
        estimator=GaussianNB(),
        param_grid=smoothing_grid,
        scoring="accuracy",
        cv=3,
        n_jobs=-1,
    )
    search.fit(X, y)
    return search.best_estimator_
def tune_dt(X, y):
    """Grid-search tree depth and leaf size for a decision-tree classifier.

    Candidates are every combination of ``max_depth`` in
    ``[5, 10, 15, None]`` and ``min_samples_leaf`` in ``[1, 5, 10]``,
    evaluated with ``cv=3``, accuracy scoring and ``n_jobs=-1``.

    Args:
        X: Training features, forwarded unchanged to ``GridSearchCV.fit``.
        y: Training targets, forwarded unchanged to ``GridSearchCV.fit``.

    Returns:
        The ``best_estimator_`` of the fitted search.
    """
    # Seed the base tree so every candidate is built reproducibly.
    base_tree = DecisionTreeClassifier(random_state=42)
    search = GridSearchCV(
        estimator=base_tree,
        param_grid={
            "max_depth": [5, 10, 15, None],
            "min_samples_leaf": [1, 5, 10],
        },
        scoring="accuracy",
        cv=3,
        n_jobs=-1,
    )
    search.fit(X, y)
    return search.best_estimator_
def tune_mlp(X, y):
    """Grid-search architecture and regularisation for an MLP classifier.

    The base network uses the adam solver with early stopping (10%
    validation split, ``n_iter_no_change=10``), ``max_iter=300`` and a
    fixed seed. The search varies ``hidden_layer_sizes``, ``alpha`` and
    ``learning_rate_init`` with ``cv=3``, accuracy scoring and
    ``n_jobs=-1``.

    Args:
        X: Training features, forwarded unchanged to ``GridSearchCV.fit``.
        y: Training targets, forwarded unchanged to ``GridSearchCV.fit``.

    Returns:
        The ``best_estimator_`` of the fitted search.
    """
    # Settings shared by every candidate network.
    fixed_settings = {
        "solver": "adam",
        "early_stopping": True,
        "validation_fraction": 0.1,
        "n_iter_no_change": 10,
        "max_iter": 300,
        "random_state": 42,
    }
    # Hyper-parameters that vary across candidates.
    candidate_grid = {
        "hidden_layer_sizes": [(128,), (256, 128)],
        "alpha": [1e-4, 1e-3],
        "learning_rate_init": [1e-3, 5e-4],
    }
    search = GridSearchCV(
        estimator=MLPClassifier(**fixed_settings),
        param_grid=candidate_grid,
        scoring="accuracy",
        cv=3,
        n_jobs=-1,
    )
    search.fit(X, y)
    return search.best_estimator_