forked from sagittaeri/htt
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathforest-ranking
More file actions
executable file
·63 lines (49 loc) · 2.03 KB
/
forest-ranking
File metadata and controls
executable file
·63 lines (49 loc) · 2.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env python
import numpy as np
from sklearn.ensemble import ExtraTreesClassifier
import pylab as pl
from mva.categories import (
Category_Preselection, Category_VBF, Category_Boosted)
from mva.classify import make_dataset, prepare_dataset
from mva.analysis import Analysis
from mva.samples import Higgs
from mva.defaults import TRAIN_FAKES_REGION, TARGET_REGION
# Set up the 2012 analysis with systematics switched off and fakes_region
# set to TRAIN_FAKES_REGION.
analysis = Analysis(year=2012,
                    systematics=False,
                    fakes_region=TRAIN_FAKES_REGION)

# Normalize with respect to the preselection category *before* reading the
# backgrounds; the order is kept deliberately since normalize() may affect
# what analysis.backgrounds yields -- TODO confirm against mva.analysis.
analysis.normalize(Category_Preselection)

# Reused as the background input to make_dataset() for every category below.
backgrounds_train = analysis.backgrounds
for category in (Category_VBF, Category_Boosted):
    # For each category: train an extremely-randomized-trees forest on the
    # category's features, print the features ordered by importance, and save
    # a bar chart of the importances to ranking_<category.name>.png.
    features = category.features
    n_features = len(features)
    signals_train = [
        Higgs(year=2012,
              mass=125,
              modes=category.train_signal_modes,
              trigger=True),
    ]

    # create the dataset
    sig, sig_w, bkg, bkg_w = make_dataset(signals_train, backgrounds_train,
                                          fields=features,
                                          category=category,
                                          region=TARGET_REGION)
    sample, labels, weights = prepare_dataset(sig, sig_w, bkg, bkg_w)

    # Build a forest and compute the feature importances.
    # random_state is fixed so repeated runs give the same ranking.
    forest = ExtraTreesClassifier(n_estimators=250, random_state=0)
    forest.fit(sample, labels, sample_weight=weights)
    importances = forest.feature_importances_
    # Spread of each feature's importance across the individual trees,
    # used as the error bar in the plot.
    std = np.std([tree.feature_importances_ for tree in forest.estimators_],
                 axis=0)
    # Feature column indices sorted from most to least important.
    indices = np.argsort(importances)[::-1]

    # Print the feature ranking.
    # Single-argument print(...) emits the same text on Python 2 and 3.
    # Assumes one importance per entry of `features` (i.e. the columns of
    # `sample` follow `fields=features`) -- TODO confirm in mva.classify.
    print("Feature ranking:")
    for rank, idx in enumerate(indices, 1):
        print("%d. (%f) %s" % (rank, importances[idx], features[idx]))

    # Plot the feature importances of the forest
    fig = pl.figure()
    pl.title("Feature importances")
    pl.bar(range(n_features), importances[indices],
           color="r", yerr=std[indices], align="center")
    # Ticks are labelled with the feature column index (position in
    # `features`), in ranked order.
    pl.xticks(range(n_features), indices)
    pl.xlim([-1, n_features])
    pl.savefig('ranking_{0}.png'.format(category.name))
    # Release the figure so figures do not accumulate across categories.
    pl.close(fig)