-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patheval.py
More file actions
110 lines (99 loc) · 3.82 KB
/
eval.py
File metadata and controls
110 lines (99 loc) · 3.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
from __future__ import print_function
import multiprocessing
# from datetime import datetime
from utilities import loadJSON, saveJSON
PNUM = 62
def evaluateStats(samples):
correct_norm = 0.0
total_norm = 0.0
total_nsw = 0.0
for tweet in samples:
input, pred, goal = (tweet['input'], tweet['prediction'],
tweet['output'])
for i in range(len(input)):
if pred[i].lower() != input[i].lower() and \
goal[i].lower() == pred[i].lower() and goal[i].strip:
correct_norm += 1
if goal[i].lower() != input[i].lower() and goal[i].strip():
total_nsw += 1
if pred[i].lower() != input[i].lower() and pred[i].strip():
total_norm += 1
precision = correct_norm / total_norm
recall = correct_norm / total_nsw
print('T:{} N:{} C:{}'.format(total_nsw, total_norm, correct_norm))
if precision != 0 and recall != 0:
f_measure = (2 * precision * recall) / (precision + recall)
print("precision: {:.4f}".format(precision))
print("recall: {:.4f}".format(recall))
print("F1: {:.4f}".format(f_measure))
else:
print("precision: {:.4f}".format(precision))
print("recall: {:.4f}".format(recall))
def classifyErrors(sample):
current = multiprocessing.current_process()
try:
input, pred, goal = (sample['input'], sample['prediction'],
sample['output'])
sample_props = {
'index': sample['index'],
'in': input,
'goal': goal,
'pred': pred,
'flags': sample['flags'],
'errors': [],
'R2W': 0, 'W2R': 0, 'W2W_C': 0, 'W2W_NC': 0
}
for i in range(len(input)):
new_error = {
'token': input[i],
'norm': goal[i],
'out': pred[i],
'pos': str(i),
'flag': sample['flags'][i],
}
include = True
if goal[i].lower() == input[i].lower():
if pred[i].lower() != input[i].lower():
sample_props['R2W'] += 1
new_error['class'] = 'R2W'
else:
include = False
if goal[i].lower() != input[i].lower() and goal[i].strip:
if pred[i].lower() == input[i].lower():
sample_props['W2W_NC'] += 1
new_error['class'] = 'W2W_NC'
elif pred[i].lower() == goal[i].lower():
sample_props['W2R'] += 1
new_error['class'] = 'W2R'
else:
sample_props['W2W_C'] += 1
new_error['class'] = 'W2W_C'
if include:
sample_props['errors'].append(new_error)
# print "{} : {} : analyzed sample {}".format(
# datetime.now().ctime(),
# current.name,
# sample['index']
# )
return sample_props
except Exception as e:
import traceback
print("Error from Process {}".format(current.name))
print(traceback.format_exc())
return {"error": str(e)}
def reportErrors(samples, outfile):
pool = multiprocessing.Pool(processes=PNUM)
stats = {'R2W': 0, 'W2R': 0, 'W2W_C': 0, 'W2W_NC': 0}
all_errors = pool.map(classifyErrors, samples)
stats = {key: reduce(lambda x, y: x + y, [smpl[key] for smpl in
all_errors])
for key in stats}
print(stats)
saveJSON(all_errors, outfile)
def evaluate(samples=None, outfile=None):
if not samples:
samples = loadJSON('./data/test_out.json')
if not outfile:
outfile = './data/norm_errors.json'
evaluateStats(samples)
reportErrors(samples, outfile)