-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathdd_prc.m
More file actions
executable file
·126 lines (109 loc) · 3.5 KB
/
dd_prc.m
File metadata and controls
executable file
·126 lines (109 loc) · 3.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
%DD_PRC Precision Recall curve
%
% C = DD_PRC(A,W)
% C = DD_PRC(A*W)
% C = A*W*DD_PRC
%
% INPUT
% A One-class dataset
% W One-class classifier
%
% OUTPUT
% C The precision-recall curve
%
% DESCRIPTION
% Find for a (data description) method W the Precision Recall curve over
% dataset A. The results are returned in a structure C, containing among
% others the following fields. C.err contains the classification errors,
% C.thresholds contains the thresholds for the different operating
% points and C.thrcoords the corresponding coordinates on the curve. The
% curve can be plotted using PLOTROC.
%
% SEE ALSO
% plotroc, dd_avprec, dd_auprc, dd_error, dd_eer.
% Copyright: D.M.J. Tax, D.M.J.Tax@prtools.org
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands
function [e, thr] = dd_prc(a,w)
% Use the same setup as testc: the behaviour depends on the number of
% input arguments.
%   no inputs  : E is an untrained 'fixed' mapping, THR is empty
%   one input  : A is a mapped one-class dataset; E is a structure with
%                the fields type, op, err, thrcoords and thresholds, and
%                THR holds the thresholds as returned by simpleprc
%   two inputs : A is mapped through W and handled as the one-input case

if nargin==0
	% When no input arguments are given, return an empty mapping
	e = prmapping(mfilename,'fixed');
	e = setname(e,'Precision-Recall');
	% make sure the second output is defined when it is requested
	thr = [];

elseif nargin == 1
	% Now we should have a mapped dataset, so the real work is done!
	% Tolerance used for the 'fixed threshold' and normalization checks
	tol = 1e-6;

	% store the fact that we are making a Precision Recall curve
	e.type = 'prc';

	% for evaluation, we need both target and outlier objects:
	if ~isocset(a)
		error('I need an OC dataset for computing the Precision Recall curve.');
	end
	[It,Io] = find_target(a);
	if isempty(It)
		error('Dataset A does not contain target objects');
	end
	if isempty(Io)
		error('Dataset A does not contain outlier objects');
	end

	% get the labels of A: 1 for target objects, 0 for all others
	truelab = zeros(size(a,1),1);
	truelab(It) = 1;

	% check if we have sane results. The test is done per object (row),
	% such that a non-finite value in just one of the output columns is
	% also detected:
	finiterows = all(isfinite(+a),2);
	if ~all(finiterows)
		warning('dd_tools:NonfiniteOutputs',...
			'Some strange (non-finite) classifier outputs: can you check your classifier?');
		% only keep the outputs which have finite values, and keep the
		% true labels aligned with the remaining objects:
		a = a(finiterows,:);
		truelab = truelab(finiterows);
	end

	% check if the output is normalized (important for classifiers that
	% output just a class-conditional density: for these situations the
	% raw outputs cannot be compared between different objects):
	if size(a,2)>1
		% a (near) constant 'outlier' output indicates a one-class
		% classifier with a fixed threshold; in that case the outputs
		% are not expected to sum to one and the check is skipped
		if var(a(:,'outlier'))>tol
			if any(abs(sum(a,2)-1)>tol)
				warning('dd_tools:NonNormalizedOutputs',...
					'Some non-normalized outputs found: can you check your classifier?');
			end
		end
	end

	% get the feature labels, needed to locate the target output column:
	fl = getfeatlab(a);

	% we don't have an operating point right now
	%DXD: should we define it one time??
	e.op = [];

	% first find out where the output for the target objects are stored:
	tcolumn = [];
	if ~isempty(fl) % we can only find the target feature when feature
	                % labels are defined
		tcolumn = strmatch('target',fl);
	end
	if isempty(tcolumn)
		warning('dd_tools:NoTargetFeature',...
			'dd_prc cannot find the target feature, using feature 1.');
		tcolumn = 1;
	end

	% and now extract the required column 'resemblance to target set':
	a = +a(:,tcolumn);

	% now the real computation is done:
	[err,thr] = simpleprc(a,truelab);
	e.err = err;

	% Find the errors and the thresholds between the points on the curve:
	% the first and last point are kept, in between the midpoints of
	% consecutive points are used.
	derr = diff(err)/2;
	e.thrcoords = [err(1,:); err(1:(end-1),:)+derr; err(end,:)];
	dthr = diff(thr)/2;
	if ~isempty(dthr) % in some cases there is just 1 threshold value
	                  % defined :-( (sigh)
		e.thresholds = [thr(1); thr(1:(end-1))+dthr; thr(end)];
	else
		e.thresholds = [thr(1); thr(end)];
	end

else
	% Separate mapping and dataset are given, so we have to map the data
	% first.
	% NOTE(review): ismapping/istrained are called without output
	% argument; presumably they raise an error when W is not a (trained)
	% mapping -- confirm against the PRTools implementation.
	ismapping(w);
	istrained(w);
	% forward both outputs, such that [E,THR] = DD_PRC(A,W) also works
	[e,thr] = feval(mfilename,a*w);
end

return