-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathcounterrors.pl
More file actions
executable file
·59 lines (52 loc) · 1.5 KB
/
counterrors.pl
File metadata and controls
executable file
·59 lines (52 loc) · 1.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/perl
# This is a script to count errors (insertions, deletions, substitutions)
# according to edit distance
use strict;
use warnings;
use utf8;
use List::Util qw(max min);
use FindBin;
use lib $FindBin::Bin;
use Levenshtein;
# Main script: read a reference file and a system-output file in parallel,
# align each pair of lines with Levenshtein::distance, and count how often
# each (reference span, system span) error pair occurs.
#
# Output (STDOUT), one line per distinct error, most frequent first:
#   REFERENCE_WORDS <TAB> SYSTEM_WORDS <TAB> COUNT
# where an empty side is written as the literal string "NULL".

binmode STDIN,  ":utf8";
binmode STDOUT, ":utf8";

if (@ARGV != 2) {
    print STDERR "Usage: counterrors.pl REFERENCE SYSTEM\n";
    # Exit non-zero so calling scripts can detect the usage error.
    exit 1;
}
my ($ref_path, $test_path) = @ARGV;

# Lexical filehandles instead of global barewords; name the file on failure.
open my $ref_fh, "<:utf8", $ref_path
    or die "Cannot open reference file '$ref_path': $!\n";
open my $test_fh, "<:utf8", $test_path
    or die "Cannot open system file '$test_path': $!\n";

# Maps "ref words\tsystem words" to the number of times that error was seen.
my %errs;
my $pairs_read = 0;

LINE:
while (1) {
    my $ref  = <$ref_fh>;
    my $test = <$test_fh>;

    if (!defined $ref or !defined $test) {
        # One file ended before the other: the remaining lines cannot be
        # aligned, so report it instead of silently dropping them.
        if (defined $ref or defined $test) {
            warn "Warning: '$ref_path' and '$test_path' have different "
               . "numbers of lines; stopped after $pairs_read line pairs\n";
        }
        last LINE;
    }
    $pairs_read++;
    chomp($ref, $test);

    # Only the first value returned by Levenshtein::distance (the edit
    # history string) is needed here; any further return values are ignored.
    my ($hist) = Levenshtein::distance($ref, $test);

    my @refs  = split(/ +/, $ref);
    my @tests = split(/ +/, $test);

    # Words of the error span currently being accumulated on each side.
    my (@rerr, @terr);

    # The history is consumed one character at a time:
    #   'e'        -> both sides advance; any pending error span is closed
    #   'i'        -> only a system word is consumed
    #   'd'        -> only a reference word is consumed
    #   otherwise  -> one word is consumed from each side
    for my $op (split(//, $hist)) {
        if ($op eq 'e') {
            record_error(\%errs, \@rerr, \@terr);
            shift @refs;
            shift @tests;
        }
        else {
            push @rerr, shift(@refs)  if $op ne 'i';
            push @terr, shift(@tests) if $op ne 'd';
        }
    }

    # Flush an error span that runs to the end of the line.
    record_error(\%errs, \@rerr, \@terr);
}
close $ref_fh;
close $test_fh;

# Most frequent errors first; ties are broken by the error string so the
# output is identical from run to run (hash key order alone is randomised).
for my $err (sort { $errs{$b} <=> $errs{$a} or $a cmp $b } keys %errs) {
    print "$err\t$errs{$err}\n";
}

# Count the pending error span (if any) in %$errs and reset both buffers.
#   $errs - hash ref: error key => occurrence count
#   $rerr - array ref: pending reference-side words
#   $terr - array ref: pending system-side words
# Does nothing when both buffers are empty.
sub record_error {
    my ($errs, $rerr, $terr) = @_;
    return unless @$rerr or @$terr;

    my $ref_side  = @$rerr ? join(' ', @$rerr) : "NULL";
    my $test_side = @$terr ? join(' ', @$terr) : "NULL";
    $errs->{"$ref_side\t$test_side"}++;

    @$rerr = ();
    @$terr = ();
    return;
}