-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmx2split
More file actions
executable file
·113 lines (107 loc) · 2.78 KB
/
mx2split
File metadata and controls
executable file
·113 lines (107 loc) · 2.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/env perl
# split up an MX2 file into single-recipe files, to make it easier
# to clean it up. Recombine with mx2cat.
use strict;
use Digest::MD5 qw(md5_base64);
use Getopt::Long qw(:config no_ignore_case bundling);
# Prefix for every output path: "<prefix>NNNNN.mx2" and "<prefix>index.txt".
my $prefix = "recipe-";
unless (GetOptions("prefix|p=s" => \$prefix)) {
    die "Usage: $0 [--prefix path] [mx2file]\n";
}

# With no file arguments, process the single pseudo-file '-'.
@ARGV = ('-') if !@ARGV;

# Recipes collected from all inputs; each entry is {name => ..., recipe => ...}.
my @recipe;
# MD5 digests of recipe text already collected, used to drop exact duplicates.
my %md5;
# Read each input ('-' means standard input), split it into one line per tag,
# and collect every <RcpE>...</RcpE> element into @recipe as
# {name => ..., recipe => ...}.  The <Summ> section and the closing </mx2>
# tag are discarded; exact duplicate recipes (same MD5 after cleanup) are
# kept only once.
foreach my $file (@ARGV) {
    my $In;
    if ($file eq '-') {
        # Two-argument open takes no I/O layer, so "<-:bytes" would look for
        # a file literally named "-:bytes".  Duplicate STDIN instead and set
        # the layer explicitly.
        open($In, "<&", \*STDIN) or die "$0: stdin: $!\n";
        binmode($In, ":bytes");
    } else {
        open($In, "<:bytes", $file) or die "$0: $file: $!\n";
    }
    my $content = slurp($In);
    close($In);
    $content = '' unless defined $content;

    my $recipe;         # text of the recipe currently being assembled
    my $name;           # name attribute of that recipe (may be undef)
    my $insumm;         # true while inside <Summ>...</Summ>
    my $inrecipe = 0;   # true between <RcpE ...> and </RcpE>

    # Put every tag on its own line and normalise line endings, so the
    # line-oriented matching below works on any input layout.
    $content =~ s/></>\n</g;
    $content =~ tr/\r//d;

    LINE: foreach my $line (split(/\n/, $content)) {
        $line .= "\r\n";
        if ($line =~ /^<RcpE/) {
            # have to do it this way to deal with unescaped '"' in name:
            # drop the other quoted attributes, then take everything up to
            # the last '"' on the line.
            my $tmp = $line;
            $tmp =~ s/ img="[^"]*"//;
            $tmp =~ s/ author="[^"]*"//;
            ($name) = $tmp =~ /name="(.*)"/;
            $inrecipe = 1;
            # strip recipe image reference
            $line =~ s/ img="[^"]*"//;
            $recipe = $line;
        } elsif ($line =~ /^<\/RcpE/) {
            # A closing tag with no open recipe has nothing to finish.
            next LINE unless $inrecipe;
            $recipe .= "</RcpE>\r\n";
            # clean up leading blank lines inside tags
            $recipe =~ s/>[\r\n]+/>\r\n/g;
            # clean up trailing blank lines inside tags
            $recipe =~ s/[\r\n]+\r\n</\r\n</g;
            # undo line-wrapping of attribute-only tags
            $recipe =~ s|(\r\n<[^/][^>\r\n]+>)\r\n</|$1</|g;
            unless ($md5{md5_base64($recipe)}++) {
                push(@recipe, {
                    name   => (defined $name ? $name : ''),
                    recipe => $recipe,
                });
            }
            # Reset the per-recipe state whether or not it was a duplicate.
            ($name, $recipe, $inrecipe) = (undef, undef, 0);
        } elsif ($line =~ /^<Summ/) {
            $insumm = 1;
        } elsif ($line =~ /<\/Summ>/) {
            $insumm = 0;
        } elsif ($insumm) {
            next LINE;
        } elsif ($line =~ /<\/mx2>/) {
            next LINE;
        } elsif ($inrecipe) {
            # strip empty ingredient unit ("1 egg", "2 onions", etc)
            $line =~ s/\s*unit="\s*"// if $line =~ /IngR/;
            # convert <open>text\n?<close>\n to <open>\ntext\n<close>\n
            # Captures are copied inside the conditional so that a failed
            # match can never pick up groups from an earlier match.
            if ($line =~ m|^(<[^>/]+>)([^<]+)(</.*>)?\s*$|) {
                my ($open, $text, $close) = ($1, $2, $3);
                $line = "$open\r\n$text\r\n";
                $line .= "$close\r\n" if defined $close;
            }
            # fix self-closing tags, strip completely empty ones
            if ($line =~ m|^<([^/]\S{3,})(.*)/>.*$|) {
                my ($tag, $attrs) = ($1, $2);
                next LINE if $attrs =~ /^\s*$/;
                $line = "<$tag$attrs>\r\n</$tag>\r\n";
            }
            $recipe .= $line;
        }
    }
}
# Write the collected recipes, sorted by name, to "<prefix>NNNNN.mx2" files
# (one recipe each, numbered from 00000 in sort order) and list number and
# name of each in "<prefix>index.txt".  Nothing is written when no recipe
# was collected.  Dies if any file cannot be opened or fully written.
if (@recipe) {
    # Treat a missing name as the empty string for sorting and printing.
    my $name_of = sub { defined $_[0]{name} ? $_[0]{name} : '' };

    my $count = 0;
    my $index_path = sprintf("%sindex.txt", $prefix);
    open(my $index_fh, ">", $index_path) or die "$0: $index_path: $!\n";
    foreach my $ref (sort { $name_of->($a) cmp $name_of->($b) } @recipe) {
        my $name = $name_of->($ref);
        printf {$index_fh} "%05d\t%s\n", $count, $name;

        my $out_path = sprintf("%s%05d.mx2", $prefix, $count++);
        # The recipe text carries explicit "\r\n" line endings, so write raw
        # bytes; a text-mode layer would add a second CR on CRLF platforms.
        open(my $out_fh, ">:bytes", $out_path) or die "$0: $out_path: $!\n";
        print {$out_fh} <<EOF;
<?xml version="1.0" standalone="yes" encoding="ISO-8859-1"?>\r
<!DOCTYPE mx2 SYSTEM "mx2.dtd">\r
<mx2 source="mx2split" date="">\r
<Summ>\r
EOF
        print {$out_fh} "<Nam>\r\n", $name, "\r\n</Nam>\r\n";
        print {$out_fh} "</Summ>\r\n";
        print {$out_fh} $ref->{recipe};
        print {$out_fh} "</mx2>\r\n";
        # Buffered write errors only surface at close time.
        close($out_fh) or die "$0: $out_path: $!\n";
    }
    close($index_fh) or die "$0: $index_path: $!\n";
}
exit 0;
# Read everything remaining on the given filehandle in one go.
# The input record separator is undefined only for the duration of the call.
sub slurp {
    my $handle = shift;
    local $/ = undef;
    return <$handle>;
}