#!/usr/bin/perl -w
# Define module to use
use HTML::Parser();
# Parser state shared with the handler subs below.  These stay package
# globals (declared with "our") because the handlers refer to them by their
# unqualified names.
our %tagstatus;        # per tag name: {'count'} and a pending {'title'}
our %example;          # flags steering the output of example text (see below)
our $prevtag  = '';    # name of the most recently opened tag
our $titletag;         # section tag whose title text is expected next
our $settitle = 0;     # true while the next text chunk is a section title

%example = (
    'print'   => 0,    # true while inside an <informalexample>
    'in_sect' => 0,    # true once example text was printed; end_rtn then
                       # emits a closing newline
    'first'   => 1,    # true until the first example text has been printed
    'new'     => 0,    # true for the first text chunk of an example
);

# Create instance
my $p = HTML::Parser->new(
    start_h => [ \&start_rtn, 'tagname, text, attr' ],
    text_h  => [ \&text_rtn,  'text' ],
    end_h   => [ \&end_rtn,   'tagname' ],
);

# Parse the input file; output is produced by the handlers as a side effect.
# parse_file returns undef when the file cannot be opened, so fail loudly
# instead of silently printing nothing.
my $input_file = 'example-preseed-etch-new.xml';
$p->parse_file($input_file)
    or die "Cannot parse $input_file: $!\n";
# Execute when start tag is encountered (registered as the start_h handler).
#
# Arguments, in the order requested by the 'tagname, text, attr' argspec:
#   $tagname - name of the tag being opened
#   $text    - source text of the tag (not used here)
#   $attr    - hash reference with the tag's attributes (only used by the
#              commented-out debug output)
#
# Updates the shared state: the per-tag open count in %tagstatus, the
# title-capture variables $settitle/$titletag, $prevtag, and the 'print',
# 'new' and 'in_sect' flags in %example.
sub start_rtn {
    my ($tagname, $text, $attr) = @_;

    #print "\nStart: $tagname\n";
    #print "Condition: $attr->{condition}\n" if exists $attr->{condition};
    #print "Architecture: $attr->{arch}\n" if exists $attr->{arch};

    # NOTE: the pattern is unanchored, so any tag whose name merely contains
    # one of these words is counted as well.
    if ( $tagname =~ /appendix|sect1|sect2|sect3|para/ ) {
        $tagstatus{$tagname}{'count'} += 1;
        #print "$tagname $tagstatus{$tagname}{'count'}\n";
    }

    # Assumes that <title> is the first tag after a section tag
    if ( $prevtag =~ /sect1|sect2|sect3/ ) {
        # Arm title capture only if the section really starts with a title;
        # text_rtn then stores the next text chunk for $titletag.
        $settitle = ( $tagname eq 'title' );
        $titletag = $prevtag;
        $example{'in_sect'} = 0;
    }
    $prevtag = $tagname;

    # Entering an example: start printing text, and mark the next text chunk
    # as the first one so that its leading whitespace gets stripped.
    if ( $tagname eq 'informalexample' ) {
        $example{'print'} = 1;
        $example{'new'}   = 1;
    }

    return;
}
# Execute when text is encountered (registered as the text_h handler).
#
# Argument:
#   $text - the text chunk delivered by the parser
#
# If start_rtn armed title capture ($settitle), the chunk is stored as the
# pending title of $titletag.  While inside an example ($example{'print'}),
# any pending sect1..sect3 titles are printed as header lines and the chunk
# itself is printed to the currently selected output handle.
sub text_rtn {
    my ($text) = @_;

    if ( $settitle ) {
        $tagstatus{$titletag}{'title'} = $text;
        $settitle = 0;
    }

    return unless $example{'print'};

    # Print section headers: sect1 gets four '#', sect2 three, sect3 two.
    for my $level ( 1 .. 3 ) {
        my $sect  = "sect$level";
        my $title = $tagstatus{$sect}{'title'};

        # Test for a non-empty string rather than truthiness, so that a
        # section titled "0" still gets its header.
        next unless defined $title && length $title;

        # Separate top-level sections by a blank line, except before the
        # very first output.
        print "\n" if $level == 1 && !$example{'first'};
        print '#' x ( 5 - $level ) . " $title\n";

        # A header is printed once only.
        delete $tagstatus{$sect}{'title'};
    }

    # Clean leading whitespace on the first chunk of an example
    if ( $example{'new'} ) {
        $text =~ s/^[[:space:]]*//;
    }
    print $text;

    $example{'first'}   = 0;
    $example{'new'}     = 0;
    $example{'in_sect'} = 1;    # tells end_rtn to emit a closing newline

    return;
}
# Execute when the end tag is encountered (registered as the end_h handler).
#
# Argument:
#   $tagname - name of the tag being closed
#
# Closing an <informalexample> stops the printing of text.  Closing a
# structural tag drops any title still pending for it, terminates printed
# example text with a newline, and decrements the tag's open count.
sub end_rtn {
    my ($tagname) = @_;

    #print "\nEnd: $tagname\n";
    $example{'print'} = 0 if $tagname eq 'informalexample';

    if ( $tagname =~ /appendix|sect1|sect2|sect3|para/ ) {
        # Bookkeeping record of this tag (created on demand).
        my $status = ( $tagstatus{$tagname} ||= {} );

        # Deleting an absent key is harmless, so no existence check is needed.
        delete $status->{'title'};

        # Finish the line of example text printed since the last section
        # boundary, at most once.
        if ( $example{'in_sect'} ) {
            $example{'in_sect'} = 0;
            print "\n";
        }

        $status->{'count'} -= 1;
        #print "$tagname $tagstatus{$tagname}{'count'}\n";
    }
}