#!/usr/bin/perl -w # Script parses the XML file for the appendix on preseeding and extracts # example snippts to form the raw preseed example file. Section titles are # added as headers. # The script will include all text between tags that have # the attribute 'role="example"' set, except if a 'condition' attribute is # in force that does not match the specified release or if an 'arch' attribute # is in force that does not match the specified architecture. # Define module to use use HTML::Parser(); use Getopt::Std; local %tagstatus; local %example; local %ignore; local $prevtag = ''; local $titletag; local $settitle = 0; $example{'print'} = 0; $example{'in_sect'} = 0; $example{'first'} = 1; $example{'new'} = 0; getopts('hda:r:') || die "Unknown command line arguments! Try $0 -h\n"; use vars qw($opt_h $opt_d $opt_a $opt_r); if ($opt_h) { print < Options: -h display this help information -d debug mode -a architecture for which to generate the example (default: i386) -r release for which to generate the example (required) END exit 0; } die "Must specify release for which to generate example.\n" if ! $opt_r; my $xmlfile = shift; die "Must specify XML file to parse!\n" if ! $xmlfile; die "Specified XML file \"$xmlfile\" not found.\n" if ! -f $xmlfile; my $arch = $opt_a ? "$opt_a" : "i386"; my $arch_os = $arch =~ "-" ? ($arch =~ s/-.*//r) : "linux"; my $arch_cpu = $arch =~ "-" ? ($arch =~ s/.*-//r) : $arch; my $release = $opt_r; # Create instance $p = HTML::Parser->new( start_h => [\&start_rtn, 'tagname, text, attr'], text_h => [\&text_rtn, 'text'], end_h => [\&end_rtn, 'tagname']); # Start parsing the specified file $p->parse_file($xmlfile); # Replace entities in examples # FIXME: should maybe be extracted from entity definition sub replace_entities { my $text = shift; $text =~ s/&archive-mirror;/http.us.debian.org/g; $text =~ s/&releasename;/$release/g; $text =~ s/&kernelpackage;/linux-image/g; $text =~ s/&kernelversion;/3.2/g; $text =~ s/>/>/g; $text =~ s/</{condition} ) { print STDERR "Condition: $attr->{condition}\n" if $opt_d; if ( $attr->{condition} ne $release ) { $ignore{'tag'} = $tagname; $ignore{'depth'} = $tagstatus{$tagname}{'count'}; print STDERR "Start ignore because of condition" if $opt_d; } } if ( exists $attr->{arch} ) { my $req_arch = $attr->{arch}; print STDERR "Architecture: $req_arch\n" if $opt_d; # x86 is an alias for i386 or amd64 if ( $req_arch =~ "x86" and ($arch_cpu eq "i386" or $arch_cpu eq "amd64") ) { # replace with the one we prefer $req_arch =~ s/x86/$arch_cpu/; } if ( $req_arch ne $arch and $req_arch ne "any-".$arch_cpu and $req_arch ne $arch_os."-any" ) { $ignore{'tag'} = $tagname; $ignore{'depth'} = $tagstatus{$tagname}{'count'}; print STDERR "Start ignore because of architecture" if $opt_d; } } } } # Assumes that is the first tag after a section tag if ( $prevtag =~ /sect1|sect2|sect3/ ) { $settitle = ( $tagname eq 'title' ); $titletag = $prevtag; $example{'in_sect'} = 0; } $prevtag = $tagname; if ( $tagname eq 'informalexample' && ! exists $ignore{'tag'} ) { if ( exists $attr->{role} && $attr->{role} eq "example" ) { $example{'print'} = 1; $example{'new'} = 1; } } } # Execute when text is encountered sub text_rtn { my ($text) = @_; if ( $settitle ) { # Clean leading and trailing whitespace for titles $text =~ s/^[[:space:]]*//; $text =~ s/[[:space:]]*$//; $text = replace_entities($text); $tagstatus{$titletag}{'title'} = $text; $settitle = 0; } if ( $example{'print'} && ! exists $ignore{'tag'} ) { # Print section headers for ($s=1; $s<=3; $s++) { my $sect="sect$s"; if ( $tagstatus{$sect}{'title'} ) { print "\n" if ( $s == 1 && ! $example{'first'} ); for ( $i = 1; $i <= 5 - $s; $i++ ) { print "#"; }; print " $tagstatus{$sect}{'title'}\n"; delete $tagstatus{$sect}{'title'}; } } # Clean leading whitespace if ( $example{'new'} ) { $text =~ s/^[[:space:]]*//; } $text = replace_entities($text); print "$text"; $example{'first'} = 0; $example{'new'} = 0; $example{'in_sect'} = 1; } } # Execute when the end tag is encountered sub end_rtn { my ($tagname) = @_; print STDERR "\nEnd: $tagname\n" if $opt_d; # Set of tags must match what's in start_rtn if ( $tagname =~ /appendix|sect1|sect2|sect3|para|informalexample|phrase/ ) { my $ts = $tagstatus{$tagname}{'count'}; $tagstatus{$tagname}{'count'} -= 1; print STDERR "$tagname $tagstatus{$tagname}{'count'}\n" if $opt_d; die "Invalid XML file: negative count for tag <$tagname>!\n" if $tagstatus{$tagname}{'count'} < 0; if ( exists $ignore{'tag'} ) { if ( $ignore{'tag'} eq $tagname && $ignore{'depth'} == $ts ) { delete $ignore{'tag'}; } return } } if ( $tagname eq 'informalexample' ) { $example{'print'} = 0; } if ( $tagname =~ /appendix|sect1|sect2|sect3|para/ ) { delete $tagstatus{$tagname}{'title'} if exists $tagstatus{$tagname}{'title'}; if ( $example{'in_sect'} ) { print "\n"; $example{'in_sect'} = 0; } } }