diff options
Diffstat (limited to 'scripts/historic')
-rwxr-xr-x | scripts/historic/2format | 19 | ||||
-rwxr-xr-x | scripts/historic/2xml | 7 | ||||
-rw-r--r-- | scripts/historic/2xml2 | 90 | ||||
-rw-r--r-- | scripts/historic/formatparasect | 16 | ||||
-rw-r--r-- | scripts/historic/para1 | 10 | ||||
-rwxr-xr-x | scripts/historic/validate.sh | 22 |
6 files changed, 164 insertions, 0 deletions
#!/bin/sh
# scripts/historic/2format  (new file, mode 100755)
#
# Usage: 2format FILE...
#
# Rewrites each FILE in place: tabs are deleted, the sed rule files
# `formatparasect` and `para1` are applied, a "</para>" followed by a
# "<para>" on a later line is joined into "</para><para>", and runs of blank
# lines are squeezed with `cat -s`.  Both rule files are read from the
# current working directory.  Each file name is printed before it is
# processed.
#
# Exit status: 0 when every FILE was rewritten, 1 when at least one was
# skipped (the skipped file is left untouched).

# sed program for the join stage; it depends on the sed in use accepting
# "\n" in a regex and ';'-separated commands after a label.
join_paras='/<\/para>/{:a;N;/<para>/!ba;s/<\/para>\n*<para>/<\/para><para>/;}'

# Scratch file of the conversion in progress; removed on any exit path.
tmp=
trap '[ -z "$tmp" ] || rm -f -- "$tmp"' EXIT
trap 'exit 1' HUP INT TERM

# format_one FILE
# Runs the formatting pipeline on FILE and copies the result back over it.
# Returns non-zero, leaving FILE unchanged, when a rule file or FILE itself
# is unusable, or when the pipeline produced nothing from non-empty input.
format_one() {
  fmt_src=$1

  # /bin/sh has no pipefail, so a sed stage that cannot open its rule file
  # would go unnoticed and its empty output would replace FILE.
  for fmt_rules in formatparasect para1; do
    if [ ! -r "$fmt_rules" ]; then
      printf '2format: cannot read sed script "%s" in the current directory\n' \
        "$fmt_rules" >&2
      return 1
    fi
  done

  if [ ! -f "$fmt_src" ] || [ ! -r "$fmt_src" ] || [ ! -w "$fmt_src" ]; then
    printf '2format: %s: not a readable, writable regular file\n' "$fmt_src" >&2
    return 1
  fi

  tmp=$(mktemp) || return 1

  tr -d '\t' < "$fmt_src" \
    | sed -f formatparasect \
    | sed -f para1 \
    | sed "$join_paras" \
    | cat -s > "$tmp"
  fmt_rc=$?
  # A further "| sed -f fix" stage was left commented out in the original
  # script and is still not applied here.

  # Only the last stage's status is visible; treat "input had data, output
  # is empty" as a failed earlier stage rather than a valid result.
  if [ "$fmt_rc" -eq 0 ] && [ -s "$fmt_src" ] && [ ! -s "$tmp" ]; then
    printf '2format: %s: pipeline produced no output, file left unchanged\n' \
      "$fmt_src" >&2
    fmt_rc=1
  fi

  if [ "$fmt_rc" -eq 0 ]; then
    # cp rather than mv: the existing file keeps its mode and hard links.
    cp -- "$tmp" "$fmt_src" || fmt_rc=$?
  fi

  rm -f -- "$tmp"
  tmp=
  return "$fmt_rc"
}

status=0
for file in "$@"; do
  printf '%s\n' "$file"
  format_one "$file" || status=1
done

[ "$status" -eq 0 ] || exit "$status"
#!/bin/sh
# scripts/historic/2xml  (new file, mode 100755)
#
# Usage: 2xml FILE
#
# feed it sgml and it coughs up xml, pretty rough though.
#
# FILE is passed through `sgmlnorm -mn`, rewritten with the sed rule file
# `2xml2` (read from the current working directory), has "</para>" followed
# by a later "<para>" joined into "</para><para>", and the result is shown
# in `less`.  The intermediate text is streamed between the stages, so no
# `tmp.xml` is written to (or left behind in) the current directory.

# sgml_to_xml FILE
# Writes the converted text for FILE to stdout.  Returns 1 without running
# anything when the 2xml2 rule file cannot be read.
sgml_to_xml() {
  if [ ! -r 2xml2 ]; then
    printf '2xml: cannot read sed script "2xml2" in the current directory\n' >&2
    return 1
  fi

  # The join program depends on the sed in use accepting "\n" in a regex and
  # ';'-separated commands after a label.
  sgmlnorm -mn "$1" \
    | sed -f 2xml2 \
    | sed '/<\/para>/{:a;N;/<para>/!ba;s/<\/para>\n*<para>/<\/para><para>/;}'
}

# main ARGS...
# Validates the command line, then pages the conversion of the first
# argument.  Returns 2 on a usage error.
main() {
  if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    printf 'Usage: %s FILE\n' "${0##*/}" >&2
    return 2
  fi

  sgml_to_xml "$1" | less
}

main "$@"
\ No newline at end of file diff --git a/scripts/historic/2xml2 b/scripts/historic/2xml2 new file mode 100644 index 000000000..299725d0d --- /dev/null +++ b/scripts/historic/2xml2 @@ -0,0 +1,90 @@ +# a very limited debiandoc to xml converter +# can also be used by itself on entity files, +# for those uncomment next line: +s/<item>/|/gi +# ENTITY must upper case +s|<!entity|<!ENTITY|gi +# start links on a new line +s|<URL ID=\"\([^\"]*\)\" NAME=\"\([^\"]*\)\">|\ +<ulink url="\1">\2</ulink>|gi +# pick up halves of those that got wrapped to different lines +s|<URL ID=\"\([^\"]*\)\"|\ +<ulink url="\1">|gi +s|<URL|<ulink|gi +s|^ID=\"\([^\"]*\)\" NAME=\"\([^\"]*\)\">|url="\1">\2</ulink>|gi +s|^NAME=\"\([^\"]*\)\">|\1</ulink>|gi +# ID= at the start of a line can also be an orphan ref +s|^ID=\"\(\&url[^\"]*\)\"|url="\1"></ulink>|gi +s|<REF ID=\"\([^\"]*\)\">|<xref linkend="\1"></xref>|gi +s|<^ID=\"\([^\"]*\)\">|linkend="\1"></xref>|gi +s|<REF|<xref|gi +s|<P>|<para>|gi +s|</P>|</para>|gi +s|HEADING>|title>|gi +# An extra line is helpful when replacing with 2 tags +s|<ITEM>|<listitem><para>\ +|gi +s|</ITEM>|\ +</para></listitem>|gi +s|<EM>\([^<]*\)</EM>|<emphasis>\1</emphasis>|gi +s|<EM>|<emphasis>|gi +s|</EM>|</emphasis>|gi +s|ENUMLIST>|orderedlist>|gi +s|TAGLIST>|variablelist>|gi +s|<TAG>|<varentry><term>\ +|gi +s|</TAG>|</term>|gi +s|<LIST COMPACT>|<itemizedlist>|gi +s|<LIST>|<itemizedlist>|gi +s|</LIST>|</itemizedlist>|gi +s|FILE>|filename>|gi +s|CHAPT>|chapter>|gi +s|CHAPT ID=|chapter id=|gi +# xml has no plain <sect>, must be numbered +# so move each one up a level +s|SECT3>|sect4>|gi +s|SECT2>|sect3>|gi +s|SECT1>|sect2>|gi +s|SECT>|sect1>|gi +s|SECT3 ID=|sect4 id=|gi +s|SECT2 ID=|sect3 id=|gi +s|SECT1 ID=|sect2 id=|gi +s|SECT ID=|sect1 id=|gi +# locate sect's with text immediately following, insert <title> +s|\(<sect[^>]*>\)\([^<>]\+$\)|\1<title>\2</title>|i +# prgn could be <application>, but use <command> as default +s|PRGN>|command>|gi +# our <example>s don't have 
headings, use <informalexample> +s|<EXAMPLE>|<informalexample><screen>\ +|gi +s|</EXAMPLE>|\ +</screen></informalexample>|gi +s|VAR>|replaceable>|gi +# <tt> has been used for many purposes, but it _should_ be <userinput> +s|TT>|userinput>|gi +# no <package> allowed, substitute <classname> for now +s|PACKAGE>|classname>|g +# just change the case +s|FOOTNOTE>|footnote>|gi +# +# formatting stuff +s| |\ +\ +|g +# all <para> to left margin on next line +s|^ *\(<para>.*\)|\ +\1| +s|^ *\(</para>.*\)|\1| +# sect's on next line, appropriate indent +s|^ *\(</*sect1.*\)|\ + \1| +s|^ *\(</*sect2.*\)|\ + \1| +s|^ *\(</*sect3.*\)|\ + \1| +s|^ *\(</*sect4.*\)|\ + \1| + + + + diff --git a/scripts/historic/formatparasect b/scripts/historic/formatparasect new file mode 100644 index 000000000..16694297b --- /dev/null +++ b/scripts/historic/formatparasect @@ -0,0 +1,16 @@ +s| |\ +\ +|g +# all <para> to left margin on next line +s|^ *\(<para>.*\)|\ +\1| +s|^ *\(</para>.*\)|\1| +# sect's on next line, appropriate indent +s|^ *\(</*sect1.*\)|\ + \1| +s|^ *\(</*sect2.*\)|\ + \1| +s|^ *\(</*sect3.*\)|\ + \1| +s|^ *\(</*sect4.*\)|\ + \1| diff --git a/scripts/historic/para1 b/scripts/historic/para1 new file mode 100644 index 000000000..4342261b7 --- /dev/null +++ b/scripts/historic/para1 @@ -0,0 +1,10 @@ +/<para>/,/[[:alnum:]]/s/<para>\n*/<para>\ +\ +/ +/[[:alnum:].:]/,/<\/para>/s/\n*<\/para>/\ +\ +<\/para>/ + + + + diff --git a/scripts/historic/validate.sh b/scripts/historic/validate.sh new file mode 100755 index 000000000..bdb09989a --- /dev/null +++ b/scripts/historic/validate.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +catalog=/usr/share/sgml/docbook/dtd/xml/4.2/catalog +xmldcl=/usr/share/sgml/declaration/xml.dcl +err=`tempfile` + +if grep -q '^<!DOCTYPE' $1; then + nsgmls -s -c $catalog $xmldcl $1 2> $err +else + temp=`tempfile` + topdir=`dirname $0` + root=`sed -e '0,/<[a-z]/!d' $1 | sed -e '$!d' | sed -e 's/<\([a-z][a-zA-Z0-9]*\).*/\1/'` + cat > $temp <<EOT +<!DOCTYPE $root PUBLIC 
"-//OASIS//DTD DocBook XML V4.2//EN" "docbookx.dtd" +[<!ENTITY % entities SYSTEM "entities.ent"> %entities;]> +EOT + cat $1 >> $temp + nsgmls -s -D$topdir -c $catalog $xmldcl $temp 2> $err + rm -f $temp +fi + +less $err
\ No newline at end of file |