summaryrefslogtreecommitdiff
path: root/scripts/merge_xml
diff options
context:
space:
mode:
author: Joey Hess <joeyh@debian.org> 2005-10-07 19:51:38 +0000
committer: Joey Hess <joeyh@debian.org> 2005-10-07 19:51:38 +0000
commit: 1ea73eea5ecc6a8ed901316049259aee737ee554 (patch)
tree: 03a077f0b1b1548f3c806bd1c5795964fba0fb52 /scripts/merge_xml
download: installation-guide-1ea73eea5ecc6a8ed901316049259aee737ee554.zip
move manual to top-level directory, split out of debian-installer package
Diffstat (limited to 'scripts/merge_xml')
-rwxr-xr-x scripts/merge_xml 79
1 files changed, 79 insertions, 0 deletions
diff --git a/scripts/merge_xml b/scripts/merge_xml
new file mode 100755
index 000000000..561a9a734
--- /dev/null
+++ b/scripts/merge_xml
@@ -0,0 +1,79 @@
+#!/bin/bash
+
+# This script is used for translations using .po files.
+# It merges .xml files per chapter (or appendix).
+# The reason files are merged is that individual .xml files
+# are often not well-formed xml.
+
+if [ "$1" = "--help" ]; then
+ echo "Usage: $0 lang"
+ exit 0
+fi
+
+language=${1:-pl}
+
+SCRIPTDIR="./scripts"
+BUILDDIR="./build"
+if [ -z "$PO_USEBUILD" ] ; then
+ WORKDIR="./integrated"
+else
+ WORKDIR="$BUILDDIR/build.po"
+fi
+LANGDIR="./$language"
+
+[ -d $LANGDIR ] || exit 1
+
+TEMPDIR=/tmp/merge_xml.$$
+LOG=$TEMPDIR/merge_xml.$language.log
+[ -d $TEMPDIR ] || mkdir $TEMPDIR
+
+#<!ENTITY bookinfo.xml SYSTEM "en/bookinfo.xml">
+OLD_IFS=$IFS
+IFS="
+"
+:>$TEMPDIR/entlist
+echo "Building list of entities..."
+for ENT in `grep "<!ENTITY" $BUILDDIR/templates/docstruct.ent` ; do
+ echo -n "$(echo $ENT | sed "s/.*ENTITY\ *//" | sed "s/\ *SYSTEM.*$//")" >>$TEMPDIR/entlist
+ echo -n ":" >>$TEMPDIR/entlist
+ echo "$(echo $ENT | sed "s/.*SYSTEM\ *\"##SRCPATH##\///" | sed "s/\">//")" >>$TEMPDIR/entlist
+
+done
+IFS=$OLD_IFS
+
+# Make sure that all files are in UTF-8 first
+echo "Converting XML files to UTF-8..."
+echo "Converting XML files to UTF-8..." >>$LOG
+for FILE in `find $LANGDIR -name "*.xml"` ; do
+ SUBDIR=$(dirname $FILE | sed "s:$LANGDIR::");
+ XML=$(basename $FILE)
+ mkdir -p $TEMPDIR/in/$SUBDIR
+ REGEXP="^<?.*encoding="
+ if egrep -q $REGEXP $FILE ; then
+ echo "Encoded : $FILE" >>$LOG
+ ENC=$(egrep $REGEXP $FILE | sed "s/.*xml.*encoding=\"//" | sed "s/\"?>//")
+ iconv -f $ENC -t utf-8 $FILE | egrep -v $REGEXP >$TEMPDIR/in/$SUBDIR/$XML
+ else
+ echo "Not encoded: $FILE" >>$LOG
+ cp $FILE $TEMPDIR/in/$SUBDIR/$XML
+ fi
+done
+echo "" >>$LOG
+
+# Include lower level xml-files for all the main level xml-files
+echo "Merging XML files per 'chapter'..."
+echo "Merging XML files per 'chapter'..." >>$LOG
+gawk -v WORKDIR="$TEMPDIR" -v LOG=$LOG -v ENTLIST="$TEMPDIR/entlist" \
+ -f $SCRIPTDIR/merge_xml.awk $BUILDDIR/templates/install.xml.template
+
+# Copy the results to their proper location
+TARGET="$WORKDIR/$language"
+if [ -d $TARGET ]; then
+ rm -r $TARGET
+fi
+mkdir -p $TARGET
+cp -r $TEMPDIR/out/* $TARGET
+cp $LOG $TARGET
+
+rm -r $TEMPDIR
+exit 0