summaryrefslogtreecommitdiff
path: root/scripts/merge_xml
blob: eeaa8eaaf1ca6c115dd7d2a93edaf3ad0d999ead (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/bin/bash

# This script is used for translations using .po files.
# It merges .xml files per chapter (or appendix).
# The reason files are merged is that individual .xml files
# are often not well-formed xml.

# The only supported option: print a one-line usage text and stop.
case "$1" in
    --help)
        echo "Usage: $0 lang"
        exit 0
        ;;
esac

# Language to process: the first argument, or "pl" when it is missing
# or empty.
language=${1:-pl}

# Helper scripts and build templates are addressed relative to the
# current working directory.
BUILDDIR="./build"
SCRIPTDIR="./scripts"

# Required tools
# Abort early when one of the external programs this script relies on
# (gawk, egrep, iconv) cannot be found on the PATH.
for tool in gawk egrep iconv ; do
    # command -v is the portable way to probe for a program; its exit
    # status tells whether the lookup succeeded.
    if ! command -v "$tool" >/dev/null 2>&1 ; then
        echo "ERROR: $tool is required, please install it." >&2
        exit 1
    fi
done

# Source directory holding the XML files of the selected language.
# Nothing can be merged without it, so stop with a message if it is absent.
LANGDIR="./$language"
if [ ! -d "$LANGDIR" ] ; then
    echo "ERROR: language directory $LANGDIR does not exist." >&2
    exit 1
fi

# Destination base directory: ./integrated by default, or build.po below
# $BUILDDIR when PO_USEBUILD is set to a non-empty value.
if [ -z "$PO_USEBUILD" ] ; then
    WORKDIR="./integrated"
else
    WORKDIR="$BUILDDIR/build.po"
fi
if [ ! -d "$WORKDIR" ] && ! mkdir -p "$WORKDIR" ; then
    echo "ERROR: cannot create work directory $WORKDIR." >&2
    exit 1
fi

# Private scratch directory for all intermediate files and the log.
# mktemp creates the directory itself under a name that cannot be guessed
# in advance, so a directory or symlink planted under a predictable /tmp
# name can never be reused by accident.
TEMPDIR=$(mktemp -d "${TMPDIR:-/tmp}/merge_xml.XXXXXX") || {
    echo "ERROR: cannot create temporary directory." >&2
    exit 1
}
# Remove the scratch directory whenever the script exits; -f keeps this
# quiet when the directory has already been removed.
trap 'rm -rf -- "$TEMPDIR"' EXIT
LOG=$TEMPDIR/merge_xml.$language.log

# Build $TEMPDIR/entlist with one "name:path" line for every <!ENTITY ...>
# declaration found in docstruct.ent.  Example of an input line:
#<!ENTITY bookinfo.xml         SYSTEM "en/bookinfo.xml">
#   name = text between "ENTITY" and "SYSTEM", surrounding blanks removed
#   path = text after SYSTEM "##SRCPATH##/ with the closing "> removed
: >"$TEMPDIR/entlist"
echo "Building list of entities..."
# read -r with an empty IFS hands over each line verbatim; quoting "$ENT"
# keeps the shell from glob-expanding or re-splitting its content.
grep "<!ENTITY" "$BUILDDIR/templates/docstruct.ent" |
while IFS= read -r ENT ; do
    ent_name=$(printf '%s\n' "$ENT" \
        | sed -e 's/.*ENTITY *//' -e 's/ *SYSTEM.*$//')
    ent_path=$(printf '%s\n' "$ENT" \
        | sed -e 's/.*SYSTEM *"##SRCPATH##\///' -e 's/">//')
    printf '%s:%s\n' "$ent_name" "$ent_path"
done >>"$TEMPDIR/entlist"

# Make sure that all files are in UTF-8 first
# Every *.xml file below $LANGDIR is placed at the same relative location
# below $TEMPDIR/in.  A file whose XML declaration names an encoding is
# converted to UTF-8 with iconv and loses the declaration line; every
# other file is copied unchanged.
echo "Converting XML files to UTF-8..."
echo "Converting XML files to UTF-8..." >>"$LOG"
# Matches only an XML declaration line (optionally preceded by a BOM or
# blanks).  The '?' is escaped on purpose: unescaped it would make the '<'
# optional, so any line containing "encoding=" would match and be dropped.
REGEXP='^[^<]*<\?xml.*encoding='
# NUL-delimited find output keeps paths with blanks or glob characters
# intact.
while IFS= read -r -d '' FILE ; do
    FILEDIR=$(dirname "$FILE")
    # Directory of the file relative to $LANGDIR (empty for top-level files).
    SUBDIR=${FILEDIR#"$LANGDIR"}
    XML=$(basename "$FILE")
    mkdir -p "$TEMPDIR/in/$SUBDIR"
    # Encoding name from the first declaration line; the value may be
    # enclosed in double or single quotes.  Empty when nothing is declared.
    ENC=$(egrep -e "$REGEXP" "$FILE" | head -n 1 \
        | sed -n "s/.*encoding=[\"']\([^\"']*\)[\"'].*/\1/p")
    if [ -n "$ENC" ] ; then
        echo "Encoded    : $FILE" >>"$LOG"
        iconv -f "$ENC" -t utf-8 "$FILE" \
            | egrep -v -e "$REGEXP" >"$TEMPDIR/in/$SUBDIR/$XML"
    else
        echo "Not encoded: $FILE" >>"$LOG"
        cp "$FILE" "$TEMPDIR/in/$SUBDIR/$XML"
    fi
done < <(find "$LANGDIR" -name "*.xml" -print0)
echo "" >>"$LOG"

# Include lower level xml-files for all the main level xml-files
# merge_xml.awk receives the scratch directory (as WORKDIR), the log file
# and the entity list as awk variables and processes the install.xml
# template.  All paths are quoted so they reach gawk as single arguments.
echo "Merging XML files per 'chapter'..."
echo "Merging XML files per 'chapter'..." >>"$LOG"
gawk -v WORKDIR="$TEMPDIR" -v LOG="$LOG" -v ENTLIST="$TEMPDIR/entlist" \
    -f "$SCRIPTDIR/merge_xml.awk" "$BUILDDIR/templates/install.xml.template"

# Copy the results to their proper location
# A previous result directory for this language is replaced.  The merged
# files are expected below $TEMPDIR/out (presumably written by
# merge_xml.awk, which is not part of this file -- TODO confirm).
# ${...:?} makes the shell abort rather than build a path such as "/" for
# the recursive removals when a variable is unexpectedly empty.
TARGET="${WORKDIR:?}/${language:?}"
status=0
if [ -d "$TARGET" ]; then
    rm -r -- "$TARGET"
fi
if ! mkdir -p "$TARGET" || ! cp -r "$TEMPDIR"/out/* "$TARGET" ; then
    echo "ERROR: could not copy merged files to $TARGET." >&2
    status=1
fi
# Keep the log next to the results, also when copying the files failed.
cp "$LOG" "$TARGET" || status=1

rm -r -- "${TEMPDIR:?}"
# Report failure to the caller instead of always claiming success.
exit "$status"