1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
|
# The script keeps track of two special situations:
# - markup ('tags') inside comments is not handled well by the poxml
#   tools, so such comments are removed;
# - entity references inside comments must not be processed, so we keep
#   a running count of comment openings and closings.
# Initialisation, run once before the first input record is read.
#
# Reads the entity list named by ENTLIST (not assigned anywhere in this
# script; presumably passed in by the caller, e.g. with -v) and builds:
#   ent[name]      - file name belonging to entity <name>
#   included[name] - 0 until the entity has been processed
# Each list line is expected to look like "<entity>:<filename>".
BEGIN {
    main_count = 1
    # Let's first build an array with all the entities (xml files).
    # getline returns -1 when the file cannot be read; comparing with 0
    # explicitly prevents the endless loop a bare "while (getline ...)"
    # would run into in that case.
    while ((getline <ENTLIST) > 0) {
        delim = index($0, ":")
        if (delim == 0) {
            # No separator (e.g. a blank line): not an entity definition.
            # Skipping it avoids registering a bogus entry with an empty
            # name that would later be reported as "NOT processed".
            continue
        }
        i = substr($0, 1, delim - 1)
        fname = substr($0, delim + 1, length($0) - delim)
        # Trim any leading and trailing space of filenames
        gsub(/^[[:space:]]*/, "", fname)
        gsub(/[[:space:]]*$/, "", fname)
        ent [i] = fname
        included [i] = 0
    }
    # The list has been read completely; release the file.
    close(ENTLIST)
}
# Main rule, executed for every record of the main input.
# Only records that consist of a single "&....xml;" entity reference
# (optionally followed by complete comments) are of interest: each of
# them is handed to process_file() as a top-level ("main") entity.
# All other records are ignored.
{
    line = $0
    if (match(line, /^[[:space:]]*&.*\.xml;[[:space:]]*(<\!--.*-->[[:space:]]*|)*$/))
        process_file(line, "main")
}
# Final report, executed after the last input record.
# Appends to LOG the names of all entities from the entity list whose
# included[] marker is still 0, i.e. which were never processed.
END {
    print "" >>LOG
    print "The following defined entities (from docstruct) were NOT processed:" >>LOG
    for (entname in ent) {
        # Anything with a non-zero marker has been handled; skip it.
        if (included [entname] != 0)
            continue
        print " " entname >>LOG
    }
}
# process_file(entline, level)
#
# Resolve the entity reference in 'entline' and merge the file it
# stands for into the output.
#
#   entline - the input line holding the "&name;" entity reference
#   level   - "main" for a reference found in the main input (starts a
#             new output file), "sub" for a reference found inside an
#             included file (appended to the current output file)
#
# Globals written: entname, INFILE, OUTFILE, OUTDIR, main_count,
# included[]. WORKDIR and LOG are read but not assigned in this script
# (presumably supplied by the caller).
#
# included[name] holds 0 for "never processed", 1 for "processed as a
# top-level file", and otherwise the value of main_count at the time the
# entity was included from within another file.
#
# 'fname' and 'qdir' are local variables.
function process_file(entline, level,    fname, qdir) {
    entname = get_entname(entline)

    if (!(entname in ent)) {
        print "** Entity " entname " not found and will be skipped!" >>LOG
        # Keep the unresolved reference in the output. Before the first
        # top-level entity has been seen there is no output file yet;
        # redirecting to an empty file name would abort the run, so the
        # line is only written once OUTFILE has been set.
        if (OUTFILE != "") {
            print entline >>OUTFILE
        }
        return
    }

    fname = ent [entname]
    print "Processing: " fname >>LOG
    INFILE = WORKDIR "/in/" fname

    if (level == "main") {
        main_count += 1
        # Change at highest level: change to a new output file.
        # Close the previous one first so that open files do not pile up;
        # all writes use ">>", so nothing is lost if it is reopened later.
        if (OUTFILE != "") {
            close(OUTFILE)
        }
        OUTFILE = WORKDIR "/out/" fname
        OUTDIR = OUTFILE
        gsub(/\/[^\/]*$/, "/", OUTDIR)      # strip filename
        # Create the directory. The path is single-quoted for the shell
        # (embedded single quotes become '\'') so that blanks or shell
        # metacharacters in a file name cannot break the command.
        qdir = OUTDIR
        gsub(/'/, "'\\''", qdir)
        system("mkdir -p '" qdir "'")
    } else {
        print "" >>OUTFILE
    }

    # A sub-level entity whose marker is non-zero but lower than the
    # current main_count was already pulled in while an earlier top-level
    # file was being built (or was itself a top-level file).
    if (level == "sub" && included [entname] != 0 && included [entname] < main_count) {
        print "** Warning: entity '" entname "'was also included in another file." >>LOG
    }

    if (level == "main") {
        included [entname] = 1
    } else {
        included [entname] = main_count
    }

    parse_file(INFILE, fname)
}
# parse_file(PARSEFILE, FNAME)
#
# Copy the contents of PARSEFILE to the current OUTFILE, wrapped in
# "Start of file" / "End of file" marker comments that mention FNAME.
#
#   PARSEFILE - path of the file to read
#   FNAME     - file name to show in the marker comments
#
# While copying:
#   - a line that consists of an "&....xml;" entity reference is
#     processed recursively through process_file(..., "sub"), unless it
#     is located inside an open comment, in which case it is skipped and
#     reported in LOG;
#   - comments of the form "<!-- ... <...> ... <...> ... -->" are removed
#     from the line before it is written (see the bug reference below).
#
# 'fname', 'nwline', 'comment_count' and 'status' are local variables.
function parse_file(PARSEFILE, FNAME,    fname, nwline, comment_count, status) {
    comment_count = 0
    fname = FNAME

    # Test whether the file can be read by looking at getline's own
    # return value (1 = line read, 0 = end of file, -1 = error).
    # ERRNO is deliberately not used: it is specific to gawk and is not
    # reset by a later successful read, so a single missing file would
    # make every following file look missing as well.
    # Reading into 'nwline' keeps $0 of the main input untouched, and the
    # line read here is processed below instead of being thrown away.
    status = (getline nwline <PARSEFILE)
    if (status < 0) {
        print "** Error: file '" PARSEFILE "' does not exist!" >>LOG
        return
    }

    print "<!-- Start of file " fname " -->" >>OUTFILE

    # "status > 0" also ends the loop on a read error (-1), which a bare
    # "while (getline ...)" would treat as true and loop on forever.
    while (status > 0) {
        # Update the count of 'open' comments
        comment_count += count_comments(nwline)

        if (match(nwline, /^[[:space:]]*&.*\.xml;[[:space:]]*(<\!--.*-->[[:space:]]*|)*$/) > 0) {
            # If we find another entity reference, we process that file recursively
            # But not if the reference is within a comment
            if (comment_count != 0) {
                print "** Skipping entity reference '" nwline "' found in comment!" >>LOG
            } else {
                process_file(nwline, "sub")
            }
        } else {
            # Else we just print the line
            if (match(nwline, /<\!--.*<.*>.*<.*>.*-->/) > 0) {
                # Comments containing "<...> ... <...>" are not handled correctly
                # by xml2pot and split2po, so we skip lines like that
                # Note: this is a workaround for a bug in the tools:
                # http://bugs.kde.org/show_bug.cgi?id=90294
                print "** Comment deleted in line '" nwline "'" >>LOG
                gsub(/<\!--.*<.*>.*<.*>.*-->/, "", nwline)
            }
            print nwline >>OUTFILE
        }

        status = (getline nwline <PARSEFILE)
    }

    # A non-zero balance means a comment was opened but not closed (or
    # closed without being opened) within this file.
    if (comment_count != 0) {
        print "** Comment count is not zero at end of file: " comment_count >>LOG
    }

    print "<!-- End of file " fname " -->" >>OUTFILE
    close(PARSEFILE)
}
# get_entname(entline)
#
# Return the entity name contained in an entity reference line:
# leading white space plus the "&" are cut off the front, and everything
# from the first ";" onwards is cut off the end.
# 'ename' is a local variable.
function get_entname(entline,    ename) {
    ename = entline
    # The pattern is anchored at the start, so one substitution suffices.
    sub(/^[[:space:]]*&/, "", ename)
    # Drop the terminating ";" together with whatever follows it.
    sub(/;.*$/, "", ename)
    return ename
}
# count_comments(inline)
#
# Return the number of comment openers ("<!--") minus the number of
# comment closers ("-->") found in 'inline'.
# gsub() is used purely for its return value (the number of
# replacements); it works on a scratch copy, so 'inline' is not changed.
# The closers are counted on the text that remains after the openers
# have been removed.
# 'rest', 'opened' and 'closed' are local variables.
function count_comments(inline,    rest, opened, closed) {
    rest = inline
    opened = gsub(/<\!--/, "", rest)
    closed = gsub(/-->/, "", rest)
    return opened - closed
}
|