summaryrefslogtreecommitdiff
path: root/node
diff options
context:
space:
mode:
authorPeter 'Pita' Martischka <petermartischka@googlemail.com>2011-08-10 22:47:50 +0100
committerPeter 'Pita' Martischka <petermartischka@googlemail.com>2011-08-10 22:47:50 +0100
commit9cc3b543ccbdac7939072d268dc7d28106e9e3c4 (patch)
treea3f96c7b9583cac13ff73d4f09940ce14f779f87 /node
parentce1012438ef836dacbd17f98f0617ad4d19f2786 (diff)
parent3c46cce341516238a0439a081d178f526541d675 (diff)
downloadetherpad-lite-9cc3b543ccbdac7939072d268dc7d28106e9e3c4.zip
Merge branch 'master' of git://github.com/Pita/etherpad-lite into api
Diffstat (limited to 'node')
-rw-r--r--node/utils/ExportHtml.js373
1 files changed, 229 insertions, 144 deletions
diff --git a/node/utils/ExportHtml.js b/node/utils/ExportHtml.js
index e74e404c..dce156ec 100644
--- a/node/utils/ExportHtml.js
+++ b/node/utils/ExportHtml.js
@@ -13,27 +13,30 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
var async = require("async");
var Changeset = require("./Changeset");
var padManager = require("../db/PadManager");
-
-function getPadPlainText(pad, revNum) {
- var atext = ((revNum !== undefined) ? pad.getInternalRevisionAText(revNum) :
- pad.atext());
- var textLines = atext.text.slice(0,-1).split('\n');
+function getPadPlainText(pad, revNum)
+{
+ var atext = ((revNum !== undefined) ? pad.getInternalRevisionAText(revNum) : pad.atext());
+ var textLines = atext.text.slice(0, -1).split('\n');
var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text);
var apool = pad.pool();
var pieces = [];
- for(var i=0;i<textLines.length;i++) {
+ for (var i = 0; i < textLines.length; i++)
+ {
var line = _analyzeLine(textLines[i], attribLines[i], apool);
- if (line.listLevel) {
- var numSpaces = line.listLevel*2-1;
+ if (line.listLevel)
+ {
+ var numSpaces = line.listLevel * 2 - 1;
var bullet = '*';
- pieces.push(new Array(numSpaces+1).join(' '), bullet, ' ', line.text, '\n');
+ pieces.push(new Array(numSpaces + 1).join(' '), bullet, ' ', line.text, '\n');
}
- else {
+ else
+ {
pieces.push(line.text, '\n');
}
}
@@ -41,52 +44,68 @@ function getPadPlainText(pad, revNum) {
return pieces.join('');
}
-function getPadHTML(pad, revNum, callback) {
+function getPadHTML(pad, revNum, callback)
+{
var atext = pad.atext;
var html;
async.waterfall([
- // fetch revision atext
- function (callback) {
- if (revNum != undefined) {
- pad.getInternalRevisionAText(revNum, function (err, revisionAtext) {
- atext = revisionAtext;
- callback(err);
- });
- } else {
- callback(null);
- }
- },
-
- // convert atext to html
- function (callback) {
- html = getHTMLFromAtext(pad, atext);
- callback(null);
+ // fetch revision atext
+
+
+ function (callback)
+ {
+ if (revNum != undefined)
+ {
+ pad.getInternalRevisionAText(revNum, function (err, revisionAtext)
+ {
+ atext = revisionAtext;
+ callback(err);
+ });
}
- ],
- // run final callback
- function (err) {
- callback(err, html);
+ else
+ {
+ callback(null);
}
- );
+ },
+
+ // convert atext to html
+
+
+ function (callback)
+ {
+ html = getHTMLFromAtext(pad, atext);
+ callback(null);
+ }],
+ // run final callback
+
+
+ function (err)
+ {
+ callback(err, html);
+ });
}
-function getHTMLFromAtext(pad, atext) {
+function getHTMLFromAtext(pad, atext)
+{
var apool = pad.apool();
- var textLines = atext.text.slice(0,-1).split('\n');
+ var textLines = atext.text.slice(0, -1).split('\n');
var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text);
- var tags = ['h1', 'h2', 'strong','em','u','s'];
- var props = ['heading1', 'heading2', 'bold','italic','underline','strikethrough'];
+ var tags = ['h1', 'h2', 'strong', 'em', 'u', 's'];
+ var props = ['heading1', 'heading2', 'bold', 'italic', 'underline', 'strikethrough'];
var anumMap = {};
-
- props.forEach(function(propName, i) {
- var propTrueNum = apool.putAttrib([propName,true], true);
- if (propTrueNum >= 0) {
+
+ props.forEach(function (propName, i)
+ {
+ var propTrueNum = apool.putAttrib([propName, true], true);
+ if (propTrueNum >= 0)
+ {
anumMap[propTrueNum] = i;
}
});
- function getLineHTML(text, attribs) {
+ function getLineHTML(text, attribs)
+ {
var propVals = [false, false, false];
var ENTER = 1;
var STAY = 2;
@@ -97,16 +116,18 @@ function getHTMLFromAtext(pad, atext) {
// <b>Just bold<b> <b><i>Bold and italics</i></b> <i>Just italics</i>
// becomes
// <b>Just bold <i>Bold and italics</i></b> <i>Just italics</i>
-
var taker = Changeset.stringIterator(text);
var assem = Changeset.stringAssembler();
- function emitOpenTag(i) {
+ function emitOpenTag(i)
+ {
assem.append('<');
assem.append(tags[i]);
assem.append('>');
}
- function emitCloseTag(i) {
+
+ function emitCloseTag(i)
+ {
assem.append('</');
assem.append(tags[i]);
assem.append('>');
@@ -115,101 +136,123 @@ function getHTMLFromAtext(pad, atext) {
var urls = _findURLs(text);
var idx = 0;
- function processNextChars(numChars) {
- if (numChars <= 0) {
+
+ function processNextChars(numChars)
+ {
+ if (numChars <= 0)
+ {
return;
}
- var iter = Changeset.opIterator(Changeset.subattribution(attribs,
- idx, idx+numChars));
+ var iter = Changeset.opIterator(Changeset.subattribution(attribs, idx, idx + numChars));
idx += numChars;
- while (iter.hasNext()) {
+ while (iter.hasNext())
+ {
var o = iter.next();
var propChanged = false;
- Changeset.eachAttribNumber(o.attribs, function(a) {
- if (a in anumMap) {
+ Changeset.eachAttribNumber(o.attribs, function (a)
+ {
+ if (a in anumMap)
+ {
var i = anumMap[a]; // i = 0 => bold, etc.
- if (! propVals[i]) {
+ if (!propVals[i])
+ {
propVals[i] = ENTER;
propChanged = true;
}
- else {
+ else
+ {
propVals[i] = STAY;
}
}
});
- for(var i=0;i<propVals.length;i++) {
- if (propVals[i] === true) {
+ for (var i = 0; i < propVals.length; i++)
+ {
+ if (propVals[i] === true)
+ {
propVals[i] = LEAVE;
propChanged = true;
}
- else if (propVals[i] === STAY) {
+ else if (propVals[i] === STAY)
+ {
propVals[i] = true; // set it back
}
}
// now each member of propVal is in {false,LEAVE,ENTER,true}
// according to what happens at start of span
-
- if (propChanged) {
+ if (propChanged)
+ {
// leaving bold (e.g.) also leaves italics, etc.
var left = false;
- for(var i=0;i<propVals.length;i++) {
+ for (var i = 0; i < propVals.length; i++)
+ {
var v = propVals[i];
- if (! left) {
- if (v === LEAVE) {
+ if (!left)
+ {
+ if (v === LEAVE)
+ {
left = true;
}
}
- else {
- if (v === true) {
+ else
+ {
+ if (v === true)
+ {
propVals[i] = STAY; // tag will be closed and re-opened
}
}
}
- for(var i=propVals.length-1; i>=0; i--) {
- if (propVals[i] === LEAVE) {
+ for (var i = propVals.length - 1; i >= 0; i--)
+ {
+ if (propVals[i] === LEAVE)
+ {
emitCloseTag(i);
propVals[i] = false;
}
- else if (propVals[i] === STAY) {
+ else if (propVals[i] === STAY)
+ {
emitCloseTag(i);
}
}
- for(var i=0; i<propVals.length; i++) {
- if (propVals[i] === ENTER || propVals[i] === STAY) {
+ for (var i = 0; i < propVals.length; i++)
+ {
+ if (propVals[i] === ENTER || propVals[i] === STAY)
+ {
emitOpenTag(i);
propVals[i] = true;
}
}
// propVals is now all {true,false} again
} // end if (propChanged)
-
var chars = o.chars;
- if (o.lines) {
+ if (o.lines)
+ {
chars--; // exclude newline at end of line, if present
}
var s = taker.take(chars);
assem.append(_escapeHTML(s));
} // end iteration over spans in line
-
- for(var i=propVals.length-1; i>=0; i--) {
- if (propVals[i]) {
+ for (var i = propVals.length - 1; i >= 0; i--)
+ {
+ if (propVals[i])
+ {
emitCloseTag(i);
propVals[i] = false;
}
}
} // end processNextChars
-
- if (urls) {
- urls.forEach(function(urlData) {
+ if (urls)
+ {
+ urls.forEach(function (urlData)
+ {
var startIndex = urlData[0];
var url = urlData[1];
var urlLength = url.length;
processNextChars(startIndex - idx);
- assem.append('<a href="'+url.replace(/\"/g, '&quot;')+'">');
+ assem.append('<a href="' + url.replace(/\"/g, '&quot;') + '">');
processNextChars(urlLength);
assem.append('</a>');
});
@@ -218,7 +261,6 @@ function getHTMLFromAtext(pad, atext) {
return _processSpaces(assem.toString());
} // end getLineHTML
-
var pieces = [];
// Need to deal with constraints imposed on HTML lists; can
@@ -228,79 +270,98 @@ function getHTMLFromAtext(pad, atext) {
// so we want to do something reasonable there. We also
// want to deal gracefully with blank lines.
var lists = []; // e.g. [[1,'bullet'], [3,'bullet'], ...]
- for(var i=0;i<textLines.length;i++) {
+ for (var i = 0; i < textLines.length; i++)
+ {
var line = _analyzeLine(textLines[i], attribLines[i], apool);
var lineContent = getLineHTML(line.text, line.aline);
- if (line.listLevel || lists.length > 0) {
+ if (line.listLevel || lists.length > 0)
+ {
// do list stuff
var whichList = -1; // index into lists or -1
- if (line.listLevel) {
+ if (line.listLevel)
+ {
whichList = lists.length;
- for(var j=lists.length-1;j>=0;j--) {
- if (line.listLevel <= lists[j][0]) {
+ for (var j = lists.length - 1; j >= 0; j--)
+ {
+ if (line.listLevel <= lists[j][0])
+ {
whichList = j;
}
}
}
- if (whichList >= lists.length) {
+ if (whichList >= lists.length)
+ {
lists.push([line.listLevel, line.listTypeName]);
pieces.push('<ul><li>', lineContent || '<br>');
}
- else if (whichList == -1) {
- if (line.text) {
+ else if (whichList == -1)
+ {
+ if (line.text)
+ {
// non-blank line, end all lists
- pieces.push(new Array(lists.length+1).join('</li></ul\n>'));
+ pieces.push(new Array(lists.length + 1).join('</li></ul\n>'));
lists.length = 0;
pieces.push(lineContent, '<br>');
}
- else {
+ else
+ {
pieces.push('<br><br>');
}
}
- else {
- while (whichList < lists.length-1) {
+ else
+ {
+ while (whichList < lists.length - 1)
+ {
pieces.push('</li></ul>');
lists.length--;
}
pieces.push('</li><li>', lineContent || '<br>');
}
}
- else {
+ else
+ {
pieces.push(lineContent, '<br>');
}
}
- pieces.push(new Array(lists.length+1).join('</li></ul>'));
+ pieces.push(new Array(lists.length + 1).join('</li></ul>'));
return pieces.join('');
}
-function _analyzeLine(text, aline, apool) {
+function _analyzeLine(text, aline, apool)
+{
var line = {};
// identify list
var lineMarker = 0;
line.listLevel = 0;
- if (aline) {
+ if (aline)
+ {
var opIter = Changeset.opIterator(aline);
- if (opIter.hasNext()) {
+ if (opIter.hasNext())
+ {
var listType = Changeset.opAttributeValue(opIter.next(), 'list', apool);
- if (listType) {
+ if (listType)
+ {
lineMarker = 1;
listType = /([a-z]+)([12345678])/.exec(listType);
- if (listType) {
+ if (listType)
+ {
line.listTypeName = listType[1];
line.listLevel = Number(listType[2]);
}
}
}
}
- if (lineMarker) {
+ if (lineMarker)
+ {
line.text = text.substring(1);
line.aline = Changeset.subattribution(aline, 1);
}
- else {
+ else
+ {
line.text = text;
line.aline = aline;
}
@@ -308,37 +369,32 @@ function _analyzeLine(text, aline, apool) {
return line;
}
-exports.getPadHTMLDocument = function(padId, revNum, noDocType, callback) {
- padManager.getPad(padId, function(err, pad)
+exports.getPadHTMLDocument = function (padId, revNum, noDocType, callback)
+{
+ padManager.getPad(padId, function (err, pad)
{
- if(err)
+ if (err)
{
callback(err);
return;
}
-
- var head = (noDocType?'':'<!doctype html>\n')+
- '<html lang="en">\n'+
- (noDocType?'':
- '<head>\n'+
- '<meta charset="utf-8">\n'+
- '<style> * { font-family: arial, sans-serif;\n'+
- 'font-size: 13px;\n'+
- 'line-height: 17px; }</style>\n' +
- '</head>\n')+
- '<body>';
+
+ var head = (noDocType ? '' : '<!doctype html>\n') + '<html lang="en">\n' + (noDocType ? '' : '<head>\n' + '<meta charset="utf-8">\n' + '<style> * { font-family: arial, sans-serif;\n' + 'font-size: 13px;\n' + 'line-height: 17px; }</style>\n' + '</head>\n') + '<body>';
var foot = '</body>\n</html>\n';
- getPadHTML(pad, revNum, function (err, html) {
+ getPadHTML(pad, revNum, function (err, html)
+ {
callback(err, head + html + foot);
});
});
}
-function _escapeHTML(s) {
+function _escapeHTML(s)
+{
var re = /[&<>]/g;
- if (! re.MAP) {
+ if (!re.MAP)
+ {
// persisted across function calls!
re.MAP = {
'&': '&amp;',
@@ -346,53 +402,78 @@ function _escapeHTML(s) {
'>': '&gt;',
};
}
- return s.replace(re, function(c) { return re.MAP[c]; });
+
+ s = s.replace(re, function (c)
+ {
+ return re.MAP[c];
+ });
+
+ return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c)
+ {
+ return "&#" +c.charCodeAt(0) + ";"
+ });
}
// copied from ACE
-function _processSpaces(s) {
+
+
+function _processSpaces(s)
+{
var doesWrap = true;
- if (s.indexOf("<") < 0 && ! doesWrap) {
+ if (s.indexOf("<") < 0 && !doesWrap)
+ {
// short-cut
return s.replace(/ /g, '&nbsp;');
}
var parts = [];
- s.replace(/<[^>]*>?| |[^ <]+/g, function(m) { parts.push(m); });
- if (doesWrap) {
+ s.replace(/<[^>]*>?| |[^ <]+/g, function (m)
+ {
+ parts.push(m);
+ });
+ if (doesWrap)
+ {
var endOfLine = true;
var beforeSpace = false;
// last space in a run is normal, others are nbsp,
// end of line is nbsp
- for(var i=parts.length-1;i>=0;i--) {
+ for (var i = parts.length - 1; i >= 0; i--)
+ {
var p = parts[i];
- if (p == " ") {
- if (endOfLine || beforeSpace)
- parts[i] = '&nbsp;';
- endOfLine = false;
- beforeSpace = true;
+ if (p == " ")
+ {
+ if (endOfLine || beforeSpace) parts[i] = '&nbsp;';
+ endOfLine = false;
+ beforeSpace = true;
}
- else if (p.charAt(0) != "<") {
- endOfLine = false;
- beforeSpace = false;
+ else if (p.charAt(0) != "<")
+ {
+ endOfLine = false;
+ beforeSpace = false;
}
}
// beginning of line is nbsp
- for(var i=0;i<parts.length;i++) {
+ for (var i = 0; i < parts.length; i++)
+ {
var p = parts[i];
- if (p == " ") {
- parts[i] = '&nbsp;';
- break;
+ if (p == " ")
+ {
+ parts[i] = '&nbsp;';
+ break;
}
- else if (p.charAt(0) != "<") {
- break;
+ else if (p.charAt(0) != "<")
+ {
+ break;
}
}
}
- else {
- for(var i=0;i<parts.length;i++) {
+ else
+ {
+ for (var i = 0; i < parts.length; i++)
+ {
var p = parts[i];
- if (p == " ") {
- parts[i] = '&nbsp;';
+ if (p == " ")
+ {
+ parts[i] = '&nbsp;';
}
}
}
@@ -403,15 +484,19 @@ function _processSpaces(s) {
// copied from ACE
var _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/;
var _REGEX_SPACE = /\s/;
-var _REGEX_URLCHAR = new RegExp('('+/[-:@a-zA-Z0-9_.,~%+\/\\?=&#;()$]/.source+'|'+_REGEX_WORDCHAR.source+')');
-var _REGEX_URL = new RegExp(/(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source+_REGEX_URLCHAR.source+'*(?![:.,;])'+_REGEX_URLCHAR.source, 'g');
+var _REGEX_URLCHAR = new RegExp('(' + /[-:@a-zA-Z0-9_.,~%+\/\\?=&#;()$]/.source + '|' + _REGEX_WORDCHAR.source + ')');
+var _REGEX_URL = new RegExp(/(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source + _REGEX_URLCHAR.source + '*(?![:.,;])' + _REGEX_URLCHAR.source, 'g');
// returns null if no URLs, or [[startIndex1, url1], [startIndex2, url2], ...]
-function _findURLs(text) {
+
+
+function _findURLs(text)
+{
_REGEX_URL.lastIndex = 0;
var urls = null;
var execResult;
- while ((execResult = _REGEX_URL.exec(text))) {
+ while ((execResult = _REGEX_URL.exec(text)))
+ {
urls = (urls || []);
var startIndex = execResult.index;
var url = execResult[0];