diff options
author | Peter 'Pita' Martischka <petermartischka@googlemail.com> | 2011-08-10 22:47:50 +0100 |
---|---|---|
committer | Peter 'Pita' Martischka <petermartischka@googlemail.com> | 2011-08-10 22:47:50 +0100 |
commit | 9cc3b543ccbdac7939072d268dc7d28106e9e3c4 (patch) | |
tree | a3f96c7b9583cac13ff73d4f09940ce14f779f87 /node | |
parent | ce1012438ef836dacbd17f98f0617ad4d19f2786 (diff) | |
parent | 3c46cce341516238a0439a081d178f526541d675 (diff) | |
download | etherpad-lite-9cc3b543ccbdac7939072d268dc7d28106e9e3c4.zip |
Merge branch 'master' of git://github.com/Pita/etherpad-lite into api
Diffstat (limited to 'node')
-rw-r--r-- | node/utils/ExportHtml.js | 373 |
1 files changed, 229 insertions, 144 deletions
diff --git a/node/utils/ExportHtml.js b/node/utils/ExportHtml.js index e74e404c..dce156ec 100644 --- a/node/utils/ExportHtml.js +++ b/node/utils/ExportHtml.js @@ -13,27 +13,30 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + var async = require("async"); var Changeset = require("./Changeset"); var padManager = require("../db/PadManager"); - -function getPadPlainText(pad, revNum) { - var atext = ((revNum !== undefined) ? pad.getInternalRevisionAText(revNum) : - pad.atext()); - var textLines = atext.text.slice(0,-1).split('\n'); +function getPadPlainText(pad, revNum) +{ + var atext = ((revNum !== undefined) ? pad.getInternalRevisionAText(revNum) : pad.atext()); + var textLines = atext.text.slice(0, -1).split('\n'); var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text); var apool = pad.pool(); var pieces = []; - for(var i=0;i<textLines.length;i++) { + for (var i = 0; i < textLines.length; i++) + { var line = _analyzeLine(textLines[i], attribLines[i], apool); - if (line.listLevel) { - var numSpaces = line.listLevel*2-1; + if (line.listLevel) + { + var numSpaces = line.listLevel * 2 - 1; var bullet = '*'; - pieces.push(new Array(numSpaces+1).join(' '), bullet, ' ', line.text, '\n'); + pieces.push(new Array(numSpaces + 1).join(' '), bullet, ' ', line.text, '\n'); } - else { + else + { pieces.push(line.text, '\n'); } } @@ -41,52 +44,68 @@ function getPadPlainText(pad, revNum) { return pieces.join(''); } -function getPadHTML(pad, revNum, callback) { +function getPadHTML(pad, revNum, callback) +{ var atext = pad.atext; var html; async.waterfall([ - // fetch revision atext - function (callback) { - if (revNum != undefined) { - pad.getInternalRevisionAText(revNum, function (err, revisionAtext) { - atext = revisionAtext; - callback(err); - }); - } else { - callback(null); - } - }, - - // convert atext to html - function (callback) { - html = getHTMLFromAtext(pad, atext); - callback(null); + // fetch revision atext + + + function (callback) + { + if (revNum != undefined) + { + pad.getInternalRevisionAText(revNum, function (err, revisionAtext) + { + atext = revisionAtext; + callback(err); + }); } - ], - // run final callback - function (err) { - callback(err, html); + else + { + callback(null); } - ); + }, + + // convert atext to html + + + function (callback) + { + html = getHTMLFromAtext(pad, atext); + callback(null); + }], + // run final callback + + + function (err) + { + callback(err, html); + }); } -function getHTMLFromAtext(pad, atext) { +function getHTMLFromAtext(pad, atext) +{ var apool = pad.apool(); - var textLines = atext.text.slice(0,-1).split('\n'); + var textLines = atext.text.slice(0, -1).split('\n'); var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text); - var tags = ['h1', 'h2', 'strong','em','u','s']; - var props = ['heading1', 'heading2', 'bold','italic','underline','strikethrough']; + var tags = ['h1', 'h2', 'strong', 'em', 'u', 's']; + var props = ['heading1', 'heading2', 'bold', 'italic', 'underline', 'strikethrough']; var anumMap = {}; - - props.forEach(function(propName, i) { - var propTrueNum = apool.putAttrib([propName,true], true); - if (propTrueNum >= 0) { + + props.forEach(function (propName, i) + { + var propTrueNum = apool.putAttrib([propName, true], true); + if (propTrueNum >= 0) + { anumMap[propTrueNum] = i; } }); - function getLineHTML(text, attribs) { + function getLineHTML(text, attribs) + { var propVals = [false, false, false]; var ENTER = 1; var STAY = 2; @@ -97,16 +116,18 @@ function getHTMLFromAtext(pad, atext) { // <b>Just bold<b> <b><i>Bold and italics</i></b> <i>Just italics</i> // becomes // <b>Just bold <i>Bold and italics</i></b> <i>Just italics</i> - var taker = Changeset.stringIterator(text); var assem = Changeset.stringAssembler(); - function emitOpenTag(i) { + function emitOpenTag(i) + { assem.append('<'); assem.append(tags[i]); assem.append('>'); } - function emitCloseTag(i) { + + function emitCloseTag(i) + { assem.append('</'); assem.append(tags[i]); assem.append('>'); @@ -115,101 +136,123 @@ function getHTMLFromAtext(pad, atext) { var urls = _findURLs(text); var idx = 0; - function processNextChars(numChars) { - if (numChars <= 0) { + + function processNextChars(numChars) + { + if (numChars <= 0) + { return; } - var iter = Changeset.opIterator(Changeset.subattribution(attribs, - idx, idx+numChars)); + var iter = Changeset.opIterator(Changeset.subattribution(attribs, idx, idx + numChars)); idx += numChars; - while (iter.hasNext()) { + while (iter.hasNext()) + { var o = iter.next(); var propChanged = false; - Changeset.eachAttribNumber(o.attribs, function(a) { - if (a in anumMap) { + Changeset.eachAttribNumber(o.attribs, function (a) + { + if (a in anumMap) + { var i = anumMap[a]; // i = 0 => bold, etc. - if (! propVals[i]) { + if (!propVals[i]) + { propVals[i] = ENTER; propChanged = true; } - else { + else + { propVals[i] = STAY; } } }); - for(var i=0;i<propVals.length;i++) { - if (propVals[i] === true) { + for (var i = 0; i < propVals.length; i++) + { + if (propVals[i] === true) + { propVals[i] = LEAVE; propChanged = true; } - else if (propVals[i] === STAY) { + else if (propVals[i] === STAY) + { propVals[i] = true; // set it back } } // now each member of propVal is in {false,LEAVE,ENTER,true} // according to what happens at start of span - - if (propChanged) { + if (propChanged) + { // leaving bold (e.g.) also leaves italics, etc. var left = false; - for(var i=0;i<propVals.length;i++) { + for (var i = 0; i < propVals.length; i++) + { var v = propVals[i]; - if (! left) { - if (v === LEAVE) { + if (!left) + { + if (v === LEAVE) + { left = true; } } - else { - if (v === true) { + else + { + if (v === true) + { propVals[i] = STAY; // tag will be closed and re-opened } } } - for(var i=propVals.length-1; i>=0; i--) { - if (propVals[i] === LEAVE) { + for (var i = propVals.length - 1; i >= 0; i--) + { + if (propVals[i] === LEAVE) + { emitCloseTag(i); propVals[i] = false; } - else if (propVals[i] === STAY) { + else if (propVals[i] === STAY) + { emitCloseTag(i); } } - for(var i=0; i<propVals.length; i++) { - if (propVals[i] === ENTER || propVals[i] === STAY) { + for (var i = 0; i < propVals.length; i++) + { + if (propVals[i] === ENTER || propVals[i] === STAY) + { emitOpenTag(i); propVals[i] = true; } } // propVals is now all {true,false} again } // end if (propChanged) - var chars = o.chars; - if (o.lines) { + if (o.lines) + { chars--; // exclude newline at end of line, if present } var s = taker.take(chars); assem.append(_escapeHTML(s)); } // end iteration over spans in line - - for(var i=propVals.length-1; i>=0; i--) { - if (propVals[i]) { + for (var i = propVals.length - 1; i >= 0; i--) + { + if (propVals[i]) + { emitCloseTag(i); propVals[i] = false; } } } // end processNextChars - - if (urls) { - urls.forEach(function(urlData) { + if (urls) + { + urls.forEach(function (urlData) + { var startIndex = urlData[0]; var url = urlData[1]; var urlLength = url.length; processNextChars(startIndex - idx); - assem.append('<a href="'+url.replace(/\"/g, '"')+'">'); + assem.append('<a href="' + url.replace(/\"/g, '"') + '">'); processNextChars(urlLength); assem.append('</a>'); }); @@ -218,7 +261,6 @@ function getHTMLFromAtext(pad, atext) { return _processSpaces(assem.toString()); } // end getLineHTML - var pieces = []; // Need to deal with constraints imposed on HTML lists; can @@ -228,79 +270,98 @@ function getHTMLFromAtext(pad, atext) { // so we want to do something reasonable there. We also // want to deal gracefully with blank lines. var lists = []; // e.g. [[1,'bullet'], [3,'bullet'], ...] - for(var i=0;i<textLines.length;i++) { + for (var i = 0; i < textLines.length; i++) + { var line = _analyzeLine(textLines[i], attribLines[i], apool); var lineContent = getLineHTML(line.text, line.aline); - if (line.listLevel || lists.length > 0) { + if (line.listLevel || lists.length > 0) + { // do list stuff var whichList = -1; // index into lists or -1 - if (line.listLevel) { + if (line.listLevel) + { whichList = lists.length; - for(var j=lists.length-1;j>=0;j--) { - if (line.listLevel <= lists[j][0]) { + for (var j = lists.length - 1; j >= 0; j--) + { + if (line.listLevel <= lists[j][0]) + { whichList = j; } } } - if (whichList >= lists.length) { + if (whichList >= lists.length) + { lists.push([line.listLevel, line.listTypeName]); pieces.push('<ul><li>', lineContent || '<br>'); } - else if (whichList == -1) { - if (line.text) { + else if (whichList == -1) + { + if (line.text) + { // non-blank line, end all lists - pieces.push(new Array(lists.length+1).join('</li></ul\n>')); + pieces.push(new Array(lists.length + 1).join('</li></ul\n>')); lists.length = 0; pieces.push(lineContent, '<br>'); } - else { + else + { pieces.push('<br><br>'); } } - else { - while (whichList < lists.length-1) { + else + { + while (whichList < lists.length - 1) + { pieces.push('</li></ul>'); lists.length--; } pieces.push('</li><li>', lineContent || '<br>'); } } - else { + else + { pieces.push(lineContent, '<br>'); } } - pieces.push(new Array(lists.length+1).join('</li></ul>')); + pieces.push(new Array(lists.length + 1).join('</li></ul>')); return pieces.join(''); } -function _analyzeLine(text, aline, apool) { +function _analyzeLine(text, aline, apool) +{ var line = {}; // identify list var lineMarker = 0; line.listLevel = 0; - if (aline) { + if (aline) + { var opIter = Changeset.opIterator(aline); - if (opIter.hasNext()) { + if (opIter.hasNext()) + { var listType = Changeset.opAttributeValue(opIter.next(), 'list', apool); - if (listType) { + if (listType) + { lineMarker = 1; listType = /([a-z]+)([12345678])/.exec(listType); - if (listType) { + if (listType) + { line.listTypeName = listType[1]; line.listLevel = Number(listType[2]); } } } } - if (lineMarker) { + if (lineMarker) + { line.text = text.substring(1); line.aline = Changeset.subattribution(aline, 1); } - else { + else + { line.text = text; line.aline = aline; } @@ -308,37 +369,32 @@ function _analyzeLine(text, aline, apool) { return line; } -exports.getPadHTMLDocument = function(padId, revNum, noDocType, callback) { - padManager.getPad(padId, function(err, pad) +exports.getPadHTMLDocument = function (padId, revNum, noDocType, callback) +{ + padManager.getPad(padId, function (err, pad) { - if(err) + if (err) { callback(err); return; } - - var head = (noDocType?'':'<!doctype html>\n')+ - '<html lang="en">\n'+ - (noDocType?'': - '<head>\n'+ - '<meta charset="utf-8">\n'+ - '<style> * { font-family: arial, sans-serif;\n'+ - 'font-size: 13px;\n'+ - 'line-height: 17px; }</style>\n' + - '</head>\n')+ - '<body>'; + + var head = (noDocType ? '' : '<!doctype html>\n') + '<html lang="en">\n' + (noDocType ? '' : '<head>\n' + '<meta charset="utf-8">\n' + '<style> * { font-family: arial, sans-serif;\n' + 'font-size: 13px;\n' + 'line-height: 17px; }</style>\n' + '</head>\n') + '<body>'; var foot = '</body>\n</html>\n'; - getPadHTML(pad, revNum, function (err, html) { + getPadHTML(pad, revNum, function (err, html) + { callback(err, head + html + foot); }); }); } -function _escapeHTML(s) { +function _escapeHTML(s) +{ var re = /[&<>]/g; - if (! re.MAP) { + if (!re.MAP) + { // persisted across function calls! re.MAP = { '&': '&', @@ -346,53 +402,78 @@ function _escapeHTML(s) { '>': '>', }; } - return s.replace(re, function(c) { return re.MAP[c]; }); + + s = s.replace(re, function (c) + { + return re.MAP[c]; + }); + + return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c) + { + return "&#" +c.charCodeAt(0) + ";" + }); } // copied from ACE -function _processSpaces(s) { + + +function _processSpaces(s) +{ var doesWrap = true; - if (s.indexOf("<") < 0 && ! doesWrap) { + if (s.indexOf("<") < 0 && !doesWrap) + { // short-cut return s.replace(/ /g, ' '); } var parts = []; - s.replace(/<[^>]*>?| |[^ <]+/g, function(m) { parts.push(m); }); - if (doesWrap) { + s.replace(/<[^>]*>?| |[^ <]+/g, function (m) + { + parts.push(m); + }); + if (doesWrap) + { var endOfLine = true; var beforeSpace = false; // last space in a run is normal, others are nbsp, // end of line is nbsp - for(var i=parts.length-1;i>=0;i--) { + for (var i = parts.length - 1; i >= 0; i--) + { var p = parts[i]; - if (p == " ") { - if (endOfLine || beforeSpace) - parts[i] = ' '; - endOfLine = false; - beforeSpace = true; + if (p == " ") + { + if (endOfLine || beforeSpace) parts[i] = ' '; + endOfLine = false; + beforeSpace = true; } - else if (p.charAt(0) != "<") { - endOfLine = false; - beforeSpace = false; + else if (p.charAt(0) != "<") + { + endOfLine = false; + beforeSpace = false; } } // beginning of line is nbsp - for(var i=0;i<parts.length;i++) { + for (var i = 0; i < parts.length; i++) + { var p = parts[i]; - if (p == " ") { - parts[i] = ' '; - break; + if (p == " ") + { + parts[i] = ' '; + break; } - else if (p.charAt(0) != "<") { - break; + else if (p.charAt(0) != "<") + { + break; } } } - else { - for(var i=0;i<parts.length;i++) { + else + { + for (var i = 0; i < parts.length; i++) + { var p = parts[i]; - if (p == " ") { - parts[i] = ' '; + if (p == " ") + { + parts[i] = ' '; } } } @@ -403,15 +484,19 @@ function _processSpaces(s) { // copied from ACE var _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/; var _REGEX_SPACE = /\s/; -var _REGEX_URLCHAR = new RegExp('('+/[-:@a-zA-Z0-9_.,~%+\/\\?=&#;()$]/.source+'|'+_REGEX_WORDCHAR.source+')'); -var _REGEX_URL = new RegExp(/(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source+_REGEX_URLCHAR.source+'*(?![:.,;])'+_REGEX_URLCHAR.source, 'g'); +var _REGEX_URLCHAR = new RegExp('(' + /[-:@a-zA-Z0-9_.,~%+\/\\?=&#;()$]/.source + '|' + _REGEX_WORDCHAR.source + ')'); +var _REGEX_URL = new RegExp(/(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source + _REGEX_URLCHAR.source + '*(?![:.,;])' + _REGEX_URLCHAR.source, 'g'); // returns null if no URLs, or [[startIndex1, url1], [startIndex2, url2], ...] -function _findURLs(text) { + + +function _findURLs(text) +{ _REGEX_URL.lastIndex = 0; var urls = null; var execResult; - while ((execResult = _REGEX_URL.exec(text))) { + while ((execResult = _REGEX_URL.exec(text))) + { urls = (urls || []); var startIndex = execResult.index; var url = execResult[0]; |