summaryrefslogtreecommitdiff
path: root/bin/convert.js
blob: 202ef04e213b1de21b4ae1100901039be3dda832 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
var startTime = new Date().getTime();
var fs = require("fs");
var ueberDB = require("../src/node_modules/ueberDB");
var mysql = require("../src/node_modules/ueberDB/node_modules/mysql");
var async = require("../src/node_modules/async");
var Changeset = require("ep_etherpad-lite/static/js/Changeset");
var randomString = require('ep_etherpad-lite/static/js/pad_utils').randomString;
var AttributePool = require("ep_etherpad-lite/static/js/AttributePool");

var settingsFile = process.argv[2];
var sqlOutputFile = process.argv[3];

//stop if the settings file is not set
if(!settingsFile || !sqlOutputFile)
{
  console.error("Use: node convert.js $SETTINGSFILE $SQLOUTPUT");
  process.exit(1);
}

log("read settings file...");
//read the settings file and parse the json
var settings = JSON.parse(fs.readFileSync(settingsFile, "utf8"));
log("done");

log("open output file...");
var sqlOutput = fs.openSync(sqlOutputFile, "w");
var sql = "SET CHARACTER SET UTF8;\n" +
          "CREATE TABLE IF NOT EXISTS `store` ( \n" +
          "`key` VARCHAR( 100 ) NOT NULL , \n" + 
          "`value` LONGTEXT NOT NULL , \n" + 
          "PRIMARY KEY (  `key` ) \n" +
          ") ENGINE = INNODB;\n" + 
          "START TRANSACTION;\n\n";
fs.writeSync(sqlOutput, sql);
log("done");

//set setings for ep db
var etherpadDB= new mysql.Client();
etherpadDB.host = settings.etherpadDB.host; 
etherpadDB.port = settings.etherpadDB.port;
etherpadDB.database = settings.etherpadDB.database; 
etherpadDB.user = settings.etherpadDB.user; 
etherpadDB.password = settings.etherpadDB.password; 

//get the timestamp once
var timestamp = new Date().getTime();

var padIDs;

async.series([
  //get all padids out of the database...
  function(callback)
  {
    log("get all padIds out of the database...");
    
    etherpadDB.query("SELECT ID FROM PAD_META", [], function(err, _padIDs)
    {
      padIDs = _padIDs;
      callback(err);
    });
  },
  function(callback)
  {
    log("done");
    
    //create a queue with a concurrency 100
    var queue = async.queue(function (padId, callback) 
    {
      convertPad(padId, function(err)
      {
        incrementPadStats();
        callback(err);
      });
    }, 100);
    
    //set the step callback as the queue callback
    queue.drain = callback;
    
    //add the padids to the worker queue
    for(var i=0,length=padIDs.length;i<length;i++)
    {
      queue.push(padIDs[i].ID);
    }
  }
], function(err)
{
  if(err) throw err;
  
  //write the groups
  var sql = "";
  for(var proID in proID2groupID)
  {
    var groupID = proID2groupID[proID];
    var subdomain = proID2subdomain[proID];
    
    sql+="REPLACE INTO store VALUES (" + etherpadDB.escape("group:" + groupID) + ", " + etherpadDB.escape(JSON.stringify(groups[groupID]))+ ");\n";
    sql+="REPLACE INTO store VALUES (" + etherpadDB.escape("mapper2group:subdomain:" + subdomain) + ", " + etherpadDB.escape(groupID)+ ");\n";
  }
  
  //close transaction
  sql+="COMMIT;";
  
  //end the sql file
  fs.writeSync(sqlOutput, sql, undefined, "utf-8");
  fs.closeSync(sqlOutput);
  
  log("finished.");
  process.exit(0);
});

function log(str)
{
  console.log((new Date().getTime() - startTime)/1000 + "\t" + str);
}

var padsDone = 0;

function incrementPadStats()
{
  padsDone++;
  
  if(padsDone%100 == 0)
  {
    var averageTime = Math.round(padsDone/((new Date().getTime() - startTime)/1000));
    log(padsDone + "/" + padIDs.length + "\t" + averageTime + " pad/s")
  }
}

var proID2groupID = {};
var proID2subdomain = {};
var groups = {};

function convertPad(padId, callback)
{
  var changesets = [];
  var changesetsMeta = [];
  var chatMessages = [];
  var authors = [];
  var apool;
  var subdomain;
  var padmeta;

  async.series([
    //get all needed db values
    function(callback)
    {
      async.parallel([
        //get the pad revisions
        function(callback)
        {
          var sql = "SELECT * FROM `PAD_CHAT_TEXT` WHERE NUMID = (SELECT `NUMID` FROM `PAD_CHAT_META` WHERE ID=?)";
          
          etherpadDB.query(sql, [padId], function(err, results)
          {
            if(!err) 
            {
              try
              {
                //parse the pages
                for(var i=0,length=results.length;i<length;i++)
                {
                  parsePage(chatMessages, results[i].PAGESTART, results[i].OFFSETS, results[i].DATA, true);
                }
              }catch(e) {err = e}
            }
            
            callback(err);
          });
        },
        //get the chat entries
        function(callback)
        {
          var sql = "SELECT * FROM `PAD_REVS_TEXT` WHERE NUMID = (SELECT `NUMID` FROM `PAD_REVS_META` WHERE ID=?)";
          
          etherpadDB.query(sql, [padId], function(err, results)
          {
            if(!err) 
            {
              try
              {
                //parse the pages
                for(var i=0,length=results.length;i<length;i++)
                {
                  parsePage(changesets, results[i].PAGESTART, results[i].OFFSETS, results[i].DATA, false);
                }
              }catch(e) {err = e}
            }
            
            callback(err);
          });
        },
        //get the pad revisions meta data
        function(callback)
        {
          var sql = "SELECT * FROM `PAD_REVMETA_TEXT` WHERE NUMID = (SELECT `NUMID` FROM `PAD_REVMETA_META` WHERE ID=?)";
          
          etherpadDB.query(sql, [padId], function(err, results)
          {
            if(!err) 
            {
              try
              {
                //parse the pages
                for(var i=0,length=results.length;i<length;i++)
                {
                  parsePage(changesetsMeta, results[i].PAGESTART, results[i].OFFSETS, results[i].DATA, true);
                }
              }catch(e) {err = e}
            }
            
            callback(err);
          });
        },
        //get the attribute pool of this pad
        function(callback)
        {
          var sql = "SELECT `JSON` FROM `PAD_APOOL` WHERE `ID` = ?";
          
          etherpadDB.query(sql, [padId], function(err, results)
          {
            if(!err)
            {
              try
              {
                apool=JSON.parse(results[0].JSON).x;
              }catch(e) {err = e}
            }
            
            callback(err);
          });
        },
        //get the authors informations
        function(callback)
        {
          var sql = "SELECT * FROM `PAD_AUTHORS_TEXT` WHERE NUMID = (SELECT `NUMID` FROM `PAD_AUTHORS_META` WHERE ID=?)";
          
          etherpadDB.query(sql, [padId], function(err, results)
          {
            if(!err) 
            {
              try
              {
                //parse the pages
                for(var i=0, length=results.length;i<length;i++)
                {
                  parsePage(authors, results[i].PAGESTART, results[i].OFFSETS, results[i].DATA, true);
                }
              }catch(e) {err = e}
            }
            
            callback(err);
          });
        },
        //get the pad information
        function(callback)
        {
          var sql = "SELECT JSON FROM `PAD_META` WHERE ID=?";
          
          etherpadDB.query(sql, [padId], function(err, results)
          {
            if(!err) 
            {
              try
              {
                padmeta = JSON.parse(results[0].JSON).x;
              }catch(e) {err = e}
            }
            
            callback(err);
          });
        },
        //get the subdomain
        function(callback)
        {
          //skip if this is no proPad
          if(padId.indexOf("$") == -1)
          {
            callback();
            return;
          }
          
          //get the proID out of this padID
          var proID = padId.split("$")[0];
        
          var sql = "SELECT subDomain FROM pro_domains WHERE ID = ?";
          
          etherpadDB.query(sql, [proID], function(err, results)
          {
            if(!err)
            {
              subdomain = results[0].subDomain;
            }
            
            callback(err);
          });
        }
      ], callback);
    },
    function(callback)
    {
      //saves all values that should be written to the database
      var values = {};
      
      //this is a pro pad, let's convert it to a group pad
      if(padId.indexOf("$") != -1)
      {
        var padIdParts = padId.split("$");
        var proID = padIdParts[0];
        var padName = padIdParts[1];
        
        var groupID
        
        //this proID is not converted so far, do it
        if(proID2groupID[proID] == null)
        {
          groupID = "g." + randomString(16);
          
          //create the mappers for this new group
          proID2groupID[proID] = groupID;
          proID2subdomain[proID] = subdomain;
          groups[groupID] = {pads: {}};
        }
        
        //use the generated groupID;
        groupID = proID2groupID[proID];
        
        //rename the pad
        padId = groupID + "$" + padName;
        
        //set the value for this pad in the group
        groups[groupID].pads[padId] = 1;
      }
      
      try
      {
        var newAuthorIDs = {};
        var oldName2newName = {};
        
        //replace the authors with generated authors
        // we need to do that cause where the original etherpad saves pad local authors, the new (lite) etherpad uses them global
        for(var i in apool.numToAttrib)
        {
          var key = apool.numToAttrib[i][0];
          var value = apool.numToAttrib[i][1];
          
          //skip non authors and anonymous authors
          if(key != "author" || value == "")
            continue;
            
          //generate new author values
          var authorID = "a." + randomString(16);
          var authorColorID = authors[i].colorId || Math.floor(Math.random()*32);
          var authorName = authors[i].name || null;
          
          //overwrite the authorID of the attribute pool
          apool.numToAttrib[i][1] = authorID;
          
          //write the author to the database
          values["globalAuthor:" + authorID] = {"colorId" : authorColorID, "name": authorName, "timestamp": timestamp};
          
          //save in mappers
          newAuthorIDs[i] = authorID;
          oldName2newName[value] = authorID;
        }
        
        //save all revisions
        for(var i=0;i<changesets.length;i++)
        {
          values["pad:" + padId + ":revs:" + i] = {changeset: changesets[i], 
                                                   meta : {
                                                     author: newAuthorIDs[changesetsMeta[i].a],
                                                     timestamp: changesetsMeta[i].t,
                                                     atext: changesetsMeta[i].atext || undefined
                                                   }};
        }
        
        //save all chat messages
        for(var i=0;i<chatMessages.length;i++)
        {
          values["pad:" + padId + ":chat:" + i] = {"text": chatMessages[i].lineText, 
                                                   "userId": oldName2newName[chatMessages[i].userId], 
                                                   "time": chatMessages[i].time}
        }
        
        //generate the latest atext
        var fullAPool = (new AttributePool()).fromJsonable(apool);
        var keyRev = Math.floor(padmeta.head / padmeta.keyRevInterval) * padmeta.keyRevInterval;
        var atext = changesetsMeta[keyRev].atext;
        var curRev = keyRev;
        while (curRev < padmeta.head) 
        {
          curRev++;
          var changeset = changesets[curRev];
          atext = Changeset.applyToAText(changeset, atext, fullAPool);
        }
        
        values["pad:" + padId] = {atext: atext, 
                                  pool: apool, 
                                  head: padmeta.head, 
                                  chatHead: padmeta.numChatMessages }
      
      }
      catch(e)
      {
        console.error("Error while converting pad " + padId + ", pad skipped");
        console.error(e.stack ? e.stack : JSON.stringify(e));
        callback();
        return;
      }
      
      var sql = "";
      for(var key in values)
      {
        sql+="REPLACE INTO store VALUES (" + etherpadDB.escape(key) + ", " + etherpadDB.escape(JSON.stringify(values[key]))+ ");\n";
      }
      
      fs.writeSync(sqlOutput, sql, undefined, "utf-8");
      callback();
    }
  ], callback);
}

/**
 * This parses a Page like Etherpad uses them in the databases
 * The offsets descripes the length of a unit in the page, the data are
 * all values behind each other
 */
function parsePage(array, pageStart, offsets, data, json)
{
  var start = 0;
  var lengths = offsets.split(",");
  
  for(var i=0;i<lengths.length;i++)
  {
    var unitLength = lengths[i];
    
    //skip empty units
    if(unitLength == "")
      continue;
    
    //parse the number
    unitLength = Number(unitLength);
      
    //cut the unit out of data
    var unit = data.substr(start, unitLength);
    
    //put it into the array
    array[pageStart + i] = json ? JSON.parse(unit) : unit;
    
    //update start
    start+=unitLength;
  }
}