summary | refs | log | tree | commit | diff
path: root/bin
diff options
context:
space:
mode:
author: John McLear <john@mclear.co.uk> 2014-12-23 15:50:59 +0000
committer: John McLear <john@mclear.co.uk> 2014-12-23 15:50:59 +0000
commit: 1ab7dfdb7b9ab3f8126819c73dfda6e4cf80cce8 (patch)
tree: cf28fce9bc0d5857d647f07127849c4a5db45ff8 /bin
parent: b71fb3ad20a6d8225a2cb0fe3bac4d5dc9697df0 (diff)
parent: 9c174023fc16184360ba186c9df1636cdf35f9c9 (diff)
download: etherpad-lite-1ab7dfdb7b9ab3f8126819c73dfda6e4cf80cce8.zip
Merge pull request #2358 from BjarniRunar/develop
Created dirty-db-cleaner.py - prunes old history from dirty.db files
Diffstat (limited to 'bin')
-rwxr-xr-x bin/dirty-db-cleaner.py 45
1 files changed, 45 insertions, 0 deletions
diff --git a/bin/dirty-db-cleaner.py b/bin/dirty-db-cleaner.py
new file mode 100755
index 00000000..8ed9c506
--- /dev/null
+++ b/bin/dirty-db-cleaner.py
@@ -0,0 +1,45 @@
+#!/usr/bin/python -u
+#
+# Created by Bjarni R. Einarsson, placed in the public domain. Go wild!
+#
+import json
+import os
+import sys
+
+# Validate the command line: the input file must exist and the output file
+# must not. Explicit checks are used rather than assert (stripped by
+# "python -O") or a bare except (which also traps KeyboardInterrupt).
+dirtydb_input = sys.argv[1] if len(sys.argv) > 1 else ''
+dirtydb_output = '%s.new' % dirtydb_input
+if not os.path.exists(dirtydb_input) or os.path.exists(dirtydb_output):
+    print('')
+    print('Usage: %s /path/to/dirty.db' % sys.argv[0])
+    print('')
+    print('Note: Will create a file named dirty.db.new in the same folder,')
+    print(' please make sure permissions are OK and a file by that')
+    print(' name does not exist already. This script works by omitting')
+    print(' duplicate lines from the dirty.db file, keeping only the')
+    print(' last (latest) instance. No revision data should be lost,')
+    print(' but be careful, make backups. If it breaks you get to keep')
+    print(' both pieces!')
+    print('')
+    sys.exit(1)
+
+# Keep only the last line seen for each key; later lines replace earlier ones.
+dirtydb, lines = {}, 0
+with open(dirtydb_input, 'r') as fd:
+    print('Reading %s' % dirtydb_input)
+    for lines, line in enumerate(fd, 1):
+        if line.strip():  # a blank line holds no record; don't crash on it
+            # Force a trailing newline so records never fuse when rewritten.
+            dirtydb[json.loads(line)['key']] = line.rstrip('\n') + '\n'
+        if lines % 10000 == 0:
+            sys.stderr.write('.')
+print('')
+print('OK, found %d unique keys in %d lines' % (len(dirtydb), lines))
+
+# Write one surviving line per key, in the dict's own iteration order.
+with open(dirtydb_output, 'w') as fd:
+    fd.writelines(dirtydb.values())
+
+print('Wrote data to %s. All done!' % dirtydb_output)