summary | refs | log | tree | commit | diff
path: root/bin
diff options
context:
space:
mode:
author: Bjarni R. Einarsson <bre@klaki.net> 2014-12-01 22:39:42 +0000
committer: Bjarni R. Einarsson <bre@klaki.net> 2014-12-01 22:39:42 +0000
commit: 9c174023fc16184360ba186c9df1636cdf35f9c9 (patch)
tree: 8895b59139969550c47d21ab233cd2df0366211f /bin
parent: c1fdd7ff79120318af6c6ea2e1fa0a08e17b2896 (diff)
download: etherpad-lite-9c174023fc16184360ba186c9df1636cdf35f9c9.zip
Added dirty-db-cleaner.py
Diffstat (limited to 'bin')
-rwxr-xr-x bin/dirty-db-cleaner.py 45
1 files changed, 45 insertions, 0 deletions
diff --git a/bin/dirty-db-cleaner.py b/bin/dirty-db-cleaner.py
new file mode 100755
index 00000000..8ed9c506
--- /dev/null
+++ b/bin/dirty-db-cleaner.py
@@ -0,0 +1,45 @@
+#!/usr/bin/python -u
+#
+# Created by Bjarni R. Einarsson, placed in the public domain. Go wild!
+# Rewrites a dirty.db file into <input>.new, keeping only the last line seen for each 'key'.
+import json
+import os
+import sys
+
+try:
+ dirtydb_input = sys.argv[1]  # raises IndexError when no path argument is given
+ dirtydb_output = '%s.new' % dirtydb_input  # output path is the input path plus '.new'
+ assert(os.path.exists(dirtydb_input))  # input must exist; NOTE(review): asserts are skipped under python -O
+ assert(not os.path.exists(dirtydb_output))  # refuse to overwrite an existing .new file
+except:  # NOTE(review): bare except -- any failure in the try body ends up printing the usage text
+ print  # Python 2 print statement: emits an empty line
+ print 'Usage: %s /path/to/dirty.db' % sys.argv[0]
+ print
+ print 'Note: Will create a file named dirty.db.new in the same folder,'
+ print ' please make sure permissions are OK and a file by that'
+ print ' name does not exist already. This script works by omitting'
+ print ' duplicate lines from the dirty.db file, keeping only the'
+ print ' last (latest) instance. No revision data should be lost,'
+ print ' but be careful, make backups. If it breaks you get to keep'
+ print ' both pieces!'
+ print
+ sys.exit(1)  # non-zero exit status after showing usage
+
+dirtydb = {}  # maps each record's 'key' to the most recent raw line carrying that key
+lines = 0  # total number of input lines read so far
+with open(dirtydb_input, 'r') as fd:
+ print 'Reading %s' % dirtydb_input
+ for line in fd:
+ lines += 1
+ data = json.loads(line)  # each line is parsed as JSON; NOTE(review): a blank or malformed line raises here
+ dirtydb[data['key']] = line  # a later line with the same key replaces the earlier one
+ if lines % 10000 == 0:
+ sys.stderr.write('.')  # progress dot on stderr every 10000 lines
+print
+print 'OK, found %d unique keys in %d lines' % (len(dirtydb), lines)
+
+with open(dirtydb_output, 'w') as fd:
+ for data in dirtydb.values():  # 'data' is the stored raw line; NOTE(review): order follows dict iteration, may differ from input order
+ fd.write(data)  # written verbatim, exactly as read from the input
+
+print 'Wrote data to %s. All done!' % dirtydb_output