diff options
author | Bjarni R. Einarsson <bre@klaki.net> | 2014-12-01 22:39:42 +0000 |
---|---|---|
committer | Bjarni R. Einarsson <bre@klaki.net> | 2014-12-01 22:39:42 +0000 |
commit | 9c174023fc16184360ba186c9df1636cdf35f9c9 (patch) | |
tree | 8895b59139969550c47d21ab233cd2df0366211f | |
parent | c1fdd7ff79120318af6c6ea2e1fa0a08e17b2896 (diff) | |
download | etherpad-lite-9c174023fc16184360ba186c9df1636cdf35f9c9.zip |
Added dirty-db-cleaner.py
-rwxr-xr-x | bin/dirty-db-cleaner.py | 45 |
1 files changed, 45 insertions, 0 deletions
#!/usr/bin/python -u
#
# Created by Bjarni R. Einarsson, placed in the public domain. Go wild!
#
# bin/dirty-db-cleaner.py
#
# Compacts a dirty.db file: the file is read line by line, each line is
# parsed as JSON, and only the last line seen for every distinct 'key' is
# kept.  The survivors are written, byte for byte, to '<input>.new'.
#
# The script runs unchanged on Python 2.6+ and Python 3: every print() call
# takes exactly one argument, which means the same thing under both the
# Python 2 print statement and the Python 3 print function.
#
import json
import os
import sys

# Printed verbatim (with the program name substituted) on any usage error.
USAGE_LINES = (
    '',
    'Usage: %s /path/to/dirty.db',
    '',
    'Note: Will create a file named dirty.db.new in the same folder,',
    ' please make sure permissions are OK and a file by that',
    ' name does not exist already. This script works by omitting',
    ' duplicate lines from the dirty.db file, keeping only the',
    ' last (latest) instance. No revision data should be lost,',
    ' but be careful, make backups. If it breaks you get to keep',
    ' both pieces!',
    '',
)

# A progress dot is written to stderr every this many input lines.
PROGRESS_INTERVAL = 10000


def _read_latest(path):
    """Return ``(records, line_count)`` for the dirty.db file at *path*.

    *records* maps each record's 'key' to the raw bytes of the LAST line
    carrying that key.  *line_count* is the number of lines read, blank
    lines included.

    Raises ValueError, naming the file and line number, when a non-blank
    line is not valid UTF-8 JSON or has no 'key' member.
    """
    records = {}
    line_count = 0
    # Binary mode: records are copied through untouched, so there is no
    # locale-dependent decoding and no newline translation on any platform.
    with open(path, 'rb') as fd:
        for raw in fd:
            line_count += 1
            if line_count % PROGRESS_INTERVAL == 0:
                sys.stderr.write('.')
            if not raw.strip():
                # Blank lines carry no record; skip them instead of
                # crashing inside json.loads().
                continue
            try:
                key = json.loads(raw.decode('utf-8'))['key']
            except (ValueError, KeyError, TypeError) as err:
                raise ValueError('%s, line %d: not a valid dirty.db record: %s'
                                 % (path, line_count, err))
            if not raw.endswith(b'\n'):
                # The final line of a file may lack its newline.  Output
                # order is not the input order, so without this fix-up the
                # record could be glued onto whichever one follows it.
                raw += b'\n'
            records[key] = raw
    return records, line_count


def _write_records(path, records):
    """Write every raw line held in *records* to *path* (binary mode)."""
    with open(path, 'wb') as fd:
        fd.writelines(records.values())


def main(argv):
    """Run the cleaner; *argv* has the same layout as ``sys.argv``.

    Returns the process exit status: 0 on success, 1 when the arguments
    are unusable (no path given, input missing, or '<input>.new' already
    present).  In the failure case the usage text is printed to stdout
    and a one-line reason to stderr; nothing is written to disk.
    """
    program = argv[0] if argv else 'dirty-db-cleaner.py'

    # Explicit checks rather than assert: assertions vanish under
    # `python -O`, which would silently disable the overwrite guard.
    problem = None
    dirtydb_input = dirtydb_output = None
    if len(argv) < 2:
        problem = 'no dirty.db path given'
    else:
        dirtydb_input = argv[1]
        dirtydb_output = '%s.new' % dirtydb_input
        if not os.path.exists(dirtydb_input):
            problem = '%s does not exist' % dirtydb_input
        elif os.path.exists(dirtydb_output):
            problem = '%s already exists' % dirtydb_output

    if problem is not None:
        print('\n'.join(USAGE_LINES) % program)
        sys.stderr.write('Error: %s\n' % problem)
        return 1

    print('Reading %s' % dirtydb_input)
    records, line_count = _read_latest(dirtydb_input)
    print('')
    print('OK, found %d unique keys in %d lines' % (len(records), line_count))

    _write_records(dirtydb_output, records)

    print('Wrote data to %s. All done!' % dirtydb_output)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))