diff options
author | John McLear <john@mclear.co.uk> | 2014-12-23 15:50:59 +0000 |
---|---|---|
committer | John McLear <john@mclear.co.uk> | 2014-12-23 15:50:59 +0000 |
commit | 1ab7dfdb7b9ab3f8126819c73dfda6e4cf80cce8 (patch) | |
tree | cf28fce9bc0d5857d647f07127849c4a5db45ff8 /bin | |
parent | b71fb3ad20a6d8225a2cb0fe3bac4d5dc9697df0 (diff) | |
parent | 9c174023fc16184360ba186c9df1636cdf35f9c9 (diff) | |
download | etherpad-lite-1ab7dfdb7b9ab3f8126819c73dfda6e4cf80cce8.zip |
Merge pull request #2358 from BjarniRunar/develop
Created dirty-db-cleaner.py - prunes old history from dirty.db files
Diffstat (limited to 'bin')
-rwxr-xr-x | bin/dirty-db-cleaner.py | 45 |
1 files changed, 45 insertions, 0 deletions
#!/usr/bin/python -u
#
# Created by Bjarni R. Einarsson, placed in the public domain. Go wild!
#
"""Prune old history from a dirty.db file.

Reads the file named on the command line, keeps only the last line seen
for each JSON "key", and writes the surviving lines to "<input>.new".
The input file itself is never modified.

Usage: dirty-db-cleaner.py /path/to/dirty.db
"""
import io
import json
import os
import sys

# One progress dot is written to stderr per this many input lines.
PROGRESS_INTERVAL = 10000

# Body of the usage note, one entry per printed line.
_USAGE_NOTE = (
    'Note: Will create a file named dirty.db.new in the same folder,',
    ' please make sure permissions are OK and a file by that',
    ' name does not exist already. This script works by omitting',
    ' duplicate lines from the dirty.db file, keeping only the',
    ' last (latest) instance. No revision data should be lost,',
    ' but be careful, make backups. If it breaks you get to keep',
    ' both pieces!',
)


def _print_usage(program):
    """Print the usage banner for *program* to stdout."""
    # Only single-argument print(...) calls are used in this script: they
    # behave the same as a Python 2 print statement and as the Python 3
    # print function, so the script runs under either interpreter.
    print('')
    print('Usage: %s /path/to/dirty.db' % program)
    print('')
    for text in _USAGE_NOTE:
        print(text)
    print('')


def _resolve_paths(argv):
    """Return (input_path, output_path) from *argv*, or None if unusable.

    The arguments are unusable when no path was given, when the input file
    does not exist, or when the output file ("<input>.new") already exists.
    """
    if len(argv) < 2:
        return None
    dirtydb_input = argv[1]
    dirtydb_output = '%s.new' % dirtydb_input
    # Explicit checks instead of assert: asserts are stripped under
    # "python -O", which would let an existing output file be overwritten.
    if not os.path.exists(dirtydb_input):
        return None
    if os.path.exists(dirtydb_output):
        return None
    return dirtydb_input, dirtydb_output


def _load_latest_lines(path):
    """Read *path* and return (latest, line_count).

    *latest* maps each record's "key" to the last raw line that carried it;
    *line_count* is the total number of lines read, blank ones included.

    Raises ValueError if a non-blank line is not valid JSON and KeyError if
    a record has no "key" member.
    """
    latest = {}
    line_count = 0
    # newline='\n' keeps line endings untranslated so lines are copied
    # through unchanged.
    # NOTE(review): assumes dirty.db is UTF-8 encoded JSON text - confirm.
    with io.open(path, 'r', encoding='utf-8', newline='\n') as fd:
        for line in fd:
            line_count += 1
            if line.strip():  # a blank line carries no record; skip it
                if not line.endswith('\n'):
                    # The final line of a file may lack its terminator.
                    # Without one it could be glued onto the following
                    # record when the lines are written back out.
                    line += '\n'
                latest[json.loads(line)['key']] = line
            if line_count % PROGRESS_INTERVAL == 0:
                sys.stderr.write('.')
    return latest, line_count


def _write_lines(path, lines):
    """Write the already newline-terminated *lines* to *path*."""
    with io.open(path, 'w', encoding='utf-8', newline='\n') as fd:
        fd.writelines(lines)


def main(argv=None):
    """Run the cleaner and return a process exit status.

    *argv* defaults to sys.argv.  Returns 0 on success, or 1 after printing
    the usage text when the arguments are missing or unusable.
    """
    if argv is None:
        argv = sys.argv
    paths = _resolve_paths(argv)
    if paths is None:
        _print_usage(argv[0] if argv else 'dirty-db-cleaner.py')
        return 1
    dirtydb_input, dirtydb_output = paths

    print('Reading %s' % dirtydb_input)
    dirtydb, lines = _load_latest_lines(dirtydb_input)
    print('')  # ends the row of progress dots
    print('OK, found %d unique keys in %d lines' % (len(dirtydb), lines))

    _write_lines(dirtydb_output, dirtydb.values())

    print('Wrote data to %s. All done!' % dirtydb_output)
    return 0


if __name__ == '__main__':
    sys.exit(main())