Now -p pwlfile works and output is sorted, also number of occurrences of word shown.

author: Tapio Lehtonen <tale@debian.org> 2009-09-09 18:17:17 +0000
committer: Tapio Lehtonen <tale@debian.org> 2009-09-09 18:17:17 +0000
commit: 0746fed997ccfa0d63178e5ccd731434ec1a62b9 (patch)
tree: fcb16a8e864306fe97d93a1a05d1a14aecad2e81 /po/fi
parent: a5affb8cd17b60a9489d9f48db2a3ebf3eb549f4 (diff)
download: installation-guide-0746fed997ccfa0d63178e5ccd731434ec1a62b9.zip
1 files changed, 39 insertions, 6 deletions
diff --git a/po/fi/make-fi-all.py b/po/fi/make-fi-all.py
index 0a288ec34..89d76e4c7 100755
--- a/po/fi/make-fi-all.py
+++ b/po/fi/make-fi-all.py
@@ -32,7 +32,7 @@ Print out unknown words found in filename. Options
 -p and --wordlist can be given several times."""
     lhelpstr = """What language is the text to proofread? For example fi_FI."""
     parser = OptionParser(usage=usage)
-    parser.set_defaults(verbose=False)
+    parser.set_defaults(verbose=False, listonly=True, wordlists=[])
     parser.add_option("-p", "--wordlist", 
                       action="append", type="string", dest="wordlists",
                       help="File with OK words one per line.",
@@ -40,6 +40,9 @@ Print out unknown words found in filename. Options
     parser.add_option("-d", "--language", 
                       action="store", type="string", dest="language",
                       help=lhelpstr)
+    parser.add_option("-l", "--listonly", 
+                      action="store_true", dest="listonly",
+                      help=lhelpstr)
     parser.add_option("-v", action="store_true", dest="verbose",
                       help="Be verbose.")
     parser.add_option("-q", action="store_false", dest="verbose",
@@ -65,7 +68,11 @@ Print out unknown words found in filename. Options
 
 
 if __name__ == "__main__":
-    
+    """Use like this
+    ./make-fi-all.py -d fi_FI fi_all.po > foo.txt 2>&1
+    Then do 
+    sort foo.txt | uniq | tee unknown_words.txt | wc"""
+
     from enchant.checker import SpellChecker
 
     o, a = handleCommandLine()
@@ -79,18 +86,30 @@ if __name__ == "__main__":
         print "Erroria", value
         sys.exit(4)
 
-    #text = textF.readlines()
     if o.verbose:
         import sys
         print "Language is", o.language
         print "STDOUT encoding ", sys.stdout.encoding
-        print "sys default encoding", sys.getdefaultencoding()
+        print "sys default encoding ", sys.getdefaultencoding()
     checker = SpellChecker(o.language)
     if o.verbose:
         if checker.wants_unicode:
             print "Checker wants Unicode text to check."
         else:
             print "Checker wants normal strings text to check."
+    #Read in Personal word lists, may be several files
+    for pN in o.wordlists:
+        try:
+            pF = open(pN, "r")
+        except IOError, value:
+            print "Error, personal word list could not be opened for reading ", pN
+            sys.exit(5)
+        for word in pF.readlines():
+            if len(word) > 0:
+                if (word[0] != "#"):
+                    checker.add_to_personal(word)
+
+    unknWords={}
     for text in textF.readlines():
         utext = unicode(text, "utf-8")
         if o.verbose:
@@ -102,6 +121,20 @@ if __name__ == "__main__":
             print
         checker.set_text(utext)
         for err in checker:
-            print err.word.encode("utf-8")
-
+            if o.verbose:
+                print "isinstance basestring", isinstance(err.word, basestring)
+                print "isinstance str", isinstance(err.word, str)
+                print "isinstance unicode", isinstance(err.word, unicode)
+                print err.word.encode("utf-8")
+            if unknWords.has_key(err.word):
+                unknWords[err.word] += 1
+            else:
+                unknWords[err.word] = 1
     textF.close()
+    
+    #Sort alphabetically and print out as count word
+    wlist = unknWords.keys()
+    wlist.sort()
+    for w in wlist:
+        print str(unknWords[w]).rjust(8).encode("utf-8"), " ",
+        print w.encode("utf-8")
author	Tapio Lehtonen <tale@debian.org>	2009-09-09 18:17:17 +0000
committer	Tapio Lehtonen <tale@debian.org>	2009-09-09 18:17:17 +0000
commit	0746fed997ccfa0d63178e5ccd731434ec1a62b9 (patch)
tree	fcb16a8e864306fe97d93a1a05d1a14aecad2e81 /po/fi
parent	a5affb8cd17b60a9489d9f48db2a3ebf3eb549f4 (diff)
download	installation-guide-0746fed997ccfa0d63178e5ccd731434ec1a62b9.zip