diff options
Diffstat (limited to 'misc/openlayers/tools/exampleparser.py')
-rwxr-xr-x | misc/openlayers/tools/exampleparser.py | 251 |
1 files changed, 0 insertions, 251 deletions
diff --git a/misc/openlayers/tools/exampleparser.py b/misc/openlayers/tools/exampleparser.py deleted file mode 100755 index 6ef123a..0000000 --- a/misc/openlayers/tools/exampleparser.py +++ /dev/null @@ -1,251 +0,0 @@ -#!/usr/bin/env python - -import sys -import os -import re -import time -from xml.dom.minidom import Document - -try: - import xml.etree.ElementTree as ElementTree -except ImportError: - try: - import cElementTree as ElementTree - except ImportError: - try: - import elementtree.ElementTree as ElementTree - except ImportError: - import lxml.etree as ElementTree - -missing_deps = False -try: - import json -except ImportError: - try: - import simplejson as json - except ImportError, E: - missing_deps = E - -try: - from BeautifulSoup import BeautifulSoup -except ImportError, E: - missing_deps = E - -feedName = "example-list.xml" -feedPath = "http://openlayers.org/dev/examples/" - -def getListOfExamples(relPath): - """ - returns list of .html filenames within a given path - excludes example-list.html - """ - examples = os.listdir(relPath) - examples = [example for example in examples if example.endswith('.html') and example != "example-list.html"] - return examples - - -def getExampleHtml(path): - """ - returns html of a specific example - """ - print '.', - f = open(path) - html = f.read() - f.close() - return html - - -def extractById(soup, tagId, value=None): - """ - returns full contents of a particular tag id - """ - beautifulTag = soup.find(id=tagId) - if beautifulTag: - if beautifulTag.contents: - value = str(beautifulTag.renderContents()).strip() - value = value.replace('\t','') - value = value.replace('\n','') - return value - -def getRelatedClasses(html): - """ - parses the html, and returns a list of all OpenLayers Classes - used within (ie what parts of OL the javascript uses). - """ - rawstr = r'''(?P<class>OpenLayers\..*?)\(''' - return re.findall(rawstr, html) - -def parseHtml(html,ids): - """ - returns dictionary of items of interest - """ - soup = BeautifulSoup(html) - d = {} - for tagId in ids: - d[tagId] = extractById(soup,tagId) - #classes should eventually be parsed from docs - not automatically created. - classes = getRelatedClasses(html) - d['classes'] = classes - return d - -def getGitInfo(exampleDir, exampleName): - orig = os.getcwd() - os.chdir(exampleDir) - h = os.popen("git log -n 1 --pretty=format:'%an|%ai' " + exampleName) - os.chdir(orig) - log = h.read() - h.close() - d = {} - parts = log.split("|") - d["author"] = parts[0] - # compensate for spaces in git log time - td = parts[1].split(" ") - td.insert(1, "T") - d["date"] = "".join(td) - return d - -def createFeed(examples): - doc = Document() - atomuri = "http://www.w3.org/2005/Atom" - feed = doc.createElementNS(atomuri, "feed") - feed.setAttribute("xmlns", atomuri) - title = doc.createElementNS(atomuri, "title") - title.appendChild(doc.createTextNode("OpenLayers Examples")) - feed.appendChild(title) - link = doc.createElementNS(atomuri, "link") - link.setAttribute("rel", "self") - link.setAttribute("href", feedPath + feedName) - - modtime = time.strftime("%Y-%m-%dT%I:%M:%SZ", time.gmtime()) - id = doc.createElementNS(atomuri, "id") - id.appendChild(doc.createTextNode("%s%s#%s" % (feedPath, feedName, modtime))) - feed.appendChild(id) - - updated = doc.createElementNS(atomuri, "updated") - updated.appendChild(doc.createTextNode(modtime)) - feed.appendChild(updated) - - examples.sort(key=lambda x:x["modified"]) - for example in sorted(examples, key=lambda x:x["modified"], reverse=True): - entry = doc.createElementNS(atomuri, "entry") - - title = doc.createElementNS(atomuri, "title") - title.appendChild(doc.createTextNode(example["title"] or example["example"])) - entry.appendChild(title) - - tags = doc.createElementNS(atomuri, "tags") - tags.appendChild(doc.createTextNode(example["tags"] or example["example"])) - entry.appendChild(tags) - - link = doc.createElementNS(atomuri, "link") - link.setAttribute("href", "%s%s" % (feedPath, example["example"])) - entry.appendChild(link) - - summary = doc.createElementNS(atomuri, "summary") - summary.appendChild(doc.createTextNode(example["shortdesc"] or example["example"])) - entry.appendChild(summary) - - updated = doc.createElementNS(atomuri, "updated") - updated.appendChild(doc.createTextNode(example["modified"])) - entry.appendChild(updated) - - author = doc.createElementNS(atomuri, "author") - name = doc.createElementNS(atomuri, "name") - name.appendChild(doc.createTextNode(example["author"])) - author.appendChild(name) - entry.appendChild(author) - - id = doc.createElementNS(atomuri, "id") - id.appendChild(doc.createTextNode("%s%s#%s" % (feedPath, example["example"], example["modified"]))) - entry.appendChild(id) - - feed.appendChild(entry) - - doc.appendChild(feed) - return doc - -def wordIndex(examples): - """ - Create an inverted index based on words in title and shortdesc. Keys are - lower cased words. Values are dictionaries with example index keys and - count values. - """ - index = {} - unword = re.compile("\\W+") - keys = ["shortdesc", "title", "tags"] - for i in range(len(examples)): - for key in keys: - text = examples[i][key] - if text: - words = unword.split(text) - for word in words: - if word: - word = word.lower() - if index.has_key(word): - if index[word].has_key(i): - index[word][i] += 1 - else: - index[word][i] = 1 - else: - index[word] = {i: 1} - return index - -if __name__ == "__main__": - - if missing_deps: - print "This script requires json or simplejson and BeautifulSoup. You don't have them. \n(%s)" % E - sys.exit() - - if len(sys.argv) == 3: - inExampleDir = sys.argv[1] - outExampleDir = sys.argv[2] - else: - inExampleDir = "../examples" - outExampleDir = "../examples" - - outFile = open(os.path.join(outExampleDir, "example-list.js"), "w") - - print 'Reading examples from %s and writing out to %s' % (inExampleDir, outFile.name) - - exampleList = [] - docIds = ['title','shortdesc','tags'] - - examples = getListOfExamples(inExampleDir) - - modtime = time.strftime("%Y-%m-%dT%I:%M:%SZ", time.gmtime()) - - for example in examples: - path = os.path.join(inExampleDir, example) - html = getExampleHtml(path) - tagvalues = parseHtml(html,docIds) - tagvalues['example'] = example - # add in author/date info - d = getGitInfo(inExampleDir, example) - tagvalues["author"] = d["author"] or "anonymous" - tagvalues["modified"] = d["date"] or modtime - tagvalues['link'] = example - - exampleList.append(tagvalues) - - print - - exampleList.sort(key=lambda x:x['example'].lower()) - - index = wordIndex(exampleList) - - json = json.dumps({"examples": exampleList, "index": index}) - #give the json a global variable we can use in our js. This should be replaced or made optional. - json = 'var info=' + json - outFile.write(json) - outFile.close() - - outFeedPath = os.path.join(outExampleDir, feedName); - print "writing feed to %s " % outFeedPath - atom = open(outFeedPath, 'w') - doc = createFeed(exampleList) - atom.write(doc.toxml()) - atom.close() - - - print 'complete' - - |