summary | refs | log | tree | commit | diff
path: root/misc/openlayers/tools/exampleparser.py
diff options
context:
space:
mode:
Diffstat (limited to 'misc/openlayers/tools/exampleparser.py')
-rwxr-xr-x misc/openlayers/tools/exampleparser.py 251
1 files changed, 251 insertions, 0 deletions
diff --git a/misc/openlayers/tools/exampleparser.py b/misc/openlayers/tools/exampleparser.py
new file mode 100755
index 0000000..6ef123a
--- /dev/null
+++ b/misc/openlayers/tools/exampleparser.py
@@ -0,0 +1,251 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import re
+import time
+from xml.dom.minidom import Document
+
+try:
+ import xml.etree.ElementTree as ElementTree
+except ImportError:
+ try:
+ import cElementTree as ElementTree
+ except ImportError:
+ try:
+ import elementtree.ElementTree as ElementTree
+ except ImportError:
+ import lxml.etree as ElementTree
+
# Probe the optional dependencies without aborting at import time.
# ``missing_deps`` stays False when everything imported; otherwise it holds
# the most recent ImportError so the script entry point can report it.
missing_deps = False
try:
    import json
except ImportError:
    try:
        # Fallback for interpreters that predate the stdlib json module.
        import simplejson as json
    except ImportError as E:
        missing_deps = E

try:
    from BeautifulSoup import BeautifulSoup
except ImportError as E:
    missing_deps = E

# File name of the generated Atom feed, and the public URL prefix under
# which the examples (and the feed itself) are served.
feedName = "example-list.xml"
feedPath = "http://openlayers.org/dev/examples/"
+
def getListOfExamples(relPath):
    """
    Return the names of the .html files located directly inside relPath.

    The generated index page, example-list.html, is never included. Names
    come back in whatever order os.listdir reports them.
    """
    htmlFiles = []
    for entry in os.listdir(relPath):
        if entry == "example-list.html":
            continue
        if entry.endswith('.html'):
            htmlFiles.append(entry)
    return htmlFiles
+
+
def getExampleHtml(path):
    """
    Return the full text of the example file at path.

    A single '.' is written to stdout for every file read, as a progress
    indicator. The file is closed even when reading fails.
    """
    # Write the progress dot directly so it needs no print-statement syntax
    # and shows up immediately instead of waiting in the output buffer.
    sys.stdout.write('.')
    sys.stdout.flush()
    with open(path) as f:
        return f.read()
+
+
def extractById(soup, tagId, value=None):
    """
    Return the rendered contents of the tag whose id is tagId.

    The rendered text is stripped of surrounding whitespace and has every
    tab and newline removed. When no such tag exists, or the tag has no
    contents, the supplied value (None by default) is returned unchanged.
    """
    tag = soup.find(id=tagId)
    if not tag or not tag.contents:
        return value
    rendered = str(tag.renderContents()).strip()
    return rendered.replace('\t','').replace('\n','')
+
def getRelatedClasses(html):
    """
    Scan html for text of the form ``OpenLayers.<something>(`` and return
    every match, without the opening parenthesis, in order of appearance.

    Duplicates are kept; the result approximates which OpenLayers classes
    the example's javascript uses.
    """
    pattern = re.compile(r'''(?P<class>OpenLayers\..*?)\(''')
    return [match.group('class') for match in pattern.finditer(html)]
+
def parseHtml(html,ids):
    """
    Parse html and return a dictionary of the items of interest.

    Each id in ids maps to whatever extractById finds for it; the extra
    'classes' key holds the list returned by getRelatedClasses.
    """
    soup = BeautifulSoup(html)
    found = dict((tagId, extractById(soup, tagId)) for tagId in ids)
    #classes should eventually be parsed from docs - not automatically created.
    found['classes'] = getRelatedClasses(html)
    return found
+
def _parseGitLog(log):
    """Turn one ``author|date`` line of git output into the result dict."""
    # Split on the LAST '|' so an author name containing '|' stays intact.
    author, sep, stamp = log.strip().rpartition("|")
    if not sep:
        # No output at all: untracked file, not a repository, or no git.
        return {"author": "", "date": ""}
    # compensate for spaces in git log time:
    # "2012-03-04 12:34:56 +0100" becomes "2012-03-04T12:34:56+0100"
    td = stamp.split(" ")
    td.insert(1, "T")
    return {"author": author, "date": "".join(td)}


def getGitInfo(exampleDir, exampleName):
    """
    Return the author and date of the last commit touching an example.

    exampleDir is the directory git is run in and exampleName is the file
    name relative to it. The result is a dict with the keys "author" and
    "date". Both values are empty strings when git reports nothing for the
    file (untracked file, directory outside a repository) or when git
    cannot be started, so callers can fall back to their own defaults.
    """
    # Imported locally: only this function needs it.
    import subprocess

    # An argument list plus cwd= avoids both the shell (file names are never
    # interpreted by it) and changing the working directory of this process.
    command = ["git", "log", "-n", "1", "--pretty=format:%an|%ai",
               "--", exampleName]
    try:
        proc = subprocess.Popen(command, cwd=exampleDir,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
        log = proc.communicate()[0]
    except OSError:
        # git is not installed or exampleDir cannot be entered.
        log = ""
    return _parseGitLog(log or "")
+
def _appendTextElement(doc, parent, uri, tagName, text):
    """Append <tagName>text</tagName> (namespace uri) to parent and return it."""
    element = doc.createElementNS(uri, tagName)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def createFeed(examples):
    """
    Build an Atom feed describing the examples and return it as a
    xml.dom.minidom Document.

    examples is a list of dicts with the keys "title", "tags", "shortdesc",
    "example", "author" and "modified". Entries are emitted newest first,
    ordered by "modified". The list itself is not reordered. Whenever
    "title", "tags" or "shortdesc" is empty the example file name is used
    in its place.
    """
    doc = Document()
    atomuri = "http://www.w3.org/2005/Atom"
    feed = doc.createElementNS(atomuri, "feed")
    feed.setAttribute("xmlns", atomuri)
    _appendTextElement(doc, feed, atomuri, "title", "OpenLayers Examples")

    # Self link of the feed; it has to be attached to end up in the output.
    link = doc.createElementNS(atomuri, "link")
    link.setAttribute("rel", "self")
    link.setAttribute("href", feedPath + feedName)
    feed.appendChild(link)

    # %H is the 24-hour clock; %I would repeat every twelve hours.
    modtime = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    _appendTextElement(doc, feed, atomuri, "id",
                       "%s%s#%s" % (feedPath, feedName, modtime))
    _appendTextElement(doc, feed, atomuri, "updated", modtime)

    # sorted() works on a copy, so the caller's list keeps its order.
    for example in sorted(examples, key=lambda x:x["modified"], reverse=True):
        entry = doc.createElementNS(atomuri, "entry")
        fallback = example["example"]

        _appendTextElement(doc, entry, atomuri, "title",
                           example["title"] or fallback)
        _appendTextElement(doc, entry, atomuri, "tags",
                           example["tags"] or fallback)

        entryLink = doc.createElementNS(atomuri, "link")
        entryLink.setAttribute("href", "%s%s" % (feedPath, example["example"]))
        entry.appendChild(entryLink)

        _appendTextElement(doc, entry, atomuri, "summary",
                           example["shortdesc"] or fallback)
        _appendTextElement(doc, entry, atomuri, "updated", example["modified"])

        author = doc.createElementNS(atomuri, "author")
        _appendTextElement(doc, author, atomuri, "name", example["author"])
        entry.appendChild(author)

        _appendTextElement(doc, entry, atomuri, "id",
                           "%s%s#%s" % (feedPath, example["example"], example["modified"]))

        feed.appendChild(entry)

    doc.appendChild(feed)
    return doc
+
def wordIndex(examples):
    """
    Create an inverted index based on the words in shortdesc, title and
    tags. Keys are lower cased words. Values are dictionaries whose keys
    are positions in examples and whose values count how often the word
    occurs in that example.

    Fields that are empty or None contribute nothing.
    """
    index = {}
    unword = re.compile(r"\W+")
    keys = ("shortdesc", "title", "tags")
    for i, example in enumerate(examples):
        for key in keys:
            text = example[key]
            if not text:
                continue
            for word in unword.split(text):
                # split() yields '' when the text starts or ends with a
                # non-word character.
                if not word:
                    continue
                counts = index.setdefault(word.lower(), {})
                counts[i] = counts.get(i, 0) + 1
    return index
+
def main(argv=None):
    """
    Generate example-list.js and the Atom feed for a directory of examples.

    argv defaults to sys.argv. With exactly two arguments they are the
    directory to read examples from and the directory to write to;
    otherwise both default to "../examples".
    """
    if argv is None:
        argv = sys.argv

    if missing_deps:
        # missing_deps holds the ImportError recorded while importing.
        print("This script requires json or simplejson and BeautifulSoup. You don't have them. \n(%s)" % missing_deps)
        # Exit status intentionally left as it always was.
        sys.exit()

    if len(argv) == 3:
        inExampleDir = argv[1]
        outExampleDir = argv[2]
    else:
        inExampleDir = "../examples"
        outExampleDir = "../examples"

    outPath = os.path.join(outExampleDir, "example-list.js")

    print('Reading examples from %s and writing out to %s' % (inExampleDir, outPath))

    exampleList = []
    docIds = ['title','shortdesc','tags']

    # Fallback modification time for examples without git history.
    # %H is the 24-hour clock; %I would be ambiguous between am and pm.
    modtime = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    for example in getListOfExamples(inExampleDir):
        path = os.path.join(inExampleDir, example)
        html = getExampleHtml(path)
        tagvalues = parseHtml(html,docIds)
        tagvalues['example'] = example
        # add in author/date info
        d = getGitInfo(inExampleDir, example)
        tagvalues["author"] = d["author"] or "anonymous"
        tagvalues["modified"] = d["date"] or modtime
        tagvalues['link'] = example

        exampleList.append(tagvalues)

    # Terminate the line of progress dots.
    print('')

    exampleList.sort(key=lambda x:x['example'].lower())

    index = wordIndex(exampleList)

    #give the json a global variable we can use in our js. This should be replaced or made optional.
    payload = 'var info=' + json.dumps({"examples": exampleList, "index": index})
    # Open the output only once everything was read, so a failure while
    # parsing does not leave a truncated example-list.js behind.
    with open(outPath, "w") as outFile:
        outFile.write(payload)

    outFeedPath = os.path.join(outExampleDir, feedName)
    print("writing feed to %s " % outFeedPath)
    doc = createFeed(exampleList)
    with open(outFeedPath, 'w') as atom:
        atom.write(doc.toxml())

    print('complete')


if __name__ == "__main__":
    main()
+
+