#!/usr/bin/env python
"""Generate the example index (example-list.js) and Atom feed for a directory
of HTML examples.

Every ``.html`` file in the input directory except ``example-list.html`` is
read, the contents of the elements with ids ``title``, ``shortdesc`` and
``tags`` are extracted, and the last git author/date of the file is looked
up. The result is written as ``var info={...}`` to ``example-list.js`` and as
an Atom feed to ``example-list.xml`` in the output directory.

Usage: script [inExampleDir outExampleDir]   (both default to ../examples)

The syntax is valid on Python 2.6+ and Python 3, so the helper functions can
be imported on either; actually running the script needs the BeautifulSoup
(version 3) package imported below.
"""

import sys
import os
import re
import subprocess
import time
from xml.dom.minidom import Document

# ElementTree is not referenced by the code in this file; the fallback chain
# is kept as it was so anything importing it from here keeps working.
try:
    import xml.etree.ElementTree as ElementTree
except ImportError:
    try:
        import cElementTree as ElementTree
    except ImportError:
        try:
            import elementtree.ElementTree as ElementTree
        except ImportError:
            import lxml.etree as ElementTree

# Holds the last ImportError when a required package is absent, else False.
# Checked in main() so that merely importing this module never fails.
missing_deps = False
try:
    import json
except ImportError:
    try:
        import simplejson as json
    except ImportError as E:
        missing_deps = E

try:
    from BeautifulSoup import BeautifulSoup
except ImportError as E:
    missing_deps = E

feedName = "example-list.xml"
feedPath = "http://openlayers.org/dev/examples/"


def formatTimestamp(when=None):
    """
    returns `when` (a time tuple / struct_time, default: current UTC time)
    formatted as YYYY-MM-DDTHH:MM:SSZ using the 24-hour clock
    """
    if when is None:
        when = time.gmtime()
    # %H, not %I: a 12-hour clock without AM/PM makes afternoon stamps wrong.
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", when)


def getListOfExamples(relPath):
    """
    returns list of .html filenames within a given path - excludes example-list.html
    """
    return [
        name for name in os.listdir(relPath)
        if name.endswith('.html') and name != "example-list.html"
    ]


def getExampleHtml(path):
    """
    returns html of a specific example (and prints a progress dot)
    """
    sys.stdout.write('. ')
    with open(path) as f:
        return f.read()


def extractById(soup, tagId, value=None):
    """
    returns full contents of a particular tag id, stripped and with tabs and
    newlines removed; returns `value` when the tag is missing or empty
    """
    beautifulTag = soup.find(id=tagId)
    if beautifulTag:
        if beautifulTag.contents:
            value = str(beautifulTag.renderContents()).strip()
            value = value.replace('\t', '').replace('\n', '')
    return value


def getRelatedClasses(html):
    """
    parses the html, and returns a list of all OpenLayers Classes
    used within (ie what parts of OL the javascript uses).
    """
    # Everything from "OpenLayers." up to (not including) the next "(".
    rawstr = r'''(?P<class>OpenLayers\..*?)\('''
    return re.findall(rawstr, html)


def parseHtml(html, ids):
    """
    returns dictionary of items of interest: one entry per id in `ids`
    (see extractById) plus 'classes' (see getRelatedClasses)
    """
    soup = BeautifulSoup(html)
    d = {}
    for tagId in ids:
        d[tagId] = extractById(soup, tagId)
    #classes should eventually be parsed from docs - not automatically created.
    d['classes'] = getRelatedClasses(html)
    return d


def _parseGitLog(log):
    """
    turns one "author|YYYY-MM-DD HH:MM:SS +ZZZZ" line into
    {"author": ..., "date": "YYYY-MM-DDTHH:MM:SS+ZZZZ"}; both values are ""
    when the line has no "|" separator (e.g. empty git output)
    """
    # Split on the LAST "|": the date never contains one, the author might.
    author, sep, stamp = log.strip().rpartition("|")
    if not sep:
        return {"author": "", "date": ""}
    # compensate for spaces in git log time
    td = stamp.split(" ")
    td.insert(1, "T")
    return {"author": author, "date": "".join(td)}


def getGitInfo(exampleDir, exampleName):
    """
    returns {"author": ..., "date": ...} of the last git commit touching
    `exampleName` inside `exampleDir`. Both values are empty strings when git
    cannot be run or reports nothing, so callers can substitute defaults.
    """
    try:
        # Argument list + cwd: no shell quoting issues and no chdir of the
        # whole process. "--" keeps odd file names from being read as options.
        proc = subprocess.Popen(
            ["git", "log", "-n", "1", "--pretty=format:%an|%ai", "--", exampleName],
            cwd=exampleDir,
            stdout=subprocess.PIPE,
        )
        log = proc.communicate()[0]
    except OSError:
        # git not installed, or exampleDir does not exist
        log = ""
    if not isinstance(log, str):
        # Python 3 hands back bytes; Python 2 already gives a native str.
        log = log.decode("utf-8", "replace")
    return _parseGitLog(log)


def createFeed(examples):
    """
    returns an xml.dom.minidom Document holding an Atom feed with one entry
    per example, most recently modified first.

    Each example is a dict with the keys "example", "title", "tags",
    "shortdesc", "modified" and "author"; an empty title/tags/shortdesc falls
    back to the example file name. The `examples` list itself is not
    reordered.
    """
    doc = Document()
    atomuri = "http://www.w3.org/2005/Atom"

    def textElement(tagName, text):
        # <tagName>text</tagName> in the Atom namespace
        element = doc.createElementNS(atomuri, tagName)
        element.appendChild(doc.createTextNode(text))
        return element

    feed = doc.createElementNS(atomuri, "feed")
    feed.setAttribute("xmlns", atomuri)
    feed.appendChild(textElement("title", "OpenLayers Examples"))

    # The self link used to be built but never attached to the feed.
    selfLink = doc.createElementNS(atomuri, "link")
    selfLink.setAttribute("rel", "self")
    selfLink.setAttribute("href", feedPath + feedName)
    feed.appendChild(selfLink)

    modtime = formatTimestamp()
    feed.appendChild(textElement("id", "%s%s#%s" % (feedPath, feedName, modtime)))
    feed.appendChild(textElement("updated", modtime))

    for example in sorted(examples, key=lambda x: x["modified"], reverse=True):
        fileName = example["example"]
        entry = doc.createElementNS(atomuri, "entry")

        entry.appendChild(textElement("title", example["title"] or fileName))
        entry.appendChild(textElement("tags", example["tags"] or fileName))

        link = doc.createElementNS(atomuri, "link")
        link.setAttribute("href", "%s%s" % (feedPath, fileName))
        entry.appendChild(link)

        entry.appendChild(textElement("summary", example["shortdesc"] or fileName))
        entry.appendChild(textElement("updated", example["modified"]))

        author = doc.createElementNS(atomuri, "author")
        author.appendChild(textElement("name", example["author"]))
        entry.appendChild(author)

        entry.appendChild(textElement(
            "id", "%s%s#%s" % (feedPath, fileName, example["modified"])))

        feed.appendChild(entry)

    doc.appendChild(feed)
    return doc


def wordIndex(examples):
    """
    Create an inverted index based on words in title, shortdesc and tags.
    Keys are lower cased words. Values are dictionaries with example index
    keys and count values.
    """
    index = {}
    unword = re.compile(r"\W+")
    keys = ["shortdesc", "title", "tags"]
    for i, example in enumerate(examples):
        for key in keys:
            text = example[key]
            if not text:
                continue
            for word in unword.split(text):
                if not word:
                    # split() yields "" at the edges when text starts or
                    # ends with a non-word character
                    continue
                counts = index.setdefault(word.lower(), {})
                counts[i] = counts.get(i, 0) + 1
    return index


def main(argv=None):
    """
    command line entry point: reads the examples from argv[1] and writes
    example-list.js and the feed to argv[2] (both "../examples" unless
    exactly two arguments are given)
    """
    if argv is None:
        argv = sys.argv

    if missing_deps:
        print("This script requires json or simplejson and BeautifulSoup. You don't have them. \n(%s)" % missing_deps)
        sys.exit()

    if len(argv) == 3:
        inExampleDir = argv[1]
        outExampleDir = argv[2]
    else:
        inExampleDir = "../examples"
        outExampleDir = "../examples"

    outPath = os.path.join(outExampleDir, "example-list.js")
    print('Reading examples from %s and writing out to %s' % (inExampleDir, outPath))

    exampleList = []
    docIds = ['title', 'shortdesc', 'tags']
    modtime = formatTimestamp()

    for example in getListOfExamples(inExampleDir):
        html = getExampleHtml(os.path.join(inExampleDir, example))
        tagvalues = parseHtml(html, docIds)
        tagvalues['example'] = example
        # add in author/date info
        d = getGitInfo(inExampleDir, example)
        tagvalues["author"] = d["author"] or "anonymous"
        tagvalues["modified"] = d["date"] or modtime
        tagvalues['link'] = example
        exampleList.append(tagvalues)

    # finish the line of progress dots
    print("")

    exampleList.sort(key=lambda x: x['example'].lower())
    index = wordIndex(exampleList)

    #give the json a global variable we can use in our js. This should be replaced or made optional.
    payload = 'var info=' + json.dumps({"examples": exampleList, "index": index})
    # Opened only now so a failure above does not leave a truncated file.
    with open(outPath, "w") as outFile:
        outFile.write(payload)

    outFeedPath = os.path.join(outExampleDir, feedName)
    print("writing feed to %s " % outFeedPath)
    doc = createFeed(exampleList)
    with open(outFeedPath, 'w') as atom:
        atom.write(doc.toxml())

    print('complete')


if __name__ == "__main__":
    main()