#!/usr/bin/python
# A small web.py gateway that re-serves an RSS/Atom feed as a 2channel
# board: subject.txt lists the entries as threads, and each entry is
# served as a .dat file, so the feed can be read in a 2ch browser.
# Python 2, web.py, feedparser.
import web
import feedparser
import shelve
import hashlib
import time
import re

urls = (
    '/', 'usage',
    '(.*)', 'test',
)
app = web.application(urls, globals())


class usage:
    def GET(self):
        usage = '''
Hello, world!
'''
        return usage


class test:
    def GET(self, path):
        self.resmax = 3  # rounds of ">>" anchor expansion per dat body
        self.res = 0
        path = path.strip("/").split("/")
        last = path[-1]
        if re.search(r"\.dat$", last):
            # /<feed url>/<board>/dat/<epoch>.dat -> one entry as a thread
            url = "/".join(path[:-3])
            rss = self.get_rss(url, forcedb=True)
            dat = self.search_dat(rss, last)
            return dat
        elif re.search(r"subject\.txt$", last):
            # /<feed url>/<board>/subject.txt -> the thread list
            url = "/".join(path[:-2])
            rss = self.get_rss(url)
            subject = self.conv_subject(rss)
            return subject
        else:
            index = "dat/\nsubject.txt\n"
            return index

    def get_rss(self, url, forcedb=False):
        # Cache parsed feeds in a shelve file, keyed by the MD5 of the
        # feed URL; entries younger than five minutes are served from it.
        md5hash = hashlib.md5(url).hexdigest()
        db = shelve.open("feed.shelve")
        try:
            if forcedb or (db["%s_mtime" % md5hash] + 60 * 5 > time.time()):
                rss = db["%s_body" % md5hash]
            else:
                rss = self.write_rss(url, md5hash, db)
        except KeyError:
            rss = self.write_rss(url, md5hash, db)
        db.close()
        return rss

    def write_rss(self, url, md5hash, db):
        rss = feedparser.parse("http://" + url)
        db["%s_mtime" % md5hash] = time.time()
        db["%s_body" % md5hash] = rss
        return rss

    def tags_title(self, entry_tags):
        # Join the entry's category terms into a "tag1,tag2" string.
        tags = []
        for tag in entry_tags:
            tags.append(tag["term"])
        return ",".join(tags)

    def conv_subject(self, rss):
        # One "<epoch>.dat<>[tags] title (reply count)" line per entry;
        # every "thread" has exactly one post.
        subject_txt = []
        res = "1"
        for entry in rss["entries"]:
            try:
                tags = self.tags_title(entry["tags"])
            except KeyError:
                tags = ""
            dat = int(time.mktime(entry["updated_parsed"]))
            title = entry["title"]
            subject = "%s.dat<>[%s] %s (%s)\n" % (dat, tags, title, res)
            subject_txt.append(subject)
        return self.utf8tosjis("".join(subject_txt))

    def search_dat(self, rss, datfile):
        # The .dat name is the epoch conv_subject derived from the entry's
        # updated time; recompute it the same way to find the entry again
        # (comparing epochs avoids the localtime/DST round-trip mismatch).
        epoch = int(re.sub(r"\.dat$", "", datfile))
        for entry in rss["entries"]:
            if epoch == int(time.mktime(entry["updated_parsed"])):
                return self.conv_dat(entry, rss)
        return "not found."

    def conv_dat(self, entry, rss):
        #name = entry["author"]
        name = "Anonymous"
        mail = ""
        date = time.strftime("%Y/%m/%d %H:%M:%S (%a)",
                             entry["updated_parsed"])
        id_split = entry["id"].split("/")
        id = id_split[-1] + " "
        #host = "/".join(id_split[:-1])
        if entry.get("content"):
            body = self.untag(entry["content"][0]["value"])
        else:
            body = entry["summary"]
        # Append the entries referenced by ">>" anchors, resmax rounds deep.
        anchors = []
        for i in range(self.resmax):
            (anchor, anchors) = self.search_anchor(body, rss, anchors)
            body = body + "<br>" + anchor
" + anchor try: tags = self.tags_title(entry["tags"]) except KeyError: tags = "" title = entry["title"] dat = "%s<>%s<>%s ID:%s<>%s<>[%s] %s\n" % (name, mail, date, id, body, tags, title) return self.utf8tosjis(dat) def untag(self, html, quote=None): u = re.compile("]*/*>") n = re.compile("\n") html = u.sub("", html) html = n.sub("
", html) if quote is not None: html = re.sub("^", quote, html) html = re.sub("
", "
" + quote, html) return html def search_anchor(self, body, rss, anchors): #self.res += 1 #if self.res > self.resmax: # return "" resanchor = body.split(">>") body = [] anchor = [] for b in resanchor[1:]: match = re.search("^([0-9a-z]{8})", b) if match: if match.group(1) not in anchor: anchor.append(match.group(1)) for entry in rss["entries"]: id = entry["id"].split("/")[-1] if id in anchor and id not in anchors: anchors.append(id) body.append("%s wrote:" % id) if entry["content"]: body.append(self.untag(entry["content"][0]["value"], quote=" > ")) else: body.append(" > " + entry["summary"]) return ("

" + "
".join(body), anchors) def utf8tosjis(self, data): return unicode(data).encode("Shift_JIS", "replace") if __name__ == '__main__': app.run()