#!/usr/bin/python

import web
import feedparser
import shelve
import hashlib
import time
import re
import sys

urls = (
    '/', 'usage',
    '(.*)', 'test'
)

app = web.application(urls, globals())

class usage:
    def GET(self):
        usage = '''
            Hello, world!
        '''
        return usage

class test:
    def GET(self, path):
        self.resmax = 3
        self.res = 0
        path = path.strip("/").split("/")
        last = path[-1]
        if re.search("\.dat$", last):
            url = "/".join(path[:-3])
            rss = self.get_rss(url, forcedb=True)
            dat = self.search_dat(rss, last)
            return dat
        elif re.search("subject\.txt$", last):
            url = "/".join(path[:-2])
            rss = self.get_rss(url)
            subject = self.conv_subject(rss)
            return subject
        else:
            index = "dat/\nsubject.txt\n"
            return index

    def get_rss(self, url, forcedb=False):
        md5hash = hashlib.md5(url).hexdigest()
        db = shelve.open("feed.shelve")
        try:
            if forcedb or (db["%s_mtime" % md5hash]+60*5 > time.time()):
                rss = db["%s_body" % md5hash]
            else:
                rss = self.write_rss(url, md5hash, db)
        except KeyError:
            rss = self.write_rss(url, md5hash, db)
        db.close()
        return rss

    def write_rss(self, url, md5hash, db):
        rss = feedparser.parse("http://" + url)
        db["%s_mtime" % md5hash] = time.time()
        db["%s_body" % md5hash] = rss
        return rss

    def tags_title(self, entry_tags):
        tags = []
        for tag in entry_tags:
	    tags.append(tag["term"])
        tags = ",".join(tags)
        return tags

    def conv_subject(self, rss):
        subject_txt = []
        res = "1"
        for entry in rss["entries"]:
            try:
                tags = self.tags_title(entry["tags"])
            except KeyError:
                tags = ""
            dat = int(time.mktime(entry["updated_parsed"]))
            title = entry["title"]
            subject = "%s.dat<>[%s] %s (%s)\n" % (dat, tags, title, res)
            subject_txt.append(subject)
        subject_txt = "".join(subject_txt)
        return self.utf8tosjis(subject_txt)

    def search_dat(self, rss, datfile):
        epoch = int(re.sub("\.dat$", "", datfile))
        date = time.localtime(epoch)
        for entry in rss["entries"]:
            if date == entry["updated_parsed"]:
                return self.conv_dat(entry, rss)
        return "not found."

    def conv_dat(self, entry, rss):
        #name = entry["author"]
        name = "Anonymous"
        #mail = entry[""]
        mail = ""
        date = time.strftime("%Y/%m/%d %H:%M:%S (%a)", entry["updated_parsed"])
        id_split = entry["id"].split("/")
        id = "<a href=\"" + entry["link"] + "\">" + id_split[-1] +" </a>"
        #host = "/".join(id_split[:-1])
        if entry["content"]:
            body = self.untag(entry["content"][0]["value"])
        else:
            body = entry["summary"]
        anchors = []
        for i in range(self.resmax):
            (anchor, anchors) = self.search_anchor(body, rss, anchors)
            body = body + "<br>" + anchor
        try:
            tags = self.tags_title(entry["tags"])
        except KeyError:
            tags = ""
        title = entry["title"]
        dat = "%s<>%s<>%s ID:%s<>%s<>[%s] %s\n" % (name, mail, date, id, body, tags, title)
        return self.utf8tosjis(dat)

    def untag(self, html, quote=None):
    	u = re.compile("</*[^>]*/*>")
    	n = re.compile("\n")
    	html = u.sub("", html)
    	html = n.sub("<br>", html)
        if quote is not None:
            html = re.sub("^", quote, html)
            html = re.sub("<br>", "<br>" + quote, html)
        return html

    def search_anchor(self, body, rss, anchors):
        #self.res += 1
        #if self.res > self.resmax:
        #    return ""
        resanchor = body.split("&gt;&gt;")
        body = []
        anchor = []
        for b in resanchor[1:]:
            match = re.search("^([0-9a-z]{8})", b)
            if match:
                if match.group(1) not in anchor:
                    anchor.append(match.group(1))
        for entry in rss["entries"]:
            id = entry["id"].split("/")[-1]
            if id in anchor and id not in anchors:
                anchors.append(id)
                body.append("%s wrote:" % id)
                if entry["content"]:
                    body.append(self.untag(entry["content"][0]["value"], quote="&nbsp;&gt;&nbsp;"))
                else: 
                    body.append("&nbsp;&gt; " + entry["summary"])
        return ("<br><br>" + "<br>".join(body), anchors)

    def utf8tosjis(self, data):
        return unicode(data).encode("Shift_JIS", "replace")

if __name__ == '__main__':
    app.run()