wgetでYouTube等から動画を落とす

今度は正規表現を使わずに書いてみました。

#!python
#encoding=utf-8

import os
import re
import subprocess
import sys
import time
import urllib
import urllib2

# Directory the downloaded files are written into.
save_dir = r"c:\My Documents"
# Seconds to sleep after each download command has been run.
interval = 3


def get_video_detail(url):
    """Find the handler registered for *url* and return its video details.

    Every key of the module-level ``VIDEOS`` registry is treated as a host
    substring; the first handler whose key occurs in *url* is used.

    Returns:
        Whatever the matching handler's ``get_detail(url)`` returns.

    Raises:
        ValueError: if no registered host substring occurs in *url*.
    """
    for host, video in VIDEOS.items():
        if host in url:
            return video.get_detail(url)
    # Falling off the end would return None, which a caller that unpacks the
    # result turns into an unrelated TypeError; fail with a clear error.
    raise ValueError("unsupported video site")

def _extract_from_to(str, from_, to_=None, to_end=False):
    start = str.find(from_)
    if start != -1:
        start += len(from_)
        end = None
        if to_:
            end = str.find(to_, start)
            if end == -1 and to_end:
                end = None
        if end != -1:
            return str[start:end]

# Registry of site handlers: maps a host substring to the Video instance
# that handles URLs containing it (consulted by get_video_detail).
VIDEOS = {}
class Video(object):
    """Generic scraper for one video site, configured by text markers.

    A "marker pair" is a ``(from_, to_)`` tuple: the wanted text is whatever
    lies between the first occurrence of ``from_`` and the next ``to_``.

    Args:
        id_from_to: marker pair applied to the video page URL to get the id.
        dl_url_from_to: list of marker pairs applied to the fetched content;
            the extracted values are handed to ``_build_dl_url``.
        title_from_to: optional marker pair for the title in the content.
        api_url: optional ``%``-format string taking the id; when set, this
            URL is fetched instead of the page URL.
        encoding: codec used to decode the extracted title.
        ext: file extension returned together with the details.
    """

    def __init__(self, id_from_to, dl_url_from_to, title_from_to=None,
            api_url=None, encoding="utf-8", ext=".flv"):
        self.id_from_to = id_from_to
        self.dl_url_from_to = dl_url_from_to
        self.title_from_to = title_from_to
        self.api_url = api_url
        self.encoding = encoding
        self.ext = ext

    def get_detail(self, url):
        """Return ``(id, download_url, title, ext)`` for the page at *url*.

        ``title`` is None when no title markers are configured or the title
        cannot be found.

        Raises:
            ValueError: if no id can be extracted from *url*.
            RuntimeError: if the page cannot be fetched or a download-URL
                parameter is missing from it.
        """
        video_id = self._extract_id(url)
        page_url = url
        if self.api_url:
            page_url = self.api_url % video_id
        content = self._get_content(page_url)
        params = self._extract_dl_url_params(content)
        dl_url = self._build_dl_url(params)
        title = self._extract_title(content)
        return video_id, dl_url, title, self.ext

    def _extract_id(self, url):
        """Return the video id embedded in *url*.

        The markers are located on a lower-cased copy of the URL so matching
        stays case-insensitive, but the id itself is sliced from the original
        string: lower-casing the result would corrupt case-sensitive ids.
        A missing end marker means the id runs to the end of the URL.

        Raises:
            ValueError: if the start marker is absent or the id is empty.
        """
        from_, to_ = self.id_from_to
        lowered = url.lower()
        start = lowered.find(from_)
        if start != -1:
            start += len(from_)
            end = lowered.find(to_, start) if to_ else -1
            if end == -1:
                end = len(url)
            video_id = url[start:end]
            if video_id:
                return video_id
        raise ValueError("invalid video url")

    def _get_content(self, url):
        """Fetch *url* and return the response body.

        Raises:
            RuntimeError: if urllib2 reports a URLError.
        """
        try:
            response = urllib2.urlopen(url)
            try:
                return response.read()
            finally:
                # Release the connection even when read() fails.
                response.close()
        except urllib2.URLError:
            raise RuntimeError("unable to download video page")

    def _extract_dl_url_params(self, content):
        """Extract one value per configured marker pair from *content*.

        Raises:
            RuntimeError: if any value is missing or empty.
        """
        params = []
        for from_, to_ in self.dl_url_from_to:
            value = _extract_from_to(content, from_, to_)
            if not value:
                raise RuntimeError("unable to extract download url")
            params.append(value)
        return params

    def _build_dl_url(self, params):
        """Turn the extracted parameters into the download URL.

        The default uses the first parameter as the URL; subclasses override
        this when the URL has to be assembled or decoded.
        """
        return params[0]

    def _extract_title(self, content):
        """Return the decoded title from *content*, or None if unavailable."""
        if not self.title_from_to:
            return None
        from_, to_ = self.title_from_to
        title = _extract_from_to(content, from_, to_)
        if title:
            # Undecodable bytes are dropped rather than aborting the download.
            return title.decode(self.encoding, "ignore")
        return None


class YouTube(Video):
    """Handler for youtube.com watch pages."""

    def __init__(self):
        # Two values are pulled from the page content (video_id and t); both
        # are needed to fill the download URL template below.
        id_markers = ("/watch?v=", "&")
        dl_markers = [("video_id=", "&"), ("&t=", "&")]
        title_markers = ("<title>YouTube - ", "</title>")
        Video.__init__(self,
                       id_from_to=id_markers,
                       dl_url_from_to=dl_markers,
                       title_from_to=title_markers)

    def _build_dl_url(self, params):
        """Fill the get_video URL template with the extracted parameters."""
        template = "http://www.youtube.com/get_video?video_id=%s&t=%s"
        return template % tuple(params)

VIDEOS["youtube.com"] = YouTube()


class Veoh(Video):
    """Handler for www.veoh.com; details are read from its REST details URL."""

    def __init__(self):
        # The id from the page URL is substituted into api_url, and the
        # remaining markers are applied to that response.
        settings = dict(
            id_from_to=("/videos/", "?"),
            dl_url_from_to=[('fullPreviewHashPath="', '"')],
            title_from_to=('\ttitle="', '"'),
            api_url="http://www.veoh.com/rest/video/%s/details"
        )
        Video.__init__(self, **settings)
VIDEOS["www.veoh.com"] = Veoh()


class Dailymotion(Video):
    """Handler for dailymotion.com video pages."""

    def __init__(self):
        # No end marker for the id: it runs to the end of the page URL.
        id_markers = ("/video/", None)
        dl_markers = [("&url=", "&")]
        title_markers = ('<h1 class="nav with_uptitle">', "</h1>")
        Video.__init__(self,
                       id_from_to=id_markers,
                       dl_url_from_to=dl_markers,
                       title_from_to=title_markers)

    def _build_dl_url(self, params):
        """Return the first extracted value with percent-escapes decoded."""
        encoded = params[0]
        return urllib.unquote(encoded)

VIDEOS["dailymotion.com"] = Dailymotion()


class AmebaVision(Video):
    """Handler for vision.ameba.jp; details come from its detailMovie URL."""

    # Substitutions applied, in this order, to the large-image URL to turn
    # it into the download URL.
    _IMAGE_TO_VIDEO = (
        ("//vi", "//vm"),
        ("/jpg/", "/flv/"),
        ("_4.jpg", ".flv"),
    )

    def __init__(self):
        Video.__init__(
            self,
            id_from_to=("movie=", None),
            dl_url_from_to=[("<imageUrlLarge>", "</imageUrlLarge>")],
            title_from_to=("\t<title>", "</title>"),
            api_url="http://vision.ameba.jp/api/get/detailMovie.do?movie=%s"
        )

    def _build_dl_url(self, params):
        """Rewrite the extracted image URL into the download URL."""
        dl_url = params[0]
        for old, new in self._IMAGE_TO_VIDEO:
            dl_url = dl_url.replace(old, new)
        return dl_url

VIDEOS["vision.ameba.jp"] = AmebaVision()


class Yourfilehost(Video):
    """Handler for www.yourfilehost.com video pages (no title extraction)."""

    def __init__(self):
        id_markers = ("cat=video&file=", None)
        dl_markers = [("&videoembed_id=", "&")]
        Video.__init__(self, id_from_to=id_markers, dl_url_from_to=dl_markers)

    def _extract_id(self, url):
        """Return the id from the URL with any file extension stripped."""
        raw_id = Video._extract_id(self, url)
        stem, _suffix = os.path.splitext(raw_id)
        return stem

    def _build_dl_url(self, params):
        """Return the first extracted value with percent-escapes decoded."""
        encoded = params[0]
        return urllib.unquote(encoded)

VIDEOS["www.yourfilehost.com"] = Yourfilehost()


invalid_chr_re = re.compile(u'[\/:*?"<>|]')
for url in sys.argv[1:]:
    try:
        id, dl_url, title, ext = get_video_detail(url)
        filename = title or id
        filename = invalid_chr_re.sub(" ", filename)
        filepath = os.path.join(save_dir, filename + ext)
        command = "wget -O '%s' --referer='%s' '%s'" %\
                  (filepath, url, dl_url)
        os.system(command)
        time.sleep(interval)
    except (ValueError, RuntimeError), e:
        print "Error: %s :%s" % (e, url)