wgetでYouTube等から動画を落とす
今度は正規表現を使わずに書いてみました。
#!python
#encoding=utf-8
"""Download videos from YouTube and similar sites by handing them to wget.

Each command-line argument is a video page URL.  For every URL the script
picks the matching site handler from ``VIDEOS``, fetches the page (or the
site's API document), extracts the direct download URL and the title with
plain substring searches (no regular expressions), and then runs ``wget``
to save the file under ``save_dir``.

The code runs on both Python 2.6+ and Python 3.
"""
import os
import re
import subprocess
import sys
import time
import urllib
from contextlib import closing

try:  # Python 2
    import urllib2
    urlopen = urllib2.urlopen
    URLError = urllib2.URLError
    unquote = urllib.unquote
except ImportError:  # Python 3: urllib2 was split into urllib.* submodules
    import urllib.error
    import urllib.parse
    import urllib.request
    urlopen = urllib.request.urlopen
    URLError = urllib.error.URLError
    unquote = urllib.parse.unquote

# Directory the downloaded files are written to.
save_dir = r"c:\My Documents"
# Seconds to wait after each download before starting the next one.
interval = 3

# Maps a host substring to the handler object for that site.
VIDEOS = {}


def get_video_detail(url):
    """Return ``(id, dl_url, title, ext)`` for a video page URL.

    The handler is chosen by looking for each registered host name inside
    the URL (case-insensitively).

    Raises:
        ValueError: no handler matches the URL, or the URL has no video id.
        RuntimeError: the page could not be fetched or parsed.
    """
    lowered = url.lower()
    for host, video in VIDEOS.items():
        if host in lowered:
            return video.get_detail(url)
    # Previously this fell through and returned None, which made the caller
    # fail with an uncaught TypeError while unpacking the result.
    raise ValueError("unsupported video url")


def _find_span(text, from_, to_=None, to_end=False):
    """Locate the text between two markers.

    Returns ``(start, end)`` slice bounds, where ``end`` is None when the
    slice runs to the end of ``text``, or None when the markers are not
    found.  ``to_end`` allows a missing ``to_`` marker to mean "up to the
    end of the text".
    """
    start = text.find(from_)
    if start == -1:
        return None
    start += len(from_)
    if not to_:
        return start, None
    end = text.find(to_, start)
    if end != -1:
        return start, end
    if to_end:
        return start, None
    return None


def _extract_from_to(text, from_, to_=None, to_end=False):
    """Return the part of ``text`` between ``from_`` and ``to_``, or None."""
    span = _find_span(text, from_, to_, to_end)
    if span is None:
        return None
    start, end = span
    return text[start:end]


def _to_native_str(value):
    """Return ``value`` as the interpreter's native ``str`` type.

    Only has an effect on Python 2, where decoded page text is ``unicode``
    and has to be encoded before it is handed to a child process.
    """
    if isinstance(value, str):
        return value
    return value.encode(sys.getfilesystemencoding() or "utf-8", "replace")


class Video(object):
    """Generic site handler driven by (from, to) substring markers.

    Attributes:
        id_from_to: marker pair used to cut the video id out of the URL.
        dl_url_from_to: list of marker pairs; each one yields one parameter
            that ``_build_dl_url`` turns into the download URL.
        title_from_to: optional marker pair for the title.
        api_url: optional ``%s`` template; when set, the document fetched
            is this URL filled with the video id instead of the page URL.
        encoding: encoding used to decode the fetched document.
        ext: file extension appended to the saved file name.
    """

    def __init__(self, id_from_to, dl_url_from_to, title_from_to=None,
                 api_url=None, encoding="utf-8", ext=".flv"):
        self.id_from_to = id_from_to
        self.dl_url_from_to = dl_url_from_to
        self.title_from_to = title_from_to
        self.api_url = api_url
        self.encoding = encoding
        self.ext = ext

    def get_detail(self, url):
        """Return ``(id, dl_url, title, ext)`` for the given page URL."""
        video_id = self._extract_id(url)
        if self.api_url:
            url = self.api_url % video_id
        content = self._get_content(url)
        params = self._extract_dl_url_params(content)
        dl_url = self._build_dl_url(params)
        title = self._extract_title(content)
        return video_id, dl_url, title, self.ext

    def _extract_id(self, url):
        """Return the video id found in ``url``.

        The markers are searched in a lowercased copy of the URL so that
        matching is case-insensitive, but the id is sliced from the
        original URL so that its case is preserved.

        Raises:
            ValueError: the URL contains no (non-empty) id.
        """
        from_, to_ = self.id_from_to
        span = _find_span(url.lower(), from_, to_, True)
        video_id = url[span[0]:span[1]] if span else None
        if video_id:
            return video_id
        raise ValueError("invalid video url")

    def _get_content(self, url):
        """Fetch ``url`` and return its body decoded with ``self.encoding``.

        Raises:
            RuntimeError: the document could not be downloaded.
        """
        try:
            # closing() guarantees the connection is released on both
            # Python 2 and Python 3.
            with closing(urlopen(url)) as response:
                raw = response.read()
        except URLError:
            raise RuntimeError("unable to download video page")
        # Decode once at the I/O boundary; undecodable bytes are dropped.
        return raw.decode(self.encoding, "ignore")

    def _extract_dl_url_params(self, content):
        """Return one extracted value per entry of ``dl_url_from_to``.

        Raises:
            RuntimeError: any of the values is missing or empty.
        """
        params = []
        for from_, to_ in self.dl_url_from_to:
            value = _extract_from_to(content, from_, to_)
            if not value:
                raise RuntimeError("unable to extract download url")
            params.append(value)
        return params

    def _build_dl_url(self, params):
        """Build the download URL; by default it is the first parameter."""
        return params[0]

    def _extract_title(self, content):
        """Return the title as text, or None when it cannot be found."""
        if not self.title_from_to:
            return None
        from_, to_ = self.title_from_to
        title = _extract_from_to(content, from_, to_)
        if not title:
            return None
        # Content normally arrives already decoded; still accept raw bytes.
        if isinstance(title, bytes):
            title = title.decode(self.encoding, "ignore")
        return title


class YouTube(Video):
    """Handler for youtube.com watch pages."""

    def __init__(self):
        Video.__init__(
            self,
            id_from_to=("/watch?v=", "&"),
            dl_url_from_to=[("video_id=", "&"), ("&t=", "&")],
            title_from_to=("<title>YouTube - ", "</title>"),
        )

    def _build_dl_url(self, params):
        return ("http://www.youtube.com/get_video?video_id=%s&t=%s"
                % tuple(params))


VIDEOS["youtube.com"] = YouTube()


class Veoh(Video):
    """Handler for www.veoh.com; reads the details document at api_url."""

    def __init__(self):
        Video.__init__(
            self,
            id_from_to=("/videos/", "?"),
            dl_url_from_to=[('fullPreviewHashPath="', '"')],
            api_url="http://www.veoh.com/rest/video/%s/details",
            title_from_to=('\ttitle="', '"'),
        )


VIDEOS["www.veoh.com"] = Veoh()


class Dailymotion(Video):
    """Handler for dailymotion.com video pages."""

    def __init__(self):
        Video.__init__(
            self,
            id_from_to=("/video/", None),
            dl_url_from_to=[("&url=", "&")],
            title_from_to=('<h1 class="nav with_uptitle">', "</h1>"),
        )

    def _build_dl_url(self, params):
        # The URL is percent-encoded inside the page.
        return unquote(params[0])


VIDEOS["dailymotion.com"] = Dailymotion()


class AmebaVision(Video):
    """Handler for vision.ameba.jp; reads the document at api_url."""

    def __init__(self):
        Video.__init__(
            self,
            id_from_to=("movie=", None),
            dl_url_from_to=[("<imageUrlLarge>", "</imageUrlLarge>")],
            api_url="http://vision.ameba.jp/api/get/detailMovie.do?movie=%s",
            title_from_to=("\t<title>", "</title>"),
        )

    def _build_dl_url(self, params):
        # The video URL is derived from the large thumbnail URL by
        # rewriting its host prefix, directory and file suffix.
        dl_url = params[0].replace("//vi", "//vm")
        dl_url = dl_url.replace("/jpg/", "/flv/")
        return dl_url.replace("_4.jpg", ".flv")


VIDEOS["vision.ameba.jp"] = AmebaVision()


class Yourfilehost(Video):
    """Handler for www.yourfilehost.com media pages (no title available)."""

    def __init__(self):
        Video.__init__(
            self,
            id_from_to=("cat=video&file=", None),
            dl_url_from_to=[("&videoembed_id=", "&")],
        )

    def _extract_id(self, url):
        # The id is the file name without its extension.
        video_id = Video._extract_id(self, url)
        return os.path.splitext(video_id)[0]

    def _build_dl_url(self, params):
        return unquote(params[0])


VIDEOS["www.yourfilehost.com"] = Yourfilehost()

# Characters that are not allowed in Windows file names.  The backslash is
# escaped explicitly: the earlier pattern '[\/...]' only matched '/'.
invalid_chr_re = re.compile(r'[\\/:*?"<>|]')


def build_wget_command(filepath, url, dl_url):
    """Return the wget argument list for one download.

    An argument list (rather than a shell command string) is used so that
    quotes, spaces or shell metacharacters in the title or in either URL
    cannot break or alter the command.
    """
    args = ["wget", "-O", filepath, "--referer=" + url, dl_url]
    return [_to_native_str(arg) for arg in args]


def main(argv=None):
    """Download every video whose page URL is given in ``argv``.

    ``argv`` defaults to ``sys.argv[1:]``.  A failure on one URL is
    reported and does not stop the remaining downloads.
    """
    urls = sys.argv[1:] if argv is None else argv
    for url in urls:
        try:
            video_id, dl_url, title, ext = get_video_detail(url)
        except (ValueError, RuntimeError) as e:
            print("Error: %s :%s" % (e, url))
            continue
        # Fall back to the id when the site provides no title.
        filename = invalid_chr_re.sub(" ", title or video_id)
        filepath = os.path.join(save_dir, filename + ext)
        try:
            subprocess.call(build_wget_command(filepath, url, dl_url))
        except OSError as e:
            # Raised when wget itself cannot be started (e.g. not installed).
            print("Error: %s :%s" % (e, url))
            continue
        time.sleep(interval)


if __name__ == "__main__":
    main()