wgetでYouTubeから動画を落とす
wgetでYouTube、Veoh、Dailymotion、AmebaVision、YourfilehostからFLVファイルを落とします。ただし、ログインしないと見られない動画は落とせません。
#!python
#encoding=utf-8
#
# Resolve the FLV URL behind a video page on YouTube, Veoh, Dailymotion,
# AmebaVision or Yourfilehost and hand it to an external ``wget`` process.
#
# Usage: pass one or more video page URLs as command-line arguments.  Each
# video is saved under ``save_dir`` as "<title or id>.flv".  ``wget`` must be
# reachable through PATH.
#
# The code runs on both Python 2.6+ and Python 3.
import os
import sys
import re
import subprocess
import time
import urllib

try:  # Python 2
    from urllib2 import urlopen, URLError
    from urllib import unquote
except ImportError:  # Python 3
    from urllib.request import urlopen
    from urllib.error import URLError
    from urllib.parse import unquote

# Directory the downloaded files are written to.
save_dir = r"C:\My Documents"
# Seconds to wait after each download before handling the next URL.
interval = 3


class Video(object):
    """Base class that resolves a video page URL to an FLV URL and a title.

    Subclasses configure the class attributes below and may override
    ``_build_flv_url`` when the FLV URL is not simply group 1 of
    ``flv_url_re``.

    Instance attributes set by ``__init__``:
      id      -- group 1 of ``video_url_re`` matched against the page URL
      flv_url -- URL of the FLV file
      title   -- title as text, or None when it could not be extracted
    """

    # Compiled pattern; group 1 is the video id.  Matched with ``match``.
    video_url_re = None
    # Optional "%s" format producing the URL to fetch for a given id.
    # When unset, the video page URL itself is fetched.
    api_url_format = None
    # Compiled pattern searched in the fetched content to locate the FLV URL.
    flv_url_re = None
    # Optional compiled pattern; group 1 is the title.
    title_re = None
    # Encoding used to decode the fetched content.
    server_encoding = "utf-8"

    def __init__(self, video_url):
        """Fetch the page (or API document) for *video_url* and parse it.

        Raises:
          ValueError   -- *video_url* does not match ``video_url_re``.
          RuntimeError -- the page could not be downloaded or contains no
                          FLV URL.
        """
        self.id = self._extract_id(video_url)
        if self.api_url_format:
            api_url = self.api_url_format % self.id
        else:
            api_url = video_url
        content = self._get_content(api_url)
        self.flv_url = self._extract_flv_url(content)
        self.title = self._extract_title(content)

    def _to_text(self, data):
        """Return *data* as text, decoding bytes with ``server_encoding``.

        Undecodable bytes are dropped ("ignore"); text is returned unchanged.
        """
        if isinstance(data, bytes):
            return data.decode(self.server_encoding, "ignore")
        return data

    def _extract_id(self, video_url):
        """Return the video id from *video_url* or raise ValueError."""
        match = self.video_url_re.match(video_url)
        if match is None:
            raise ValueError("invalid video url")
        return match.group(1)

    def _get_content(self, url):
        """Download *url* and return its body as text.

        Raises RuntimeError when urlopen reports a URLError.
        """
        try:
            response = urlopen(url)
            try:
                # Decode once here so every regex below works on text.
                return self._to_text(response.read())
            finally:
                # Python 2 responses are not context managers, so close
                # explicitly instead of using ``with``.
                response.close()
        except URLError:
            raise RuntimeError("unable to download video page")

    def _extract_flv_url(self, content):
        """Return the FLV URL found in *content* or raise RuntimeError."""
        match = self.flv_url_re.search(self._to_text(content))
        if match is None:
            raise RuntimeError("unable to extract flv url")
        return self._build_flv_url(match)

    def _build_flv_url(self, match):
        """Turn a ``flv_url_re`` match into the FLV URL (default: group 1)."""
        return match.group(1)

    def _extract_title(self, content):
        """Return the title found in *content*, or None when unavailable."""
        if self.title_re is None:
            return None
        match = self.title_re.search(self._to_text(content))
        if match is None:
            return None
        return match.group(1)


class YouTube(Video):
    """YouTube watch pages; the FLV URL is built from video_id and t."""

    video_url_re = re.compile(r"http://(?:\w+\.)?youtube\.com/watch\?v=([\w-]+)")
    flv_url_re = re.compile(r"watch_fullscreen\?.*?video_id=([^&]+)&.*?t=([^&]+)&")
    title_re = re.compile(r"<title>YouTube - ([^<>]*)</title>")

    def _build_flv_url(self, match):
        return "http://www.youtube.com/get_video?video_id=%s&t=%s" % match.group(1, 2)


class Veoh(Video):
    """Veoh videos, resolved through the REST details document."""

    video_url_re = re.compile(r"http://www\.veoh\.com/videos/(\w+)")
    api_url_format = "http://www.veoh.com/rest/video/%s/details"
    flv_url_re = re.compile(r'fullPreviewHashPath="([^"]+)"')
    title_re = re.compile(r'title="([^"]*)"\s+dateAdded=')


class Dailymotion(Video):
    """Dailymotion pages; the FLV URL is embedded percent-encoded."""

    # The dot after "www" is escaped so it only matches a literal dot.
    video_url_re = re.compile(r"http://www\.dailymotion\.com/.*?/video/([\w/-]+)")
    flv_url_re = re.compile(
        r"http%3A%2F%2F\w+\.dailymotion\.com%2Fget%2F\d{2}%2F320x240%2Fflv%2F\d+\.flv%3Fkey%3D[a-z0-9]+")
    title_re = re.compile(r'<h1 class="nav with_uptitle">([^<>]*)</h1>')

    def _build_flv_url(self, match):
        # The whole match is the percent-encoded URL.
        return unquote(match.group(0))


class AmebaVision(Video):
    """AmebaVision videos; the FLV URL is derived from the thumbnail URL."""

    video_url_re = re.compile(r"http://vision\.ameba\.jp/watch\.do.*?\?movie=(\d+)")
    api_url_format = "http://vision.ameba.jp/api/get/detailMovie.do?movie=%s"
    flv_url_re = re.compile(r"<imageUrlLarge>([^<>]+)</imageUrlLarge>")
    title_re = re.compile(r"<item>\s*<title>([^<>]*)</title>")

    def _build_flv_url(self, match):
        # Rewrite the large thumbnail URL into the FLV URL by swapping the
        # host prefix, the directory and the file suffix.
        flv_url = match.group(1).replace("//vi", "//vm")
        flv_url = flv_url.replace("/jpg/", "/flv/")
        return flv_url.replace("_4.jpg", ".flv")


# Backward-compatible alias for the original (misspelled) class name.
AmevaVision = AmebaVision


class Yourfilehost(Video):
    """Yourfilehost media pages; no title is extracted (title_re unset)."""

    video_url_re = re.compile(
        r"http://(?:www\.)?yourfilehost\.com/media\.php\?cat=video&file=([\w.-]+)\.")
    flv_url_re = re.compile(r"videoembed_id=([\w%.-]+)&")

    def _build_flv_url(self, match):
        return unquote(match.group(1))


# Site handlers, tried in order by get_video().
klass = [YouTube, Veoh, Dailymotion, AmebaVision, Yourfilehost]


def get_video(url):
    """Return a Video for *url* built by the first handler that accepts it.

    Raises:
      ValueError   -- no handler in ``klass`` accepts *url*.
      RuntimeError -- a handler accepted *url* but could not resolve it.
    """
    for k in klass:
        try:
            return k(url)
        except ValueError:
            continue
    raise ValueError("invalid video url")


# Characters replaced by a space in generated file names.  The backslash is
# included so a title can never introduce an extra path component.
invalid_chr_re = re.compile(r'[\\/:*?"<>|]')


def _native_arg(arg):
    """Return *arg* in the string type the process API expects.

    On Python 2 a unicode argument is encoded with the filesystem encoding,
    because a non-ASCII unicode value would otherwise be encoded as ASCII
    and raise UnicodeEncodeError.  On Python 3 *arg* is returned unchanged.
    """
    if str is bytes and not isinstance(arg, str):
        return arg.encode(sys.getfilesystemencoding() or "utf-8", "replace")
    return arg


def build_wget_command(filepath, referer, flv_url):
    """Return the wget argument list that saves *flv_url* to *filepath*.

    A list (run without a shell) is used instead of a quoted command string,
    so quotes, spaces or shell metacharacters in the scraped title or URL
    cannot break or alter the command.
    """
    args = ["wget", "-O", filepath, "--referer=" + referer, flv_url]
    return [_native_arg(arg) for arg in args]


def main(argv=None):
    """Download every video URL in *argv* (default: ``sys.argv[1:]``).

    A failure for one URL is reported on stdout and does not stop the
    remaining URLs from being processed.
    """
    if argv is None:
        argv = sys.argv[1:]
    for url in argv:
        try:
            video = get_video(url)
            filename = (video.title or video.id) + ".flv"
            filename = invalid_chr_re.sub(" ", filename)
            filepath = os.path.join(save_dir, filename)
            try:
                subprocess.call(build_wget_command(filepath, url, video.flv_url))
            except OSError as e:
                # Raised when wget cannot be started (e.g. not installed).
                raise RuntimeError("unable to run wget: %s" % e)
            time.sleep(interval)
        except (ValueError, RuntimeError) as e:
            print("Error: %s [%s]" % (e, url))


if __name__ == "__main__":
    main()