wgetでYouTubeから動画を落とす

wgetでYouTube、Veoh、Dailymotion、AmebaVision、YourfilehostからFLVファイルを落とします。ただし、ログインしないと見られない動画は落とせません。

#!python
#encoding=utf-8

import os
import re
import subprocess
import sys
import time
import urllib
import urllib2

save_dir = r"C:\My Documents"
interval = 3

class Video:
    """Base class describing one video on a hosting site.

    Subclasses configure the class attributes below and may override
    ``_build_flv_url``.  Constructing an instance extracts the id from the
    given URL, downloads one page, and sets three attributes:

    * ``id``      -- group 1 of ``video_url_re`` matched against the URL
    * ``flv_url`` -- result of ``_build_flv_url`` on the ``flv_url_re`` match
    * ``title``   -- decoded title, or None when no title could be extracted
    """

    # Compiled regex matched (anchored at the start) against the video URL;
    # group 1 is the video id.
    video_url_re = None
    # Optional "%s" format string.  When set, the id is substituted into it
    # and that URL is downloaded instead of the video URL itself.
    api_url_format = None
    # Compiled regex searched for in the downloaded content; the resulting
    # match object is handed to _build_flv_url().
    flv_url_re = None
    # Optional compiled regex searched for in the downloaded content;
    # group 1 is the raw title.
    title_re = None
    # Encoding used to decode the raw title.
    server_encoding = "utf-8"

    def __init__(self, video_url):
        """Resolve *video_url* into ``id``, ``flv_url`` and ``title``.

        Raises:
            ValueError: if *video_url* does not match ``video_url_re``.
            RuntimeError: if the page cannot be downloaded or no FLV URL is
                found in it.
        """
        self.id = self._extract_id(video_url)
        if self.api_url_format:
            api_url = self.api_url_format % self.id
        else:
            api_url = video_url
        content = self._get_content(api_url)
        self.flv_url = self._extract_flv_url(content)
        self.title = self._extract_title(content)

    def _extract_id(self, video_url):
        """Return group 1 of ``video_url_re`` matched against *video_url*.

        Raises:
            ValueError: if the URL does not match.
        """
        match = self.video_url_re.match(video_url)
        if match:
            return match.group(1)
        else:
            raise ValueError("invalid video url")

    def _get_content(self, url):
        """Download *url* and return the response body.

        Raises:
            RuntimeError: if urllib2 reports a URLError.
        """
        try:
            response = urllib2.urlopen(url)
            try:
                return response.read()
            finally:
                # Always release the connection, even if read() fails.
                response.close()
        except urllib2.URLError:
            raise RuntimeError("unable to download video page")

    def _extract_flv_url(self, content):
        """Search *content* with ``flv_url_re`` and build the FLV URL.

        Raises:
            RuntimeError: if ``flv_url_re`` is not found in *content*.
        """
        match = self.flv_url_re.search(content)
        if match is None:
            raise RuntimeError("unable to extract flv url")
        return self._build_flv_url(match)

    def _build_flv_url(self, match):
        """Turn the ``flv_url_re`` match into a URL; by default group 1."""
        return match.group(1)

    def _extract_title(self, content):
        """Return the decoded title found in *content*, or None.

        None is returned when ``title_re`` is unset or does not match.
        Undecodable bytes are dropped ("ignore") rather than raising.
        """
        if self.title_re:
            match = self.title_re.search(content)
            if match:
                return match.group(1).decode(self.server_encoding, "ignore")
        return None

class YouTube(Video):
    """YouTube handler.

    The watch page itself is scanned (no ``api_url_format``): the
    ``watch_fullscreen`` query string supplies the ``video_id`` and ``t``
    values, which are combined into a ``get_video`` URL.
    """

    video_url_re = re.compile(r"http://(?:\w+\.)?youtube\.com/watch\?v=([\w-]+)")
    flv_url_re = re.compile(r"watch_fullscreen\?.*?video_id=([^&]+)&.*?t=([^&]+)&")
    title_re = re.compile(r"<title>YouTube - ([^<>]*)</title>")

    def _build_flv_url(self, match):
        """Build the ``get_video`` URL from the captured video_id and t."""
        video_id, token = match.group(1, 2)
        return "http://www.youtube.com/get_video?video_id=%s&t=%s" % (video_id, token)

class Veoh(Video):
    """Veoh handler.

    The id captured from a ``www.veoh.com/videos/<id>`` URL is substituted
    into ``api_url_format``; the response of that "details" URL is what the
    two regexes below are searched in.
    """

    video_url_re = re.compile(r"http://www\.veoh\.com/videos/(\w+)")
    api_url_format = "http://www.veoh.com/rest/video/%s/details"
    # FLV URL: value of the fullPreviewHashPath attribute.
    flv_url_re = re.compile(r'fullPreviewHashPath="([^"]+)"')
    # Title: value of the title attribute that directly precedes dateAdded.
    title_re = re.compile(r'title="([^"]*)"\s+dateAdded=')

class Dailymotion(Video):
    """Dailymotion handler.

    The video page is scanned for a percent-encoded 320x240 FLV URL; the
    whole match is percent-decoded to obtain the download URL.
    """

    # The dot after "www" is escaped so it only matches a literal ".".
    video_url_re = re.compile(r"http://www\.dailymotion\.com/.*?/video/([\w/-]+)")
    # Raw string: same pattern as before, without relying on Python leaving
    # unknown escapes such as "\w" and "\d" untouched in a normal literal.
    flv_url_re = re.compile(r"http%3A%2F%2F\w+\.dailymotion\.com%2Fget%2F\d{2}%2F320x240%2Fflv%2F\d+\.flv%3Fkey%3D[a-z0-9]+")
    title_re = re.compile(r'<h1 class="nav with_uptitle">([^<>]*)</h1>')

    def _build_flv_url(self, match):
        """Return the percent-decoded text of the whole match (group 0)."""
        return urllib.unquote(match.group(0))

class AmevaVision(Video):
    """Handler for vision.ameba.jp.

    The ``movie`` id from the watch URL is substituted into
    ``api_url_format``; the FLV URL is derived from the ``imageUrlLarge``
    value in that response by rewriting three fragments of it.
    """

    video_url_re = re.compile(r"http://vision\.ameba\.jp/watch\.do.*?\?movie=(\d+)")
    api_url_format = "http://vision.ameba.jp/api/get/detailMovie.do?movie=%s"
    flv_url_re = re.compile(r"<imageUrlLarge>([^<>]+)</imageUrlLarge>")
    title_re = re.compile(r"<item>\s*<title>([^<>]*)</title>")

    def _build_flv_url(self, match):
        """Rewrite the captured image URL into the corresponding FLV URL."""
        rewritten = match.group(1)
        # Applied in this order: host prefix, directory, file suffix.
        for old, new in (("//vi", "//vm"), ("/jpg/", "/flv/"), ("_4.jpg", ".flv")):
            rewritten = rewritten.replace(old, new)
        return rewritten

class Yourfilehost(Video):
    """Yourfilehost handler.

    The media page is scanned for a percent-encoded ``videoembed_id`` value,
    which is decoded into the FLV URL.  No ``title_re`` is defined here.
    """

    video_url_re = re.compile(r"http://(?:www\.)?yourfilehost\.com/media\.php\?cat=video&file=([\w.-]+)\.")
    flv_url_re = re.compile(r"videoembed_id=([\w%.-]+)&")

    def _build_flv_url(self, match):
        """Return the percent-decoded ``videoembed_id`` value."""
        quoted_url = match.group(1)
        return urllib.unquote(quoted_url)

# Site handler classes; get_video() tries them against a URL in this order.
klass = [YouTube, Veoh, Dailymotion, AmevaVision, Yourfilehost]

def get_video(url):
    """Return a Video built by the first handler in ``klass`` that accepts *url*.

    Each handler class is instantiated with *url* in list order.  A handler
    that raises ValueError is skipped and the next one is tried.

    Raises:
        ValueError: if no handler accepts *url* (also when ``klass`` is
            empty, instead of silently returning None).
        Any other exception raised by a handler propagates unchanged.
    """
    for site_class in klass:
        try:
            return site_class(url)
        except ValueError:
            # Not accepted by this handler; move on to the next one.
            continue
    # Raised after the loop rather than by comparing against klass[-1], so
    # the outcome does not depend on a handler's position in the list.
    raise ValueError("invalid video url")

# Characters replaced by a space in generated file names:  \ / : * ? " < > |
# The backslash is doubled twice (string escape + regex escape) so that it is
# really part of the character class.
invalid_chr_re = re.compile(u'[\\\\/:*?"<>|]')


def _target_path(video):
    """Return the path under ``save_dir`` that *video* is saved to.

    The file name is the video title (or the id when there is no title) plus
    ".flv", with every character matched by ``invalid_chr_re`` replaced by a
    space.
    """
    filename = (video.title or video.id) + ".flv"
    filename = invalid_chr_re.sub(" ", filename)
    filepath = os.path.join(save_dir, filename)
    if not isinstance(filepath, str):
        # Python 2 unicode path: encode explicitly so non-ASCII titles do not
        # fail on an implicit ASCII conversion when the command is launched.
        filepath = filepath.encode(sys.getfilesystemencoding() or "utf-8", "ignore")
    return filepath


if __name__ == "__main__":
    for url in sys.argv[1:]:
        try:
            video = get_video(url)
            # Argument list, no shell: quotes or spaces in the title, the
            # save directory or the URLs cannot break or alter the command.
            command = [
                "wget",
                "-O", _target_path(video),
                "--referer=" + url,
                video.flv_url,
            ]
            status = subprocess.call(command)
            if status != 0:
                print("Error: wget exited with status %d [%s]" % (status, url))
            # Pause between downloads.
            time.sleep(interval)
        except (ValueError, RuntimeError, OSError) as e:
            # OSError covers wget not being launchable at all.
            print("Error: %s [%s]" % (e, url))