--[[
 $Id$

 Copyright © 2007-2009 the VideoLAN team

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
--]]

-- Helper function to get a parameter's value in a URL
function get_url_param( url, name )
    -- Returns the value of query parameter `name` in `url`, i.e. the text
    -- between "<name>=" and the next "&" (or the end of the string).
    -- The parameter must be introduced by "?" or "&". Returns nil when the
    -- parameter is absent, and "" when it is present but empty.
    --
    -- `name` is matched literally: every non-alphanumeric character is
    -- escaped so that names such as "rv.2.id" or "a-b" are not interpreted
    -- as Lua pattern syntax.
    local literal_name = string.gsub( name, "(%W)", "%%%1" )
    return string.match( url, "[&?]" .. literal_name .. "=([^&]*)" )
end

function get_arturl( path, video_id )
    -- Returns the artwork (thumbnail) URL for a video.
    --   path:     URL to inspect for an explicit "iurl" parameter; falls
    --             back to vlc.path when nil.
    --   video_id: YouTube video id used to build the default thumbnail URL.
    -- Returns the URI-decoded "iurl" parameter when the URL carries one,
    -- otherwise the default thumbnail URL for `video_id`, or nil when no
    -- video id is available either.
    local url = path or vlc.path

    local iurl = get_url_param( url, "iurl" )
    if iurl then
        -- vlc.strings is a table of helpers, not a function: the decoding
        -- has to go through vlc.strings.decode_uri.
        return vlc.strings.decode_uri( iurl )
    end

    if not video_id then
        return nil
    end
    return "http://img.youtube.com/vi/" .. video_id .. "/default.jpg"
end

-- Probe function.
function probe()
    -- Tells whether this script can handle the current input.
    -- Reads vlc.access and vlc.path. Returns true when the input is an
    -- http(s) URL on a YouTube host that points at a watch page or at one
    -- of the known flash player URLs, false otherwise.
    if vlc.access ~= "http" and vlc.access ~= "https" then
        return false
    end

    local path = vlc.path

    -- Either the path starts with "youtube" (within its first 8
    -- characters), or it contains the literal text ".youtube.com".
    -- FIXME we should be using a builtin list of known youtube websites
    -- like "fr.youtube.com", "uk.youtube.com" etc..
    local on_youtube = string.match( string.sub( path, 1, 8 ), "youtube" )
        or string.find( path, ".youtube.com", 1, true ) -- plain search: "." is not a wildcard
    if not on_youtube then
        return false
    end

    -- Dots are escaped so that e.g. "p.swf" only matches a literal dot.
    local recognised =
           string.match( path, "watch%?v=" ) -- the html page
        or string.match( path, "watch_fullscreen%?video_id=" ) -- the fullscreen page
        or string.match( path, "p%.swf" ) -- the (old?) player url
        or string.match( path, "jp%.swf" ) -- the (new?) player url (as of 24/08/2007)
        or string.match( path, "player2%.swf" ) -- another player url
    return recognised ~= nil
end

-- Parse function.
function parse()
    -- Builds the playlist for the current input (vlc.path).
    --
    -- Two kinds of URL are handled:
    --   * the HTML watch page ("watch?v="): the page is read line by line
    --     through vlc.readline() to collect the title, description, artist
    --     and the stream location;
    --   * anything else is treated as a flash player URL whose query
    --     parameters already carry the video id (and optionally "t",
    --     "fmt", "title").
    --
    -- Returns a table holding a single item table with the fields
    -- path, name, arturl (and description, artist for the HTML page).
    -- All working variables are local so nothing leaks into the global
    -- environment.

    -- Undoes the double XML escaping used in the page's meta tags.
    -- Returns nil when given nil, so a tag without content="..." is
    -- skipped instead of raising an error.
    local function unescape_twice( text )
        if not text then
            return nil
        end
        return vlc.strings.resolve_xml_special_chars(
            vlc.strings.resolve_xml_special_chars( text ) )
    end

    if string.match( vlc.path, "watch%?v=" )
    then -- This is the HTML page's URL
        -- fmt is the format of the video: 18 is HQ (mp4)
        local fmt = get_url_param( vlc.path, "fmt" )
        local name, description, artist
        local t, base_yt_url, path, webm_path

        while true do
            local line = vlc.readline()
            if not line then break end

            -- Try to find the video's title
            if string.match( line, "<meta name=\"title\"" ) then
                name = unescape_twice( string.match( line, "content=\"(.-)\"" ) ) or name
            end
            if string.match( line, "<meta name=\"description\"" ) then
                -- Don't ask me why they double encode ...
                description = unescape_twice( string.match( line, "content=\"(.-)\"" ) ) or description
            end
            if string.match( line, "subscribe_to_user=" ) then
                artist = string.match( line, "subscribe_to_user=([^&]*)" )
            end

            -- CURRENT: var swfConfig = { [a lot of stuff...], "video_id": "OHVvVmUNBFc", "sk": "WswKuJzDBsdD6oG3IakCXgC", "t": "OEgsToPDskK3zO44y0QN8Fr5ZSAZwCQp", "plid": "AARGnwWMrmGkbpOxAAAA4AT4IAA"};
            -- OLD 1: var swfArgs = {hl:'en',BASE_YT_URL:'http://youtube.com/',video_id:'XPJ7d8dq0t8',l:'292',t:'OEgsToPDskLFdOYrrlDm3FQPoQBYaCP1',sk:'0gnr-AE6QZJEZmCMd3lq_AC'};
            -- OLD 2: var swfArgs = { "BASE_YT_URL": "http://youtube.com", "video_id": "OHVvVmUNBFc", "l": 88, "sk": "WswKuJzDBsdD6oG3IakCXgC", "t": "OEgsToPDskK3zO44y0QN8Fr5ZSAZwCQp", "plid": "AARGnwWMrmGkbpOxAAAA4AT4IAA", "tk": "mEL4E7PqHeaZp5OG19NQThHt9mXJU4PbRTOw6lz9osHi4Hixp7RE1w=="};
            -- OLD 3: 'SWF_ARGS': { [a lot of stuff...], "video_id": "OHVvVmUNBFc", "sk": "WswKuJzDBsdD6oG3IakCXgC", "t": "OEgsToPDskK3zO44y0QN8Fr5ZSAZwCQp", "plid": "AARGnwWMrmGkbpOxAAAA4AT4IAA"};
            local is_config_line = string.match( line, "PLAYER_CONFIG" )
                or string.match( line, "swfConfig" )
                or string.match( line, "SWF_ARGS" )
                or string.match( line, "swfArgs" )
            if is_config_line and string.match( line, "video_id" ) then
                if string.match( line, "BASE_YT_URL" ) then
                    base_yt_url = string.match( line, "\"BASE_YT_URL\": \"(.-)\"" )
                end
                t = string.match( line, "\"t\": \"(.-)\"" )
                -- vlc.msg.err( t )
                -- video_id = string.gsub( line, ".*&video_id:'([^']*)'.*", "%1" )
                local fmt_url_map = string.match( line, "\"fmt_url_map\": \"(.-)\"" )
                if fmt_url_map then
                    -- FIXME: do this properly
                    fmt_url_map = string.gsub( fmt_url_map, "\\u0026", "&" )
                    for itag, url in string.gmatch( fmt_url_map, "(%d+)|([^,]+)" ) do
                        -- Apparently formats are listed in quality order,
                        -- so we can afford to simply take the first one
                        if not fmt or tonumber( itag ) == tonumber( fmt ) then
                            -- do unescaping of / (parentheses drop gsub's
                            -- second return value, the substitution count)
                            path = ( string.gsub( url, '\\/', '/' ) )
                            break
                        end
                    end
                end
            -- Also available on non-HTML5 pages: var swfHTML = (isIE) ? "<object [...]><param name=\"flashvars\" value=\"rv.2.thumbnailUrl=http%3A%2F%2Fi4.ytimg.com%2Fvi%2F3MLp7YNTznE%2Fdefault.jpg&rv.7.length_seconds=384 [...] &video_id=OHVvVmUNBFc [...] &t=OEgsToPDskK3zO44y0QN8Fr5ZSAZwCQp [...]
            elseif string.match( line, "swfHTML" ) and string.match( line, "video_id" ) then
                t = string.match( line, "&t=(.-)&" )
            -- Also available in HTML5 pages: videoPlayer.setAvailableFormat("http://v6.lscache4.c.youtube.com/videoplayback?ip=82.0.0.0&sparams=id%2Cexpire%2Cip%2Cipbits%2Citag%2Calgorithm%2Cburst%2Cfactor&algorithm=throttle-factor&itag=45&ipbits=8&burst=40&sver=3&expire=1275688800&key=yt1&signature=6ED860441298D1157FF3013A5D72727F25831F09.4C196BEA9F8F9B83CE678D79AD918B83D5E98B46&factor=1.25&id=7117715cf57d18d4", "video/webm; codecs=&quot;vp8.0, vorbis&quot;", "hd720");
            elseif string.match( line, "videoPlayer%.setAvailableFormat" ) then
                local url, itag = string.match( line, "videoPlayer%.setAvailableFormat%(\"(.-itag=(%d+).-)\",.+%)" )
                if url then
                    -- For now, WebM formats are listed only in the HTML5
                    -- section, that is also only when HTML5 is enabled.
                    -- Format 45 is 720p, and 43 is lower resolution.
                    if tonumber( itag ) == 45 or ( tonumber( itag ) == 43 and not webm_path ) then
                        webm_path = url
                    end
                    -- Grab something if fmt_url_map failed
                    if not path and ( not fmt or tonumber( itag ) == tonumber( fmt ) ) then
                        path = url
                    end
                end
            end
        end

        local video_id = get_url_param( vlc.path, "v" )
        local arturl = get_arturl( vlc.path, video_id )

        if not fmt then
            -- Prefer WebM formats if this is an &html5=True URL
            local html5 = get_url_param( vlc.path, "html5" )
            if html5 == "True" and webm_path then
                path = webm_path
            end
        end

        if not path then
            if not base_yt_url then
                base_yt_url = "http://youtube.com/"
            elseif string.sub( base_yt_url, -1 ) ~= "/" then
                -- Some page layouts give the base URL without a trailing
                -- slash ("http://youtube.com"); add it so the request
                -- path below is not glued onto the host name.
                base_yt_url = base_yt_url .. "/"
            end
            local format = fmt and ( "&fmt=" .. fmt ) or ""

            if t then
                path = base_yt_url .. "get_video?video_id=" .. video_id .. "&t=" .. t .. format
            else
                -- This shouldn't happen ... but keep it as a backup.
                path = "http://www.youtube.com/v/" .. video_id
            end
        end
        return { { path = path; name = name; description = description; artist = artist; arturl = arturl } }
    else -- This is the flash player's URL
        local name
        local title = get_url_param( vlc.path, "title" )
        if title then
            name = vlc.strings.decode_uri( title )
        end
        local video_id = get_url_param( vlc.path, "video_id" )
        local arturl = get_arturl( vlc.path, video_id )
        local fmt = get_url_param( vlc.path, "fmt" )
        local format = fmt and ( "&fmt=" .. fmt ) or ""

        -- Look the parameter up by name: a bare search for "t=" would also
        -- hit "fmt=" and then fail on a missing value.
        local t = get_url_param( vlc.path, "t" )
        if not t then
            -- This sucks, we're missing "t" which is now mandatory. Let's
            -- try using another url
            return { { path = "http://www.youtube.com/v/" .. video_id; name = name; arturl = arturl } }
        end
        return { { path = "http://www.youtube.com/get_video.php?video_id=" .. video_id .. "&t=" .. t .. format; name = name; arturl = arturl } }
    end
end