Search in sources :

Example 1 with Regex

use of com.frostwire.search.youtube.jd.Regex in project frostwire by frostwire.

the class YouTubeExtractor method getLinks.

/**
 * Loads the watch page for {@code video} and returns the stream links found on it.
 *
 * <p>The file name handed to {@code parseLinks} is chosen in this order: the
 * {@code &title=} value found in the page, else the first {@code FILENAME_PATTERN}
 * match, else the video id taken from the {@code watch?v=} part of {@code video}.
 *
 * @param video URL of the watch page to load
 * @param prem  not read by this method
 * @param br    browser used for the request; this method sets redirect following,
 *              a {@code PREF} cookie and a {@code User-Agent} header on it
 * @return the result of {@code parseLinks(br, video, filename, false, false, ytSig)},
 *         or {@code null} when the page carries the "unavailable" sub-message markup
 * @throws Exception propagated from the browser and from the helper methods called here
 */
private HashMap<Integer, String> getLinks(final String video, final boolean prem, Browser br) throws Exception {
    br.setFollowRedirects(true);
    /* this cookie makes html5 available and skip controversy check */
    br.setCookie("youtube.com", "PREF", "f2=40100000&hl=en-US");
    br.getHeaders().put("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/600.3.18 (KHTML, like Gecko) Version/8.0.3 Safari/600.3.18");
    br.getPage(video);
    if (br.containsHTML("id=\"unavailable-submessage\" class=\"watch-unavailable-submessage\"")) {
        return null;
    }
    // The video id is the file name of last resort.
    String filename = new Regex(video, "watch\\?v=([\\w_\\-]+)").getMatch(0);
    boolean fileNameFound = false;
    if (br.containsHTML("&title=")) {
        // "&title=" may be present with an empty value; the capture then fails and
        // must not be dereferenced.
        final String rawTitle = br.getRegex("&title=([^&$]+)").getMatch(0);
        if (rawTitle != null) {
            filename = Encoding.htmlDecode(rawTitle.replaceAll("\\+", " ").trim());
            fileNameFound = true;
        }
    }
    String playerId = br.getRegex("<script src=\"/yts/jsbin/player-([\\w_\\-]+)/en_US/base.js\".*?name=\"player/base\".*?></script>").getMatch(0);
    // NOTE(review): playerId is not checked for null, so a page without the player
    // script tag requests ".../player-null/en_US/base.js" — confirm getYouTubeSig copes.
    YouTubeSig ytSig = getYouTubeSig("http://www.youtube.com/yts/jsbin/player-" + playerId + "/en_US/base.js");
    currentYTSig = ytSig;
    // Only consult FILENAME_PATTERN when the page title was not usable; the flag is
    // tested first so the regex is skipped otherwise.
    if (!fileNameFound && br.getRegex(FILENAME_PATTERN).count() != 0) {
        filename = Encoding.htmlDecode(br.getRegex(FILENAME_PATTERN).getMatch(0).trim());
    }
    return parseLinks(br, video, filename, false, false, ytSig);
}
Also used : Regex(com.frostwire.search.youtube.jd.Regex)

Example 2 with Regex

use of com.frostwire.search.youtube.jd.Regex in project frostwire by frostwire.

the class YouTubeExtractor method parseLinks (the overload that takes a Browser).

/**
 * Extracts stream links from the page currently loaded in {@code br}.
 *
 * <p>The page is searched for an {@code "html5_fmt_map"} array first. If that is
 * absent, {@code url_encoded_fmt_stream_map} is looked up in three spellings and
 * handed to {@code parseLinks(String, YouTubeSig)}, followed by the
 * {@code "adaptive_fmts"} value. Keys of the returned map are the parsed
 * {@code itag} numbers; key {@code -1} holds {@code filename} when at least one
 * link was found and {@code filename} is not {@code null}.
 *
 * @param br            browser holding the page to parse; may be asked to load
 *                      {@code get_video_info} and may have its youtube.com cookies cleared
 * @param videoURL      URL of the video, used to derive the id for the {@code get_video_info} request
 * @param filename      name stored under key {@code -1}
 * @param ythack        selects the URL-encoded {@code &fmt_list=} form and enables the
 *                      {@code url%3D} fallback scan when no links were found
 * @param tryGetDetails when no stream map is found, load {@code get_video_info} and
 *                      parse again once (the retry passes {@code false})
 * @param ytSig         signature helper forwarded to {@code parseLinks(String, YouTubeSig)}
 * @return the links found (possibly empty); {@code null} when the stream map contains
 *         none of "signature", "sig" or "s=", when the page contains
 *         {@code reason=Unfortunately}, or when no stream map is found and
 *         {@code tryGetDetails} is {@code false}
 * @throws Exception propagated from the browser and from helper calls; this method
 *                   also sleeps, so it may be interrupted
 */
private HashMap<Integer, String> parseLinks(Browser br, final String videoURL, String filename, boolean ythack, boolean tryGetDetails, YouTubeSig ytSig) throws Exception {
    final HashMap<Integer, String> links = new HashMap<Integer, String>();
    String html5_fmt_map = br.getRegex("\"html5_fmt_map\": \\[(.*?)\\]").getMatch(0);
    if (html5_fmt_map != null) {
        String[] html5_hits = new Regex(html5_fmt_map, "\\{(.*?)\\}").getColumn(0);
        if (html5_hits != null) {
            for (String hit : html5_hits) {
                String hitUrl = new Regex(hit, "url\": \"(http:.*?)\"").getMatch(0);
                String hitFmt = new Regex(hit, "itag\": (\\d+)").getMatch(0);
                if (hitUrl != null && hitFmt != null) {
                    hitUrl = unescape(hitUrl.replaceAll("\\\\/", "/"));
                    links.put(Integer.parseInt(hitFmt), Encoding.htmlDecode(Encoding.urlDecode(hitUrl, true)));
                }
            }
        }
    } else {
        // New format since ca. 1.8.2011.
        html5_fmt_map = br.getRegex("\"url_encoded_fmt_stream_map\": \"(.*?)\"").getMatch(0);
        // New format since 1.27.2014, they removed a space.
        if (html5_fmt_map == null) {
            html5_fmt_map = br.getRegex("\"url_encoded_fmt_stream_map\":\"(.*?)\"").getMatch(0);
        }
        if (html5_fmt_map == null) {
            // Query-string form: the value is still percent-encoded.
            html5_fmt_map = br.getRegex("url_encoded_fmt_stream_map=(.*?)(&|$)").getMatch(0);
            if (html5_fmt_map != null) {
                html5_fmt_map = html5_fmt_map.replaceAll("%2C", ",");
                if (!html5_fmt_map.contains("url=")) {
                    html5_fmt_map = html5_fmt_map.replaceAll("%3D", "=");
                    html5_fmt_map = html5_fmt_map.replaceAll("%26", "&");
                }
            }
        }
        if (html5_fmt_map != null && !html5_fmt_map.contains("signature") && !html5_fmt_map.contains("sig") && !html5_fmt_map.contains("s=")) {
            // No signature marker of any kind: back off, drop the cookies, give up.
            Thread.sleep(5000);
            br.clearCookies("youtube.com");
            return null;
        }
        if (html5_fmt_map != null) {
            HashMap<Integer, String> ret = parseLinks(html5_fmt_map, ytSig);
            if (ret.size() == 0) {
                return links;
            }
            links.putAll(ret);
            /* not playable by vlc */
            /* check for adaptive fmts */
            String adaptive = br.getRegex("\"adaptive_fmts\": \"(.*?)\"").getMatch(0);
            // parseLinks(String, ...) returns an empty map for a null argument.
            links.putAll(parseLinks(adaptive, ytSig));
        } else {
            if (br.containsHTML("reason=Unfortunately")) {
                return null;
            }
            if (tryGetDetails) {
                br.getPage("http://www.youtube.com/get_video_info?el=detailpage&video_id=" + getVideoID(videoURL));
                return parseLinks(br, videoURL, filename, ythack, false, ytSig);
            } else {
                return null;
            }
        }
    }
    /* normal links */
    final HashMap<String, String> fmt_list = new HashMap<String, String>();
    String fmt_list_str = "";
    if (ythack) {
        fmt_list_str = (br.getMatch("&fmt_list=(.+?)&") + ",").replaceAll("%2F", "/").replaceAll("%2C", ",");
    } else {
        fmt_list_str = (br.getMatch("\"fmt_list\":\\s+\"(.+?)\",") + ",").replaceAll("\\\\/", "/");
    }
    final String[][] fmt_list_map = new Regex(fmt_list_str, "(\\d+)/(\\d+x\\d+)/\\d+/\\d+/\\d+,").getMatches();
    for (final String[] fmt : fmt_list_map) {
        fmt_list.put(fmt[0], fmt[1]);
    }
    if (links.size() == 0 && ythack) {
        /* try to find fallback links */
        String[] urls = br.getRegex("url%3D(.*?)($|%2C)").getColumn(0);
        int index = 0;
        if (urls != null) {
            for (String vurl : urls) {
                String hitUrl = new Regex(vurl, "(.*?)%26").getMatch(0);
                String hitQ = new Regex(vurl, "%26quality%3D(.*?)%").getMatch(0);
                if (hitUrl != null && hitQ != null) {
                    hitUrl = unescape(hitUrl.replaceAll("\\\\/", "/"));
                    // Strictly less-than: index == length is already out of bounds.
                    if (index < fmt_list_map.length) {
                        links.put(Integer.parseInt(fmt_list_map[index][0]), Encoding.htmlDecode(Encoding.urlDecode(hitUrl, false)));
                        index++;
                    }
                }
            }
        }
    }
    if (filename != null && !links.isEmpty()) {
        // Key -1 carries the file name alongside the itag-keyed links.
        links.put(-1, filename);
    }
    return links;
}
Also used : Regex(com.frostwire.search.youtube.jd.Regex)

Example 3 with Regex

use of com.frostwire.search.youtube.jd.Regex in project frostwire by frostwire.

the class YouTubeExtractor method extractLinksFromDashManifest.

/**
 * Downloads a DASH manifest and builds one {@code LinkInfo} per {@code BaseURL}
 * element whose {@code itag} is present in {@code FORMATS}.
 *
 * <p>If the manifest URL contains a {@code /s/<value>/} path segment, the value is
 * passed through {@code ytSig.calc} and the segment is rewritten to
 * {@code /signature/<result>/}. A URL that already contains {@code /signature/}
 * is used unchanged. A URL with neither yields an empty list without any request.
 *
 * @param dashManifestUrl manifest URL; {@code \/} sequences are unescaped to {@code /} first
 * @param ytSig           computes the signature for a {@code /s/} segment
 * @param filename        passed through to every {@code LinkInfo}
 * @param date            passed through to every {@code LinkInfo}
 * @param videoId         passed through to every {@code LinkInfo}
 * @param userName        passed through to every {@code LinkInfo}
 * @param channelName     passed through to every {@code LinkInfo}
 * @param thumbnailLinks  passed through to every {@code LinkInfo}
 * @return the links found, in document order; never {@code null}
 * @throws IOException                  if fetching or reading the manifest fails
 * @throws ParserConfigurationException if no XML document builder can be created
 * @throws SAXException                 if the manifest is not well-formed XML
 */
private List<LinkInfo> extractLinksFromDashManifest(String dashManifestUrl, YouTubeSig ytSig, String filename, Date date, String videoId, String userName, String channelName, ThumbnailLinks thumbnailLinks) throws IOException, ParserConfigurationException, SAXException {
    dashManifestUrl = dashManifestUrl.replace("\\/", "/");
    Matcher m = Pattern.compile("/s/([a-fA-F0-9\\.]+)/").matcher(dashManifestUrl);
    if (m.find()) {
        String signature = ytSig.calc(m.group(1));
        // quoteReplacement keeps '$' and '\' in the signature literal; replaceAll
        // resets the matcher, so every "/s/.../" segment is rewritten as before.
        dashManifestUrl = m.replaceAll(Matcher.quoteReplacement("/signature/" + signature + "/"));
    } else if (!dashManifestUrl.contains("/signature/")) {
        // Neither an "/s/" segment nor a ready-made signature: nothing to fetch.
        return Collections.emptyList();
    }
    HttpClient httpClient = HttpClientFactory.getInstance(HttpClientFactory.HttpContext.SEARCH);
    String dashDoc = httpClient.get(dashManifestUrl);
    // NOTE(review): the manifest is remote XML parsed with default factory settings;
    // consider disabling DTDs/external entities where the platform parser supports it.
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    DocumentBuilder db = dbf.newDocumentBuilder();
    Document doc = db.parse(new InputSource(new StringReader(dashDoc)));
    NodeList nodes = doc.getElementsByTagName("BaseURL");
    List<LinkInfo> infos = new ArrayList<LinkInfo>();
    for (int i = 0; i < nodes.getLength(); i++) {
        Node item = nodes.item(i);
        String url = item.getTextContent();
        int contentLength = -1;
        try {
            // presumably the first attribute of BaseURL is the content length — TODO confirm
            contentLength = Integer.parseInt(item.getAttributes().item(0).getTextContent());
        } catch (RuntimeException ignored) {
            // best effort: a missing or non-numeric attribute leaves contentLength at -1
        }
        String itag = new Regex(url, "itag=(\\d+)").getMatch(0);
        if (itag == null) {
            // One BaseURL without an itag must not discard the links already collected.
            continue;
        }
        int fmt;
        try {
            fmt = Integer.parseInt(itag);
        } catch (NumberFormatException ignored) {
            // digits only, so this can only be an out-of-range value; skip the entry
            continue;
        }
        Format format = FORMATS.get(fmt);
        if (format == null) {
            continue;
        }
        LinkInfo info = new LinkInfo(url, fmt, filename, contentLength, date, videoId, userName, channelName, thumbnailLinks, format);
        infos.add(info);
    }
    return infos;
}
Also used : Pattern(java.util.regex.Pattern) InputSource(org.xml.sax.InputSource) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) Matcher(java.util.regex.Matcher) NodeList(org.w3c.dom.NodeList) Node(org.w3c.dom.Node) Document(org.w3c.dom.Document) SimpleDateFormat(java.text.SimpleDateFormat) Regex(com.frostwire.search.youtube.jd.Regex) DocumentBuilder(javax.xml.parsers.DocumentBuilder) HttpClient(com.frostwire.util.http.HttpClient) StringReader(java.io.StringReader)

Example 4 with Regex

use of com.frostwire.search.youtube.jd.Regex in project frostwire by frostwire.

the class YouTubeExtractor method parseLinks (the overload that takes the stream-map String).

/**
 * Parses a comma-separated stream map into a map from {@code itag} to download URL.
 *
 * <p>Each entry is unescaped and searched for {@code url=}, {@code itag=} and a
 * signature. The signature is taken from {@code sig=}/{@code signature=} (plain or
 * percent-encoded); failing that, an {@code s=} value is run through
 * {@code ytSig.calc}. Entries without both a URL and an itag are skipped.
 *
 * @param html5_fmt_map the raw stream map; {@code null} yields an empty result
 * @param ytSig         used to compute a signature from an {@code s=} value; may be
 *                      {@code null}, in which case {@code s=} values are ignored
 * @return the links found; empty when the input is {@code null} or contains
 *         {@code UNSUPPORTEDRTMP}; never {@code null}
 */
private HashMap<Integer, String> parseLinks(String html5_fmt_map, YouTubeSig ytSig) {
    final HashMap<Integer, String> links = new HashMap<Integer, String>();
    if (html5_fmt_map == null || html5_fmt_map.contains(UNSUPPORTEDRTMP)) {
        return links;
    }
    String[] html5_hits = new Regex(html5_fmt_map, "(.*?)(,|$)").getColumn(0);
    if (html5_hits == null) {
        return links;
    }
    for (String hit : html5_hits) {
        hit = unescape(hit);
        String hitUrl = new Regex(hit, "url=(http.*?)(\\&|$)").getMatch(0);
        // Signature lookup, most specific form first.
        String sig = new Regex(hit, "url=http.*?(\\&|$)(sig|signature)=(.*?)(\\&|$)").getMatch(2);
        if (sig == null) {
            sig = new Regex(hit, "(sig|signature)=(.*?)(\\&|$)").getMatch(1);
        }
        if (sig == null) {
            sig = new Regex(hit, "(sig|signature)%3D(.*?)%26").getMatch(1);
        }
        if (sig == null) {
            String temp = new Regex(hit, "(\\&|^)s=(.*?)(\\&|$)").getMatch(1);
            sig = ytSig != null && temp != null ? ytSig.calc(temp) : null;
        }
        String hitFmt = new Regex(hit, "itag=(\\d+)").getMatch(0);
        if (hitUrl == null || hitFmt == null) {
            continue;
        }
        hitUrl = unescape(hitUrl.replaceAll("\\\\/", "/"));
        if (hitUrl.startsWith("http%253A")) {
            hitUrl = Encoding.htmlDecode(hitUrl);
        }
        String inst = Encoding.urlDecode(hitUrl, true);
        // Append the signature only when the URL lacks one AND one was resolved;
        // otherwise the literal text "&signature=null" would end up in the link.
        if (!hitUrl.contains("sig") && sig != null) {
            inst = inst + "&signature=" + sig;
        }
        links.put(Integer.parseInt(hitFmt), Encoding.htmlDecode(inst));
    }
    return links;
}
Also used : Regex(com.frostwire.search.youtube.jd.Regex)

Aggregations

Regex (com.frostwire.search.youtube.jd.Regex): 4, HttpClient (com.frostwire.util.http.HttpClient): 1, StringReader (java.io.StringReader): 1, SimpleDateFormat (java.text.SimpleDateFormat): 1, Matcher (java.util.regex.Matcher): 1, Pattern (java.util.regex.Pattern): 1, DocumentBuilder (javax.xml.parsers.DocumentBuilder): 1, DocumentBuilderFactory (javax.xml.parsers.DocumentBuilderFactory): 1, Document (org.w3c.dom.Document): 1, Node (org.w3c.dom.Node): 1, NodeList (org.w3c.dom.NodeList): 1, InputSource (org.xml.sax.InputSource): 1