use of com.frostwire.search.youtube.jd.Regex in project frostwire by frostwire.
the class YouTubeExtractor method getLinks.
/**
 * Loads the watch page for {@code video} and extracts its stream links.
 *
 * <p>The page is fetched with a fixed PREF cookie and a Safari User-Agent. The file name
 * defaults to the video id taken from the {@code watch?v=} part of the URL, is replaced by the
 * {@code &title=} value when the page has a non-empty one, and otherwise by the first
 * {@code FILENAME_PATTERN} match. The signature helper built from the page's player script is
 * also stored in {@code currentYTSig}.
 *
 * @param video watch page URL
 * @param prem  not used by this method
 * @param br    browser used to load the page; its redirect, cookie and header settings are modified
 * @return {@code null} when the page carries the "unavailable" marker, otherwise whatever
 *         {@code parseLinks(br, video, filename, false, false, ytSig)} returns
 * @throws Exception propagated from the page load, signature setup or link parsing
 */
private HashMap<Integer, String> getLinks(final String video, final boolean prem, Browser br) throws Exception {
    br.setFollowRedirects(true);
    /* this cookie makes html5 available and skip controversy check */
    br.setCookie("youtube.com", "PREF", "f2=40100000&hl=en-US");
    br.getHeaders().put("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/600.3.18 (KHTML, like Gecko) Version/8.0.3 Safari/600.3.18");
    br.getPage(video);
    if (br.containsHTML("id=\"unavailable-submessage\" class=\"watch-unavailable-submessage\"")) {
        return null;
    }

    String videoId = new Regex(video, "watch\\?v=([\\w_\\-]+)").getMatch(0);
    boolean fileNameFound = false;
    String filename = videoId;
    if (br.containsHTML("&title=")) {
        // "&title=" can be present with an empty value (e.g. "&title=&"), in which case the
        // capturing regex does not match; only use the title when there is one.
        String rawTitle = br.getRegex("&title=([^&$]+)").getMatch(0);
        if (rawTitle != null) {
            filename = Encoding.htmlDecode(rawTitle.replaceAll("\\+", " ").trim());
            fileNameFound = true;
        }
    }

    // NOTE(review): playerId is null when the script tag is not found, which yields a
    // ".../player-null/..." URL — confirm getYouTubeSig copes with that.
    String playerId = br.getRegex("<script src=\"/yts/jsbin/player-([\\w_\\-]+)/en_US/base.js\".*?name=\"player/base\".*?></script>").getMatch(0);
    YouTubeSig ytSig = getYouTubeSig("http://www.youtube.com/yts/jsbin/player-" + playerId + "/en_US/base.js");
    currentYTSig = ytSig;

    /* html5_fmt_map */
    if (!fileNameFound && br.getRegex(FILENAME_PATTERN).count() != 0) {
        filename = Encoding.htmlDecode(br.getRegex(FILENAME_PATTERN).getMatch(0).trim());
    }
    return parseLinks(br, video, filename, false, false, ytSig);
}
use of com.frostwire.search.youtube.jd.Regex in project frostwire by frostwire.
the class YouTubeExtractor method parseLinks.
/**
 * Extracts stream links from the page currently loaded in {@code br}.
 *
 * <p>Sources are tried in this order: the {@code html5_fmt_map} JSON array, then
 * {@code url_encoded_fmt_stream_map} (JSON with a space, JSON without a space, URL-parameter
 * form) followed by {@code adaptive_fmts}. When {@code ythack} is set and nothing was found,
 * URL-encoded {@code url%3D} entries are paired with the itags listed in {@code fmt_list}.
 *
 * @param br            browser holding the page to parse
 * @param videoURL      watch URL, used to build the {@code get_video_info} request
 * @param filename      stored under key {@code -1} when non-null and at least one link was found
 * @param ythack        selects the URL-encoded {@code fmt_list} form and enables the fallback search
 * @param tryGetDetails when no stream map is found, load {@code get_video_info} and retry once
 * @param ytSig         signature helper passed on to {@code parseLinks(String, YouTubeSig)}
 * @return map of itag to URL (plus {@code -1} to file name); {@code null} when the stream map
 *         carries no signature marker, when the page reports {@code reason=Unfortunately}, or
 *         when no stream map exists and {@code tryGetDetails} is false
 * @throws Exception propagated from page loading, decoding or the sleep
 */
private HashMap<Integer, String> parseLinks(Browser br, final String videoURL, String filename, boolean ythack, boolean tryGetDetails, YouTubeSig ytSig) throws Exception {
    final HashMap<Integer, String> links = new HashMap<Integer, String>();
    String html5_fmt_map = br.getRegex("\"html5_fmt_map\": \\[(.*?)\\]").getMatch(0);
    if (html5_fmt_map != null) {
        String[] html5_hits = new Regex(html5_fmt_map, "\\{(.*?)\\}").getColumn(0);
        if (html5_hits != null) {
            for (String hit : html5_hits) {
                String hitUrl = new Regex(hit, "url\": \"(http:.*?)\"").getMatch(0);
                String hitFmt = new Regex(hit, "itag\": (\\d+)").getMatch(0);
                if (hitUrl != null && hitFmt != null) {
                    hitUrl = unescape(hitUrl.replaceAll("\\\\/", "/"));
                    links.put(Integer.parseInt(hitFmt), Encoding.htmlDecode(Encoding.urlDecode(hitUrl, true)));
                }
            }
        }
    } else {
        // New format since ca. 1.8.2011.
        html5_fmt_map = br.getRegex("\"url_encoded_fmt_stream_map\": \"(.*?)\"").getMatch(0);
        // New format since 1.27.2014, they removed a space.
        if (html5_fmt_map == null) {
            html5_fmt_map = br.getRegex("\"url_encoded_fmt_stream_map\":\"(.*?)\"").getMatch(0);
        }
        if (html5_fmt_map == null) {
            // URL-parameter form: undo the percent-encoding of the separators we split on.
            html5_fmt_map = br.getRegex("url_encoded_fmt_stream_map=(.*?)(&|$)").getMatch(0);
            if (html5_fmt_map != null) {
                html5_fmt_map = html5_fmt_map.replaceAll("%2C", ",");
                if (!html5_fmt_map.contains("url=")) {
                    html5_fmt_map = html5_fmt_map.replaceAll("%3D", "=");
                    html5_fmt_map = html5_fmt_map.replaceAll("%26", "&");
                }
            }
        }
        if (html5_fmt_map != null && !html5_fmt_map.contains("signature") && !html5_fmt_map.contains("sig") && !html5_fmt_map.contains("s=")) {
            // No signature marker at all: wait, drop the youtube.com cookies and give up.
            Thread.sleep(5000);
            br.clearCookies("youtube.com");
            return null;
        }
        if (html5_fmt_map != null) {
            HashMap<Integer, String> ret = parseLinks(html5_fmt_map, ytSig);
            if (ret.size() == 0) {
                return links;
            }
            links.putAll(ret);
            /* not playable by vlc */
            /* check for adaptive fmts */
            String adaptive = br.getRegex("\"adaptive_fmts\": \"(.*?)\"").getMatch(0);
            // A null "adaptive" is handled by the String overload, which returns an empty map.
            ret = parseLinks(adaptive, ytSig);
            links.putAll(ret);
        } else {
            if (br.containsHTML("reason=Unfortunately")) {
                return null;
            }
            if (tryGetDetails) {
                br.getPage("http://www.youtube.com/get_video_info?el=detailpage&video_id=" + getVideoID(videoURL));
                // Retry exactly once: the recursive call passes tryGetDetails=false.
                return parseLinks(br, videoURL, filename, ythack, false, ytSig);
            } else {
                return null;
            }
        }
    }

    /* normal links */
    String fmt_list_str;
    if (ythack) {
        fmt_list_str = (br.getMatch("&fmt_list=(.+?)&") + ",").replaceAll("%2F", "/").replaceAll("%2C", ",");
    } else {
        fmt_list_str = (br.getMatch("\"fmt_list\":\\s+\"(.+?)\",") + ",").replaceAll("\\\\/", "/");
    }
    // Each match is {itag, resolution}; only the itag is used below.
    final String[][] fmt_list_map = new Regex(fmt_list_str, "(\\d+)/(\\d+x\\d+)/\\d+/\\d+/\\d+,").getMatches();

    if (links.size() == 0 && ythack) {
        /* try to find fallback links */
        String[] urls = br.getRegex("url%3D(.*?)($|%2C)").getColumn(0);
        if (urls != null && fmt_list_map != null) {
            int index = 0;
            for (String vurl : urls) {
                String hitUrl = new Regex(vurl, "(.*?)%26").getMatch(0);
                String hitQ = new Regex(vurl, "%26quality%3D(.*?)%").getMatch(0);
                if (hitUrl != null && hitQ != null) {
                    hitUrl = unescape(hitUrl.replaceAll("\\\\/", "/"));
                    // Strict bound: index == length would read past the end of fmt_list_map.
                    if (index < fmt_list_map.length) {
                        links.put(Integer.parseInt(fmt_list_map[index][0]), Encoding.htmlDecode(Encoding.urlDecode(hitUrl, false)));
                        index++;
                    }
                }
            }
        }
    }

    if (filename != null && !links.isEmpty()) {
        links.put(-1, filename);
    }
    return links;
}
use of com.frostwire.search.youtube.jd.Regex in project frostwire by frostwire.
the class YouTubeExtractor method extractLinksFromDashManifest.
/**
 * Downloads a DASH manifest and turns each {@code BaseURL} element into a {@link LinkInfo}.
 *
 * <p>If the manifest URL contains an {@code /s/<sig>/} path segment, the segment is replaced by
 * {@code /signature/<ytSig.calc(sig)>/}. A URL that already contains {@code /signature/} is used
 * unchanged; a URL with neither yields an empty list without any network request.
 *
 * <p>Entries whose URL has no {@code itag=<digits>} parameter, or whose itag is not present in
 * {@code FORMATS}, are skipped.
 *
 * @param dashManifestUrl manifest URL, possibly with JSON-escaped slashes ({@code \/})
 * @param ytSig           signature helper used when the URL carries an {@code /s/} segment
 * @param filename        passed through to every {@link LinkInfo}
 * @param date            passed through to every {@link LinkInfo}
 * @param videoId         passed through to every {@link LinkInfo}
 * @param userName        passed through to every {@link LinkInfo}
 * @param channelName     passed through to every {@link LinkInfo}
 * @param thumbnailLinks  passed through to every {@link LinkInfo}
 * @return the links found, possibly empty; never {@code null}
 * @throws IOException                  if fetching or reading the manifest fails
 * @throws ParserConfigurationException if no XML parser can be created
 * @throws SAXException                 if the manifest is not well-formed XML
 */
private List<LinkInfo> extractLinksFromDashManifest(String dashManifestUrl, YouTubeSig ytSig, String filename, Date date, String videoId, String userName, String channelName, ThumbnailLinks thumbnailLinks) throws IOException, ParserConfigurationException, SAXException {
    dashManifestUrl = dashManifestUrl.replace("\\/", "/");
    Pattern p = Pattern.compile("/s/([a-fA-F0-9\\.]+)/");
    Matcher m = p.matcher(dashManifestUrl);
    if (m.find()) {
        String signature = ytSig.calc(m.group(1));
        // quoteReplacement keeps '$' or '\' in the computed signature from being read as
        // group references by replaceAll.
        dashManifestUrl = m.replaceAll(Matcher.quoteReplacement("/signature/" + signature + "/"));
    } else if (!dashManifestUrl.contains("/signature/")) {
        // Neither an encrypted nor a plain signature: nothing usable.
        return Collections.emptyList();
    }

    HttpClient httpClient = HttpClientFactory.getInstance(HttpClientFactory.HttpContext.SEARCH);
    String dashDoc = httpClient.get(dashManifestUrl);
    if (dashDoc == null) {
        // Defensive: HttpClient.get is not visible here; avoid handing null to StringReader.
        return Collections.emptyList();
    }

    // NOTE(review): the manifest comes from the network and is parsed with default factory
    // settings; consider disabling DTDs/external entities once parser support on every
    // target platform is confirmed.
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    DocumentBuilder db = dbf.newDocumentBuilder();
    Document doc = db.parse(new InputSource(new StringReader(dashDoc)));
    NodeList nodes = doc.getElementsByTagName("BaseURL");

    List<LinkInfo> infos = new ArrayList<LinkInfo>();
    for (int i = 0; i < nodes.getLength(); i++) {
        Node item = nodes.item(i);
        String url = item.getTextContent();

        // One entry without an itag must not abort the whole manifest.
        String itag = new Regex(url, "itag=(\\d+)").getMatch(0);
        if (itag == null) {
            continue;
        }
        int fmt = Integer.parseInt(itag);
        Format format = FORMATS.get(fmt);
        if (format == null) {
            continue;
        }

        // Best effort: the first attribute of the node is read as the content length
        // (presumably yt:contentLength — confirm); -1 means unknown.
        int contentLength = -1;
        try {
            contentLength = Integer.parseInt(item.getAttributes().item(0).getTextContent());
        } catch (RuntimeException ignored) {
            // missing or non-numeric attribute; keep -1
        }

        LinkInfo info = new LinkInfo(url, fmt, filename, contentLength, date, videoId, userName, channelName, thumbnailLinks, format);
        infos.add(info);
    }
    return infos;
}
use of com.frostwire.search.youtube.jd.Regex in project frostwire by frostwire.
the class YouTubeExtractor method parseLinks.
/**
 * Parses a comma-separated stream map into a map of itag to download URL.
 *
 * <p>Each entry is unescaped and searched for {@code url=}, {@code itag=} and a signature. The
 * signature is taken from {@code sig=}/{@code signature=} (plain or percent-encoded), or
 * computed with {@code ytSig.calc} from the {@code s=} parameter. When the URL does not
 * already contain {@code "sig"} and a signature was found, it is appended as
 * {@code &signature=<sig>}.
 *
 * @param html5_fmt_map raw stream map; {@code null} yields an empty result
 * @param ytSig         helper used for the {@code s=} parameter; may be {@code null}
 * @return map of itag to URL; empty when the input is {@code null}, contains
 *         {@code UNSUPPORTEDRTMP}, or has no usable entry
 */
private HashMap<Integer, String> parseLinks(String html5_fmt_map, YouTubeSig ytSig) {
    final HashMap<Integer, String> links = new HashMap<Integer, String>();
    if (html5_fmt_map == null || html5_fmt_map.contains(UNSUPPORTEDRTMP)) {
        return links;
    }
    String[] html5_hits = new Regex(html5_fmt_map, "(.*?)(,|$)").getColumn(0);
    if (html5_hits == null) {
        return links;
    }

    for (String hit : html5_hits) {
        hit = unescape(hit);
        String hitUrl = new Regex(hit, "url=(http.*?)(\\&|$)").getMatch(0);

        // Look for the signature in order of preference: directly after the url parameter,
        // anywhere in the entry, percent-encoded, and finally the "s=" form that needs ytSig.
        String sig = new Regex(hit, "url=http.*?(\\&|$)(sig|signature)=(.*?)(\\&|$)").getMatch(2);
        if (sig == null) {
            sig = new Regex(hit, "(sig|signature)=(.*?)(\\&|$)").getMatch(1);
        }
        if (sig == null) {
            sig = new Regex(hit, "(sig|signature)%3D(.*?)%26").getMatch(1);
        }
        if (sig == null) {
            String temp = new Regex(hit, "(\\&|^)s=(.*?)(\\&|$)").getMatch(1);
            sig = ytSig != null && temp != null ? ytSig.calc(temp) : null;
        }

        String hitFmt = new Regex(hit, "itag=(\\d+)").getMatch(0);
        if (hitUrl != null && hitFmt != null) {
            hitUrl = unescape(hitUrl.replaceAll("\\\\/", "/"));
            if (hitUrl.startsWith("http%253A")) {
                // Double-encoded URL: strip one encoding layer first.
                hitUrl = Encoding.htmlDecode(hitUrl);
            }
            String inst = Encoding.urlDecode(hitUrl, true);
            // Append the signature only when there is one; concatenating a null would
            // produce the literal "&signature=null".
            if (!hitUrl.contains("sig") && sig != null) {
                inst = inst + "&signature=" + sig;
            }
            links.put(Integer.parseInt(hitFmt), Encoding.htmlDecode(inst));
        }
    }
    return links;
}
Aggregations