Search in sources :

Example 1 with SearchException

Use of com.biglybt.core.metasearch.SearchException in project BiglyBT by BiglySoftware.

From the class WebEngine, method getWebPageContentSupport.

/**
 * Downloads the resource behind {@code searchURL} and decodes it to text.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Unless the URL starts with {@code vuze:}, every {@code "%" + matchPattern} placeholder is
 *       replaced by the URL-encoded parameter value, and every {@code searchContext} entry whose key
 *       passes {@code supportsContext} is appended as a query parameter.</li>
 *   <li>A URL containing {@code azmethod=post_basic:<params>} is turned into a POST of
 *       {@code <params>} to the URL preceding the marker.</li>
 *   <li>Host, header, cookie and (optionally) conditional-request properties are set on the
 *       downloader.</li>
 *   <li>{@code file:} URLs are read straight from disk; everything else goes through the
 *       meta-refresh downloader.</li>
 *   <li>The charset is taken from the Content-Type response header, then overridden by an XML
 *       {@code encoding="..."} declaration or an {@code http-equiv} charset found in the first
 *       2048 bytes. When one of those is found the declaration is rewritten to {@code utf-8} in the
 *       returned text.</li>
 * </ol>
 *
 * <p>Side effects visible in this method: {@code TorrentUtils.setTLSDescription} is set on entry
 * and reset to {@code null} on exit; {@code basePage} is updated when {@code baseTagPattern}
 * matches; with {@code only_if_modified} the {@code LD_LAST_MODIFIED}/{@code LD_ETAG} local
 * strings are updated from the response headers.
 *
 * @param proxy            proxy to create the downloader with, or {@code null} for a direct
 *                         connection (in which case 10 second connect/read timeouts are applied)
 * @param proxy_host       value for the {@code URL_HOST} downloader property, or {@code null}
 * @param searchURL        URL template to fetch
 * @param searchParameters placeholder patterns and their replacement values
 * @param searchContext    extra key/value pairs to append to the query string when supported
 * @param headers          passed unchanged to {@code setHeaders}
 * @param only_if_modified when {@code true}, send If-Modified-Since / If-None-Match from the stored
 *                         values and remember the Last-Modified / ETag that come back
 * @return the initial URL, the final URL reported by the downloader (or the initial one) and the
 *         decoded page; the content is {@code ""} for an HTTP 304 response and {@code null} for a
 *         {@code vuze:} resource
 * @throws SearchException if login is required (a {@code SearchLoginException} is raised), the
 *                         POST type is not {@code post_basic}, or any other failure occurs (wrapped
 *                         as "Failed to load page")
 */
private pageDetails getWebPageContentSupport(Proxy proxy, String proxy_host, String searchURL, SearchParameter[] searchParameters, Map<String, String> searchContext, String headers, boolean only_if_modified) throws SearchException {
    try {
        TorrentUtils.setTLSDescription("Search: " + getName());
        if (requiresLogin()) {
            throw new SearchLoginException("login required");
        }
        // Locale.US keeps the scheme test independent of the default locale (same as getWebPageContent)
        boolean vuze_file = searchURL.toLowerCase(Locale.US).startsWith("vuze:");
        if (!vuze_file) {
            // expand "%<pattern>" placeholders with URL-encoded parameter values
            String[] from_strs = new String[searchParameters.length];
            String[] to_strs = new String[searchParameters.length];
            for (int i = 0; i < searchParameters.length; i++) {
                SearchParameter parameter = searchParameters[i];
                from_strs[i] = "%" + parameter.getMatchPattern();
                to_strs[i] = URLEncoder.encode(parameter.getValue(), "UTF-8");
            }
            searchURL = GeneralUtils.replaceAll(searchURL, from_strs, to_strs);
            // append the context entries this engine supports as query parameters
            for (Map.Entry<String, String> entry : searchContext.entrySet()) {
                String key = entry.getKey();
                if (supportsContext(key)) {
                    searchURL += (searchURL.indexOf('?') == -1) ? "?" : "&";
                    searchURL += key + "=" + URLEncoder.encode(entry.getValue(), "UTF-8");
                }
            }
        }
        // System.out.println(searchURL);
        // hack to support POST by encoding into URL
        // http://xxxx/index.php?main=search&azmethod=post_basic:SearchString1=%s&SearchString=&search=Search
        ResourceDownloaderFactory rdf = StaticUtilities.getResourceDownloaderFactory();
        URL initial_url;
        ResourceDownloader initial_url_rd;
        int post_pos = searchURL.indexOf("azmethod=");
        if (post_pos > 0) {
            // 9 == "azmethod=".length(); the "- 1" below also drops the separator before the marker
            String post_params = searchURL.substring(post_pos + 9);
            searchURL = searchURL.substring(0, post_pos - 1);
            debugLog("search_url: " + searchURL + ", post=" + post_params);
            initial_url = new URL(searchURL);
            int sep = post_params.indexOf(':');
            String type = post_params.substring(0, sep);
            if (!type.equals("post_basic")) {
                throw (new SearchException("Only basic type supported"));
            }
            post_params = post_params.substring(sep + 1);
            if (proxy == null) {
                initial_url_rd = rdf.create(initial_url, post_params);
            } else {
                initial_url_rd = rdf.create(initial_url, post_params, proxy);
            }
            initial_url_rd.setProperty("URL_Content-Type", "application/x-www-form-urlencoded");
        } else {
            debugLog("search_url: " + searchURL);
            initial_url = new URL(searchURL);
            if (proxy == null) {
                initial_url_rd = rdf.create(initial_url);
            } else {
                initial_url_rd = rdf.create(initial_url, proxy);
            }
        }
        if (proxy_host != null) {
            initial_url_rd.setProperty("URL_HOST", proxy_host);
        }
        setHeaders(initial_url_rd, headers);
        if (needsAuth && local_cookies != null) {
            initial_url_rd.setProperty("URL_Cookie", local_cookies);
        } else if (fullCookies != null && fullCookies.length() > 0) {
            initial_url_rd.setProperty("URL_Cookie", fullCookies);
        }
        if (only_if_modified) {
            // conditional request based on what the previous download stored
            String last_modified = getLocalString(LD_LAST_MODIFIED);
            String etag = getLocalString(LD_ETAG);
            if (last_modified != null) {
                initial_url_rd.setProperty("URL_If-Modified-Since", last_modified);
            }
            if (etag != null) {
                initial_url_rd.setProperty("URL_If-None-Match", etag);
            }
        }
        InputStream is = null;
        try {
            String content_charset = "UTF-8";
            ResourceDownloader mr_rd = null;
            if (initial_url.getProtocol().equalsIgnoreCase("file")) {
                // handle file://c:/ - map to file:/c:/
                String str = initial_url.toExternalForm();
                if (initial_url.getAuthority() != null) {
                    str = str.replaceFirst("://", ":/");
                }
                // a query string is not part of the file name
                int pos = str.indexOf('?');
                if (pos != -1) {
                    str = str.substring(0, pos);
                }
                is = new FileInputStream(new File(new URL(str).toURI()));
            } else {
                if (proxy == null) {
                    initial_url_rd.setProperty("URL_Connect_Timeout", 10 * 1000);
                    initial_url_rd.setProperty("URL_Read_Timeout", 10 * 1000);
                }
                mr_rd = rdf.getMetaRefreshDownloader(initial_url_rd);
                try {
                    is = mr_rd.download();
                } catch (ResourceDownloaderException e) {
                    // "304 Not Modified" is reported as a failure by the downloader; map it to an empty page
                    Long response = (Long) mr_rd.getProperty("URL_HTTP_Response");
                    if (response != null && response.longValue() == 304) {
                        return (new pageDetails(initial_url, initial_url, ""));
                    } else {
                        throw (e);
                    }
                }
                // Set-Cookie response headers are deliberately not inspected, even when needsAuth is set:
                // well, not much we can do with the cookies anyway as in general the ones
                // set are the ones missing/expired, not the existing ones. That is, we can't
                // deduce anything from the fact that a required cookie is not 'set' here
                // the most we could do is catch a server that explicitly deleted invalid
                // cookies by expiring it, but I doubt this is a common practice.
                // Also note the complexity of cookie syntax
                // Set-Cookie: old standard using expires=, new using MaxAge
                // Set-Cookie2:
                // Maybe use http://jcookie.sourceforge.net/ if needed
                if (only_if_modified) {
                    String last_modified = extractProperty(mr_rd.getProperty("URL_Last-Modified"));
                    String etag = extractProperty(mr_rd.getProperty("URL_ETag"));
                    if (last_modified != null) {
                        setLocalString(LD_LAST_MODIFIED, last_modified);
                    }
                    if (etag != null) {
                        setLocalString(LD_ETAG, etag);
                    }
                }
                // pick the charset out of e.g. 'text/html; charset="windows-1251"'
                List<?> cts = (List<?>) mr_rd.getProperty("URL_Content-Type");
                if (cts != null && cts.size() > 0) {
                    String content_type = (String) cts.get(0);
                    int pos = content_type.toLowerCase(Locale.US).indexOf("charset");
                    if (pos != -1) {
                        content_type = content_type.substring(pos + 1);
                        pos = content_type.indexOf('=');
                        if (pos != -1) {
                            content_type = content_type.substring(pos + 1).trim();
                            pos = content_type.indexOf(';');
                            if (pos != -1) {
                                content_type = content_type.substring(0, pos).trim();
                            }
                            if (content_type.startsWith("\"")) {
                                content_type = content_type.substring(1).trim();
                            }
                            if (content_type.endsWith("\"")) {
                                content_type = content_type.substring(0, content_type.length() - 1).trim();
                            }
                            try {
                                if (Charset.isSupported(content_type)) {
                                    debugLog("charset: " + content_type);
                                    content_charset = content_type;
                                }
                            } catch (Throwable e) {
                                try {
                                    // handle lowercase 'utf-8' for example
                                    content_type = content_type.toUpperCase();
                                    if (Charset.isSupported(content_type)) {
                                        debugLog("charset: " + content_type);
                                        content_charset = content_type;
                                    }
                                } catch (Throwable f) {
                                    log("Content type '" + content_type + "' not supported", f);
                                }
                            }
                        }
                    }
                }
            }
            // slurp the whole response into memory
            ByteArrayOutputStream baos = new ByteArrayOutputStream(8192);
            byte[] buffer = new byte[8192];
            while (true) {
                int len = is.read(buffer);
                if (len <= 0) {
                    break;
                }
                baos.write(buffer, 0, len);
            }
            byte[] data = baos.toByteArray();
            if (vuze_file) {
                // vuze: resources are handed to the VuzeFile handler; there is no page text to return
                try {
                    VuzeFileHandler vfh = VuzeFileHandler.getSingleton();
                    VuzeFile vf = vfh.loadVuzeFile(data);
                    vfh.handleFiles(new VuzeFile[] { vf }, VuzeFileComponent.COMP_TYPE_NONE);
                } catch (Throwable e) {
                    Debug.out(e);
                }
                return (new pageDetails(initial_url, initial_url, null));
            }
            String page = null;
            // only the head of the document is sniffed for an in-band charset declaration
            String content = new String(data, 0, Math.min(data.length, 2048), content_charset);
            String lc_content = content.toLowerCase(Locale.US);
            {
                // first look for xml charset
                // e.g. <?xml version="1.0" encoding="windows-1251" ?>
                int pos1 = lc_content.indexOf("<?xml");
                if (pos1 != -1) {
                    int pos2 = lc_content.indexOf("?>");
                    if (pos2 != -1) {
                        int pos3 = lc_content.indexOf("encoding", pos1);
                        if (pos3 != -1) {
                            pos3 = lc_content.indexOf("\"", pos3);
                        }
                        if (pos3 > pos1 && pos3 < pos2) {
                            pos3++;
                            int pos4 = lc_content.indexOf("\"", pos3);
                            if (pos4 > pos3 && pos4 < pos2) {
                                String encoding = content.substring(pos3, pos4).trim();
                                try {
                                    if (Charset.isSupported(encoding)) {
                                        debugLog("charset from xml tag: " + encoding);
                                        content_charset = encoding;
                                        // some feeds have crap at the start which makes pos2 (a char index) mismatch
                                        // the byte offset of the closing '?' - adjust if necessary
                                        int data_start = alignToMarkerByte(data, pos2, '?', 64);
                                        // head (with the declaration rewritten to utf-8) + re-decoded remainder
                                        page = content.substring(0, pos3) + "utf-8" + content.substring(pos4, pos2) + new String(data, data_start, data.length - data_start, content_charset);
                                    }
                                } catch (Throwable e) {
                                    log("Content type '" + encoding + "' not supported", e);
                                }
                            }
                        }
                    }
                }
            }
            if (page == null) {
                // next look for http-equiv charset
                // e.g. <meta http-equiv="Content-Type" content="text/html; charset=windows-1251" />
                int pos = 0;
                while (true) {
                    int pos1 = lc_content.indexOf("http-equiv", pos);
                    if (pos1 == -1) {
                        break;
                    }
                    int pos2 = lc_content.indexOf(">", pos1);
                    if (pos2 == -1) {
                        break;
                    }
                    int pos3 = lc_content.indexOf("charset", pos1);
                    if (pos3 != -1 && pos3 < pos2) {
                        pos3 = lc_content.indexOf("=", pos3);
                        if (pos3 != -1) {
                            pos3++;
                            int pos4 = lc_content.indexOf("\"", pos3);
                            if (pos4 != -1) {
                                // the charset value ends at the closing quote or at a ';', whichever is first
                                int pos5 = lc_content.indexOf(";", pos3);
                                if (pos5 != -1 && pos5 < pos4) {
                                    pos4 = pos5;
                                }
                                String encoding = content.substring(pos3, pos4).trim();
                                try {
                                    if (Charset.isSupported(encoding)) {
                                        debugLog("charset from http-equiv : " + encoding);
                                        content_charset = encoding;
                                        // pos2 is the char index of the tag's closing '>'; leading junk can make it
                                        // differ from the byte offset, so re-align on that '>' (not on '?', which
                                        // made this branch skip 64 bytes of page content)
                                        int data_start = alignToMarkerByte(data, pos2, '>', 64);
                                        page = content.substring(0, pos3) + "utf-8" + content.substring(pos4, pos2) + new String(data, data_start, data.length - data_start, content_charset);
                                    }
                                } catch (Throwable e) {
                                    log("Content type '" + encoding + "' not supported", e);
                                }
                                break;
                            }
                        }
                    }
                    // no usable charset in this tag - continue after it
                    pos = pos2;
                }
            }
            if (page == null) {
                page = new String(data, content_charset);
            }
            debugLog("page:");
            debugLog(page);
            try {
                Matcher m = baseTagPattern.matcher(page);
                if (m.find()) {
                    basePage = m.group(1);
                    debugLog("base_page: " + basePage);
                }
            } catch (Exception ignored) {
                // No BASE tag in the page
            }
            URL final_url = initial_url;
            if (mr_rd != null) {
                // the downloader reports the URL it actually ended up at, when it knows it
                URL x = (URL) mr_rd.getProperty("URL_URL");
                if (x != null) {
                    final_url = x;
                }
            }
            return (new pageDetails(initial_url, final_url, page));
        } finally {
            if (is != null) {
                is.close();
            }
        }
    } catch (SearchException e) {
        throw (e);
    } catch (Throwable e) {
        // e.printStackTrace();
        debugLog("Failed to load page: " + Debug.getNestedExceptionMessageAndStack(e));
        throw (new SearchException("Failed to load page", e));
    } finally {
        TorrentUtils.setTLSDescription(null);
    }
}

/**
 * Finds the byte offset of {@code marker} at or shortly after {@code from}.
 *
 * <p>Examines {@code data[from] .. data[from + maxSkip]}, never reading past the end of the array.
 *
 * @return the index of the first matching byte, or {@code from} unchanged when the marker is not
 *         found in that window (so no content is skipped on a miss)
 */
private static int alignToMarkerByte(byte[] data, int from, char marker, int maxSkip) {
    for (int i = from; i <= from + maxSkip && i < data.length; i++) {
        if (data[i] == marker) {
            return i;
        }
    }
    return from;
}
Also used : Matcher(java.util.regex.Matcher) SearchException(com.biglybt.core.metasearch.SearchException) VuzeFile(com.biglybt.core.vuzefile.VuzeFile) SearchParameter(com.biglybt.core.metasearch.SearchParameter) ResourceDownloaderException(com.biglybt.pif.utils.resourcedownloader.ResourceDownloaderException) SearchLoginException(com.biglybt.core.metasearch.SearchLoginException) ResourceDownloader(com.biglybt.pif.utils.resourcedownloader.ResourceDownloader) SearchException(com.biglybt.core.metasearch.SearchException) ResourceDownloaderException(com.biglybt.pif.utils.resourcedownloader.ResourceDownloaderException) SearchLoginException(com.biglybt.core.metasearch.SearchLoginException) ResourceDownloaderFactory(com.biglybt.pif.utils.resourcedownloader.ResourceDownloaderFactory) VuzeFile(com.biglybt.core.vuzefile.VuzeFile) VuzeFileHandler(com.biglybt.core.vuzefile.VuzeFileHandler)

Example 2 with SearchException

Use of com.biglybt.core.metasearch.SearchException in project BiglyBT by BiglySoftware.

From the class WebEngine, method getWebPageContent.

/**
 * Fetches the page for {@code searchURLFormat}, choosing between a direct download and a
 * plugin proxy.
 *
 * <ul>
 *   <li>If the URL starts with {@code tor:} (or {@code Result.adjustLink} turns it into one), the
 *       search context is discarded and the download goes through a Tor plugin proxy; failure to
 *       obtain one is an error.</li>
 *   <li>Otherwise a direct download is attempted (with the search context discarded when the host
 *       is not categorised as public). If that fails with a {@code SearchException}, one retry is
 *       made through whatever plugin proxy is available; if the retry cannot be made or fails in
 *       any way, the <em>original</em> exception is rethrown.</li>
 * </ul>
 *
 * @param searchParameters placeholder values forwarded to {@code getWebPageContentSupport}
 * @param searchContext    extra context forwarded to {@code getWebPageContentSupport} (may be
 *                         replaced by an empty map, see above)
 * @param headers          forwarded to {@code getWebPageContentSupport}
 * @param only_if_modified forwarded to {@code getWebPageContentSupport}
 * @param verifier         optional check applied to the downloaded page; may be {@code null}
 * @return the downloaded (and, if a verifier was given, verified) page details
 * @throws SearchException if the URL is malformed, no Tor proxy is available when one is required,
 *                         or the download/verification fails
 */
protected pageDetails getWebPageContent(SearchParameter[] searchParameters, Map<String, String> searchContext, String headers, boolean only_if_modified, pageDetailsVerifier verifier) throws SearchException {
    String searchURL = searchURLFormat;
    String lc_url = searchURL.toLowerCase(Locale.US);
    boolean explicit_tor = lc_url.startsWith("tor:");
    // adjustLink is only consulted when the URL is not already an explicit tor: one
    boolean user_tor = !explicit_tor && Result.adjustLink(searchURL).startsWith("tor:");
    if (explicit_tor || user_tor) {
        // strip out any stuff we probably don't want to send
        searchContext = new HashMap<>();
        // 4 == "tor:".length()
        String target_resource = explicit_tor ? searchURL.substring(4) : searchURL;
        URL location;
        try {
            location = new URL(target_resource);
        } catch (MalformedURLException e) {
            throw (new SearchException(e));
        }
        Map<String, Object> options = new HashMap<>();
        options.put(AEProxyFactory.PO_PEER_NETWORKS, new String[] { AENetworkClassifier.AT_TOR });
        PluginProxy plugin_proxy = AEProxyFactory.getPluginProxy("Web engine download of '" + target_resource + "'", location, options, true);
        if (plugin_proxy == null) {
            throw (new SearchException("No Tor plugin proxy available for '" + target_resource + "'"));
        }
        return (getWebPageContentViaPluginProxy(plugin_proxy, location, searchParameters, searchContext, headers, only_if_modified, verifier));
    }
    try {
        try {
            URL url = new URL(searchURL);
            if (AENetworkClassifier.categoriseAddress(url.getHost()) != AENetworkClassifier.AT_PUBLIC) {
                // strip out any stuff we probably don't want to send
                searchContext = new HashMap<>();
            }
        } catch (Throwable ignored) {
            // best effort only: if the URL cannot be parsed/categorised here the context is kept
            // and the download below reports any real problem with the URL
        }
        pageDetails details = getWebPageContentSupport(null, null, searchURL, searchParameters, searchContext, headers, only_if_modified);
        if (verifier != null) {
            verifier.verify(details);
        }
        return (details);
    } catch (SearchException e) {
        // direct attempt failed - retry once through a plugin proxy if one is available
        try {
            URL original_url = new URL(searchURL);
            PluginProxy plugin_proxy = AEProxyFactory.getPluginProxy("getting search results ", original_url);
            if (plugin_proxy != null) {
                return (getWebPageContentViaPluginProxy(plugin_proxy, original_url, searchParameters, searchContext, headers, only_if_modified, verifier));
            }
        } catch (Throwable f) {
            // the retry is opportunistic: callers always see the failure of the direct attempt
            throw (e);
        }
        throw (e);
    }
}

/**
 * Downloads and verifies a page through {@code plugin_proxy}, then tells the proxy whether the
 * attempt succeeded via {@code setOK}.
 *
 * @param plugin_proxy proxy supplying the URL and {@code Proxy} to connect with
 * @param original_url the URL the caller actually wants; its host (and port, when explicit) is
 *                     passed on as the {@code proxy_host} argument
 */
private pageDetails getWebPageContentViaPluginProxy(PluginProxy plugin_proxy, URL original_url, SearchParameter[] searchParameters, Map<String, String> searchContext, String headers, boolean only_if_modified, pageDetailsVerifier verifier) throws SearchException {
    URL url = plugin_proxy.getURL();
    Proxy proxy = plugin_proxy.getProxy();
    boolean ok = false;
    try {
        String proxy_host = original_url.getHost() + (original_url.getPort() == -1 ? "" : (":" + original_url.getPort()));
        pageDetails details = getWebPageContentSupport(proxy, proxy_host, url.toExternalForm(), searchParameters, searchContext, headers, only_if_modified);
        if (verifier != null) {
            verifier.verify(details);
        }
        ok = true;
        return (details);
    } finally {
        // reported on every exit path, success or failure
        plugin_proxy.setOK(ok);
    }
}
Also used : PluginProxy(com.biglybt.core.proxy.AEProxyFactory.PluginProxy) SearchException(com.biglybt.core.metasearch.SearchException) PluginProxy(com.biglybt.core.proxy.AEProxyFactory.PluginProxy) JSONObject(org.json.simple.JSONObject)

Aggregations

SearchException (com.biglybt.core.metasearch.SearchException)2 SearchLoginException (com.biglybt.core.metasearch.SearchLoginException)1 SearchParameter (com.biglybt.core.metasearch.SearchParameter)1 PluginProxy (com.biglybt.core.proxy.AEProxyFactory.PluginProxy)1 VuzeFile (com.biglybt.core.vuzefile.VuzeFile)1 VuzeFileHandler (com.biglybt.core.vuzefile.VuzeFileHandler)1 ResourceDownloader (com.biglybt.pif.utils.resourcedownloader.ResourceDownloader)1 ResourceDownloaderException (com.biglybt.pif.utils.resourcedownloader.ResourceDownloaderException)1 ResourceDownloaderFactory (com.biglybt.pif.utils.resourcedownloader.ResourceDownloaderFactory)1 Matcher (java.util.regex.Matcher)1 JSONObject (org.json.simple.JSONObject)1