Search in sources:

Example 1 with SearchParameter

Use of com.biglybt.core.metasearch.SearchParameter in project BiglyBT by BiglySoftware.

In the class SubscriptionDownloader, the method download:

protected void download() throws SubscriptionException {
    log("Downloading");
    Map map = JSONUtils.decodeJSON(subs.getJSON());
    Long engine_id = (Long) map.get("engine_id");
    String search_term = (String) map.get("search_term");
    String networks = (String) map.get("networks");
    Map filters = (Map) map.get("filters");
    Engine engine = manager.getEngine(subs, map, false);
    if (engine == null) {
        throw (new SubscriptionException("Download failed, search engine " + engine_id + " not found"));
    }
    List sps = new ArrayList();
    if (search_term != null) {
        sps.add(new SearchParameter("s", search_term));
        log("    Using search term '" + search_term + "' for engine " + engine.getString());
    }
    if (networks != null && networks.length() > 0) {
        sps.add(new SearchParameter("n", networks));
    }
    /*
    if (mature != null) {
        sps.add(new SearchParameter("m", mature.toString()));
    }
    */
    SearchParameter[] parameters = (SearchParameter[]) sps.toArray(new SearchParameter[sps.size()]);
    SubscriptionHistoryImpl history = (SubscriptionHistoryImpl) subs.getHistory();
    try {
        Map context = new HashMap();
        context.put(Engine.SC_SOURCE, "subscription");
        Result[] results = engine.search(parameters, context, -1, -1, null, null);
        log("    Got " + results.length + " results");
        SubscriptionResultFilterImpl result_filter = new SubscriptionResultFilterImpl(subs, filters);
        results = result_filter.filter(results);
        log("    Post-filter: " + results.length + " results");
        SubscriptionResultImpl[] s_results = new SubscriptionResultImpl[results.length];
        for (int i = 0; i < results.length; i++) {
            SubscriptionResultImpl s_result = new SubscriptionResultImpl(history, results[i]);
            s_results[i] = s_result;
        }
        SubscriptionResultImpl[] all_results = history.reconcileResults(engine, s_results);
        checkAutoDownload(all_results);
        history.setLastError(null, false);
    } catch (Throwable e) {
        log("    Download failed", e);
        history.setLastError(Debug.getNestedExceptionMessage(e), e instanceof SearchLoginException);
        throw (new SubscriptionException("Search failed", e));
    }
}
Also used: SubscriptionException(com.biglybt.core.subs.SubscriptionException), HashMap(java.util.HashMap), ArrayList(java.util.ArrayList), SearchLoginException(com.biglybt.core.metasearch.SearchLoginException), Result(com.biglybt.core.metasearch.Result), List(java.util.List), SearchParameter(com.biglybt.core.metasearch.SearchParameter), Map(java.util.Map), Engine(com.biglybt.core.metasearch.Engine)
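
Boiled down, Example 1 builds a SearchParameter array, tags the request with a source identifier in a context map, and hands both to Engine.search. Below is a minimal sketch of that call sequence, assuming an Engine instance obtained elsewhere; the parameter values and the runSearch name are hypothetical, while the "s"/"n" keys and the search(...) arguments mirror the call in download() above.

static Result[] runSearch(Engine engine) throws SearchException {
    List sps = new ArrayList();
    // "s" = search term, "n" = networks, the same keys used in download() above
    sps.add(new SearchParameter("s", "example search term"));
    sps.add(new SearchParameter("n", "Public"));
    SearchParameter[] parameters = (SearchParameter[]) sps.toArray(new SearchParameter[sps.size()]);
    Map context = new HashMap();
    context.put(Engine.SC_SOURCE, "subscription");
    // the two -1 and two null arguments are passed through unchanged from the call in download()
    return engine.search(parameters, context, -1, -1, null, null);
}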

Example 2 with SearchParameter

Use of com.biglybt.core.metasearch.SearchParameter in project BiglyBT by BiglySoftware.

In the class WebEngine, the method getWebPageContentSupport:

private pageDetails getWebPageContentSupport(Proxy proxy, String proxy_host, String searchURL, SearchParameter[] searchParameters, Map<String, String> searchContext, String headers, boolean only_if_modified) throws SearchException {
    try {
        TorrentUtils.setTLSDescription("Search: " + getName());
        if (requiresLogin()) {
            throw new SearchLoginException("login required");
        }
        boolean vuze_file = searchURL.toLowerCase().startsWith("vuze:");
        if (!vuze_file) {
            String[] from_strs = new String[searchParameters.length];
            String[] to_strs = new String[searchParameters.length];
            for (int i = 0; i < searchParameters.length; i++) {
                SearchParameter parameter = searchParameters[i];
                from_strs[i] = "%" + parameter.getMatchPattern();
                to_strs[i] = URLEncoder.encode(parameter.getValue(), "UTF-8");
            }
            searchURL = GeneralUtils.replaceAll(searchURL, from_strs, to_strs);
            Iterator<Map.Entry<String, String>> it = searchContext.entrySet().iterator();
            while (it.hasNext()) {
                Map.Entry<String, String> entry = it.next();
                String key = entry.getKey();
                if (supportsContext(key)) {
                    if (searchURL.indexOf('?') == -1) {
                        searchURL += "?";
                    } else {
                        searchURL += "&";
                    }
                    String value = entry.getValue();
                    searchURL += key + "=" + URLEncoder.encode(value, "UTF-8");
                }
            }
        }
        // System.out.println(searchURL);
        // hack to support POST by encoding into URL
        // http://xxxx/index.php?main=search&azmethod=post_basic:SearchString1=%s&SearchString=&search=Search
        ResourceDownloaderFactory rdf = StaticUtilities.getResourceDownloaderFactory();
        URL initial_url;
        ResourceDownloader initial_url_rd;
        int post_pos = searchURL.indexOf("azmethod=");
        if (post_pos > 0) {
            String post_params = searchURL.substring(post_pos + 9);
            searchURL = searchURL.substring(0, post_pos - 1);
            debugLog("search_url: " + searchURL + ", post=" + post_params);
            initial_url = new URL(searchURL);
            int sep = post_params.indexOf(':');
            String type = post_params.substring(0, sep);
            if (!type.equals("post_basic")) {
                throw (new SearchException("Only basic type supported"));
            }
            post_params = post_params.substring(sep + 1);
            if (proxy == null) {
                initial_url_rd = rdf.create(initial_url, post_params);
            } else {
                initial_url_rd = rdf.create(initial_url, post_params, proxy);
            }
            initial_url_rd.setProperty("URL_Content-Type", "application/x-www-form-urlencoded");
        } else {
            debugLog("search_url: " + searchURL);
            initial_url = new URL(searchURL);
            if (proxy == null) {
                initial_url_rd = rdf.create(initial_url);
            } else {
                initial_url_rd = rdf.create(initial_url, proxy);
            }
        }
        if (proxy_host != null) {
            initial_url_rd.setProperty("URL_HOST", proxy_host);
        }
        setHeaders(initial_url_rd, headers);
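        // attach session cookies: per-login cookies when the engine needs auth, otherwise any configured full cookie string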
        if (needsAuth && local_cookies != null) {
            initial_url_rd.setProperty("URL_Cookie", local_cookies);
        } else if (fullCookies != null && fullCookies.length() > 0) {
            initial_url_rd.setProperty("URL_Cookie", fullCookies);
        }
        if (only_if_modified) {
            String last_modified = getLocalString(LD_LAST_MODIFIED);
            String etag = getLocalString(LD_ETAG);
            if (last_modified != null) {
                initial_url_rd.setProperty("URL_If-Modified-Since", last_modified);
            }
            if (etag != null) {
                initial_url_rd.setProperty("URL_If-None-Match", etag);
            }
        }
        InputStream is = null;
        try {
            String content_charset = "UTF-8";
            ResourceDownloader mr_rd = null;
            if (initial_url.getProtocol().equalsIgnoreCase("file")) {
                // handle file://c:/ - map to file:/c:/
                String str = initial_url.toExternalForm();
                if (initial_url.getAuthority() != null) {
                    str = str.replaceFirst("://", ":/");
                }
                int pos = str.indexOf('?');
                if (pos != -1) {
                    str = str.substring(0, pos);
                }
                is = new FileInputStream(new File(new URL(str).toURI()));
            } else {
                if (proxy == null) {
                    initial_url_rd.setProperty("URL_Connect_Timeout", 10 * 1000);
                    initial_url_rd.setProperty("URL_Read_Timeout", 10 * 1000);
                }
                mr_rd = rdf.getMetaRefreshDownloader(initial_url_rd);
                try {
                    is = mr_rd.download();
                } catch (ResourceDownloaderException e) {
                    Long response = (Long) mr_rd.getProperty("URL_HTTP_Response");
                    if (response != null && response.longValue() == 304) {
                        return (new pageDetails(initial_url, initial_url, ""));
                    } else {
                        throw (e);
                    }
                }
                if (needsAuth) {
                    List cookies_list = (List) mr_rd.getProperty("URL_Set-Cookie");
                    List cookies_set = new ArrayList();
                    if (cookies_list != null) {
                        for (int i = 0; i < cookies_list.size(); i++) {
                            String[] cookies = ((String) cookies_list.get(i)).split(";");
                            for (int j = 0; j < cookies.length; j++) {
                                String cookie = cookies[j].trim();
                                if (cookie.indexOf('=') != -1) {
                                    cookies_set.add(cookie);
                                }
                            }
                        }
                    }
                // well, not much we can do with the cookies anyway as in general the ones
                // set are the ones missing/expired, not the existing ones. That is, we can't
                // deduce anything from the fact that a required cookie is not 'set' here
                // the most we could do is catch a server that explicitly deleted invalid
                // cookies by expiring it, but I doubt this is a common practice.
                // Also note the complexity of cookie syntax
                // Set-Cookie: old standard using expires=, new using MaxAge
                // Set-Cookie2:
                // Maybe use http://jcookie.sourceforge.net/ if needed
                }
                if (only_if_modified) {
                    String last_modified = extractProperty(mr_rd.getProperty("URL_Last-Modified"));
                    String etag = extractProperty(mr_rd.getProperty("URL_ETag"));
                    if (last_modified != null) {
                        setLocalString(LD_LAST_MODIFIED, last_modified);
                    }
                    if (etag != null) {
                        setLocalString(LD_ETAG, etag);
                    }
                }
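                // pull the response charset out of the Content-Type header, e.g. "text/html; charset=windows-1251"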
                List cts = (List) mr_rd.getProperty("URL_Content-Type");
                if (cts != null && cts.size() > 0) {
                    String content_type = (String) cts.get(0);
                    int pos = content_type.toLowerCase().indexOf("charset");
                    if (pos != -1) {
                        content_type = content_type.substring(pos + 1);
                        pos = content_type.indexOf('=');
                        if (pos != -1) {
                            content_type = content_type.substring(pos + 1).trim();
                            pos = content_type.indexOf(';');
                            if (pos != -1) {
                                content_type = content_type.substring(0, pos).trim();
                            }
                            if (content_type.startsWith("\"")) {
                                content_type = content_type.substring(1).trim();
                            }
                            if (content_type.endsWith("\"")) {
                                content_type = content_type.substring(0, content_type.length() - 1).trim();
                            }
                            try {
                                if (Charset.isSupported(content_type)) {
                                    debugLog("charset: " + content_type);
                                    content_charset = content_type;
                                }
                            } catch (Throwable e) {
                                try {
                                    // handle lowercase 'utf-8' for example
                                    content_type = content_type.toUpperCase();
                                    if (Charset.isSupported(content_type)) {
                                        debugLog("charset: " + content_type);
                                        content_charset = content_type;
                                    }
                                } catch (Throwable f) {
                                    log("Content type '" + content_type + "' not supported", f);
                                }
                            }
                        }
                    }
                }
            }
            ByteArrayOutputStream baos = new ByteArrayOutputStream(8192);
            byte[] buffer = new byte[8192];
            while (true) {
                int len = is.read(buffer);
                if (len <= 0) {
                    break;
                }
                baos.write(buffer, 0, len);
            }
            byte[] data = baos.toByteArray();
            if (vuze_file) {
                try {
                    VuzeFileHandler vfh = VuzeFileHandler.getSingleton();
                    VuzeFile vf = vfh.loadVuzeFile(data);
                    vfh.handleFiles(new VuzeFile[] { vf }, VuzeFileComponent.COMP_TYPE_NONE);
                } catch (Throwable e) {
                    Debug.out(e);
                }
                return (new pageDetails(initial_url, initial_url, null));
            }
            String page = null;
            String content = new String(data, 0, Math.min(data.length, 2048), content_charset);
            String lc_content = content.toLowerCase();
            {
                // first look for xml charset
                // e.g. <?xml version="1.0" encoding="windows-1251" ?>
                int pos1 = lc_content.indexOf("<?xml");
                if (pos1 != -1) {
                    int pos2 = lc_content.indexOf("?>");
                    if (pos2 != -1) {
                        int pos3 = lc_content.indexOf("encoding", pos1);
                        if (pos3 != -1) {
                            pos3 = lc_content.indexOf("\"", pos3);
                        }
                        if (pos3 > pos1 && pos3 < pos2) {
                            pos3++;
                            int pos4 = lc_content.indexOf("\"", pos3);
                            if (pos4 > pos3 && pos4 < pos2) {
                                String encoding = content.substring(pos3, pos4).trim();
                                try {
                                    if (Charset.isSupported(encoding)) {
                                        debugLog("charset from xml tag: " + encoding);
                                        content_charset = encoding;
                                        // some feeds have crap at the start which makes pos2 mismatch for the above '?' - adjust if necessary
                                        int data_start = pos2;
                                        int max_skip = 64;
                                        while (data[data_start] != '?' && max_skip-- > 0) {
                                            data_start++;
                                        }
                                        page = content.substring(0, pos3) + "utf-8" + content.substring(pos4, pos2) + new String(data, data_start, data.length - data_start, content_charset);
                                    }
                                } catch (Throwable e) {
                                    log("Content type '" + encoding + "' not supported", e);
                                }
                            }
                        }
                    }
                }
            }
            if (page == null) {
                // next look for http-equiv charset
                // e.g. <meta http-equiv="Content-Type" content="text/html; charset=windows-1251" />
                int pos = 0;
                while (true) {
                    int pos1 = lc_content.indexOf("http-equiv", pos);
                    if (pos1 != -1) {
                        int pos2 = lc_content.indexOf(">", pos1);
                        if (pos2 != -1) {
                            int pos3 = lc_content.indexOf("charset", pos1);
                            if (pos3 != -1 && pos3 < pos2) {
                                pos3 = lc_content.indexOf("=", pos3);
                                if (pos3 != -1) {
                                    pos3++;
                                    int pos4 = lc_content.indexOf("\"", pos3);
                                    if (pos4 != -1) {
                                        int pos5 = lc_content.indexOf(";", pos3);
                                        if (pos5 != -1 && pos5 < pos4) {
                                            pos4 = pos5;
                                        }
                                        String encoding = content.substring(pos3, pos4).trim();
                                        try {
                                            if (Charset.isSupported(encoding)) {
                                                debugLog("charset from http-equiv : " + encoding);
                                                content_charset = encoding;
                                                // some feeds have crap at the start which makes pos2 mismatch for the above '?' - adjust if necessary
                                                int data_start = pos2;
                                                int max_skip = 64;
                                                while (data[data_start] != '?' && max_skip-- > 0) {
                                                    data_start++;
                                                }
                                                page = content.substring(0, pos3) + "utf-8" + content.substring(pos4, pos2) + new String(data, data_start, data.length - data_start, content_charset);
                                            }
                                        } catch (Throwable e) {
                                            log("Content type '" + encoding + "' not supported", e);
                                        }
                                        break;
                                    }
                                }
                            }
                            pos = pos2;
                        } else {
                            break;
                        }
                    } else {
                        break;
                    }
                }
            }
            if (page == null) {
                page = new String(data, content_charset);
            }
            debugLog("page:");
            debugLog(page);
            try {
                Matcher m = baseTagPattern.matcher(page);
                if (m.find()) {
                    basePage = m.group(1);
                    debugLog("base_page: " + basePage);
                }
            } catch (Exception e) {
            // No BASE tag in the page
            }
            URL final_url = initial_url;
            if (mr_rd != null) {
                URL x = (URL) mr_rd.getProperty("URL_URL");
                if (x != null) {
                    final_url = x;
                }
            }
            return (new pageDetails(initial_url, final_url, page));
        } finally {
            if (is != null) {
                is.close();
            }
        }
    } catch (SearchException e) {
        throw (e);
    } catch (Throwable e) {
        // e.printStackTrace();
        debugLog("Failed to load page: " + Debug.getNestedExceptionMessageAndStack(e));
        throw (new SearchException("Failed to load page", e));
    } finally {
        TorrentUtils.setTLSDescription(null);
    }
}
Also used: Matcher(java.util.regex.Matcher), SearchException(com.biglybt.core.metasearch.SearchException), VuzeFile(com.biglybt.core.vuzefile.VuzeFile), SearchParameter(com.biglybt.core.metasearch.SearchParameter), ResourceDownloaderException(com.biglybt.pif.utils.resourcedownloader.ResourceDownloaderException), SearchLoginException(com.biglybt.core.metasearch.SearchLoginException), ResourceDownloader(com.biglybt.pif.utils.resourcedownloader.ResourceDownloader), ResourceDownloaderFactory(com.biglybt.pif.utils.resourcedownloader.ResourceDownloaderFactory), VuzeFileHandler(com.biglybt.core.vuzefile.VuzeFileHandler)
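
The URL construction in Example 2 reduces to two steps: replace each "%" + SearchParameter.getMatchPattern() token in the engine's URL template with the URL-encoded parameter value, then append supported context entries as extra query parameters. Below is a minimal sketch using only JDK calls; the buildSearchURL helper and the example template are hypothetical, the real code substitutes all tokens in one pass via GeneralUtils.replaceAll rather than sequentially, and it additionally filters context entries through supportsContext(key), which this sketch omits.

static String buildSearchURL(String template, SearchParameter[] parameters,
        Map<String, String> context) throws UnsupportedEncodingException {
    String url = template;
    for (SearchParameter parameter : parameters) {
        // e.g. a "%s" token in the template is replaced by the encoded search term
        url = url.replace("%" + parameter.getMatchPattern(),
                URLEncoder.encode(parameter.getValue(), "UTF-8"));
    }
    for (Map.Entry<String, String> entry : context.entrySet()) {
        // append each context entry as an extra query parameter
        url += (url.indexOf('?') == -1 ? "?" : "&")
                + entry.getKey() + "=" + URLEncoder.encode(entry.getValue(), "UTF-8");
    }
    return url;
}
// e.g. buildSearchURL("http://example.com/search?q=%s", parameters, context)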

Aggregations

SearchLoginException (com.biglybt.core.metasearch.SearchLoginException): 2
SearchParameter (com.biglybt.core.metasearch.SearchParameter): 2
Engine (com.biglybt.core.metasearch.Engine): 1
Result (com.biglybt.core.metasearch.Result): 1
SearchException (com.biglybt.core.metasearch.SearchException): 1
SubscriptionException (com.biglybt.core.subs.SubscriptionException): 1
VuzeFile (com.biglybt.core.vuzefile.VuzeFile): 1
VuzeFileHandler (com.biglybt.core.vuzefile.VuzeFileHandler): 1
ResourceDownloader (com.biglybt.pif.utils.resourcedownloader.ResourceDownloader): 1
ResourceDownloaderException (com.biglybt.pif.utils.resourcedownloader.ResourceDownloaderException): 1
ResourceDownloaderFactory (com.biglybt.pif.utils.resourcedownloader.ResourceDownloaderFactory): 1
ArrayList (java.util.ArrayList): 1
HashMap (java.util.HashMap): 1
List (java.util.List): 1
Map (java.util.Map): 1
Matcher (java.util.regex.Matcher): 1