Search in sources :

Example 1 with Webpage

use of org.edamontology.pubfetcher.Webpage in project edammap by edamontology.

The method getProcessedQuery of the class Processor.

/**
 * Builds a {@link QueryProcessed} from the parts of the given query.
 *
 * <p>The name, keywords and description are run through {@code pp}; webpages, docs and
 * publications are first obtained through {@code FetcherCommon} using this object's
 * {@code database} and {@code fetcher}. Whenever {@code queryIdf} is non-null, the IDF list
 * of each non-empty token list is stored next to the tokens.
 *
 * <p>Side effect: when {@code type} is {@code QueryType.Bioconductor}, every webpage or doc
 * link that yields no tokens (page missing, not usable, or empty after preprocessing) is
 * removed from the query's own link collection and nothing is recorded for it.
 *
 * @param query the query to process; its link collections may be modified (see above)
 * @param type the query type; only compared against {@code QueryType.Bioconductor}
 * @param pp the preprocessor turning text into tokens
 * @param queryIdf source of IDF values, or {@code null} to skip IDF lookup
 * @param fetcherArgs arguments forwarded to the fetching and usability calls
 * @return the processed query; keyword, webpage, doc and publication entries may be
 *         {@code null} where no tokens or no publication could be obtained
 */
public QueryProcessed getProcessedQuery(Query query, QueryType type, PreProcessor pp, Idf queryIdf, FetcherArgs fetcherArgs) {
    final QueryProcessed result = new QueryProcessed();
    final boolean dropUnusableLinks = (type == QueryType.Bioconductor);
    final boolean withIdf = (queryIdf != null);

    // Name: recorded only when preprocessing leaves at least one token.
    if (query.getName() != null) {
        List<String> tokens = pp.process(query.getName());
        if (!tokens.isEmpty()) {
            result.setNameTokens(tokens);
            if (withIdf) {
                result.setNameIdfs(queryIdf.getIdf(tokens));
            }
        }
    }

    // Keywords: one entry per keyword is always added; null marks "no tokens".
    if (query.getKeywords() != null) {
        for (Keyword keyword : query.getKeywords()) {
            List<String> tokens = null;
            List<Double> idfs = null;
            String value = keyword.getValue();
            if (value != null) {
                List<String> processed = pp.process(value);
                if (!processed.isEmpty()) {
                    tokens = processed;
                    if (withIdf) {
                        idfs = queryIdf.getIdf(processed);
                    }
                }
            }
            result.addKeywordTokens(tokens);
            result.addKeywordIdfs(idfs);
        }
    }

    // Description: same treatment as the name.
    if (query.getDescription() != null) {
        List<String> tokens = pp.process(query.getDescription());
        if (!tokens.isEmpty()) {
            result.setDescriptionTokens(tokens);
            if (withIdf) {
                result.setDescriptionIdfs(queryIdf.getIdf(tokens));
            }
        }
    }

    // Webpages: an explicit iterator is needed so unusable links can be removed in place.
    if (query.getWebpageUrls() != null) {
        Iterator<Link> links = query.getWebpageUrls().iterator();
        while (links.hasNext()) {
            String url = links.next().getUrl();
            Webpage webpage = FetcherCommon.getWebpage(url, database, fetcher, fetcherArgs);
            List<String> tokens = null;
            List<Double> idfs = null;
            if (webpage != null && webpage.isUsable(fetcherArgs)) {
                List<String> processed = pp.process(webpage.getTitle() + " " + webpage.getContent());
                if (!processed.isEmpty()) {
                    tokens = processed;
                    if (withIdf) {
                        idfs = queryIdf.getIdf(processed);
                    }
                }
            }
            if (dropUnusableLinks && tokens == null) {
                links.remove();
                continue;
            }
            result.addWebpage(webpage);
            result.addWebpageTokens(tokens);
            result.addWebpageIdfs(idfs);
        }
    }

    // Docs: identical flow to webpages, but fetched through getDoc.
    if (query.getDocUrls() != null) {
        Iterator<Link> links = query.getDocUrls().iterator();
        while (links.hasNext()) {
            String url = links.next().getUrl();
            Webpage doc = FetcherCommon.getDoc(url, database, fetcher, fetcherArgs);
            List<String> tokens = null;
            List<Double> idfs = null;
            if (doc != null && doc.isUsable(fetcherArgs)) {
                List<String> processed = pp.process(doc.getTitle() + " " + doc.getContent());
                if (!processed.isEmpty()) {
                    tokens = processed;
                    if (withIdf) {
                        idfs = queryIdf.getIdf(processed);
                    }
                }
            }
            if (dropUnusableLinks && tokens == null) {
                links.remove();
                continue;
            }
            result.addDoc(doc);
            result.addDocTokens(tokens);
            result.addDocIdfs(idfs);
        }
    }

    // Publications: a null pair is recorded when nothing could be obtained, so every
    // publication ID set still gets exactly one entry.
    if (query.getPublicationIds() != null) {
        for (PublicationIdsQuery publicationIds : query.getPublicationIds()) {
            Publication publication = FetcherCommon.getPublication(publicationIds, database, fetcher, null, fetcherArgs);
            if (publication == null) {
                result.addPublication(null);
                result.addProcessedPublication(null);
            } else {
                result.addPublication(publication);
                result.addProcessedPublication(processPublication(publication, pp, queryIdf, fetcherArgs));
            }
        }
    }

    return result;
}
Also used : PublicationIdsQuery(org.edamontology.edammap.core.query.PublicationIdsQuery) Webpage(org.edamontology.pubfetcher.Webpage) Keyword(org.edamontology.edammap.core.query.Keyword) Publication(org.edamontology.pubfetcher.Publication) Link(org.edamontology.edammap.core.query.Link)

Example 2 with Webpage

use of org.edamontology.pubfetcher.Webpage in project edammap by edamontology.

The method writeLinks of the class Report.

/**
 * Writes one {@code <div class="with-meta">} element per link, pairing
 * {@code webpageUrls.get(i)} with {@code webpages.get(i)}.
 *
 * <p>Links that are {@code null} or have a {@code null}/empty URL are skipped. Each written
 * entry shows the link, its type (if any) and a status word; when a webpage object is
 * present, an info/warning marker and a box with the webpage's meta HTML follow.
 *
 * @param fetcherArgs arguments passed to {@code Webpage.isFinal}
 * @param writer destination of the generated HTML
 * @param webpageUrls the links to write
 * @param webpages the webpages for the links, indexed in parallel; entries may be {@code null}
 * @throws IOException if writing to {@code writer} fails
 */
private static void writeLinks(FetcherArgs fetcherArgs, Writer writer, List<Link> webpageUrls, List<Webpage> webpages) throws IOException {
    for (int idx = 0; idx < webpageUrls.size(); idx++) {
        final Link link = webpageUrls.get(idx);
        final Webpage page = webpages.get(idx);
        if (link == null || link.getUrl() == null || link.getUrl().isEmpty()) {
            continue;
        }

        final String status = linkStatusOf(fetcherArgs, page);
        final boolean flagged = !status.isEmpty();
        // The status word doubles as the CSS class of the span wrapping the link.
        final String spanOpen = flagged ? "<span class=\"" + status + "\">" : "<span>";
        final String boxKind = flagged ? "warning" : "info";

        writer.write("\t\t\t\t\t<div class=\"with-meta\">" + spanOpen);
        writeLinkWithTypeAndStatus(writer, link, status);
        writer.write("</span><span class=\"spacer\"></span>");

        if (page == null) {
            // No webpage object: there is no meta box to show, just end the line.
            writer.write("\n");
        } else {
            writer.write("<span class=\"" + boxKind + "\" tabindex=\"0\"></span>");
            writer.write("\n");
            writer.write("\t\t\t\t\t\t<div class=\"" + boxKind + "-box\" tabindex=\"0\">\n");
            writer.write("\t\t\t\t\t\t\t<h4>" + spanOpen);
            writeLinkWithTypeAndStatus(writer, link, status);
            writer.write("</span></h4>\n");
            writer.write(page.toStringMetaHtml("\t\t\t\t\t\t\t"));
            writer.write("\n");
            writer.write("\t\t\t\t\t\t</div>\n");
        }

        writer.write("\t\t\t\t\t</div>\n");
    }
}

/**
 * Returns the status word for a webpage: "broken" for a missing or broken page, "empty" for
 * an empty one, "non-final" for one that is not final, otherwise the empty string.
 */
private static String linkStatusOf(FetcherArgs fetcherArgs, Webpage page) {
    if (page == null || page.isBroken()) {
        return "broken";
    }
    if (page.isEmpty()) {
        return "empty";
    }
    if (page.isFinal(fetcherArgs)) {
        return "";
    }
    return "non-final";
}

/**
 * Writes the link HTML, then the escaped link type in parentheses (if the type is non-empty),
 * then the status in parentheses (if the status is non-empty).
 */
private static void writeLinkWithTypeAndStatus(Writer writer, Link link, String status) throws IOException {
    writer.write(FetcherCommon.getLinkHtml(link.getUrl()));
    if (link.getType() != null && !link.getType().isEmpty()) {
        writer.write(" <span class=\"link-type\">(" + FetcherCommon.escapeHtml(link.getType()) + ")</span>");
    }
    if (!status.isEmpty()) {
        writer.write(" (" + status + ")");
    }
}
Also used : Webpage(org.edamontology.pubfetcher.Webpage) Link(org.edamontology.edammap.core.query.Link)

Aggregations

Link (org.edamontology.edammap.core.query.Link): 2, Webpage (org.edamontology.pubfetcher.Webpage): 2, Keyword (org.edamontology.edammap.core.query.Keyword): 1, PublicationIdsQuery (org.edamontology.edammap.core.query.PublicationIdsQuery): 1, Publication (org.edamontology.pubfetcher.Publication): 1