Search in sources :

Example 1 with PageRank

use of com.graphaware.nlp.ml.pagerank.PageRank in project neo4j-nlp by graphaware.

From the class TextRankSummarizer, method evaluate.

/**
 * Ranks the sentences of an annotated text with PageRank over their co-occurrence graph
 * and stores the outcome on the matching {@code Sentence} nodes.
 *
 * <p>Every key of the PageRank result is used as a {@code sentenceNumber}; the sentence
 * reached from the annotated text through {@code CONTAINS_SENTENCE} receives
 * {@code summaryRank} (1 for the highest PageRank value, then 2, 3, ...) and
 * {@code summaryRelevance} (the PageRank value itself).
 *
 * @param annotatedText the AnnotatedText node whose sentences are ranked
 * @param iter          forwarded unchanged to {@code PageRank.run}
 * @param damp          forwarded unchanged to {@code PageRank.run}
 * @param threshold     forwarded unchanged to {@code PageRank.run}
 * @param stopWords     forwarded unchanged to {@code createGraph}
 * @param admittedPOSs  forwarded unchanged to {@code createGraph}
 * @param forbiddenPOSs forwarded unchanged to {@code createGraph}
 * @return {@code true} when the ranks were stored or there was nothing to rank;
 *         {@code false} when PageRank returned no result or the results could not be saved
 */
private boolean evaluate(Node annotatedText, int iter, double damp, double threshold, Set<String> stopWords, List<String> admittedPOSs, List<String> forbiddenPOSs) {
    Map<Long, Map<Long, CoOccurrenceItem>> coOccurrence = createGraph(annotatedText, stopWords, admittedPOSs, forbiddenPOSs);
    if (coOccurrence == null || coOccurrence.isEmpty()) {
        LOG.info("Graph of co-occurrences is empty, aborting ...");
        return true;
    }

    PageRank pageRank = new PageRank(database);
    Map<Long, Double> pageRanks = pageRank.run(coOccurrence, iter, damp, threshold);
    if (pageRanks == null) {
        LOG.error("Page ranks not retrieved, aborting evaluate() method ...");
        return false;
    }

    // Iterate the sorted stream through its iterator: unlike Stream.forEach, this is
    // guaranteed to follow the sorted encounter order, which the rank numbers rely on.
    Iterable<Map.Entry<Long, Double>> ranked = pageRanks.entrySet().stream()
            .sorted(Map.Entry.comparingByValue(Comparator.reverseOrder()))::iterator;

    // NOTE(review): diagnostic output goes to stdout rather than LOG; left unchanged.
    System.out.println("\n >> Ranked sentences:");
    StringBuilder saveQuery = new StringBuilder("MATCH (a:AnnotatedText) WHERE id(a) = {id}\n");
    int rank = 1;
    for (Map.Entry<Long, Double> en : ranked) {
        System.out.println("  " + en.getKey() + ": " + en.getValue());
        saveQuery.append("WITH a\n")
                .append("MATCH (a)-[:CONTAINS_SENTENCE]->(s:Sentence {sentenceNumber: ").append(en.getKey()).append("})\n")
                .append("SET s.summaryRank = ").append(rank).append(", s.summaryRelevance = ").append(en.getValue()).append("\n");
        rank++;
    }

    if (pageRanks.isEmpty()) {
        // Nothing to store; a query consisting only of the leading MATCH would be rejected.
        return true;
    }

    // Save results
    Map<String, Object> params = new HashMap<>();
    params.put("id", annotatedText.getId());
    try (Transaction tx = database.beginTx()) {
        database.execute(saveQuery.toString(), params);
        tx.success();
    } catch (Exception e) {
        LOG.error("Error while saving results: ", e);
        // Report the failure instead of claiming success after the save was lost.
        return false;
    }
    return true;
}
Also used : PageRank(com.graphaware.nlp.ml.pagerank.PageRank) AtomicReference(java.util.concurrent.atomic.AtomicReference)

Example 2 with PageRank

use of com.graphaware.nlp.ml.pagerank.PageRank in project neo4j-nlp by graphaware.

From the class TextRank, method evaluate.

/**
 * Extracts keywords and key phrases from the given annotated texts.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>build the tag co-occurrence graph and run PageRank on it;</li>
 *   <li>take the top {@code topxTags} fraction of ranked tags as keyword candidates;</li>
 *   <li>walk the tag occurrences and merge a candidate with the occurrence that starts
 *       exactly one position after it ends, accumulating value and relevance;</li>
 *   <li>when {@code expandNEs} is set, add named entities that contain at least one
 *       top-ranked tag;</li>
 *   <li>compute total occurrences and, when {@code cleanKeywords} is set, clean the result.</li>
 * </ol>
 *
 * @param annotatedTexts the AnnotatedText nodes to analyse
 * @param language       forwarded unchanged to {@code createCooccurrences}
 * @param iter           forwarded unchanged to {@code PageRank.run}
 * @param damp           forwarded unchanged to {@code PageRank.run}
 * @param threshold      forwarded unchanged to {@code PageRank.run}
 * @return a successful result holding the extracted keywords (empty when no co-occurrence
 *         graph could be built), or a failed result when PageRank returned nothing
 */
public TextRankResult evaluate(List<Node> annotatedTexts, String language, int iter, double damp, double threshold) {
    Map<Long, Map<Long, CoOccurrenceItem>> coOccurrence = createCooccurrences(annotatedTexts, language, cooccurrencesFromDependencies);
    if (coOccurrence == null) {
        return TextRankResult.SUCCESS(new HashMap<>());
    }
    PageRank pageRank = new PageRank(database);
    Map<Long, Double> pageRanks = pageRank.run(coOccurrence, iter, damp, threshold);
    if (cooccurrencesFromDependencies) {
        coOccurrence.clear();
        // co-occurrences from natural word flow; needed for merging keywords into key phrases
        coOccurrence = createCooccurrences(annotatedTexts, language, false);
    }
    // NOTE(review): this check runs after the rebuild above; the order is kept as-is because
    // createCooccurrences may have side effects that are not visible from this method.
    if (pageRanks == null) {
        LOG.error("Page ranks not retrieved, aborting evaluate() method ...");
        return TextRankResult.FAILED("Page ranks not retrieved");
    }
    int n_oneThird = (int) (pageRanks.size() * topxTags);
    List<Long> topThird = getTopX(pageRanks, n_oneThird);
    LOG.info("Keyword candidates are top " + n_oneThird + " tags from this list:");
    pageRanks.entrySet().stream().sorted(Map.Entry.comparingByValue(Comparator.reverseOrder())).forEach(en -> LOG.debug("   " + idToValue.get(en.getKey()) + ": " + en.getValue()));
    Map<String, Object> params = new HashMap<>();
    params.put("posList", admittedPOSs);
    params.put("stopwords", removeStopWords ? stopWords : new ArrayList<>());
    // Detail tag analysis - get start & end positions and related tags (dependencies)
    List<KeywordExtractedItem> keywordsOccurrences = new ArrayList<>();
    Map<Long, KeywordExtractedItem> keywordMap = new HashMap<>();
    List<Long> wrongNEs = new ArrayList<>();
    for (Node node : annotatedTexts) {
        params.put("id", node.getId());
        detailedTagAnalysis(GET_TAG_QUERY, params, pageRanks, keywordsOccurrences, keywordMap, wrongNEs);
    }
    Map<String, Keyword> results = new HashMap<>();
    while (!keywordsOccurrences.isEmpty()) {
        // AtomicReference is used only as a mutable holder that the lambda below can update.
        final AtomicReference<KeywordExtractedItem> keywordOccurrence = new AtomicReference<>(keywordsOccurrences.remove(0));
        final AtomicReference<String> currValue = new AtomicReference<>(keywordOccurrence.get().getValue());
        final AtomicReference<Double> currRelevance = new AtomicReference<>(keywordOccurrence.get().getRelevance());
        final AtomicReference<Integer> currNTopRated = new AtomicReference<>(0);
        Set<Long> relTagIDs = getRelTagsIntoDepth(keywordOccurrence.get(), keywordsOccurrences);
        // keep only those that are among top 1/3
        relTagIDs.retainAll(topThird);
        // if useDependencies==false, keep only those keywords that are among top 1/3
        if (!useDependencies && !topThird.contains(keywordOccurrence.get().getTagId())) {
            continue;
        }
        // with dependencies, a non-top keyword is still kept when it has a top-rated related tag
        if (useDependencies && !topThird.contains(keywordOccurrence.get().getTagId()) && relTagIDs.isEmpty()) {
            continue;
        }
        Map<String, Keyword> localResults;
        if (topThird.contains(keywordOccurrence.get().getTagId())) {
            currNTopRated.set(currNTopRated.get() + 1);
        }
        // Keep extending the current phrase while the next occurrence continues it.
        do {
            int endPosition = keywordOccurrence.get().getEndPosition();
            localResults = checkNextKeyword(keywordOccurrence.get(), coOccurrence, keywordMap);
            if (localResults.size() > 0) {
                // Reset; set again below only when a continuation is found, which keeps the loop going.
                keywordOccurrence.set(null);
                localResults.entrySet().stream().forEach((item) -> {
                    // The occurrence list shrinks as phrases are merged, so it may already be
                    // exhausted here; treat that as "no next keyword" instead of letting
                    // get(0) throw IndexOutOfBoundsException.
                    KeywordExtractedItem nextKeyword = keywordsOccurrences.isEmpty() ? null : keywordsOccurrences.get(0);
                    if (nextKeyword != null
                            && nextKeyword.getValue().equalsIgnoreCase(item.getKey())
                            && (topThird.contains(nextKeyword.getTagId()) || useDependencies)
                            // crucial condition for graphs from co-occurrences, but very useful also for graphs from dependencies
                            && (nextKeyword.getStartPosition() - endPosition) == 1) {
                        String newCurrValue = currValue.get().trim().split("_")[0] + " " + item.getKey();
                        double newCurrRelevance = currRelevance.get() + item.getValue().getRelevance();
                        if (topThird.contains(nextKeyword.getTagId())) {
                            currNTopRated.set(currNTopRated.get() + 1);
                        }
                        currValue.set(newCurrValue);
                        currRelevance.set(newCurrRelevance);
                        keywordOccurrence.set(nextKeyword);
                        keywordsOccurrences.remove(0);
                    }
                });
            }
        } while (!localResults.isEmpty() && keywordOccurrence.get() != null);
        if (currNTopRated.get() > 0) {
            addToResults(currValue.get(), currRelevance.get(), TFIDF_1_1, currNTopRated.get(), results, 1);
        }
    }
    if (expandNEs) {
        // add named entities that contain at least some of the top 1/3 of words
        for (Long key : neExpanded.keySet()) {
            // number of the entity's component tags that are among the top-ranked ones
            long topRatedMembers = neExpanded.get(key).stream().filter(v -> topThird.contains(v)).count();
            if (topRatedMembers == 0) {
                continue;
            }
            if (wrongNEs.contains(key)) {
                continue;
            }
            String keystr = idToValue.get(key);
            double pr = pageRanks.containsKey(key) ? pageRanks.get(key) : 0.;
            // set PageRank value of a NE to max value of PR of its composite words
            if (pr == 0.) {
                pr = pageRanks.entrySet().stream().filter(en -> neExpanded.get(key).contains(en.getKey())).mapToDouble(en -> en.getValue()).max().orElse(0.);
            }
            addToResults(keystr, pr, TFIDF_1_1, (int) topRatedMembers, results, 1);
        }
    }
    computeTotalOccurrence(results);
    if (cleanKeywords) {
        results = cleanFinalKeywords(results, n_oneThird);
    }
    return TextRankResult.SUCCESS(results);
}
Also used : NLPManager(com.graphaware.nlp.NLPManager) java.util(java.util) DynamicConfiguration(com.graphaware.nlp.configuration.DynamicConfiguration) Log(org.neo4j.logging.Log) TfIdfObject(com.graphaware.nlp.domain.TfIdfObject) PageRank(com.graphaware.nlp.ml.pagerank.PageRank) Keyword(com.graphaware.nlp.domain.Keyword) Collectors(java.util.stream.Collectors) AtomicReference(java.util.concurrent.atomic.AtomicReference) CoOccurrenceItem(com.graphaware.nlp.ml.pagerank.CoOccurrenceItem) Relationships(com.graphaware.nlp.persistence.constants.Relationships) TypeConverter(com.graphaware.nlp.util.TypeConverter) Labels(com.graphaware.nlp.persistence.constants.Labels) org.neo4j.graphdb(org.neo4j.graphdb) LoggerFactory(com.graphaware.common.log.LoggerFactory) Pair(com.graphaware.common.util.Pair) PipelineSpecification(com.graphaware.nlp.dsl.request.PipelineSpecification) Keyword(com.graphaware.nlp.domain.Keyword) PageRank(com.graphaware.nlp.ml.pagerank.PageRank) AtomicReference(java.util.concurrent.atomic.AtomicReference) TfIdfObject(com.graphaware.nlp.domain.TfIdfObject)

Aggregations

PageRank (com.graphaware.nlp.ml.pagerank.PageRank)2 AtomicReference (java.util.concurrent.atomic.AtomicReference)2 LoggerFactory (com.graphaware.common.log.LoggerFactory)1 Pair (com.graphaware.common.util.Pair)1 NLPManager (com.graphaware.nlp.NLPManager)1 DynamicConfiguration (com.graphaware.nlp.configuration.DynamicConfiguration)1 Keyword (com.graphaware.nlp.domain.Keyword)1 TfIdfObject (com.graphaware.nlp.domain.TfIdfObject)1 PipelineSpecification (com.graphaware.nlp.dsl.request.PipelineSpecification)1 CoOccurrenceItem (com.graphaware.nlp.ml.pagerank.CoOccurrenceItem)1 Labels (com.graphaware.nlp.persistence.constants.Labels)1 Relationships (com.graphaware.nlp.persistence.constants.Relationships)1 TypeConverter (com.graphaware.nlp.util.TypeConverter)1 java.util (java.util)1 Collectors (java.util.stream.Collectors)1 org.neo4j.graphdb (org.neo4j.graphdb)1 Log (org.neo4j.logging.Log)1