Use of com.graphaware.nlp.ml.pagerank.PageRank in project neo4j-nlp by graphaware.
The class TextRankSummarizer, method evaluate.
/**
 * Ranks the sentences of an annotated text by running PageRank over the graph returned by
 * {@code createGraph} and writes the outcome onto the matching {@code Sentence} nodes.
 *
 * <p>Each sentence reachable from the annotated text through {@code CONTAINS_SENTENCE} whose
 * {@code sentenceNumber} equals a key of the PageRank result receives two properties:
 * {@code summaryRank} (1 = highest PageRank value) and {@code summaryRelevance} (the PageRank
 * value itself).
 *
 * <p>The values are sent to Cypher as parameters of one constant statement instead of being
 * concatenated into the query text. This keeps the statement valid for any double value and
 * for an empty result, and lets a sentence number without a matching node be skipped without
 * discarding the updates that follow it.
 *
 * @param annotatedText node of the text to summarise; its id selects the {@code AnnotatedText}
 * @param iter          passed through to {@code PageRank.run} (presumably the iteration limit)
 * @param damp          passed through to {@code PageRank.run} (presumably the damping factor)
 * @param threshold     passed through to {@code PageRank.run} (presumably the convergence threshold)
 * @param stopWords     passed through to {@code createGraph}
 * @param admittedPOSs  passed through to {@code createGraph}
 * @param forbiddenPOSs passed through to {@code createGraph}
 * @return {@code false} only when PageRank returned no result; {@code true} when the
 *         co-occurrence graph is empty or after the save has been attempted (a failed save is
 *         logged, not reported through the return value)
 */
private boolean evaluate(Node annotatedText, int iter, double damp, double threshold, Set<String> stopWords, List<String> admittedPOSs, List<String> forbiddenPOSs) {
    Map<Long, Map<Long, CoOccurrenceItem>> coOccurrence = createGraph(annotatedText, stopWords, admittedPOSs, forbiddenPOSs);
    if (coOccurrence == null || coOccurrence.isEmpty()) {
        LOG.info("Graph of co-occurrences is empty, aborting ...");
        return true;
    }

    PageRank pageRank = new PageRank(database);
    Map<Long, Double> pageRanks = pageRank.run(coOccurrence, iter, damp, threshold);
    if (pageRanks == null) {
        LOG.error("Page ranks not retrieved, aborting evaluate() method ...");
        return false;
    }

    // Highest PageRank first. java.util.ArrayList is spelled out in full because the import
    // section of this file is not guaranteed to list it.
    List<Map.Entry<Long, Double>> ranked = new java.util.ArrayList<>(pageRanks.entrySet());
    Comparator<Map.Entry<Long, Double>> byRelevanceDesc = Map.Entry.comparingByValue(Comparator.reverseOrder());
    ranked.sort(byRelevanceDesc);

    // One parameter row per sentence: its number, its 1-based rank and its relevance.
    LOG.debug("\n >> Ranked sentences:");
    List<Map<String, Object>> rows = new java.util.ArrayList<>(ranked.size());
    long rank = 1;
    for (Map.Entry<Long, Double> entry : ranked) {
        LOG.debug(" " + entry.getKey() + ": " + entry.getValue());
        Map<String, Object> row = new HashMap<>();
        row.put("sentenceNumber", entry.getKey());
        row.put("rank", rank++);
        row.put("relevance", entry.getValue());
        rows.add(row);
    }
    if (rows.isEmpty()) {
        // Nothing to store; avoid opening a transaction for a no-op.
        return true;
    }

    // Save results
    String saveQuery = "MATCH (a:AnnotatedText) WHERE id(a) = {id}\n"
            + "UNWIND {rows} AS row\n"
            + "MATCH (a)-[:CONTAINS_SENTENCE]->(s:Sentence {sentenceNumber: row.sentenceNumber})\n"
            + "SET s.summaryRank = row.rank, s.summaryRelevance = row.relevance";
    Map<String, Object> params = new HashMap<>();
    params.put("id", annotatedText.getId());
    params.put("rows", rows);
    try (Transaction tx = database.beginTx()) {
        database.execute(saveQuery, params);
        tx.success();
    } catch (Exception e) {
        // Persisting the ranking is best effort: the failure is logged and the method still
        // reports that the evaluation itself ran.
        LOG.error("Error while saving results: ", e);
    }
    return true;
}
Use of com.graphaware.nlp.ml.pagerank.PageRank in project neo4j-nlp by graphaware.
The class TextRank, method evaluate.
/**
 * Extracts keywords and key phrases from the given annotated texts.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>build the co-occurrence graph with {@code createCooccurrences} and run PageRank on it;</li>
 *   <li>take the top {@code pageRanks.size() * topxTags} tags (via {@code getTopX}) as keyword candidates;</li>
 *   <li>collect the tag occurrences of every text with {@code detailedTagAnalysis};</li>
 *   <li>walk the occurrences front to back, merging an occurrence with the one that directly
 *       follows it (start position exactly one past the current end position) when
 *       {@code checkNextKeyword} proposes that value, and add the resulting phrase to the
 *       results if it contains at least one top-rated tag;</li>
 *   <li>if {@code expandNEs} is set, add the entries of {@code neExpanded} that contain at
 *       least one top-rated tag and are not listed as wrong;</li>
 *   <li>call {@code computeTotalOccurrence} and, if {@code cleanKeywords} is set,
 *       {@code cleanFinalKeywords}.</li>
 * </ol>
 *
 * @param annotatedTexts nodes of the texts to analyse
 * @param language       passed through to {@code createCooccurrences}
 * @param iter           passed through to {@code PageRank.run} (presumably the iteration limit)
 * @param damp           passed through to {@code PageRank.run} (presumably the damping factor)
 * @param threshold      passed through to {@code PageRank.run} (presumably the convergence threshold)
 * @return a successful result holding the keywords (an empty map when no co-occurrence graph
 *         could be built), or a failed result when PageRank returned nothing
 */
public TextRankResult evaluate(List<Node> annotatedTexts, String language, int iter, double damp, double threshold) {
    Map<Long, Map<Long, CoOccurrenceItem>> coOccurrence = createCooccurrences(annotatedTexts, language, cooccurrencesFromDependencies);
    if (coOccurrence == null) {
        return TextRankResult.SUCCESS(new HashMap<>());
    }

    PageRank pageRank = new PageRank(database);
    Map<Long, Double> pageRanks = pageRank.run(coOccurrence, iter, damp, threshold);
    // Fail before doing any further graph work: rebuilding the co-occurrences below is
    // pointless when there are no page ranks to combine them with.
    if (pageRanks == null) {
        LOG.error("Page ranks not retrieved, aborting evaluate() method ...");
        return TextRankResult.FAILED("Page ranks not retrieved");
    }
    if (cooccurrencesFromDependencies) {
        coOccurrence.clear();
        // co-occurrences from natural word flow; needed for merging keywords into key phrases
        coOccurrence = createCooccurrences(annotatedTexts, language, false);
    }

    int topCount = (int) (pageRanks.size() * topxTags);
    List<Long> topThird = getTopX(pageRanks, topCount);
    LOG.info("Keyword candidates are top " + topCount + " tags from this list:");
    pageRanks.entrySet().stream()
            .sorted(Map.Entry.comparingByValue(Comparator.reverseOrder()))
            .forEach(en -> LOG.debug(" " + idToValue.get(en.getKey()) + ": " + en.getValue()));

    Map<String, Object> params = new HashMap<>();
    params.put("posList", admittedPOSs);
    params.put("stopwords", removeStopWords ? stopWords : new ArrayList<>());

    // Detail tag analysis - get start & end positions and related tags (dependencies)
    List<KeywordExtractedItem> keywordsOccurrences = new ArrayList<>();
    Map<Long, KeywordExtractedItem> keywordMap = new HashMap<>();
    List<Long> wrongNEs = new ArrayList<>();
    for (Node node : annotatedTexts) {
        params.put("id", node.getId());
        detailedTagAnalysis(GET_TAG_QUERY, params, pageRanks, keywordsOccurrences, keywordMap, wrongNEs);
    }

    Map<String, Keyword> results = new HashMap<>();
    while (!keywordsOccurrences.isEmpty()) {
        // The occurrence list is consumed as a queue: the head is the phrase start, and the
        // merge loop below may pull further heads into the same phrase.
        KeywordExtractedItem current = keywordsOccurrences.remove(0);
        String currValue = current.getValue();
        Double currRelevance = current.getRelevance();
        int currNTopRated = 0;

        Set<Long> relTagIDs = getRelTagsIntoDepth(current, keywordsOccurrences);
        // keep only those that are among top 1/3
        relTagIDs.retainAll(topThird);

        boolean startsWithTopTag = topThird.contains(current.getTagId());
        if (!startsWithTopTag) {
            // if useDependencies==false, keep only those keywords that are among top 1/3;
            // otherwise a lower-ranked keyword is kept only if it relates to a top-ranked tag
            if (!useDependencies || relTagIDs.isEmpty()) {
                continue;
            }
        } else {
            currNTopRated++;
        }

        Map<String, Keyword> localResults;
        do {
            int endPosition = current.getEndPosition();
            localResults = checkNextKeyword(current, coOccurrence, keywordMap);
            if (!localResults.isEmpty()) {
                // Stays null unless one of the candidates extends the phrase, which ends the loop.
                current = null;
                for (Map.Entry<String, Keyword> item : localResults.entrySet()) {
                    if (keywordsOccurrences.isEmpty()) {
                        // No occurrence is left to merge with. Without this guard get(0)
                        // throws IndexOutOfBoundsException at the end of the text.
                        break;
                    }
                    KeywordExtractedItem nextKeyword = keywordsOccurrences.get(0);
                    if (nextKeyword != null
                            && nextKeyword.getValue().equalsIgnoreCase(item.getKey())
                            && (topThird.contains(nextKeyword.getTagId()) || useDependencies)
                            // crucial condition for graphs from co-occurrences, but very useful also for graphs from dependencies
                            && (nextKeyword.getStartPosition() - endPosition) == 1) {
                        // Drop any "_suffix" from the phrase so far before appending the next word.
                        currValue = currValue.trim().split("_")[0] + " " + item.getKey();
                        currRelevance = currRelevance + item.getValue().getRelevance();
                        if (topThird.contains(nextKeyword.getTagId())) {
                            currNTopRated++;
                        }
                        current = nextKeyword;
                        keywordsOccurrences.remove(0);
                    }
                }
            }
        } while (!localResults.isEmpty() && current != null);

        if (currNTopRated > 0) {
            addToResults(currValue, currRelevance, TFIDF_1_1, currNTopRated, results, 1);
        }
    }

    if (expandNEs) {
        // add named entities that contain at least some of the top 1/3 of words
        for (Long key : neExpanded.keySet()) {
            long nTopRated = neExpanded.get(key).stream().filter(v -> topThird.contains(v)).count();
            if (nTopRated == 0 || wrongNEs.contains(key)) {
                continue;
            }
            String keystr = idToValue.get(key);
            double pr = pageRanks.containsKey(key) ? pageRanks.get(key) : 0.;
            if (pr == 0.) {
                // set PageRank value of a NE to max value of PR of its composite words
                pr = pageRanks.entrySet().stream()
                        .filter(en -> neExpanded.get(key).contains(en.getKey()))
                        .mapToDouble(en -> en.getValue())
                        .max()
                        .orElse(0.);
            }
            addToResults(keystr, pr, TFIDF_1_1, (int) nTopRated, results, 1);
        }
    }

    computeTotalOccurrence(results);
    if (cleanKeywords) {
        results = cleanFinalKeywords(results, topCount);
    }
    return TextRankResult.SUCCESS(results);
}
Aggregations