Use of org.edamontology.pubfetcher.Database in the project edammap by edamontology.
The method beforeAfter of the class PubMedApps.
/**
 * Prints two tab-separated tables to standard output: the processed words found
 * within two positions before the tool title in abstract sentences, and the
 * processed words found within two positions after it.
 *
 * <p>A publication contributes only if {@code TITLE_SEPARATOR} matches its title
 * and the part of the title in front of the match pre-processes to exactly one
 * word; that word is treated as the tool title. All other publications are
 * skipped.
 *
 * <p>Each table row holds the word, how often it was seen next to the tool title
 * (COUNT), how often it occurs in the abstracts of the contributing publications
 * (TOTAL), COUNT / TOTAL (PRECISION), an average neighbour score and the ratio
 * PRECISION / AVERAGE_SCORE. Rows are ordered by descending COUNT.
 *
 * @param preProcessorArgs arguments handed to the {@code PreProcessor} constructor
 * @param queryIdf argument handed to the {@code Idf} constructor
 * @param database first argument handed to {@code getPublications}
 * @param pubFile second argument handed to {@code getPublications}
 * @throws IOException propagated from the calls made by this method
 */
private static void beforeAfter(PreProcessorArgs preProcessorArgs, String queryIdf, String database, List<String> pubFile) throws IOException {
    PreProcessor preProcessor = new PreProcessor(preProcessorArgs);
    Idf idf = new Idf(queryIdf);
    List<Publication> publications = getPublications(database, pubFile);

    // Number of positions on each side of a word that count as its neighbourhood.
    final int window = 2;

    // Occurrences of a word within the window before / after the tool title.
    Map<String, Integer> before = new HashMap<>();
    Map<String, Integer> after = new HashMap<>();
    // Occurrences of every word in the abstracts of contributing publications.
    Map<String, Integer> all = new HashMap<>();
    // Per word: summed scores of the words inside its before / after window.
    Map<String, Double> allBeforeScores = new HashMap<>();
    Map<String, Double> allAfterScores = new HashMap<>();
    // Number of (word, neighbour) pairs added to the score maps, over all words.
    int allBeforeScoresSum = 0;
    int allAfterScoresSum = 0;

    for (Publication publication : publications) {
        String toolTitle = publication.getTitle().getContent();
        Matcher titleSeparator = TITLE_SEPARATOR.matcher(toolTitle);
        if (!titleSeparator.find()) {
            continue;
        }
        toolTitle = toolTitle.substring(0, titleSeparator.start()).trim();

        List<String> toolTitleProcessedWords = preProcessor.process(toolTitle);
        if (toolTitleProcessedWords.size() != 1) {
            continue;
        }
        String toolTitleProcessed = toolTitleProcessedWords.get(0);

        List<String> abstractSentences = preProcessor.sentences(preProcessor.removeLinks(publication.getAbstract().getContent()));
        List<List<String>> processed = new ArrayList<>();
        for (String sentence : abstractSentences) {
            processed.add(preProcessor.process(sentence));
        }

        // Score of a word within this abstract: its scaled IDF added once per occurrence.
        Map<String, Double> scores = new HashMap<>();
        for (List<String> sentence : processed) {
            for (String word : sentence) {
                scores.merge(word, Math.pow(idf.getIdf(word), QUERY_IDF_SCALING), Double::sum);
            }
        }

        for (List<String> sentence : processed) {
            int size = sentence.size();
            for (int i = 0; i < size; ++i) {
                String word = sentence.get(i);
                boolean isToolTitle = word.equals(toolTitleProcessed);
                all.merge(word, 1, Integer::sum);
                // Nearest neighbour first, so every map receives its additions in a
                // fixed order (floating-point sums depend on the order of addition).
                for (int offset = 1; offset <= window; ++offset) {
                    if (i - offset >= 0) {
                        String previous = sentence.get(i - offset);
                        if (isToolTitle) {
                            before.merge(previous, 1, Integer::sum);
                        }
                        allBeforeScores.merge(word, scores.get(previous), Double::sum);
                        ++allBeforeScoresSum;
                    }
                    if (i + offset < size) {
                        String next = sentence.get(i + offset);
                        if (isToolTitle) {
                            after.merge(next, 1, Integer::sum);
                        }
                        allAfterScores.merge(word, scores.get(next), Double::sum);
                        ++allAfterScoresSum;
                    }
                }
            }
        }
    }

    // A word seen before the tool title has the tool title in its own "after"
    // window, so it is paired with the after-scores, and vice versa.
    printBeforeAfterTable("BEFORE_TOOL_TITLE\tCOUNT\tTOTAL\tPRECISION\tAVERAGE_SCORE\tPRECISION/AVERAGE_SCORE", before, all, allAfterScores, allAfterScoresSum);
    System.out.println();
    printBeforeAfterTable("AFTER_TOOL_TITLE\tCOUNT\tTOTAL\tPRECISION\tAVERAGE_SCORE\tPRECISION/AVERAGE_SCORE", after, all, allBeforeScores, allBeforeScoresSum);
}

/**
 * Prints the header line and then one row per entry of {@code counts}, ordered by
 * descending count.
 *
 * @param header header line printed before the rows
 * @param counts per word, how often it was seen next to the tool title
 * @param all per word, its total number of occurrences; must contain every key of {@code counts}
 * @param neighbourScores per word, the summed scores of its neighbours; a missing word yields an average of 0
 * @param neighbourScoresSum divisor applied to a word's summed neighbour score
 */
private static void printBeforeAfterTable(String header, Map<String, Integer> counts, Map<String, Integer> all, Map<String, Double> neighbourScores, int neighbourScoresSum) {
    System.out.println(header);
    counts.entrySet().stream()
        .sorted(Map.Entry.comparingByValue(Comparator.reverseOrder()))
        .forEachOrdered(entry -> {
            String word = entry.getKey();
            int count = entry.getValue();
            int total = all.get(word);
            double precision = count / (double) total;
            Double totalScore = neighbourScores.get(word);
            double averageScore = (totalScore != null ? totalScore / neighbourScoresSum : 0.0);
            // When averageScore is 0 the last column is the result of a floating-point
            // division by zero (printed as Infinity or NaN), not an exception.
            System.out.printf(Locale.ROOT, "%16s\t%d\t%d\t%.6f\t%.6f\t%8.1f\n", word, count, total, precision, averageScore, precision / averageScore);
        });
}
Aggregations