Use of info.ephyra.search.Result in project lucida by claritylab.
The class SentenceExtractionFilter, method apply:
/**
 * Splits the answer strings of the results into sentences and creates a
 * new result for each sentence.
 *
 * @param results array of <code>Result</code> objects
 * @return extended array of <code>Result</code> objects
 */
public Result[] apply(Result[] results) {
    ArrayList<Result> newResults = new ArrayList<Result>();
    for (Result result : results) {
        // do not apply this filter to results from knowledge annotators
        if (result.getScore() == Float.POSITIVE_INFINITY) {
            newResults.add(result);
            continue;
        }
        // split the answer string into sentences
        String answer = result.getAnswer();
        String[] sentences = LingPipe.sentDetect(answer);
        // create a new result for each sentence
        for (String sentence : sentences) {
            Result newResult = result.getCopy();
            // BUG FIX: set the sentence on the copy, not on the original
            // result. Previously the original was mutated on every
            // iteration while the copies all kept the full answer string.
            newResult.setAnswer(sentence);
            newResults.add(newResult);
        }
    }
    return newResults.toArray(new Result[newResults.size()]);
}
Use of info.ephyra.search.Result in project lucida by claritylab.
The class Logger, method logResults:
/**
 * Logs the results returned by the QA engine as XML-style
 * <code>&lt;result&gt;</code> elements, appended to the log file.
 *
 * @param results the results
 * @return true, iff logging was successful
 */
public static boolean logResults(Result[] results) {
    // logging is disabled or log file is not specified
    if (!enabled || logfile == null)
        return false;
    PrintWriter out = null;
    try {
        // append to the existing log file
        out = new PrintWriter(new FileOutputStream(logfile, true));
        for (Result result : results) {
            out.println("\t<result>");
            out.println("\t\t<answer>");
            out.println("\t\t\t" + result.getAnswer());
            out.println("\t\t</answer>");
            out.println("\t\t<score>");
            out.println("\t\t\t" + result.getScore());
            out.println("\t\t</score>");
            // the source document is optional
            if (result.getDocID() != null) {
                out.println("\t\t<docid>");
                out.println("\t\t\t" + result.getDocID());
                out.println("\t\t</docid>");
            }
            // log the question interpretation, if the query has one
            QuestionInterpretation qi = result.getQuery().getInterpretation();
            if (qi != null) {
                out.println("\t\t<interpretation>");
                out.println("\t\t\t<property>");
                out.println("\t\t\t\t" + qi.getProperty());
                out.println("\t\t\t</property>");
                out.println("\t\t\t<target>");
                out.println("\t\t\t\t" + qi.getTarget());
                out.println("\t\t\t</target>");
                for (String context : qi.getContext()) {
                    out.println("\t\t\t<context>");
                    out.println("\t\t\t\t" + context);
                    out.println("\t\t\t</context>");
                }
                out.println("\t\t</interpretation>");
            }
            out.println("\t</result>");
        }
    } catch (IOException e) {
        return false;
    } finally {
        // FIX: always release the file handle; previously the writer
        // leaked when an exception was thrown mid-write
        if (out != null)
            out.close();
    }
    return true;
}
Use of info.ephyra.search.Result in project lucida by claritylab.
The class DuplicateFilter, method apply:
/**
 * Filters duplicate results and increments the scores of the remaining
 * results by the scores of the dropped results.
 *
 * @param results array of <code>Result</code> objects
 * @return array of <code>Result</code> objects without duplicates
 */
public Result[] apply(Result[] results) {
    // sort results by their scores in descending order
    results = (new ScoreSorterFilter()).apply(results);
    // null out results whose answer string is forbidden
    // (skip pinned results with score +INF; stop at the -INF tail,
    // which the descending sort guarantees is trailing)
    for (String forbiddenAnswer : forbidden) {
        for (int pos = 0; pos < results.length; pos++) {
            Result candidate = results[pos];
            if (candidate == null || candidate.getScore() == Float.POSITIVE_INFINITY)
                continue;
            if (candidate.getScore() == Float.NEGATIVE_INFINITY)
                break;
            if (StringUtils.equalsCommonNorm(forbiddenAnswer, candidate.getAnswer()))
                results[pos] = null;
        }
    }
    // merge duplicates into the higher-scored (earlier) result
    for (int first = 0; first < results.length - 1; first++) {
        Result keeper = results[first];
        if (keeper == null || keeper.getScore() == Float.POSITIVE_INFINITY)
            continue;
        if (keeper.getScore() == Float.NEGATIVE_INFINITY)
            break;
        for (int second = first + 1; second < results.length; second++) {
            Result other = results[second];
            if (other == null || other.getScore() == Float.POSITIVE_INFINITY)
                continue;
            if (other.getScore() == Float.NEGATIVE_INFINITY)
                break;
            if (StringUtils.equalsCommonNorm(keeper.getAnswer(), other.getAnswer())) {
                // transfer the duplicate's score to the kept result
                keeper.incScore(other.getScore());
                // drop the lower-scored duplicate
                results[second] = null;
            }
        }
    }
    // collect the surviving results
    ArrayList<Result> remaining = new ArrayList<Result>();
    for (Result survivor : results) {
        if (survivor != null)
            remaining.add(survivor);
    }
    return remaining.toArray(new Result[remaining.size()]);
}
Use of info.ephyra.search.Result in project lucida by claritylab.
The class DuplicateSnippetFilter, method apply:
/**
 * Filters duplicate results and increments the scores of the remaining
 * results by the scores of the dropped results.
 * <p>
 * Two snippets count as duplicates if they share the same set of stemmed,
 * non-function-word keywords; only the first (highest-scored) snippet with
 * a given keyword set is kept. Results with a <code>null</code> answer
 * string are dropped.
 *
 * @param results array of <code>Result</code> objects
 * @return array of <code>Result</code> objects without duplicates
 */
public Result[] apply(Result[] results) {
    // sort results by their scores in descending order
    results = (new ScoreSorterFilter()).apply(results);
    ArrayList<Result> rawResults = new ArrayList<Result>();
    // keyword signatures of snippets that were already kept
    HashSet<String> contained = new HashSet<String>();
    // drop duplicates
    for (Result res : results) {
        String text = res.getAnswer();
        if (text != null) {
            // normalize: lower-case, strip quote/underscore noise, stem
            text = text.toLowerCase().trim();
            text = text.replaceAll("(\\'|\\\"|\\`|\\_)", "");
            text = SnowballStemmer.stemAllTokens(text);
            // collect keywords (length > 1, not a function word)
            String[] tokens = NETagger.tokenize(text);
            HashSet<String> keywords = new HashSet<String>();
            for (String term : tokens) {
                if ((term.length() > 1) && !FunctionWords.lookup(term))
                    keywords.add(term);
            }
            // build a canonical, sorted signature of the keyword set
            // (StringBuilder instead of StringBuffer: no synchronization
            // needed in this single-threaded method)
            ArrayList<String> sortedKeywords = new ArrayList<String>(keywords);
            Collections.sort(sortedKeywords);
            StringBuilder keywordString = new StringBuilder();
            for (String term : sortedKeywords) {
                if (keywordString.length() > 0)
                    keywordString.append(' ');
                keywordString.append(term);
            }
            // keep the snippet only if its signature is new
            if (contained.add(keywordString.toString()))
                rawResults.add(res);
        }
    }
    return rawResults.toArray(new Result[rawResults.size()]);
}
Use of info.ephyra.search.Result in project lucida by claritylab.
The class FactoidsFromPredicatesFilter, method apply:
/**
 * Extracts factoids from the predicates within the answer strings of the
 * <code>Result</code> objects and creates a new <code>Result</code> for
 * each extracted unique answer.
 *
 * @param results array of <code>Result</code> objects containing predicates
 * @return array of <code>Result</code> objects containing factoids
 */
public Result[] apply(Result[] results) {
    // old results that are passed along the pipeline
    ArrayList<Result> oldResults = new ArrayList<Result>();
    // extracted factoid answers (normalized) and corresponding results
    Hashtable<String, Result> factoids = new Hashtable<String, Result>();
    // extracted factoid answers and maximum weights of predicates
    Hashtable<String, Double> maxScores = new Hashtable<String, Double>();
    for (Result result : results) {
        // only apply this filter to results for the semantic parsing
        // approach
        Query query = result.getQuery();
        Predicate[] ps = query.getAnalyzedQuestion().getPredicates();
        if (!query.extractWith(ID) || ps.length == 0 || result.getScore() != 0) {
            oldResults.add(result);
            continue;
        }
        Predicate p = result.getPredicate();
        Predicate questionP = p.getSimPredicate();
        double simScore = p.getSimScore();
        Map<String, String[]> nes = result.getNes();
        // get answer strings
        ArrayList<String> answers = new ArrayList<String>();
        if (nes != null) {
            // - allow entities in all arguments
            // (each entity is added at most once: break after first match)
            for (String ne : nes.keySet()) for (String arg : p.getArgs()) if (arg.contains(ne)) {
                answers.add(ne);
                break;
            }
            // - allow entities in missing arguments only
            // for (String ne : nes.keySet())
            // for (String missing : questionP.getMissingArgs()) {
            // String arg = p.get(missing);
            // if (arg != null && arg.contains(ne)) {
            // answers.add(ne);
            // break;
            // }
            // }
        } else {
            // arguments as factoid answers
            for (String missing : questionP.getMissingArgs()) {
                String arg = p.get(missing);
                if (arg != null)
                    answers.add(arg);
            }
        }
        // create result objects, merging answers with the same
        // normalized form
        for (String answer : answers) {
            String norm = StringUtils.normalize(answer);
            Result factoid = factoids.get(norm);
            if (factoid == null) {
                // new answer
                // query, doc ID and sentence can be ambiguous
                factoid = new Result(answer, result.getQuery(), result.getDocID());
                factoid.setSentence(result.getSentence());
                factoid.addExtractionTechnique(ID);
                factoids.put(norm, factoid);
                maxScores.put(norm, simScore);
            } else if (simScore > maxScores.get(norm)) {
                // remember document ID of predicate with highest score
                factoid.setDocID(result.getDocID());
                maxScores.put(norm, simScore);
            }
            if (nes != null)
                for (String neType : nes.get(answer)) factoid.addNeType(neType);
            // scores of all supporting predicates are accumulated
            factoid.incScore((float) simScore);
        }
    }
    // concatenate old results and new factoid results
    Result[] newResults = factoids.values().toArray(new Result[factoids.size()]);
    Result[] allResults = new Result[oldResults.size() + newResults.length];
    oldResults.toArray(allResults);
    System.arraycopy(newResults, 0, allResults, oldResults.size(), newResults.length);
    return allResults;
}
Aggregations