Use of info.ephyra.search.Result in project lucida by claritylab.
The class OpenEphyraServer, method askFactoid.
/**
 * Asks Ephyra a factoid question and returns up to <code>maxAnswers</code>
 * results that have a score of at least <code>absThresh</code>.
 *
 * @param question factoid question
 * @param maxAnswers maximum number of answers
 * @param absThresh absolute threshold for scores
 * @return array of results
 */
public Result[] askFactoid(String question, int maxAnswers, float absThresh) {
    // initialize pipeline
    initFactoid();
    // analyze question
    MsgPrinter.printAnalyzingQuestion();
    AnalyzedQuestion aq = QuestionAnalysis.analyze(question);
    // get answers
    Result[] results = runPipeline(aq, maxAnswers, absThresh);
    return results;
}
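A minimal usage sketch (hedged: the no-argument constructor and the question string are ours for illustration, and an absolute threshold of 0.0f is an arbitrary choice, not a project default):

// Hypothetical caller: ask a factoid question and print the scored answers.
OpenEphyraServer ephyra = new OpenEphyraServer();
Result[] answers = ephyra.askFactoid("When was Thomas Jefferson born?", 10, 0.0f);
for (Result answer : answers)
    System.out.println(answer.getScore() + "\t" + answer.getAnswer());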
Use of info.ephyra.search.Result in project lucida by claritylab.
The class AdaptiveNumberOfKeywordsFilter, method apply.
/**
 * Scores result snippets according to the number of keywords (target terms)
 * they contain. Between two snippets from the same document, score is
 * transferred from a snippet to the subsequent one if the former contains
 * many of the keywords. The idea is that a subsequent snippet might use a
 * pronoun for the target (and thus not contain the target itself), but
 * provide useful information anyway.
 *
 * @param results array of <code>Result</code> objects
 * @return extended array of <code>Result</code> objects
 */
public Result[] apply(Result[] results) {
    // raw results returned by the searchers
    ArrayList<Result> rawResults = new ArrayList<Result>();
    int lastScore = 0;
    String lastDocID = "";
    int keywordCount = 1;
    for (Result result : results) {
        if (result.getScore() != Float.NEGATIVE_INFINITY) {
            String[] keywords = NETagger.tokenize(result.getQuery().getQueryString());
            for (int k = 0; k < keywords.length; k++) keywords[k] = SnowballStemmer.stem(keywords[k]);
            int k = keywords.length;
            keywordCount = k;
            String[] wordsInResult = NETagger.tokenize(result.getAnswer());
            for (int r = 0; r < wordsInResult.length; r++) wordsInResult[r] = SnowballStemmer.stem(wordsInResult[r]);
            int m = getNumberOfMatches(keywords, wordsInResult);
            if (m >= Math.floor(Math.sqrt(k - 1) + 1)) {
                // remember doc ID so score is propagated only within the same document
                lastDocID = result.getDocID();
                if (lastDocID == null)
                    lastDocID = "";
                // remember score
                lastScore = ((m * m + 1) / 2);
                // lastScore = ((m + 1) / 2); // remember score
                // manipulate score
                result.incScore(m * m);
                // result.incScore(m); // manipulate score
                // keep result
                rawResults.add(result);
            } else if ((lastScore > 0) && lastDocID.equalsIgnoreCase(result.getDocID())) {
                // manipulate score
                result.incScore(lastScore);
                // keep result
                rawResults.add(result);
                // decay last score
                lastScore = (lastScore / 2);
            } else {
                // reset remembered score
                lastScore = 0;
            }
        }
    }
    // if there are too few results, match again and consider only proper names
    if (rawResults.size() < 100) {
        for (Result result : results) {
            if (result.getScore() != Float.NEGATIVE_INFINITY) {
                String[] keywords = NETagger.tokenize(result.getQuery().getQueryString());
                ArrayList<String> keywordList = new ArrayList<String>();
                for (int k = 0; k < keywords.length; k++)
                    if (keywords[k].matches("[A-Z]++.*+"))
                        keywordList.add(SnowballStemmer.stem(keywords[k]));
                keywords = keywordList.toArray(new String[keywordList.size()]);
                int k = keywords.length;
                // do this only if there are now fewer keywords
                if ((keywords.length != 0) && (k < keywordCount)) {
                    String[] wordsInResult = NETagger.tokenize(result.getAnswer());
                    for (int r = 0; r < wordsInResult.length; r++) wordsInResult[r] = SnowballStemmer.stem(wordsInResult[r]);
                    int m = getNumberOfMatches(keywords, wordsInResult);
                    if (m >= Math.floor(Math.sqrt(k - 1) + 1)) {
                        // manipulate score
                        result.incScore(m * m);
                        // result.incScore(m); // manipulate score
                        // keep result
                        rawResults.add(result);
                    }
                }
            }
        }
    }
    return rawResults.toArray(new Result[rawResults.size()]);
}
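The acceptance rule m >= Math.floor(Math.sqrt(k - 1) + 1) makes the required number of keyword matches grow roughly with the square root of the keyword count. A standalone sketch of that rule (the helper name requiredMatches is ours, not part of the project; assumes k >= 1):

// Illustration: minimum matches m needed for a snippet with k query keywords,
// mirroring the condition used above.
static int requiredMatches(int k) {
    return (int) Math.floor(Math.sqrt(k - 1) + 1);
}
// requiredMatches(1) == 1, requiredMatches(3) == 2,
// requiredMatches(5) == 3, requiredMatches(10) == 4:
// short queries must match almost all keywords, long ones only about sqrt(k).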
Use of info.ephyra.search.Result in project lucida by claritylab.
The class AnswerPatternFilter, method apply.
/**
 * Applies the answer patterns to the answer strings of the
 * <code>Result</code> objects and creates a new <code>Result</code> for
 * each extracted unique answer.
 *
 * @param results array of <code>Result</code> objects
 * @return extended array of <code>Result</code> objects
 */
public Result[] apply(Result[] results) {
    // extracted factoid answers and corresponding results
    Hashtable<String, Result> factoids = new Hashtable<String, Result>();
    for (Result result : results) {
        // only apply this filter to results for the pattern matching approach
        Query query = result.getQuery();
        QuestionInterpretation qi = query.getInterpretation();
        if (!query.extractWith(ID) || qi == null || result.getScore() > Float.NEGATIVE_INFINITY)
            continue;
        // extract PROPERTY objects
        extractPos(result);
        // create new result for each unique normalized PROPERTY object
        for (int i = 0; i < extr.size(); i++) {
            String po = extr.get(i);
            String[] neTypes = types.get(i);
            String norm = StringUtils.normalize(po);
            String sentence = sents.get(i);
            float conf = aps.get(i).getConfidence();
            Result factoid = factoids.get(norm);
            if (factoid == null) {
                // new answer
                // query, doc ID and sentence can be ambiguous
                factoid = new Result(po, result.getQuery(), result.getDocID());
                factoid.setSentence(sentence);
                factoid.addExtractionTechnique(ID);
                factoids.put(norm, factoid);
            }
            if (neTypes != null)
                for (String neType : neTypes) factoid.addNeType(neType);
            factoid.incScore(conf);
        }
    }
    // keep old results
    Result[] newResults = factoids.values().toArray(new Result[factoids.size()]);
    Result[] allResults = new Result[results.length + newResults.length];
    for (int i = 0; i < results.length; i++) allResults[i] = results[i];
    for (int i = 0; i < newResults.length; i++) allResults[results.length + i] = newResults[i];
    return allResults;
}
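Because factoids is keyed on the normalized answer, repeated extractions of the same answer collapse into one Result whose score is the sum of the pattern confidences. A minimal sketch of that effect (the surface forms and the 0.5f confidence are made up, the query and docID variables are assumed in scope, and we assume the project's StringUtils.normalize collapses case variants):

// Illustration only: two surface forms of one answer share a normalized key,
// so their pattern confidences accumulate on a single Result.
Hashtable<String, Result> seen = new Hashtable<String, Result>();
for (String po : new String[] { "Thomas Jefferson", "THOMAS JEFFERSON" }) {
    String norm = StringUtils.normalize(po);
    Result factoid = seen.get(norm);
    if (factoid == null) {
        factoid = new Result(po, query, docID); // query, docID assumed in scope
        seen.put(norm, factoid);
    }
    factoid.incScore(0.5f); // made-up pattern confidence
}
// seen now maps one key to one Result with score 1.0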
Use of info.ephyra.search.Result in project lucida by claritylab.
The class AnswerProjectionFilter, method apply.
/**
 * Projects Web answers onto the corpus.
 *
 * @param results array of <code>Result</code> objects from the Web
 * @return array of <code>Result</code> objects from the corpus
 */
public Result[] apply(Result[] results) {
    // split corpus results into factoid answers and raw results
    Hashtable<String, Result> factoids = new Hashtable<String, Result>();
    Hashtable<String, Result> sentences = new Hashtable<String, Result>();
    ArrayList<String> normSentences = new ArrayList<String>();
    Filter sorter = new HitPositionSorterFilter();
    // sort by hit position
    resultsCorp = sorter.apply(resultsCorp);
    for (Result resultCorp : resultsCorp) {
        if (resultCorp.getScore() > 0) {
            // factoid answer
            String norm = StringUtils.normalize(resultCorp.getAnswer());
            Result factoid = factoids.get(norm);
            if (factoid != null) {
                if (hasHigherPreference(resultCorp, factoid)) {
                    factoids.put(norm, resultCorp);
                    String[] neTypes = factoid.getNeTypes();
                    if (neTypes != null)
                        for (String neType : neTypes) resultCorp.addNeType(neType);
                } else {
                    String[] neTypes = resultCorp.getNeTypes();
                    if (neTypes != null)
                        for (String neType : neTypes) factoid.addNeType(neType);
                }
            } else {
                factoids.put(norm, resultCorp);
            }
        } else {
            // raw result
            String[] sents = OpenNLP.sentDetect(resultCorp.getAnswer());
            for (String sent : sents) {
                // one result for each sentence
                String norm = StringUtils.normalize(sent);
                if (!sentences.containsKey(norm)) {
                    Result sentence = resultCorp.getCopy();
                    sentence.setAnswer(sent);
                    sentences.put(norm, sentence);
                    normSentences.add(norm);
                }
            }
        }
    }
    // project Web results onto the corpus
    ArrayList<Result> projected = new ArrayList<Result>();
    for (Result resultWeb : results) {
        // only project factoids
        if (resultWeb.getScore() <= 0)
            continue;
        String norm = StringUtils.normalize(resultWeb.getAnswer());
        // Answer projection rules:
        // - First try to find a matching factoid answer extracted from the
        //   corpus; only if this attempt fails, browse the raw results.
        // - A named entity from a model-based tagger is projected only if
        //   the same named entity was extracted from the corpus (this takes
        //   the poor performance of the model-based NE taggers on the noisy
        //   Web data into account).
        // - If a factoid answer was extracted from the corpus with more
        //   than one technique, then the first extraction technique in
        //   'EXTRACTION_TECHNIQUES' determines the supporting document.
        Result factoid = factoids.get(norm);
        if (factoid != null && (!NETagger.allModelType(resultWeb.getNeTypes()) || factoid.isNamedEntity())) {
            // factoid answer also extracted from the corpus:
            // if the Web answer is not a named entity from a model-based
            // tagger, or the corpus answer is also a named entity
            // -> project the answer
            Result result = resultWeb.getCopy();
            result.setAnswer(factoid.getAnswer());
            result.setDocID(factoid.getDocID());
            result.setSentence(factoid.getSentence());
            projected.add(result);
        } else if (!NETagger.allModelType(resultWeb.getNeTypes())) {
            // factoid answer not extracted from the corpus:
            // if the answer is not a named entity from a model-based tagger
            // -> browse the sentences for the answer
            String normRegex = RegexConverter.strToRegexWithBounds(norm);
            for (String normSentence : normSentences) {
                String[] truncs = normSentence.split(normRegex, -1);
                if (truncs.length > 1) {
                    // the sentence contains the answer:
                    // undo the normalization
                    Result sentence = sentences.get(normSentence);
                    String sent = sentence.getAnswer();
                    int start = truncs[0].split(" ", -1).length - 1;
                    int end = start + norm.split(" ").length;
                    String[] tokens = NETagger.tokenize(sent);
                    String answer = tokens[start];
                    for (int i = start + 1; i < end; i++) answer += " " + tokens[i];
                    answer = OpenNLP.untokenize(answer, sent);
                    if (norm.equals(StringUtils.normalize(answer))) {
                        Result result = resultWeb.getCopy();
                        result.setAnswer(answer);
                        result.setDocID(sentence.getDocID());
                        result.setSentence(sentence.getAnswer());
                        projected.add(result);
                        break;
                    } else {
                        MsgPrinter.printErrorMsg("\nNormalization could not be undone:\n" + norm);
                    }
                }
            }
        }
    }
    return projected.toArray(new Result[projected.size()]);
}
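The offset arithmetic in the normalization-undo step is worth spelling out: it assumes the normalized sentence and NETagger.tokenize(sent) yield token-aligned output, which is exactly what the final norm.equals(...) check verifies. A condensed sketch of the arithmetic (normSentence, normRegex, norm and sent are assumed inputs):

// Sketch of the offset computation used above.
String[] truncs = normSentence.split(normRegex, -1);
// truncs[0] is everything before the first match; splitting it on single
// spaces with limit -1 keeps a trailing empty piece, so length - 1 is the
// index of the answer's first token.
int start = truncs[0].split(" ", -1).length - 1;
int end = start + norm.split(" ").length; // answer length in tokens
String[] tokens = NETagger.tokenize(sent);
String answer = tokens[start];
for (int i = start + 1; i < end; i++) answer += " " + tokens[i];
answer = OpenNLP.untokenize(answer, sent); // restore original spacing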
Use of info.ephyra.search.Result in project lucida by claritylab.
The class DeserializationFilter, method apply.
/**
 * Filters an array of <code>Result</code> objects.
 *
 * @param results results to filter
 * @return filtered results
 */
public Result[] apply(Result[] results) {
    // any input file set?
    if (serialFiles == null || serialFiles.length == 0)
        return results;
    // keep old results
    ArrayList<Result> resultsL = new ArrayList<Result>();
    for (Result result : results) resultsL.add(result);
    // deserialize and add results
    for (File serialFile : serialFiles) {
        // input file exists?
        if (!serialFile.exists())
            continue;
        try {
            FileInputStream fis = new FileInputStream(serialFile);
            ObjectInputStream ois = new ObjectInputStream(fis);
            try {
                while (true) {
                    Object o = ois.readObject();
                    if (o instanceof Result) {
                        Result result = (Result) o;
                        resultsL.add(result);
                    }
                }
            } catch (EOFException e) {
                /* end of file reached */
            }
            ois.close();
        } catch (Exception e) {
            MsgPrinter.printErrorMsg("Could not read serialized results:");
            MsgPrinter.printErrorMsg(e.toString());
            System.exit(1);
        }
    }
    return resultsL.toArray(new Result[resultsL.size()]);
}
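DeserializationFilter expects a file that contains one serialized Result after another, terminated only by the end of the file. A minimal sketch of the matching writer side, using only standard java.io (the helper name serializeResults is ours; it assumes Result implements java.io.Serializable, which successful readObject calls above imply anyway):

// Illustration: write results in the format DeserializationFilter reads back.
static void serializeResults(Result[] results, File serialFile) throws IOException {
    FileOutputStream fos = new FileOutputStream(serialFile);
    ObjectOutputStream oos = new ObjectOutputStream(fos);
    for (Result result : results)
        oos.writeObject(result);
    oos.close();
}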