Use of info.ephyra.search.Result in project lucida by claritylab.
Class EphyraTREC13To16, method askOther.
/**
 * Asks Ephyra an 'other' question, making use of the target description and
 * previous questions and answers.
 * <p>
 * For the duration of the call the tokens of the target description and of
 * every already answered question are registered with {@code TermFilter},
 * and the target ID is registered with {@code NuggetEvaluationFilter}. Both
 * registrations are reset before this method returns, also if it terminates
 * with an exception.
 *
 * @param target the target the 'other' question is about
 * @return array of results
 */
public Result[] askOther(TRECTarget target) {
    String targetDesc = target.getTargetDesc();
    TRECQuestion[] factoidQuestions = target.getQuestions();

    // property/value pairs learned from answered FACTOID and LIST questions
    ArrayList<String> props = new ArrayList<String>();
    ArrayList<String> vals = new ArrayList<String>();
    // tokens of the target description and of the answered questions
    ArrayList<String> sentences = new ArrayList<String>();
    for (String tt : NETagger.tokenize(targetDesc)) sentences.add(tt);

    // collect properties and answers from FACTOID and LIST questions
    for (TRECQuestion fq : factoidQuestions) {
        QuestionInterpretation qi = fq.getInterpretation();
        if (qi == null) continue;

        String prop = qi.getProperty();
        TRECAnswer[] answers = fq.getAnswers();
        if (answers.length == 0) continue;

        // collect property/value pair (only the first answer is used)
        props.add(prop);
        vals.add(answers[0].getAnswerString());

        // remember the tokens of the question as previously seen terms
        // NOTE(review): only the question string is tokenized here; the
        // answer string is not added although the surrounding comments
        // mention answers to previous questions - confirm the intent.
        for (String qt : NETagger.tokenize(fq.getQuestionString())) sentences.add(qt);
    }

    // filter out results that bring no new terms but ones contained in the
    // target, a previous question, or an answer to a previous question
    TermFilter.setPreviousResultsTerms(sentences.toArray(new String[sentences.size()]));
    try {
        // initialize Dossier (the target type is deliberately passed as null)
        Dossier dossier = Dossier.getDossier(targetDesc, null,
                props.toArray(new String[props.size()]),
                vals.toArray(new String[vals.size()]));

        ArrayList<Result> rawResults = new ArrayList<Result>();

        // collect missing properties
        String[] missingProps = dossier.getMissingPropertyNames();
        for (String mp : missingProps) {
            // generate FACTOID question from template
            String question = QuestionInterpreter.getQuestion(targetDesc, mp);
            if (question != null) {
                // Currently disabled: asking the generated question through
                // askFactoid(), storing the answer in the dossier and adding
                // the answer (plus its supporting sentence with a score
                // bonus of 2) to rawResults.
            }
        }

        NuggetEvaluationFilter.setTargetID(target.getId());

        // collect BagOfWords results for target
        for (Result r : askOther(targetDesc)) rawResults.add(r);
        Result[] nuggets = rawResults.toArray(new Result[rawResults.size()]);

        NuggetEvaluationFilter.targetFinished();
        return nuggets;
    } finally {
        // always reset the static filter state so that a failure while
        // processing this target cannot leak into the next one
        TermFilter.setPreviousResultsTerms(null);
        NuggetEvaluationFilter.setTargetID(null);
    }
}
Use of info.ephyra.search.Result in project lucida by claritylab.
Class EphyraTREC8To11, method runAndEval.
/**
 * Runs the evaluation: creates an Ephyra instance, obtains the answers for
 * each question (from the log file if enabled and available, otherwise by
 * asking Ephyra), judges them against the answer patterns and logs the
 * resulting precision and MRR.
 */
private static void runAndEval() {
    // set up Ephyra
    EphyraTREC8To11 ephyra = new EphyraTREC8To11();

    float precision = 0;
    float mrr = 0;

    for (int q = 0; q < qss.length; q++) {
        String question = qss[q];
        MsgPrinter.printQuestion(question);
        Logger.enableLogging(false);

        // try the log file first, if requested
        Result[] results = loadLog
                ? TREC13To16Parser.loadResults(question, "FACTOID", logFile)
                : null;
        if (results == null) {
            // nothing loaded from the log file: ask Ephyra and log the run
            Logger.enableLogging(true);
            Logger.logFactoidStart(question);
            results = ephyra.askFactoid(question, FACTOID_MAX_ANSWERS, FACTOID_ABS_THRESH);
        }

        // judge every answer; remember the 1-based rank of the first hit
        boolean[] judgements = new boolean[results.length];
        int rankOfFirstHit = 0;
        String answerPattern = regexs[q];
        if (answerPattern != null) {
            Pattern compiled = Pattern.compile(answerPattern);
            for (int rank = 1; rank <= results.length; rank++) {
                boolean hit = compiled.matcher(results[rank - 1].getAnswer()).find();
                judgements[rank - 1] = hit;
                if (hit && rankOfFirstHit == 0) {
                    rankOfFirstHit = rank;
                }
            }
        }

        // a question counts as correct if any answer matched
        if (rankOfFirstHit > 0) {
            precision += 1;
            mrr += 1.0f / rankOfFirstHit;
        }

        Logger.logResultsJudged(results, judgements);
        Logger.logFactoidEnd();
    }

    // average over all questions
    precision /= qss.length;
    mrr /= qss.length;
    Logger.logScores(precision, mrr);
}
Use of info.ephyra.search.Result in project lucida by claritylab.
Class TREC13To16Parser, method loadResults.
/**
 * Loads the results for a question from a log file.
 * <p>
 * The entries of the log file are read once per question type and cached in
 * static fields. The cache is keyed by the type only: once entries for a
 * type are cached, the <code>logfile</code> argument of later calls is not
 * read again for that type.
 * <p>
 * Entries are searched in reverse order, so the last entry in the log file
 * that matches the question wins.
 *
 * @param question the question
 * @param type the type of question: "FACTOID", "LIST", or any other value
 *             for 'other' questions; its lower-case form is used as the
 *             name of the XML tag that delimits an entry
 * @param logfile the log file
 * @return array of results or <code>null</code> if the question could not
 *         be found in the log file or the log file could not be read or
 *         parsed
 */
public static Result[] loadResults(String question, String type, String logfile) {
    try {
        // get cached entries for given question type
        ArrayList<String> entries;
        if (type.equals("FACTOID"))
            entries = factoidEntries;
        else if (type.equals("LIST"))
            entries = listEntries;
        else
            entries = otherEntries;

        // get entries from log file if not cached
        if (entries == null) {
            entries = new ArrayList<String>();
            String openTag = "<" + type.toLowerCase() + ">";
            String closeTag = "</" + type.toLowerCase() + ">";
            StringBuilder entry = new StringBuilder();

            BufferedReader in = new BufferedReader(new FileReader(logfile));
            try {
                String line;
                // readLine() returns null at end of file; ready() is not a
                // reliable end-of-file test
                while ((line = in.readLine()) != null) {
                    // handle characters that are not allowed in XML
                    for (int i = 0; i < SPECIALCHARS.length; i++)
                        line = line.replace(SPECIALCHARS[i], REPLACEMENTS[i]);

                    // an opening tag starts a new entry and discards
                    // anything collected before it
                    if (line.matches(openTag))
                        entry.setLength(0);
                    entry.append(line).append('\n');
                    if (line.matches(closeTag))
                        entries.add(entry.toString());
                }
            } finally {
                // release the file handle even if reading fails
                in.close();
            }

            // cache entries
            if (type.equals("FACTOID"))
                factoidEntries = entries;
            else if (type.equals("LIST"))
                listEntries = entries;
            else
                otherEntries = entries;
        }

        // the factory is loop-invariant, a fresh parser is created per entry
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

        // traverse entries in reverse order
        for (int i = entries.size() - 1; i >= 0; i--) {
            // parse entry and build tree
            DocumentBuilder parser = factory.newDocumentBuilder();
            Document entryD = parser.parse(new InputSource(new StringReader(entries.get(i))));

            // Is this the question we are looking for?
            Element questionE = (Element) entryD.getElementsByTagName("question").item(0);
            if (!textOf(questionE).equals(question))
                continue;

            // get results
            ArrayList<Result> results = new ArrayList<Result>();
            NodeList resultsL = entryD.getElementsByTagName("result");
            for (int j = 0; j < resultsL.getLength(); j++) {
                Element resultE = (Element) resultsL.item(j);
                String answerS = firstTextOf(resultE, "answer");
                float scoreF = Float.parseFloat(firstTextOf(resultE, "score"));
                String docidS = firstTextOf(resultE, "docid");

                // the interpretation is optional
                Element qiE = (Element) resultE.getElementsByTagName("interpretation").item(0);
                QuestionInterpretation qi = null;
                if (qiE != null) {
                    String propertyS = firstTextOf(qiE, "property");
                    String targetS = firstTextOf(qiE, "target");
                    NodeList contextL = qiE.getElementsByTagName("context");
                    String[] contextS = new String[contextL.getLength()];
                    for (int k = 0; k < contextS.length; k++)
                        contextS[k] = textOf((Element) contextL.item(k));
                    qi = new QuestionInterpretation(targetS, contextS, propertyS);
                }

                // the query only serves as a carrier for the interpretation
                Query query = new Query(null);
                query.setInterpretation(qi);
                Result result = new Result(answerS, query, docidS);
                result.setScore(scoreF);
                results.add(result);
            }
            return results.toArray(new Result[results.size()]);
        }

        // question not found
        return null;
    } catch (Exception e) {
        MsgPrinter.printErrorMsg("Failed to load or parse log file:");
        MsgPrinter.printErrorMsg(e.toString());
        return null;
    }
}

/**
 * Returns the trimmed value of the first child node of an element.
 *
 * @param element the element, expected to have a text node as first child
 * @return trimmed node value of the first child
 * @throws NullPointerException if the element or its first child is missing
 */
private static String textOf(Element element) {
    return element.getFirstChild().getNodeValue().trim();
}

/**
 * Returns the trimmed text of the first descendant element with the given
 * tag name.
 *
 * @param parent the element to search in
 * @param tag the tag name to look for
 * @return trimmed text of the first matching element
 * @throws NullPointerException if no such element exists or it is empty
 */
private static String firstTextOf(Element parent, String tag) {
    return textOf((Element) parent.getElementsByTagName(tag).item(0));
}
Aggregations