Use of info.ephyra.questionanalysis.QuestionInterpretation in project lucida by claritylab: class Logger, method logInterpretations.
/**
 * Logs the interpretations of a question to the configured log file in an
 * XML-like format (one &lt;interpretation&gt; element per interpretation).
 *
 * @param qis question interpretations
 * @return true, iff logging was successful
 */
public static boolean logInterpretations(QuestionInterpretation[] qis) {
// logging is disabled or log file is not specified
if (!enabled || logfile == null)
	return false;
try {
	// open the log file in append mode
	PrintWriter out = new PrintWriter(new FileOutputStream(logfile, true));
	try {
		for (QuestionInterpretation qi : qis) {
			out.println("\t<interpretation>");
			out.println("\t\t<property>");
			out.println("\t\t\t" + qi.getProperty());
			out.println("\t\t</property>");
			out.println("\t\t<target>");
			out.println("\t\t\t" + qi.getTarget());
			out.println("\t\t</target>");
			for (String context : qi.getContext()) {
				out.println("\t\t<context>");
				out.println("\t\t\t" + context);
				out.println("\t\t</context>");
			}
			out.println("\t</interpretation>");
		}
	} finally {
		// always release the file handle, even if writing fails
		// (the original leaked the stream on an exception)
		out.close();
	}
} catch (IOException e) {
	// log file could not be opened or written
	return false;
}
return true;
}
Use of info.ephyra.questionanalysis.QuestionInterpretation in project lucida by claritylab: class PatternLearner, method formQueries.
/**
 * Loads target-context-answer-regex tuples from resource files and forms
 * queries.
 * <p>
 * Each non-blank, non-comment line has the format
 * <code>TARGET#CONTEXT*#ANSWER#REGEX</code>; the file name is the PROPERTY.
 *
 * @param dir directory containing the target-context-answer-regex tuples
 * @return queries formed from the tuples (empty array if reading fails)
 */
private static Query[] formQueries(String dir) {
QuestionInterpretationG queryGenerator = new QuestionInterpretationG();
ArrayList<Query> results = new ArrayList<Query>();
File[] files = FileUtils.getFiles(dir);
try {
	for (File file : files) {
		// the file name encodes the PROPERTY of the tuples it contains
		String prop = file.getName();
		BufferedReader in = new BufferedReader(new FileReader(file));
		try {
			// read until EOF; the original polled in.ready(), which only
			// tells whether a read would block and can end the loop early
			String line;
			while ((line = in.readLine()) != null) {
				line = line.trim();
				if (line.length() == 0 || line.startsWith("//"))
					// skip blank lines and comments
					continue;
				// extract interpretation, answer string and pattern
				String[] tuple = line.split("#", -1);
				if (tuple.length < 3)
					// malformed line: would cause a negative array size below
					continue;
				String target = tuple[0];
				String[] context = new String[tuple.length - 3];
				for (int i = 1; i < tuple.length - 2; i++)
					context[i - 1] = tuple[i];
				String as = tuple[tuple.length - 2];
				String regex = tuple[tuple.length - 1];
				// complement answer string or regular expression
				if (as.equals(""))
					as = RegexConverter.regexToQueryStr(regex);
				else if (regex.equals(""))
					regex = RegexConverter.strToRegex(as);
				// create query object carrying its interpretation
				QuestionInterpretation qi =
						new QuestionInterpretation(target, context, prop);
				String[] kws = new String[] { "\"" + as + "\"" };
				String queryString =
						queryGenerator.queryString(target, context, kws);
				Query query = new Query(queryString, null, 0);
				query.setInterpretation(qi);
				// store query, answer and regular expression
				results.add(query);
				ass.put(queryString, as);
				regexs.put(queryString, regex);
			}
		} finally {
			// always close the reader (the original leaked it)
			in.close();
		}
	}
} catch (IOException e) {
	// a resource file could not be read: give up and return no queries
	return new Query[0];
}
return results.toArray(new Query[results.size()]);
}
Use of info.ephyra.questionanalysis.QuestionInterpretation in project lucida by claritylab: class PatternExtractor, method extract.
/**
 * Extracts answer patterns from the answer string of a <code>Result</code>
 * object and adds them to the <code>AnswerPatternFilter</code>.
 *
 * @param result <code>Result</code> object
 * @param as the answer to the question
 */
public static void extract(Result result, String as) {
// fetch the interpretation of the question this result answers
QuestionInterpretation qi = result.getQuery().getInterpretation();
String target = qi.getTarget();
// CONTEXT objects are deliberately ignored
// (original call: qi.getContext())
String[] contexts = new String[0];
String property = qi.getProperty();
String answer = result.getAnswer();
// tokenize target, contexts and provided answer; normalize to lower case
target = NETagger.tokenizeWithSpaces(target).toLowerCase();
for (int i = 0; i < contexts.length; i++) {
	contexts[i] = NETagger.tokenizeWithSpaces(contexts[i]).toLowerCase();
}
as = NETagger.tokenizeWithSpaces(as).toLowerCase();
// break the answer text into sentences and tokenize each one
String[] sentences = OpenNLP.sentDetect(answer);
String[][] tokens = new String[sentences.length][];
for (int i = 0; i < sentences.length; i++) {
	tokens[i] = NETagger.tokenize(sentences[i]);
	sentences[i] = StringUtils.concatWithSpaces(tokens[i]);
}
// tag named entities in the tokenized sentences
String[][][] nes = NETagger.extractNes(tokens);
// lower-case the sentences and all named entities
for (int i = 0; i < nes.length; i++) {
	sentences[i] = sentences[i].toLowerCase();
	for (int j = 0; j < nes[i].length; j++) {
		for (int k = 0; k < nes[i][j].length; k++) {
			nes[i][j][k] = nes[i][j][k].toLowerCase();
		}
	}
}
// derive generalized patterns from each sentence and register them
for (int i = 0; i < sentences.length; i++) {
	// prepare sentence for pattern extraction
	sentences[i] = prepSentence(sentences[i], target, contexts, as, nes[i]);
	if (sentences[i] == null) {
		continue;
	}
	// extract and generalize patterns, then add them to the filter
	String[] patterns =
			generalizePatterns(extractPatterns(sentences[i]), property);
	for (String pattern : patterns) {
		AnswerPatternFilter.addPattern(pattern, property);
	}
}
}
Use of info.ephyra.questionanalysis.QuestionInterpretation in project lucida by claritylab: class EphyraTREC13To16, method runAndEval.
/**
 * Initializes Ephyra, asks the questions or loads the answers from a log
 * file, evaluates the answers if patterns are available and logs and saves
 * the answers.
 */
private static void runAndEval() {
// initialize Ephyra
EphyraTREC13To16 ephyra = new EphyraTREC13To16();
// evaluate for multiple thresholds
// NOTE(review): the threshold sweep loops below are commented out, so
// each threshold keeps its initial value and firstThreshold stays true
// for the single pass
boolean firstThreshold = true;
// for (float fAbsThresh = FACTOID_ABS_THRESH;
// fAbsThresh <= 1; fAbsThresh += 0.01) {
float fAbsThresh = FACTOID_ABS_THRESH;
// for (float lRelThresh = LIST_REL_THRESH;
// lRelThresh <= 1; lRelThresh += 0.01) {
float lRelThresh = LIST_REL_THRESH;
for (TRECTarget target : targets) {
MsgPrinter.printTarget(target.getTargetDesc());
// normalize target description, determine target types
// (only once; preprocessing mutates the target in place)
if (firstThreshold)
TargetPreprocessor.preprocess(target);
String targetDesc = target.getTargetDesc();
String condensedTarget = target.getCondensedTarget();
TRECQuestion[] questions = target.getQuestions();
// condensed target is used as contextual information
QuestionAnalysis.setContext(condensedTarget);
for (int i = 0; i < questions.length; i++) {
MsgPrinter.printQuestion(questions[i].getQuestionString());
String id = questions[i].getId();
String type = questions[i].getType();
// qs: the question string actually submitted to Ephyra
String qs;
if (type.equals("FACTOID") || type.equals("LIST")) {
// resolve coreferences in factoid and list questions
// (only once; resolution rewrites the question in the target)
if (firstThreshold) {
MsgPrinter.printResolvingCoreferences();
CorefResolver.resolvePronounsToTarget(target, i);
}
qs = questions[i].getQuestionString();
} else {
// OTHER questions use the target description itself
qs = targetDesc;
}
// set pattern used to evaluate answers for overlap analysis
OverlapAnalysisFilter.setPattern(null);
if (type.equals("FACTOID")) {
for (TRECPattern pattern : factoidPatterns) {
if (pattern.getId().equals(id)) {
OverlapAnalysisFilter.setPattern(pattern);
break;
}
}
}
// ask Ephyra or load answer from log file
Result[] results = null;
if ((type.equals("FACTOID") && factoidLog) || (type.equals("LIST") && listLog) || (type.equals("OTHER") && otherLog)) {
results = TREC13To16Parser.loadResults(qs, type, inputLogFile);
}
if (results == null) {
// answer not loaded from log file: ask Ephyra, wrapped in
// per-question-type logging calls
if (type.equals("FACTOID")) {
Logger.logFactoidStart(qs);
results = ephyra.askFactoid(qs, FACTOID_MAX_ANSWERS, FACTOID_ABS_THRESH);
// results = new Result[0];
Logger.logResults(results);
Logger.logFactoidEnd();
} else if (type.equals("LIST")) {
Logger.logListStart(qs);
results = ephyra.askList(qs, LIST_REL_THRESH);
// results = new Result[0];
Logger.logResults(results);
Logger.logListEnd();
} else {
Logger.logDefinitionalStart(qs);
results = ephyra.askOther(target);
// results = new Result[0];
Logger.logResults(results);
Logger.logDefinitionalEnd();
}
}
// calculate question score if patterns are available
// correct stays null for OTHER questions or missing patterns
boolean[] correct = null;
if (type.equals("FACTOID") && factoidPatterns != null)
correct = evalFactoidQuestion(id, results, fAbsThresh);
else if (type.equals("LIST") && listPatterns != null)
correct = evalListQuestion(id, results, lRelThresh);
// update target data structure with the answers found
TRECAnswer[] answers = new TRECAnswer[results.length];
for (int j = 0; j < results.length; j++) {
String answer = results[j].getAnswer();
String supportDoc = results[j].getDocID();
answers[j] = new TRECAnswer(id, answer, supportDoc);
}
questions[i].setAnswers(answers);
// keep the interpretation of the top result, if any
if (results.length > 0) {
QuestionInterpretation qi = results[0].getQuery().getInterpretation();
if (qi != null)
questions[i].setInterpretation(qi);
}
if (answers.length == 0) {
// no answer found: emit the TREC-required placeholder
// ("NIL" for factoid; a dummy document ID otherwise)
answers = new TRECAnswer[1];
if (type.equals("FACTOID"))
answers[0] = new TRECAnswer(id, null, "NIL");
else
answers[0] = new TRECAnswer(id, "No answers found.", "XIE19960101.0001");
}
// save answers to output file
TREC13To16Parser.saveAnswers("log/" + runTag, answers, correct, runTag);
}
// calculate target scores if patterns are available
if (factoidPatterns != null)
evalFactoidTarget();
if (listPatterns != null)
evalListTarget();
}
// calculate component scores and log scores if patterns are available
if (factoidPatterns != null)
evalFactoidTotal(fAbsThresh);
if (listPatterns != null)
evalListTotal(lRelThresh);
firstThreshold = false;
// }
// }
}
Use of info.ephyra.questionanalysis.QuestionInterpretation in project lucida by claritylab: class EphyraTREC13To16, method askOther.
/**
 * Asks Ephyra an 'other' question, making use of the target description and
 * previous questions and answers.
 *
 * @param target the target the 'other' question is about
 * @return array of results
 */
public Result[] askOther(TRECTarget target) {
// get target type from interpretations of factoid/list questions
TRECQuestion[] factoidQuestions = target.getQuestions();
ArrayList<String> props = new ArrayList<String>();
ArrayList<String> vals = new ArrayList<String>();
// sentences collects individual tokens (not full sentences) whose terms
// should be treated as already known and filtered from new results
ArrayList<String> sentences = new ArrayList<String>();
String[] targetTokens = NETagger.tokenize(target.getTargetDesc());
for (String tt : targetTokens) sentences.add(tt);
// collect properties and answers from FACTOID and LIST questions
for (TRECQuestion fq : factoidQuestions) {
QuestionInterpretation qi = fq.getInterpretation();
if (qi != null) {
String prop = qi.getProperty();
TRECAnswer[] answers = fq.getAnswers();
if (answers.length != 0) {
// collect property/value pair from the top answer
String val = answers[0].getAnswerString();
props.add(prop);
vals.add(val);
// MsgPrinter.printStatusMsg("Dossier on '" + target.getTargetDesc() + "' contains: '" + prop + "' is '" + val + "'");
// remember the question's tokens as known terms as well
String[] questionTokens = NETagger.tokenize(fq.getQuestionString());
for (String qt : questionTokens) sentences.add(qt);
}
}
}
// filter out results that bring no new terms but ones contained in the target, a previous question, or an answer to a previous question
TermFilter.setPreviousResultsTerms(sentences.toArray(new String[sentences.size()]));
// initialize Dossier
// Dossier dossier = Dossier.getDossier(target.getTargetDesc(), target.getTargetType(), props.toArray(new String[props.size()]), vals.toArray(new String[vals.size()]));
// NOTE(review): target type is passed as null; the commented-out call
// above used target.getTargetType() instead
Dossier dossier = Dossier.getDossier(target.getTargetDesc(), null, props.toArray(new String[props.size()]), vals.toArray(new String[vals.size()]));
// MsgPrinter.printStatusMsg("Target type of '" + target.getTargetDesc() + "' is " + dossier.getTargetType());
ArrayList<Result> rawResults = new ArrayList<Result>();
// collect missing properties
String[] missingProps = dossier.getMissingPropertyNames();
// NOTE(review): the follow-up FACTOID querying below is disabled, so this
// loop currently only generates question strings and has no other effect
for (String mp : missingProps) {
// generate FACTOID question from template
String question = QuestionInterpreter.getQuestion(target.getTargetDesc(), mp);
// if valid template exists, ask FACTOID question
if (question != null) {
// MsgPrinter.printStatusMsg("Building Dossier on '" + target.getTargetDesc() + "', would ask this question now: '" + question + "'");
// Logger.enableLogging(false);
// Result res = this.askFactoid(question);
// Logger.enableLogging(true);
//
// // if question could be answered, add new property and value to dossier
// if (res != null) {
// dossier.setProperty(mp, res.getAnswer());
// MsgPrinter.printStatusMsg("Dossier on '" + target.getTargetDesc() + "' extended: '" + mp + "' set to '" + res.getAnswer() + "'");
// rawResults.add(res);
// String sentence = res.getSentence();
//
// // get supporting sentence of answer and, if existing, remember it as nugget
// if (sentence != null) {
// Result newRes = new Result(sentence, res.getQuery(), res.getDocID(), res.getHitPos());
// newRes.setScore(res.getScore() + 2);
// rawResults.add(newRes);
// }
// }
}
}
NuggetEvaluationFilter.setTargetID(target.getId());
// collect BagOfWords results for target
// (presumably delegates to an askOther(String) overload — confirm)
Result[] nuggets = askOther(target.getTargetDesc());
for (Result r : nuggets) rawResults.add(r);
nuggets = rawResults.toArray(new Result[rawResults.size()]);
NuggetEvaluationFilter.targetFinished();
// reset term filter and evaluation target so later calls start clean
TermFilter.setPreviousResultsTerms(null);
NuggetEvaluationFilter.setTargetID(null);
return nuggets;
}
Aggregations