Search in sources:

Example 6 with QuestionInterpretation

use of info.ephyra.questionanalysis.QuestionInterpretation in project lucida by claritylab.

the class Logger method logInterpretations.

/**
 * Logs the interpretations of a question by appending one
 * <code>&lt;interpretation&gt;</code> element per interpretation to the
 * log file. Each element contains the property, the target and one
 * <code>&lt;context&gt;</code> element per context string.
 *
 * @param qis question interpretations
 * @return true, iff logging was successful; false if logging is disabled,
 *         no log file is specified, the log file could not be opened or
 *         writing to it failed
 */
public static boolean logInterpretations(QuestionInterpretation[] qis) {
    // logging is disabled or log file is not specified
    if (!enabled || logfile == null)
        return false;
    PrintWriter out = null;
    try {
        // open in append mode so that existing log entries are preserved
        out = new PrintWriter(new FileOutputStream(logfile, true));
        for (QuestionInterpretation qi : qis) {
            out.println("\t<interpretation>");
            out.println("\t\t<property>");
            out.println("\t\t\t" + qi.getProperty());
            out.println("\t\t</property>");
            out.println("\t\t<target>");
            out.println("\t\t\t" + qi.getTarget());
            out.println("\t\t</target>");
            for (String context : qi.getContext()) {
                out.println("\t\t<context>");
                out.println("\t\t\t" + context);
                out.println("\t\t</context>");
            }
            out.println("\t</interpretation>");
        }
        // PrintWriter never throws on write errors; checkError() flushes the
        // stream and reports whether any write has failed
        return !out.checkError();
    } catch (IOException e) {
        // the log file could not be opened
        return false;
    } finally {
        // release the file handle even if an unchecked exception is thrown
        // while the interpretations are being written
        if (out != null)
            out.close();
    }
}
Also used : QuestionInterpretation(info.ephyra.questionanalysis.QuestionInterpretation) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) PrintWriter(java.io.PrintWriter)

Example 7 with QuestionInterpretation

use of info.ephyra.questionanalysis.QuestionInterpretation in project lucida by claritylab.

the class PatternLearner method formQueries.

/**
 * Loads target-context-answer-regex tuples from resource files and forms
 * queries.
 * <p>
 * Each non-blank line that does not start with <code>//</code> is split at
 * <code>#</code>: the first field is the target, the last two fields are the
 * answer string and the regular expression, and all fields in between are
 * context strings. The name of the file is used as the property of the
 * interpretation. For each tuple the answer string and the regular
 * expression are stored in <code>ass</code> and <code>regexs</code> under
 * the generated query string.
 *
 * @param dir directory containing the target-context-answer-regex tuples
 * @return queries formed from the tuples, or an empty array if reading a
 *         file failed
 */
private static Query[] formQueries(String dir) {
    QuestionInterpretationG queryGenerator = new QuestionInterpretationG();
    ArrayList<Query> results = new ArrayList<Query>();
    File[] files = FileUtils.getFiles(dir);
    try {
        for (File file : files) {
            String prop = file.getName();
            BufferedReader in = new BufferedReader(new FileReader(file));
            try {
                String line;
                // readLine() returns null at the end of the file
                while ((line = in.readLine()) != null) {
                    line = line.trim();
                    if (line.length() == 0 || line.startsWith("//"))
                        // skip blank lines and comments
                        continue;
                    // extract interpretation, answer string and pattern;
                    // limit -1 keeps trailing empty fields
                    String[] tuple = line.split("#", -1);
                    String target = tuple[0];
                    // everything between the target and the last two fields
                    String[] context = new String[tuple.length - 3];
                    System.arraycopy(tuple, 1, context, 0, context.length);
                    String as = tuple[tuple.length - 2];
                    String regex = tuple[tuple.length - 1];
                    // complement answer string or regular expression
                    if (as.equals(""))
                        as = RegexConverter.regexToQueryStr(regex);
                    else if (regex.equals(""))
                        regex = RegexConverter.strToRegex(as);
                    // create query object
                    QuestionInterpretation qi = new QuestionInterpretation(target, context, prop);
                    String[] kws = new String[] { "\"" + as + "\"" };
                    String queryString = queryGenerator.queryString(target, context, kws);
                    Query query = new Query(queryString, null, 0);
                    query.setInterpretation(qi);
                    // store query, answer and regular expression
                    results.add(query);
                    ass.put(queryString, as);
                    regexs.put(queryString, regex);
                }
            } finally {
                // release the file handle before moving on to the next file
                in.close();
            }
        }
    } catch (IOException e) {
        return new Query[0];
    }
    return results.toArray(new Query[results.size()]);
}
Also used : QuestionInterpretation(info.ephyra.questionanalysis.QuestionInterpretation) Query(info.ephyra.querygeneration.Query) ArrayList(java.util.ArrayList) IOException(java.io.IOException) BufferedReader(java.io.BufferedReader) QuestionInterpretationG(info.ephyra.querygeneration.generators.QuestionInterpretationG) FileReader(java.io.FileReader) File(java.io.File)

Example 8 with QuestionInterpretation

use of info.ephyra.questionanalysis.QuestionInterpretation in project lucida by claritylab.

the class PatternExtractor method extract.

/**
 * Derives answer patterns from the answer string of a <code>Result</code>
 * object and registers them with the <code>AnswerPatternFilter</code>.
 *
 * @param result <code>Result</code> object
 * @param as the answer to the question
 */
public static void extract(Result result, String as) {
    // fetch target, property and answer string of the result
    QuestionInterpretation interpretation = result.getQuery().getInterpretation();
    String targetObj = interpretation.getTarget();
    // CONTEXT objects are ignored: an empty array stands in for
    // interpretation.getContext()
    String[] contextObjs = new String[0];
    String property = interpretation.getProperty();
    String answerText = result.getAnswer();

    // tokenize interpretation and provided answer, convert to lower-case
    targetObj = NETagger.tokenizeWithSpaces(targetObj).toLowerCase();
    for (int c = 0; c < contextObjs.length; c++) {
        contextObjs[c] = NETagger.tokenizeWithSpaces(contextObjs[c]).toLowerCase();
    }
    as = NETagger.tokenizeWithSpaces(as).toLowerCase();

    // split the answer string into sentences and rebuild each sentence from
    // its tokens
    String[] sentences = OpenNLP.sentDetect(answerText);
    String[][] tokens = new String[sentences.length][];
    for (int s = 0; s < sentences.length; s++) {
        String[] sentenceTokens = NETagger.tokenize(sentences[s]);
        tokens[s] = sentenceTokens;
        sentences[s] = StringUtils.concatWithSpaces(sentenceTokens);
    }

    // extract named entities from the tokenized sentences
    String[][][] nes = NETagger.extractNes(tokens);

    // lower-case the sentences and all named entities
    for (int s = 0; s < nes.length; s++) {
        sentences[s] = sentences[s].toLowerCase();
        for (String[] entities : nes[s]) {
            for (int k = 0; k < entities.length; k++) {
                entities[k] = entities[k].toLowerCase();
            }
        }
    }

    for (int s = 0; s < sentences.length; s++) {
        // prepare sentence for pattern extraction; null means the sentence
        // is skipped
        String prepared = prepSentence(sentences[s], targetObj, contextObjs, as, nes[s]);
        if (prepared != null) {
            // extract the patterns, generalize them and add them to the filter
            String[] generalized = generalizePatterns(extractPatterns(prepared), property);
            for (String pattern : generalized) {
                AnswerPatternFilter.addPattern(pattern, property);
            }
        }
    }
}
Also used : QuestionInterpretation(info.ephyra.questionanalysis.QuestionInterpretation)

Example 9 with QuestionInterpretation

use of info.ephyra.questionanalysis.QuestionInterpretation in project lucida by claritylab.

the class EphyraTREC13To16 method runAndEval.

/**
 * Initializes Ephyra, asks the questions or loads the answers from a log
 * file, evaluates the answers if patterns are available and logs and saves
 * the answers.
 * <p>
 * For each target and each of its questions the answers are stored in the
 * question, the interpretation of the top result (if any) is remembered and
 * the answers are saved to the output file <code>"log/" + runTag</code>.
 * Evaluation steps are skipped when the respective patterns
 * (<code>factoidPatterns</code>, <code>listPatterns</code>) are
 * <code>null</code>.
 */
private static void runAndEval() {
    // initialize Ephyra
    EphyraTREC13To16 ephyra = new EphyraTREC13To16();
    // evaluate for multiple thresholds
    // (the threshold loops are disabled, a single fixed threshold is used)
    boolean firstThreshold = true;
    //		for (float fAbsThresh = FACTOID_ABS_THRESH;
    //			 fAbsThresh <= 1; fAbsThresh += 0.01) {
    float fAbsThresh = FACTOID_ABS_THRESH;
    //		for (float lRelThresh = LIST_REL_THRESH;
    //			 lRelThresh <= 1; lRelThresh += 0.01) {
    float lRelThresh = LIST_REL_THRESH;
    for (TRECTarget target : targets) {
        MsgPrinter.printTarget(target.getTargetDesc());
        // normalize target description, determine target types
        if (firstThreshold)
            TargetPreprocessor.preprocess(target);
        String targetDesc = target.getTargetDesc();
        String condensedTarget = target.getCondensedTarget();
        TRECQuestion[] questions = target.getQuestions();
        // condensed target is used as contextual information
        QuestionAnalysis.setContext(condensedTarget);
        for (int i = 0; i < questions.length; i++) {
            MsgPrinter.printQuestion(questions[i].getQuestionString());
            String id = questions[i].getId();
            String type = questions[i].getType();
            String qs;
            if (type.equals("FACTOID") || type.equals("LIST")) {
                // resolve coreferences in factoid and list questions
                if (firstThreshold) {
                    MsgPrinter.printResolvingCoreferences();
                    CorefResolver.resolvePronounsToTarget(target, i);
                }
                // read the question string only after coreference resolution
                qs = questions[i].getQuestionString();
            } else {
                // other questions are asked with the target description
                qs = targetDesc;
            }
            // set pattern used to evaluate answers for overlap analysis
            OverlapAnalysisFilter.setPattern(null);
            // factoidPatterns is null when no patterns are available (see
            // the checks further below), so it must not be iterated blindly
            if (type.equals("FACTOID") && factoidPatterns != null) {
                for (TRECPattern pattern : factoidPatterns) {
                    if (pattern.getId().equals(id)) {
                        OverlapAnalysisFilter.setPattern(pattern);
                        break;
                    }
                }
            }
            // ask Ephyra or load answer from log file
            Result[] results = null;
            if ((type.equals("FACTOID") && factoidLog) || (type.equals("LIST") && listLog) || (type.equals("OTHER") && otherLog)) {
                results = TREC13To16Parser.loadResults(qs, type, inputLogFile);
            }
            if (results == null) {
                // answer not loaded from log file
                if (type.equals("FACTOID")) {
                    Logger.logFactoidStart(qs);
                    results = ephyra.askFactoid(qs, FACTOID_MAX_ANSWERS, FACTOID_ABS_THRESH);
                    //						results = new Result[0];
                    Logger.logResults(results);
                    Logger.logFactoidEnd();
                } else if (type.equals("LIST")) {
                    Logger.logListStart(qs);
                    results = ephyra.askList(qs, LIST_REL_THRESH);
                    //						results = new Result[0];
                    Logger.logResults(results);
                    Logger.logListEnd();
                } else {
                    Logger.logDefinitionalStart(qs);
                    results = ephyra.askOther(target);
                    //						results = new Result[0];
                    Logger.logResults(results);
                    Logger.logDefinitionalEnd();
                }
            }
            // calculate question score if patterns are available
            boolean[] correct = null;
            if (type.equals("FACTOID") && factoidPatterns != null)
                correct = evalFactoidQuestion(id, results, fAbsThresh);
            else if (type.equals("LIST") && listPatterns != null)
                correct = evalListQuestion(id, results, lRelThresh);
            // update target data structure
            TRECAnswer[] answers = new TRECAnswer[results.length];
            for (int j = 0; j < results.length; j++) {
                String answer = results[j].getAnswer();
                String supportDoc = results[j].getDocID();
                answers[j] = new TRECAnswer(id, answer, supportDoc);
            }
            questions[i].setAnswers(answers);
            // remember the interpretation of the top result, if there is one
            if (results.length > 0) {
                QuestionInterpretation qi = results[0].getQuery().getInterpretation();
                if (qi != null)
                    questions[i].setInterpretation(qi);
            }
            if (answers.length == 0) {
                // no answer found: a placeholder answer is written to the
                // output file only, the question keeps its empty answer array
                answers = new TRECAnswer[1];
                if (type.equals("FACTOID"))
                    answers[0] = new TRECAnswer(id, null, "NIL");
                else
                    answers[0] = new TRECAnswer(id, "No answers found.", "XIE19960101.0001");
            }
            // save answers to output file
            TREC13To16Parser.saveAnswers("log/" + runTag, answers, correct, runTag);
        }
        // calculate target scores if patterns are available
        if (factoidPatterns != null)
            evalFactoidTarget();
        if (listPatterns != null)
            evalListTarget();
    }
    // calculate component scores and log scores if patterns are available
    if (factoidPatterns != null)
        evalFactoidTotal(fAbsThresh);
    if (listPatterns != null)
        evalListTotal(lRelThresh);
    firstThreshold = false;
//		}
//		}
}
Also used : QuestionInterpretation(info.ephyra.questionanalysis.QuestionInterpretation) Result(info.ephyra.search.Result)

Example 10 with QuestionInterpretation

use of info.ephyra.questionanalysis.QuestionInterpretation in project lucida by claritylab.

the class EphyraTREC13To16 method askOther.

/**
 * Asks Ephyra an 'other' question, drawing on the target description as
 * well as on previous questions and their answers.
 *
 * @param target the target the 'other' question is about
 * @return array of results
 */
public Result[] askOther(TRECTarget target) {
    // properties and values are gathered from the interpretations and
    // answers of the questions already attached to the target
    TRECQuestion[] previousQuestions = target.getQuestions();
    ArrayList<String> properties = new ArrayList<String>();
    ArrayList<String> values = new ArrayList<String>();
    // terms that are already known: target tokens plus question tokens
    ArrayList<String> knownTerms = new ArrayList<String>();
    String[] targetTokens = NETagger.tokenize(target.getTargetDesc());
    for (int t = 0; t < targetTokens.length; t++) {
        knownTerms.add(targetTokens[t]);
    }

    // collect properties and answers from FACTOID and LIST questions
    for (TRECQuestion previous : previousQuestions) {
        QuestionInterpretation interpretation = previous.getInterpretation();
        if (interpretation == null) {
            continue;
        }
        String property = interpretation.getProperty();
        TRECAnswer[] previousAnswers = previous.getAnswers();
        if (previousAnswers.length == 0) {
            continue;
        }
        // collect property/value pair, using the first answer as value
        String value = previousAnswers[0].getAnswerString();
        properties.add(property);
        values.add(value);
        //					MsgPrinter.printStatusMsg("Dossier on '" + target.getTargetDesc() + "' contains: '" + prop + "' is '" + val + "'");
        // remember the tokens of the question for the term filter
        String[] questionTokens = NETagger.tokenize(previous.getQuestionString());
        for (int q = 0; q < questionTokens.length; q++) {
            knownTerms.add(questionTokens[q]);
        }
    }

    // filter out results that bring no new terms but ones contained in the
    // target, a previous question, or an answer to a previous question
    String[] knownTermArray = knownTerms.toArray(new String[knownTerms.size()]);
    TermFilter.setPreviousResultsTerms(knownTermArray);

    // initialize Dossier (the target type is passed as null)
    //		Dossier dossier = Dossier.getDossier(target.getTargetDesc(), target.getTargetType(), props.toArray(new String[props.size()]), vals.toArray(new String[vals.size()]));
    Dossier dossier = Dossier.getDossier(
            target.getTargetDesc(),
            null,
            properties.toArray(new String[properties.size()]),
            values.toArray(new String[values.size()]));
    //		MsgPrinter.printStatusMsg("Target type of '" + target.getTargetDesc() + "' is " + dossier.getTargetType());
    ArrayList<Result> collected = new ArrayList<Result>();

    // walk over the missing properties; asking the generated FACTOID
    // questions is currently disabled, only the question is generated
    for (String missing : dossier.getMissingPropertyNames()) {
        // generate FACTOID question from template
        String question = QuestionInterpreter.getQuestion(target.getTargetDesc(), missing);
        // if valid template exists, ask FACTOID question
        if (question != null) {
        //				MsgPrinter.printStatusMsg("Building Dossier on '" + target.getTargetDesc() + "', would ask this question now: '" + question + "'");
        //				Logger.enableLogging(false);
        //				Result res = this.askFactoid(question);
        //				Logger.enableLogging(true);
        //				
        //				//	if question could be answered, add new property and value to dossier
        //				if (res != null) {
        //					dossier.setProperty(mp, res.getAnswer());
        //					MsgPrinter.printStatusMsg("Dossier on '" + target.getTargetDesc() + "' extended: '" + mp + "' set to '" + res.getAnswer() + "'");
        //					rawResults.add(res);
        //					String sentence = res.getSentence();
        //					
        //					//	get supporting sentence of answer and, if existing, remember it as nugget
        //					if (sentence != null) {
        //						Result newRes = new Result(sentence, res.getQuery(), res.getDocID(), res.getHitPos());
        //						newRes.setScore(res.getScore() + 2);
        //						rawResults.add(newRes);
        //					}
        //				}
        }
    }

    NuggetEvaluationFilter.setTargetID(target.getId());
    // collect BagOfWords results for target
    Result[] bagOfWords = askOther(target.getTargetDesc());
    for (int r = 0; r < bagOfWords.length; r++) {
        collected.add(bagOfWords[r]);
    }
    Result[] nuggets = collected.toArray(new Result[collected.size()]);
    NuggetEvaluationFilter.targetFinished();

    // reset term filter and target ID
    TermFilter.setPreviousResultsTerms(null);
    NuggetEvaluationFilter.setTargetID(null);
    return nuggets;
}
Also used : QuestionInterpretation(info.ephyra.questionanalysis.QuestionInterpretation) ArrayList(java.util.ArrayList) Result(info.ephyra.search.Result) Dossier(info.ephyra.answerselection.definitional.Dossier)

Aggregations

QuestionInterpretation (info.ephyra.questionanalysis.QuestionInterpretation)11 Result (info.ephyra.search.Result)5 Query (info.ephyra.querygeneration.Query)4 IOException (java.io.IOException)4 ArrayList (java.util.ArrayList)4 BufferedReader (java.io.BufferedReader)2 FileOutputStream (java.io.FileOutputStream)2 FileReader (java.io.FileReader)2 PrintWriter (java.io.PrintWriter)2 AnswerPattern (info.ephyra.answerselection.AnswerPattern)1 Dossier (info.ephyra.answerselection.definitional.Dossier)1 QuestionInterpretationG (info.ephyra.querygeneration.generators.QuestionInterpretationG)1 File (java.io.File)1 StringReader (java.io.StringReader)1 Hashtable (java.util.Hashtable)1 DocumentBuilder (javax.xml.parsers.DocumentBuilder)1 DocumentBuilderFactory (javax.xml.parsers.DocumentBuilderFactory)1 Document (org.w3c.dom.Document)1 Element (org.w3c.dom.Element)1 NodeList (org.w3c.dom.NodeList)1