Search in sources:

Example 11 with Query

use of info.ephyra.querygeneration.Query in project lucida by claritylab.

The class PatternLearner, method extract.

/**
	 * Loads target-context-answer-regex tuples from resource files, forms
	 * queries, fetches text passages, extracts answer patterns and writes them
	 * to resource files.
	 * 
	 * @return <code>true</code>, iff the answer patterns could be extracted
	 */
public static boolean extract() {
    // form queries from the tuples in both interpretation directories
    MsgPrinter.printFormingQueries();
    ass = new Hashtable<String, String>();
    regexs = new Hashtable<String, String>();
    ArrayList<Query> collected = new ArrayList<Query>();
    for (Query q : formQueries("res/patternlearning/interpretations")) {
        collected.add(q);
    }
    for (Query q : formQueries("res/patternlearning/interpretations_extract")) {
        collected.add(q);
    }
    Query[] allQueries = collected.toArray(new Query[collected.size()]);
    // retrieve text passages for all queries
    MsgPrinter.printFetchingPassages();
    Result[] passages = fetchPassages(allQueries);
    // derive answer patterns from the passages
    MsgPrinter.printExtractingPatterns();
    extractPatterns(passages);
    // persist the extracted patterns
    MsgPrinter.printSavingPatterns();
    return savePatterns("res/patternlearning/answerpatterns_extract");
}
Also used : Query(info.ephyra.querygeneration.Query) ArrayList(java.util.ArrayList) Result(info.ephyra.search.Result)

Example 12 with Query

use of info.ephyra.querygeneration.Query in project lucida by claritylab.

The class PatternLearner, method assess.

/**
	 * Loads target-context-answer-regex tuples and answer patterns from
	 * resource files, forms queries from the tuples, fetches text passages,
	 * assesses the answer patterns on the text passages and writes them to
	 * resource files.
	 * 
	 * @return <code>true</code>, iff the answer patterns could be assessed
	 */
public static boolean assess() {
    // the previously extracted patterns are a precondition for assessment
    MsgPrinter.printLoadingPatterns();
    if (!loadPatterns("res/patternlearning/answerpatterns_extract"))
        return false;
    // form queries from the tuples in both interpretation directories
    MsgPrinter.printFormingQueries();
    ass = new Hashtable<String, String>();
    regexs = new Hashtable<String, String>();
    ArrayList<Query> collected = new ArrayList<Query>();
    for (Query q : formQueries("res/patternlearning/interpretations")) {
        collected.add(q);
    }
    for (Query q : formQueries("res/patternlearning/interpretations_assess")) {
        collected.add(q);
    }
    Query[] allQueries = collected.toArray(new Query[collected.size()]);
    // retrieve text passages for all queries
    MsgPrinter.printFetchingPassages();
    Result[] passages = fetchPassages(allQueries);
    // evaluate the patterns against the retrieved passages
    MsgPrinter.printAssessingPatterns();
    assessPatterns(passages);
    // persist the assessed patterns
    MsgPrinter.printSavingPatterns();
    return savePatterns("res/patternlearning/answerpatterns_assess");
}
Also used : Query(info.ephyra.querygeneration.Query) ArrayList(java.util.ArrayList) Result(info.ephyra.search.Result)

Example 13 with Query

use of info.ephyra.querygeneration.Query in project lucida by claritylab.

The class PatternLearner, method formQueries.

/**
	 * Loads target-context-answer-regex tuples from resource files and forms
	 * queries.
	 * <p>
	 * Each non-blank, non-comment line has the format
	 * <code>target#context...#answerString#regex</code>; a missing answer
	 * string or regex is derived from the other via <code>RegexConverter</code>.
	 * Queries are stored in <code>results</code>; the answer string and regex
	 * for each query string are recorded in the <code>ass</code> and
	 * <code>regexs</code> tables as a side effect.
	 * 
	 * @param dir directory containing the target-context-answer-regex tuples
	 * @return queries formed from the tuples, or an empty array on I/O error
	 */
private static Query[] formQueries(String dir) {
    QuestionInterpretationG queryGenerator = new QuestionInterpretationG();
    ArrayList<Query> results = new ArrayList<Query>();
    File[] files = FileUtils.getFiles(dir);
    try {
        for (File file : files) {
            // property name of the interpretation is the file name
            String prop = file.getName();
            BufferedReader in = new BufferedReader(new FileReader(file));
            // FIX: the original never closed the reader, leaking one file
            // handle per tuple file (and all of them on an IOException)
            try {
                while (in.ready()) {
                    String line = in.readLine().trim();
                    if (line.length() == 0 || line.startsWith("//"))
                        // skip blank lines and comments
                        continue;
                    // extract interpretation, answer string and pattern;
                    // limit -1 keeps trailing empty fields
                    String[] tuple = line.split("#", -1);
                    String target = tuple[0];
                    String[] context = new String[tuple.length - 3];
                    for (int i = 1; i < tuple.length - 2; i++) context[i - 1] = tuple[i];
                    String as = tuple[tuple.length - 2];
                    String regex = tuple[tuple.length - 1];
                    // complement answer string or regular expression
                    if (as.equals(""))
                        as = RegexConverter.regexToQueryStr(regex);
                    else if (regex.equals(""))
                        regex = RegexConverter.strToRegex(as);
                    // create query object; answer string is quoted as a
                    // single phrase keyword
                    QuestionInterpretation qi = new QuestionInterpretation(target, context, prop);
                    String[] kws = new String[] { "\"" + as + "\"" };
                    String queryString = queryGenerator.queryString(target, context, kws);
                    Query query = new Query(queryString, null, 0);
                    query.setInterpretation(qi);
                    // store query, answer and regular expression
                    results.add(query);
                    ass.put(queryString, as);
                    regexs.put(queryString, regex);
                }
            } finally {
                in.close();
            }
        }
    } catch (IOException e) {
        // best-effort: an unreadable tuple file yields no queries at all
        return new Query[0];
    }
    return results.toArray(new Query[results.size()]);
}
Also used : QuestionInterpretation(info.ephyra.questionanalysis.QuestionInterpretation) Query(info.ephyra.querygeneration.Query) ArrayList(java.util.ArrayList) IOException(java.io.IOException) BufferedReader(java.io.BufferedReader) QuestionInterpretationG(info.ephyra.querygeneration.generators.QuestionInterpretationG) FileReader(java.io.FileReader) File(java.io.File)

Example 14 with Query

use of info.ephyra.querygeneration.Query in project lucida by claritylab.

The class BagOfWordsG, method generateQueries.

/**
	 * Generates a "bag of words" query from the keywords in the question
	 * string.
	 * 
	 * @param aq analyzed question
	 * @return <code>Query</code> objects
	 */
public Query[] generateQueries(AnalyzedQuestion aq) {
    // skip factoid questions for which neither an answer type nor any
    // predicate could be determined; all other questions get a query
    String[] answerTypes = aq.getAnswerTypes();
    Predicate[] predicates = aq.getPredicates();
    boolean nothingKnown = (answerTypes.length == 0 && predicates.length == 0);
    if (nothingKnown && aq.isFactoid())
        return new Query[0];
    // build the "bag of words" query string from terms and keywords
    String queryString = getQueryString(aq.getTerms(), aq.getKeywords());
    // wrap it in a single query and record answer extraction techniques
    Query query = new Query(queryString, aq, SCORE);
    query.setExtractionTechniques(EXTRACTION_TECHNIQUES);
    return new Query[] { query };
}
Also used : Query(info.ephyra.querygeneration.Query) Term(info.ephyra.questionanalysis.Term) Predicate(info.ephyra.nlp.semantics.Predicate)

Example 15 with Query

use of info.ephyra.querygeneration.Query in project lucida by claritylab.

The class WikipediaTermImportanceFilter, method main.

/**
 * Entry point for testing the Wikipedia term importance filter: sets up the
 * NLP components it depends on (tokenizer, stemmer, NE taggers), then for
 * each TREC target builds a bag-of-words query and applies the filter to a
 * single dummy result.
 *
 * @param args args[0] is the path of the TREC target file to load
 */
public static void main(String[] args) {
    // enable term download mode and console feedback for this test run
    TEST_TERM_DOWMLOD = true;
    MsgPrinter.enableStatusMsgs(true);
    MsgPrinter.enableErrorMsgs(true);
    // create tokenizer
    MsgPrinter.printStatusMsg("Creating tokenizer...");
    if (!OpenNLP.createTokenizer("res/nlp/tokenizer/opennlp/EnglishTok.bin.gz"))
        MsgPrinter.printErrorMsg("Could not create tokenizer.");
    //		LingPipe.createTokenizer();
    //		// create sentence detector
    //		MsgPrinter.printStatusMsg("Creating sentence detector...");
    //		if (!OpenNLP.createSentenceDetector("res/nlp/sentencedetector/opennlp/EnglishSD.bin.gz"))
    //			MsgPrinter.printErrorMsg("Could not create sentence detector.");
    //		LingPipe.createSentenceDetector();
    // create stemmer
    MsgPrinter.printStatusMsg("Creating stemmer...");
    SnowballStemmer.create();
    //		// create part of speech tagger
    //		MsgPrinter.printStatusMsg("Creating POS tagger...");
    //		if (!OpenNLP.createPosTagger("res/nlp/postagger/opennlp/tag.bin.gz",
    //									 "res/nlp/postagger/opennlp/tagdict"))
    //			MsgPrinter.printErrorMsg("Could not create OpenNLP POS tagger.");
    //		if (!StanfordPosTagger.init("res/nlp/postagger/stanford/" +
    //				"train-wsj-0-18.holder"))
    //			MsgPrinter.printErrorMsg("Could not create Stanford POS tagger.");
    //		// create chunker
    //		MsgPrinter.printStatusMsg("Creating chunker...");
    //		if (!OpenNLP.createChunker("res/nlp/phrasechunker/opennlp/" +
    //								   "EnglishChunk.bin.gz"))
    //			MsgPrinter.printErrorMsg("Could not create chunker.");
    // create named entity taggers (list- and regex-based only; the
    // model-based NE taggers below are deliberately disabled)
    MsgPrinter.printStatusMsg("Creating NE taggers...");
    NETagger.loadListTaggers("res/nlp/netagger/lists/");
    NETagger.loadRegExTaggers("res/nlp/netagger/patterns.lst");
    MsgPrinter.printStatusMsg("  ...loading models");
    //		if (!NETagger.loadNameFinders("res/nlp/netagger/opennlp/"))
    //			MsgPrinter.printErrorMsg("Could not create OpenNLP NE tagger.");
    //		if (!StanfordNeTagger.isInitialized() && !StanfordNeTagger.init())
    //			MsgPrinter.printErrorMsg("Could not create Stanford NE tagger.");
    MsgPrinter.printStatusMsg("  ...done");
    // filter under test, with term-frequency normalization disabled
    WikipediaTermImportanceFilter wtif = new WikipediaTermImportanceFilter(NO_NORMALIZATION, NO_NORMALIZATION, false);
    TRECTarget[] targets = TREC13To16Parser.loadTargets(args[0]);
    for (TRECTarget target : targets) {
        String question = target.getTargetDesc();
        // query generation
        MsgPrinter.printGeneratingQueries();
        String qn = QuestionNormalizer.normalize(question);
        // print normalized question string
        MsgPrinter.printNormalization(qn);
        // log normalized question string
        Logger.logNormalization(qn);
        String[] kws = KeywordExtractor.getKeywords(qn);
        AnalyzedQuestion aq = new AnalyzedQuestion(question);
        aq.setKeywords(kws);
        // marking the question non-factoid ensures BagOfWordsG emits a
        // query even though no answer types/predicates were analyzed
        aq.setFactoid(false);
        Query[] queries = new BagOfWordsG().generateQueries(aq);
        for (int q = 0; q < queries.length; q++) queries[q].setOriginalQueryString(question);
        // apply the filter to a single dummy result for this target
        // NOTE(review): queries[0] assumes at least one query was generated —
        // holds here because the question is flagged non-factoid
        Result[] results = new Result[1];
        results[0] = new Result("This would be the answer", queries[0]);
        wtif.apply(results);
    }
}
Also used : Query(info.ephyra.querygeneration.Query) TRECTarget(info.ephyra.trec.TRECTarget) AnalyzedQuestion(info.ephyra.questionanalysis.AnalyzedQuestion) BagOfWordsG(info.ephyra.querygeneration.generators.BagOfWordsG) Result(info.ephyra.search.Result)

Aggregations

Query (info.ephyra.querygeneration.Query)19 Result (info.ephyra.search.Result)11 ArrayList (java.util.ArrayList)8 Predicate (info.ephyra.nlp.semantics.Predicate)6 QuestionInterpretation (info.ephyra.questionanalysis.QuestionInterpretation)4 Term (info.ephyra.questionanalysis.Term)4 IOException (java.io.IOException)4 BagOfWordsG (info.ephyra.querygeneration.generators.BagOfWordsG)3 AnalyzedQuestion (info.ephyra.questionanalysis.AnalyzedQuestion)3 Hashtable (java.util.Hashtable)3 TRECTarget (info.ephyra.trec.TRECTarget)2 BufferedReader (java.io.BufferedReader)2 FileReader (java.io.FileReader)2 HashSet (java.util.HashSet)2 QuestionReformulator (info.ephyra.querygeneration.QuestionReformulator)1 QuestionInterpretationG (info.ephyra.querygeneration.generators.QuestionInterpretationG)1 FileCache (info.ephyra.util.FileCache)1 File (java.io.File)1 FileOutputStream (java.io.FileOutputStream)1 PrintWriter (java.io.PrintWriter)1