Example usage of info.ephyra.querygeneration.Query in project lucida by claritylab:
class PatternLearner, method extract().
/**
 * Loads target-context-answer-regex tuples from resource files, forms
 * queries, fetches text passages, extracts answer patterns and writes them
 * to resource files.
 *
 * @return <code>true</code>, iff the answer patterns could be extracted
 */
public static boolean extract() {
	// load tuples and build queries from both interpretation directories
	MsgPrinter.printFormingQueries();
	ass = new Hashtable<String, String>();
	regexs = new Hashtable<String, String>();
	ArrayList<Query> allQueries = new ArrayList<Query>();
	for (Query q : formQueries("res/patternlearning/interpretations"))
		allQueries.add(q);
	for (Query q : formQueries("res/patternlearning/interpretations_extract"))
		allQueries.add(q);
	Query[] queries = allQueries.toArray(new Query[allQueries.size()]);

	// fetch text passages for the queries
	MsgPrinter.printFetchingPassages();
	Result[] results = fetchPassages(queries);

	// extract answer patterns from the passages
	MsgPrinter.printExtractingPatterns();
	extractPatterns(results);

	// persist the extracted answer patterns
	MsgPrinter.printSavingPatterns();
	return savePatterns("res/patternlearning/answerpatterns_extract");
}
Example usage of info.ephyra.querygeneration.Query in project lucida by claritylab:
class PatternLearner, method assess().
/**
 * Loads target-context-answer-regex tuples and answer patterns from
 * resource files, forms queries from the tuples, fetches text passages,
 * assesses the answer patterns on the text passages and writes them to
 * resource files.
 *
 * @return <code>true</code>, iff the answer patterns could be assessed
 */
public static boolean assess() {
	// load previously extracted answer patterns; abort if unavailable
	MsgPrinter.printLoadingPatterns();
	if (!loadPatterns("res/patternlearning/answerpatterns_extract"))
		return false;

	// load tuples and build queries from both interpretation directories
	MsgPrinter.printFormingQueries();
	ass = new Hashtable<String, String>();
	regexs = new Hashtable<String, String>();
	ArrayList<Query> allQueries = new ArrayList<Query>();
	for (Query q : formQueries("res/patternlearning/interpretations"))
		allQueries.add(q);
	for (Query q : formQueries("res/patternlearning/interpretations_assess"))
		allQueries.add(q);
	Query[] queries = allQueries.toArray(new Query[allQueries.size()]);

	// fetch text passages for the queries
	MsgPrinter.printFetchingPassages();
	Result[] results = fetchPassages(queries);

	// assess the loaded answer patterns on the passages
	MsgPrinter.printAssessingPatterns();
	assessPatterns(results);

	// persist the assessed answer patterns
	MsgPrinter.printSavingPatterns();
	return savePatterns("res/patternlearning/answerpatterns_assess");
}
Example usage of info.ephyra.querygeneration.Query in project lucida by claritylab:
class PatternLearner, method formQueries().
/**
 * Loads target-context-answer-regex tuples from resource files and forms
 * queries.
 *
 * <p>Each non-blank, non-comment line is expected to have the format
 * <code>target#context1#...#contextN#answer#regex</code> (at least three
 * <code>#</code>-separated fields); malformed lines are skipped.
 *
 * @param dir directory containing the target-context-answer-regex tuples
 * @return queries formed from the tuples, or an empty array if an I/O
 *         error occurred
 */
private static Query[] formQueries(String dir) {
	QuestionInterpretationG queryGenerator = new QuestionInterpretationG();
	ArrayList<Query> results = new ArrayList<Query>();
	File[] files = FileUtils.getFiles(dir);
	try {
		for (File file : files) {
			String prop = file.getName();
			BufferedReader in = new BufferedReader(new FileReader(file));
			try {
				// readLine() == null is the reliable EOF check;
				// BufferedReader.ready() only reports buffered availability
				String line;
				while ((line = in.readLine()) != null) {
					line = line.trim();
					if (line.length() == 0 || line.startsWith("//"))
						// skip blank lines and comments
						continue;
					// extract interpretation, answer string and pattern;
					// limit -1 keeps trailing empty fields
					String[] tuple = line.split("#", -1);
					if (tuple.length < 3)
						// malformed line: would otherwise cause a
						// NegativeArraySizeException below
						continue;
					String target = tuple[0];
					String[] context = new String[tuple.length - 3];
					for (int i = 1; i < tuple.length - 2; i++)
						context[i - 1] = tuple[i];
					String as = tuple[tuple.length - 2];
					String regex = tuple[tuple.length - 1];
					// complement answer string or regular expression
					if (as.equals(""))
						as = RegexConverter.regexToQueryStr(regex);
					else if (regex.equals(""))
						regex = RegexConverter.strToRegex(as);
					// create query object
					QuestionInterpretation qi =
						new QuestionInterpretation(target, context, prop);
					String[] kws = new String[] {"\"" + as + "\""};
					String queryString =
						queryGenerator.queryString(target, context, kws);
					Query query = new Query(queryString, null, 0);
					query.setInterpretation(qi);
					// store query, answer and regular expression
					results.add(query);
					ass.put(queryString, as);
					regexs.put(queryString, regex);
				}
			} finally {
				// always release the file handle (the original leaked it)
				in.close();
			}
		}
	} catch (IOException e) {
		return new Query[0];
	}
	return results.toArray(new Query[results.size()]);
}
Example usage of info.ephyra.querygeneration.Query in project lucida by claritylab:
class BagOfWordsG, method generateQueries().
/**
 * Generates a "bag of words" query from the keywords in the question
 * string.
 *
 * @param aq analyzed question
 * @return <code>Query</code> objects
 */
public Query[] generateQueries(AnalyzedQuestion aq) {
	// guard: for a factoid question with no known answer type and no
	// extracted predicates, no query is generated
	boolean noAnswerTypes = aq.getAnswerTypes().length == 0;
	boolean noPredicates = aq.getPredicates().length == 0;
	if (noAnswerTypes && noPredicates && aq.isFactoid())
		return new Query[0];

	// build the query string from the question's terms and keywords
	String queryString = getQueryString(aq.getTerms(), aq.getKeywords());

	// wrap it in a single query and attach the extraction techniques
	Query query = new Query(queryString, aq, SCORE);
	query.setExtractionTechniques(EXTRACTION_TECHNIQUES);
	return new Query[] {query};
}
Example usage of info.ephyra.querygeneration.Query in project lucida by claritylab:
class WikipediaTermImportanceFilter, method main().
/**
 * Test driver: initializes the NLP components needed for keyword
 * extraction (tokenizer, stemmer, NE taggers), then, for each TREC target
 * loaded from the file named by <code>args[0]</code>, generates a
 * bag-of-words query from the target description and applies a
 * {@link WikipediaTermImportanceFilter} to a dummy result built from that
 * query. Alternative initializations (sentence detector, POS taggers,
 * chunker, model-based NE taggers) are kept commented out.
 *
 * @param args args[0] is the path to the TREC target file
 */
public static void main(String[] args) {
// enable term download mode and console status/error output
TEST_TERM_DOWMLOD = true;
MsgPrinter.enableStatusMsgs(true);
MsgPrinter.enableErrorMsgs(true);
// create tokenizer
MsgPrinter.printStatusMsg("Creating tokenizer...");
if (!OpenNLP.createTokenizer("res/nlp/tokenizer/opennlp/EnglishTok.bin.gz"))
MsgPrinter.printErrorMsg("Could not create tokenizer.");
// LingPipe.createTokenizer();
// // create sentence detector
// MsgPrinter.printStatusMsg("Creating sentence detector...");
// if (!OpenNLP.createSentenceDetector("res/nlp/sentencedetector/opennlp/EnglishSD.bin.gz"))
// MsgPrinter.printErrorMsg("Could not create sentence detector.");
// LingPipe.createSentenceDetector();
// create stemmer
MsgPrinter.printStatusMsg("Creating stemmer...");
SnowballStemmer.create();
// // create part of speech tagger
// MsgPrinter.printStatusMsg("Creating POS tagger...");
// if (!OpenNLP.createPosTagger("res/nlp/postagger/opennlp/tag.bin.gz",
// "res/nlp/postagger/opennlp/tagdict"))
// MsgPrinter.printErrorMsg("Could not create OpenNLP POS tagger.");
// if (!StanfordPosTagger.init("res/nlp/postagger/stanford/" +
// "train-wsj-0-18.holder"))
// MsgPrinter.printErrorMsg("Could not create Stanford POS tagger.");
// // create chunker
// MsgPrinter.printStatusMsg("Creating chunker...");
// if (!OpenNLP.createChunker("res/nlp/phrasechunker/opennlp/" +
// "EnglishChunk.bin.gz"))
// MsgPrinter.printErrorMsg("Could not create chunker.");
// create named entity taggers (list- and regex-based only; the
// model-based name finders below stay disabled)
MsgPrinter.printStatusMsg("Creating NE taggers...");
NETagger.loadListTaggers("res/nlp/netagger/lists/");
NETagger.loadRegExTaggers("res/nlp/netagger/patterns.lst");
MsgPrinter.printStatusMsg("  ...loading models");
// if (!NETagger.loadNameFinders("res/nlp/netagger/opennlp/"))
// MsgPrinter.printErrorMsg("Could not create OpenNLP NE tagger.");
// if (!StanfordNeTagger.isInitialized() && !StanfordNeTagger.init())
// MsgPrinter.printErrorMsg("Could not create Stanford NE tagger.");
MsgPrinter.printStatusMsg("  ...done");
// the filter under test, with normalization disabled
WikipediaTermImportanceFilter wtif = new WikipediaTermImportanceFilter(NO_NORMALIZATION, NO_NORMALIZATION, false);
TRECTarget[] targets = TREC13To16Parser.loadTargets(args[0]);
for (TRECTarget target : targets) {
String question = target.getTargetDesc();
// query generation
MsgPrinter.printGeneratingQueries();
String qn = QuestionNormalizer.normalize(question);
// print normalized question string
MsgPrinter.printNormalization(qn);
// log normalized question string
Logger.logNormalization(qn);
String[] kws = KeywordExtractor.getKeywords(qn);
AnalyzedQuestion aq = new AnalyzedQuestion(question);
aq.setKeywords(kws);
// setFactoid(false) presumably guarantees generateQueries() returns a
// non-empty array, so queries[0] below is safe — TODO confirm
aq.setFactoid(false);
Query[] queries = new BagOfWordsG().generateQueries(aq);
for (int q = 0; q < queries.length; q++) queries[q].setOriginalQueryString(question);
// wrap a dummy answer in a Result and run the filter on it
Result[] results = new Result[1];
results[0] = new Result("This would be the answer", queries[0]);
wtif.apply(results);
}
}
End of aggregated usage examples.