Use of info.ephyra.querygeneration.generators.BagOfWordsG in the project lucida by claritylab.
Class EphyraTREC13To16, method askOther.
// Layout 2
// /**
// * Initializes the pipeline for 'other' questions.
// */
// protected void initOther() {
// // query generation
// QueryGeneration.clearQueryGenerators();
//
// // search
// // - knowledge miners for unstructured knowledge sources
// Search.clearKnowledgeMiners();
// for (String[] indriIndices : IndriKM.getIndriIndices())
// Search.addKnowledgeMiner(new IndriKM(indriIndices, false));
// for (String[] indriServers : IndriKM.getIndriServers())
// Search.addKnowledgeMiner(new IndriKM(indriServers, true));
// // - knowledge annotators for (semi-)structured knowledge sources
// Search.clearKnowledgeAnnotators();
//
// // answer extraction and selection
// // (the filters are applied in this order)
// AnswerSelection.clearFilters();
//
// // initialize scores
// AnswerSelection.addFilter(new ScoreResetterFilter());
//
// // extract sentences from snippets
// AnswerSelection.addFilter(new SentenceExtractionFilter());
//
// // cut meaningless introductions from sentences
// AnswerSelection.addFilter(new CutKeywordsFilter());
// AnswerSelection.addFilter(new CutStatementProviderFilter());
// AnswerSelection.addFilter(new SentenceSplitterFilter());
// AnswerSelection.addFilter(new CutKeywordsFilter());
//
// // remove duplicates
// AnswerSelection.addFilter(new DuplicateSnippetFilter());
//
// // throw out enumerations of proper names
// AnswerSelection.addFilter(new ProperNameFilter());
//
// // throw out direct speech snippets, rarely contain useful information
// AnswerSelection.addFilter(new DirectSpeechFilter());
//
// AnswerSelection.addFilter(
// new WikipediaGoogleWebTermImportanceFilter(
// WebTermImportanceFilter.LOG_LENGTH_NORMALIZATION,
// WebTermImportanceFilter.LOG_LENGTH_NORMALIZATION,
// false
// )
// );
// AnswerSelection.addFilter(new ScoreSorterFilter());
//
// // cut off result
// AnswerSelection.addFilter(new ResultLengthFilter(3000));
// }
// Layout 3
// /**
// * Initializes the pipeline for 'other' questions.
// */
// protected void initOther() {
// // query generation
// QueryGeneration.clearQueryGenerators();
//
// // search
// // - knowledge miners for unstructured knowledge sources
// Search.clearKnowledgeMiners();
// for (String[] indriIndices : IndriKM.getIndriIndices())
// Search.addKnowledgeMiner(new IndriDocumentKM(indriIndices, false));
// for (String[] indriServers : IndriKM.getIndriServers())
// Search.addKnowledgeMiner(new IndriDocumentKM(indriServers, true));
// // - knowledge annotators for (semi-)structured knowledge sources
// Search.clearKnowledgeAnnotators();
//
// // answer extraction and selection
// // (the filters are applied in this order)
// AnswerSelection.clearFilters();
//
// // initialize scores
// AnswerSelection.addFilter(new ScoreResetterFilter());
//
// // extract sentences from snippets
// AnswerSelection.addFilter(new SentenceExtractionFilter());
//
// // cut meaningless introductions from sentences
// AnswerSelection.addFilter(new CutKeywordsFilter());
// AnswerSelection.addFilter(new CutStatementProviderFilter());
// AnswerSelection.addFilter(new SentenceSplitterFilter());
// AnswerSelection.addFilter(new CutKeywordsFilter());
//
// // remove duplicates
// AnswerSelection.addFilter(new DuplicateSnippetFilter());
//
// // throw out enumerations of proper names
// AnswerSelection.addFilter(new ProperNameFilter());
//
// // throw out direct speech snippets, rarely contain useful information
// AnswerSelection.addFilter(new DirectSpeechFilter());
//
// // sort out snippets containing no new terms
// AnswerSelection.addFilter(new TermFilter());
//
// AnswerSelection.addFilter(
// new WikipediaGoogleWebTermImportanceFilter(
// WebTermImportanceFilter.LOG_LENGTH_NORMALIZATION,
// WebTermImportanceFilter.LOG_LENGTH_NORMALIZATION,
// false
// )
// );
// AnswerSelection.addFilter(new ScoreSorterFilter());
//
// // cut off result
// AnswerSelection.addFilter(new ResultLengthFilter(3000));
// }
/**
 * Answers an 'other' question by running it through the pipeline that
 * {@code initOther()} sets up.
 *
 * <p>The question is normalized, keywords are extracted from the normalized
 * string, bag-of-words queries are generated from those keywords, the queries
 * are searched, and the search results are handed to answer selection.
 *
 * @param question other question
 * @return array of results
 */
public final Result[] askOther(String question) {
    // prepare the pipeline before anything else runs
    initOther();

    // ---- query generation ----
    MsgPrinter.printGeneratingQueries();
    String normalized = QuestionNormalizer.normalize(question);
    // report the normalized question on the console and in the log
    MsgPrinter.printNormalization(normalized);
    Logger.logNormalization(normalized);

    String[] keywords = KeywordExtractor.getKeywords(normalized);
    // the analyzed question wraps the raw question; keywords come from the normalized form
    AnalyzedQuestion analyzed = new AnalyzedQuestion(question);
    analyzed.setKeywords(keywords);
    analyzed.setFactoid(false);

    Query[] generated = new BagOfWordsG().generateQueries(analyzed);
    // every generated query remembers the question it was derived from
    for (Query query : generated) {
        query.setOriginalQueryString(question);
    }
    // report the query strings on the console and in the log
    MsgPrinter.printQueryStrings(generated);
    Logger.logQueryStrings(generated);

    // ---- search ----
    MsgPrinter.printSearching();
    Result[] found = Search.doSearch(generated);

    // ---- answer selection ----
    MsgPrinter.printSelectingAnswers();
    // NOTE(review): the arguments presumably mean "no cap on the number of
    // results" and "minimum score 0" - confirm against AnswerSelection.getResults
    return AnswerSelection.getResults(found, Integer.MAX_VALUE, 0);
}
Use of info.ephyra.querygeneration.generators.BagOfWordsG in the project lucida by claritylab.
Class OpenEphyraCorpus, method initFactoidWeb.
/**
 * Initializes the pipeline for factoid questions, using the Web as a
 * knowledge source.
 *
 * <p>Every stage is reset before it is populated, so components registered by
 * an earlier initialization do not carry over: dictionaries, ontologies, query
 * generators, knowledge miners, knowledge annotators and answer selection
 * filters are all cleared here. The statements are order-sensitive; filters
 * are registered in the order in which they are to be applied.
 *
 * @param resultsCorp results retrieved from the corpus; passed to the
 *                    {@code AnswerProjectionFilter} registered below
 */
protected void initFactoidWeb(Result[] resultsCorp) {
// question analysis
// a single WordNet instance is registered both as dictionary and as ontology
Ontology wordNet = new WordNet();
// - dictionaries for term extraction
QuestionAnalysis.clearDictionaries();
QuestionAnalysis.addDictionary(wordNet);
// - ontologies for term expansion
QuestionAnalysis.clearOntologies();
QuestionAnalysis.addOntology(wordNet);
// query generation
// five generators are registered, starting with the plain bag-of-words one
QueryGeneration.clearQueryGenerators();
QueryGeneration.addQueryGenerator(new BagOfWordsG());
QueryGeneration.addQueryGenerator(new BagOfTermsG());
QueryGeneration.addQueryGenerator(new PredicateG());
QueryGeneration.addQueryGenerator(new QuestionInterpretationG());
QueryGeneration.addQueryGenerator(new QuestionReformulationG());
// search
// - knowledge miners for unstructured knowledge sources
// (BingKM is the only miner registered; the Google and Yahoo miners are disabled)
Search.clearKnowledgeMiners();
Search.addKnowledgeMiner(new BingKM());
// Search.addKnowledgeMiner(new GoogleKM());
// Search.addKnowledgeMiner(new YahooKM());
// - knowledge annotators for (semi-)structured knowledge sources
// (cleared and none added in this method)
Search.clearKnowledgeAnnotators();
// answer extraction and selection
// (the filters are applied in this order)
AnswerSelection.clearFilters();
// - answer extraction filters
AnswerSelection.addFilter(new AnswerTypeFilter());
AnswerSelection.addFilter(new AnswerPatternFilter());
AnswerSelection.addFilter(new WebDocumentFetcherFilter());
AnswerSelection.addFilter(new PredicateExtractionFilter());
AnswerSelection.addFilter(new FactoidsFromPredicatesFilter());
AnswerSelection.addFilter(new TruncationFilter());
// - answer selection filters
AnswerSelection.addFilter(new StopwordFilter());
AnswerSelection.addFilter(new QuestionKeywordsFilter());
// NOTE(review): presumably projects the Web answers onto the corpus results
// passed in as resultsCorp - confirm against AnswerProjectionFilter
AnswerSelection.addFilter(new AnswerProjectionFilter(resultsCorp));
// NORMALIZER is not defined in this method; it comes from the enclosing scope
AnswerSelection.addFilter(new ScoreNormalizationFilter(NORMALIZER));
AnswerSelection.addFilter(new ScoreCombinationFilter());
AnswerSelection.addFilter(new FactoidSubsetFilter());
AnswerSelection.addFilter(new DuplicateFilter());
AnswerSelection.addFilter(new ScoreSorterFilter());
// no-argument constructor: the length limit applied is the filter's own default
AnswerSelection.addFilter(new ResultLengthFilter());
}
Aggregations