Use of info.ephyra.search.searchers.IndriKM in project lucida by claritylab.
From the class OpenEphyraCorpus, method initFactoidCorpus.
/**
 * Sets up the factoid question pipeline so that a local corpus serves as
 * the knowledge source.
 * <p>
 * Every stage (question analysis, query generation, search, answer
 * selection) is cleared first and then repopulated, so the resulting
 * configuration does not depend on what was registered before the call.
 */
protected void initFactoidCorpus() {
    // ---- question analysis ----
    // A single WordNet instance is registered twice: once as the dictionary
    // used for term extraction and once as the ontology used for term
    // expansion.
    Ontology lexicon = new WordNet();
    QuestionAnalysis.clearDictionaries();
    QuestionAnalysis.addDictionary(lexicon);
    QuestionAnalysis.clearOntologies();
    QuestionAnalysis.addOntology(lexicon);

    // ---- query generation ----
    QueryGeneration.clearQueryGenerators();
    QueryGeneration.addQueryGenerator(new BagOfWordsG());
    QueryGeneration.addQueryGenerator(new BagOfTermsG());
    QueryGeneration.addQueryGenerator(new PredicateG());
    QueryGeneration.addQueryGenerator(new QuestionInterpretationG());
    QueryGeneration.addQueryGenerator(new QuestionReformulationG());

    // ---- search ----
    // Knowledge miners (unstructured sources): one IndriKM per configured
    // index entry (flag = false), followed by one per configured server
    // entry (flag = true).
    Search.clearKnowledgeMiners();
    for (String[] indexEntry : IndriKM.getIndriIndices()) {
        Search.addKnowledgeMiner(new IndriKM(indexEntry, false));
    }
    for (String[] serverEntry : IndriKM.getIndriServers()) {
        Search.addKnowledgeMiner(new IndriKM(serverEntry, true));
    }
    // Knowledge annotators ((semi-)structured sources): none registered.
    Search.clearKnowledgeAnnotators();

    // ---- answer extraction and selection ----
    // Filters are applied in the order in which they are added below.
    AnswerSelection.clearFilters();
    // Extraction filters.
    AnswerSelection.addFilter(new AnswerTypeFilter());
    AnswerSelection.addFilter(new AnswerPatternFilter());
    AnswerSelection.addFilter(new WebDocumentFetcherFilter());
    AnswerSelection.addFilter(new PredicateExtractionFilter());
    AnswerSelection.addFilter(new FactoidsFromPredicatesFilter());
    AnswerSelection.addFilter(new TruncationFilter());
    // Selection filters: this pipeline registers none.
}
Use of info.ephyra.search.searchers.IndriKM in project lucida by claritylab.
From the class OpenEphyra, method initFactoid.
/**
 * Sets up the pipeline used to answer factoid questions.
 * <p>
 * Every stage (question analysis, query generation, search, answer
 * selection) is cleared first and then repopulated.
 */
protected void initFactoid() {
    // ---- question analysis ----
    // A single WordNet instance is registered twice: once as the dictionary
    // used for term extraction and once as the ontology used for term
    // expansion.
    Ontology lexicon = new WordNet();
    QuestionAnalysis.clearDictionaries();
    QuestionAnalysis.addDictionary(lexicon);
    QuestionAnalysis.clearOntologies();
    QuestionAnalysis.addOntology(lexicon);

    // ---- query generation ----
    QueryGeneration.clearQueryGenerators();
    QueryGeneration.addQueryGenerator(new BagOfWordsG());
    QueryGeneration.addQueryGenerator(new BagOfTermsG());
    QueryGeneration.addQueryGenerator(new PredicateG());
    QueryGeneration.addQueryGenerator(new QuestionInterpretationG());
    QueryGeneration.addQueryGenerator(new QuestionReformulationG());

    // ---- search ----
    // Knowledge miners (unstructured sources): only the configured Indri
    // index entries are used here.
    // Disabled: Search.addKnowledgeMiner(new YahooKM());
    Search.clearKnowledgeMiners();
    for (String[] indexEntry : IndriKM.getIndriIndices()) {
        Search.addKnowledgeMiner(new IndriKM(indexEntry, false));
    }
    // Disabled: registering new IndriKM(indriServers, true) for each entry
    // of IndriKM.getIndriServers().
    // Knowledge annotators ((semi-)structured sources): none registered.
    Search.clearKnowledgeAnnotators();

    // ---- answer extraction and selection ----
    // Filters are applied in the order in which they are added below.
    AnswerSelection.clearFilters();
    // Extraction filters.
    AnswerSelection.addFilter(new AnswerTypeFilter());
    AnswerSelection.addFilter(new AnswerPatternFilter());
    // Disabled: AnswerSelection.addFilter(new WebDocumentFetcherFilter());
    AnswerSelection.addFilter(new PredicateExtractionFilter());
    AnswerSelection.addFilter(new FactoidsFromPredicatesFilter());
    AnswerSelection.addFilter(new TruncationFilter());
    // Selection filters.
    AnswerSelection.addFilter(new StopwordFilter());
    AnswerSelection.addFilter(new QuestionKeywordsFilter());
    AnswerSelection.addFilter(new ScoreNormalizationFilter(NORMALIZER));
    AnswerSelection.addFilter(new ScoreCombinationFilter());
    AnswerSelection.addFilter(new FactoidSubsetFilter());
    AnswerSelection.addFilter(new DuplicateFilter());
    AnswerSelection.addFilter(new ScoreSorterFilter());
}
Use of info.ephyra.search.searchers.IndriKM in project lucida by claritylab.
From the class OpenEphyraServer, method initFactoid.
/**
 * Sets up the pipeline used to answer factoid questions.
 * <p>
 * Every stage (question analysis, query generation, search, answer
 * selection) is cleared first and then repopulated.
 */
protected void initFactoid() {
    // ---- question analysis ----
    // A single WordNet instance is registered twice: once as the dictionary
    // used for term extraction and once as the ontology used for term
    // expansion.
    Ontology lexicon = new WordNet();
    QuestionAnalysis.clearDictionaries();
    QuestionAnalysis.addDictionary(lexicon);
    QuestionAnalysis.clearOntologies();
    QuestionAnalysis.addOntology(lexicon);

    // ---- query generation ----
    QueryGeneration.clearQueryGenerators();
    QueryGeneration.addQueryGenerator(new BagOfWordsG());
    QueryGeneration.addQueryGenerator(new BagOfTermsG());
    QueryGeneration.addQueryGenerator(new PredicateG());
    QueryGeneration.addQueryGenerator(new QuestionInterpretationG());
    QueryGeneration.addQueryGenerator(new QuestionReformulationG());

    // ---- search ----
    // Knowledge miners (unstructured sources): one IndriKM per configured
    // index entry.
    Search.clearKnowledgeMiners();
    for (String[] indexEntry : IndriKM.getIndriIndices()) {
        Search.addKnowledgeMiner(new IndriKM(indexEntry, false));
    }
    // Knowledge annotators ((semi-)structured sources): none registered.
    // Disabled: Search.addKnowledgeAnnotator(new WikipediaKA("list.txt"));
    Search.clearKnowledgeAnnotators();

    // ---- answer extraction and selection ----
    // Filters are applied in the order in which they are added below.
    AnswerSelection.clearFilters();
    // Extraction filters.
    AnswerSelection.addFilter(new AnswerTypeFilter());
    AnswerSelection.addFilter(new AnswerPatternFilter());
    AnswerSelection.addFilter(new PredicateExtractionFilter());
    AnswerSelection.addFilter(new FactoidsFromPredicatesFilter());
    AnswerSelection.addFilter(new TruncationFilter());
    // Selection filters.
    AnswerSelection.addFilter(new StopwordFilter());
    AnswerSelection.addFilter(new QuestionKeywordsFilter());
    AnswerSelection.addFilter(new ScoreNormalizationFilter(NORMALIZER));
    AnswerSelection.addFilter(new ScoreCombinationFilter());
    AnswerSelection.addFilter(new FactoidSubsetFilter());
    AnswerSelection.addFilter(new DuplicateFilter());
    AnswerSelection.addFilter(new ScoreSorterFilter());
}
Use of info.ephyra.search.searchers.IndriKM in project lucida by claritylab.
From the class EphyraTREC13To16, method initOther.
// Layout 1
/**
 * Sets up the pipeline used to answer 'other' questions.
 * <p>
 * No query generators are registered; the search stage uses the configured
 * Indri index and server entries, and the answer selection stage is a chain
 * of filters applied in registration order.
 */
protected void initOther() {
    // ---- query generation ----
    // Only cleared; nothing is registered for this question type.
    QueryGeneration.clearQueryGenerators();

    // ---- search ----
    // Knowledge miners (unstructured sources): one IndriKM per configured
    // index entry (flag = false), followed by one per configured server
    // entry (flag = true).
    Search.clearKnowledgeMiners();
    for (String[] indexEntry : IndriKM.getIndriIndices()) {
        Search.addKnowledgeMiner(new IndriKM(indexEntry, false));
    }
    for (String[] serverEntry : IndriKM.getIndriServers()) {
        Search.addKnowledgeMiner(new IndriKM(serverEntry, true));
    }
    // Knowledge annotators ((semi-)structured sources): none registered.
    Search.clearKnowledgeAnnotators();

    // ---- answer extraction and selection ----
    // Filters are applied in the order in which they are added below.
    AnswerSelection.clearFilters();

    // Start from freshly initialized scores.
    AnswerSelection.addFilter(new ScoreResetterFilter());

    // Break the snippets down into sentences.
    AnswerSelection.addFilter(new SentenceExtractionFilter());

    // Strip meaningless introductions from the sentences. The keyword cut
    // is registered once before and once after the statement-provider cut
    // and the sentence splitter.
    AnswerSelection.addFilter(new CutKeywordsFilter());
    AnswerSelection.addFilter(new CutStatementProviderFilter());
    AnswerSelection.addFilter(new SentenceSplitterFilter());
    AnswerSelection.addFilter(new CutKeywordsFilter());

    // Drop duplicate snippets.
    AnswerSelection.addFilter(new DuplicateSnippetFilter());

    // Discard enumerations of proper names.
    AnswerSelection.addFilter(new ProperNameFilter());

    // Discard direct speech, which rarely carries useful information.
    AnswerSelection.addFilter(new DirectSpeechFilter());

    // Discard snippets that contribute no new terms, then apply the
    // term-importance filter and sort by score.
    AnswerSelection.addFilter(new TermFilter());
    AnswerSelection.addFilter(
            new WikipediaGoogleTermImportanceFilter(
                    WebTermImportanceFilter.LOG_LENGTH_NORMALIZATION,
                    WebTermImportanceFilter.LOG_LENGTH_NORMALIZATION,
                    false));
    AnswerSelection.addFilter(new ScoreSorterFilter());

    // Cut the result off (length limit 3000).
    AnswerSelection.addFilter(new ResultLengthFilter(3000));
}
Aggregations