Use of info.ephyra.search.searchers.BingKM in project lucida by claritylab.
Class PatternLearner, method init.
/**
 * Initializes the pattern learning tool.
 *
 * <p>Creates the NLP components (tokenizer, sentence detector, stemmer, POS
 * tagger, chunker, named entity taggers), initializes the WordNet dictionary,
 * loads the word indices and question patterns, and registers a
 * {@code BingKM} knowledge miner for fetching text passages.
 *
 * <p>A step that reports failure is only logged through
 * {@code MsgPrinter.printErrorMsg}; initialization then continues with the
 * next step.
 */
public static void init() {
    MsgPrinter.printInitializing();

    // create tokenizer (OpenNLP; the LingPipe tokenizer is not used)
    MsgPrinter.printStatusMsg("Creating tokenizer...");
    if (!OpenNLP.createTokenizer("res/nlp/tokenizer/opennlp/EnglishTok.bin.gz")) {
        MsgPrinter.printErrorMsg("Could not create tokenizer.");
    }

    // create sentence detector (OpenNLP; the LingPipe detector is not used)
    MsgPrinter.printStatusMsg("Creating sentence detector...");
    if (!OpenNLP.createSentenceDetector("res/nlp/sentencedetector/opennlp/EnglishSD.bin.gz")) {
        MsgPrinter.printErrorMsg("Could not create sentence detector.");
    }

    // create stemmer
    MsgPrinter.printStatusMsg("Creating stemmer...");
    SnowballStemmer.create();

    // create part of speech tagger (OpenNLP; the Stanford tagger is not used)
    MsgPrinter.printStatusMsg("Creating POS tagger...");
    if (!OpenNLP.createPosTagger("res/nlp/postagger/opennlp/tag.bin.gz",
            "res/nlp/postagger/opennlp/tagdict")) {
        MsgPrinter.printErrorMsg("Could not create OpenNLP POS tagger.");
    }

    // create chunker
    MsgPrinter.printStatusMsg("Creating chunker...");
    if (!OpenNLP.createChunker("res/nlp/phrasechunker/opennlp/EnglishChunk.bin.gz")) {
        MsgPrinter.printErrorMsg("Could not create chunker.");
    }

    // note: no syntactic parser and no coreference linker are created here

    // create named entity taggers: list-based and regex-based taggers first,
    // then the Stanford tagger unless it has already been initialized
    MsgPrinter.printStatusMsg("Creating NE taggers...");
    NETagger.loadListTaggers("res/nlp/netagger/lists/");
    NETagger.loadRegExTaggers("res/nlp/netagger/patterns.lst");
    MsgPrinter.printStatusMsg(" ...loading models");
    if (!StanfordNeTagger.isInitialized() && !StanfordNeTagger.init()) {
        MsgPrinter.printErrorMsg("Could not create Stanford NE tagger.");
    }
    MsgPrinter.printStatusMsg(" ...done");

    // create WordNet dictionary
    MsgPrinter.printStatusMsg("Creating WordNet dictionary...");
    if (!WordNet.initialize("res/ontologies/wordnet/file_properties.xml")) {
        MsgPrinter.printErrorMsg("Could not create WordNet dictionary.");
    }

    // load function words (numbers are excluded)
    // (status message fixed: it previously announced "function verbs")
    MsgPrinter.printStatusMsg("Loading function words...");
    if (!FunctionWords.loadIndex("res/indices/functionwords_nonumbers")) {
        MsgPrinter.printErrorMsg("Could not load function words.");
    }

    // load prepositions
    MsgPrinter.printStatusMsg("Loading prepositions...");
    if (!Prepositions.loadIndex("res/indices/prepositions")) {
        MsgPrinter.printErrorMsg("Could not load prepositions.");
    }

    // load irregular verbs
    MsgPrinter.printStatusMsg("Loading irregular verbs...");
    if (!IrregularVerbs.loadVerbs("res/indices/irregularverbs")) {
        MsgPrinter.printErrorMsg("Could not load irregular verbs.");
    }

    // load question patterns
    MsgPrinter.printStatusMsg("Loading question patterns...");
    if (!QuestionInterpreter.loadPatterns("res/patternlearning/questionpatterns/")) {
        MsgPrinter.printErrorMsg("Could not load question patterns.");
    }

    // add knowledge miners used to fetch text passages for pattern learning
    // (only BingKM is registered; no Google, Yahoo or Indri miners are added)
    MsgPrinter.printStatusMsg("Adding BingKM...");
    Search.addKnowledgeMiner(new BingKM());
}
Use of info.ephyra.search.searchers.BingKM in project lucida by claritylab.
Class OpenEphyraCorpus, method initFactoidWeb.
/**
 * Configures the pipeline for answering factoid questions with the Web as
 * the knowledge source.
 *
 * <p>Each pipeline stage (question analysis, query generation, search,
 * answer selection) is cleared first and then repopulated.
 *
 * @param resultsCorp results retrieved from the corpus; handed to the
 *                    {@code AnswerProjectionFilter}
 */
protected void initFactoidWeb(Result[] resultsCorp) {
    // --- question analysis ---
    // a single WordNet instance is registered both as a dictionary (term
    // extraction) and as an ontology (term expansion)
    final Ontology lexicon = new WordNet();
    QuestionAnalysis.clearDictionaries();
    QuestionAnalysis.addDictionary(lexicon);
    QuestionAnalysis.clearOntologies();
    QuestionAnalysis.addOntology(lexicon);

    // --- query generation ---
    QueryGeneration.clearQueryGenerators();
    QueryGeneration.addQueryGenerator(new BagOfWordsG());
    QueryGeneration.addQueryGenerator(new BagOfTermsG());
    QueryGeneration.addQueryGenerator(new PredicateG());
    QueryGeneration.addQueryGenerator(new QuestionInterpretationG());
    QueryGeneration.addQueryGenerator(new QuestionReformulationG());

    // --- search ---
    // unstructured sources: Bing is the only knowledge miner registered
    Search.clearKnowledgeMiners();
    Search.addKnowledgeMiner(new BingKM());
    // (semi-)structured sources: the annotator list is cleared and left empty
    Search.clearKnowledgeAnnotators();

    // --- answer extraction and selection ---
    // filters run in the order in which they are added below
    AnswerSelection.clearFilters();

    // extraction filters
    AnswerSelection.addFilter(new AnswerTypeFilter());
    AnswerSelection.addFilter(new AnswerPatternFilter());
    AnswerSelection.addFilter(new WebDocumentFetcherFilter());
    AnswerSelection.addFilter(new PredicateExtractionFilter());
    AnswerSelection.addFilter(new FactoidsFromPredicatesFilter());
    AnswerSelection.addFilter(new TruncationFilter());

    // selection filters
    AnswerSelection.addFilter(new StopwordFilter());
    AnswerSelection.addFilter(new QuestionKeywordsFilter());
    AnswerSelection.addFilter(new AnswerProjectionFilter(resultsCorp));
    AnswerSelection.addFilter(new ScoreNormalizationFilter(NORMALIZER));
    AnswerSelection.addFilter(new ScoreCombinationFilter());
    AnswerSelection.addFilter(new FactoidSubsetFilter());
    AnswerSelection.addFilter(new DuplicateFilter());
    AnswerSelection.addFilter(new ScoreSorterFilter());
    AnswerSelection.addFilter(new ResultLengthFilter());
}
Aggregations