Search in sources :

Example 1 with BingKM

use of info.ephyra.search.searchers.BingKM in project lucida by claritylab.

In class PatternLearner, method init:

/**
 * Initializes the pattern learning tool.
 *
 * <p>Sets up, in this order: the OpenNLP tokenizer, sentence detector, POS
 * tagger and chunker, the Snowball stemmer, the list-based, regex-based and
 * Stanford named entity taggers, the WordNet dictionary, the function word,
 * preposition and irregular verb indices, the question patterns, and finally
 * registers {@code BingKM} as the knowledge miner used to fetch text passages.
 *
 * <p>Initialization is best-effort: when a component reports failure, an
 * error message is printed through {@code MsgPrinter} and the remaining
 * steps are still executed. No exception is thrown by this method itself.
 *
 * <p>All resources are addressed by relative {@code res/...} paths
 * (presumably resolved against the working directory - verify against the
 * launch scripts).
 */
public static void init() {
    MsgPrinter.printInitializing();

    // create tokenizer
    MsgPrinter.printStatusMsg("Creating tokenizer...");
    if (!OpenNLP.createTokenizer("res/nlp/tokenizer/opennlp/EnglishTok.bin.gz")) {
        MsgPrinter.printErrorMsg("Could not create tokenizer.");
    }
    //		LingPipe.createTokenizer();

    // create sentence detector
    MsgPrinter.printStatusMsg("Creating sentence detector...");
    if (!OpenNLP.createSentenceDetector("res/nlp/sentencedetector/opennlp/EnglishSD.bin.gz")) {
        MsgPrinter.printErrorMsg("Could not create sentence detector.");
    }
    //		LingPipe.createSentenceDetector();

    // create stemmer
    MsgPrinter.printStatusMsg("Creating stemmer...");
    SnowballStemmer.create();

    // create part of speech tagger
    MsgPrinter.printStatusMsg("Creating POS tagger...");
    if (!OpenNLP.createPosTagger("res/nlp/postagger/opennlp/tag.bin.gz", "res/nlp/postagger/opennlp/tagdict")) {
        MsgPrinter.printErrorMsg("Could not create OpenNLP POS tagger.");
    }
    //		if (!StanfordPosTagger.init("res/nlp/postagger/stanford/" +
    //				"train-wsj-0-18.holder"))
    //			MsgPrinter.printErrorMsg("Could not create Stanford POS tagger.");

    // create chunker
    MsgPrinter.printStatusMsg("Creating chunker...");
    if (!OpenNLP.createChunker("res/nlp/phrasechunker/opennlp/EnglishChunk.bin.gz")) {
        MsgPrinter.printErrorMsg("Could not create chunker.");
    }

    // create syntactic parser (currently disabled)
    //		MsgPrinter.printStatusMsg("Creating syntactic parser...");
    //		if (!OpenNLP.createParser("res/nlp/syntacticparser/opennlp/"))
    //			MsgPrinter.printErrorMsg("Could not create OpenNLP parser.");
    //		try {
    //			StanfordParser.initialize();
    //		} catch (Exception e) {
    //			MsgPrinter.printErrorMsg("Could not create Stanford parser.");
    //		}

    // create named entity taggers
    MsgPrinter.printStatusMsg("Creating NE taggers...");
    NETagger.loadListTaggers("res/nlp/netagger/lists/");
    NETagger.loadRegExTaggers("res/nlp/netagger/patterns.lst");
    MsgPrinter.printStatusMsg("  ...loading models");
    //			MsgPrinter.printErrorMsg("Could not create OpenNLP NE tagger.");
    // init() is only attempted when the Stanford tagger is not initialized yet
    if (!StanfordNeTagger.isInitialized() && !StanfordNeTagger.init()) {
        MsgPrinter.printErrorMsg("Could not create Stanford NE tagger.");
    }
    MsgPrinter.printStatusMsg("  ...done");

    // create linker (currently disabled)
    //		MsgPrinter.printStatusMsg("Creating linker...");
    //		if (!OpenNLP.createLinker("res/nlp/corefresolver/opennlp/"))
    //			MsgPrinter.printErrorMsg("Could not create linker.");

    // create WordNet dictionary
    MsgPrinter.printStatusMsg("Creating WordNet dictionary...");
    if (!WordNet.initialize("res/ontologies/wordnet/file_properties.xml")) {
        MsgPrinter.printErrorMsg("Could not create WordNet dictionary.");
    }

    // load function words (numbers are excluded)
    // (status message previously read "function verbs", contradicting the
    // resource being loaded and the matching error message)
    MsgPrinter.printStatusMsg("Loading function words...");
    if (!FunctionWords.loadIndex("res/indices/functionwords_nonumbers")) {
        MsgPrinter.printErrorMsg("Could not load function words.");
    }

    // load prepositions
    MsgPrinter.printStatusMsg("Loading prepositions...");
    if (!Prepositions.loadIndex("res/indices/prepositions")) {
        MsgPrinter.printErrorMsg("Could not load prepositions.");
    }

    // load irregular verbs
    MsgPrinter.printStatusMsg("Loading irregular verbs...");
    if (!IrregularVerbs.loadVerbs("res/indices/irregularverbs")) {
        MsgPrinter.printErrorMsg("Could not load irregular verbs.");
    }

    // load question patterns
    MsgPrinter.printStatusMsg("Loading question patterns...");
    if (!QuestionInterpreter.loadPatterns("res/patternlearning/questionpatterns/")) {
        MsgPrinter.printErrorMsg("Could not load question patterns.");
    }

    // add knowledge miners used to fetch text passages for pattern learning
    MsgPrinter.printStatusMsg("Adding BingKM...");
    Search.addKnowledgeMiner(new BingKM());
    //		MsgPrinter.printStatusMsg("Adding GoogleKM...");
    //		Search.addKnowledgeMiner(new GoogleKM());
    //		MsgPrinter.printStatusMsg("Adding YahooKM...");
    //		Search.addKnowledgeMiner(new YahooKM());
    //		MsgPrinter.printStatusMsg("Adding IndriKMs...");
    //		for (String[] indriIndices : IndriKM.getIndriIndices())
    //			Search.addKnowledgeMiner(new IndriKM(indriIndices, false));
    //		for (String[] indriServers : IndriKM.getIndriServers())
    //			Search.addKnowledgeMiner(new IndriKM(indriServers, true));
}
Also used : BingKM(info.ephyra.search.searchers.BingKM)

Example 2 with BingKM

use of info.ephyra.search.searchers.BingKM in project lucida by claritylab.

In class OpenEphyraCorpus, method initFactoidWeb:

/**
 * Configures the question answering pipeline for factoid questions that are
 * answered from the Web.
 *
 * <p>The four pipeline stages are reset and repopulated one after another:
 * question analysis, query generation, search, and answer extraction and
 * selection.
 *
 * @param resultsCorp results retrieved from the corpus; they are handed to
 *                    the {@code AnswerProjectionFilter}
 */
protected void initFactoidWeb(Result[] resultsCorp) {
    setUpFactoidWebQuestionAnalysis();
    setUpFactoidWebQueryGeneration();
    setUpFactoidWebSearch();
    setUpFactoidWebAnswerSelection(resultsCorp);
}

/** Registers a single WordNet instance as both dictionary and ontology. */
private void setUpFactoidWebQuestionAnalysis() {
    Ontology lexicon = new WordNet();

    // dictionaries for term extraction
    QuestionAnalysis.clearDictionaries();
    QuestionAnalysis.addDictionary(lexicon);

    // ontologies for term expansion
    QuestionAnalysis.clearOntologies();
    QuestionAnalysis.addOntology(lexicon);
}

/** Replaces the registered query generators with the factoid set. */
private void setUpFactoidWebQueryGeneration() {
    QueryGeneration.clearQueryGenerators();

    QueryGeneration.addQueryGenerator(new BagOfWordsG());
    QueryGeneration.addQueryGenerator(new BagOfTermsG());
    QueryGeneration.addQueryGenerator(new PredicateG());
    QueryGeneration.addQueryGenerator(new QuestionInterpretationG());
    QueryGeneration.addQueryGenerator(new QuestionReformulationG());
}

/** Registers BingKM as the only knowledge miner and removes all knowledge annotators. */
private void setUpFactoidWebSearch() {
    // knowledge miners for unstructured knowledge sources
    Search.clearKnowledgeMiners();
    Search.addKnowledgeMiner(new BingKM());
    //		Search.addKnowledgeMiner(new GoogleKM());
    //		Search.addKnowledgeMiner(new YahooKM());

    // knowledge annotators for (semi-)structured knowledge sources:
    // none are registered for this pipeline
    Search.clearKnowledgeAnnotators();
}

/**
 * Replaces the registered answer filters. The filters are applied in the
 * order in which they are added here.
 */
private void setUpFactoidWebAnswerSelection(Result[] resultsCorp) {
    AnswerSelection.clearFilters();

    // answer extraction filters
    AnswerSelection.addFilter(new AnswerTypeFilter());
    AnswerSelection.addFilter(new AnswerPatternFilter());
    AnswerSelection.addFilter(new WebDocumentFetcherFilter());
    AnswerSelection.addFilter(new PredicateExtractionFilter());
    AnswerSelection.addFilter(new FactoidsFromPredicatesFilter());
    AnswerSelection.addFilter(new TruncationFilter());

    // answer selection filters
    AnswerSelection.addFilter(new StopwordFilter());
    AnswerSelection.addFilter(new QuestionKeywordsFilter());
    AnswerSelection.addFilter(new AnswerProjectionFilter(resultsCorp));
    AnswerSelection.addFilter(new ScoreNormalizationFilter(NORMALIZER));
    AnswerSelection.addFilter(new ScoreCombinationFilter());
    AnswerSelection.addFilter(new FactoidSubsetFilter());
    AnswerSelection.addFilter(new DuplicateFilter());
    AnswerSelection.addFilter(new ScoreSorterFilter());
    AnswerSelection.addFilter(new ResultLengthFilter());
}
Also used : ScoreCombinationFilter(info.ephyra.answerselection.filters.ScoreCombinationFilter) ScoreSorterFilter(info.ephyra.answerselection.filters.ScoreSorterFilter) Ontology(info.ephyra.nlp.semantics.ontologies.Ontology) AnswerPatternFilter(info.ephyra.answerselection.filters.AnswerPatternFilter) PredicateExtractionFilter(info.ephyra.answerselection.filters.PredicateExtractionFilter) ScoreNormalizationFilter(info.ephyra.answerselection.filters.ScoreNormalizationFilter) WebDocumentFetcherFilter(info.ephyra.answerselection.filters.WebDocumentFetcherFilter) StopwordFilter(info.ephyra.answerselection.filters.StopwordFilter) TruncationFilter(info.ephyra.answerselection.filters.TruncationFilter) WordNet(info.ephyra.nlp.semantics.ontologies.WordNet) BagOfWordsG(info.ephyra.querygeneration.generators.BagOfWordsG) PredicateG(info.ephyra.querygeneration.generators.PredicateG) AnswerTypeFilter(info.ephyra.answerselection.filters.AnswerTypeFilter) ResultLengthFilter(info.ephyra.answerselection.filters.ResultLengthFilter) QuestionReformulationG(info.ephyra.querygeneration.generators.QuestionReformulationG) QuestionKeywordsFilter(info.ephyra.answerselection.filters.QuestionKeywordsFilter) DuplicateFilter(info.ephyra.answerselection.filters.DuplicateFilter) FactoidSubsetFilter(info.ephyra.answerselection.filters.FactoidSubsetFilter) BagOfTermsG(info.ephyra.querygeneration.generators.BagOfTermsG) BingKM(info.ephyra.search.searchers.BingKM) QuestionInterpretationG(info.ephyra.querygeneration.generators.QuestionInterpretationG) FactoidsFromPredicatesFilter(info.ephyra.answerselection.filters.FactoidsFromPredicatesFilter) AnswerProjectionFilter(info.ephyra.answerselection.filters.AnswerProjectionFilter)

Aggregations

BingKM (info.ephyra.search.searchers.BingKM)2 AnswerPatternFilter (info.ephyra.answerselection.filters.AnswerPatternFilter)1 AnswerProjectionFilter (info.ephyra.answerselection.filters.AnswerProjectionFilter)1 AnswerTypeFilter (info.ephyra.answerselection.filters.AnswerTypeFilter)1 DuplicateFilter (info.ephyra.answerselection.filters.DuplicateFilter)1 FactoidSubsetFilter (info.ephyra.answerselection.filters.FactoidSubsetFilter)1 FactoidsFromPredicatesFilter (info.ephyra.answerselection.filters.FactoidsFromPredicatesFilter)1 PredicateExtractionFilter (info.ephyra.answerselection.filters.PredicateExtractionFilter)1 QuestionKeywordsFilter (info.ephyra.answerselection.filters.QuestionKeywordsFilter)1 ResultLengthFilter (info.ephyra.answerselection.filters.ResultLengthFilter)1 ScoreCombinationFilter (info.ephyra.answerselection.filters.ScoreCombinationFilter)1 ScoreNormalizationFilter (info.ephyra.answerselection.filters.ScoreNormalizationFilter)1 ScoreSorterFilter (info.ephyra.answerselection.filters.ScoreSorterFilter)1 StopwordFilter (info.ephyra.answerselection.filters.StopwordFilter)1 TruncationFilter (info.ephyra.answerselection.filters.TruncationFilter)1 WebDocumentFetcherFilter (info.ephyra.answerselection.filters.WebDocumentFetcherFilter)1 Ontology (info.ephyra.nlp.semantics.ontologies.Ontology)1 WordNet (info.ephyra.nlp.semantics.ontologies.WordNet)1 BagOfTermsG (info.ephyra.querygeneration.generators.BagOfTermsG)1 BagOfWordsG (info.ephyra.querygeneration.generators.BagOfWordsG)1