Search in sources :

Example 1 with BagOfWordsG

use of info.ephyra.querygeneration.generators.BagOfWordsG in project lucida by claritylab.

Class OpenEphyraCorpus, method initFactoidCorpus.

/**
 * Configures the factoid question pipeline so that a local corpus serves as
 * the knowledge source.
 * <p>
 * Each stage (dictionaries, ontologies, query generators, knowledge miners,
 * knowledge annotators, answer filters) is cleared before its components are
 * registered.
 */
protected void initFactoidCorpus() {
    // ---- question analysis ----
    // one WordNet instance is registered both as dictionary and as ontology
    final Ontology lexicon = new WordNet();

    // dictionaries used for term extraction
    QuestionAnalysis.clearDictionaries();
    QuestionAnalysis.addDictionary(lexicon);

    // ontologies used for term expansion
    QuestionAnalysis.clearOntologies();
    QuestionAnalysis.addOntology(lexicon);

    // ---- query generation ----
    QueryGeneration.clearQueryGenerators();
    QueryGeneration.addQueryGenerator(new BagOfWordsG());
    QueryGeneration.addQueryGenerator(new BagOfTermsG());
    QueryGeneration.addQueryGenerator(new PredicateG());
    QueryGeneration.addQueryGenerator(new QuestionInterpretationG());
    QueryGeneration.addQueryGenerator(new QuestionReformulationG());

    // ---- search ----
    // knowledge miners (unstructured knowledge sources):
    // one IndriKM per configured group of indices, then one per group of servers
    Search.clearKnowledgeMiners();
    for (String[] indexGroup : IndriKM.getIndriIndices()) {
        IndriKM indexMiner = new IndriKM(indexGroup, false);
        Search.addKnowledgeMiner(indexMiner);
    }
    for (String[] serverGroup : IndriKM.getIndriServers()) {
        IndriKM serverMiner = new IndriKM(serverGroup, true);
        Search.addKnowledgeMiner(serverMiner);
    }

    // knowledge annotators ((semi-)structured knowledge sources): none registered
    Search.clearKnowledgeAnnotators();

    // ---- answer extraction and selection ----
    // filters run in the order in which they are added below
    AnswerSelection.clearFilters();

    // answer extraction filters
    AnswerSelection.addFilter(new AnswerTypeFilter());
    AnswerSelection.addFilter(new AnswerPatternFilter());
    AnswerSelection.addFilter(new WebDocumentFetcherFilter());
    AnswerSelection.addFilter(new PredicateExtractionFilter());
    AnswerSelection.addFilter(new FactoidsFromPredicatesFilter());
    AnswerSelection.addFilter(new TruncationFilter());

    // answer selection filters: none are registered by this method
}
Also used : Ontology(info.ephyra.nlp.semantics.ontologies.Ontology) AnswerPatternFilter(info.ephyra.answerselection.filters.AnswerPatternFilter) PredicateExtractionFilter(info.ephyra.answerselection.filters.PredicateExtractionFilter) WebDocumentFetcherFilter(info.ephyra.answerselection.filters.WebDocumentFetcherFilter) IndriKM(info.ephyra.search.searchers.IndriKM) TruncationFilter(info.ephyra.answerselection.filters.TruncationFilter) WordNet(info.ephyra.nlp.semantics.ontologies.WordNet) BagOfWordsG(info.ephyra.querygeneration.generators.BagOfWordsG) PredicateG(info.ephyra.querygeneration.generators.PredicateG) AnswerTypeFilter(info.ephyra.answerselection.filters.AnswerTypeFilter) QuestionReformulationG(info.ephyra.querygeneration.generators.QuestionReformulationG) BagOfTermsG(info.ephyra.querygeneration.generators.BagOfTermsG) QuestionInterpretationG(info.ephyra.querygeneration.generators.QuestionInterpretationG) FactoidsFromPredicatesFilter(info.ephyra.answerselection.filters.FactoidsFromPredicatesFilter)

Example 2 with BagOfWordsG

use of info.ephyra.querygeneration.generators.BagOfWordsG in project lucida by claritylab.

Class OpenEphyra, method initFactoid.

/**
 * Configures the pipeline used to answer factoid questions.
 * <p>
 * Each stage (dictionaries, ontologies, query generators, knowledge miners,
 * knowledge annotators, answer filters) is cleared before its components are
 * registered.
 */
protected void initFactoid() {
    // ---- question analysis ----
    // one WordNet instance is registered both as dictionary and as ontology
    final Ontology lexicon = new WordNet();

    // dictionaries used for term extraction
    QuestionAnalysis.clearDictionaries();
    QuestionAnalysis.addDictionary(lexicon);

    // ontologies used for term expansion
    QuestionAnalysis.clearOntologies();
    QuestionAnalysis.addOntology(lexicon);

    // ---- query generation ----
    QueryGeneration.clearQueryGenerators();
    QueryGeneration.addQueryGenerator(new BagOfWordsG());
    QueryGeneration.addQueryGenerator(new BagOfTermsG());
    QueryGeneration.addQueryGenerator(new PredicateG());
    QueryGeneration.addQueryGenerator(new QuestionInterpretationG());
    QueryGeneration.addQueryGenerator(new QuestionReformulationG());

    // ---- search ----
    // knowledge miners (unstructured knowledge sources):
    // only the Indri indices are active; the alternatives below are commented out
    Search.clearKnowledgeMiners();
    // Search.addKnowledgeMiner(new YahooKM());
    for (String[] indexGroup : IndriKM.getIndriIndices()) {
        IndriKM indexMiner = new IndriKM(indexGroup, false);
        Search.addKnowledgeMiner(indexMiner);
    }
    // for (String[] indriServers : IndriKM.getIndriServers())
    //     Search.addKnowledgeMiner(new IndriKM(indriServers, true));

    // knowledge annotators ((semi-)structured knowledge sources): none registered
    Search.clearKnowledgeAnnotators();

    // ---- answer extraction and selection ----
    // filters run in the order in which they are added below
    AnswerSelection.clearFilters();

    // answer extraction filters
    AnswerSelection.addFilter(new AnswerTypeFilter());
    AnswerSelection.addFilter(new AnswerPatternFilter());
    // AnswerSelection.addFilter(new WebDocumentFetcherFilter());
    AnswerSelection.addFilter(new PredicateExtractionFilter());
    AnswerSelection.addFilter(new FactoidsFromPredicatesFilter());
    AnswerSelection.addFilter(new TruncationFilter());

    // answer selection filters
    AnswerSelection.addFilter(new StopwordFilter());
    AnswerSelection.addFilter(new QuestionKeywordsFilter());
    AnswerSelection.addFilter(new ScoreNormalizationFilter(NORMALIZER));
    AnswerSelection.addFilter(new ScoreCombinationFilter());
    AnswerSelection.addFilter(new FactoidSubsetFilter());
    AnswerSelection.addFilter(new DuplicateFilter());
    AnswerSelection.addFilter(new ScoreSorterFilter());
}
Also used : ScoreCombinationFilter(info.ephyra.answerselection.filters.ScoreCombinationFilter) ScoreSorterFilter(info.ephyra.answerselection.filters.ScoreSorterFilter) Ontology(info.ephyra.nlp.semantics.ontologies.Ontology) AnswerPatternFilter(info.ephyra.answerselection.filters.AnswerPatternFilter) PredicateExtractionFilter(info.ephyra.answerselection.filters.PredicateExtractionFilter) ScoreNormalizationFilter(info.ephyra.answerselection.filters.ScoreNormalizationFilter) IndriKM(info.ephyra.search.searchers.IndriKM) StopwordFilter(info.ephyra.answerselection.filters.StopwordFilter) TruncationFilter(info.ephyra.answerselection.filters.TruncationFilter) WordNet(info.ephyra.nlp.semantics.ontologies.WordNet) BagOfWordsG(info.ephyra.querygeneration.generators.BagOfWordsG) PredicateG(info.ephyra.querygeneration.generators.PredicateG) AnswerTypeFilter(info.ephyra.answerselection.filters.AnswerTypeFilter) QuestionReformulationG(info.ephyra.querygeneration.generators.QuestionReformulationG) QuestionKeywordsFilter(info.ephyra.answerselection.filters.QuestionKeywordsFilter) DuplicateFilter(info.ephyra.answerselection.filters.DuplicateFilter) FactoidSubsetFilter(info.ephyra.answerselection.filters.FactoidSubsetFilter) BagOfTermsG(info.ephyra.querygeneration.generators.BagOfTermsG) QuestionInterpretationG(info.ephyra.querygeneration.generators.QuestionInterpretationG) FactoidsFromPredicatesFilter(info.ephyra.answerselection.filters.FactoidsFromPredicatesFilter)

Example 3 with BagOfWordsG

use of info.ephyra.querygeneration.generators.BagOfWordsG in project lucida by claritylab.

Class WebTermImportanceFilter, method main.

/**
 * Test driver: initializes the NLP components used below, loads TREC targets
 * via {@code TREC13To16Parser.loadTargets(args[0])} and, for each target,
 * applies a {@code TargetGeneratorTest} filter to a single placeholder result
 * built from the first bag-of-words query.
 *
 * @param args command-line arguments; {@code args[0]} is required and is
 *             handed to {@code TREC13To16Parser.loadTargets}
 */
public static void main(String[] args) {
    TEST_TARGET_GENERATION = true;
    MsgPrinter.enableStatusMsgs(true);
    MsgPrinter.enableErrorMsgs(true);

    // Fail fast: without this check a missing argument only surfaces as an
    // ArrayIndexOutOfBoundsException after all models have been loaded.
    if (args == null || args.length == 0) {
        MsgPrinter.printErrorMsg("Missing argument: args[0] must name the TREC targets to load.");
        return;
    }

    // create tokenizer
    MsgPrinter.printStatusMsg("Creating tokenizer...");
    if (!OpenNLP.createTokenizer("res/nlp/tokenizer/opennlp/EnglishTok.bin.gz")) {
        MsgPrinter.printErrorMsg("Could not create tokenizer.");
    }
    //		LingPipe.createTokenizer();

    // create sentence detector (currently commented out)
    //		MsgPrinter.printStatusMsg("Creating sentence detector...");
    //		if (!OpenNLP.createSentenceDetector("res/nlp/sentencedetector/opennlp/EnglishSD.bin.gz"))
    //			MsgPrinter.printErrorMsg("Could not create sentence detector.");
    //		LingPipe.createSentenceDetector();

    // create stemmer
    MsgPrinter.printStatusMsg("Creating stemmer...");
    SnowballStemmer.create();

    // create part of speech tagger
    MsgPrinter.printStatusMsg("Creating POS tagger...");
    if (!OpenNLP.createPosTagger("res/nlp/postagger/opennlp/tag.bin.gz", "res/nlp/postagger/opennlp/tagdict")) {
        MsgPrinter.printErrorMsg("Could not create OpenNLP POS tagger.");
    }
    //		if (!StanfordPosTagger.init("res/nlp/postagger/stanford/" +
    //				"train-wsj-0-18.holder"))
    //			MsgPrinter.printErrorMsg("Could not create Stanford POS tagger.");

    // create chunker
    MsgPrinter.printStatusMsg("Creating chunker...");
    if (!OpenNLP.createChunker("res/nlp/phrasechunker/opennlp/" + "EnglishChunk.bin.gz")) {
        MsgPrinter.printErrorMsg("Could not create chunker.");
    }

    // create named entity taggers
    MsgPrinter.printStatusMsg("Creating NE taggers...");
    NETagger.loadListTaggers("res/nlp/netagger/lists/");
    NETagger.loadRegExTaggers("res/nlp/netagger/patterns.lst");
    MsgPrinter.printStatusMsg("  ...loading models");
    // init() is only attempted when the Stanford tagger is not yet initialized
    if (!StanfordNeTagger.isInitialized() && !StanfordNeTagger.init()) {
        MsgPrinter.printErrorMsg("Could not create Stanford NE tagger.");
    }
    MsgPrinter.printStatusMsg("  ...done");

    WebTermImportanceFilter wtif = new TargetGeneratorTest(NO_NORMALIZATION);
    TRECTarget[] targets = TREC13To16Parser.loadTargets(args[0]);
    for (TRECTarget target : targets) {
        // the target description plays the role of the question
        String question = target.getTargetDesc();

        // query generation
        MsgPrinter.printGeneratingQueries();
        String qn = QuestionNormalizer.normalize(question);
        // print normalized question string
        MsgPrinter.printNormalization(qn);
        // log normalized question string
        Logger.logNormalization(qn);

        String[] kws = KeywordExtractor.getKeywords(qn);
        AnalyzedQuestion aq = new AnalyzedQuestion(question);
        aq.setKeywords(kws);
        aq.setFactoid(false);

        Query[] queries = new BagOfWordsG().generateQueries(aq);
        for (Query query : queries) {
            query.setOriginalQueryString(question);
        }

        // a single placeholder result, attached to the first query, drives the filter
        Result[] results = new Result[1];
        results[0] = new Result("This would be the answer", queries[0]);
        wtif.apply(results);
    }
}
Also used : Query(info.ephyra.querygeneration.Query) BagOfWordsG(info.ephyra.querygeneration.generators.BagOfWordsG) Result(info.ephyra.search.Result) TRECTarget(info.ephyra.trec.TRECTarget) AnalyzedQuestion(info.ephyra.questionanalysis.AnalyzedQuestion)

Example 4 with BagOfWordsG

use of info.ephyra.querygeneration.generators.BagOfWordsG in project lucida by claritylab.

Class OpenEphyraServer, method initFactoid.

/**
 * Configures the pipeline used to answer factoid questions.
 * <p>
 * Each stage (dictionaries, ontologies, query generators, knowledge miners,
 * knowledge annotators, answer filters) is cleared before its components are
 * registered.
 */
protected void initFactoid() {
    // ---- question analysis ----
    // one WordNet instance is registered both as dictionary and as ontology
    final Ontology lexicon = new WordNet();

    // dictionaries used for term extraction
    QuestionAnalysis.clearDictionaries();
    QuestionAnalysis.addDictionary(lexicon);

    // ontologies used for term expansion
    QuestionAnalysis.clearOntologies();
    QuestionAnalysis.addOntology(lexicon);

    // ---- query generation ----
    QueryGeneration.clearQueryGenerators();
    QueryGeneration.addQueryGenerator(new BagOfWordsG());
    QueryGeneration.addQueryGenerator(new BagOfTermsG());
    QueryGeneration.addQueryGenerator(new PredicateG());
    QueryGeneration.addQueryGenerator(new QuestionInterpretationG());
    QueryGeneration.addQueryGenerator(new QuestionReformulationG());

    // ---- search ----
    // knowledge miners (unstructured knowledge sources):
    // one IndriKM per configured group of indices
    Search.clearKnowledgeMiners();
    for (String[] indexGroup : IndriKM.getIndriIndices()) {
        IndriKM indexMiner = new IndriKM(indexGroup, false);
        Search.addKnowledgeMiner(indexMiner);
    }

    // knowledge annotators ((semi-)structured knowledge sources): none registered
    Search.clearKnowledgeAnnotators();
    /* Search.addKnowledgeAnnotator(new WikipediaKA("list.txt")); */

    // ---- answer extraction and selection ----
    // filters run in the order in which they are added below
    AnswerSelection.clearFilters();

    // answer extraction filters
    AnswerSelection.addFilter(new AnswerTypeFilter());
    AnswerSelection.addFilter(new AnswerPatternFilter());
    AnswerSelection.addFilter(new PredicateExtractionFilter());
    AnswerSelection.addFilter(new FactoidsFromPredicatesFilter());
    AnswerSelection.addFilter(new TruncationFilter());

    // answer selection filters
    AnswerSelection.addFilter(new StopwordFilter());
    AnswerSelection.addFilter(new QuestionKeywordsFilter());
    AnswerSelection.addFilter(new ScoreNormalizationFilter(NORMALIZER));
    AnswerSelection.addFilter(new ScoreCombinationFilter());
    AnswerSelection.addFilter(new FactoidSubsetFilter());
    AnswerSelection.addFilter(new DuplicateFilter());
    AnswerSelection.addFilter(new ScoreSorterFilter());
}
Also used : ScoreCombinationFilter(info.ephyra.answerselection.filters.ScoreCombinationFilter) ScoreSorterFilter(info.ephyra.answerselection.filters.ScoreSorterFilter) Ontology(info.ephyra.nlp.semantics.ontologies.Ontology) AnswerPatternFilter(info.ephyra.answerselection.filters.AnswerPatternFilter) PredicateExtractionFilter(info.ephyra.answerselection.filters.PredicateExtractionFilter) ScoreNormalizationFilter(info.ephyra.answerselection.filters.ScoreNormalizationFilter) IndriKM(info.ephyra.search.searchers.IndriKM) StopwordFilter(info.ephyra.answerselection.filters.StopwordFilter) TruncationFilter(info.ephyra.answerselection.filters.TruncationFilter) WordNet(info.ephyra.nlp.semantics.ontologies.WordNet) BagOfWordsG(info.ephyra.querygeneration.generators.BagOfWordsG) PredicateG(info.ephyra.querygeneration.generators.PredicateG) AnswerTypeFilter(info.ephyra.answerselection.filters.AnswerTypeFilter) QuestionReformulationG(info.ephyra.querygeneration.generators.QuestionReformulationG) QuestionKeywordsFilter(info.ephyra.answerselection.filters.QuestionKeywordsFilter) DuplicateFilter(info.ephyra.answerselection.filters.DuplicateFilter) FactoidSubsetFilter(info.ephyra.answerselection.filters.FactoidSubsetFilter) BagOfTermsG(info.ephyra.querygeneration.generators.BagOfTermsG) QuestionInterpretationG(info.ephyra.querygeneration.generators.QuestionInterpretationG) FactoidsFromPredicatesFilter(info.ephyra.answerselection.filters.FactoidsFromPredicatesFilter)

Example 5 with BagOfWordsG

use of info.ephyra.querygeneration.generators.BagOfWordsG in project lucida by claritylab.

Class WikipediaTermImportanceFilter, method main.

/**
 * Test driver: initializes the NLP components used below, loads TREC targets
 * via {@code TREC13To16Parser.loadTargets(args[0])} and, for each target,
 * applies a {@code WikipediaTermImportanceFilter} to a single placeholder
 * result built from the first bag-of-words query.
 *
 * @param args command-line arguments; {@code args[0]} is required and is
 *             handed to {@code TREC13To16Parser.loadTargets}
 */
public static void main(String[] args) {
    TEST_TERM_DOWMLOD = true;
    MsgPrinter.enableStatusMsgs(true);
    MsgPrinter.enableErrorMsgs(true);

    // Fail fast: without this check a missing argument only surfaces as an
    // ArrayIndexOutOfBoundsException after the components have been loaded.
    if (args == null || args.length == 0) {
        MsgPrinter.printErrorMsg("Missing argument: args[0] must name the TREC targets to load.");
        return;
    }

    // create tokenizer
    MsgPrinter.printStatusMsg("Creating tokenizer...");
    if (!OpenNLP.createTokenizer("res/nlp/tokenizer/opennlp/EnglishTok.bin.gz")) {
        MsgPrinter.printErrorMsg("Could not create tokenizer.");
    }
    //		LingPipe.createTokenizer();

    //		// create sentence detector
    //		MsgPrinter.printStatusMsg("Creating sentence detector...");
    //		if (!OpenNLP.createSentenceDetector("res/nlp/sentencedetector/opennlp/EnglishSD.bin.gz"))
    //			MsgPrinter.printErrorMsg("Could not create sentence detector.");
    //		LingPipe.createSentenceDetector();

    // create stemmer
    MsgPrinter.printStatusMsg("Creating stemmer...");
    SnowballStemmer.create();

    //		// create part of speech tagger
    //		MsgPrinter.printStatusMsg("Creating POS tagger...");
    //		if (!OpenNLP.createPosTagger("res/nlp/postagger/opennlp/tag.bin.gz",
    //									 "res/nlp/postagger/opennlp/tagdict"))
    //			MsgPrinter.printErrorMsg("Could not create OpenNLP POS tagger.");
    //		if (!StanfordPosTagger.init("res/nlp/postagger/stanford/" +
    //				"train-wsj-0-18.holder"))
    //			MsgPrinter.printErrorMsg("Could not create Stanford POS tagger.");

    //		// create chunker
    //		MsgPrinter.printStatusMsg("Creating chunker...");
    //		if (!OpenNLP.createChunker("res/nlp/phrasechunker/opennlp/" +
    //								   "EnglishChunk.bin.gz"))
    //			MsgPrinter.printErrorMsg("Could not create chunker.");

    // create named entity taggers (only list and regex taggers are loaded)
    MsgPrinter.printStatusMsg("Creating NE taggers...");
    NETagger.loadListTaggers("res/nlp/netagger/lists/");
    NETagger.loadRegExTaggers("res/nlp/netagger/patterns.lst");
    MsgPrinter.printStatusMsg("  ...loading models");
    //		if (!NETagger.loadNameFinders("res/nlp/netagger/opennlp/"))
    //			MsgPrinter.printErrorMsg("Could not create OpenNLP NE tagger.");
    //		if (!StanfordNeTagger.isInitialized() && !StanfordNeTagger.init())
    //			MsgPrinter.printErrorMsg("Could not create Stanford NE tagger.");
    MsgPrinter.printStatusMsg("  ...done");

    WikipediaTermImportanceFilter wtif = new WikipediaTermImportanceFilter(NO_NORMALIZATION, NO_NORMALIZATION, false);
    TRECTarget[] targets = TREC13To16Parser.loadTargets(args[0]);
    for (TRECTarget target : targets) {
        // the target description plays the role of the question
        String question = target.getTargetDesc();

        // query generation
        MsgPrinter.printGeneratingQueries();
        String qn = QuestionNormalizer.normalize(question);
        // print normalized question string
        MsgPrinter.printNormalization(qn);
        // log normalized question string
        Logger.logNormalization(qn);

        String[] kws = KeywordExtractor.getKeywords(qn);
        AnalyzedQuestion aq = new AnalyzedQuestion(question);
        aq.setKeywords(kws);
        aq.setFactoid(false);

        Query[] queries = new BagOfWordsG().generateQueries(aq);
        for (Query query : queries) {
            query.setOriginalQueryString(question);
        }

        // a single placeholder result, attached to the first query, drives the filter
        Result[] results = new Result[1];
        results[0] = new Result("This would be the answer", queries[0]);
        wtif.apply(results);
    }
}
Also used : Query(info.ephyra.querygeneration.Query) TRECTarget(info.ephyra.trec.TRECTarget) AnalyzedQuestion(info.ephyra.questionanalysis.AnalyzedQuestion) BagOfWordsG(info.ephyra.querygeneration.generators.BagOfWordsG) Result(info.ephyra.search.Result)

Aggregations

BagOfWordsG (info.ephyra.querygeneration.generators.BagOfWordsG)7 AnswerPatternFilter (info.ephyra.answerselection.filters.AnswerPatternFilter)4 AnswerTypeFilter (info.ephyra.answerselection.filters.AnswerTypeFilter)4 FactoidsFromPredicatesFilter (info.ephyra.answerselection.filters.FactoidsFromPredicatesFilter)4 PredicateExtractionFilter (info.ephyra.answerselection.filters.PredicateExtractionFilter)4 TruncationFilter (info.ephyra.answerselection.filters.TruncationFilter)4 Ontology (info.ephyra.nlp.semantics.ontologies.Ontology)4 WordNet (info.ephyra.nlp.semantics.ontologies.WordNet)4 BagOfTermsG (info.ephyra.querygeneration.generators.BagOfTermsG)4 PredicateG (info.ephyra.querygeneration.generators.PredicateG)4 QuestionInterpretationG (info.ephyra.querygeneration.generators.QuestionInterpretationG)4 QuestionReformulationG (info.ephyra.querygeneration.generators.QuestionReformulationG)4 DuplicateFilter (info.ephyra.answerselection.filters.DuplicateFilter)3 FactoidSubsetFilter (info.ephyra.answerselection.filters.FactoidSubsetFilter)3 QuestionKeywordsFilter (info.ephyra.answerselection.filters.QuestionKeywordsFilter)3 ScoreCombinationFilter (info.ephyra.answerselection.filters.ScoreCombinationFilter)3 ScoreNormalizationFilter (info.ephyra.answerselection.filters.ScoreNormalizationFilter)3 ScoreSorterFilter (info.ephyra.answerselection.filters.ScoreSorterFilter)3 StopwordFilter (info.ephyra.answerselection.filters.StopwordFilter)3 Query (info.ephyra.querygeneration.Query)3