Search in sources :

Example 6 with BagOfWordsG

use of info.ephyra.querygeneration.generators.BagOfWordsG in project lucida by claritylab.

The class EphyraTREC13To16, method askOther.

// Layout 2
//	/**
//	 * Initializes the pipeline for 'other' questions.
//	 */
//	protected void initOther() {
//		// query generation
//		QueryGeneration.clearQueryGenerators();
//		
//		// search
//		// - knowledge miners for unstructured knowledge sources
//		Search.clearKnowledgeMiners();
//		for (String[] indriIndices : IndriKM.getIndriIndices())
//			Search.addKnowledgeMiner(new IndriKM(indriIndices, false));
//		for (String[] indriServers : IndriKM.getIndriServers())
//			Search.addKnowledgeMiner(new IndriKM(indriServers, true));
//		// - knowledge annotators for (semi-)structured knowledge sources
//		Search.clearKnowledgeAnnotators();
//		
//		// answer extraction and selection
//		// (the filters are applied in this order)
//		AnswerSelection.clearFilters();
//		
//		//	initialize scores
//		AnswerSelection.addFilter(new ScoreResetterFilter());
//		
//		//	extract sentences from snippets
//		AnswerSelection.addFilter(new SentenceExtractionFilter());
//		
//		//	cut meaningless introductions from sentences
//		AnswerSelection.addFilter(new CutKeywordsFilter());
//		AnswerSelection.addFilter(new CutStatementProviderFilter());
//		AnswerSelection.addFilter(new SentenceSplitterFilter());
//		AnswerSelection.addFilter(new CutKeywordsFilter());
//		
//		//	remove duplicates
//		AnswerSelection.addFilter(new DuplicateSnippetFilter());
//		
//		//	throw out enumerations of proper names
//		AnswerSelection.addFilter(new ProperNameFilter());
//		
//		//	throw out direct speech snippets, rarely contain useful information
//		AnswerSelection.addFilter(new DirectSpeechFilter());
//		
//		AnswerSelection.addFilter(
//				new WikipediaGoogleWebTermImportanceFilter(
//					WebTermImportanceFilter.LOG_LENGTH_NORMALIZATION,
//					WebTermImportanceFilter.LOG_LENGTH_NORMALIZATION,
//					false
//				)
//			);
//		AnswerSelection.addFilter(new ScoreSorterFilter());
//		
//		//	cut off result
//		AnswerSelection.addFilter(new ResultLengthFilter(3000));
//	}
// Layout 3
//	/**
//	 * Initializes the pipeline for 'other' questions.
//	 */
//	protected void initOther() {
//		// query generation
//		QueryGeneration.clearQueryGenerators();
//		
//		// search
//		// - knowledge miners for unstructured knowledge sources
//		Search.clearKnowledgeMiners();
//		for (String[] indriIndices : IndriKM.getIndriIndices())
//			Search.addKnowledgeMiner(new IndriDocumentKM(indriIndices, false));
//		for (String[] indriServers : IndriKM.getIndriServers())
//			Search.addKnowledgeMiner(new IndriDocumentKM(indriServers, true));
//		// - knowledge annotators for (semi-)structured knowledge sources
//		Search.clearKnowledgeAnnotators();
//		
//		// answer extraction and selection
//		// (the filters are applied in this order)
//		AnswerSelection.clearFilters();
//		
//		//	initialize scores
//		AnswerSelection.addFilter(new ScoreResetterFilter());
//		
//		//	extract sentences from snippets
//		AnswerSelection.addFilter(new SentenceExtractionFilter());
//		
//		//	cut meaningless introductions from sentences
//		AnswerSelection.addFilter(new CutKeywordsFilter());
//		AnswerSelection.addFilter(new CutStatementProviderFilter());
//		AnswerSelection.addFilter(new SentenceSplitterFilter());
//		AnswerSelection.addFilter(new CutKeywordsFilter());
//		
//		//	remove duplicates
//		AnswerSelection.addFilter(new DuplicateSnippetFilter());
//		
//		//	throw out enumerations of proper names
//		AnswerSelection.addFilter(new ProperNameFilter());
//		
//		//	throw out direct speech snippets, rarely contain useful information
//		AnswerSelection.addFilter(new DirectSpeechFilter());
//		
//		//	sort out snippets containing no new terms
//		AnswerSelection.addFilter(new TermFilter());
//		
//		AnswerSelection.addFilter(
//				new WikipediaGoogleWebTermImportanceFilter(
//					WebTermImportanceFilter.LOG_LENGTH_NORMALIZATION,
//					WebTermImportanceFilter.LOG_LENGTH_NORMALIZATION,
//					false
//				)
//			);
//		AnswerSelection.addFilter(new ScoreSorterFilter());
//		
//		//	cut off result
//		AnswerSelection.addFilter(new ResultLengthFilter(3000));
//	}
/**
 * Asks Ephyra an 'other' question.
 *
 * <p>The pipeline is (re-)initialized via {@code initOther()}, the question
 * is normalized, keywords are extracted from the normalized form, and a
 * single {@code BagOfWordsG} generator produces the queries. Every query is
 * tagged with the original (un-normalized) question string before search and
 * answer selection are run.
 *
 * @param question other question
 * @return array of results
 */
public final Result[] askOther(String question) {
    // set up the pipeline for 'other' questions
    initOther();

    // --- query generation ---
    MsgPrinter.printGeneratingQueries();
    final String normalized = QuestionNormalizer.normalize(question);
    // the normalized question string is both printed and logged
    MsgPrinter.printNormalization(normalized);
    Logger.logNormalization(normalized);

    // keywords come from the normalized string, whereas the analyzed
    // question wraps the original string and is marked as non-factoid
    final String[] keywords = KeywordExtractor.getKeywords(normalized);
    final AnalyzedQuestion analyzed = new AnalyzedQuestion(question);
    analyzed.setKeywords(keywords);
    analyzed.setFactoid(false);

    final Query[] queries = new BagOfWordsG().generateQueries(analyzed);
    for (Query query : queries) {
        query.setOriginalQueryString(question);
    }
    // the query strings are both printed and logged
    MsgPrinter.printQueryStrings(queries);
    Logger.logQueryStrings(queries);

    // --- search ---
    MsgPrinter.printSearching();
    final Result[] hits = Search.doSearch(queries);

    // --- answer selection ---
    MsgPrinter.printSelectingAnswers();
    // NOTE(review): Integer.MAX_VALUE / 0 presumably mean "no result limit"
    // and "no minimum score" - confirm against AnswerSelection.getResults
    return AnswerSelection.getResults(hits, Integer.MAX_VALUE, 0);
}
Also used : Query(info.ephyra.querygeneration.Query) AnalyzedQuestion(info.ephyra.questionanalysis.AnalyzedQuestion) BagOfWordsG(info.ephyra.querygeneration.generators.BagOfWordsG) Result(info.ephyra.search.Result)

Example 7 with BagOfWordsG

use of info.ephyra.querygeneration.generators.BagOfWordsG in project lucida by claritylab.

The class OpenEphyraCorpus, method initFactoidWeb.

/**
 * Initializes the pipeline for factoid questions, using the Web as a
 * knowledge source.
 *
 * <p>Each pipeline stage (question analysis, query generation, search,
 * answer selection) is first cleared and then repopulated, so any
 * components registered earlier on these static registries are dropped.
 * The order of the {@code addFilter} calls matters: the filters are
 * applied in the order in which they are added.
 *
 * @param resultsCorp results retrieved from the corpus; handed to the
 *                    {@code AnswerProjectionFilter} registered below
 */
protected void initFactoidWeb(Result[] resultsCorp) {
    // question analysis
    // (a single WordNet instance serves as both dictionary and ontology)
    Ontology wordNet = new WordNet();
    // - dictionaries for term extraction
    QuestionAnalysis.clearDictionaries();
    QuestionAnalysis.addDictionary(wordNet);
    // - ontologies for term expansion
    QuestionAnalysis.clearOntologies();
    QuestionAnalysis.addOntology(wordNet);
    // query generation
    // (five generators are registered, starting with the bag-of-words one)
    QueryGeneration.clearQueryGenerators();
    QueryGeneration.addQueryGenerator(new BagOfWordsG());
    QueryGeneration.addQueryGenerator(new BagOfTermsG());
    QueryGeneration.addQueryGenerator(new PredicateG());
    QueryGeneration.addQueryGenerator(new QuestionInterpretationG());
    QueryGeneration.addQueryGenerator(new QuestionReformulationG());
    // search
    // - knowledge miners for unstructured knowledge sources
    //   (only the Bing miner is active; the Google and Yahoo miners below
    //   are disabled)
    Search.clearKnowledgeMiners();
    Search.addKnowledgeMiner(new BingKM());
    //		Search.addKnowledgeMiner(new GoogleKM());
    //		Search.addKnowledgeMiner(new YahooKM());
    // - knowledge annotators for (semi-)structured knowledge sources
    //   (cleared only; none are registered for this pipeline)
    Search.clearKnowledgeAnnotators();
    // answer extraction and selection
    // (the filters are applied in this order)
    AnswerSelection.clearFilters();
    // - answer extraction filters
    AnswerSelection.addFilter(new AnswerTypeFilter());
    AnswerSelection.addFilter(new AnswerPatternFilter());
    AnswerSelection.addFilter(new WebDocumentFetcherFilter());
    AnswerSelection.addFilter(new PredicateExtractionFilter());
    AnswerSelection.addFilter(new FactoidsFromPredicatesFilter());
    AnswerSelection.addFilter(new TruncationFilter());
    // - answer selection filters
    AnswerSelection.addFilter(new StopwordFilter());
    AnswerSelection.addFilter(new QuestionKeywordsFilter());
    // the corpus results passed in by the caller are given to the
    // projection filter
    AnswerSelection.addFilter(new AnswerProjectionFilter(resultsCorp));
    // NORMALIZER is declared outside this method (not visible here)
    AnswerSelection.addFilter(new ScoreNormalizationFilter(NORMALIZER));
    AnswerSelection.addFilter(new ScoreCombinationFilter());
    AnswerSelection.addFilter(new FactoidSubsetFilter());
    AnswerSelection.addFilter(new DuplicateFilter());
    AnswerSelection.addFilter(new ScoreSorterFilter());
    // NOTE(review): the no-arg ResultLengthFilter presumably applies a
    // default cut-off - confirm in ResultLengthFilter
    AnswerSelection.addFilter(new ResultLengthFilter());
}
Also used : ScoreCombinationFilter(info.ephyra.answerselection.filters.ScoreCombinationFilter) ScoreSorterFilter(info.ephyra.answerselection.filters.ScoreSorterFilter) Ontology(info.ephyra.nlp.semantics.ontologies.Ontology) AnswerPatternFilter(info.ephyra.answerselection.filters.AnswerPatternFilter) PredicateExtractionFilter(info.ephyra.answerselection.filters.PredicateExtractionFilter) ScoreNormalizationFilter(info.ephyra.answerselection.filters.ScoreNormalizationFilter) WebDocumentFetcherFilter(info.ephyra.answerselection.filters.WebDocumentFetcherFilter) StopwordFilter(info.ephyra.answerselection.filters.StopwordFilter) TruncationFilter(info.ephyra.answerselection.filters.TruncationFilter) WordNet(info.ephyra.nlp.semantics.ontologies.WordNet) BagOfWordsG(info.ephyra.querygeneration.generators.BagOfWordsG) PredicateG(info.ephyra.querygeneration.generators.PredicateG) AnswerTypeFilter(info.ephyra.answerselection.filters.AnswerTypeFilter) ResultLengthFilter(info.ephyra.answerselection.filters.ResultLengthFilter) QuestionReformulationG(info.ephyra.querygeneration.generators.QuestionReformulationG) QuestionKeywordsFilter(info.ephyra.answerselection.filters.QuestionKeywordsFilter) DuplicateFilter(info.ephyra.answerselection.filters.DuplicateFilter) FactoidSubsetFilter(info.ephyra.answerselection.filters.FactoidSubsetFilter) BagOfTermsG(info.ephyra.querygeneration.generators.BagOfTermsG) BingKM(info.ephyra.search.searchers.BingKM) QuestionInterpretationG(info.ephyra.querygeneration.generators.QuestionInterpretationG) FactoidsFromPredicatesFilter(info.ephyra.answerselection.filters.FactoidsFromPredicatesFilter) AnswerProjectionFilter(info.ephyra.answerselection.filters.AnswerProjectionFilter)

Aggregations

BagOfWordsG (info.ephyra.querygeneration.generators.BagOfWordsG)7 AnswerPatternFilter (info.ephyra.answerselection.filters.AnswerPatternFilter)4 AnswerTypeFilter (info.ephyra.answerselection.filters.AnswerTypeFilter)4 FactoidsFromPredicatesFilter (info.ephyra.answerselection.filters.FactoidsFromPredicatesFilter)4 PredicateExtractionFilter (info.ephyra.answerselection.filters.PredicateExtractionFilter)4 TruncationFilter (info.ephyra.answerselection.filters.TruncationFilter)4 Ontology (info.ephyra.nlp.semantics.ontologies.Ontology)4 WordNet (info.ephyra.nlp.semantics.ontologies.WordNet)4 BagOfTermsG (info.ephyra.querygeneration.generators.BagOfTermsG)4 PredicateG (info.ephyra.querygeneration.generators.PredicateG)4 QuestionInterpretationG (info.ephyra.querygeneration.generators.QuestionInterpretationG)4 QuestionReformulationG (info.ephyra.querygeneration.generators.QuestionReformulationG)4 DuplicateFilter (info.ephyra.answerselection.filters.DuplicateFilter)3 FactoidSubsetFilter (info.ephyra.answerselection.filters.FactoidSubsetFilter)3 QuestionKeywordsFilter (info.ephyra.answerselection.filters.QuestionKeywordsFilter)3 ScoreCombinationFilter (info.ephyra.answerselection.filters.ScoreCombinationFilter)3 ScoreNormalizationFilter (info.ephyra.answerselection.filters.ScoreNormalizationFilter)3 ScoreSorterFilter (info.ephyra.answerselection.filters.ScoreSorterFilter)3 StopwordFilter (info.ephyra.answerselection.filters.StopwordFilter)3 Query (info.ephyra.querygeneration.Query)3