Use of info.ephyra.nlp.semantics.ontologies.Ontology in project lucida by claritylab.
Class OpenEphyra, method initFactoid.
/**
 * Initializes the pipeline for factoid questions.
 */
protected void initFactoid() {
    // question analysis
    Ontology wordNet = new WordNet();
    // - dictionaries for term extraction
    QuestionAnalysis.clearDictionaries();
    QuestionAnalysis.addDictionary(wordNet);
    // - ontologies for term expansion
    QuestionAnalysis.clearOntologies();
    QuestionAnalysis.addOntology(wordNet);
    // query generation
    QueryGeneration.clearQueryGenerators();
    QueryGeneration.addQueryGenerator(new BagOfWordsG());
    QueryGeneration.addQueryGenerator(new BagOfTermsG());
    QueryGeneration.addQueryGenerator(new PredicateG());
    QueryGeneration.addQueryGenerator(new QuestionInterpretationG());
    QueryGeneration.addQueryGenerator(new QuestionReformulationG());
    // search
    // - knowledge miners for unstructured knowledge sources
    Search.clearKnowledgeMiners();
    // Search.addKnowledgeMiner(new YahooKM());
    for (String[] indriIndices : IndriKM.getIndriIndices())
        Search.addKnowledgeMiner(new IndriKM(indriIndices, false));
    // for (String[] indriServers : IndriKM.getIndriServers())
    //     Search.addKnowledgeMiner(new IndriKM(indriServers, true));
    // - knowledge annotators for (semi-)structured knowledge sources
    Search.clearKnowledgeAnnotators();
    // answer extraction and selection
    // (the filters are applied in this order)
    AnswerSelection.clearFilters();
    // - answer extraction filters
    AnswerSelection.addFilter(new AnswerTypeFilter());
    AnswerSelection.addFilter(new AnswerPatternFilter());
    // AnswerSelection.addFilter(new WebDocumentFetcherFilter());
    AnswerSelection.addFilter(new PredicateExtractionFilter());
    AnswerSelection.addFilter(new FactoidsFromPredicatesFilter());
    AnswerSelection.addFilter(new TruncationFilter());
    // - answer selection filters
    AnswerSelection.addFilter(new StopwordFilter());
    AnswerSelection.addFilter(new QuestionKeywordsFilter());
    AnswerSelection.addFilter(new ScoreNormalizationFilter(NORMALIZER));
    AnswerSelection.addFilter(new ScoreCombinationFilter());
    AnswerSelection.addFilter(new FactoidSubsetFilter());
    AnswerSelection.addFilter(new DuplicateFilter());
    AnswerSelection.addFilter(new ScoreSorterFilter());
}
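The method above only wires up the pipeline components; a question is then answered by the surrounding driver. The following is a minimal usage sketch, not part of the original class, assuming OpenEphyra exposes an askFactoid(String) method that returns a single Result (or null if no answer passes the score threshold) with getAnswer() and getScore() accessors.

// Usage sketch (assumption: askFactoid(String), getAnswer() and getScore() behave as described above).
OpenEphyra ephyra = new OpenEphyra();
Result result = ephyra.askFactoid("When was the Eiffel Tower built?");
if (result != null)
    System.out.println(result.getAnswer() + " (score: " + result.getScore() + ")");
else
    System.out.println("No answer found.");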
Use of info.ephyra.nlp.semantics.ontologies.Ontology in project lucida by claritylab.
Class OpenEphyraCorpus, method initFactoidCorpus.
/**
 * Initializes the pipeline for factoid questions, using a local corpus as a
 * knowledge source.
 */
protected void initFactoidCorpus() {
    // question analysis
    Ontology wordNet = new WordNet();
    // - dictionaries for term extraction
    QuestionAnalysis.clearDictionaries();
    QuestionAnalysis.addDictionary(wordNet);
    // - ontologies for term expansion
    QuestionAnalysis.clearOntologies();
    QuestionAnalysis.addOntology(wordNet);
    // query generation
    QueryGeneration.clearQueryGenerators();
    QueryGeneration.addQueryGenerator(new BagOfWordsG());
    QueryGeneration.addQueryGenerator(new BagOfTermsG());
    QueryGeneration.addQueryGenerator(new PredicateG());
    QueryGeneration.addQueryGenerator(new QuestionInterpretationG());
    QueryGeneration.addQueryGenerator(new QuestionReformulationG());
    // search
    // - knowledge miners for unstructured knowledge sources
    Search.clearKnowledgeMiners();
    for (String[] indriIndices : IndriKM.getIndriIndices())
        Search.addKnowledgeMiner(new IndriKM(indriIndices, false));
    for (String[] indriServers : IndriKM.getIndriServers())
        Search.addKnowledgeMiner(new IndriKM(indriServers, true));
    // - knowledge annotators for (semi-)structured knowledge sources
    Search.clearKnowledgeAnnotators();
    // answer extraction and selection
    // (the filters are applied in this order)
    AnswerSelection.clearFilters();
    // - answer extraction filters
    AnswerSelection.addFilter(new AnswerTypeFilter());
    AnswerSelection.addFilter(new AnswerPatternFilter());
    AnswerSelection.addFilter(new WebDocumentFetcherFilter());
    AnswerSelection.addFilter(new PredicateExtractionFilter());
    AnswerSelection.addFilter(new FactoidsFromPredicatesFilter());
    AnswerSelection.addFilter(new TruncationFilter());
    // - answer selection filters
}
Use of info.ephyra.nlp.semantics.ontologies.Ontology in project lucida by claritylab.
Class OpenEphyraCorpus, method initFactoidWeb.
/**
 * Initializes the pipeline for factoid questions, using the Web as a
 * knowledge source.
 *
 * @param resultsCorp results retrieved from the corpus
 */
protected void initFactoidWeb(Result[] resultsCorp) {
    // question analysis
    Ontology wordNet = new WordNet();
    // - dictionaries for term extraction
    QuestionAnalysis.clearDictionaries();
    QuestionAnalysis.addDictionary(wordNet);
    // - ontologies for term expansion
    QuestionAnalysis.clearOntologies();
    QuestionAnalysis.addOntology(wordNet);
    // query generation
    QueryGeneration.clearQueryGenerators();
    QueryGeneration.addQueryGenerator(new BagOfWordsG());
    QueryGeneration.addQueryGenerator(new BagOfTermsG());
    QueryGeneration.addQueryGenerator(new PredicateG());
    QueryGeneration.addQueryGenerator(new QuestionInterpretationG());
    QueryGeneration.addQueryGenerator(new QuestionReformulationG());
    // search
    // - knowledge miners for unstructured knowledge sources
    Search.clearKnowledgeMiners();
    Search.addKnowledgeMiner(new BingKM());
    // Search.addKnowledgeMiner(new GoogleKM());
    // Search.addKnowledgeMiner(new YahooKM());
    // - knowledge annotators for (semi-)structured knowledge sources
    Search.clearKnowledgeAnnotators();
    // answer extraction and selection
    // (the filters are applied in this order)
    AnswerSelection.clearFilters();
    // - answer extraction filters
    AnswerSelection.addFilter(new AnswerTypeFilter());
    AnswerSelection.addFilter(new AnswerPatternFilter());
    AnswerSelection.addFilter(new WebDocumentFetcherFilter());
    AnswerSelection.addFilter(new PredicateExtractionFilter());
    AnswerSelection.addFilter(new FactoidsFromPredicatesFilter());
    AnswerSelection.addFilter(new TruncationFilter());
    // - answer selection filters
    AnswerSelection.addFilter(new StopwordFilter());
    AnswerSelection.addFilter(new QuestionKeywordsFilter());
    AnswerSelection.addFilter(new AnswerProjectionFilter(resultsCorp));
    AnswerSelection.addFilter(new ScoreNormalizationFilter(NORMALIZER));
    AnswerSelection.addFilter(new ScoreCombinationFilter());
    AnswerSelection.addFilter(new FactoidSubsetFilter());
    AnswerSelection.addFilter(new DuplicateFilter());
    AnswerSelection.addFilter(new ScoreSorterFilter());
    AnswerSelection.addFilter(new ResultLengthFilter());
}
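The two initializers in OpenEphyraCorpus are complementary: initFactoidCorpus() configures a run against the local Indri corpus, while initFactoidWeb(resultsCorp) then mines the Web and maps the web-derived answers back onto the corpus results through AnswerProjectionFilter. A plausible two-pass driver is sketched below; the method name runPipeline() and the overall signature are hypothetical and only illustrate the intended order of calls.

// Illustrative two-pass driver (runPipeline() is a hypothetical helper that executes
// query generation, search and answer selection for the current configuration).
protected Result[] askFactoid(String question) {
    // pass 1: answer candidates from the local corpus
    initFactoidCorpus();
    Result[] resultsCorp = runPipeline(question);
    // pass 2: mine the Web, then project web answers onto the corpus results
    initFactoidWeb(resultsCorp);
    return runPipeline(question);
}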
Use of info.ephyra.nlp.semantics.ontologies.Ontology in project lucida by claritylab.
Class TermExpander, method expandTerm.
/**
 * Expands a term by looking up related terms in ontologies.
 *
 * @param term a term
 * @param ps predicates in the same sentence
 * @param ontologies ontologies used to expand the term
 */
public static void expandTerm(Term term, Predicate[] ps, Ontology[] ontologies) {
    String text = term.getText();
    String pos = term.getPos();
    Map<String, Double> lemmas = new Hashtable<String, Double>();
    Map<String, Double> expansions = new Hashtable<String, Double>();
    // expand events, entities and modifiers
    if (isTarget(term, ps) || pos.startsWith("VB")) {
        // lemmatize verbs that are in WordNet
        String lemma = WordNet.getLemma(text, POS.VERB);
        if (lemma == null)
            lemma = text;
        // set lemma if the POS was misleading
        if (!pos.startsWith("VB"))
            term.setLemma(lemma);
        // expand event
        for (Ontology ontology : ontologies) {
            Map<String, Double> expanded = ontology.expandEvent(lemma);
            lemmas.putAll(expanded);
        }
        // ensure that there are at most MAX_EXPANSIONS expansions with
        // weights of at least MIN_EXPANSION_WEIGHT
        cutOffExpansions(lemmas, true);
        // restore verb form
        if (pos.equals("VBZ")) {
            // third person singular
            for (String exp : lemmas.keySet()) {
                double weight = lemmas.get(exp);
                String form = VerbFormConverter.infinitiveToThirdPersonS(exp);
                expansions.put(form, weight);
            }
        } else if (pos.equals("VBG")) {
            // gerund
            for (String exp : lemmas.keySet()) {
                double weight = lemmas.get(exp);
                String[] forms = VerbFormConverter.infinitiveToGerund(exp);
                for (String form : forms) expansions.put(form, weight);
            }
        } else if (pos.equals("VBD")) {
            // simple past
            for (String exp : lemmas.keySet()) {
                double weight = lemmas.get(exp);
                String[] forms = VerbFormConverter.infinitiveToSimplePast(exp);
                for (String form : forms) expansions.put(form, weight);
            }
        } else if (pos.equals("VBN")) {
            // past participle
            for (String exp : lemmas.keySet()) {
                double weight = lemmas.get(exp);
                String[] forms = VerbFormConverter.infinitiveToPastParticiple(exp);
                for (String form : forms) expansions.put(form, weight);
            }
        }
    } else if (pos.startsWith("JJ") || pos.startsWith("RB")) {
        // get modifier type
        POS modType = (pos.startsWith("JJ")) ? POS.ADJECTIVE : POS.ADVERB;
        // lemmatize adjectives and adverbs that are in WordNet
        String lemma = WordNet.getLemma(text, modType);
        if (lemma == null)
            lemma = text;
        // expand modifier
        for (Ontology ontology : ontologies) {
            Map<String, Double> expanded = ontology.expandModifier(lemma, modType);
            lemmas.putAll(expanded);
        }
        // ensure that there are at most MAX_EXPANSIONS expansions with
        // weights of at least MIN_EXPANSION_WEIGHT
        cutOffExpansions(lemmas, true);
    } else {
        // lemmatize nouns that are in WordNet
        String lemma;
        if (pos.startsWith("COMPOUND"))
            // compound
            lemma = WordNet.getCompoundLemma(text, POS.NOUN);
        else
            // single token
            lemma = WordNet.getLemma(text, POS.NOUN);
        if (lemma == null)
            lemma = text;
        // expand entity
        for (Ontology ontology : ontologies) {
            Map<String, Double> expanded = ontology.expandEntity(lemma);
            lemmas.putAll(expanded);
        }
        // ensure that there are at most MAX_EXPANSIONS expansions with
        // weights of at least MIN_EXPANSION_WEIGHT
        cutOffExpansions(lemmas, true);
        // TODO restore plural forms if possible
    }
    term.setExpansionLemmas(lemmas);
    term.setExpansions((expansions.size() > 0) ? expansions : lemmas);
}
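For context, the following sketch drives expandTerm() with WordNet as the only ontology. The Term constructor shown is hypothetical (in practice Term objects are produced by question analysis), and getExpansions() is assumed to be the accessor matching the setExpansions() call above.

// Illustrative call (the Term constructor and getExpansions() accessor are assumptions).
Ontology[] ontologies = new Ontology[] { new WordNet() };
Term term = new Term("invented", "VBD");
TermExpander.expandTerm(term, new Predicate[0], ontologies);
// term now carries weighted expansion lemmas (e.g. "create", "devise") and
// surface forms converted back to simple past ("created", "devised").
System.out.println(term.getExpansions());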
Use of info.ephyra.nlp.semantics.ontologies.Ontology in project lucida by claritylab.
Class OpenEphyraServer, method initFactoid.
/**
 * Initializes the pipeline for factoid questions.
 */
protected void initFactoid() {
    // question analysis
    Ontology wordNet = new WordNet();
    // - dictionaries for term extraction
    QuestionAnalysis.clearDictionaries();
    QuestionAnalysis.addDictionary(wordNet);
    // - ontologies for term expansion
    QuestionAnalysis.clearOntologies();
    QuestionAnalysis.addOntology(wordNet);
    // query generation
    QueryGeneration.clearQueryGenerators();
    QueryGeneration.addQueryGenerator(new BagOfWordsG());
    QueryGeneration.addQueryGenerator(new BagOfTermsG());
    QueryGeneration.addQueryGenerator(new PredicateG());
    QueryGeneration.addQueryGenerator(new QuestionInterpretationG());
    QueryGeneration.addQueryGenerator(new QuestionReformulationG());
    // search
    // - knowledge miners for unstructured knowledge sources
    Search.clearKnowledgeMiners();
    for (String[] indriIndices : IndriKM.getIndriIndices())
        Search.addKnowledgeMiner(new IndriKM(indriIndices, false));
    // - knowledge annotators for (semi-)structured knowledge sources
    Search.clearKnowledgeAnnotators();
    /* Search.addKnowledgeAnnotator(new WikipediaKA("list.txt")); */
    // answer extraction and selection
    // (the filters are applied in this order)
    AnswerSelection.clearFilters();
    // - answer extraction filters
    AnswerSelection.addFilter(new AnswerTypeFilter());
    AnswerSelection.addFilter(new AnswerPatternFilter());
    AnswerSelection.addFilter(new PredicateExtractionFilter());
    AnswerSelection.addFilter(new FactoidsFromPredicatesFilter());
    AnswerSelection.addFilter(new TruncationFilter());
    // - answer selection filters
    AnswerSelection.addFilter(new StopwordFilter());
    AnswerSelection.addFilter(new QuestionKeywordsFilter());
    AnswerSelection.addFilter(new ScoreNormalizationFilter(NORMALIZER));
    AnswerSelection.addFilter(new ScoreCombinationFilter());
    AnswerSelection.addFilter(new FactoidSubsetFilter());
    AnswerSelection.addFilter(new DuplicateFilter());
    AnswerSelection.addFilter(new ScoreSorterFilter());
}
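Every pipeline above registers WordNet through the Ontology abstraction, and TermExpander only calls the three expansion methods shown earlier, each returning a Map from related terms to weights. The sketch below outlines what an additional, domain-specific ontology might look like; it assumes Ontology is an interface whose contract is exactly those three methods, which may not hold in the actual code base (for example, the dictionary lookup used for term extraction may also be required).

// Minimal sketch of a custom ontology (assumption: only the three expansion methods
// used by TermExpander are required; the class name and contents are hypothetical).
public class MedicalOntology implements Ontology {

    // related verbs (events) with confidence weights
    public Map<String, Double> expandEvent(String lemma) {
        Map<String, Double> expansions = new Hashtable<String, Double>();
        if (lemma.equals("treat")) expansions.put("cure", 0.8);
        return expansions;
    }

    // related nouns (entities) with confidence weights
    public Map<String, Double> expandEntity(String lemma) {
        Map<String, Double> expansions = new Hashtable<String, Double>();
        if (lemma.equals("aspirin")) expansions.put("acetylsalicylic acid", 0.9);
        return expansions;
    }

    // related adjectives/adverbs (modifiers) with confidence weights
    public Map<String, Double> expandModifier(String lemma, POS pos) {
        return new Hashtable<String, Double>();
    }
}

// Registration alongside WordNet during question analysis:
// QuestionAnalysis.addOntology(new MedicalOntology());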