Use of info.ephyra.util.Dictionary in project lucida by claritylab.
Class PredicateExtractionFilter, method checkSentence:
/**
* Decides if predicates should be extracted from this sentence. If the
* sentence passes the tests, NEs of the expected answer types and terms
* are extracted and added to the result.
*
* @param sentence sentence-level result
* @return <code>true</code> iff the sentence is relevant
*/
private boolean checkSentence(Result sentence) {
    AnalyzedQuestion aq = sentence.getQuery().getAnalyzedQuestion();
    String s = sentence.getAnswer();
    // check the length of the sentence against thresholds
    if (s.length() > MAX_SENT_LENGTH_CHARS)
        return false;
    String[] tokens = NETagger.tokenize(s);
    if (tokens.length > MAX_SENT_LENGTH_TOKENS)
        return false;
    // // check if the sentence contains a matching verb term
    // boolean match = false;
    // Predicate[] questionPs = aq.getPredicates();
    // String[] tokens = OpenNLP.tokenize(s);
    // String[] pos = OpenNLP.tagPos(tokens);
    // for (int i = 0; i < tokens.length; i++) {
    //     // look for verbs only
    //     if (!pos[i].startsWith("VB") || !pos[i].matches("[a-zA-Z]*"))
    //         continue;
    //     Term sentenceTerm = new Term(tokens[i], pos[i]);
    //
    //     for (Predicate questionP : questionPs) {
    //         // compare to predicates with missing arguments only
    //         if (!questionP.hasMissingArgs()) continue;
    //         Term predicateTerm = questionP.getVerbTerm();
    //
    //         if (predicateTerm.simScore(sentenceTerm.getLemma()) > 0) {
    //             match = true;
    //             break;
    //         }
    //     }
    //
    //     if (match) break;
    // }
    // if (!match) return false;
    // -> checked in apply() (performance optimized)
    // check if the sentence contains NEs of the expected types
    String[] answerTypes = aq.getAnswerTypes();
    if (answerTypes.length != 0) {
        // answer type known
        boolean newNE = false;
        Map<String, String[]> extracted = extractNes(s, answerTypes);
        String questionNorm = StringUtils.normalize(aq.getQuestion());
        for (String ne : extracted.keySet()) {
            String neNorm = StringUtils.normalize(ne);
            if (!StringUtils.isSubsetKeywords(neNorm, questionNorm)) {
                newNE = true;
                break;
            }
        }
        // no NEs that are not in the question
        if (!newNE)
            return false;
        sentence.setNes(extracted);
    }
    // check if the sentence contains a matching argument term
    // - single-token terms are extracted first to avoid dictionary lookups
    boolean match = false;
    Term[] singleTerms = TermExtractor.getSingleTokenTerms(s);
    Predicate[] questionPs = aq.getPredicates();
    for (Term singleTerm : singleTerms) {
        for (Predicate questionP : questionPs) {
            // compare to predicates with missing arguments only
            if (!questionP.hasMissingArgs())
                continue;
            Term[] predicateTerms = questionP.getArgTerms();
            for (Term predicateTerm : predicateTerms) {
                if (predicateTerm.simScore(singleTerm.getLemma()) > 0) {
                    match = true;
                    break;
                }
            }
            if (match)
                break;
        }
        if (match)
            break;
    }
    if (!match)
        return false;
    // - multi-token terms are extracted from sentences that pass the test
    Dictionary[] dicts = QuestionAnalysis.getDictionaries();
    Term[] multiTerms = TermExtractor.getTerms(s, dicts);
    sentence.setTerms(multiTerms);
    return true;
}
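Since checkSentence is a private helper, it is only reachable from the filter's own apply method; the comment above notes that the verb-term check was moved there for performance. Below is a minimal sketch of what such an apply loop could look like, assuming a Filter-style Result[] apply(Result[]) override and the usual java.util collections; the actual method in PredicateExtractionFilter does more work than shown here.

// Hypothetical sketch, not the actual PredicateExtractionFilter.apply():
// keep only sentence-level results that pass checkSentence(), which also
// attaches the extracted NEs and terms to each surviving result.
public Result[] apply(Result[] results) {
    List<Result> relevant = new ArrayList<Result>();
    for (Result sentence : results) {
        if (checkSentence(sentence))
            relevant.add(sentence);
    }
    return relevant.toArray(new Result[relevant.size()]);
}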
Use of info.ephyra.util.Dictionary in project lucida by claritylab.
Class QuestionInterpreter, method lookupKeyword:
/**
* Looks up a word in the dictionary for the given PROPERTY.
*
* @param word the word to be looked up
* @param prop the PROPERTY
* @return true, iff <code>word</code> is in the dictionary for
* <code>prop</code>
*/
public static boolean lookupKeyword(String word, String prop) {
    Dictionary dict = keywords.get(prop);
    if (dict == null)
        return false;
    if (dict.contains(word))
        return true;
    String stem = PlingStemmer.stem(word);
    if (dict.contains(stem))
        return true;
    String lemma = WordNet.getLemma(word, WordNet.VERB);
    if (lemma != null && dict.contains(lemma))
        return true;
    return false;
}
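The lookup falls back from the exact surface form to a noun stem (PlingStemmer) and then to a verb lemma (WordNet), so an inflected question word such as "countries" or "invented" can still hit a dictionary entry like "country" or "invent". Below is a self-contained sketch of the same fallback chain: the dictionary is a plain java.util.Set, and the class name KeywordLookupSketch, stem, and verbLemma are illustrative placeholders, not part of Ephyra.

import java.util.HashSet;
import java.util.Set;

public class KeywordLookupSketch {

    // stands in for the per-PROPERTY Dictionary used by QuestionInterpreter
    private static final Set<String> DICT = new HashSet<String>();
    static {
        DICT.add("country");
        DICT.add("invent");
    }

    // trivial placeholder for PlingStemmer.stem(word)
    private static String stem(String word) {
        if (word.endsWith("ies"))
            return word.substring(0, word.length() - 3) + "y";
        return word.replaceAll("s$", "");
    }

    // trivial placeholder for WordNet.getLemma(word, WordNet.VERB); may return null
    private static String verbLemma(String word) {
        return word.endsWith("ed") ? word.substring(0, word.length() - 2) : null;
    }

    public static boolean lookupKeyword(String word) {
        if (DICT.contains(word))
            return true;                                  // exact surface form
        if (DICT.contains(stem(word)))
            return true;                                  // noun stem
        String lemma = verbLemma(word);
        return lemma != null && DICT.contains(lemma);     // verb lemma
    }

    public static void main(String[] args) {
        System.out.println(lookupKeyword("countries"));   // true, via the stem
        System.out.println(lookupKeyword("invented"));    // true, via the lemma
        System.out.println(lookupKeyword("city"));        // false
    }
}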