Search in sources :

Example 1 with Dictionary

use of info.ephyra.util.Dictionary in project lucida by claritylab.

The method checkSentence of the class PredicateExtractionFilter.

/**
 * Determines whether predicates should be extracted from a sentence. A
 * sentence that passes all tests is annotated with the extracted NEs of
 * the expected answer types (if any answer types are known) and with the
 * terms extracted from it.
 *
 * @param sentence sentence-level result
 * @return <code>true</code> iff the sentence is relevant
 */
private boolean checkSentence(Result sentence) {
    AnalyzedQuestion question = sentence.getQuery().getAnalyzedQuestion();
    String text = sentence.getAnswer();

    // reject sentences that exceed the character or token thresholds
    if (text.length() > MAX_SENT_LENGTH_CHARS) {
        return false;
    }
    if (NETagger.tokenize(text).length > MAX_SENT_LENGTH_TOKENS) {
        return false;
    }

    // NOTE: the test for a matching verb term is not performed here;
    // it is checked in apply() (performance optimized)

    // if the answer type is known, require at least one extracted NE for
    // which isSubsetKeywords() fails against the normalized question
    String[] expectedTypes = question.getAnswerTypes();
    if (expectedTypes.length != 0) {
        Map<String, String[]> nes = extractNes(text, expectedTypes);
        String normalizedQuestion = StringUtils.normalize(question.getQuestion());
        boolean foundNewNe = false;
        for (String ne : nes.keySet()) {
            foundNewNe = !StringUtils.isSubsetKeywords(StringUtils.normalize(ne), normalizedQuestion);
            if (foundNewNe) {
                break;
            }
        }
        if (!foundNewNe) {
            // no NEs that are not in the question
            return false;
        }
        sentence.setNes(nes);
    }

    // require a single-token term with a positive similarity score against
    // an argument term of a predicate that has missing arguments; the
    // single-token terms are used first to avoid dictionary lookups
    Term[] sentenceTerms = TermExtractor.getSingleTokenTerms(text);
    Predicate[] predicates = question.getPredicates();
    boolean argMatched = false;
    search:
    for (Term sentenceTerm : sentenceTerms) {
        for (Predicate predicate : predicates) {
            // only predicates with missing arguments are considered
            if (predicate.hasMissingArgs()) {
                for (Term argTerm : predicate.getArgTerms()) {
                    if (argTerm.simScore(sentenceTerm.getLemma()) > 0) {
                        argMatched = true;
                        break search;
                    }
                }
            }
        }
    }
    if (!argMatched) {
        return false;
    }

    // the sentence passed all tests: extract the multi-token terms as well
    sentence.setTerms(TermExtractor.getTerms(text, QuestionAnalysis.getDictionaries()));
    return true;
}
Also used : Dictionary(info.ephyra.util.Dictionary) AnalyzedQuestion(info.ephyra.questionanalysis.AnalyzedQuestion) Term(info.ephyra.questionanalysis.Term) Predicate(info.ephyra.nlp.semantics.Predicate)

Example 2 with Dictionary

use of info.ephyra.util.Dictionary in project lucida by claritylab.

The method lookupKeyword of the class QuestionInterpreter.

/**
 * Checks whether a word is contained in the dictionary for the given
 * PROPERTY. The word itself is tried first, then the stem returned by
 * <code>PlingStemmer.stem()</code>, and finally the verb lemma returned by
 * <code>WordNet.getLemma()</code> (if that lemma is not <code>null</code>).
 *
 * @param word the word to be looked up
 * @param prop the PROPERTY
 * @return <code>true</code> iff <code>word</code>, its stem or its verb
 *         lemma is in the dictionary for <code>prop</code>;
 *         <code>false</code> if there is no dictionary for
 *         <code>prop</code>
 */
public static boolean lookupKeyword(String word, String prop) {
    Dictionary propDict = keywords.get(prop);
    if (propDict == null) {
        // no dictionary available for this PROPERTY
        return false;
    }

    // try the word as given, then its stem (the stem is only computed if
    // the first lookup fails)
    if (propDict.contains(word) || propDict.contains(PlingStemmer.stem(word))) {
        return true;
    }

    // fall back to the verb lemma, which may be unavailable
    String verbLemma = WordNet.getLemma(word, WordNet.VERB);
    return verbLemma != null && propDict.contains(verbLemma);
}
Also used : HashDictionary(info.ephyra.util.HashDictionary) Dictionary(info.ephyra.util.Dictionary)

Aggregations

Dictionary (info.ephyra.util.Dictionary): 2, Predicate (info.ephyra.nlp.semantics.Predicate): 1, AnalyzedQuestion (info.ephyra.questionanalysis.AnalyzedQuestion): 1, Term (info.ephyra.questionanalysis.Term): 1, HashDictionary (info.ephyra.util.HashDictionary): 1