Search in sources :

Example 1 with Term

Use of info.ephyra.questionanalysis.Term in project lucida by claritylab.

In class PredicateG, method generateQueries:

/**
 * Builds queries from the predicate-argument structures that were
 * extracted from the question string.
 * 
 * @param aq analyzed question
 * @return <code>Query</code> objects (empty array if no predicates were found)
 */
public Query[] generateQueries(AnalyzedQuestion aq) {
    // without extracted predicates there is nothing to build a query from
    Predicate[] predicates = aq.getPredicates();
    if (predicates.length == 0) {
        return new Query[0];
    }
    // assemble the query string from predicates, terms and keywords
    String queryString = getQueryString(predicates, aq.getTerms(), aq.getKeywords());
    // wrap it in a single scored query and record the extraction techniques
    Query query = new Query(queryString, aq, SCORE);
    query.setExtractionTechniques(EXTRACTION_TECHNIQUES);
    return new Query[] { query };
}
Also used : Query(info.ephyra.querygeneration.Query) Term(info.ephyra.questionanalysis.Term) Predicate(info.ephyra.nlp.semantics.Predicate)

Example 2 with Term

Use of info.ephyra.questionanalysis.Term in project lucida by claritylab.

In class Predicate, method setArgTerms:

/**
 * Determines which of the given terms occur in the arguments of this
 * predicate and stores them in <code>argTerms</code>.
 * 
 * @param terms the terms in the sentence the predicate was extracted from
 */
private void setArgTerms(Term[] terms) {
    // collect tokenized representations of all non-null arguments,
    // numbered arguments first, then the modifier arguments
    List<String> tokenizedArgs = new ArrayList<String>();
    for (String arg : args) {
        if (arg != null)
            tokenizedArgs.add(NETagger.tokenizeWithSpaces(arg));
    }
    String[] modifierArgs = { argLOC, argCAU, argEXT, argTMP, argDIS, argPNC, argADV, argMNR, argNEG, argDIR, argMOD };
    for (String modifier : modifierArgs) {
        if (modifier != null)
            tokenizedArgs.add(NETagger.tokenizeWithSpaces(modifier));
    }
    // keep each term whose tokenized text occurs, delimited by word
    // boundaries, in at least one of the arguments
    List<Term> matched = new ArrayList<Term>();
    // tokenized texts already accepted, used to eliminate duplicate terms
    Set<String> seen = new HashSet<String>();
    for (Term term : terms) {
        String tokenized = NETagger.tokenizeWithSpaces(term.getText());
        Pattern boundary = Pattern.compile("(^|\\W)" + RegexConverter.strToRegex(tokenized) + "($|\\W)");
        for (String tokenizedArg : tokenizedArgs) {
            if (boundary.matcher(tokenizedArg).find()) {
                if (seen.add(tokenized))
                    matched.add(term);
                break;
            }
        }
    }
    this.argTerms = matched.toArray(new Term[matched.size()]);
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) ArrayList(java.util.ArrayList) Term(info.ephyra.questionanalysis.Term) HashSet(java.util.HashSet)

Example 3 with Term

Use of info.ephyra.questionanalysis.Term in project lucida by claritylab.

In class Predicate, method setVerbTerm:

/**
 * Sets the term for the verb of this predicate.
 * 
 * @param terms the terms in the sentence the predicate was extracted from
 */
private void setVerbTerm(Term[] terms) {
    // word-boundary pattern around the tokenized verb
    String tokenizedVerb = NETagger.tokenizeWithSpaces(verb);
    Pattern verbPattern = Pattern.compile("(^|\\W)" + RegexConverter.strToRegex(tokenizedVerb) + "($|\\W)");
    // pick the first term whose tokenized text contains the verb
    for (Term candidate : terms) {
        String tokenized = NETagger.tokenizeWithSpaces(candidate.getText());
        if (verbPattern.matcher(tokenized).find()) {
            verbTerm = candidate;
            return;
        }
    }
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) Term(info.ephyra.questionanalysis.Term)

Example 4 with Term

Use of info.ephyra.questionanalysis.Term in project lucida by claritylab.

In class BagOfTermsG, method generateQueries:

/**
 * Generates a "bag of terms" query from the terms in the question string.
 * 
 * @param aq analyzed question
 * @return <code>Query</code> objects (empty array if the question was
 *         rejected)
 */
public Query[] generateQueries(AnalyzedQuestion aq) {
    // a factoid question is only queried if its answer type is known or
    // predicates could be extracted from it
    String[] answerTypes = aq.getAnswerTypes();
    Predicate[] predicates = aq.getPredicates();
    if (answerTypes.length == 0 && predicates.length == 0 && aq.isFactoid()) {
        return new Query[0];
    }
    // assemble the query string from the question terms and keywords
    String queryString = getQueryString(aq.getTerms(), aq.getKeywords());
    // wrap it in a single scored query and record the extraction techniques
    Query query = new Query(queryString, aq, SCORE);
    query.setExtractionTechniques(EXTRACTION_TECHNIQUES);
    return new Query[] { query };
}
Also used : Query(info.ephyra.querygeneration.Query) Term(info.ephyra.questionanalysis.Term) Predicate(info.ephyra.nlp.semantics.Predicate)

Example 5 with Term

Use of info.ephyra.questionanalysis.Term in project lucida by claritylab.

In class PredicateExtractionFilter, method checkSentence:

/**
 * Decides if predicates should be extracted from this sentence. If the
 * sentence passes the tests, NEs of the expected answer types and terms
 * are extracted and added to the result.
 * 
 * @param sentence sentence-level result
 * @return <code>true</code> iff the sentence is relevant
 */
private boolean checkSentence(Result sentence) {
    AnalyzedQuestion aq = sentence.getQuery().getAnalyzedQuestion();
    String s = sentence.getAnswer();
    // discard sentences that exceed the length thresholds
    if (s.length() > MAX_SENT_LENGTH_CHARS)
        return false;
    String[] tokens = NETagger.tokenize(s);
    if (tokens.length > MAX_SENT_LENGTH_TOKENS)
        return false;
    // NOTE: matching of the sentence verbs against the verb terms of the
    // question predicates is done in apply() (performance optimized)
    // if the answer type is known, require at least one NE of an expected
    // type that does not already occur in the question
    String[] answerTypes = aq.getAnswerTypes();
    if (answerTypes.length != 0) {
        Map<String, String[]> extracted = extractNes(s, answerTypes);
        String questionNorm = StringUtils.normalize(aq.getQuestion());
        boolean foundNewNe = false;
        for (String ne : extracted.keySet()) {
            String neNorm = StringUtils.normalize(ne);
            if (!StringUtils.isSubsetKeywords(neNorm, questionNorm)) {
                foundNewNe = true;
                break;
            }
        }
        // all extracted NEs already appear in the question
        if (!foundNewNe)
            return false;
        sentence.setNes(extracted);
    }
    // require a sentence term that matches an argument term of a question
    // predicate with missing arguments
    // - single-token terms are extracted first to avoid dictionary lookups
    Term[] singleTerms = TermExtractor.getSingleTokenTerms(s);
    Predicate[] questionPs = aq.getPredicates();
    boolean match = false;
    search: for (Term singleTerm : singleTerms) {
        for (Predicate questionP : questionPs) {
            // compare to predicates with missing arguments only
            if (!questionP.hasMissingArgs())
                continue;
            for (Term predicateTerm : questionP.getArgTerms()) {
                if (predicateTerm.simScore(singleTerm.getLemma()) > 0) {
                    match = true;
                    break search;
                }
            }
        }
    }
    if (!match)
        return false;
    // - multi-token terms are extracted from sentences that pass the test
    Dictionary[] dicts = QuestionAnalysis.getDictionaries();
    Term[] multiTerms = TermExtractor.getTerms(s, dicts);
    sentence.setTerms(multiTerms);
    return true;
}
Also used : Dictionary(info.ephyra.util.Dictionary) AnalyzedQuestion(info.ephyra.questionanalysis.AnalyzedQuestion) Term(info.ephyra.questionanalysis.Term) Predicate(info.ephyra.nlp.semantics.Predicate)

Aggregations

Term (info.ephyra.questionanalysis.Term)10 Predicate (info.ephyra.nlp.semantics.Predicate)6 Query (info.ephyra.querygeneration.Query)4 ArrayList (java.util.ArrayList)3 HashSet (java.util.HashSet)3 Matcher (java.util.regex.Matcher)2 Pattern (java.util.regex.Pattern)2 AnalyzedQuestion (info.ephyra.questionanalysis.AnalyzedQuestion)1 Result (info.ephyra.search.Result)1 Dictionary (info.ephyra.util.Dictionary)1 ParseException (java.text.ParseException)1 Hashtable (java.util.Hashtable)1