Search in sources :

Example 6 with Term

use of info.ephyra.questionanalysis.Term in project lucida by claritylab.

the class PredicateExtractionFilter method getAllVerbForms.

/**
 * Gets all forms of the verbs and verb expansions of predicates with missing
 * arguments. Each array of verb forms is associated with a weight: 1 for the
 * verb of the predicate itself, and the weight stored in the expansion map
 * for each expansion.
 * <p>
 * Predicates without missing arguments and predicates that have no verb term
 * are skipped. Note that the keys of the returned table are arrays, so
 * lookups only succeed with the very same array instance that was inserted.
 *
 * @param ps predicates
 * @return verb forms and their weights
 */
private Hashtable<String[], Double> getAllVerbForms(Predicate[] ps) {
    Hashtable<String[], Double> allVerbForms = new Hashtable<String[], Double>();
    for (Predicate p : ps) {
        // get verbs from predicates with missing arguments only
        if (!p.hasMissingArgs())
            continue;
        // a predicate without a verb term cannot contribute any verb forms
        Term verbTerm = p.getVerbTerm();
        if (verbTerm == null)
            continue;
        // the verb of the predicate itself always gets the full weight
        allVerbForms.put(getFormsOfVerb(verbTerm.getText()), 1d);
        // expansions of the verb carry their own weights
        Map<String, Double> expansionsMap = verbTerm.getExpansions();
        if (expansionsMap == null)
            continue;
        for (Map.Entry<String, Double> expansion : expansionsMap.entrySet())
            allVerbForms.put(getFormsOfVerb(expansion.getKey()), expansion.getValue());
    }
    return allVerbForms;
}

/**
 * Reduces a verb to its WordNet lemma (falling back to the verb as given if
 * no lemma is found) and returns all forms derived from it.
 *
 * @param verb a verb in any form
 * @return all forms of the verb
 */
private String[] getFormsOfVerb(String verb) {
    String infinitive = WordNet.getLemma(verb, WordNet.VERB);
    if (infinitive == null)
        infinitive = verb;
    return VerbFormConverter.getAllForms(infinitive);
}
Also used : Hashtable(java.util.Hashtable) Term(info.ephyra.questionanalysis.Term) Predicate(info.ephyra.nlp.semantics.Predicate)

Example 7 with Term

use of info.ephyra.questionanalysis.Term in project lucida by claritylab.

the class PredicateExtractionFilter method apply.

/**
 * Extracts relevant predicates from documents.
 * <p>
 * Results that are not handled by this filter (the query is not extracted
 * with <code>FactoidsFromPredicatesFilter</code>, the question has no
 * predicates, or the result score is not 0) are passed through unchanged.
 * For the remaining results, the sentences that contain a relevant verb are
 * collected and scored with the weight of that verb, the best sentences that
 * pass <code>checkSentence</code> (at most <code>MAX_SENTENCES</code>) are
 * annotated, and every annotated predicate that is similar to a question
 * predicate with missing arguments is returned as a new result.
 *
 * @param results array of <code>Result</code> objects containing documents
 * @return array of <code>Result</code> objects containing predicates
 */
public Result[] apply(Result[] results) {
    if (results.length == 0)
        return results;
    ArrayList<Result> allResults = new ArrayList<Result>();
    // extract relevant sentences
    // - get sentences that contain relevant verbs,
    //   use weights of verbs as confidence scores
    HashSet<Result> ssSet = new HashSet<Result>();
    for (Result result : results) {
        // only apply this filter to results for the semantic parsing
        // approach
        Query query = result.getQuery();
        Predicate[] ps = query.getAnalyzedQuestion().getPredicates();
        if (!query.extractWith(FactoidsFromPredicatesFilter.ID) || ps.length == 0 || result.getScore() != 0) {
            allResults.add(result);
            continue;
        }
        // get all verb forms and compile one pattern per verb; compiling
        // here instead of calling String.matches() avoids recompiling the
        // same regular expression for every paragraph and sentence
        Hashtable<String[], Double> verbFormsMap = getAllVerbForms(ps);
        ArrayList<java.util.regex.Pattern> verbPatterns = new ArrayList<java.util.regex.Pattern>();
        ArrayList<Double> verbWeights = new ArrayList<Double>();
        for (Map.Entry<String[], Double> verbEntry : verbFormsMap.entrySet()) {
            String verbPattern = "(?i).*?\\b(" + StringUtils.concat(verbEntry.getKey(), "|") + ")\\b.*+";
            verbPatterns.add(java.util.regex.Pattern.compile(verbPattern));
            verbWeights.add(verbEntry.getValue());
        }
        String[] paragraphs = result.getAnswer().split("\\n");
        for (String p : paragraphs) {
            // paragraph does not contain relevant verb?
            boolean contains = false;
            for (java.util.regex.Pattern verbPattern : verbPatterns) {
                if (verbPattern.matcher(p).matches()) {
                    contains = true;
                    break;
                }
            }
            if (!contains)
                continue;
            String[] sentences = LingPipe.sentDetect(p);
            for (String s : sentences) {
                // use the highest weight among all matching verbs: the
                // iteration order of the verb table is arbitrary, so taking
                // the first match would make the score unpredictable
                boolean matched = false;
                double weight = 0d;
                for (int i = 0; i < verbPatterns.size(); i++) {
                    if (!verbPatterns.get(i).matcher(s).matches())
                        continue;
                    double currWeight = verbWeights.get(i);
                    if (!matched || currWeight > weight)
                        weight = currWeight;
                    matched = true;
                }
                // sentence does not contain relevant verb?
                if (!matched || weight == 0d)
                    continue;
                // replace whitespaces by single blanks and trim
                s = s.replaceAll("\\s++", " ").trim();
                // create sentence-level result object
                Result sentence = result.getCopy();
                sentence.setAnswer(s);
                sentence.setScore((float) weight);
                ssSet.add(sentence);
            }
        }
    }
    // - check if these sentences are relevant,
    //   get MAX_SENTENCES sentences with most relevant verbs
    Result[] ss = ssSet.toArray(new Result[ssSet.size()]);
    ss = (new ScoreSorterFilter()).apply(ss);
    ArrayList<Result> ssList = new ArrayList<Result>();
    for (Result s : ss) {
        // the verb weight was only needed for sorting
        s.setScore(0);
        if (checkSentence(s))
            ssList.add(s);
        // get at most MAX_SENTENCES sentences
        if (ssList.size() >= MAX_SENTENCES)
            break;
    }
    ss = ssList.toArray(new Result[ssList.size()]);
    if (ss.length == 0)
        return allResults.toArray(new Result[allResults.size()]);
    // annotate predicates in sentences
    String[] sentences = new String[ss.length];
    for (int i = 0; i < ss.length; i++)
        sentences[i] = ss[i].getAnswer();
    String[][] ass = ASSERT.annotatePredicates(sentences);
    // extract predicates from annotations
    for (int i = 0; i < ass.length; i++) {
        Term[] terms = ss[i].getTerms();
        Predicate[] questionPs = ss[i].getQuery().getAnalyzedQuestion().getPredicates();
        for (int j = 0; j < ass[i].length; j++) {
            // build predicate
            Predicate predicate;
            try {
                predicate = new Predicate(sentences[i], ass[i][j], terms);
            } catch (ParseException ignored) {
                // best effort: an annotation that cannot be parsed is
                // skipped, the remaining annotations are still processed
                continue;
            }
            // calculate similarity score,
            // compare to predicates with missing arguments only
            double simScore = 0;
            Predicate simPredicate = null;
            for (Predicate questionP : questionPs) {
                if (!questionP.hasMissingArgs())
                    continue;
                double currSimScore = predicate.simScore(questionP);
                if (currSimScore > simScore) {
                    simScore = currSimScore;
                    simPredicate = questionP;
                }
            }
            // keep predicate if it is similar to a question predicate
            if (simScore > 0) {
                predicate.setSimScore(simScore);
                predicate.setSimPredicate(simPredicate);
                Result result = ss[i].getCopy();
                result.setAnswer(ass[i][j]);
                result.setSentence(sentences[i]);
                result.setPredicate(predicate);
                allResults.add(result);
            }
        }
    }
    return allResults.toArray(new Result[allResults.size()]);
}
Also used : Query(info.ephyra.querygeneration.Query) ArrayList(java.util.ArrayList) Term(info.ephyra.questionanalysis.Term) Result(info.ephyra.search.Result) Predicate(info.ephyra.nlp.semantics.Predicate) ParseException(java.text.ParseException) HashSet(java.util.HashSet)

Example 8 with Term

use of info.ephyra.questionanalysis.Term in project lucida by claritylab.

the class BagOfTermsG method getQueryString.

/**
 * Builds a query string from the terms of the question. Each term whose
 * normalized text has not been seen before contributes one phrase; if the
 * term has expansions left after reduction, the phrase becomes a
 * parenthesized OR group of the term and its expansions. Terms and
 * expansions that contain whitespace are put in quotation marks.
 *
 * @param terms terms in the question
 * @param kws keywords in the question (currently not used, see the note in
 *            the method body)
 * @return query string
 */
private String getQueryString(Term[] terms, String[] kws) {
    ArrayList<String> queryParts = new ArrayList<String>();
    HashSet<String> seenNorms = new HashSet<String>();
    for (Term term : terms) {
        String text = term.getText();
        // each normalized term is added to the query only once
        if (!seenNorms.add(StringUtils.normalize(text)))
            continue;
        // compound phrases are put in quotation marks
        String phrase = text.matches(".*?\\s.*+") ? "\"" + text + "\"" : text;
        // append the reduced expansions as alternatives
        Map<String, Double> expMap = TermExpander.reduceExpansionsQuery(term.getExpansions(), true);
        if (expMap != null && expMap.size() > 0) {
            StringBuilder alternatives = new StringBuilder("(").append(phrase);
            for (String expansion : expMap.keySet()) {
                alternatives.append(" OR ");
                // compound expansions are put in quotation marks as well
                if (expansion.matches(".*?\\s.*+"))
                    alternatives.append("\"").append(expansion).append("\"");
                else
                    alternatives.append(expansion);
            }
            phrase = alternatives.append(")").toString();
        }
        queryParts.add(phrase);
    }
    // NOTE: individual keywords are deliberately not added to the query.
    // Two variants used to exist here and are both disabled: one that
    // expanded the keywords of each term into AND/OR groups (not supported
    // by Web search engines) and one that appended the unexpanded keywords
    // from kws that had not been seen yet.
    // build query string
    return StringUtils.concatWithSpaces(queryParts.toArray(new String[queryParts.size()]));
}
Also used : ArrayList(java.util.ArrayList) Term(info.ephyra.questionanalysis.Term) HashSet(java.util.HashSet)

Example 9 with Term

use of info.ephyra.questionanalysis.Term in project lucida by claritylab.

the class BagOfWordsG method generateQueries.

/**
 * Creates a single "bag of words" query for the analyzed question. No query
 * is created for a factoid question that has neither answer types nor
 * predicates.
 *
 * @param aq analyzed question
 * @return array with one <code>Query</code> object, or an empty array if no
 *         query is generated
 */
public Query[] generateQueries(AnalyzedQuestion aq) {
    String[] answerTypes = aq.getAnswerTypes();
    Predicate[] predicates = aq.getPredicates();
    // a factoid question needs a known answer type or an extracted
    // predicate, all other questions always get a query
    boolean hasTypeOrPredicate = answerTypes.length > 0 || predicates.length > 0;
    if (!hasTypeOrPredicate && aq.isFactoid())
        return new Query[0];
    // form the query string from the terms and keywords of the question
    String queryString = getQueryString(aq.getTerms(), aq.getKeywords());
    // create the query and set the extraction techniques
    Query query = new Query(queryString, aq, SCORE);
    query.setExtractionTechniques(EXTRACTION_TECHNIQUES);
    return new Query[] { query };
}
Also used : Query(info.ephyra.querygeneration.Query) Term(info.ephyra.questionanalysis.Term) Predicate(info.ephyra.nlp.semantics.Predicate)

Example 10 with Term

use of info.ephyra.questionanalysis.Term in project lucida by claritylab.

the class Predicate method simScore.

/**
 * Computes how similar this predicate is to a given predicate. The score is
 * the product of the similarity of the verbs and a Jaccard-style coefficient
 * over the arguments. If the score is not 0, it is also stored in this
 * predicate together with the predicate it was compared to.
 *
 * @param p predicate to compare with
 * @return similarity score, 0 if a verb term is missing, the verbs are not
 *         similar, either predicate has no arguments or no arguments match
 */
public double simScore(Predicate p) {
    // verbs: both predicates need a verb term, and the verbs must be similar
    Term otherVerbTerm = p.getVerbTerm();
    if (verbTerm == null || otherVerbTerm == null)
        return 0;
    double verbScore = otherVerbTerm.simScore(verbTerm.getLemma());
    if (verbScore == 0)
        return 0;
    // arguments: Jaccard coefficient, where each argument of this predicate
    // counts with its best similarity to any argument of the other predicate
    Term[] otherArgTerms = p.getArgTerms();
    if (argTerms.length == 0 || otherArgTerms.length == 0)
        return 0;
    double matched = 0;
    int unionSize = otherArgTerms.length;
    for (int i = 0; i < argTerms.length; i++) {
        String lemma = argTerms[i].getLemma();
        double best = 0;
        for (int j = 0; j < otherArgTerms.length; j++)
            best = Math.max(best, otherArgTerms[j].simScore(lemma));
        if (best > 0) {
            matched += best;
        } else {
            // an unmatched argument only enlarges the union
            unionSize++;
        }
    }
    double argScore = matched / unionSize;
    if (argScore == 0)
        return 0;
    // the overall score is the product of verb score and argument score;
    // remember it along with the predicate this predicate was compared to
    simScore = verbScore * argScore;
    simPredicate = p;
    return simScore;
}
Also used : Term(info.ephyra.questionanalysis.Term)

Aggregations

Term (info.ephyra.questionanalysis.Term)10 Predicate (info.ephyra.nlp.semantics.Predicate)6 Query (info.ephyra.querygeneration.Query)4 ArrayList (java.util.ArrayList)3 HashSet (java.util.HashSet)3 Matcher (java.util.regex.Matcher)2 Pattern (java.util.regex.Pattern)2 AnalyzedQuestion (info.ephyra.questionanalysis.AnalyzedQuestion)1 Result (info.ephyra.search.Result)1 Dictionary (info.ephyra.util.Dictionary)1 ParseException (java.text.ParseException)1 Hashtable (java.util.Hashtable)1