Search in sources:

Example 6 with Predicate

use of info.ephyra.nlp.semantics.Predicate in project lucida by claritylab.

The class Logger, method logPredicates.

/**
	 * Logs the predicates in a question.
	 * 
	 * @param ps predicates
	 * @return <code>true</code> iff logging was successful
	 */
public static boolean logPredicates(Predicate[] ps) {
    // logging is disabled or log file is not specified
    if (!enabled || logfile == null)
        return false;
    PrintWriter out = null;
    try {
        // append to the existing log file rather than overwriting it
        out = new PrintWriter(new FileOutputStream(logfile, true));
        for (Predicate p : ps) {
            out.println("\t<predicate>");
            out.println("\t\t" + p.getAnnotated());
            out.println("\t</predicate>");
        }
    } catch (IOException e) {
        // opening the log file failed; report failure to the caller
        return false;
    } finally {
        // always release the stream, even if writing fails midway
        if (out != null)
            out.close();
    }
    return true;
}
Also used : FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) PrintWriter(java.io.PrintWriter) Predicate(info.ephyra.nlp.semantics.Predicate)

Example 7 with Predicate

use of info.ephyra.nlp.semantics.Predicate in project lucida by claritylab.

The class FactoidsFromPredicatesFilter, method apply.

/**
	 * Extracts factoids from the predicates within the answer strings of the
	 * <code>Result</code> objects and creates a new <code>Result</code> for
	 * each extracted unique answer.
	 * 
	 * @param results array of <code>Result</code> objects containing predicates
	 * @return array of <code>Result</code> objects containing factoids
	 */
public Result[] apply(Result[] results) {
    // old results that are passed along the pipeline unchanged
    ArrayList<Result> oldResults = new ArrayList<Result>();
    // normalized factoid answer -> result object accumulating its score
    Hashtable<String, Result> factoids = new Hashtable<String, Result>();
    // normalized factoid answer -> maximum similarity score seen so far
    Hashtable<String, Double> maxScores = new Hashtable<String, Double>();
    for (Result result : results) {
        // only apply this filter to results for the semantic parsing
        // approach; all other results are passed along untouched
        Query query = result.getQuery();
        Predicate[] ps = query.getAnalyzedQuestion().getPredicates();
        if (!query.extractWith(ID) || ps.length == 0 || result.getScore() != 0) {
            oldResults.add(result);
            continue;
        }
        Predicate p = result.getPredicate();
        // NOTE(review): assumes getSimPredicate() is non-null for these
        // results — confirm the upstream filter always sets it
        Predicate questionP = p.getSimPredicate();
        double simScore = p.getSimScore();
        Map<String, String[]> nes = result.getNes();
        // get answer strings
        ArrayList<String> answers = new ArrayList<String>();
        if (nes != null) {
            // - allow entities in all arguments
            for (String ne : nes.keySet()) for (String arg : p.getArgs()) if (arg.contains(ne)) {
                answers.add(ne);
                break;
            }
        // - allow entities in missing arguments only
        //				for (String ne : nes.keySet())
        //					for (String missing : questionP.getMissingArgs()) {
        //						String arg = p.get(missing);
        //						if (arg != null && arg.contains(ne)) {
        //							answers.add(ne);
        //							break;
        //						}
        //					}
        } else {
            // no NEs available: use the arguments filling the question
            // predicate's missing slots as factoid answers
            for (String missing : questionP.getMissingArgs()) {
                String arg = p.get(missing);
                if (arg != null)
                    answers.add(arg);
            }
        }
        // create result objects, merging duplicates by normalized answer
        for (String answer : answers) {
            String norm = StringUtils.normalize(answer);
            Result factoid = factoids.get(norm);
            if (factoid == null) {
                // new answer
                // query, doc ID and sentence can be ambiguous
                factoid = new Result(answer, result.getQuery(), result.getDocID());
                factoid.setSentence(result.getSentence());
                factoid.addExtractionTechnique(ID);
                factoids.put(norm, factoid);
                maxScores.put(norm, simScore);
            } else if (simScore > maxScores.get(norm)) {
                // remember document ID of predicate with highest score
                factoid.setDocID(result.getDocID());
                maxScores.put(norm, simScore);
            }
            if (nes != null)
                for (String neType : nes.get(answer)) factoid.addNeType(neType);
            // scores of all supporting predicates are accumulated
            factoid.incScore((float) simScore);
        }
    }
    // keep old results and append the newly extracted factoids
    Result[] newResults = factoids.values().toArray(new Result[factoids.size()]);
    Result[] allResults = new Result[oldResults.size() + newResults.length];
    oldResults.toArray(allResults);
    for (int i = 0; i < newResults.length; i++) allResults[oldResults.size() + i] = newResults[i];
    return allResults;
}
Also used : Query(info.ephyra.querygeneration.Query) Hashtable(java.util.Hashtable) ArrayList(java.util.ArrayList) Result(info.ephyra.search.Result) Predicate(info.ephyra.nlp.semantics.Predicate)

Example 8 with Predicate

use of info.ephyra.nlp.semantics.Predicate in project lucida by claritylab.

The class PredicateExtractionFilter, method checkSentence.

/**
	 * Decides if predicates should be extracted from this sentence. A
	 * sentence is relevant if it is not overly long, contains at least one
	 * named entity of an expected answer type that does not already occur
	 * in the question (when answer types are known), and shares at least
	 * one argument term with a question predicate that has missing
	 * arguments. Relevant sentences get their NEs and terms attached.
	 * 
	 * @param sentence sentence-level result
	 * @return <code>true</code> iff the sentence is relevant
	 */
private boolean checkSentence(Result sentence) {
    AnalyzedQuestion aq = sentence.getQuery().getAnalyzedQuestion();
    String text = sentence.getAnswer();
    // discard sentences that exceed the length thresholds
    // (characters first, then tokens)
    if (text.length() > MAX_SENT_LENGTH_CHARS)
        return false;
    if (NETagger.tokenize(text).length > MAX_SENT_LENGTH_TOKENS)
        return false;
    // if the answer type is known, require at least one NE of an
    // expected type that is not just a repetition of question keywords
    String[] answerTypes = aq.getAnswerTypes();
    if (answerTypes.length != 0) {
        Map<String, String[]> extracted = extractNes(text, answerTypes);
        String questionNorm = StringUtils.normalize(aq.getQuestion());
        boolean foundNewNe = false;
        for (String ne : extracted.keySet()) {
            String neNorm = StringUtils.normalize(ne);
            if (!StringUtils.isSubsetKeywords(neNorm, questionNorm)) {
                foundNewNe = true;
                break;
            }
        }
        // every NE already occurs in the question
        if (!foundNewNe)
            return false;
        sentence.setNes(extracted);
    }
    // require an argument term shared with a question predicate;
    // single-token terms are checked first to avoid dictionary lookups
    Term[] singleTerms = TermExtractor.getSingleTokenTerms(text);
    Predicate[] questionPs = aq.getPredicates();
    boolean hasMatchingArg = false;
    search: for (Term singleTerm : singleTerms) {
        for (Predicate questionP : questionPs) {
            // only predicates with missing arguments are of interest
            if (!questionP.hasMissingArgs())
                continue;
            for (Term argTerm : questionP.getArgTerms()) {
                if (argTerm.simScore(singleTerm.getLemma()) > 0) {
                    hasMatchingArg = true;
                    break search;
                }
            }
        }
    }
    if (!hasMatchingArg)
        return false;
    // the sentence passed all tests: extract multi-token terms as well
    Dictionary[] dicts = QuestionAnalysis.getDictionaries();
    sentence.setTerms(TermExtractor.getTerms(text, dicts));
    return true;
}
Also used : Dictionary(info.ephyra.util.Dictionary) AnalyzedQuestion(info.ephyra.questionanalysis.AnalyzedQuestion) Term(info.ephyra.questionanalysis.Term) Predicate(info.ephyra.nlp.semantics.Predicate)

Example 9 with Predicate

use of info.ephyra.nlp.semantics.Predicate in project lucida by claritylab.

The class PredicateExtractionFilter, method getAllVerbForms.

/**
	 * Gets all forms of the verbs and expansions of predicates with missing
	 * arguments. The verb forms are associated with their weights.
	 * 
	 * @param ps predicates
	 * @return verb forms and their weights
	 */
private Hashtable<String[], Double> getAllVerbForms(Predicate[] ps) {
    Hashtable<String[], Double> weightedForms = new Hashtable<String[], Double>();
    for (Predicate p : ps) {
        // only predicates with missing arguments contribute verbs
        if (!p.hasMissingArgs())
            continue;
        Term verbTerm = p.getVerbTerm();
        // the predicate verb itself carries full weight
        weightedForms.put(allFormsOf(verbTerm.getText()), 1d);
        // expansions of the verb keep their individual weights
        Map<String, Double> expansionsMap = verbTerm.getExpansions();
        for (Map.Entry<String, Double> expansion : expansionsMap.entrySet())
            weightedForms.put(allFormsOf(expansion.getKey()), expansion.getValue());
    }
    return weightedForms;
}

/**
	 * Generates all forms of a verb, starting from its infinitive if WordNet
	 * knows one and from the verb itself otherwise.
	 * 
	 * @param verb a verb
	 * @return all forms of the verb
	 */
private String[] allFormsOf(String verb) {
    String infinitive = WordNet.getLemma(verb, WordNet.VERB);
    if (infinitive == null)
        infinitive = verb;
    return VerbFormConverter.getAllForms(infinitive);
}
Also used : Hashtable(java.util.Hashtable) Term(info.ephyra.questionanalysis.Term) Predicate(info.ephyra.nlp.semantics.Predicate)

Example 10 with Predicate

use of info.ephyra.nlp.semantics.Predicate in project lucida by claritylab.

The class PredicateExtractionFilter, method apply.

/**
	 * Extracts relevant predicates from documents. Document-level results are
	 * split into sentences that contain relevant verbs, the most relevant
	 * sentences are annotated with ASSERT, and predicates similar to a
	 * question predicate are turned into new results.
	 * 
	 * @param results array of <code>Result</code> objects containing documents
	 * @return array of <code>Result</code> objects containing predicates
	 */
public Result[] apply(Result[] results) {
    if (results.length == 0)
        return results;
    ArrayList<Result> allResults = new ArrayList<Result>();
    // extract relevant sentences
    // - get sentences that contain relevant verbs,
    //   use weights of verbs as confidence scores
    // (a set drops duplicate sentence results — NOTE(review): relies on
    // Result.equals()/hashCode(); confirm they compare the answer string)
    HashSet<Result> ssSet = new HashSet<Result>();
    for (Result result : results) {
        // only apply this filter to results for the semantic parsing
        // approach
        Query query = result.getQuery();
        Predicate[] ps = query.getAnalyzedQuestion().getPredicates();
        if (!query.extractWith(FactoidsFromPredicatesFilter.ID) || ps.length == 0 || result.getScore() != 0) {
            allResults.add(result);
            continue;
        }
        // get all verb forms and build patterns
        // (case-insensitive whole-word match; the possessive ".*+" tail
        // avoids needless backtracking)
        Hashtable<String[], Double> verbFormsMap = getAllVerbForms(ps);
        ArrayList<String> verbPatterns = new ArrayList<String>();
        ArrayList<Double> verbWeights = new ArrayList<Double>();
        for (String[] verbForms : verbFormsMap.keySet()) {
            String verbPattern = "(?i).*?\\b(" + StringUtils.concat(verbForms, "|") + ")\\b.*+";
            verbPatterns.add(verbPattern);
            verbWeights.add(verbFormsMap.get(verbForms));
        }
        String[] paragraphs = result.getAnswer().split("\\n");
        for (String p : paragraphs) {
            // paragraph does not contain relevant verb?
            boolean contains = false;
            for (String verbPattern : verbPatterns) {
                if (p.matches(verbPattern)) {
                    contains = true;
                    break;
                }
            }
            if (!contains)
                continue;
            String[] sentences = LingPipe.sentDetect(p);
            for (String s : sentences) {
                // sentence does not contain relevant verb?
                // (weight of the first matching verb pattern becomes the
                // sentence's preliminary confidence score)
                Double weight = 0d;
                for (int i = 0; i < verbPatterns.size(); i++) {
                    if (s.matches(verbPatterns.get(i))) {
                        weight = verbWeights.get(i);
                        break;
                    }
                }
                if (weight == 0d)
                    continue;
                // replace whitespaces by single blanks and trim
                s = s.replaceAll("\\s++", " ").trim();
                // create sentence-level result object
                Result sentence = result.getCopy();
                sentence.setAnswer(s);
                sentence.setScore(weight.floatValue());
                ssSet.add(sentence);
            }
        }
    }
    // - check if these sentences are relevant,
    //   get MAX_SENTENCES sentences with most relevant verbs
    Result[] ss = ssSet.toArray(new Result[ssSet.size()]);
    ss = (new ScoreSorterFilter()).apply(ss);
    ArrayList<Result> ssList = new ArrayList<Result>();
    for (Result s : ss) {
        // reset the preliminary verb-weight score before further processing
        s.setScore(0);
        if (checkSentence(s))
            ssList.add(s);
        // get at most MAX_SENTENCES sentences
        if (ssList.size() >= MAX_SENTENCES)
            break;
    }
    ss = ssList.toArray(new Result[ssList.size()]);
    if (ss.length == 0)
        return allResults.toArray(new Result[allResults.size()]);
    // annotate predicates in sentences (single batched call to ASSERT)
    String[] sentences = new String[ss.length];
    for (int i = 0; i < ss.length; i++) sentences[i] = ss[i].getAnswer();
    String[][] ass = ASSERT.annotatePredicates(sentences);
    // extract predicates from annotations
    for (int i = 0; i < ass.length; i++) {
        Term[] terms = ss[i].getTerms();
        Predicate[] questionPs = ss[i].getQuery().getAnalyzedQuestion().getPredicates();
        for (int j = 0; j < ass[i].length; j++) {
            // build predicate
            Predicate predicate = null;
            try {
                predicate = new Predicate(sentences[i], ass[i][j], terms);
            } catch (ParseException e) {
                // malformed annotation: skip this predicate
                //					System.exit(1);
                continue;
            }
            // calculate similarity score, keeping the most similar
            // question predicate
            double simScore = 0;
            Predicate simPredicate = null;
            for (Predicate questionP : questionPs) // compare to predicates with missing arguments only
            if (questionP.hasMissingArgs()) {
                double currSimScore = predicate.simScore(questionP);
                if (currSimScore > simScore) {
                    simScore = currSimScore;
                    simPredicate = questionP;
                }
            }
            // keep predicate if it is similar to a question predicate
            if (simScore > 0) {
                predicate.setSimScore(simScore);
                predicate.setSimPredicate(simPredicate);
                Result result = ss[i].getCopy();
                result.setAnswer(ass[i][j]);
                result.setSentence(sentences[i]);
                result.setPredicate(predicate);
                allResults.add(result);
            }
        }
    }
    return allResults.toArray(new Result[allResults.size()]);
}
Also used : Query(info.ephyra.querygeneration.Query) ArrayList(java.util.ArrayList) Term(info.ephyra.questionanalysis.Term) Result(info.ephyra.search.Result) Predicate(info.ephyra.nlp.semantics.Predicate) ParseException(java.text.ParseException) HashSet(java.util.HashSet)

Aggregations

Predicate (info.ephyra.nlp.semantics.Predicate)12 Query (info.ephyra.querygeneration.Query)6 Term (info.ephyra.questionanalysis.Term)6 ArrayList (java.util.ArrayList)4 Result (info.ephyra.search.Result)3 HashSet (java.util.HashSet)3 IOException (java.io.IOException)2 ParseException (java.text.ParseException)2 Hashtable (java.util.Hashtable)2 AnalyzedQuestion (info.ephyra.questionanalysis.AnalyzedQuestion)1 Dictionary (info.ephyra.util.Dictionary)1 FileCache (info.ephyra.util.FileCache)1 FileOutputStream (java.io.FileOutputStream)1 PrintWriter (java.io.PrintWriter)1 HttpURLConnection (java.net.HttpURLConnection)1 URL (java.net.URL)1 URLConnection (java.net.URLConnection)1