Use of info.ephyra.nlp.semantics.Predicate in project lucida by claritylab.
The class Logger, method logPredicates.
/**
 * Logs the predicates in a question.
 *
 * @param ps predicates
 * @return <code>true</code> iff logging was successful
 */
public static boolean logPredicates(Predicate[] ps) {
    // logging is disabled or log file is not specified
    if (!enabled || logfile == null)
        return false;
    try {
        PrintWriter out = new PrintWriter(new FileOutputStream(logfile, true));
        for (Predicate p : ps) {
            out.println("\t<predicate>");
            out.println("\t\t" + p.getAnnotated());
            out.println("\t</predicate>");
        }
        out.close();
    } catch (IOException e) {
        return false;
    }
    return true;
}
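A minimal usage sketch, assuming the predicates come from an analyzed question (the variable analyzedQuestion is illustrative; getPredicates() is the accessor used by the filters below):

    Predicate[] ps = analyzedQuestion.getPredicates();
    boolean logged = Logger.logPredicates(ps);
    // logged is false if logging is disabled, no log file is set,
    // or the log file could not be opened for appending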
Use of info.ephyra.nlp.semantics.Predicate in project lucida by claritylab.
The class FactoidsFromPredicatesFilter, method apply.
/**
 * Extracts factoids from the predicates within the answer strings of the
 * <code>Result</code> objects and creates a new <code>Result</code> for
 * each extracted unique answer.
 *
 * @param results array of <code>Result</code> objects containing predicates
 * @return array of <code>Result</code> objects containing factoids
 */
public Result[] apply(Result[] results) {
    // old results that are passed along the pipeline
    ArrayList<Result> oldResults = new ArrayList<Result>();
    // extracted factoid answers and corresponding results
    Hashtable<String, Result> factoids = new Hashtable<String, Result>();
    // extracted factoid answers and maximum weights of predicates
    Hashtable<String, Double> maxScores = new Hashtable<String, Double>();
    for (Result result : results) {
        // only apply this filter to results for the semantic parsing approach
        Query query = result.getQuery();
        Predicate[] ps = query.getAnalyzedQuestion().getPredicates();
        if (!query.extractWith(ID) || ps.length == 0 || result.getScore() != 0) {
            oldResults.add(result);
            continue;
        }
        Predicate p = result.getPredicate();
        Predicate questionP = p.getSimPredicate();
        double simScore = p.getSimScore();
        Map<String, String[]> nes = result.getNes();
        // get answer strings
        ArrayList<String> answers = new ArrayList<String>();
        if (nes != null) {
            // - allow entities in all arguments
            for (String ne : nes.keySet()) {
                for (String arg : p.getArgs()) {
                    if (arg.contains(ne)) {
                        answers.add(ne);
                        break;
                    }
                }
            }
            // - allow entities in missing arguments only
            // for (String ne : nes.keySet())
            //     for (String missing : questionP.getMissingArgs()) {
            //         String arg = p.get(missing);
            //         if (arg != null && arg.contains(ne)) {
            //             answers.add(ne);
            //             break;
            //         }
            //     }
        } else {
            // arguments as factoid answers
            for (String missing : questionP.getMissingArgs()) {
                String arg = p.get(missing);
                if (arg != null)
                    answers.add(arg);
            }
        }
        // create result objects
        for (String answer : answers) {
            String norm = StringUtils.normalize(answer);
            Result factoid = factoids.get(norm);
            if (factoid == null) {
                // new answer
                // query, doc ID and sentence can be ambiguous
                factoid = new Result(answer, result.getQuery(), result.getDocID());
                factoid.setSentence(result.getSentence());
                factoid.addExtractionTechnique(ID);
                factoids.put(norm, factoid);
                maxScores.put(norm, simScore);
            } else if (simScore > maxScores.get(norm)) {
                // remember document ID of predicate with highest score
                factoid.setDocID(result.getDocID());
                maxScores.put(norm, simScore);
            }
            if (nes != null)
                for (String neType : nes.get(answer))
                    factoid.addNeType(neType);
            factoid.incScore((float) simScore);
        }
    }
    // keep old results
    Result[] newResults = factoids.values().toArray(new Result[factoids.size()]);
    Result[] allResults = new Result[oldResults.size() + newResults.length];
    oldResults.toArray(allResults);
    for (int i = 0; i < newResults.length; i++)
        allResults[oldResults.size() + i] = newResults[i];
    return allResults;
}
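A hedged sketch of how the filter is invoked in a pipeline (the variable predicateResults is illustrative). Each unique normalized answer yields one factoid Result whose score accumulates the similarity scores of all predicates that produced it; results not tagged for this extraction technique are passed through unchanged.

    FactoidsFromPredicatesFilter filter = new FactoidsFromPredicatesFilter();
    // predicateResults: sentence-level results carrying extracted predicates,
    // e.g. the output of PredicateExtractionFilter below
    Result[] factoids = filter.apply(predicateResults);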
Use of info.ephyra.nlp.semantics.Predicate in project lucida by claritylab.
The class PredicateExtractionFilter, method checkSentence.
/**
 * Decides whether predicates should be extracted from this sentence. If the
 * sentence passes the tests, NEs of the expected answer types, as well as
 * terms, are extracted and added to the result.
 *
 * @param sentence sentence-level result
 * @return <code>true</code> iff the sentence is relevant
 */
private boolean checkSentence(Result sentence) {
    AnalyzedQuestion aq = sentence.getQuery().getAnalyzedQuestion();
    String s = sentence.getAnswer();
    // check the length of the sentence against thresholds
    if (s.length() > MAX_SENT_LENGTH_CHARS)
        return false;
    String[] tokens = NETagger.tokenize(s);
    if (tokens.length > MAX_SENT_LENGTH_TOKENS)
        return false;
    // // check if the sentence contains a matching verb term
    // boolean match = false;
    // Predicate[] questionPs = aq.getPredicates();
    // String[] tokens = OpenNLP.tokenize(s);
    // String[] pos = OpenNLP.tagPos(tokens);
    // for (int i = 0; i < tokens.length; i++) {
    //     // look for verbs only
    //     if (!pos[i].startsWith("VB") || !pos[i].matches("[a-zA-Z]*"))
    //         continue;
    //     Term sentenceTerm = new Term(tokens[i], pos[i]);
    //
    //     for (Predicate questionP : questionPs) {
    //         // compare to predicates with missing arguments only
    //         if (!questionP.hasMissingArgs()) continue;
    //         Term predicateTerm = questionP.getVerbTerm();
    //
    //         if (predicateTerm.simScore(sentenceTerm.getLemma()) > 0) {
    //             match = true;
    //             break;
    //         }
    //     }
    //
    //     if (match) break;
    // }
    // if (!match) return false;
    // -> checked in apply() (performance optimized)
    // check if the sentence contains NEs of the expected types
    String[] answerTypes = aq.getAnswerTypes();
    if (answerTypes.length != 0) {
        // answer type known
        boolean newNE = false;
        Map<String, String[]> extracted = extractNes(s, answerTypes);
        String questionNorm = StringUtils.normalize(aq.getQuestion());
        for (String ne : extracted.keySet()) {
            String neNorm = StringUtils.normalize(ne);
            if (!StringUtils.isSubsetKeywords(neNorm, questionNorm)) {
                newNE = true;
                break;
            }
        }
        // no NEs that are not in the question
        if (!newNE)
            return false;
        sentence.setNes(extracted);
    }
    // check if the sentence contains a matching argument term
    // - single-token terms are extracted first to avoid dictionary lookups
    boolean match = false;
    Term[] singleTerms = TermExtractor.getSingleTokenTerms(s);
    Predicate[] questionPs = aq.getPredicates();
    for (Term singleTerm : singleTerms) {
        for (Predicate questionP : questionPs) {
            // compare to predicates with missing arguments only
            if (!questionP.hasMissingArgs())
                continue;
            Term[] predicateTerms = questionP.getArgTerms();
            for (Term predicateTerm : predicateTerms) {
                if (predicateTerm.simScore(singleTerm.getLemma()) > 0) {
                    match = true;
                    break;
                }
            }
            if (match)
                break;
        }
        if (match)
            break;
    }
    if (!match)
        return false;
    // - multi-token terms are extracted from sentences that pass the test
    Dictionary[] dicts = QuestionAnalysis.getDictionaries();
    Term[] multiTerms = TermExtractor.getTerms(s, dicts);
    sentence.setTerms(multiTerms);
    return true;
}
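The NE gate above hinges on StringUtils.isSubsetKeywords: a sentence is kept only if at least one extracted entity adds information that is not already in the question. A self-contained sketch of that criterion, with a simple keyword-containment check standing in for the Ephyra helper (not the actual implementation):

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Set;

    public class NewNeCheck {

        // Illustrative stand-in for StringUtils.isSubsetKeywords: true if all
        // keywords of ne already occur among the keywords of question.
        static boolean isSubsetKeywords(String ne, String question) {
            Set<String> questionWords = new HashSet<String>(
                    Arrays.asList(question.toLowerCase().split("\\s+")));
            for (String word : ne.toLowerCase().split("\\s+"))
                if (!questionWords.contains(word))
                    return false;
            return true;
        }

        public static void main(String[] args) {
            String question = "when was albert einstein born";
            // "Albert Einstein" repeats the question, "1879" is new information
            System.out.println(isSubsetKeywords("Albert Einstein", question)); // true
            System.out.println(isSubsetKeywords("1879", question)); // false
        }
    }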
Use of info.ephyra.nlp.semantics.Predicate in project lucida by claritylab.
The class PredicateExtractionFilter, method getAllVerbForms.
/**
 * Gets all forms of the verbs of predicates with missing arguments, as
 * well as of their expansions. The verb forms are associated with their
 * weights.
 *
 * @param ps predicates
 * @return verb forms and their weights
 */
private Hashtable<String[], Double> getAllVerbForms(Predicate[] ps) {
    Hashtable<String[], Double> allVerbForms = new Hashtable<String[], Double>();
    for (Predicate p : ps) {
        // get verbs from predicates with missing arguments only
        if (!p.hasMissingArgs())
            continue;
        // get predicate verb and expansions
        Term verbTerm = p.getVerbTerm();
        String verb = verbTerm.getText();
        Map<String, Double> expansionsMap = verbTerm.getExpansions();
        Set<String> expansions = expansionsMap.keySet();
        // get all verb forms
        String infinitive = WordNet.getLemma(verb, WordNet.VERB);
        if (infinitive == null)
            infinitive = verb;
        String[] verbForms = VerbFormConverter.getAllForms(infinitive);
        allVerbForms.put(verbForms, 1d);
        for (String expansion : expansions) {
            infinitive = WordNet.getLemma(expansion, WordNet.VERB);
            if (infinitive == null)
                infinitive = expansion;
            verbForms = VerbFormConverter.getAllForms(infinitive);
            allVerbForms.put(verbForms, expansionsMap.get(expansion));
        }
    }
    return allVerbForms;
}
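For illustration, assuming WordNet lemmatizes "wrote" to "write", one entry of the returned table would map the generated inflections of "write" to weight 1.0 (the exact forms depend on VerbFormConverter):

    // Hypothetical consumption of the returned table; Arrays.toString is only
    // used here for display. Note that the keys are String arrays, which hash
    // by identity; the caller only iterates over the key set, so content-based
    // lookups are never needed.
    Hashtable<String[], Double> allVerbForms = getAllVerbForms(ps);
    for (String[] forms : allVerbForms.keySet())
        System.out.println(Arrays.toString(forms) + " -> " + allVerbForms.get(forms));
    // possible output (illustrative):
    // [write, writes, wrote, written, writing] -> 1.0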
Use of info.ephyra.nlp.semantics.Predicate in project lucida by claritylab.
The class PredicateExtractionFilter, method apply.
/**
* Extracts relevant predicates from documents.
*
* @param results array of <code>Result</code> objects containing documents
* @return array of <code>Result</code> objects containing predicates
*/
public Result[] apply(Result[] results) {
    if (results.length == 0)
        return results;
    ArrayList<Result> allResults = new ArrayList<Result>();
    // extract relevant sentences
    // - get sentences that contain relevant verbs,
    //   use weights of verbs as confidence scores
    HashSet<Result> ssSet = new HashSet<Result>();
    for (Result result : results) {
        // only apply this filter to results for the semantic parsing approach
        Query query = result.getQuery();
        Predicate[] ps = query.getAnalyzedQuestion().getPredicates();
        if (!query.extractWith(FactoidsFromPredicatesFilter.ID) || ps.length == 0 || result.getScore() != 0) {
            allResults.add(result);
            continue;
        }
        // get all verb forms and build patterns
        Hashtable<String[], Double> verbFormsMap = getAllVerbForms(ps);
        ArrayList<String> verbPatterns = new ArrayList<String>();
        ArrayList<Double> verbWeights = new ArrayList<Double>();
        for (String[] verbForms : verbFormsMap.keySet()) {
            String verbPattern = "(?i).*?\\b(" + StringUtils.concat(verbForms, "|") + ")\\b.*+";
            verbPatterns.add(verbPattern);
            verbWeights.add(verbFormsMap.get(verbForms));
        }
        String[] paragraphs = result.getAnswer().split("\\n");
        for (String p : paragraphs) {
            // paragraph does not contain relevant verb?
            boolean contains = false;
            for (String verbPattern : verbPatterns) {
                if (p.matches(verbPattern)) {
                    contains = true;
                    break;
                }
            }
            if (!contains)
                continue;
            String[] sentences = LingPipe.sentDetect(p);
            for (String s : sentences) {
                // sentence does not contain relevant verb?
                Double weight = 0d;
                for (int i = 0; i < verbPatterns.size(); i++) {
                    if (s.matches(verbPatterns.get(i))) {
                        weight = verbWeights.get(i);
                        break;
                    }
                }
                if (weight == 0d)
                    continue;
                // replace whitespaces by single blanks and trim
                s = s.replaceAll("\\s++", " ").trim();
                // create sentence-level result object
                Result sentence = result.getCopy();
                sentence.setAnswer(s);
                sentence.setScore(weight.floatValue());
                ssSet.add(sentence);
            }
        }
    }
    // - check if these sentences are relevant,
    //   get MAX_SENTENCES sentences with most relevant verbs
    Result[] ss = ssSet.toArray(new Result[ssSet.size()]);
    ss = (new ScoreSorterFilter()).apply(ss);
    ArrayList<Result> ssList = new ArrayList<Result>();
    for (Result s : ss) {
        s.setScore(0);
        if (checkSentence(s))
            ssList.add(s);
        // get at most MAX_SENTENCES sentences
        if (ssList.size() >= MAX_SENTENCES)
            break;
    }
    ss = ssList.toArray(new Result[ssList.size()]);
    if (ss.length == 0)
        return allResults.toArray(new Result[allResults.size()]);
    // annotate predicates in sentences
    String[] sentences = new String[ss.length];
    for (int i = 0; i < ss.length; i++)
        sentences[i] = ss[i].getAnswer();
    String[][] ass = ASSERT.annotatePredicates(sentences);
    // extract predicates from annotations
    for (int i = 0; i < ass.length; i++) {
        Term[] terms = ss[i].getTerms();
        Predicate[] questionPs = ss[i].getQuery().getAnalyzedQuestion().getPredicates();
        for (int j = 0; j < ass[i].length; j++) {
            // build predicate
            Predicate predicate = null;
            try {
                predicate = new Predicate(sentences[i], ass[i][j], terms);
            } catch (ParseException e) {
                // System.exit(1);
                continue;
            }
            // calculate similarity score
            double simScore = 0;
            Predicate simPredicate = null;
            for (Predicate questionP : questionPs) {
                // compare to predicates with missing arguments only
                if (questionP.hasMissingArgs()) {
                    double currSimScore = predicate.simScore(questionP);
                    if (currSimScore > simScore) {
                        simScore = currSimScore;
                        simPredicate = questionP;
                    }
                }
            }
            // keep predicate if it is similar to a question predicate
            if (simScore > 0) {
                predicate.setSimScore(simScore);
                predicate.setSimPredicate(simPredicate);
                Result result = ss[i].getCopy();
                result.setAnswer(ass[i][j]);
                result.setSentence(sentences[i]);
                result.setPredicate(predicate);
                allResults.add(result);
            }
        }
    }
    return allResults.toArray(new Result[allResults.size()]);
}
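The verb patterns built in apply() are whole-string regexes that match any text containing one of the verb forms as a separate word. A standalone sketch with illustrative verb forms (String.join stands in for the StringUtils.concat call used above):

    String[] verbForms = {"write", "writes", "wrote", "written", "writing"};
    String verbPattern = "(?i).*?\\b(" + String.join("|", verbForms) + ")\\b.*+";
    System.out.println("She wrote a novel.".matches(verbPattern));  // true
    System.out.println("The rewrite failed.".matches(verbPattern)); // false: \b blocks matches inside words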