Use of info.ephyra.questionanalysis.Term in project lucida by claritylab.
Class PredicateG, method generateQueries:
/**
 * Generates queries from predicate-argument structures extracted from the
 * question string.
 *
 * @param aq analyzed question
 * @return <code>Query</code> objects
 */
public Query[] generateQueries(AnalyzedQuestion aq) {
    // only generate a query if predicates could be extracted
    Predicate[] ps = aq.getPredicates();
    if (ps.length == 0)
        return new Query[0];

    // create query string
    Term[] terms = aq.getTerms();
    String[] kws = aq.getKeywords();
    String queryString = getQueryString(ps, terms, kws);

    // create query, set answer types and predicates
    Query[] queries = new Query[1];
    queries[0] = new Query(queryString, aq, SCORE);
    queries[0].setExtractionTechniques(EXTRACTION_TECHNIQUES);
    return queries;
}
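Note that the generator reports "no query" by returning an empty array rather than null, so a caller can concatenate the output of several generators without special cases. The sketch below illustrates that calling pattern with toy stand-ins for the Ephyra types; it is not the actual QueryGeneration pipeline.

import java.util.ArrayList;
import java.util.List;

public class GeneratorChainSketch {

    // toy stand-in for Ephyra's Query class, reduced to the query string
    static class Query {
        final String queryString;
        Query(String queryString) { this.queryString = queryString; }
    }

    // toy stand-in for a query generator: returns an empty array (never null)
    // when it cannot produce a query
    interface Generator {
        Query[] generateQueries(String question);
    }

    public static void main(String[] args) {
        Generator bagOfTerms = q -> new Query[] { new Query(q) };
        Generator predicateBased = q -> new Query[0];  // pretend no predicates were found

        // because generators return empty arrays instead of null,
        // their results can be concatenated without null checks
        List<Query> queries = new ArrayList<Query>();
        for (Generator g : new Generator[] { bagOfTerms, predicateBased })
            for (Query query : g.generateQueries("who invented the telephone"))
                queries.add(query);

        System.out.println(queries.size());  // prints 1
    }
}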
Use of info.ephyra.questionanalysis.Term in project lucida by claritylab.
Class Predicate, method setArgTerms:
/**
 * Sets the terms in the arguments.
 *
 * @param terms the terms in the sentence the predicate was extracted from
 */
private void setArgTerms(Term[] terms) {
    List<String> tokenizedArgs = new ArrayList<String>();
    for (String arg : args)
        if (arg != null)
            tokenizedArgs.add(NETagger.tokenizeWithSpaces(arg));
    if (argLOC != null)
        tokenizedArgs.add(NETagger.tokenizeWithSpaces(argLOC));
    if (argCAU != null)
        tokenizedArgs.add(NETagger.tokenizeWithSpaces(argCAU));
    if (argEXT != null)
        tokenizedArgs.add(NETagger.tokenizeWithSpaces(argEXT));
    if (argTMP != null)
        tokenizedArgs.add(NETagger.tokenizeWithSpaces(argTMP));
    if (argDIS != null)
        tokenizedArgs.add(NETagger.tokenizeWithSpaces(argDIS));
    if (argPNC != null)
        tokenizedArgs.add(NETagger.tokenizeWithSpaces(argPNC));
    if (argADV != null)
        tokenizedArgs.add(NETagger.tokenizeWithSpaces(argADV));
    if (argMNR != null)
        tokenizedArgs.add(NETagger.tokenizeWithSpaces(argMNR));
    if (argNEG != null)
        tokenizedArgs.add(NETagger.tokenizeWithSpaces(argNEG));
    if (argDIR != null)
        tokenizedArgs.add(NETagger.tokenizeWithSpaces(argDIR));
    if (argMOD != null)
        tokenizedArgs.add(NETagger.tokenizeWithSpaces(argMOD));

    List<Term> argTerms = new ArrayList<Term>();
    // used to eliminate duplicate terms
    Set<String> uniqueTerms = new HashSet<String>();
    for (Term term : terms) {
        String tokenizedTerm = NETagger.tokenizeWithSpaces(term.getText());
        Pattern p = Pattern.compile("(^|\\W)" + RegexConverter.strToRegex(tokenizedTerm) + "($|\\W)");
        for (String tokenizedArg : tokenizedArgs) {
            Matcher m = p.matcher(tokenizedArg);
            if (m.find()) {
                if (uniqueTerms.add(tokenizedTerm))
                    argTerms.add(term);
                break;
            }
        }
    }
    this.argTerms = argTerms.toArray(new Term[argTerms.size()]);
}
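setArgTerms (and setVerbTerm below) match terms against arguments by tokenizing both with spaces, escaping the term into a literal regex, and anchoring it with (^|\W) and ($|\W) so it only matches at token boundaries; the return value of Set.add() is used to skip duplicate terms. The following self-contained sketch reproduces that pattern, using java.util.regex.Pattern.quote as an assumed stand-in for RegexConverter.strToRegex.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

public class BoundaryMatchSketch {

    public static void main(String[] args) {
        // tokenized argument strings, as a whitespace tokenizer might produce them
        List<String> tokenizedArgs = Arrays.asList(
                "the 35th president",
                "in 1901");

        String[] terms = { "president", "preside", "1901" };

        Set<String> uniqueTerms = new HashSet<String>();  // eliminates duplicate terms
        List<String> matched = new ArrayList<String>();

        for (String term : terms) {
            // Pattern.quote escapes regex metacharacters (assumed stand-in for
            // RegexConverter.strToRegex); (^|\W) and ($|\W) restrict the match to
            // token boundaries, so "preside" does not match inside "president"
            Pattern p = Pattern.compile("(^|\\W)" + Pattern.quote(term) + "($|\\W)");
            for (String arg : tokenizedArgs) {
                if (p.matcher(arg).find()) {
                    if (uniqueTerms.add(term))
                        matched.add(term);
                    break;
                }
            }
        }
        System.out.println(matched);  // prints [president, 1901]
    }
}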
Use of info.ephyra.questionanalysis.Term in project lucida by claritylab.
Class Predicate, method setVerbTerm:
/**
 * Sets the term for the verb.
 *
 * @param terms the terms in the sentence the predicate was extracted from
 */
private void setVerbTerm(Term[] terms) {
    String tokenizedVerb = NETagger.tokenizeWithSpaces(verb);
    Pattern p = Pattern.compile("(^|\\W)" + RegexConverter.strToRegex(tokenizedVerb) + "($|\\W)");
    for (Term term : terms) {
        String tokenizedTerm = NETagger.tokenizeWithSpaces(term.getText());
        Matcher m = p.matcher(tokenizedTerm);
        if (m.find()) {
            verbTerm = term;
            break;
        }
    }
}
Use of info.ephyra.questionanalysis.Term in project lucida by claritylab.
Class BagOfTermsG, method generateQueries:
/**
 * Generates a "bag of terms" query from the terms in the question string.
 *
 * @param aq analyzed question
 * @return <code>Query</code> objects
 */
public Query[] generateQueries(AnalyzedQuestion aq) {
    // only generate a query if the answer type is known, predicates could
    // be extracted or the question is not a factoid question
    String[] ats = aq.getAnswerTypes();
    Predicate[] ps = aq.getPredicates();
    if (ats.length == 0 && ps.length == 0 && aq.isFactoid())
        return new Query[0];

    // create query string
    Term[] terms = aq.getTerms();
    String[] kws = aq.getKeywords();
    String queryString = getQueryString(terms, kws);

    // create query, set answer types
    Query[] queries = new Query[1];
    queries[0] = new Query(queryString, aq, SCORE);
    queries[0].setExtractionTechniques(EXTRACTION_TECHNIQUES);
    return queries;
}
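The helper getQueryString(terms, kws) is not part of this snippet. As a purely hypothetical illustration (not Ephyra's implementation), a "bag of terms" query string could quote multi-token terms as phrases and append the remaining keywords as plain words:

import java.util.LinkedHashSet;
import java.util.Set;

public class BagOfTermsSketch {

    // hypothetical helper, not Ephyra's getQueryString(): multi-token terms
    // become quoted phrases, everything else is added as a plain word
    static String bagOfTerms(String[] terms, String[] keywords) {
        Set<String> parts = new LinkedHashSet<String>();  // keeps order, drops duplicates
        for (String t : terms)
            parts.add(t.contains(" ") ? "\"" + t + "\"" : t);
        for (String k : keywords)
            parts.add(k);
        return String.join(" ", parts);
    }

    public static void main(String[] args) {
        String[] terms = { "Alexander Graham Bell", "telephone" };
        String[] keywords = { "invented", "telephone" };
        // prints: "Alexander Graham Bell" telephone invented
        System.out.println(bagOfTerms(terms, keywords));
    }
}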
Use of info.ephyra.questionanalysis.Term in project lucida by claritylab.
Class PredicateExtractionFilter, method checkSentence:
/**
 * Decides if predicates should be extracted from this sentence. If the
 * sentence passes the tests, NEs of the expected answer types and terms
 * are extracted and added to the result.
 *
 * @param sentence sentence-level result
 * @return <code>true</code> iff the sentence is relevant
 */
private boolean checkSentence(Result sentence) {
    AnalyzedQuestion aq = sentence.getQuery().getAnalyzedQuestion();
    String s = sentence.getAnswer();

    // check the length of the sentence against thresholds
    if (s.length() > MAX_SENT_LENGTH_CHARS)
        return false;
    String[] tokens = NETagger.tokenize(s);
    if (tokens.length > MAX_SENT_LENGTH_TOKENS)
        return false;

    // // check if the sentence contains a matching verb term
    // boolean match = false;
    // Predicate[] questionPs = aq.getPredicates();
    // String[] tokens = OpenNLP.tokenize(s);
    // String[] pos = OpenNLP.tagPos(tokens);
    // for (int i = 0; i < tokens.length; i++) {
    //     // look for verbs only
    //     if (!pos[i].startsWith("VB") || !pos[i].matches("[a-zA-Z]*"))
    //         continue;
    //     Term sentenceTerm = new Term(tokens[i], pos[i]);
    //
    //     for (Predicate questionP : questionPs) {
    //         // compare to predicates with missing arguments only
    //         if (!questionP.hasMissingArgs()) continue;
    //         Term predicateTerm = questionP.getVerbTerm();
    //
    //         if (predicateTerm.simScore(sentenceTerm.getLemma()) > 0) {
    //             match = true;
    //             break;
    //         }
    //     }
    //
    //     if (match) break;
    // }
    // if (!match) return false;
    // -> checked in apply() (performance optimized)

    // check if the sentence contains NEs of the expected types
    String[] answerTypes = aq.getAnswerTypes();
    if (answerTypes.length != 0) {
        // answer type known
        boolean newNE = false;
        Map<String, String[]> extracted = extractNes(s, answerTypes);
        String questionNorm = StringUtils.normalize(aq.getQuestion());
        for (String ne : extracted.keySet()) {
            String neNorm = StringUtils.normalize(ne);
            if (!StringUtils.isSubsetKeywords(neNorm, questionNorm)) {
                newNE = true;
                break;
            }
        }
        // no NEs that are not in the question
        if (!newNE)
            return false;
        sentence.setNes(extracted);
    }

    // check if the sentence contains a matching argument term
    // - single-token terms are extracted first to avoid dictionary lookups
    boolean match = false;
    Term[] singleTerms = TermExtractor.getSingleTokenTerms(s);
    Predicate[] questionPs = aq.getPredicates();
    for (Term singleTerm : singleTerms) {
        for (Predicate questionP : questionPs) {
            // compare to predicates with missing arguments only
            if (!questionP.hasMissingArgs())
                continue;
            Term[] predicateTerms = questionP.getArgTerms();
            for (Term predicateTerm : predicateTerms)
                if (predicateTerm.simScore(singleTerm.getLemma()) > 0) {
                    match = true;
                    break;
                }
            if (match)
                break;
        }
        if (match)
            break;
    }
    if (!match)
        return false;

    // - multi-token terms are extracted from sentences that pass the test
    Dictionary[] dicts = QuestionAnalysis.getDictionaries();
    Term[] multiTerms = TermExtractor.getTerms(s, dicts);
    sentence.setTerms(multiTerms);
    return true;
}
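The filter runs its cheapest checks first (length thresholds), then requires a named entity that is not already contained in the question, and only sentences that survive pay for dictionary-based multi-token term extraction. The sketch below approximates the NE novelty test; normalize() and isSubsetKeywords() are simplified stand-ins for Ephyra's StringUtils helpers, not the real implementations.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class NeNoveltySketch {

    // simplified stand-in for StringUtils.normalize(): lower-case and strip
    // everything except letters, digits and spaces
    static String normalize(String s) {
        return s.toLowerCase().replaceAll("[^a-z0-9 ]", " ").replaceAll("\\s+", " ").trim();
    }

    // simplified stand-in for StringUtils.isSubsetKeywords(): true iff every
    // keyword of 'sub' also occurs in 'sup'
    static boolean isSubsetKeywords(String sub, String sup) {
        Set<String> supSet = new HashSet<String>(Arrays.asList(sup.split(" ")));
        for (String kw : sub.split(" "))
            if (!supSet.contains(kw))
                return false;
        return true;
    }

    public static void main(String[] args) {
        String question = "Who invented the telephone?";
        String questionNorm = normalize(question);

        // NEs extracted from a candidate sentence
        String[] nes = { "telephone", "Alexander Graham Bell" };

        boolean newNE = false;
        for (String ne : nes) {
            // a sentence is only interesting if it contributes an NE that is
            // not already contained in the question
            if (!isSubsetKeywords(normalize(ne), questionNorm)) {
                newNE = true;
                break;
            }
        }
        System.out.println(newNE);  // prints true: "Alexander Graham Bell" is new
    }
}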