Use of info.ephyra.search.Result in project lucida by claritylab:
class ScoreNormalizationFilter, method preserveOrderAveraging.
/**
 * Calculates the average normalization factor for each extraction technique
 * and normalizes the scores with this factor to ensure that the order
 * suggested by the original scores is preserved. The factor is adjusted to
 * avoid normalized scores larger than 1.
 *
 * @param results array of <code>Result</code> objects
 * @return array of <code>Result</code> objects with new normalized scores
 */
public Result[] preserveOrderAveraging(Result[] results) {
    // group factoid answers by the (single) technique that extracted them
    Hashtable<String, ArrayList<Result>> allExtracted = new Hashtable<String, ArrayList<Result>>();
    for (Result result : results) {
        // only factoid answers with exactly 1 extraction technique are normalized
        if (result.getScore() <= 0 || result.getScore() == Float.POSITIVE_INFINITY || result.getExtractionTechniques() == null || result.getExtractionTechniques().length != 1)
            continue;
        String extractor = result.getExtractionTechniques()[0];
        ArrayList<Result> extracted = allExtracted.get(extractor);
        if (extracted == null) {
            extracted = new ArrayList<Result>();
            allExtracted.put(extractor, extracted);
        }
        extracted.add(result);
    }
    // normalize answer scores for each extractor
    for (List<Result> extracted : allExtracted.values()) {
        // average normalization factor (normScore / score) and maximum raw score
        double sumNormFactors = 0;
        float maxScore = 0;
        for (Result factoid : extracted) {
            float score = factoid.getScore();
            float normScore = factoid.getNormScore();
            sumNormFactors += normScore / score;
            if (score > maxScore)
                maxScore = score;
        }
        double avgNormFactor = sumNormFactors / extracted.size();
        // cap the factor so the largest normalized score does not exceed 1,
        // as promised by the javadoc (fix: maxScore was previously computed
        // but never used, so scores > 1 were possible)
        if (maxScore > 0 && avgNormFactor * maxScore > 1)
            avgNormFactor = 1d / maxScore;
        // scaling all scores of one extractor by the same positive factor
        // preserves the order induced by the original scores
        for (Result factoid : extracted) {
            float norm = (float) (factoid.getScore() * avgNormFactor);
            factoid.setNormScore(norm);
        }
    }
    return results;
}
Use of info.ephyra.search.Result in project lucida by claritylab:
class OverlapAnalysisFilter, method apply.
/**
 * Evaluates the answer candidates and updates the current overlap analysis.
 *
 * @param results array of <code>Result</code> objects
 * @return identical array of <code>Result</code> objects
 */
public Result[] apply(Result[] results) {
    if (pattern == null)
        return results;
    // collect the extraction techniques of all candidates whose answer
    // string contains a match for one of the answer patterns
    HashSet<String> techsSet = new HashSet<String>();
    for (Result result : results) {
        String answer = result.getAnswer();
        for (String regex : pattern.getRegexs()) {
            // check if part of the answer string matches the pattern
            if (!answer.matches(".*?" + regex + ".*+"))
                continue;
            String[] techniques = result.getExtractionTechniques();
            if (techniques == null || techniques.length == 0) {
                // no technique recorded: attribute the match to the passage
                techsSet.add("Passage");
            } else {
                for (String technique : techniques) {
                    techsSet.add(technique);
                }
            }
        }
    }
    if (techsSet.isEmpty())
        techsSet.add("None");
    // build a canonical (sorted) key for this combination of techniques
    String[] techs = techsSet.toArray(new String[techsSet.size()]);
    Arrays.sort(techs);
    String key = StringUtils.concat(techs, ", ");
    // increment the counter for this combination of techniques
    Integer count = overlapAnalysis.get(key);
    overlapAnalysis.put(key, (count == null) ? 1 : count + 1);
    if (printing)
        printOverlapAnalysis();
    // the results themselves are passed through unchanged
    return results;
}
Use of info.ephyra.search.Result in project lucida by claritylab:
class PredicateExtractionFilter, method apply.
/**
 * Extracts relevant predicates from documents.
 *
 * <p>Document-level results that this filter does not apply to are passed
 * through unchanged; for the remaining documents, sentences containing
 * relevant verbs are selected, annotated with predicates, and turned into
 * predicate-level <code>Result</code> objects.
 *
 * @param results array of <code>Result</code> objects containing documents
 * @return array of <code>Result</code> objects containing predicates
 */
public Result[] apply(Result[] results) {
if (results.length == 0)
return results;
// holds pass-through results plus newly created predicate results
ArrayList<Result> allResults = new ArrayList<Result>();
// extract relevant sentences
// - get sentences that contain relevant verbs,
// use weights of verbs as confidence scores
HashSet<Result> ssSet = new HashSet<Result>();
for (Result result : results) {
// only apply this filter to results for the semantic parsing
// approach
Query query = result.getQuery();
Predicate[] ps = query.getAnalyzedQuestion().getPredicates();
if (!query.extractWith(FactoidsFromPredicatesFilter.ID) || ps.length == 0 || result.getScore() != 0) {
allResults.add(result);
continue;
}
// get all verb forms and build patterns
Hashtable<String[], Double> verbFormsMap = getAllVerbForms(ps);
ArrayList<String> verbPatterns = new ArrayList<String>();
ArrayList<Double> verbWeights = new ArrayList<Double>();
for (String[] verbForms : verbFormsMap.keySet()) {
// (?i) = case-insensitive; \b anchors the verb forms at word boundaries
String verbPattern = "(?i).*?\\b(" + StringUtils.concat(verbForms, "|") + ")\\b.*+";
verbPatterns.add(verbPattern);
verbWeights.add(verbFormsMap.get(verbForms));
}
// split the document into paragraphs; skip paragraphs without a
// relevant verb before the more expensive sentence detection
String[] paragraphs = result.getAnswer().split("\\n");
for (String p : paragraphs) {
// paragraph does not contain relevant verb?
boolean contains = false;
for (String verbPattern : verbPatterns) {
if (p.matches(verbPattern)) {
contains = true;
break;
}
}
if (!contains)
continue;
String[] sentences = LingPipe.sentDetect(p);
for (String s : sentences) {
// sentence does not contain relevant verb?
// weight stays 0 if no verb pattern matches; the first matching
// pattern's weight is used (Double is unboxed for the comparison)
Double weight = 0d;
for (int i = 0; i < verbPatterns.size(); i++) {
if (s.matches(verbPatterns.get(i))) {
weight = verbWeights.get(i);
break;
}
}
if (weight == 0d)
continue;
// replace whitespaces by single blanks and trim
s = s.replaceAll("\\s++", " ").trim();
// create sentence-level result object
Result sentence = result.getCopy();
sentence.setAnswer(s);
sentence.setScore(weight.floatValue());
ssSet.add(sentence);
}
}
}
// - check if these sentences are relevant,
// get MAX_SENTENCES sentences with most relevant verbs
Result[] ss = ssSet.toArray(new Result[ssSet.size()]);
ss = (new ScoreSorterFilter()).apply(ss);
ArrayList<Result> ssList = new ArrayList<Result>();
for (Result s : ss) {
// reset the temporary verb-weight score before further processing
s.setScore(0);
if (checkSentence(s))
ssList.add(s);
// get at most MAX_SENTENCES sentences
if (ssList.size() >= MAX_SENTENCES)
break;
}
ss = ssList.toArray(new Result[ssList.size()]);
if (ss.length == 0)
return allResults.toArray(new Result[allResults.size()]);
// annotate predicates in sentences
// NOTE(review): ASSERT.annotatePredicates presumably returns one array of
// annotations per input sentence (ass[i] belongs to ss[i]) — confirm
String[] sentences = new String[ss.length];
for (int i = 0; i < ss.length; i++) sentences[i] = ss[i].getAnswer();
String[][] ass = ASSERT.annotatePredicates(sentences);
// extract predicates from annotations
for (int i = 0; i < ass.length; i++) {
Term[] terms = ss[i].getTerms();
Predicate[] questionPs = ss[i].getQuery().getAnalyzedQuestion().getPredicates();
for (int j = 0; j < ass[i].length; j++) {
// build predicate
Predicate predicate = null;
try {
predicate = new Predicate(sentences[i], ass[i][j], terms);
} catch (ParseException e) {
// malformed annotation: skip this predicate
// System.exit(1);
continue;
}
// calculate similarity score
double simScore = 0;
Predicate simPredicate = null;
for (Predicate questionP : questionPs) // compare to predicates with missing arguments only
if (questionP.hasMissingArgs()) {
double currSimScore = predicate.simScore(questionP);
if (currSimScore > simScore) {
simScore = currSimScore;
simPredicate = questionP;
}
}
// keep predicate if it is similar to a question predicate
if (simScore > 0) {
predicate.setSimScore(simScore);
predicate.setSimPredicate(simPredicate);
Result result = ss[i].getCopy();
result.setAnswer(ass[i][j]);
result.setSentence(sentences[i]);
result.setPredicate(predicate);
allResults.add(result);
}
}
}
return allResults.toArray(new Result[allResults.size()]);
}
Use of info.ephyra.search.Result in project lucida by claritylab:
class PatternLearner, method extract.
/**
 * Loads target-context-answer-regex tuples from resource files, forms
 * queries, fetches text passages, extracts answer patterns and writes them
 * to resource files.
 *
 * @return <code>true</code>, iff the answer patterns could be extracted
 */
public static boolean extract() {
    // load tuples and form queries from both interpretation resources
    MsgPrinter.printFormingQueries();
    ass = new Hashtable<String, String>();
    regexs = new Hashtable<String, String>();
    ArrayList<Query> queryList = new ArrayList<Query>();
    for (Query query : formQueries("res/patternlearning/interpretations")) {
        queryList.add(query);
    }
    for (Query query : formQueries("res/patternlearning/interpretations_extract")) {
        queryList.add(query);
    }
    Query[] queries = queryList.toArray(new Query[queryList.size()]);
    // fetch text passages for all queries
    MsgPrinter.printFetchingPassages();
    Result[] results = fetchPassages(queries);
    // extract answer patterns from the passages
    MsgPrinter.printExtractingPatterns();
    extractPatterns(results);
    // save the extracted answer patterns
    MsgPrinter.printSavingPatterns();
    return savePatterns("res/patternlearning/answerpatterns_extract");
}
Use of info.ephyra.search.Result in project lucida by claritylab:
class PatternLearner, method assess.
/**
 * Loads target-context-answer-regex tuples and answer patterns from
 * resource files, forms queries from the tuples, fetches text passages,
 * assesses the answer patterns on the text passages and writes them to
 * resource files.
 *
 * @return <code>true</code>, iff the answer patterns could be assessed
 */
public static boolean assess() {
    // load previously extracted answer patterns; abort if unavailable
    MsgPrinter.printLoadingPatterns();
    if (!loadPatterns("res/patternlearning/answerpatterns_extract"))
        return false;
    // load tuples and form queries from both interpretation resources
    MsgPrinter.printFormingQueries();
    ass = new Hashtable<String, String>();
    regexs = new Hashtable<String, String>();
    ArrayList<Query> queryList = new ArrayList<Query>();
    for (Query query : formQueries("res/patternlearning/interpretations")) {
        queryList.add(query);
    }
    for (Query query : formQueries("res/patternlearning/interpretations_assess")) {
        queryList.add(query);
    }
    Query[] queries = queryList.toArray(new Query[queryList.size()]);
    // fetch text passages for all queries
    MsgPrinter.printFetchingPassages();
    Result[] results = fetchPassages(queries);
    // assess the answer patterns on the fetched passages
    MsgPrinter.printAssessingPatterns();
    assessPatterns(results);
    // save the assessed answer patterns
    MsgPrinter.printSavingPatterns();
    return savePatterns("res/patternlearning/answerpatterns_assess");
}
Aggregations