Use of info.ephyra.search.Result in project lucida by claritylab —
class FactoidSubsetFilter, method apply:
/**
 * <p>Drops results that are subsets of other results and transfers their
 * scores to the remaining results.</p>
 *
 * @param results array of <code>Result</code> objects
 * @return array of <code>Result</code> objects that are not subsets
 */
public Result[] apply(Result[] results) {
    // order by score ascending, then stably by answer length ascending,
    // so that a subset answer always precedes its supersets
    results = (new ReverseScoreSorterFilter()).apply(results);
    results = (new ResultLengthSorterFilter()).apply(results);

    // precompute normalized answer strings for all finite-scored results
    String[] normalized = new String[results.length];
    for (int idx = 0; idx < results.length; idx++) {
        float s = results[idx].getScore();
        if (s != Float.POSITIVE_INFINITY && s != Float.NEGATIVE_INFINITY)
            normalized[idx] = StringUtils.normalize(results[idx].getAnswer());
    }

    // check for subset relations, aggregate answers
    for (int shorter = 0; shorter < results.length - 1; shorter++) {
        float shortScore = results[shorter].getScore();
        if (shortScore == Float.POSITIVE_INFINITY
                || shortScore == Float.NEGATIVE_INFINITY)
            continue;
        for (int longer = results.length - 1; longer > shorter; longer--) {
            Result candidate = results[longer];
            float candScore = candidate.getScore();
            if (candScore == Float.POSITIVE_INFINITY
                    || candScore == Float.NEGATIVE_INFINITY)
                continue;
            // only merge into a longer answer that is a named entity
            // not extracted with a model-based tagger
            if (!candidate.isNamedEntity()
                    || NETagger.allModelType(candidate.getNeTypes()))
                continue;
            if (StringUtils.isSubsetKeywords(normalized[shorter],
                    normalized[longer])) {
                candidate.incScore(shortScore);
                results[shorter] = null;  // drop the subset answer
                break;
            }
        }
    }

    // collect the surviving results
    ArrayList<Result> kept = new ArrayList<Result>();
    for (Result r : results)
        if (r != null)
            kept.add(r);
    return kept.toArray(new Result[kept.size()]);
}
Use of info.ephyra.search.Result in project lucida by claritylab —
class HitPositionComparator, method compare:
/**
 * Compares its two arguments for order. Returns a negative integer, zero,
 * or a positive integer as the first argument is less than, equal to, or
 * greater than the second.
 *
 * @param o1 the first object to be compared
 * @param o2 the second object to be compared
 * @return a negative integer, zero, or a positive integer as the first
 *         argument is less than, equal to, or greater than the second
 * @throws ClassCastException if either argument is not a {@code Result}
 */
public int compare(Object o1, Object o2) {
    if (!(o1 instanceof Result) || !(o2 instanceof Result))
        throw new ClassCastException();
    Result r1 = (Result) o1;
    Result r2 = (Result) o2;
    // Integer.compare avoids the integer-overflow bug of plain
    // subtraction (e.g. a large positive minus a large negative hit
    // position wraps around and inverts the ordering).
    return Integer.compare(r1.getHitPos(), r2.getHitPos());
}
Use of info.ephyra.search.Result in project lucida by claritylab —
class PatternLearner, method assessPatterns:
/**
 * Assesses the answer patterns on the text passages in the
 * <code>Result</code> objects.
 *
 * @param results search results
 */
private static void assessPatterns(Result[] results) {
    for (Result result : results) {
        // look up the answer regex recorded for this result's query string
        String answerRegex = regexs.get(result.getQuery().getQueryString());
        AnswerPatternFilter.assessPatterns(result, answerRegex);
    }
}
Use of info.ephyra.search.Result in project lucida by claritylab —
class PatternLearner, method extractPatterns:
/**
 * Extracts answer patterns from the text passages in the search results.
 *
 * @param results search results
 */
private static void extractPatterns(Result[] results) {
    for (Result result : results) {
        // look up the answer string recorded for this result's query string
        String answerString = ass.get(result.getQuery().getQueryString());
        PatternExtractor.extract(result, answerString);
    }
}
Use of info.ephyra.search.Result in project lucida by claritylab —
class WebTermImportanceFilter, method apply:
/**
 * Increment the score of each result snippet for each word in it according
 * to the number of top-100 web search engine snippets containing this
 * particular word. This favors snippets that provide information given
 * frequently and thus likely to be more important with regard to the
 * target.
 *
 * @param results array of <code>Result</code> objects
 * @return extended array of <code>Result</code> objects
 */
@SuppressWarnings("unchecked")
public Result[] apply(Result[] results) {
    // catch empty result
    if (results.length == 0)
        return results;

    // produce target variations
    String target = results[0].getQuery().getOriginalQueryString();
    System.out.println("WebTermImportanceFilter:\n processing target '" + target + "'");
    HashMap<String, TermCounter> rawTermCounters = this.cacheLookup(target);

    // query generation test
    if (TEST_TARGET_GENERATION) {
        String[] targets = this.getTargets(target);
        System.out.println(" generated web serach Strings:");
        for (String t : targets) System.out.println(" - " + t);
        // query generation test only
        return results;
    // cache miss: fetch term counters from the web and cache them
    } else if (rawTermCounters == null) {
        String[] targets = this.getTargets(target);
        System.out.println(" web serach Strings are");
        for (String t : targets) System.out.println(" - " + t);
        rawTermCounters = this.getTermCounters(targets);
        this.cache(target, rawTermCounters);
    }

    // get target tokens (keep only tokens starting with a letter or digit)
    HashSet<String> rawTargetTerms = new HashSet<String>();
    String[] targetTokens = OpenNLP.tokenize(target);
    for (String tt : targetTokens)
        if (Character.isLetterOrDigit(tt.charAt(0)))
            rawTargetTerms.add(tt);

    // stem terms, merging counters of terms that share a stem;
    // collect the stems of the target terms
    HashMap<String, TermCounter> termCounters = new HashMap<String, TermCounter>();
    HashSet<String> targetTerms = new HashSet<String>();
    ArrayList<String> rawTerms = new ArrayList<String>(rawTermCounters.keySet());
    for (String rawTerm : rawTerms) {
        String stemmedTerm = SnowballStemmer.stem(rawTerm.toLowerCase());
        if (!termCounters.containsKey(stemmedTerm))
            termCounters.put(stemmedTerm, new TermCounter());
        termCounters.get(stemmedTerm).increment(rawTermCounters.get(rawTerm).getValue());
        if (rawTargetTerms.contains(rawTerm))
            targetTerms.add(stemmedTerm);
    }

    // get overall recall (since 20070718)
    int termCount = this.getCountSum(termCounters);
    int termCountLog = ((termCount > 100) ? ((int) Math.log10(termCount)) : 2);
    System.out.println("WebTermImportanceFilter: termCountLog is " + termCountLog);

    // score results: repeatedly score all remaining results, take the top
    // one, and dampen the counters of its terms so the next round favors
    // results contributing new information
    ArrayList<Result> resultList = new ArrayList<Result>();
    boolean goOn;
    do {
        goOn = false;
        ArrayList<Result> rawResults = new ArrayList<Result>();
        // score all results
        for (Result r : results) {
            if (r.getScore() != Float.NEGATIVE_INFINITY) {
                // tokenize sentence
                String[] sentence = NETagger.tokenize(r.getAnswer());
                float importance = 0;
                // scan sentence for terms from web result
                for (int i = 0; i < sentence.length; i++) {
                    String term = sentence[i];
                    if (term.length() > 1) {
                        term = SnowballStemmer.stem(term.toLowerCase());
                        TermCounter count = termCounters.get(term);
                        if (count != null) {
                            // term-frequency normalization (since 20070706)
                            double tf;
                            if (this.tfNormalizationMode == NO_NORMALIZATION)
                                tf = 1;
                            else if (this.tfNormalizationMode == LOG_LENGTH_NORMALIZATION) {
                                tf = WordFrequencies.lookup(sentence[i].toLowerCase());
                                if (tf > Math.E)
                                    tf = Math.log(tf);
                                else
                                    tf = 1;
                            } else if (this.tfNormalizationMode == LOG_10_LENGTH_NORMALIZATION) {
                                // FIX: this branch previously re-tested
                                // LOG_LENGTH_NORMALIZATION (a copy-paste
                                // error), making the base-10 variant below
                                // unreachable; its body (Math.log10,
                                // threshold 10) clearly implements
                                // LOG_10_LENGTH_NORMALIZATION.
                                tf = WordFrequencies.lookup(sentence[i].toLowerCase());
                                if (tf > 10)
                                    tf = Math.log10(tf);
                                else
                                    tf = 1;
                            } else
                                tf = 1;
                            importance += (count.getValue() / tf);
                        }
                    }
                }
                // don't throw out 0-scored results for combining approaches
                if (this.isCombined || (importance > 0)) {
                    if (this.normalizationMode == NO_NORMALIZATION)
                        r.setScore(importance);
                    else if (this.normalizationMode == LINEAR_LENGTH_NORMALIZATION)
                        // normalize by sentence length
                        r.setScore(importance / sentence.length);
                    else if (this.normalizationMode == SQUARE_ROOT_LENGTH_NORMALIZATION)
                        // normalize by square root of sentence length
                        r.setScore(importance / ((float) Math.sqrt(sentence.length)));
                    else if (this.normalizationMode == LOG_LENGTH_NORMALIZATION)
                        // normalize by natural log of sentence length
                        r.setScore(importance / (1 + ((float) Math.log(sentence.length))));
                    else if (this.normalizationMode == LOG_10_LENGTH_NORMALIZATION)
                        // normalize by base-10 log of sentence length
                        r.setScore(importance / (1 + ((float) Math.log10(sentence.length))));
                    rawResults.add(r);
                }
            }
        }
        if (rawResults.size() != 0) {
            // find top result
            Collections.sort(rawResults);
            Collections.reverse(rawResults);
            Result top = rawResults.remove(0);
            resultList.add(top);
            // decrement scores of top result terms (since 20070718):
            // target terms are halved, other terms divided by termCountLog
            String[] sentence = NETagger.tokenize(top.getAnswer());
            for (int i = 0; i < sentence.length; i++) {
                String term = SnowballStemmer.stem(sentence[i].toLowerCase());
                TermCounter count = termCounters.get(term);
                if (count != null) {
                    if (targetTerms.contains(term))
                        count.divideValue(2);
                    else
                        count.divideValue(termCountLog);
                    if (count.getValue() == 0)
                        termCounters.remove(term);
                }
            }
            // prepare remaining results for next round
            results = rawResults.toArray(new Result[rawResults.size()]);
            goOn = true;
        }
    } while (goOn);
    Collections.sort(resultList);
    Collections.reverse(resultList);
    // set position-dependent extra score for combining approaches:
    // geometrically decaying bonus by final rank
    if (this.isCombined) {
        float eScore = 100;
        for (Result r : resultList) {
            r.addExtraScore((this.getClass().getName() + this.normalizationMode), eScore);
            eScore *= 0.9f;
        }
    }
    return resultList.toArray(new Result[resultList.size()]);
}
Aggregations