Usage of info.ephyra.search.Result in project lucida by claritylab:
class NormalizedScoreSorterFilter, method apply.
/**
 * Sorts the results by their normalized scores in descending order.
 *
 * @param results array of <code>Result</code> objects
 * @return sorted array of <code>Result</code> objects
 */
public Result[] apply(Result[] results) {
    // Temporarily exchange each result's score with its normalized score,
    // leaving results untouched that have no normalized score (== 0) yet.
    for (Result r : results) {
        float tmp = r.getNormScore();
        if (tmp != 0) {
            r.setNormScore(r.getScore());
            r.setScore(tmp);
        }
    }
    // Delegate the actual descending sort to the plain score sorter.
    results = new ScoreSorterFilter().apply(results);
    // Exchange the fields back so scores and normalized scores are restored.
    for (Result r : results) {
        float tmp = r.getNormScore();
        if (tmp != 0) {
            r.setNormScore(r.getScore());
            r.setScore(tmp);
        }
    }
    return results;
}
Usage of info.ephyra.search.Result in project lucida by claritylab:
class NounPhraseFilter, method apply.
/**
 * Increments the score of each result snippet according to the number of
 * noun phrases it is the first to contain. This is meant to prefer snippets
 * that provide new information over those that repeat information from
 * previous snippets.
 *
 * <p>Cleanup: removed large blocks of commented-out dead code and extracted
 * the duplicated NP-novelty check (it appeared twice, once for NPs ended by
 * a non-NP token and once for a sentence-final NP) into a private helper.
 *
 * @param results array of <code>Result</code> objects
 * @return modified array of <code>Result</code> objects
 */
public Result[] apply(Result[] results) {
    // results that contributed at least one novel noun phrase
    ArrayList<Result> rawResults = new ArrayList<Result>();
    // stemmed noun phrases already seen in any previous snippet
    HashSet<String> found = new HashSet<String>();
    for (Result r : results) {
        if (r.getScore() != Float.NEGATIVE_INFINITY) {
            String stemmedQuestion = SnowballStemmer.stemAllTokens(r.getQuery().getAnalyzedQuestion().getQuestion());
            String text = r.getAnswer();
            // tokenize and tag sentence
            String[] sentence = OpenNLP.tokenize(text);
            String[] posTags = OpenNLP.tagPos(sentence);
            String[] chunkTags = OpenNLP.tagChunks(sentence, posTags);
            String np = null;
            int numberOfNPs = 0;
            // scan sentence for NPs (B-NP starts one, I-NP continues it)
            for (int i = 0; i < sentence.length; i++) {
                if ("B-NP".equals(chunkTags[i])) {
                    np = sentence[i];
                } else if ("I-NP".equals(chunkTags[i]) && (np != null)) {
                    np += " " + sentence[i];
                } else if (np != null) {
                    // NP ended: count it if novel and not part of the question
                    numberOfNPs += countNovelNP(np, found, stemmedQuestion);
                    np = null;
                }
            }
            // remember last NP if the sentence ended inside one
            if (np != null)
                numberOfNPs += countNovelNP(np, found, stemmedQuestion);
            if (numberOfNPs != 0) {
                // weight by (number of results / sentence length), as in the
                // original 20060725_0x experiment runs
                r.incScore(numberOfNPs * (((float) results.length) / ((float) sentence.length)));
                rawResults.add(r);
            }
        }
    }
    return rawResults.toArray(new Result[rawResults.size()]);
}

/**
 * Stems the given noun phrase and, if it has not been seen before, records
 * it in the seen-set and reports whether it adds information beyond the
 * question keywords.
 *
 * @param np collected (unstemmed) noun phrase
 * @param found set of stemmed NPs seen so far; updated in place
 * @param stemmedQuestion stemmed question string
 * @return 1 if the NP is novel and not a keyword subset of the question,
 *         0 otherwise
 */
private static int countNovelNP(String np, HashSet<String> found, String stemmedQuestion) {
    np = SnowballStemmer.stemAllTokens(np);
    if (found.contains(np))
        return 0;
    found.add(np);
    return StringUtils.isSubsetKeywords(np, stemmedQuestion) ? 0 : 1;
}
Usage of info.ephyra.search.Result in project lucida by claritylab:
class NuggetEvaluationFilter, method apply.
/**
 * Assesses how well the answer snippets cover the TREC nuggets of the
 * current target and appends a coverage report to two log files (a verbose
 * one and a concise summary-only one). Covered nugget IDs are recorded on
 * the results; the result array itself is returned unchanged.
 *
 * <p>NOTE(review): the previous Javadoc described NE extraction, which does
 * not match this implementation; rewritten to reflect the visible behavior.
 *
 * @param results array of <code>Result</code> objects
 * @return the (unmodified) array of <code>Result</code> objects
 */
public Result[] apply(Result[] results) {
// nothing to assess without results or without a target id
if ((results.length == 0) || (targetId == null))
return results;
this.lastTarget = results[0].getQuery().getOriginalQueryString();
// running total of non-whitespace characters across processed snippets
int nonWhiteLength = 0;
// next 1000-character threshold at which a cutoff marker is logged
int notifyLength = 1000;
// verbose report writer; stays null if the file cannot be opened
// (logging is best-effort throughout, hence the silently ignored exceptions)
BufferedWriter br = null;
try {
br = new BufferedWriter(new FileWriter(this.fileName, true));
br.write("===== Assessing target " + targetId + " (" + results[0].getQuery().getOriginalQueryString() + ") =====");
br.newLine();
} catch (Exception e) {
}
// concise report writer (summary section only), same best-effort handling
BufferedWriter cbr = null;
try {
cbr = new BufferedWriter(new FileWriter(this.conciseFileName, true));
cbr.write("===== Assessing target " + targetId + " (" + results[0].getQuery().getOriginalQueryString() + ") =====");
cbr.newLine();
} catch (Exception e) {
}
// score of the first result is used as the maximum for the half-score
// cutoff below (results presumably arrive sorted by score — confirm)
float maxScore = results[0].getScore();
boolean maxCutWritten = false;
// nuggets (probably) covered so far, in discovery order
HashSet<TRECNugget> covered = new LinkedHashSet<TRECNugget>();
// nugget -> non-white length at which it was first covered
HashMap<TRECNugget, Integer> coveredWhen = new HashMap<TRECNugget, Integer>();
int vital = 0;
int ok = 0;
// reset the per-1000-char coverage snapshots (7 buckets = first 7000 chars)
for (int i = 0; i < 7; i++) {
lastVital[i] = 0;
lastOk[i] = 0;
}
for (int r = 0; r < results.length; r++) {
Result res = results[r];
boolean resWritten = false;
// add this snippet's non-whitespace characters to the running total
String[] tok = res.getAnswer().split("\\s++");
for (int t = 0; t < tok.length; t++) nonWhiteLength += tok[t].length();
// write all snippets for the first 7000 characters
if ((br != null) && (nonWhiteLength < 7000))
try {
br.write("Result " + r + " (" + res.getScore() + ") is: " + res.getAnswer());
br.newLine();
resWritten = true;
} catch (Exception e) {
}
// crossed a 1000-char boundary: snapshot the counters and log a marker
if (nonWhiteLength > notifyLength) {
int index = ((notifyLength - 1) / 1000);
if (index < 7) {
lastVital[index] = vital;
lastOk[index] = ok;
}
if (br != null)
try {
br.write("===== " + notifyLength + " non-white char cutoff ===== ");
br.newLine();
} catch (Exception e) {
}
notifyLength += 1000;
}
// mark (once) the point where scores drop below half the top score
if ((br != null) && !maxCutWritten && ((res.getScore() * 2) < maxScore))
try {
br.write("===== half score cutoff ===== ");
br.newLine();
maxCutWritten = true;
} catch (Exception e) {
}
// check the snippet against every nugget still in play
int n = 0;
while (n < nuggets.size()) {
TRECNugget nug = nuggets.get(n);
// parts of the nugget NOT covered by this snippet (per covers())
String[] uncovered = covers(res.getAnswer(), nug.nugget);
// at least half of the nugget covered -> count as (probably) covered
if ((uncovered.length * 2) <= nug.size) {
if (br != null)
try {
if (!resWritten) {
br.write("Result " + r + " (" + res.getScore() + ") is: " + res.getAnswer());
br.newLine();
resWritten = true;
}
br.write(" Nugget covered (" + nug.nuggetID + "," + nug.nuggetType + "): " + nug.nugget);
br.newLine();
if (uncovered.length != 0) {
br.write(" Uncovered:");
for (String u : uncovered) br.write(" " + u);
br.newLine();
}
} catch (Exception e) {
}
res.addCoveredNuggetID(nug.nuggetID);
covered.add(nug);
// at least 3/4 covered -> securely covered, drop from further checks
if ((uncovered.length * 4) <= nug.size)
nuggets.remove(n);
else
n++;
// on first coverage, bump the type counter and record where it happened
if (!coveredWhen.containsKey(nug)) {
if ("vital".equals(nug.nuggetType))
vital++;
else
ok++;
coveredWhen.put(nug, new Integer(nonWhiteLength));
}
} else {
n++;
}
}
// blank line between snippet entries in the verbose report
if (resWritten && (br != null))
try {
br.newLine();
} catch (Exception e) {
}
}
// verbose report summary: covered nuggets (with first-coverage position),
// then the nuggets never securely covered
if (br != null)
try {
ArrayList<TRECNugget> coveredNugs = new ArrayList<TRECNugget>(covered);
for (TRECNugget nug : coveredNugs) {
int when = -1;
if (coveredWhen.containsKey(nug))
when = coveredWhen.get(nug).intValue();
br.write(" (probably) covered (" + nug.nuggetID + "," + nug.nuggetType + ")" + ((when == -1) ? "" : (" first at " + when)) + ": " + nug.nugget);
br.newLine();
}
for (TRECNugget nug : nuggets) {
br.write(" Not (securely) covered (" + nug.nuggetID + "," + nug.nuggetType + "): " + nug.nugget);
br.newLine();
}
br.newLine();
br.newLine();
br.flush();
br.close();
} catch (Exception e) {
}
// concise report: the same summary, without the per-snippet detail
if (cbr != null)
try {
ArrayList<TRECNugget> coveredNugs = new ArrayList<TRECNugget>(covered);
for (TRECNugget nug : coveredNugs) {
int when = -1;
if (coveredWhen.containsKey(nug))
when = coveredWhen.get(nug).intValue();
cbr.write(" (probably) covered (" + nug.nuggetID + "," + nug.nuggetType + ")" + ((when == -1) ? "" : (" first at " + when)) + ": " + nug.nugget);
cbr.newLine();
}
for (TRECNugget nug : nuggets) {
cbr.write(" Not (securely) covered (" + nug.nuggetID + "," + nug.nuggetType + "): " + nug.nugget);
cbr.newLine();
}
cbr.newLine();
cbr.newLine();
cbr.flush();
cbr.close();
} catch (Exception e) {
}
return results;
}
Usage of info.ephyra.search.Result in project lucida by claritylab:
class ScoreNormalizationFilter, method preserveOrderTop.
/**
 * Calculates the normalization factor of the top answer for each extraction
 * technique and normalizes the scores with this factor to ensure that the
 * order suggested by the original scores is preserved.
 *
 * @param results array of <code>Result</code> objects
 * @return array of <code>Result</code> objects with new normalized scores
 */
public Result[] preserveOrderTop(Result[] results) {
    // group factoid answers by the single technique that extracted them
    Hashtable<String, ArrayList<Result>> byExtractor = new Hashtable<String, ArrayList<Result>>();
    for (Result candidate : results) {
        float score = candidate.getScore();
        // skip non-factoid answers (non-positive or infinite score)
        if (score <= 0 || score == Float.POSITIVE_INFINITY)
            continue;
        // skip answers produced by zero or multiple extraction techniques
        String[] techniques = candidate.getExtractionTechniques();
        if (techniques == null || techniques.length != 1)
            continue;
        ArrayList<Result> group = byExtractor.get(techniques[0]);
        if (group == null) {
            group = new ArrayList<Result>();
            byExtractor.put(techniques[0], group);
        }
        group.add(candidate);
    }
    // rescale every group by the norm-factor of its highest-scoring answer
    for (List<Result> group : byExtractor.values()) {
        float topScore = 0;
        float topNorm = 0;
        for (Result answer : group) {
            if (answer.getScore() > topScore) {
                topScore = answer.getScore();
                topNorm = answer.getNormScore();
            }
        }
        // factor of the top answer preserves the original ranking
        double factor = topNorm / topScore;
        for (Result answer : group)
            answer.setNormScore((float) (answer.getScore() * factor));
    }
    return results;
}
Usage of info.ephyra.search.Result in project lucida by claritylab:
class ScoreNormalizationFilter, method addMeanScoreFeature.
/**
 * Adds the mean score of all factoid answers (finite, positive score) as a
 * feature to the instance.
 *
 * <p>Fix: the original divided by <code>numFactoid</code> unconditionally,
 * producing NaN when no factoid answer exists; the mean now defaults to 0
 * in that case. Dead commented-out per-extractor filtering was removed.
 *
 * @param instance instance to extend with the feature
 * @param result result the instance describes (unused here, kept for a
 *               signature consistent with the sibling feature methods)
 * @param results all results the mean is computed over
 */
private static void addMeanScoreFeature(MutableInstance instance, Result result, Result[] results) {
    // sum the scores of all factoid answers
    double sum = 0;
    int numFactoid = 0;
    for (Result r : results) {
        if (r.getScore() > 0 && r.getScore() < Float.POSITIVE_INFINITY) {
            sum += r.getScore();
            numFactoid++;
        }
    }
    // guard against division by zero (original yielded NaN)
    double meanScore = (numFactoid > 0) ? (sum / numFactoid) : 0;
    Feature feature = new Feature(MEAN_SCORE_F);
    instance.addNumeric(feature, meanScore);
}
Aggregations