Example 31 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

the class NormalizedScoreSorterFilter method apply.

/**
	 * Sorts the results by their normalized scores in descending order.
	 * 
	 * @param results array of <code>Result</code> objects
	 * @return sorted array of <code>Result</code> objects
	 */
public Result[] apply(Result[] results) {
    // swap scores and normalized scores; results without a normalized
    // score (normScore == 0) keep their original score
    for (Result result : results) {
        float normScore = result.getNormScore();
        if (normScore == 0)
            continue;
        result.setNormScore(result.getScore());
        result.setScore(normScore);
    }
    // sort by normalized scores in descending order
    results = (new ScoreSorterFilter()).apply(results);
    // swap the scores back
    for (Result result : results) {
        float normScore = result.getNormScore();
        if (normScore == 0)
            continue;
        result.setNormScore(result.getScore());
        result.setScore(normScore);
    }
    return results;
}
Also used: Result (info.ephyra.search.Result)
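The swap-sort-swap above lets the filter reuse ScoreSorterFilter unchanged. As a minimal standalone sketch, the same descending order by normalized score can be expressed with a plain comparator; the Result stub below is an assumption made for the sketch, not the real info.ephyra.search.Result:

import java.util.Arrays;
import java.util.Comparator;

public class NormScoreSortSketch {

    // stand-in for info.ephyra.search.Result (assumption: only the two
    // scores matter for this sketch)
    static class Result {
        float score;
        float normScore;
        Result(float score, float normScore) {
            this.score = score;
            this.normScore = normScore;
        }
    }

    public static void main(String[] args) {
        Result[] results = {
            new Result(3.0f, 0.2f),
            new Result(1.0f, 0.9f),
            new Result(2.0f, 0.5f)
        };
        // sort by normalized score in descending order
        Arrays.sort(results, new Comparator<Result>() {
            public int compare(Result a, Result b) {
                return Float.compare(b.normScore, a.normScore);
            }
        });
        for (Result r : results)
            System.out.println(r.score + " (norm " + r.normScore + ")");
        // prints: 1.0 (norm 0.9), 2.0 (norm 0.5), 3.0 (norm 0.2)
    }
}

One difference to note: the filter leaves results with normScore == 0 ranked by their raw score, which this comparator does not reproduce.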

Example 32 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

the class NounPhraseFilter method apply.

/**
	 * Increments the score of each result snippet according to the number of
	 * noun phrases it is the first to contain. This is meant to prefer snippets
	 * that provide new information over those that repeat information from
	 * previous snippets.
	 * 
	 * @param results array of <code>Result</code> objects
	 * @return modified array of <code>Result</code> objects
	 */
public Result[] apply(Result[] results) {
    // results that are the first to contain at least one new noun phrase
    ArrayList<Result> rawResults = new ArrayList<Result>();
    // stemmed noun phrases seen in earlier snippets
    HashSet<String> found = new HashSet<String>();
    for (Result r : results) {
        if (r.getScore() != Float.NEGATIVE_INFINITY) {
            String stemmedQuestion = SnowballStemmer.stemAllTokens(r.getQuery().getAnalyzedQuestion().getQuestion());
            String text = r.getAnswer();
            // tokenize and tag the snippet
            String[] sentence = OpenNLP.tokenize(text);
            String[] posTags = OpenNLP.tagPos(sentence);
            String[] chunkTags = OpenNLP.tagChunks(sentence, posTags);
            String np = null;
            int numberOfNPs = 0;
            // scan the snippet for noun phrases
            for (int i = 0; i < sentence.length; i++) {
                if ("B-NP".equals(chunkTags[i])) {
                    // start of an NP
                    np = sentence[i];
                } else if ("I-NP".equals(chunkTags[i]) && (np != null)) {
                    // NP continues
                    np += " " + sentence[i];
                } else if (np != null) {
                    // NP ended; count it if it is new and does not merely
                    // repeat keywords from the question
                    np = SnowballStemmer.stemAllTokens(np);
                    if (!found.contains(np)) {
                        found.add(np);
                        if (!StringUtils.isSubsetKeywords(np, stemmedQuestion))
                            numberOfNPs++;
                    }
                    np = null;
                }
            }
            // count an NP that extends to the end of the snippet
            if (np != null) {
                np = SnowballStemmer.stemAllTokens(np);
                if (!found.contains(np)) {
                    found.add(np);
                    if (!StringUtils.isSubsetKeywords(np, stemmedQuestion))
                        numberOfNPs++;
                }
            }
            // keep only snippets that contribute new NPs; the score increment
            // is weighted by the number of results per snippet token
            if (numberOfNPs != 0) {
                r.incScore(numberOfNPs * (((float) results.length) / ((float) sentence.length)));
                rawResults.add(r);
            }
        }
    }
    return rawResults.toArray(new Result[rawResults.size()]);
}
Also used: ArrayList (java.util.ArrayList), Result (info.ephyra.search.Result), HashSet (java.util.HashSet)
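The heart of the filter is the scan over parallel token and chunk-tag arrays. Below is a self-contained sketch of that B-NP/I-NP collection loop, with hard-coded tags standing in for the OpenNLP tokenizer and taggers (the sentence and tags are illustrative assumptions):

import java.util.ArrayList;
import java.util.List;

public class ChunkScanSketch {
    public static void main(String[] args) {
        // illustrative token and chunk-tag arrays; in the filter these come
        // from OpenNLP.tokenize / tagPos / tagChunks
        String[] sentence = {"The", "quick", "fox", "jumps", "over", "the", "lazy", "dog"};
        String[] chunkTags = {"B-NP", "I-NP", "I-NP", "B-VP", "B-PP", "B-NP", "I-NP", "I-NP"};

        List<String> nps = new ArrayList<String>();
        String np = null;
        for (int i = 0; i < sentence.length; i++) {
            if ("B-NP".equals(chunkTags[i])) {
                // a B-NP tag closes any open phrase and starts a new one
                if (np != null)
                    nps.add(np);
                np = sentence[i];
            } else if ("I-NP".equals(chunkTags[i]) && np != null) {
                np += " " + sentence[i];
            } else if (np != null) {
                nps.add(np);
                np = null;
            }
        }
        if (np != null)
            nps.add(np);

        System.out.println(nps); // [The quick fox, the lazy dog]
    }
}

Unlike this sketch, the filter overwrites an open phrase when a B-NP tag directly follows another noun phrase, so the first of two adjacent NPs is never counted.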

Example 33 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

the class NuggetEvaluationFilter method apply.

/**
	 * Assesses which TREC nuggets of the current target are covered by the
	 * result snippets and appends the assessment, including counts at fixed
	 * answer-length cutoffs, to the detailed and concise log files. The
	 * results themselves are passed through unchanged.
	 * 
	 * @param results array of <code>Result</code> objects
	 * @return the unmodified array of <code>Result</code> objects
	 */
public Result[] apply(Result[] results) {
    if ((results.length == 0) || (targetId == null))
        return results;
    this.lastTarget = results[0].getQuery().getOriginalQueryString();
    // length of the answer text written so far, excluding whitespace
    int nonWhiteLength = 0;
    // next answer-length cutoff (in non-white characters) to mark in the log
    int notifyLength = 1000;
    // detailed assessment log; logging is best-effort, I/O errors are ignored
    BufferedWriter br = null;
    try {
        br = new BufferedWriter(new FileWriter(this.fileName, true));
        br.write("===== Assessing target " + targetId + " (" + results[0].getQuery().getOriginalQueryString() + ") =====");
        br.newLine();
    } catch (Exception e) {
        // ignore; the detailed log is optional
    }
    // concise assessment log, written in parallel with the detailed one
    BufferedWriter cbr = null;
    try {
        cbr = new BufferedWriter(new FileWriter(this.conciseFileName, true));
        cbr.write("===== Assessing target " + targetId + " (" + results[0].getQuery().getOriginalQueryString() + ") =====");
        cbr.newLine();
    } catch (Exception e) {
        // ignore; the concise log is optional
    }
    float maxScore = results[0].getScore();
    boolean maxCutWritten = false;
    // nuggets covered so far, in the order they were first covered
    HashSet<TRECNugget> covered = new LinkedHashSet<TRECNugget>();
    // answer length (non-white characters) at which each nugget was first covered
    HashMap<TRECNugget, Integer> coveredWhen = new HashMap<TRECNugget, Integer>();
    int vital = 0;
    int ok = 0;
    for (int i = 0; i < 7; i++) {
        lastVital[i] = 0;
        lastOk[i] = 0;
    }
    for (int r = 0; r < results.length; r++) {
        Result res = results[r];
        boolean resWritten = false;
        // add the answer's length, excluding whitespace
        String[] tok = res.getAnswer().split("\\s++");
        for (int t = 0; t < tok.length; t++) nonWhiteLength += tok[t].length();
        //	write all snippets for the first 7000 characters
        if ((br != null) && (nonWhiteLength < 7000))
            try {
                br.write("Result " + r + " (" + res.getScore() + ") is: " + res.getAnswer());
                br.newLine();
                resWritten = true;
            } catch (Exception e) {
            }
        // record vital/ok counts at each 1000-character cutoff (up to 7000)
        if (nonWhiteLength > notifyLength) {
            int index = ((notifyLength - 1) / 1000);
            if (index < 7) {
                lastVital[index] = vital;
                lastOk[index] = ok;
            }
            if (br != null)
                try {
                    br.write("===== " + notifyLength + " non-white char cutoff ===== ");
                    br.newLine();
                } catch (Exception e) {
                }
            notifyLength += 1000;
        }
        if ((br != null) && !maxCutWritten && ((res.getScore() * 2) < maxScore))
            try {
                br.write("===== half score cutoff ===== ");
                br.newLine();
                maxCutWritten = true;
            } catch (Exception e) {
            }
        int n = 0;
        while (n < nuggets.size()) {
            TRECNugget nug = nuggets.get(n);
            // covers() returns the nugget tokens missing from the answer; the
            // nugget counts as covered if at most half of its tokens are missing
            String[] uncovered = covers(res.getAnswer(), nug.nugget);
            if ((uncovered.length * 2) <= nug.size) {
                if (br != null)
                    try {
                        if (!resWritten) {
                            br.write("Result " + r + " (" + res.getScore() + ") is: " + res.getAnswer());
                            br.newLine();
                            resWritten = true;
                        }
                        br.write("  Nugget covered (" + nug.nuggetID + "," + nug.nuggetType + "): " + nug.nugget);
                        br.newLine();
                        if (uncovered.length != 0) {
                            br.write("      Uncovered:");
                            for (String u : uncovered) br.write(" " + u);
                            br.newLine();
                        }
                    } catch (Exception e) {
                    }
                res.addCoveredNuggetID(nug.nuggetID);
                covered.add(nug);
                // remove the nugget from further consideration once at least
                // three quarters of its tokens are covered
                if ((uncovered.length * 4) <= nug.size)
                    nuggets.remove(n);
                else
                    n++;
                if (!coveredWhen.containsKey(nug)) {
                    if ("vital".equals(nug.nuggetType))
                        vital++;
                    else
                        ok++;
                    coveredWhen.put(nug, Integer.valueOf(nonWhiteLength));
                }
            } else {
                n++;
            }
        }
        if (resWritten && (br != null))
            try {
                br.newLine();
            } catch (Exception e) {
            }
    }
    if (br != null)
        try {
            ArrayList<TRECNugget> coveredNugs = new ArrayList<TRECNugget>(covered);
            for (TRECNugget nug : coveredNugs) {
                int when = -1;
                if (coveredWhen.containsKey(nug))
                    when = coveredWhen.get(nug).intValue();
                br.write("  (probably) covered (" + nug.nuggetID + "," + nug.nuggetType + ")" + ((when == -1) ? "" : (" first at " + when)) + ": " + nug.nugget);
                br.newLine();
            }
            for (TRECNugget nug : nuggets) {
                br.write("  Not (securely) covered (" + nug.nuggetID + "," + nug.nuggetType + "): " + nug.nugget);
                br.newLine();
            }
            br.newLine();
            br.newLine();
            br.flush();
            br.close();
        } catch (Exception e) {
        }
    if (cbr != null)
        try {
            ArrayList<TRECNugget> coveredNugs = new ArrayList<TRECNugget>(covered);
            for (TRECNugget nug : coveredNugs) {
                int when = -1;
                if (coveredWhen.containsKey(nug))
                    when = coveredWhen.get(nug).intValue();
                cbr.write("  (probably) covered (" + nug.nuggetID + "," + nug.nuggetType + ")" + ((when == -1) ? "" : (" first at " + when)) + ": " + nug.nugget);
                cbr.newLine();
            }
            for (TRECNugget nug : nuggets) {
                cbr.write("  Not (securely) covered (" + nug.nuggetID + "," + nug.nuggetType + "): " + nug.nugget);
                cbr.newLine();
            }
            cbr.newLine();
            cbr.newLine();
            cbr.flush();
            cbr.close();
        } catch (Exception e) {
        }
    return results;
}
Also used: LinkedHashSet (java.util.LinkedHashSet), TRECNugget (info.ephyra.trec.TRECNugget), HashMap (java.util.HashMap), LinkedHashMap (java.util.LinkedHashMap), FileWriter (java.io.FileWriter), ArrayList (java.util.ArrayList), BufferedWriter (java.io.BufferedWriter), Result (info.ephyra.search.Result)
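The covers helper is not part of this snippet; judging from its use above, it returns the nugget tokens that are missing from the answer, and nug.size is presumably the nugget's token count. The following is a hypothetical token-overlap sketch consistent with that contract, not the project's actual implementation (the real helper may stem or otherwise normalize tokens):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class CoversSketch {

    // hypothetical stand-in for covers(answer, nugget): returns the nugget
    // tokens that do not occur in the answer (case-insensitive)
    static String[] covers(String answer, String nugget) {
        Set<String> answerTokens = new HashSet<String>(
                Arrays.asList(answer.toLowerCase().split("\\s++")));
        List<String> uncovered = new ArrayList<String>();
        for (String token : nugget.split("\\s++"))
            if (!answerTokens.contains(token.toLowerCase()))
                uncovered.add(token);
        return uncovered.toArray(new String[uncovered.size()]);
    }

    public static void main(String[] args) {
        String answer = "The court upheld the 1998 ruling on appeal";
        String nugget = "court upheld ruling in 1998";
        String[] uncovered = covers(answer, nugget);
        int size = nugget.split("\\s++").length;
        System.out.println(Arrays.toString(uncovered)); // [in]
        // covered if at most half of the nugget tokens are missing
        System.out.println("covered: " + ((uncovered.length * 2) <= size)); // true
    }
}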

Example 34 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

the class ScoreNormalizationFilter method preserveOrderTop.

/**
	 * Calculates the normalization factor of the top answer for each extraction
	 * technique and normalizes the scores with this factor to ensure that the
	 * order suggested by the original scores is preserved.
	 * 
	 * @param results array of <code>Result</code> objects
	 * @return array of <code>Result</code> objects with new normalized scores
	 */
public Result[] preserveOrderTop(Result[] results) {
    // get answers by extractors
    Hashtable<String, ArrayList<Result>> allExtracted = new Hashtable<String, ArrayList<Result>>();
    for (Result result : results) {
        // only factoid answers with 1 extraction technique
        if (result.getScore() <= 0 || result.getScore() == Float.POSITIVE_INFINITY || result.getExtractionTechniques() == null || result.getExtractionTechniques().length != 1)
            continue;
        String extractor = result.getExtractionTechniques()[0];
        ArrayList<Result> extracted = allExtracted.get(extractor);
        if (extracted == null) {
            extracted = new ArrayList<Result>();
            allExtracted.put(extractor, extracted);
        }
        extracted.add(result);
    }
    // normalize answer scores for each extractor
    for (List<Result> extracted : allExtracted.values()) {
        // get the normalization factor of the top answer
        float maxScore = 0;
        float maxNormScore = 0;
        for (Result factoid : extracted) {
            float score = factoid.getScore();
            float normScore = factoid.getNormScore();
            if (score > maxScore) {
                maxScore = score;
                maxNormScore = normScore;
            }
        }
        double topNormFactor = maxNormScore / maxScore;
        // scale all scores with the top answer's normalization factor so the
        // original order is preserved
        for (Result factoid : extracted) {
            float norm = (float) (factoid.getScore() * topNormFactor);
            factoid.setNormScore(norm);
        }
    }
    return results;
}
Also used: Hashtable (java.util.Hashtable), ArrayList (java.util.ArrayList), Result (info.ephyra.search.Result)
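Order preservation follows because every score from one extractor is multiplied by the same positive constant, the top answer's normScore/score ratio. A small numeric sketch with made-up scores:

public class PreserveOrderSketch {
    public static void main(String[] args) {
        // scores from one hypothetical extractor; the top answer (4.0) has a
        // normalized score of 0.8, so the factor is 0.8 / 4.0 = 0.2
        float[] scores = {4.0f, 2.5f, 1.0f};
        float maxScore = 4.0f;
        float maxNormScore = 0.8f;
        double topNormFactor = maxNormScore / maxScore;
        for (float score : scores)
            System.out.println(score + " -> " + (float) (score * topNormFactor));
        // 4.0 -> 0.8, 2.5 -> 0.5, 1.0 -> 0.2: same order as the raw scores
    }
}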

Example 35 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

the class ScoreNormalizationFilter method addMeanScoreFeature.

/**
	 * Adds the mean score of all factoid answers as a feature to the
	 * instance. (An earlier version, commented out in the project source,
	 * restricted the mean to answers from the same extractor as the given
	 * result.)
	 */
private static void addMeanScoreFeature(MutableInstance instance, Result result, Result[] results) {
    // calculate the mean score over all factoid answers
    // (positive, finite scores)
    double meanScore = 0;
    int numFactoid = 0;
    for (Result r : results) if (r.getScore() > 0 && r.getScore() < Float.POSITIVE_INFINITY) {
        meanScore += r.getScore();
        numFactoid++;
    }
    // guard against division by zero when there are no factoid answers
    if (numFactoid > 0)
        meanScore /= numFactoid;
    Feature feature = new Feature(MEAN_SCORE_F);
    instance.addNumeric(feature, meanScore);
}
Also used: Feature (edu.cmu.minorthird.classify.Feature), Result (info.ephyra.search.Result)
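The feature value is just the arithmetic mean over answers whose score marks them as factoid (positive and finite). A dependency-free sketch of that computation with illustrative scores, leaving out minorthird's MutableInstance and Feature:

public class MeanScoreSketch {
    public static void main(String[] args) {
        // illustrative answer scores; NEGATIVE_INFINITY and 0 mark
        // non-factoid answers and are excluded from the mean
        float[] scores = {2.0f, 0.0f, 4.0f, Float.NEGATIVE_INFINITY, 3.0f};
        double meanScore = 0;
        int numFactoid = 0;
        for (float s : scores)
            if (s > 0 && s < Float.POSITIVE_INFINITY) {
                meanScore += s;
                numFactoid++;
            }
        if (numFactoid > 0)
            meanScore /= numFactoid;
        System.out.println(meanScore); // (2 + 4 + 3) / 3 = 3.0
    }
}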

Aggregations

Result (info.ephyra.search.Result): 68
ArrayList (java.util.ArrayList): 36
Query (info.ephyra.querygeneration.Query): 11
HashSet (java.util.HashSet): 9
Hashtable (java.util.Hashtable): 9
AnalyzedQuestion (info.ephyra.questionanalysis.AnalyzedQuestion): 8
IOException (java.io.IOException): 7
QuestionInterpretation (info.ephyra.questionanalysis.QuestionInterpretation): 5
Feature (edu.cmu.minorthird.classify.Feature): 4
HashMap (java.util.HashMap): 4
Predicate (info.ephyra.nlp.semantics.Predicate): 3
BagOfWordsG (info.ephyra.querygeneration.generators.BagOfWordsG): 3
BufferedReader (java.io.BufferedReader): 3
File (java.io.File): 3
URL (java.net.URL): 3
TRECTarget (info.ephyra.trec.TRECTarget): 2
EOFException (java.io.EOFException): 2
FileInputStream (java.io.FileInputStream): 2
FileOutputStream (java.io.FileOutputStream): 2
InputStreamReader (java.io.InputStreamReader): 2