Search in sources :

Example 16 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

In class FactoidSubsetFilter, method apply.

/**
	 * <p>Drops results that are subsets of other results and transfers their
	 * scores to the remaining results.</p>
	 * 
	 * <p>A result is dropped when its normalized answer keywords are a subset
	 * of a longer answer that is a named entity not extracted with a
	 * model-based tagger; its score is added to that longer answer's score.
	 * Results whose score is positive or negative infinity are skipped
	 * entirely (presumably sentinel scores — confirm with the scorers).</p>
	 * 
	 * @param results array of <code>Result</code> objects
	 * @return array of <code>Result</code> objects that are not subsets
	 */
public Result[] apply(Result[] results) {
    // sort results by their scores in ascending order
    results = (new ReverseScoreSorterFilter()).apply(results);
    // sort results by their lengths in ascending order (stable)
    results = (new ResultLengthSorterFilter()).apply(results);
    // normalize answer strings
    // (norms[i] stays null for infinite-score results; the same guard below
    // ensures those null entries are never dereferenced)
    String[] norms = new String[results.length];
    for (int i = 0; i < results.length; i++) if (results[i].getScore() != Float.POSITIVE_INFINITY && results[i].getScore() != Float.NEGATIVE_INFINITY)
        norms[i] = StringUtils.normalize(results[i].getAnswer());
    // check for subset relations, aggregate answers
    // after the stable length sort, results[j] with j > i is at least as
    // long as results[i]; scan candidates from the longest downward
    for (int i = 0; i < results.length - 1; i++) {
        if (results[i].getScore() != Float.POSITIVE_INFINITY && results[i].getScore() != Float.NEGATIVE_INFINITY)
            for (int j = results.length - 1; j > i; j--) if (results[j].getScore() != Float.POSITIVE_INFINITY && results[j].getScore() != Float.NEGATIVE_INFINITY && results[j].isNamedEntity() && !NETagger.allModelType(results[j].getNeTypes()) && StringUtils.isSubsetKeywords(norms[i], norms[j])) {
                // longer answer is a NE not extracted with a
                // model-based tagger
                results[j].incScore(results[i].getScore());
                // mark the subset result for removal below
                results[i] = null;
                break;
            }
    }
    // get remaining results
    ArrayList<Result> remaining = new ArrayList<Result>();
    for (Result result : results) if (result != null)
        remaining.add(result);
    return remaining.toArray(new Result[remaining.size()]);
}
Also used : ArrayList(java.util.ArrayList) Result(info.ephyra.search.Result)

Example 17 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

In class HitPositionComparator, method compare.

/**
	 * Compares its two arguments for order. Returns a negative integer, zero,
	 * or a positive integer as the first argument is less than, equal to, or
	 * greater than the second.
	 * 
	 * @param o1 the first object to be compared
	 * @param o2 the second object to be compared
	 * @return a negative integer, zero, or a positive integer as the first
	 *         argument is less than, equal to, or greater than the second
	 * @throws ClassCastException if either argument is not a
	 *         <code>Result</code>
	 */
public int compare(Object o1, Object o2) {
    if (!(o1 instanceof Result) || !(o2 instanceof Result))
        throw new ClassCastException();
    Result r1 = (Result) o1;
    Result r2 = (Result) o2;
    // Integer.compare avoids the int-overflow bug of plain subtraction
    // (e.g. a large positive minus a large negative hit position wraps
    // around and inverts the ordering).
    return Integer.compare(r1.getHitPos(), r2.getHitPos());
}
Also used : Result(info.ephyra.search.Result)

Example 18 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

In class PatternLearner, method assessPatterns.

/**
	 * Runs the answer-pattern assessment over the text passages contained in
	 * the given <code>Result</code> objects, using the regular expression
	 * registered for each result's query string.
	 * 
	 * @param results search results
	 */
private static void assessPatterns(Result[] results) {
    for (Result r : results) {
        // look up the regex recorded for this result's query string
        String queryRegex = regexs.get(r.getQuery().getQueryString());
        AnswerPatternFilter.assessPatterns(r, queryRegex);
    }
}
Also used : Result(info.ephyra.search.Result)

Example 19 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

In class PatternLearner, method extractPatterns.

/**
	 * Derives answer patterns from the text passages in the given search
	 * results, using the answer string registered for each result's query.
	 * 
	 * @param results search results
	 */
private static void extractPatterns(Result[] results) {
    for (Result r : results) {
        // look up the answer string recorded for this result's query
        String answerString = ass.get(r.getQuery().getQueryString());
        PatternExtractor.extract(r, answerString);
    }
}
Also used : Result(info.ephyra.search.Result)

Example 20 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

In class WebTermImportanceFilter, method apply.

/**
	 * Increment the score of each result snippet for each word in it according
	 * to the number of top-100 web search engine snippets containing this
	 * particular word. This favors snippets that provide information given
	 * frequently and thus likely to be more important with regard to the
	 * target.
	 * 
	 * <p>Results with a score of <code>Float.NEGATIVE_INFINITY</code> are
	 * skipped (presumably a sentinel for discarded results — confirm with
	 * callers).</p>
	 * 
	 * @param results array of <code>Result</code> objects
	 * @return extended array of <code>Result</code> objects
	 */
@SuppressWarnings("unchecked")
public Result[] apply(Result[] results) {
    //	catch empty result 
    if (results.length == 0)
        return results;
    //	produce target variations
    String target = results[0].getQuery().getOriginalQueryString();
    System.out.println("WebTermImportanceFilter:\n processing target '" + target + "'");
    HashMap<String, TermCounter> rawTermCounters = this.cacheLookup(target);
    //	query generation test
    if (TEST_TARGET_GENERATION) {
        String[] targets = this.getTargets(target);
        System.out.println(" generated web search Strings:");
        for (String t : targets) System.out.println(" - " + t);
        //	query generation test only
        return results;
    //	cache miss: fetch term counters from the web and cache them
    } else if (rawTermCounters == null) {
        String[] targets = this.getTargets(target);
        System.out.println(" web search Strings are");
        for (String t : targets) System.out.println(" - " + t);
        rawTermCounters = this.getTermCounters(targets);
        this.cache(target, rawTermCounters);
    }
    //	get target tokens (length guard added: a zero-length token would
    //	otherwise throw StringIndexOutOfBoundsException on charAt(0))
    HashSet<String> rawTargetTerms = new HashSet<String>();
    String[] targetTokens = OpenNLP.tokenize(target);
    for (String tt : targetTokens) if (tt.length() > 0 && Character.isLetterOrDigit(tt.charAt(0)))
        rawTargetTerms.add(tt);
    //	stem terms, collect target terms: aggregate the raw counts under
    //	the stemmed, lower-cased form of each term
    HashMap<String, TermCounter> termCounters = new HashMap<String, TermCounter>();
    HashSet<String> targetTerms = new HashSet<String>();
    for (String rawTerm : rawTermCounters.keySet()) {
        String stemmedTerm = SnowballStemmer.stem(rawTerm.toLowerCase());
        if (!termCounters.containsKey(stemmedTerm))
            termCounters.put(stemmedTerm, new TermCounter());
        termCounters.get(stemmedTerm).increment(rawTermCounters.get(rawTerm).getValue());
        if (rawTargetTerms.contains(rawTerm))
            targetTerms.add(stemmedTerm);
    }
    //	get overall recall (since 20070718)
    int termCount = this.getCountSum(termCounters);
    int termCountLog = ((termCount > 100) ? ((int) Math.log10(termCount)) : 2);
    System.out.println("WebTermImportanceFilter: termCountLog is " + termCountLog);
    //	score results: repeatedly promote the current top-scored result,
    //	then decay the counters of its terms so that near-duplicate
    //	snippets do not dominate the final ranking
    ArrayList<Result> resultList = new ArrayList<Result>();
    boolean goOn;
    do {
        goOn = false;
        ArrayList<Result> rawResults = new ArrayList<Result>();
        //	score all results
        for (Result r : results) {
            if (r.getScore() != Float.NEGATIVE_INFINITY) {
                //	tokenize sentence
                String[] sentence = NETagger.tokenize(r.getAnswer());
                float importance = 0;
                //	scan sentence for terms from web result
                for (int i = 0; i < sentence.length; i++) {
                    String term = sentence[i];
                    if ((term.length() > 1)) {
                        term = SnowballStemmer.stem(term.toLowerCase());
                        TermCounter count = termCounters.get(term);
                        if (count != null) {
                            // 20070706: term-frequency normalization
                            double tf;
                            if (this.tfNormalizationMode == NO_NORMALIZATION)
                                tf = 1;
                            else if (this.tfNormalizationMode == LOG_LENGTH_NORMALIZATION) {
                                tf = WordFrequencies.lookup(sentence[i].toLowerCase());
                                if (tf > Math.E)
                                    tf = Math.log(tf);
                                else
                                    tf = 1;
                            // FIX: this branch previously re-tested
                            // LOG_LENGTH_NORMALIZATION (a duplicate of the
                            // branch above), so the base-10 variant below
                            // was unreachable dead code
                            } else if (this.tfNormalizationMode == LOG_10_LENGTH_NORMALIZATION) {
                                tf = WordFrequencies.lookup(sentence[i].toLowerCase());
                                if (tf > 10)
                                    tf = Math.log10(tf);
                                else
                                    tf = 1;
                            } else
                                tf = 1;
                            importance += (count.getValue() / tf);
                        }
                    }
                }
                //	don't throw out 0-scored results for combining approaches
                if (this.isCombined || (importance > 0)) {
                    if (this.normalizationMode == NO_NORMALIZATION)
                        r.setScore(importance);
                    else if (this.normalizationMode == LINEAR_LENGTH_NORMALIZATION)
                        // try normalized score
                        r.setScore(importance / sentence.length);
                    else if (this.normalizationMode == SQUARE_ROOT_LENGTH_NORMALIZATION)
                        // try normalized score
                        r.setScore(importance / ((float) Math.sqrt(sentence.length)));
                    else if (this.normalizationMode == LOG_LENGTH_NORMALIZATION)
                        // try normalized score
                        r.setScore(importance / (1 + ((float) Math.log(sentence.length))));
                    else if (this.normalizationMode == LOG_10_LENGTH_NORMALIZATION)
                        // try normalized score
                        r.setScore(importance / (1 + ((float) Math.log10(sentence.length))));
                    rawResults.add(r);
                }
            }
        }
        if (rawResults.size() != 0) {
            //	find top result
            Collections.sort(rawResults);
            Collections.reverse(rawResults);
            Result top = rawResults.remove(0);
            resultList.add(top);
            //	decrement scores of top result terms
            String[] sentence = NETagger.tokenize(top.getAnswer());
            for (int i = 0; i < sentence.length; i++) {
                String term = SnowballStemmer.stem(sentence[i].toLowerCase());
                TermCounter count = termCounters.get(term);
                if (count != null) {
                    //	20070718: target terms decay more slowly than others
                    if (targetTerms.contains(term))
                        count.divideValue(2);
                    else
                        count.divideValue(termCountLog);
                    if (count.getValue() == 0)
                        termCounters.remove(term);
                }
            }
            //	prepare remaining results for next round
            results = rawResults.toArray(new Result[rawResults.size()]);
            goOn = true;
        }
    } while (goOn);
    Collections.sort(resultList);
    Collections.reverse(resultList);
    //	set position-dependent extra score for combining approaches
    if (this.isCombined) {
        float eScore = 100;
        for (Result r : resultList) {
            r.addExtraScore((this.getClass().getName() + this.normalizationMode), eScore);
            eScore *= 0.9f;
        }
    }
    return resultList.toArray(new Result[resultList.size()]);
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Result(info.ephyra.search.Result) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Aggregations

Result (info.ephyra.search.Result)68 ArrayList (java.util.ArrayList)36 Query (info.ephyra.querygeneration.Query)11 HashSet (java.util.HashSet)9 Hashtable (java.util.Hashtable)9 AnalyzedQuestion (info.ephyra.questionanalysis.AnalyzedQuestion)8 IOException (java.io.IOException)7 QuestionInterpretation (info.ephyra.questionanalysis.QuestionInterpretation)5 Feature (edu.cmu.minorthird.classify.Feature)4 HashMap (java.util.HashMap)4 Predicate (info.ephyra.nlp.semantics.Predicate)3 BagOfWordsG (info.ephyra.querygeneration.generators.BagOfWordsG)3 BufferedReader (java.io.BufferedReader)3 File (java.io.File)3 URL (java.net.URL)3 TRECTarget (info.ephyra.trec.TRECTarget)2 EOFException (java.io.EOFException)2 FileInputStream (java.io.FileInputStream)2 FileOutputStream (java.io.FileOutputStream)2 InputStreamReader (java.io.InputStreamReader)2