Search in sources :

Example 41 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

the class AnswerSelection method getResults.

/**
 * Runs every registered <code>Filter</code> over the search results and then
 * collects, in order, the results whose score reaches <code>minScore</code>,
 * stopping as soon as <code>maxResults</code> of them have been gathered.
 * <p>
 * The limit is only compared against zero, so a negative
 * <code>maxResults</code> never triggers the cut-off and every qualifying
 * result is returned.
 *
 * @param results search results
 * @param maxResults maximum number of results to be returned
 * @param minScore minimum score of a result that is returned
 * @return the qualifying results, at most <code>maxResults</code> of them
 */
public static Result[] getResults(Result[] results, int maxResults, float minScore) {
    // run the filter chain: each filter works on the output of the previous one
    Result[] filtered = results;
    for (Filter filter : filters) {
        MsgPrinter.printFilterStarted(filter, filtered.length);
        filtered = filter.apply(filtered);
        MsgPrinter.printFilterFinished(filter, filtered.length);
    }

    // pick results that reach minScore until the quota is used up
    ArrayList<Result> selected = new ArrayList<Result>();
    int remaining = maxResults;
    for (int i = 0; i < filtered.length && remaining != 0; i++) {
        Result candidate = filtered[i];
        // negated ">=" (rather than "<") keeps the treatment of NaN scores unchanged
        if (!(candidate.getScore() >= minScore)) {
            continue;
        }
        selected.add(candidate);
        remaining--;
    }
    return selected.toArray(new Result[selected.size()]);
}
Also used : Filter(info.ephyra.answerselection.filters.Filter) ArrayList(java.util.ArrayList) Result(info.ephyra.search.Result)

Example 42 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

the class AnswerTypeFilter method apply.

/**
 * Extracts named entities (NEs) of the expected answer types from the answer
 * strings of the given <code>Result</code> objects and creates one new
 * <code>Result</code> per unique normalized NE. The score of such a factoid
 * is incremented by one for every extraction that yields it.
 * <p>
 * A result is skipped if its query is not extracted with this filter's
 * <code>ID</code>, if the analyzed question has no answer types, or if its
 * score is greater than negative infinity.
 *
 * @param results array of <code>Result</code> objects
 * @return the original results followed by the newly created factoid results
 */
public Result[] apply(Result[] results) {
    // factoid answers found so far, keyed by their normalized form
    Hashtable<String, Result> factoidsByNorm = new Hashtable<String, Result>();

    for (Result source : results) {
        Query query = source.getQuery();
        String[] answerTypes = query.getAnalyzedQuestion().getAnswerTypes();

        // this filter only handles results for the answer type testing approach
        if (!query.extractWith(ID)) {
            continue;
        }
        if (answerTypes.length == 0) {
            continue;
        }
        if (source.getScore() > Float.NEGATIVE_INFINITY) {
            continue;
        }

        // break the answer string into sentences and tokenize each of them
        String[] sentences = OpenNLP.sentDetect(source.getAnswer());
        int sentenceCount = sentences.length;
        String[][] tokens = new String[sentenceCount][];
        for (int s = 0; s < sentenceCount; s++) {
            tokens[s] = NETagger.tokenize(sentences[s]);
        }

        for (String answerType : answerTypes) {
            // walk the "->"-separated type path and keep the tagger IDs of the
            // last type on the path for which taggers exist
            int[] neIds = new int[0];
            for (String pathElement : answerType.split("->")) {
                int[] candidateIds = NETagger.getNeIds(pathElement);
                if (candidateIds.length > 0) {
                    neIds = candidateIds;
                }
            }

            // run each of these taggers over the tokenized sentences
            for (int neId : neIds) {
                String neType = NETagger.getNeType(neId);
                String[][] nes = NETagger.extractNes(tokens, neId);

                for (int s = 0; s < sentenceCount; s++) {
                    String sentence = sentences[s];
                    String[] sentenceNes = nes[s];

                    // untokenize all NEs of this sentence before using them
                    for (int k = 0; k < sentenceNes.length; k++) {
                        sentenceNes[k] = OpenNLP.untokenize(sentenceNes[k], sentence);
                    }

                    // one factoid per unique normalized NE
                    for (String ne : sentenceNes) {
                        String norm = StringUtils.normalize(ne);
                        Result factoid = factoidsByNorm.get(norm);
                        if (factoid == null) {
                            // first occurrence: query, doc ID and sentence are
                            // taken from here although they can be ambiguous
                            factoid = new Result(ne, source.getQuery(), source.getDocID());
                            factoid.setSentence(sentence);
                            factoid.addExtractionTechnique(ID);
                            factoidsByNorm.put(norm, factoid);
                        }
                        factoid.addNeType(neType);
                        // TODO consider query score, #keywords, hit pos
                        factoid.incScore(1);
                    }
                }
            }
        }
    }

    // append the factoids to the untouched original results
    Result[] extracted = factoidsByNorm.values().toArray(new Result[factoidsByNorm.size()]);
    Result[] merged = new Result[results.length + extracted.length];
    System.arraycopy(results, 0, merged, 0, results.length);
    System.arraycopy(extracted, 0, merged, results.length, extracted.length);
    return merged;
}
Also used : Query(info.ephyra.querygeneration.Query) Hashtable(java.util.Hashtable) Result(info.ephyra.search.Result)

Example 43 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

the class CacheResultsFilter method apply.

/**
 * Resets the result array to a previously cached state if one is stored for
 * the original query string of the first result.
 * <p>
 * On the first call for a query string, copies of all results are cached and
 * the input array is returned unchanged. On later calls with the same query
 * string, the array found by the cache lookup is returned instead of the
 * input. An empty input array is returned as is, without touching the cache.
 *
 * @param results array of <code>Result</code> objects
 * @return the input array, or the cached array on a cache hit
 */
public Result[] apply(Result[] results) {
    // nothing to look up or store for an empty result array
    if (results.length == 0) {
        return results;
    }

    // the query string of the first result serves as the cache key
    String queryString = results[0].getQuery().getOriginalQueryString();
    Result[] cached = this.cacheLookup(queryString);

    // lookup call: hand back the stored state
    if (cached != null) {
        return cached;
    }

    // initializer call: store copies so the cached state is decoupled from
    // the result objects that are passed on
    Result[] snapshot = new Result[results.length];
    for (int i = 0; i < results.length; i++) {
        snapshot[i] = results[i].getCopy();
    }
    this.cache(queryString, snapshot);
    return results;
}
Also used : Result(info.ephyra.search.Result)

Example 44 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

the class PreferNamedEntitiesFilter method apply.

/**
 * Drops factoid answers that are not named entities whenever at least one
 * factoid answer is a named entity. If none is, all factoid answers are kept.
 * <p>
 * Results with an infinite score (positive or negative) are treated as raw
 * results: they are never dropped and are appended after the factoid answers.
 *
 * @param results array of <code>Result</code> objects
 * @return the preferred factoid answers followed by the raw results
 */
public Result[] apply(Result[] results) {
    ArrayList<Result> raw = new ArrayList<Result>();
    ArrayList<Result> entities = new ArrayList<Result>();
    ArrayList<Result> others = new ArrayList<Result>();

    // sort every result into exactly one of the three buckets
    for (Result result : results) {
        if (Float.isInfinite(result.getScore())) {
            raw.add(result);
            continue;
        }
        (result.isNamedEntity() ? entities : others).add(result);
    }

    // named entities win if there are any, then the raw results follow
    ArrayList<Result> kept = new ArrayList<Result>(entities.isEmpty() ? others : entities);
    kept.addAll(raw);
    return kept.toArray(new Result[kept.size()]);
}
Also used : ArrayList(java.util.ArrayList) Result(info.ephyra.search.Result)

Example 45 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

the class ResultLengthFilter method apply.

/**
 * Keeps the leading results whose answer strings together contain no more
 * than <code>cutoffLength</code> non-whitespace characters.
 * <p>
 * Results are consumed in order. The first result that pushes the running
 * total above the limit ends the scan, so neither it nor any later result is
 * returned.
 *
 * @param results array of <code>Result</code> objects
 * @return filtered array of <code>Result</code> objects
 */
public Result[] apply(Result[] results) {
    ArrayList<Result> kept = new ArrayList<Result>();
    int total = 0;
    for (int i = 0; i < results.length; i++) {
        Result current = results[i];
        // count only the characters left after removing whitespace
        String stripped = current.getAnswer().replaceAll("\\s", "");
        total += stripped.length();
        if (total > this.cutoffLength) {
            break;
        }
        kept.add(current);
    }
    return kept.toArray(new Result[kept.size()]);
}
Also used : ArrayList(java.util.ArrayList) Result(info.ephyra.search.Result)

Aggregations

Result (info.ephyra.search.Result)68 ArrayList (java.util.ArrayList)36 Query (info.ephyra.querygeneration.Query)11 HashSet (java.util.HashSet)9 Hashtable (java.util.Hashtable)9 AnalyzedQuestion (info.ephyra.questionanalysis.AnalyzedQuestion)8 IOException (java.io.IOException)7 QuestionInterpretation (info.ephyra.questionanalysis.QuestionInterpretation)5 Feature (edu.cmu.minorthird.classify.Feature)4 HashMap (java.util.HashMap)4 Predicate (info.ephyra.nlp.semantics.Predicate)3 BagOfWordsG (info.ephyra.querygeneration.generators.BagOfWordsG)3 BufferedReader (java.io.BufferedReader)3 File (java.io.File)3 URL (java.net.URL)3 TRECTarget (info.ephyra.trec.TRECTarget)2 EOFException (java.io.EOFException)2 FileInputStream (java.io.FileInputStream)2 FileOutputStream (java.io.FileOutputStream)2 InputStreamReader (java.io.InputStreamReader)2