Search in sources :

Example 36 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

the class SentenceExtractionFilter method apply.

/**
 * Splits the answer strings of the results into sentences and creates a
 * new result for each sentence.
 * <p>
 * Results whose score is <code>Float.POSITIVE_INFINITY</code> (results from
 * knowledge annotators) are passed through unchanged. The input results
 * themselves are not modified; each sentence is stored in a copy obtained
 * from <code>Result.getCopy()</code>.
 * 
 * @param results array of <code>Result</code> objects
 * @return extended array of <code>Result</code> objects
 */
public Result[] apply(Result[] results) {
    ArrayList<Result> newResults = new ArrayList<Result>();
    for (Result result : results) {
        // do not apply this filter to results from knowledge annotators
        if (result.getScore() == Float.POSITIVE_INFINITY) {
            newResults.add(result);
            continue;
        }
        // split the answer string into sentences
        String answer = result.getAnswer();
        String[] sentences = LingPipe.sentDetect(answer);
        // create a new result for each sentence
        for (String sentence : sentences) {
            Result newResult = result.getCopy();
            // the sentence must go into the copy, not the original: setting
            // it on the original left every copy with a stale answer and
            // mutated the caller's input
            newResult.setAnswer(sentence);
            newResults.add(newResult);
        }
    }
    return newResults.toArray(new Result[newResults.size()]);
}
Also used : ArrayList(java.util.ArrayList) Result(info.ephyra.search.Result)

Example 37 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

the class Logger method logResults.

/**
 * Logs the results returned by the QA engine.
 * <p>
 * Appends one <code>&lt;result&gt;</code> element per result to the log
 * file, containing the answer, the score, the document ID (if not
 * <code>null</code>) and the question interpretation (if not
 * <code>null</code>).
 * 
 * @param results the results
 * @return true, iff logging was successful; false if logging is disabled,
 *         no log file is specified, the log file could not be opened or a
 *         write error occurred
 */
public static boolean logResults(Result[] results) {
    // logging is disabled or log file is not specified
    if (!enabled || logfile == null)
        return false;
    PrintWriter out = null;
    try {
        // open in append mode so earlier log entries are preserved
        out = new PrintWriter(new FileOutputStream(logfile, true));
        for (Result result : results) {
            out.println("\t<result>");
            out.println("\t\t<answer>");
            out.println("\t\t\t" + result.getAnswer());
            out.println("\t\t</answer>");
            out.println("\t\t<score>");
            out.println("\t\t\t" + result.getScore());
            out.println("\t\t</score>");
            if (result.getDocID() != null) {
                out.println("\t\t<docid>");
                out.println("\t\t\t" + result.getDocID());
                out.println("\t\t</docid>");
            }
            QuestionInterpretation qi = result.getQuery().getInterpretation();
            if (qi != null) {
                out.println("\t\t<interpretation>");
                out.println("\t\t\t<property>");
                out.println("\t\t\t\t" + qi.getProperty());
                out.println("\t\t\t</property>");
                out.println("\t\t\t<target>");
                out.println("\t\t\t\t" + qi.getTarget());
                out.println("\t\t\t</target>");
                for (String context : qi.getContext()) {
                    out.println("\t\t\t<context>");
                    out.println("\t\t\t\t" + context);
                    out.println("\t\t\t</context>");
                }
                out.println("\t\t</interpretation>");
            }
            out.println("\t</result>");
        }
        // PrintWriter never throws on write failures; checkError() flushes
        // the stream and reports whether any write went wrong
        return !out.checkError();
    } catch (IOException e) {
        // the log file could not be opened
        return false;
    } finally {
        // release the file handle even if a runtime exception escapes the loop
        if (out != null)
            out.close();
    }
}
Also used : QuestionInterpretation(info.ephyra.questionanalysis.QuestionInterpretation) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) PrintWriter(java.io.PrintWriter) Result(info.ephyra.search.Result)

Example 38 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

the class DuplicateFilter method apply.

/**
 * Removes results with forbidden answer strings and merges duplicate
 * results: of several results whose answers are equal under
 * <code>StringUtils.equalsCommonNorm</code>, only the first one in score
 * order is kept and its score is incremented by the scores of the others.
 * <p>
 * Results with a score of <code>Float.POSITIVE_INFINITY</code> are neither
 * dropped nor merged, and each scan stops at the first result with a score
 * of <code>Float.NEGATIVE_INFINITY</code>.
 * 
 * @param results array of <code>Result</code> objects
 * @return array of <code>Result</code> objects without duplicates
 */
public Result[] apply(Result[] results) {
    // work on the results ordered by descending score
    Result[] ranked = (new ScoreSorterFilter()).apply(results);

    // pass 1: null out every result whose answer matches a forbidden string
    for (String banned : forbidden) {
        for (int k = 0; k < ranked.length; k++) {
            Result candidate = ranked[k];
            if (candidate == null || candidate.getScore() == Float.POSITIVE_INFINITY) {
                continue;
            }
            if (candidate.getScore() == Float.NEGATIVE_INFINITY) {
                break;
            }
            if (StringUtils.equalsCommonNorm(banned, candidate.getAnswer())) {
                ranked[k] = null;
            }
        }
    }

    // pass 2: fold each later duplicate into the earlier result
    for (int first = 0; first < ranked.length - 1; first++) {
        Result keeper = ranked[first];
        if (keeper == null || keeper.getScore() == Float.POSITIVE_INFINITY) {
            continue;
        }
        if (keeper.getScore() == Float.NEGATIVE_INFINITY) {
            break;
        }
        for (int later = first + 1; later < ranked.length; later++) {
            Result other = ranked[later];
            if (other == null || other.getScore() == Float.POSITIVE_INFINITY) {
                continue;
            }
            if (other.getScore() == Float.NEGATIVE_INFINITY) {
                break;
            }
            if (!StringUtils.equalsCommonNorm(keeper.getAnswer(), other.getAnswer())) {
                continue;
            }
            // the earlier result absorbs the score of the later duplicate,
            // which is then dropped
            keeper.incScore(other.getScore());
            ranked[later] = null;
        }
    }

    // pass 3: collect whatever has not been nulled out
    ArrayList<Result> survivors = new ArrayList<Result>();
    for (int k = 0; k < ranked.length; k++) {
        if (ranked[k] != null) {
            survivors.add(ranked[k]);
        }
    }
    return survivors.toArray(new Result[survivors.size()]);
}
Also used : ArrayList(java.util.ArrayList) Result(info.ephyra.search.Result)

Example 39 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

the class DuplicateSnippetFilter method apply.

/**
 * Filters results whose answer snippets reduce to the same set of
 * keywords. A snippet is lower-cased, trimmed, stripped of quote,
 * backtick and underscore characters and stemmed; its tokens longer than
 * one character that are not function words form the keyword set. Only
 * the first result (in score order) for each distinct keyword set is
 * kept. Results with a <code>null</code> answer are dropped.
 * 
 * @param results array of <code>Result</code> objects
 * @return array of <code>Result</code> objects without duplicates
 */
public Result[] apply(Result[] results) {
    // process the results ordered by descending score
    Result[] ranked = (new ScoreSorterFilter()).apply(results);

    HashSet<String> seenKeys = new HashSet<String>();
    ArrayList<Result> kept = new ArrayList<Result>();

    for (Result candidate : ranked) {
        String snippet = candidate.getAnswer();
        if (snippet == null) {
            continue;
        }

        // normalize: lower-case, trim, strip quote-like characters, stem
        String normalized = SnowballStemmer.stemAllTokens(
                snippet.toLowerCase().trim().replaceAll("(\\'|\\\"|\\`|\\_)", ""));

        // collect the distinct content-bearing tokens
        HashSet<String> keywords = new HashSet<String>();
        for (String token : NETagger.tokenize(normalized)) {
            if (token.length() <= 1 || FunctionWords.lookup(token)) {
                continue;
            }
            keywords.add(token);
        }

        // build an order-independent key: the sorted keywords joined by spaces
        ArrayList<String> ordered = new ArrayList<String>(keywords);
        Collections.sort(ordered);
        StringBuilder key = new StringBuilder();
        for (String keyword : ordered) {
            key.append(" ").append(keyword);
        }

        // add() returns false if an earlier snippet produced the same key
        boolean unseen = seenKeys.add(key.toString().trim());
        if (unseen) {
            kept.add(candidate);
        }
    }
    return kept.toArray(new Result[kept.size()]);
}
Also used : ArrayList(java.util.ArrayList) Result(info.ephyra.search.Result) HashSet(java.util.HashSet)

Example 40 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

the class FactoidsFromPredicatesFilter method apply.

/**
 * Extracts factoids from the predicates within the answer strings of the
 * <code>Result</code> objects and creates a new <code>Result</code> for
 * each extracted unique answer (uniqueness is decided on the answer
 * normalized with <code>StringUtils.normalize</code>).
 * <p>
 * Results that this filter does not apply to are passed along unchanged
 * and precede the extracted factoids in the returned array.
 * 
 * @param results array of <code>Result</code> objects containing predicates
 * @return array of <code>Result</code> objects containing factoids
 */
public Result[] apply(Result[] results) {
    // factoid results and best predicate similarity seen so far, both keyed
    // by the normalized answer string
    Hashtable<String, Result> factoidsByNorm = new Hashtable<String, Result>();
    Hashtable<String, Double> bestSimByNorm = new Hashtable<String, Double>();
    // results this filter does not touch
    ArrayList<Result> passedThrough = new ArrayList<Result>();

    for (Result result : results) {
        // the filter only handles results of the semantic parsing approach:
        // extracted with this technique, question has predicates, score is 0
        Query query = result.getQuery();
        Predicate[] questionPredicates = query.getAnalyzedQuestion().getPredicates();
        boolean applicable = query.extractWith(ID)
                && questionPredicates.length != 0
                && result.getScore() == 0;
        if (!applicable) {
            passedThrough.add(result);
            continue;
        }

        Predicate predicate = result.getPredicate();
        Predicate questionPredicate = predicate.getSimPredicate();
        double simScore = predicate.getSimScore();
        Map<String, String[]> nes = result.getNes();
        boolean hasNes = (nes != null);

        // gather candidate answer strings
        ArrayList<String> answers = new ArrayList<String>();
        if (!hasNes) {
            // no named entities: the arguments missing in the question
            // predicate serve as factoid answers
            for (String missing : questionPredicate.getMissingArgs()) {
                String argument = predicate.get(missing);
                if (argument != null) {
                    answers.add(argument);
                }
            }
        } else {
            // named entities: accept an entity if any argument contains it
            // (entities are not restricted to the missing arguments)
            for (String entity : nes.keySet()) {
                for (String argument : predicate.getArgs()) {
                    if (argument.contains(entity)) {
                        answers.add(entity);
                        // one match per entity is enough
                        break;
                    }
                }
            }
        }

        // create or update one factoid result per normalized answer
        for (String answer : answers) {
            String norm = StringUtils.normalize(answer);
            Result factoid = factoidsByNorm.get(norm);
            if (factoid != null) {
                if (simScore > bestSimByNorm.get(norm)) {
                    // remember document ID of predicate with highest score
                    factoid.setDocID(result.getDocID());
                    bestSimByNorm.put(norm, simScore);
                }
            } else {
                // first occurrence of this answer; query, doc ID and
                // sentence can be ambiguous
                factoid = new Result(answer, result.getQuery(), result.getDocID());
                factoid.setSentence(result.getSentence());
                factoid.addExtractionTechnique(ID);
                factoidsByNorm.put(norm, factoid);
                bestSimByNorm.put(norm, simScore);
            }
            if (hasNes) {
                for (String neType : nes.get(answer)) {
                    factoid.addNeType(neType);
                }
            }
            // every supporting predicate adds its similarity to the score
            factoid.incScore((float) simScore);
        }
    }

    // passed-through results first, then the extracted factoids
    Result[] extracted = factoidsByNorm.values().toArray(new Result[factoidsByNorm.size()]);
    int passedCount = passedThrough.size();
    Result[] combined = new Result[passedCount + extracted.length];
    passedThrough.toArray(combined);
    System.arraycopy(extracted, 0, combined, passedCount, extracted.length);
    return combined;
}
Also used : Query(info.ephyra.querygeneration.Query) Hashtable(java.util.Hashtable) ArrayList(java.util.ArrayList) Result(info.ephyra.search.Result) Predicate(info.ephyra.nlp.semantics.Predicate)

Aggregations

Result (info.ephyra.search.Result)68 ArrayList (java.util.ArrayList)36 Query (info.ephyra.querygeneration.Query)11 HashSet (java.util.HashSet)9 Hashtable (java.util.Hashtable)9 AnalyzedQuestion (info.ephyra.questionanalysis.AnalyzedQuestion)8 IOException (java.io.IOException)7 QuestionInterpretation (info.ephyra.questionanalysis.QuestionInterpretation)5 Feature (edu.cmu.minorthird.classify.Feature)4 HashMap (java.util.HashMap)4 Predicate (info.ephyra.nlp.semantics.Predicate)3 BagOfWordsG (info.ephyra.querygeneration.generators.BagOfWordsG)3 BufferedReader (java.io.BufferedReader)3 File (java.io.File)3 URL (java.net.URL)3 TRECTarget (info.ephyra.trec.TRECTarget)2 EOFException (java.io.EOFException)2 FileInputStream (java.io.FileInputStream)2 FileOutputStream (java.io.FileOutputStream)2 InputStreamReader (java.io.InputStreamReader)2