Search in sources :

Example 46 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

From the class ScoreNormalizationFilter, the method preserveOrderAveraging.

/**
	 * Calculates the average normalization factor for each extraction technique
	 * and normalizes the scores with this factor to ensure that the order
	 * suggested by the original scores is preserved. The factor is adjusted to
	 * avoid normalized scores larger than 1.
	 * 
	 * @param results array of <code>Result</code> objects
	 * @return array of <code>Result</code> objects with new normalized scores
	 */
public Result[] preserveOrderAveraging(Result[] results) {
    // group answers by the technique that extracted them
    Hashtable<String, ArrayList<Result>> allExtracted = new Hashtable<String, ArrayList<Result>>();
    for (Result result : results) {
        // only factoid answers extracted by exactly 1 technique
        if (result.getScore() <= 0 || result.getScore() == Float.POSITIVE_INFINITY || result.getExtractionTechniques() == null || result.getExtractionTechniques().length != 1)
            continue;
        String extractor = result.getExtractionTechniques()[0];
        ArrayList<Result> extracted = allExtracted.get(extractor);
        if (extracted == null) {
            extracted = new ArrayList<Result>();
            allExtracted.put(extractor, extracted);
        }
        extracted.add(result);
    }
    // normalize answer scores for each extractor
    for (List<Result> extracted : allExtracted.values()) {
        // get average normalization factor and track the largest raw score
        double sumNormFactors = 0;
        float maxScore = 0;
        for (Result factoid : extracted) {
            float score = factoid.getScore();
            float normScore = factoid.getNormScore();
            sumNormFactors += normScore / score;
            if (score > maxScore)
                maxScore = score;
        }
        double avgNormFactor = sumNormFactors / extracted.size();
        // BUG FIX: cap the factor so the largest normalized score cannot
        // exceed 1, as promised in the method contract (maxScore was
        // previously computed but never used)
        if (maxScore * avgNormFactor > 1)
            avgNormFactor = 1 / maxScore;
        // normalize scores with the (possibly capped) average factor;
        // a common positive factor preserves the original score order
        for (Result factoid : extracted) {
            float norm = (float) (factoid.getScore() * avgNormFactor);
            factoid.setNormScore(norm);
        }
    }
    return results;
}
Also used : Hashtable(java.util.Hashtable) ArrayList(java.util.ArrayList) Result(info.ephyra.search.Result)

Example 47 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

From the class OverlapAnalysisFilter, the method apply.

/**
	 * Evaluates the answer candidates and updates the current overlap analysis.
	 * 
	 * @param results array of <code>Result</code> objects
	 * @return identical array of <code>Result</code> objects
	 */
public Result[] apply(Result[] results) {
    // nothing to analyze without an answer pattern
    if (pattern == null)
        return results;
    // collect the extraction techniques of all candidates whose answer
    // string contains a match for one of the pattern's regular expressions
    HashSet<String> matchingTechniques = new HashSet<String>();
    for (Result candidate : results) {
        String answer = candidate.getAnswer();
        for (String regex : pattern.getRegexs()) {
            if (!answer.matches(".*?" + regex + ".*+"))
                continue;
            String[] techniques = candidate.getExtractionTechniques();
            if (techniques == null || techniques.length == 0) {
                // no technique recorded: the answer came from a raw passage
                matchingTechniques.add("Passage");
            } else {
                for (String technique : techniques) {
                    matchingTechniques.add(technique);
                }
            }
        }
    }
    if (matchingTechniques.isEmpty())
        matchingTechniques.add("None");
    // build a deterministic, comma-separated key over the sorted techniques
    String[] sorted = matchingTechniques.toArray(new String[matchingTechniques.size()]);
    Arrays.sort(sorted);
    String key = StringUtils.concat(sorted, ", ");
    // increment the counter for this combination of techniques
    Integer previous = overlapAnalysis.get(key);
    overlapAnalysis.put(key, (previous == null) ? 1 : previous + 1);
    if (printing)
        printOverlapAnalysis();
    // the candidates themselves are passed through unchanged
    return results;
}
Also used : HashSet(java.util.HashSet) Result(info.ephyra.search.Result)

Example 48 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

From the class PredicateExtractionFilter, the method apply.

/**
	 * Extracts relevant predicates from documents.
	 * <p>
	 * Only results flagged for the semantic parsing approach (query extracts
	 * with <code>FactoidsFromPredicatesFilter.ID</code>, question has
	 * predicates, score still 0) are processed; all other results are passed
	 * through unchanged. Processed documents are split into sentences that
	 * contain a relevant verb, the top <code>MAX_SENTENCES</code> sentences
	 * are annotated with ASSERT, and one new result is created per annotated
	 * predicate that is similar to a question predicate with missing
	 * arguments.
	 * 
	 * @param results array of <code>Result</code> objects containing documents
	 * @return array of <code>Result</code> objects containing predicates
	 */
public Result[] apply(Result[] results) {
    if (results.length == 0)
        return results;
    // accumulates pass-through results plus newly created predicate results
    ArrayList<Result> allResults = new ArrayList<Result>();
    // extract relevant sentences
    // - get sentences that contain relevant verbs,
    //   use weights of verbs as confidence scores
    HashSet<Result> ssSet = new HashSet<Result>();
    for (Result result : results) {
        // only apply this filter to results for the semantic parsing
        // approach
        Query query = result.getQuery();
        Predicate[] ps = query.getAnalyzedQuestion().getPredicates();
        if (!query.extractWith(FactoidsFromPredicatesFilter.ID) || ps.length == 0 || result.getScore() != 0) {
            allResults.add(result);
            continue;
        }
        // get all verb forms and build patterns;
        // each pattern matches any surface form of one question verb,
        // and the parallel list keeps that verb's weight
        Hashtable<String[], Double> verbFormsMap = getAllVerbForms(ps);
        ArrayList<String> verbPatterns = new ArrayList<String>();
        ArrayList<Double> verbWeights = new ArrayList<Double>();
        for (String[] verbForms : verbFormsMap.keySet()) {
            String verbPattern = "(?i).*?\\b(" + StringUtils.concat(verbForms, "|") + ")\\b.*+";
            verbPatterns.add(verbPattern);
            verbWeights.add(verbFormsMap.get(verbForms));
        }
        String[] paragraphs = result.getAnswer().split("\\n");
        for (String p : paragraphs) {
            // paragraph does not contain relevant verb?
            // (cheap pre-check before the per-sentence matching below)
            boolean contains = false;
            for (String verbPattern : verbPatterns) {
                if (p.matches(verbPattern)) {
                    contains = true;
                    break;
                }
            }
            if (!contains)
                continue;
            String[] sentences = LingPipe.sentDetect(p);
            for (String s : sentences) {
                // sentence does not contain relevant verb?
                // weight of the first matching verb becomes the sentence score
                Double weight = 0d;
                for (int i = 0; i < verbPatterns.size(); i++) {
                    if (s.matches(verbPatterns.get(i))) {
                        weight = verbWeights.get(i);
                        break;
                    }
                }
                if (weight == 0d)
                    continue;
                // replace whitespaces by single blanks and trim
                s = s.replaceAll("\\s++", " ").trim();
                // create sentence-level result object
                Result sentence = result.getCopy();
                sentence.setAnswer(s);
                sentence.setScore(weight.floatValue());
                ssSet.add(sentence);
            }
        }
    }
    // - check if these sentences are relevant,
    //   get MAX_SENTENCES sentences with most relevant verbs
    Result[] ss = ssSet.toArray(new Result[ssSet.size()]);
    ss = (new ScoreSorterFilter()).apply(ss);
    ArrayList<Result> ssList = new ArrayList<Result>();
    for (Result s : ss) {
        // reset the temporary verb-weight score before further processing
        s.setScore(0);
        if (checkSentence(s))
            ssList.add(s);
        // get at most MAX_SENTENCES sentences
        if (ssList.size() >= MAX_SENTENCES)
            break;
    }
    ss = ssList.toArray(new Result[ssList.size()]);
    if (ss.length == 0)
        return allResults.toArray(new Result[allResults.size()]);
    // annotate predicates in sentences (ASSERT is called once on the batch)
    String[] sentences = new String[ss.length];
    for (int i = 0; i < ss.length; i++) sentences[i] = ss[i].getAnswer();
    String[][] ass = ASSERT.annotatePredicates(sentences);
    // extract predicates from annotations
    for (int i = 0; i < ass.length; i++) {
        Term[] terms = ss[i].getTerms();
        Predicate[] questionPs = ss[i].getQuery().getAnalyzedQuestion().getPredicates();
        for (int j = 0; j < ass[i].length; j++) {
            // build predicate
            Predicate predicate = null;
            try {
                predicate = new Predicate(sentences[i], ass[i][j], terms);
            } catch (ParseException e) {
                // malformed annotation: skip this predicate, keep the rest
                continue;
            }
            // calculate similarity score: best match over question
            // predicates, considering only those with missing arguments
            double simScore = 0;
            Predicate simPredicate = null;
            for (Predicate questionP : questionPs) // compare to predicates with missing arguments only
            if (questionP.hasMissingArgs()) {
                double currSimScore = predicate.simScore(questionP);
                if (currSimScore > simScore) {
                    simScore = currSimScore;
                    simPredicate = questionP;
                }
            }
            // keep predicate if it is similar to a question predicate
            if (simScore > 0) {
                predicate.setSimScore(simScore);
                predicate.setSimPredicate(simPredicate);
                Result result = ss[i].getCopy();
                result.setAnswer(ass[i][j]);
                result.setSentence(sentences[i]);
                result.setPredicate(predicate);
                allResults.add(result);
            }
        }
    }
    return allResults.toArray(new Result[allResults.size()]);
}
Also used : Query(info.ephyra.querygeneration.Query) ArrayList(java.util.ArrayList) Term(info.ephyra.questionanalysis.Term) Result(info.ephyra.search.Result) Predicate(info.ephyra.nlp.semantics.Predicate) ParseException(java.text.ParseException) HashSet(java.util.HashSet)

Example 49 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

From the class PatternLearner, the method extract.

/**
	 * Loads target-context-answer-regex tuples from resource files, forms
	 * queries, fetches text passages, extracts answer patterns and writes them
	 * to resource files.
	 * 
	 * @return <code>true</code>, iff the answer patterns could be extracted
	 */
public static boolean extract() {
    // load tuples and form queries from both interpretation resources
    MsgPrinter.printFormingQueries();
    ass = new Hashtable<String, String>();
    regexs = new Hashtable<String, String>();
    ArrayList<Query> collected = new ArrayList<Query>();
    String[] resourceDirs = { "res/patternlearning/interpretations", "res/patternlearning/interpretations_extract" };
    for (String dir : resourceDirs) {
        for (Query query : formQueries(dir)) {
            collected.add(query);
        }
    }
    Query[] queries = collected.toArray(new Query[collected.size()]);
    // fetch text passages for all queries
    MsgPrinter.printFetchingPassages();
    Result[] results = fetchPassages(queries);
    // extract answer patterns from the passages
    MsgPrinter.printExtractingPatterns();
    extractPatterns(results);
    // persist the extracted answer patterns
    MsgPrinter.printSavingPatterns();
    return savePatterns("res/patternlearning/answerpatterns_extract");
}
Also used : Query(info.ephyra.querygeneration.Query) ArrayList(java.util.ArrayList) Result(info.ephyra.search.Result)

Example 50 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

From the class PatternLearner, the method assess.

/**
	 * Loads target-context-answer-regex tuples and answer patterns from
	 * resource files, forms queries from the tuples, fetches text passages,
	 * assesses the answer patterns on the text passages and writes them to
	 * resource files.
	 * 
	 * @return <code>true</code>, iff the answer patterns could be assessed
	 */
public static boolean assess() {
    // load previously extracted answer patterns; abort if unavailable
    MsgPrinter.printLoadingPatterns();
    if (!loadPatterns("res/patternlearning/answerpatterns_extract"))
        return false;
    // load tuples and form queries from both interpretation resources
    MsgPrinter.printFormingQueries();
    ass = new Hashtable<String, String>();
    regexs = new Hashtable<String, String>();
    ArrayList<Query> collected = new ArrayList<Query>();
    String[] resourceDirs = { "res/patternlearning/interpretations", "res/patternlearning/interpretations_assess" };
    for (String dir : resourceDirs) {
        for (Query query : formQueries(dir)) {
            collected.add(query);
        }
    }
    Query[] queries = collected.toArray(new Query[collected.size()]);
    // fetch text passages for all queries
    MsgPrinter.printFetchingPassages();
    Result[] results = fetchPassages(queries);
    // assess the answer patterns on the fetched passages
    MsgPrinter.printAssessingPatterns();
    assessPatterns(results);
    // persist the assessed answer patterns
    MsgPrinter.printSavingPatterns();
    return savePatterns("res/patternlearning/answerpatterns_assess");
}
Also used : Query(info.ephyra.querygeneration.Query) ArrayList(java.util.ArrayList) Result(info.ephyra.search.Result)

Aggregations

Result (info.ephyra.search.Result)68 ArrayList (java.util.ArrayList)36 Query (info.ephyra.querygeneration.Query)11 HashSet (java.util.HashSet)9 Hashtable (java.util.Hashtable)9 AnalyzedQuestion (info.ephyra.questionanalysis.AnalyzedQuestion)8 IOException (java.io.IOException)7 QuestionInterpretation (info.ephyra.questionanalysis.QuestionInterpretation)5 Feature (edu.cmu.minorthird.classify.Feature)4 HashMap (java.util.HashMap)4 Predicate (info.ephyra.nlp.semantics.Predicate)3 BagOfWordsG (info.ephyra.querygeneration.generators.BagOfWordsG)3 BufferedReader (java.io.BufferedReader)3 File (java.io.File)3 URL (java.net.URL)3 TRECTarget (info.ephyra.trec.TRECTarget)2 EOFException (java.io.EOFException)2 FileInputStream (java.io.FileInputStream)2 FileOutputStream (java.io.FileOutputStream)2 InputStreamReader (java.io.InputStreamReader)2