
Example 11 with Result

Use of info.ephyra.search.Result in project lucida by claritylab.

From class OpenEphyraServer, method askFactoid.

/**
	 * Asks Ephyra a factoid question and returns up to <code>maxAnswers</code>
	 * results that have a score of at least <code>absThresh</code>.
	 * 
	 * @param question factoid question
	 * @param maxAnswers maximum number of answers
	 * @param absThresh absolute threshold for scores
	 * @return array of results
	 */
public Result[] askFactoid(String question, int maxAnswers, float absThresh) {
    // initialize pipeline
    initFactoid();
    // analyze question
    MsgPrinter.printAnalyzingQuestion();
    AnalyzedQuestion aq = QuestionAnalysis.analyze(question);
    // get answers
    Result[] results = runPipeline(aq, maxAnswers, absThresh);
    return results;
}
Also used : AnalyzedQuestion(info.ephyra.questionanalysis.AnalyzedQuestion) Result(info.ephyra.search.Result)
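
To illustrate the contract, here is a minimal calling sketch. It is hypothetical: the no-argument constructor and the question string are illustrative, and only askFactoid and the Result accessors shown elsewhere on this page are assumed.

// Hypothetical usage sketch; constructor and values are illustrative.
OpenEphyraServer ephyra = new OpenEphyraServer();
// ask for at most 10 answers whose score is at least 0.5
Result[] answers = ephyra.askFactoid("When was Alan Turing born?", 10, 0.5f);
for (Result answer : answers)
    System.out.println(answer.getAnswer() + " (score: " + answer.getScore() + ")");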

Example 12 with Result

Use of info.ephyra.search.Result in project lucida by claritylab.

From class AdaptiveNumberOfKeywordsFilter, method apply.

/**
	 * Score result snippets according to the number of keywords (target terms)
	 * they contain. Within two snippets from the same document, transfer score
	 * from a snippet to the subsequent one if the former contains many of the
	 * keywords. The idea is that a subsequent snippet might use a pronoun for
	 * the target (thus not contain the target itself), but provide useful
	 * information anyway.
	 * 
	 * @param results array of <code>Result</code> objects
	 * @return extended array of <code>Result</code> objects
	 */
public Result[] apply(Result[] results) {
    // raw results returned by the searchers
    ArrayList<Result> rawResults = new ArrayList<Result>();
    int lastScore = 0;
    String lastDocID = "";
    int keywordCount = 1;
    for (Result result : results) {
        if (result.getScore() != Float.NEGATIVE_INFINITY) {
            String[] keywords = NETagger.tokenize(result.getQuery().getQueryString());
            for (int k = 0; k < keywords.length; k++) keywords[k] = SnowballStemmer.stem(keywords[k]);
            int k = keywords.length;
            keywordCount = k;
            String[] wordsInResult = NETagger.tokenize(result.getAnswer());
            for (int r = 0; r < wordsInResult.length; r++) wordsInResult[r] = SnowballStemmer.stem(wordsInResult[r]);
            int m = getNumberOfMatches(keywords, wordsInResult);
            if (m >= Math.floor(Math.sqrt(k - 1) + 1)) {
                //	remember doc ID so score is propagated only within same document
                lastDocID = result.getDocID();
                if (lastDocID == null)
                    lastDocID = "";
                //	remember score
                lastScore = ((m * m + 1) / 2);
                //					lastScore = ((m + 1) / 2);	//	remember score
                // manipulate score
                result.incScore(m * m);
                //					result.incScore(m);  // manipulate score
                // keep result
                rawResults.add(result);
            } else if ((lastScore > 0) && lastDocID.equalsIgnoreCase(result.getDocID())) {
                // manipulate score
                result.incScore(lastScore);
                // keep result
                rawResults.add(result);
                //	decay last score
                lastScore = (lastScore / 2);
            } else {
                //	reset remembered score
                lastScore = 0;
            }
        }
    }
    // if too few results, match again and consider only proper names
    if (rawResults.size() < 100) {
        for (Result result : results) {
            if (result.getScore() != Float.NEGATIVE_INFINITY) {
                String[] keywords = NETagger.tokenize(result.getQuery().getQueryString());
                ArrayList<String> keywordList = new ArrayList<String>();
                // keep only keywords that start with an uppercase letter (proper names)
                for (int k = 0; k < keywords.length; k++) if (keywords[k].matches("[A-Z]++.*+"))
                    keywordList.add(SnowballStemmer.stem(keywords[k]));
                keywords = keywordList.toArray(new String[keywordList.size()]);
                int k = keywords.length;
                // do this only if there are now fewer keywords
                if ((keywords.length != 0) && (k < keywordCount)) {
                    String[] wordsInResult = NETagger.tokenize(result.getAnswer());
                    for (int r = 0; r < wordsInResult.length; r++) wordsInResult[r] = SnowballStemmer.stem(wordsInResult[r]);
                    int m = getNumberOfMatches(keywords, wordsInResult);
                    if (m >= Math.floor(Math.sqrt(k - 1) + 1)) {
                        // manipulate score
                        result.incScore(m * m);
                        //							result.incScore(m);  // manipulate score
                        // keep result
                        rawResults.add(result);
                    }
                }
            }
        }
    }
    return rawResults.toArray(new Result[rawResults.size()]);
}
Also used : ArrayList(java.util.ArrayList) Result(info.ephyra.search.Result)
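
The helper getNumberOfMatches is referenced above but not shown on this page. A plausible sketch, assuming it simply counts how many of the stemmed keywords occur among the stemmed result tokens; the project's actual implementation may differ:

// Plausible sketch only; the project's actual helper may differ.
// Assumes imports of java.util.Arrays and java.util.HashSet.
private int getNumberOfMatches(String[] keywords, String[] wordsInResult) {
    // collect the result tokens for constant-time lookup
    HashSet<String> words = new HashSet<String>(Arrays.asList(wordsInResult));
    int matches = 0;
    for (String keyword : keywords)
        if (words.contains(keyword)) matches++;
    return matches;
}

With the threshold m >= Math.floor(Math.sqrt(k - 1) + 1) used above, a snippet with k = 5 keywords needs at least 3 matches and one with k = 10 needs at least 4, so the required overlap grows sublinearly with the number of keywords.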

Example 13 with Result

Use of info.ephyra.search.Result in project lucida by claritylab.

From class AnswerPatternFilter, method apply.

/**
	 * Applies the answer patterns to the answer strings of the
	 * <code>Result</code> objects and creates a new <code>Result</code> for
	 * each extracted unique answer.
	 * 
	 * @param results array of <code>Result</code> objects
	 * @return extended array of <code>Result</code> objects
	 */
public Result[] apply(Result[] results) {
    // extracted factoid answers and corresponding results
    Hashtable<String, Result> factoids = new Hashtable<String, Result>();
    for (Result result : results) {
        // only apply this filter to results for the pattern matching
        // approach
        Query query = result.getQuery();
        QuestionInterpretation qi = query.getInterpretation();
        if (!query.extractWith(ID) || qi == null || result.getScore() > Float.NEGATIVE_INFINITY)
            continue;
        // extract PROPERTY objects
        extractPos(result);
        // create new result for each unique normalized PROPERTY object
        for (int i = 0; i < extr.size(); i++) {
            String po = extr.get(i);
            String[] neTypes = types.get(i);
            String norm = StringUtils.normalize(po);
            String sentence = sents.get(i);
            float conf = aps.get(i).getConfidence();
            Result factoid = factoids.get(norm);
            if (factoid == null) {
                // new answer
                // query, doc ID and sentence can be ambiguous
                factoid = new Result(po, result.getQuery(), result.getDocID());
                factoid.setSentence(sentence);
                factoid.addExtractionTechnique(ID);
                factoids.put(norm, factoid);
            }
            if (neTypes != null)
                for (String neType : neTypes) factoid.addNeType(neType);
            factoid.incScore(conf);
        }
    }
    // keep old results
    Result[] newResults = factoids.values().toArray(new Result[factoids.size()]);
    Result[] allResults = new Result[results.length + newResults.length];
    System.arraycopy(results, 0, allResults, 0, results.length);
    System.arraycopy(newResults, 0, allResults, results.length, newResults.length);
    return allResults;
}
Also used : QuestionInterpretation(info.ephyra.questionanalysis.QuestionInterpretation) Query(info.ephyra.querygeneration.Query) Hashtable(java.util.Hashtable) Result(info.ephyra.search.Result)
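
The deduplication pattern above (normalize each extraction, use the normalized form as a Hashtable key, and accumulate confidence on the shared Result) can be illustrated in isolation. This is a hypothetical standalone sketch; toLowerCase stands in for StringUtils.normalize, whose exact behavior is not shown on this page:

import java.util.Hashtable;

// Hypothetical sketch of dedup-with-score-accumulation; not Ephyra API.
public class DedupSketch {
    public static void main(String[] args) {
        String[] extractions = { "Alan Turing", "alan turing", "ALAN TURING" };
        float[] confidences = { 0.4f, 0.3f, 0.2f };
        Hashtable<String, Float> scores = new Hashtable<String, Float>();
        for (int i = 0; i < extractions.length; i++) {
            // toLowerCase stands in for StringUtils.normalize
            String norm = extractions[i].toLowerCase();
            Float old = scores.get(norm);
            scores.put(norm, (old == null ? 0f : old) + confidences[i]);
        }
        // one key, "alan turing", with the summed confidence of all three variants
        System.out.println(scores);
    }
}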

Example 14 with Result

Use of info.ephyra.search.Result in project lucida by claritylab.

From class AnswerProjectionFilter, method apply.

/**
	 * Projects Web answers onto the corpus.
	 * 
	 * @param results array of <code>Result</code> objects from the Web
	 * @return array of <code>Result</code> objects from the corpus
	 */
public Result[] apply(Result[] results) {
    // split corpus results into factoid answers and raw results
    Hashtable<String, Result> factoids = new Hashtable<String, Result>();
    Hashtable<String, Result> sentences = new Hashtable<String, Result>();
    ArrayList<String> normSentences = new ArrayList<String>();
    Filter sorter = new HitPositionSorterFilter();
    // sort by hit position
    resultsCorp = sorter.apply(resultsCorp);
    for (Result resultCorp : resultsCorp) {
        if (resultCorp.getScore() > 0) {
            // factoid answer
            String norm = StringUtils.normalize(resultCorp.getAnswer());
            Result factoid = factoids.get(norm);
            if (factoid != null) {
                if (hasHigherPreference(resultCorp, factoid)) {
                    factoids.put(norm, resultCorp);
                    String[] neTypes = factoid.getNeTypes();
                    if (neTypes != null)
                        for (String neType : neTypes) resultCorp.addNeType(neType);
                } else {
                    String[] neTypes = resultCorp.getNeTypes();
                    if (neTypes != null)
                        for (String neType : neTypes) factoid.addNeType(neType);
                }
            } else {
                factoids.put(norm, resultCorp);
            }
        } else {
            // raw result
            String[] sents = OpenNLP.sentDetect(resultCorp.getAnswer());
            for (String sent : sents) {
                // one result for each sentence
                String norm = StringUtils.normalize(sent);
                if (!sentences.containsKey(norm)) {
                    Result sentence = resultCorp.getCopy();
                    sentence.setAnswer(sent);
                    sentences.put(norm, sentence);
                    normSentences.add(norm);
                }
            }
        }
    }
    // project web results onto corpus
    ArrayList<Result> projected = new ArrayList<Result>();
    for (Result resultWeb : results) {
        // only project factoids
        if (resultWeb.getScore() <= 0)
            continue;
        String norm = StringUtils.normalize(resultWeb.getAnswer());
        // Answer projection rules:
        // - first try to find a matching factoid answer extracted from the
        //   corpus, only if this attempt fails browse the raw results
        // - a named entity from a model-based tagger is projected only if
        //   the same named entity was extracted from the corpus (this takes
        //   the poor performance of the model-based NE taggers on the noisy
        //   Web data into account)
        // - if a factoid answer was extracted from the corpus with more
        //   than one technique, then the first extraction technique in
        //   'EXTRACTION_TECHNIQUES' determines the supporting document
        Result factoid = factoids.get(norm);
        if (factoid != null && (!NETagger.allModelType(resultWeb.getNeTypes()) || factoid.isNamedEntity())) {
            // factoid answer also extracted from corpus:
            // if web answer not a named entity from a model-based tagger or
            // corpus answer also a named entity
            // -> project answer
            Result result = resultWeb.getCopy();
            result.setAnswer(factoid.getAnswer());
            result.setDocID(factoid.getDocID());
            result.setSentence(factoid.getSentence());
            projected.add(result);
        } else if (!NETagger.allModelType(resultWeb.getNeTypes())) {
            // factoid answer not extracted from corpus:
            // if answer not a named entity from a model-based tagger
            // -> browse sentences for answer
            String normRegex = RegexConverter.strToRegexWithBounds(norm);
            for (String normSentence : normSentences) {
                String[] truncs = normSentence.split(normRegex, -1);
                if (truncs.length > 1) {
                    // sentence contains answer?
                    // undo normalization
                    Result sentence = sentences.get(normSentence);
                    String sent = sentence.getAnswer();
                    int start = truncs[0].split(" ", -1).length - 1;
                    int end = start + norm.split(" ").length;
                    String[] tokens = NETagger.tokenize(sent);
                    String answer = tokens[start];
                    for (int i = start + 1; i < end; i++) answer += " " + tokens[i];
                    answer = OpenNLP.untokenize(answer, sent);
                    if (norm.equals(StringUtils.normalize(answer))) {
                        Result result = resultWeb.getCopy();
                        result.setAnswer(answer);
                        result.setDocID(sentence.getDocID());
                        result.setSentence(sentence.getAnswer());
                        projected.add(result);
                        break;
                    } else {
                        MsgPrinter.printErrorMsg("\nNormalization could " + "not be undone:\n" + norm);
                    }
                }
            }
        }
    }
    return projected.toArray(new Result[projected.size()]);
}
Also used : Hashtable(java.util.Hashtable) ArrayList(java.util.ArrayList) Result(info.ephyra.search.Result)
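
The span-recovery step in the else branch above deserves a closer look: it splits the normalized sentence around the normalized answer, counts how many tokens precede the match, then reads the same token span out of the original sentence to undo the normalization. A self-contained sketch of the idea, with whitespace tokenization and toLowerCase as simplified stand-ins for NETagger.tokenize and StringUtils.normalize:

import java.util.regex.Pattern;

// Hypothetical sketch of span recovery; simplified stand-ins for Ephyra's
// normalization, tokenization, and regex conversion.
public class SpanRecoverySketch {
    public static void main(String[] args) {
        String sent = "Alan Turing was born in London";
        String norm = "alan turing";               // normalized answer
        String normSentence = sent.toLowerCase();  // normalized sentence
        // Pattern.quote stands in for RegexConverter.strToRegexWithBounds
        String[] truncs = normSentence.split(Pattern.quote(norm), -1);
        if (truncs.length > 1) {  // the sentence contains the answer
            // tokens before the match determine the start of the span
            int start = truncs[0].split(" ", -1).length - 1;
            int end = start + norm.split(" ").length;
            String[] tokens = sent.split(" ");
            StringBuilder answer = new StringBuilder(tokens[start]);
            for (int i = start + 1; i < end; i++) answer.append(' ').append(tokens[i]);
            System.out.println(answer);  // prints "Alan Turing"
        }
    }
}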

Example 15 with Result

Use of info.ephyra.search.Result in project lucida by claritylab.

From class DeserializationFilter, method apply.

/**
	 * Filters an array of <code>Result</code> objects.
	 * 
	 * @param results results to filter
	 * @return filtered results
	 */
public Result[] apply(Result[] results) {
    // any input file set?
    if (serialFiles == null || serialFiles.length == 0)
        return results;
    // keep old results
    ArrayList<Result> resultsL = new ArrayList<Result>();
    for (Result result : results) resultsL.add(result);
    // deserialize and add results
    for (File serialFile : serialFiles) {
        // input file exists?
        if (!serialFile.exists())
            continue;
        try {
            FileInputStream fis = new FileInputStream(serialFile);
            ObjectInputStream ois = new ObjectInputStream(fis);
            try {
                while (true) {
                    Object o = ois.readObject();
                    if (o instanceof Result) {
                        Result result = (Result) o;
                        resultsL.add(result);
                    }
                }
            } catch (EOFException e) {
            /* end of file reached */
            }
            ois.close();
        } catch (Exception e) {
            MsgPrinter.printErrorMsg("Could not read serialized results:");
            MsgPrinter.printErrorMsg(e.toString());
            System.exit(1);
        }
    }
    return resultsL.toArray(new Result[resultsL.size()]);
}
Also used : ArrayList(java.util.ArrayList) EOFException(java.io.EOFException) File(java.io.File) FileInputStream(java.io.FileInputStream) Result(info.ephyra.search.Result) ObjectInputStream(java.io.ObjectInputStream)
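
For completeness, the writing side of this pattern, serializing Result objects one at a time so the read-until-EOFException loop above can consume them, might look as follows. A minimal sketch: the class and method names are illustrative, and it assumes Result implements java.io.Serializable (which the deserialization above already requires):

import info.ephyra.search.Result;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;

// Minimal sketch of the writing side; names are illustrative.
public class SerializationSketch {
    public static void serialize(Result[] results, String path) throws IOException {
        ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(path));
        try {
            // one object per result, matching the reader's while(true)/readObject loop
            for (Result result : results) oos.writeObject(result);
        } finally {
            oos.close();
        }
    }
}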

Aggregations

Result (info.ephyra.search.Result): 68
ArrayList (java.util.ArrayList): 36
Query (info.ephyra.querygeneration.Query): 11
HashSet (java.util.HashSet): 9
Hashtable (java.util.Hashtable): 9
AnalyzedQuestion (info.ephyra.questionanalysis.AnalyzedQuestion): 8
IOException (java.io.IOException): 7
QuestionInterpretation (info.ephyra.questionanalysis.QuestionInterpretation): 5
Feature (edu.cmu.minorthird.classify.Feature): 4
HashMap (java.util.HashMap): 4
Predicate (info.ephyra.nlp.semantics.Predicate): 3
BagOfWordsG (info.ephyra.querygeneration.generators.BagOfWordsG): 3
BufferedReader (java.io.BufferedReader): 3
File (java.io.File): 3
URL (java.net.URL): 3
TRECTarget (info.ephyra.trec.TRECTarget): 2
EOFException (java.io.EOFException): 2
FileInputStream (java.io.FileInputStream): 2
FileOutputStream (java.io.FileOutputStream): 2
InputStreamReader (java.io.InputStreamReader): 2