
Example 21 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

the class WebTermImportanceFilter method main.

public static void main(String[] args) {
    TEST_TARGET_GENERATION = true;
    MsgPrinter.enableStatusMsgs(true);
    MsgPrinter.enableErrorMsgs(true);
    // create tokenizer
    MsgPrinter.printStatusMsg("Creating tokenizer...");
    if (!OpenNLP.createTokenizer("res/nlp/tokenizer/opennlp/EnglishTok.bin.gz"))
        MsgPrinter.printErrorMsg("Could not create tokenizer.");
    //		LingPipe.createTokenizer();
    // create sentence detector
    //		MsgPrinter.printStatusMsg("Creating sentence detector...");
    //		if (!OpenNLP.createSentenceDetector("res/nlp/sentencedetector/opennlp/EnglishSD.bin.gz"))
    //			MsgPrinter.printErrorMsg("Could not create sentence detector.");
    //		LingPipe.createSentenceDetector();
    // create stemmer
    MsgPrinter.printStatusMsg("Creating stemmer...");
    SnowballStemmer.create();
    // create part of speech tagger
    MsgPrinter.printStatusMsg("Creating POS tagger...");
    if (!OpenNLP.createPosTagger("res/nlp/postagger/opennlp/tag.bin.gz", "res/nlp/postagger/opennlp/tagdict"))
        MsgPrinter.printErrorMsg("Could not create OpenNLP POS tagger.");
    //		if (!StanfordPosTagger.init("res/nlp/postagger/stanford/" +
    //				"train-wsj-0-18.holder"))
    //			MsgPrinter.printErrorMsg("Could not create Stanford POS tagger.");
    // create chunker
    MsgPrinter.printStatusMsg("Creating chunker...");
    if (!OpenNLP.createChunker("res/nlp/phrasechunker/opennlp/EnglishChunk.bin.gz"))
        MsgPrinter.printErrorMsg("Could not create chunker.");
    // create named entity taggers
    MsgPrinter.printStatusMsg("Creating NE taggers...");
    NETagger.loadListTaggers("res/nlp/netagger/lists/");
    NETagger.loadRegExTaggers("res/nlp/netagger/patterns.lst");
    MsgPrinter.printStatusMsg("  ...loading models");
    //			MsgPrinter.printErrorMsg("Could not create OpenNLP NE tagger.");
    if (!StanfordNeTagger.isInitialized() && !StanfordNeTagger.init())
        MsgPrinter.printErrorMsg("Could not create Stanford NE tagger.");
    MsgPrinter.printStatusMsg("  ...done");
    WebTermImportanceFilter wtif = new TargetGeneratorTest(NO_NORMALIZATION);
    TRECTarget[] targets = TREC13To16Parser.loadTargets(args[0]);
    for (TRECTarget target : targets) {
        String question = target.getTargetDesc();
        // query generation
        MsgPrinter.printGeneratingQueries();
        String qn = QuestionNormalizer.normalize(question);
        // print normalized question string
        MsgPrinter.printNormalization(qn);
        // log normalized question string
        Logger.logNormalization(qn);
        // extract keywords and build an analyzed question for query generation
        String[] kws = KeywordExtractor.getKeywords(qn);
        AnalyzedQuestion aq = new AnalyzedQuestion(question);
        aq.setKeywords(kws);
        aq.setFactoid(false);
        // generate bag-of-words queries from the analyzed question
        Query[] queries = new BagOfWordsG().generateQueries(aq);
        for (int q = 0; q < queries.length; q++) queries[q].setOriginalQueryString(question);
        // wrap a placeholder answer in a single Result and run the filter
        Result[] results = new Result[1];
        results[0] = new Result("This would be the answer", queries[0]);
        wtif.apply(results);
    }
}
Also used : Query(info.ephyra.querygeneration.Query) BagOfWordsG(info.ephyra.querygeneration.generators.BagOfWordsG) Result(info.ephyra.search.Result) TRECTarget(info.ephyra.trec.TRECTarget) AnalyzedQuestion(info.ephyra.questionanalysis.AnalyzedQuestion)
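
A hypothetical follow-up to the loop above, showing how the filter output could be inspected. It assumes that apply returns the filtered Result array, as the other filter implementations on this page do, and uses only the getScore and getAnswer accessors that appear in the examples below.

// Hypothetical sketch: capture and print the output of the filter.
// Assumes apply() returns the filtered array (as the other filters on
// this page do); getScore() and getAnswer() are shown in later examples.
Result[] filtered = wtif.apply(results);
for (Result r : filtered) {
    System.out.println(r.getScore() + "\t" + r.getAnswer());
}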

Example 22 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

the class ProperNameFilter method apply.

/**
	 * Filter out result snippets that contain too many proper names. This is to
	 * get rid of enumerations of named entities that happen to include the
	 * target. This might, for instance, be the track list of a compilation LP,
	 * which has a song by the target artist on it.
	 * 
	 * @param results array of <code>Result</code> objects
	 * @return filtered array of <code>Result</code> objects
	 */
public Result[] apply(Result[] results) {
    // results that survive the proper-name check
    ArrayList<Result> rawResults = new ArrayList<Result>();
    for (Result r : results) {
        if (r.getScore() != Float.NEGATIVE_INFINITY) {
            String text = r.getAnswer();
            // tokenize the snippet into terms
            String[] sentence = NETagger.tokenize(text);
            int upperCase = 0;
            int lowerCase = 0;
            // scan the snippet for upper-case tokens; index 0 is skipped
            // because the first token is usually capitalized anyway
            for (int i = 1; i < sentence.length; i++) {
                String term = sentence[i];
                if (term.matches("[A-Z]++.*+")) {
                    upperCase++;
                    // a capitalized function word (e.g. "The", "Of") is a
                    // strong hint of a title or name, so weight it extra
                    if (FunctionWords.lookup(term.toLowerCase()))
                        upperCase += 2;
                } else if (term.matches("[a-z]++.*+"))
                    lowerCase++;
                else if (term.matches("[0-9]++"))
                    lowerCase++;
            }
            // keep the snippet only if capitalized tokens do not dominate
            if (upperCase < lowerCase)
                rawResults.add(r);
        //				else System.out.println("ProperNameFilter: " + text);
        }
    }
    return rawResults.toArray(new Result[rawResults.size()]);
}
Also used : ArrayList(java.util.ArrayList) Result(info.ephyra.search.Result)
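
To make the decision rule easier to test in isolation, the sketch below re-implements the counting heuristic on a plain whitespace-tokenized string. It is a hypothetical helper, not part of the project: it uses String.split instead of NETagger.tokenize and omits the FunctionWords weighting for brevity.

// Hypothetical, self-contained version of the proper-name heuristic above.
// Illustrative only; the real filter tokenizes with NETagger and also
// weights capitalized function words.
static boolean looksLikeProperNameEnumeration(String snippet) {
    String[] tokens = snippet.split("\\s+");
    int upperCase = 0;
    int lowerCase = 0;
    // skip the first token, which is usually capitalized anyway
    for (int i = 1; i < tokens.length; i++) {
        String term = tokens[i];
        if (term.matches("[A-Z].*"))
            upperCase++;
        else if (term.matches("[a-z0-9].*"))
            lowerCase++;
    }
    // the filter drops a snippet when capitalized tokens dominate
    return upperCase >= lowerCase;
}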

Example 23 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

the class ResultLengthComparator method compare.

/**
	 * Compares its two arguments for order. Returns a negative integer, zero,
	 * or a positive integer as the first argument is less than, equal to, or
	 * greater than the second.
	 * 
	 * @param o1 the first object to be compared
	 * @param o2 the second object to be compared
	 * @return a negative integer, zero, or a positive integer as the first
	 *         argument is less than, equal to, or greater than the second
	 */
public int compare(Object o1, Object o2) {
    if (!(o1 instanceof Result) || !(o2 instanceof Result))
        throw new ClassCastException();
    Result r1 = (Result) o1;
    Result r2 = (Result) o2;
    return r1.getAnswer().length() - r2.getAnswer().length();
}
Also used : Result(info.ephyra.search.Result)
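
For context, a comparator like this is typically passed to a sort call. The snippet below is a hypothetical helper showing that usage; since ResultLengthComparator implements the raw Comparator interface, the Arrays.sort call is unchecked.

// Hypothetical usage: order candidate answers from shortest to longest
// answer string (requires import java.util.Arrays).
static Result[] sortByAnswerLength(Result[] candidates) {
    Result[] sorted = candidates.clone();
    // unchecked call: ResultLengthComparator is a raw java.util.Comparator
    Arrays.sort(sorted, new ResultLengthComparator());
    return sorted;
}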

Example 24 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

the class ScoreCombinationFilter method apply.

/**
	 * Filters an array of <code>Result</code> objects.
	 * 
	 * @param results results to filter
	 * @return filtered results
	 */
public Result[] apply(Result[] results) {
    // all results that pass the filter
    List<Result> filtered = new ArrayList<Result>();
    // sort results by their scores in descending order
    results = (new ScoreSorterFilter()).apply(results);
    // separate factoid answers by extractors
    List<Result> factoids = new ArrayList<Result>();
    Hashtable<String, Hashtable<String, Result>> allExtractors = new Hashtable<String, Hashtable<String, Result>>();
    for (Result result : results) {
        // only merge factoid answers with positive, finite scores;
        // pass all other results through unchanged
        if (result.getScore() <= 0 || result.getScore() == Float.POSITIVE_INFINITY) {
            filtered.add(result);
            continue;
        }
        // make sure that answers come from a single extractor
        String[] extractors = result.getExtractionTechniques();
        if (extractors == null || extractors.length != 1) {
            filtered.add(result);
            continue;
        }
        String extractor = extractors[0];
        factoids.add(result);
        Hashtable<String, Result> sameExtractor = allExtractors.get(extractor);
        if (sameExtractor == null) {
            sameExtractor = new Hashtable<String, Result>();
            allExtractors.put(extractor, sameExtractor);
        }
        String norm = StringUtils.normalize(result.getAnswer());
        sameExtractor.put(norm, result);
    }
    // merge answers from different extractors
    String[] extractors = allExtractors.keySet().toArray(new String[allExtractors.size()]);
    Set<String> covered = new HashSet<String>();
    for (Result result : factoids) {
        String norm = StringUtils.normalize(result.getAnswer());
        if (!covered.add(norm))
            continue;
        // get all extractors for the result and the normalized scores
        ArrayList<String> exs = new ArrayList<String>();
        ArrayList<Float> scores = new ArrayList<Float>();
        for (String extractor : extractors) {
            Result r = allExtractors.get(extractor).get(norm);
            if (r != null) {
                exs.add(extractor);
                scores.add(r.getNormScore());
            }
        }
        // set extractors
        result.setExtractionTechniques(exs.toArray(new String[exs.size()]));
        // combine their normalized scores
        float[] scoresA = new float[scores.size()];
        for (int i = 0; i < scoresA.length; i++) scoresA[i] = scores.get(i);
        int totalExtractors = extractors.length;
        float combinedScore = combMNZ(scoresA, totalExtractors);
        // alternatives: combANZ(scoresA, totalExtractors);
        //               combCP(scoresA, totalExtractors);
        result.setScore(combinedScore);
        result.setNormScore(combinedScore);
        filtered.add(result);
    }
    return filtered.toArray(new Result[filtered.size()]);
}
Also used : Hashtable(java.util.Hashtable) ArrayList(java.util.ArrayList) Result(info.ephyra.search.Result) HashSet(java.util.HashSet)
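
The combMNZ method itself is not shown on this page. As a rough guide, the sketch below follows the common CombMNZ formulation (Fox and Shaw): sum the available normalized scores and scale by the number of extractors that produced the answer. Dividing by totalExtractors to keep the result in a normalized range is an assumption; the project's actual implementation may differ.

// Illustrative CombMNZ-style combination, not the project's implementation.
// Sum of normalized scores, multiplied by the number of extractors that
// produced the answer; the division by totalExtractors is an assumption.
static float combMNZSketch(float[] scores, int totalExtractors) {
    float sum = 0;
    int hits = 0;
    for (float s : scores) {
        sum += s;
        if (s > 0)
            hits++;
    }
    return (sum * hits) / totalExtractors;
}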

Example 25 with Result

use of info.ephyra.search.Result in project lucida by claritylab.

the class ScoreNormalizationFilter method readSerializedResults.

/**
	 * Reads serialized results from a file.
	 * 
	 * @param input input file
	 * @return result objects
	 */
private static Result[] readSerializedResults(File input) {
    ArrayList<Result> results = new ArrayList<Result>();
    try {
        FileInputStream fis = new FileInputStream(input);
        ObjectInputStream ois = new ObjectInputStream(fis);
        // the first serialized object must be an AnalyzedQuestion;
        // read it to check the file format, then discard it
        if (!(ois.readObject() instanceof AnalyzedQuestion)) {
            MsgPrinter.printErrorMsg("First serialized object is not an AnalyzedQuestion.");
            System.exit(1);
        }
        try {
            while (true) results.add((Result) ois.readObject());
        } catch (EOFException e) {
        /* end of file reached */
        }
        ois.close();
    } catch (Exception e) {
        MsgPrinter.printErrorMsg("Could not read serialized results:");
        MsgPrinter.printErrorMsg(e.toString());
        System.exit(1);
    }
    return results.toArray(new Result[results.size()]);
}
Also used : ArrayList(java.util.ArrayList) EOFException(java.io.EOFException) AnalyzedQuestion(info.ephyra.questionanalysis.AnalyzedQuestion) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException) EOFException(java.io.EOFException) Result(info.ephyra.search.Result) ObjectInputStream(java.io.ObjectInputStream)
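
The reader above implies a simple file layout: one serialized AnalyzedQuestion followed by any number of Result objects. A hypothetical writer producing that layout might look like the sketch below; it assumes both classes implement java.io.Serializable (which the reader already requires) and uses only standard java.io classes (ObjectOutputStream, FileOutputStream).

// Hypothetical counterpart to readSerializedResults(): writes one
// AnalyzedQuestion header object followed by all Result objects.
private static void writeSerializedResults(File output, AnalyzedQuestion aq, Result[] results) throws IOException {
    ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(output));
    try {
        oos.writeObject(aq);
        for (Result result : results) oos.writeObject(result);
    } finally {
        oos.close();
    }
}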

Aggregations

Result (info.ephyra.search.Result): 68 usages
ArrayList (java.util.ArrayList): 36 usages
Query (info.ephyra.querygeneration.Query): 11 usages
HashSet (java.util.HashSet): 9 usages
Hashtable (java.util.Hashtable): 9 usages
AnalyzedQuestion (info.ephyra.questionanalysis.AnalyzedQuestion): 8 usages
IOException (java.io.IOException): 7 usages
QuestionInterpretation (info.ephyra.questionanalysis.QuestionInterpretation): 5 usages
Feature (edu.cmu.minorthird.classify.Feature): 4 usages
HashMap (java.util.HashMap): 4 usages
Predicate (info.ephyra.nlp.semantics.Predicate): 3 usages
BagOfWordsG (info.ephyra.querygeneration.generators.BagOfWordsG): 3 usages
BufferedReader (java.io.BufferedReader): 3 usages
File (java.io.File): 3 usages
URL (java.net.URL): 3 usages
TRECTarget (info.ephyra.trec.TRECTarget): 2 usages
EOFException (java.io.EOFException): 2 usages
FileInputStream (java.io.FileInputStream): 2 usages
FileOutputStream (java.io.FileOutputStream): 2 usages
InputStreamReader (java.io.InputStreamReader): 2 usages