Example 1 with TRECTarget

Use of info.ephyra.trec.TRECTarget in project lucida by claritylab.

From the class WebTermImportanceFilter, method main:

public static void main(String[] args) {
    TEST_TARGET_GENERATION = true;
    MsgPrinter.enableStatusMsgs(true);
    MsgPrinter.enableErrorMsgs(true);
    // create tokenizer
    MsgPrinter.printStatusMsg("Creating tokenizer...");
    if (!OpenNLP.createTokenizer("res/nlp/tokenizer/opennlp/EnglishTok.bin.gz"))
        MsgPrinter.printErrorMsg("Could not create tokenizer.");
    //		LingPipe.createTokenizer();
    // create sentence detector
    //		MsgPrinter.printStatusMsg("Creating sentence detector...");
    //		if (!OpenNLP.createSentenceDetector("res/nlp/sentencedetector/opennlp/EnglishSD.bin.gz"))
    //			MsgPrinter.printErrorMsg("Could not create sentence detector.");
    //		LingPipe.createSentenceDetector();
    // create stemmer
    MsgPrinter.printStatusMsg("Creating stemmer...");
    SnowballStemmer.create();
    // create part of speech tagger
    MsgPrinter.printStatusMsg("Creating POS tagger...");
    if (!OpenNLP.createPosTagger("res/nlp/postagger/opennlp/tag.bin.gz", "res/nlp/postagger/opennlp/tagdict"))
        MsgPrinter.printErrorMsg("Could not create OpenNLP POS tagger.");
    //		if (!StanfordPosTagger.init("res/nlp/postagger/stanford/" +
    //				"train-wsj-0-18.holder"))
    //			MsgPrinter.printErrorMsg("Could not create Stanford POS tagger.");
    // create chunker
    MsgPrinter.printStatusMsg("Creating chunker...");
    if (!OpenNLP.createChunker("res/nlp/phrasechunker/opennlp/EnglishChunk.bin.gz"))
        MsgPrinter.printErrorMsg("Could not create chunker.");
    // create named entity taggers
    MsgPrinter.printStatusMsg("Creating NE taggers...");
    NETagger.loadListTaggers("res/nlp/netagger/lists/");
    NETagger.loadRegExTaggers("res/nlp/netagger/patterns.lst");
    MsgPrinter.printStatusMsg("  ...loading models");
    //		if (!NETagger.loadNameFinders("res/nlp/netagger/opennlp/"))
    //			MsgPrinter.printErrorMsg("Could not create OpenNLP NE tagger.");
    if (!StanfordNeTagger.isInitialized() && !StanfordNeTagger.init())
        MsgPrinter.printErrorMsg("Could not create Stanford NE tagger.");
    MsgPrinter.printStatusMsg("  ...done");
    WebTermImportanceFilter wtif = new TargetGeneratorTest(NO_NORMALIZATION);
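    // Load the TREC 13-16 targets from the file passed as the first
    // command-line argument; each target's description is used as the question.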
    TRECTarget[] targets = TREC13To16Parser.loadTargets(args[0]);
    for (TRECTarget target : targets) {
        String question = target.getTargetDesc();
        // query generation
        MsgPrinter.printGeneratingQueries();
        String qn = QuestionNormalizer.normalize(question);
        // print normalized question string
        MsgPrinter.printNormalization(qn);
        // log normalized question string
        Logger.logNormalization(qn);
        String[] kws = KeywordExtractor.getKeywords(qn);
        AnalyzedQuestion aq = new AnalyzedQuestion(question);
        aq.setKeywords(kws);
        aq.setFactoid(false);
        Query[] queries = new BagOfWordsG().generateQueries(aq);
        for (int q = 0; q < queries.length; q++) queries[q].setOriginalQueryString(question);
        Result[] results = new Result[1];
        results[0] = new Result("This would be the answer", queries[0]);
        wtif.apply(results);
    }
}
Also used: Query (info.ephyra.querygeneration.Query), BagOfWordsG (info.ephyra.querygeneration.generators.BagOfWordsG), Result (info.ephyra.search.Result), TRECTarget (info.ephyra.trec.TRECTarget), AnalyzedQuestion (info.ephyra.questionanalysis.AnalyzedQuestion)
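
The driver expects the path of a TREC target file as its first argument. As a minimal usage sketch (the wrapper class and file path below are hypothetical, not part of Ephyra; the package path for TREC13To16Parser is assumed from Ephyra's layout), it could be invoked programmatically:

import info.ephyra.trec.TREC13To16Parser;
import info.ephyra.trec.TRECTarget;

public class WebTermImportanceFilterDemo {

    public static void main(String[] args) {
        // Hypothetical path to a TREC 13-16 target file; adjust to your checkout.
        String targetFile = "res/testdata/trec15targets.xml";
        // Sanity check: the parser returns one TRECTarget per target element.
        TRECTarget[] targets = TREC13To16Parser.loadTargets(targetFile);
        System.out.println("Loaded " + targets.length + " targets");
        // Delegate to the test driver shown above.
        WebTermImportanceFilter.main(new String[] { targetFile });
    }
}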

Example 2 with TRECTarget

Use of info.ephyra.trec.TRECTarget in project lucida by claritylab.

From the class WikipediaTermImportanceFilter, method main:

public static void main(String[] args) {
    // Field name is spelled this way in the Ephyra source.
    TEST_TERM_DOWMLOD = true;
    MsgPrinter.enableStatusMsgs(true);
    MsgPrinter.enableErrorMsgs(true);
    // create tokenizer
    MsgPrinter.printStatusMsg("Creating tokenizer...");
    if (!OpenNLP.createTokenizer("res/nlp/tokenizer/opennlp/EnglishTok.bin.gz"))
        MsgPrinter.printErrorMsg("Could not create tokenizer.");
    //		LingPipe.createTokenizer();
    //		// create sentence detector
    //		MsgPrinter.printStatusMsg("Creating sentence detector...");
    //		if (!OpenNLP.createSentenceDetector("res/nlp/sentencedetector/opennlp/EnglishSD.bin.gz"))
    //			MsgPrinter.printErrorMsg("Could not create sentence detector.");
    //		LingPipe.createSentenceDetector();
    // create stemmer
    MsgPrinter.printStatusMsg("Creating stemmer...");
    SnowballStemmer.create();
    //		// create part of speech tagger
    //		MsgPrinter.printStatusMsg("Creating POS tagger...");
    //		if (!OpenNLP.createPosTagger("res/nlp/postagger/opennlp/tag.bin.gz",
    //									 "res/nlp/postagger/opennlp/tagdict"))
    //			MsgPrinter.printErrorMsg("Could not create OpenNLP POS tagger.");
    //		if (!StanfordPosTagger.init("res/nlp/postagger/stanford/" +
    //				"train-wsj-0-18.holder"))
    //			MsgPrinter.printErrorMsg("Could not create Stanford POS tagger.");
    //		// create chunker
    //		MsgPrinter.printStatusMsg("Creating chunker...");
    //		if (!OpenNLP.createChunker("res/nlp/phrasechunker/opennlp/" +
    //								   "EnglishChunk.bin.gz"))
    //			MsgPrinter.printErrorMsg("Could not create chunker.");
    // create named entity taggers
    MsgPrinter.printStatusMsg("Creating NE taggers...");
    NETagger.loadListTaggers("res/nlp/netagger/lists/");
    NETagger.loadRegExTaggers("res/nlp/netagger/patterns.lst");
    MsgPrinter.printStatusMsg("  ...loading models");
    //		if (!NETagger.loadNameFinders("res/nlp/netagger/opennlp/"))
    //			MsgPrinter.printErrorMsg("Could not create OpenNLP NE tagger.");
    //		if (!StanfordNeTagger.isInitialized() && !StanfordNeTagger.init())
    //			MsgPrinter.printErrorMsg("Could not create Stanford NE tagger.");
    MsgPrinter.printStatusMsg("  ...done");
    WikipediaTermImportanceFilter wtif = new WikipediaTermImportanceFilter(NO_NORMALIZATION, NO_NORMALIZATION, false);
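    // As in Example 1: load the TREC targets and run the shared query pipeline.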
    TRECTarget[] targets = TREC13To16Parser.loadTargets(args[0]);
    for (TRECTarget target : targets) {
        String question = target.getTargetDesc();
        // query generation
        MsgPrinter.printGeneratingQueries();
        String qn = QuestionNormalizer.normalize(question);
        // print normalized question string
        MsgPrinter.printNormalization(qn);
        // log normalized question string
        Logger.logNormalization(qn);
        String[] kws = KeywordExtractor.getKeywords(qn);
        AnalyzedQuestion aq = new AnalyzedQuestion(question);
        aq.setKeywords(kws);
        aq.setFactoid(false);
        Query[] queries = new BagOfWordsG().generateQueries(aq);
        for (int q = 0; q < queries.length; q++) queries[q].setOriginalQueryString(question);
        Result[] results = new Result[1];
        results[0] = new Result("This would be the answer", queries[0]);
        wtif.apply(results);
    }
}
Also used: Query (info.ephyra.querygeneration.Query), TRECTarget (info.ephyra.trec.TRECTarget), AnalyzedQuestion (info.ephyra.questionanalysis.AnalyzedQuestion), BagOfWordsG (info.ephyra.querygeneration.generators.BagOfWordsG), Result (info.ephyra.search.Result)
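
Both drivers share identical question-analysis and query-generation steps. A minimal sketch that factors this shared pipeline into a helper (the class and method names are ours, not Ephyra's; the QuestionNormalizer and KeywordExtractor import paths are assumed from Ephyra's layout) uses only calls that appear in the examples above:

import info.ephyra.querygeneration.Query;
import info.ephyra.querygeneration.generators.BagOfWordsG;
import info.ephyra.questionanalysis.AnalyzedQuestion;
import info.ephyra.questionanalysis.KeywordExtractor;
import info.ephyra.questionanalysis.QuestionNormalizer;
import info.ephyra.search.Result;

public class TargetQueryHelper {

    // Shared pipeline from the two examples: normalize the target description,
    // extract keywords, build a non-factoid AnalyzedQuestion, generate
    // bag-of-words queries, and wrap a placeholder answer in a Result.
    public static Result buildPlaceholderResult(String question) {
        String qn = QuestionNormalizer.normalize(question);
        String[] kws = KeywordExtractor.getKeywords(qn);
        AnalyzedQuestion aq = new AnalyzedQuestion(question);
        aq.setKeywords(kws);
        aq.setFactoid(false);
        Query[] queries = new BagOfWordsG().generateQueries(aq);
        for (Query query : queries)
            query.setOriginalQueryString(question);
        // Placeholder result wired to the first query, as in the drivers above.
        return new Result("This would be the answer", queries[0]);
    }
}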

Aggregations

Query (info.ephyra.querygeneration.Query): 2 uses
BagOfWordsG (info.ephyra.querygeneration.generators.BagOfWordsG): 2 uses
AnalyzedQuestion (info.ephyra.questionanalysis.AnalyzedQuestion): 2 uses
Result (info.ephyra.search.Result): 2 uses
TRECTarget (info.ephyra.trec.TRECTarget): 2 uses