use of info.ephyra.search.Result in project lucida by claritylab.
the class WebTermImportanceFilter method main.
public static void main(String[] args) {
    TEST_TARGET_GENERATION = true;
    MsgPrinter.enableStatusMsgs(true);
    MsgPrinter.enableErrorMsgs(true);
    // create tokenizer
    MsgPrinter.printStatusMsg("Creating tokenizer...");
    if (!OpenNLP.createTokenizer("res/nlp/tokenizer/opennlp/EnglishTok.bin.gz"))
        MsgPrinter.printErrorMsg("Could not create tokenizer.");
    // LingPipe.createTokenizer();
    // create sentence detector
    // MsgPrinter.printStatusMsg("Creating sentence detector...");
    // if (!OpenNLP.createSentenceDetector("res/nlp/sentencedetector/opennlp/EnglishSD.bin.gz"))
    //     MsgPrinter.printErrorMsg("Could not create sentence detector.");
    // LingPipe.createSentenceDetector();
    // create stemmer
    MsgPrinter.printStatusMsg("Creating stemmer...");
    SnowballStemmer.create();
    // create part of speech tagger
    MsgPrinter.printStatusMsg("Creating POS tagger...");
    if (!OpenNLP.createPosTagger("res/nlp/postagger/opennlp/tag.bin.gz",
            "res/nlp/postagger/opennlp/tagdict"))
        MsgPrinter.printErrorMsg("Could not create OpenNLP POS tagger.");
    // if (!StanfordPosTagger.init("res/nlp/postagger/stanford/" +
    //         "train-wsj-0-18.holder"))
    //     MsgPrinter.printErrorMsg("Could not create Stanford POS tagger.");
    // create chunker
    MsgPrinter.printStatusMsg("Creating chunker...");
    if (!OpenNLP.createChunker("res/nlp/phrasechunker/opennlp/EnglishChunk.bin.gz"))
        MsgPrinter.printErrorMsg("Could not create chunker.");
    // create named entity taggers
    MsgPrinter.printStatusMsg("Creating NE taggers...");
    NETagger.loadListTaggers("res/nlp/netagger/lists/");
    NETagger.loadRegExTaggers("res/nlp/netagger/patterns.lst");
    MsgPrinter.printStatusMsg(" ...loading models");
    // MsgPrinter.printErrorMsg("Could not create OpenNLP NE tagger.");
    if (!StanfordNeTagger.isInitialized() && !StanfordNeTagger.init())
        MsgPrinter.printErrorMsg("Could not create Stanford NE tagger.");
    MsgPrinter.printStatusMsg(" ...done");
    WebTermImportanceFilter wtif = new TargetGeneratorTest(NO_NORMALIZATION);
    TRECTarget[] targets = TREC13To16Parser.loadTargets(args[0]);
    for (TRECTarget target : targets) {
        String question = target.getTargetDesc();
        // query generation
        MsgPrinter.printGeneratingQueries();
        String qn = QuestionNormalizer.normalize(question);
        // print normalized question string
        MsgPrinter.printNormalization(qn);
        // log normalized question string
        Logger.logNormalization(qn);
        String[] kws = KeywordExtractor.getKeywords(qn);
        AnalyzedQuestion aq = new AnalyzedQuestion(question);
        aq.setKeywords(kws);
        aq.setFactoid(false);
        Query[] queries = new BagOfWordsG().generateQueries(aq);
        for (int q = 0; q < queries.length; q++)
            queries[q].setOriginalQueryString(question);
        Result[] results = new Result[1];
        results[0] = new Result("This would be the answer", queries[0]);
        wtif.apply(results);
    }
}
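The last lines of the loop show the pattern every snippet on this page builds on: a candidate answer string is wrapped in a Result, and a Result[] is handed to a filter's apply method. Because each filter both consumes and produces Result[], filters chain naturally. A minimal sketch; the specific filter instances and their order are illustrative assumptions, not the pipeline Ephyra actually configures:

    // Filters consume and produce Result[], so each filter's output feeds
    // the next directly. ProperNameFilter and ScoreCombinationFilter are
    // shown later on this page; the ordering here is illustrative only.
    Result[] candidates = wtif.apply(results);
    candidates = new ProperNameFilter().apply(candidates);
    candidates = new ScoreCombinationFilter().apply(candidates);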
use of info.ephyra.search.Result in project lucida by claritylab.
the class ProperNameFilter method apply.
/**
 * Filters out result snippets that contain too many proper names. This is
 * to get rid of enumerations of named entities that happen to include the
 * target. This might, for instance, be the track list of a compilation LP,
 * which has a song by the target artist on it.
 *
 * @param results array of <code>Result</code> objects
 * @return filtered array of <code>Result</code> objects
 */
public Result[] apply(Result[] results) {
    // results that pass the filter
    ArrayList<Result> rawResults = new ArrayList<Result>();
    for (Result r : results) {
        if (r.getScore() != Float.NEGATIVE_INFINITY) {
            String text = r.getAnswer();
            // tokenize snippet
            String[] sentence = NETagger.tokenize(text);
            int upperCase = 0;
            int lowerCase = 0;
            // count upper-case and lower-case tokens, skipping the first
            // token, whose capitalization is uninformative at the start
            // of a sentence
            for (int i = 1; i < sentence.length; i++) {
                String term = sentence[i];
                if (term.matches("[A-Z]++.*+")) {
                    upperCase++;
                    // capitalized function words (e.g. in titles) are
                    // counted with extra weight
                    if (FunctionWords.lookup(term.toLowerCase()))
                        upperCase += 2;
                } else if (term.matches("[a-z]++.*+"))
                    lowerCase++;
                else if (term.matches("[0-9]++"))
                    lowerCase++;
            }
            // keep the snippet only if lower-case tokens dominate
            if (upperCase < lowerCase)
                rawResults.add(r);
            // else System.out.println("ProperNameFilter: " + text);
        }
    }
    return rawResults.toArray(new Result[rawResults.size()]);
}
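The heuristic itself is easy to test in isolation. A minimal, self-contained sketch, with a plain whitespace split standing in for NETagger.tokenize and the FunctionWords weighting omitted (both assumptions made only to keep the example dependency-free):

    // Standalone sketch of the proper-name heuristic: a snippet is suspect
    // when capitalized tokens outnumber lower-case and numeric ones. The
    // first token is skipped, exactly as in the filter above.
    static boolean looksLikeEnumeration(String text) {
        String[] tokens = text.split("\\s+");
        int upperCase = 0, lowerCase = 0;
        for (int i = 1; i < tokens.length; i++) {
            String term = tokens[i];
            if (term.matches("[A-Z]++.*+"))
                upperCase++;
            else if (term.matches("[a-z]++.*+") || term.matches("[0-9]++"))
                lowerCase++;
        }
        return upperCase >= lowerCase;
    }

On a track-list-like string such as "Hey Jude, Let It Be, Penny Lane" the capitalized tokens dominate and the snippet would be dropped; in an ordinary sentence the lower-case count wins and it passes.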
use of info.ephyra.search.Result in project lucida by claritylab.
the class ResultLengthComparator method compare.
/**
 * Compares its two arguments for order. Returns a negative integer, zero,
 * or a positive integer as the first argument is less than, equal to, or
 * greater than the second.
 *
 * @param o1 the first object to be compared
 * @param o2 the second object to be compared
 * @return a negative integer, zero, or a positive integer as the first
 *         argument is less than, equal to, or greater than the second
 */
public int compare(Object o1, Object o2) {
    if (!(o1 instanceof Result) || !(o2 instanceof Result))
        throw new ClassCastException();
    Result r1 = (Result) o1;
    Result r2 = (Result) o2;
    return r1.getAnswer().length() - r2.getAnswer().length();
}
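In use, the comparator is handed to a standard library sort; results end up ordered from shortest to longest answer string. A minimal sketch, assuming Result and ResultLengthComparator are on the classpath (the wrapper class name is illustrative):

    import java.util.Arrays;

    // ResultLengthComparator implements the raw, pre-generics Comparator
    // interface, so it fits the legacy Arrays.sort(Object[], Comparator)
    // overload (with an unchecked warning under Java 5 and later).
    public class SortByAnswerLength {
        public static void sort(Result[] results) {
            Arrays.sort(results, new ResultLengthComparator());
        }
    }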
use of info.ephyra.search.Result in project lucida by claritylab.
the class ScoreCombinationFilter method apply.
/**
 * Filters an array of <code>Result</code> objects.
 *
 * @param results results to filter
 * @return filtered results
 */
public Result[] apply(Result[] results) {
    // all results that pass the filter
    List<Result> filtered = new ArrayList<Result>();
    // sort results by their scores in descending order
    results = (new ScoreSorterFilter()).apply(results);
    // separate factoid answers by extractors
    List<Result> factoids = new ArrayList<Result>();
    Hashtable<String, Hashtable<String, Result>> allExtractors =
        new Hashtable<String, Hashtable<String, Result>>();
    for (Result result : results) {
        // only merge factoid answers
        if (result.getScore() <= 0 ||
                result.getScore() == Float.POSITIVE_INFINITY) {
            filtered.add(result);
            continue;
        }
        // make sure that answers come from a single extractor
        String[] extractors = result.getExtractionTechniques();
        if (extractors == null || extractors.length != 1) {
            filtered.add(result);
            continue;
        }
        String extractor = extractors[0];
        factoids.add(result);
        Hashtable<String, Result> sameExtractor = allExtractors.get(extractor);
        if (sameExtractor == null) {
            sameExtractor = new Hashtable<String, Result>();
            allExtractors.put(extractor, sameExtractor);
        }
        String norm = StringUtils.normalize(result.getAnswer());
        sameExtractor.put(norm, result);
    }
    // merge answers from different extractors
    String[] extractors =
        allExtractors.keySet().toArray(new String[allExtractors.size()]);
    Set<String> covered = new HashSet<String>();
    for (Result result : factoids) {
        String norm = StringUtils.normalize(result.getAnswer());
        if (!covered.add(norm))
            continue;
        // get all extractors for the result and the normalized scores
        ArrayList<String> exs = new ArrayList<String>();
        ArrayList<Float> scores = new ArrayList<Float>();
        for (String extractor : extractors) {
            Result r = allExtractors.get(extractor).get(norm);
            if (r != null) {
                exs.add(extractor);
                scores.add(r.getNormScore());
            }
        }
        // set extractors
        result.setExtractionTechniques(exs.toArray(new String[exs.size()]));
        // combine their normalized scores
        float[] scoresA = new float[scores.size()];
        for (int i = 0; i < scoresA.length; i++)
            scoresA[i] = scores.get(i);
        int totalExtractors = extractors.length;
        float combinedScore =
            // combANZ(scoresA, totalExtractors);
            combMNZ(scoresA, totalExtractors);
            // combCP(scoresA, totalExtractors);
        result.setScore(combinedScore);
        result.setNormScore(combinedScore);
        filtered.add(result);
    }
    return filtered.toArray(new Result[filtered.size()]);
}
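combMNZ (with combANZ and combCP left as commented-out alternatives) is a score-fusion formula from the metasearch literature. A minimal sketch of the standard CombMNZ definition, which the call above presumably matches; the exact Ephyra implementation may differ:

    // CombMNZ (Fox & Shaw): the sum of an answer's normalized scores,
    // multiplied by the number of extractors that actually returned the
    // answer. The totalExtractors parameter mirrors the call site above;
    // the textbook formula does not use it (combANZ, by contrast,
    // averages over the extractors that found the answer).
    static float combMNZ(float[] scores, int totalExtractors) {
        float sum = 0;
        for (float score : scores)
            sum += score;
        return sum * scores.length;
    }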
use of info.ephyra.search.Result in project lucida by claritylab.
the class ScoreNormalizationFilter method readSerializedResults.
/**
 * Reads serialized results from a file.
 *
 * @param input input file
 * @return result objects
 */
private static Result[] readSerializedResults(File input) {
    ArrayList<Result> results = new ArrayList<Result>();
    try {
        FileInputStream fis = new FileInputStream(input);
        ObjectInputStream ois = new ObjectInputStream(fis);
        // the first serialized object is the AnalyzedQuestion;
        // verify its type, then discard it
        if (!(ois.readObject() instanceof AnalyzedQuestion)) {
            MsgPrinter.printErrorMsg("First serialized object is not an " +
                    "AnalyzedQuestion.");
            System.exit(1);
        }
        // read Result objects until the end of the file is reached
        try {
            while (true)
                results.add((Result) ois.readObject());
        } catch (EOFException e) {
            /* end of file reached */
        }
        ois.close();
    } catch (Exception e) {
        MsgPrinter.printErrorMsg("Could not read serialized results:");
        MsgPrinter.printErrorMsg(e.toString());
        System.exit(1);
    }
    return results.toArray(new Result[results.size()]);
}
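The reader implies a simple file layout: one serialized AnalyzedQuestion followed by any number of Result objects. A hedged sketch of the mirror-image writer (this helper is not shown in the project; it exists here only to document the assumed layout, and requires java.io imports):

    // Hypothetical counterpart to readSerializedResults: writes the
    // AnalyzedQuestion first, then each Result in sequence, so the reader
    // above can verify the leading object and loop until EOFException.
    private static void writeSerializedResults(File output,
            AnalyzedQuestion aq, Result[] results) throws IOException {
        ObjectOutputStream oos =
            new ObjectOutputStream(new FileOutputStream(output));
        oos.writeObject(aq);
        for (Result result : results)
            oos.writeObject(result);
        oos.close();
    }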