Use of info.ephyra.answerselection.AnswerPattern in project lucida by claritylab.
The class AnswerPatternFilter, method assessPatterns.
/**
 * Assesses the answer patterns by applying them to the answer string of the
 * <code>Result</code> object and comparing the extracted answers to the
 * regular expression <code>regex</code>.
 *
 * @param result <code>Result</code> object
 * @param regex regular expression that describes a correct answer
 */
public static void assessPatterns(Result result, String regex) {
    // increment the number of passages used to assess the patterns
    String prop = result.getQuery().getInterpretation().getProperty();
    int n = (nOfPassages.get(prop) == null) ? 1 : nOfPassages.get(prop) + 1;
    nOfPassages.put(prop, n);
    // extract PROPERTY objects
    extractPos(result);
    // use the regular expression regex to assess the PROPERTY objects
    for (int i = 0; i < extr.size(); i++) {
        String po = extr.get(i);
        AnswerPattern ap = aps.get(i);
        if (po.matches("(?i)" + regex))
            ap.incCorrect();
        else
            ap.incWrong();
    }
}
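The "(?i)" prefix in the matching step above turns on case-insensitive matching, so the answer key regex does not have to anticipate capitalization. A minimal standalone sketch of that step; the extracted string and the regex are invented examples, not data from the project:

public class RegexAssessDemo {
    public static void main(String[] args) {
        String extracted = "Neil Armstrong";    // made-up PROPERTY object
        String regex = "neil\\s+armstrong";     // made-up answer key regex
        // "(?i)" enables case-insensitive matching, as in assessPatterns()
        System.out.println(extracted.matches("(?i)" + regex));    // prints "true"
    }
}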
Use of info.ephyra.answerselection.AnswerPattern in project lucida by claritylab.
The class AnswerPatternFilter, method extractPos.
/**
 * Applies the answer patterns to the answer string of the
 * <code>Result</code> object to extract PROPERTY objects.
 *
 * @param result a <code>Result</code> object
 */
private static void extractPos(Result result) {
    extr = new ArrayList<String>();
    types = new ArrayList<String[]>();
    sents = new ArrayList<String>();
    aps = new ArrayList<AnswerPattern>();
    // get interpretation and answer string
    QuestionInterpretation qi = result.getQuery().getInterpretation();
    String to = qi.getTarget();
    // String[] cos = qi.getContext();
    // CONTEXT objects are ignored
    String[] cos = new String[0];
    String prop = qi.getProperty();
    String answer = result.getAnswer();
    // get answer patterns
    HashSet<AnswerPattern> patterns = props.get(prop);
    if (patterns == null)
        return;
    // tokenize interpretation
    to = NETagger.tokenizeWithSpaces(to);
    for (int i = 0; i < cos.length; i++)
        cos[i] = NETagger.tokenizeWithSpaces(cos[i]);
    // split answer string into sentences and tokenize sentences
    String[] originalSentences = OpenNLP.sentDetect(answer);
    String[][] tokens = new String[originalSentences.length][];
    String[] sentences = new String[originalSentences.length];
    for (int i = 0; i < originalSentences.length; i++) {
        tokens[i] = NETagger.tokenize(originalSentences[i]);
        sentences[i] = StringUtils.concatWithSpaces(tokens[i]);
    }
    /* PrintWriter pw = null;
    try {
        pw = new PrintWriter(new FileOutputStream(new File("netagger_data.txt"), true));
    } catch (FileNotFoundException ex) {
        System.out.println("File not found exception!!");
    } */
    // extract named entities
    String[][][] nes = NETagger.extractNes(tokens);
    for (int i = 0; i < sentences.length; i++) {
        // prepare sentence for answer extraction
        sentences[i] = prepSentence(sentences[i], to, cos, nes[i]);
        if (sentences[i] == null)
            continue;
        for (AnswerPattern pattern : patterns) {
            // apply answer pattern
            String[] pos = pattern.apply(sentences[i]);
            // pw2.printf("%s ----- %s ----- %s\n", pattern.getDesc(), sentences[i], pos);
            // get NE types of PROPERTY objects
            String[][] neTypes = new String[pos.length][];
            for (int j = 0; j < pos.length; j++)
                neTypes[j] = getNeTypes(pos[j], pattern);
            // replace tags and untokenize PROPERTY objects
            for (int j = 0; j < pos.length; j++) {
                pos[j] = replaceTags(pos[j]);
                pos[j] = OpenNLP.untokenize(pos[j], originalSentences[i]);
            }
            // store the PROPERTY objects, the sentences they were extracted
            // from, the patterns used to extract them and the NE types
            for (int j = 0; j < pos.length; j++) {
                extr.add(pos[j]);
                types.add(neTypes[j]);
                sents.add(originalSentences[i]);
                aps.add(pattern);
            }
        }
    }
    // pw.close();
    // pw2.close();
}
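The four static lists filled above (extr, types, sents, aps) are index-aligned: entry i of each list describes the same extraction. A consumer inside AnswerPatternFilter can therefore walk them in lockstep, as assessPatterns() does for extr and aps; the loop below is only an illustrative sketch of that invariant:

for (int i = 0; i < extr.size(); i++) {
    String extracted = extr.get(i);        // extracted PROPERTY object
    String[] neTypes = types.get(i);       // NE types assigned to it
    String sentence = sents.get(i);        // sentence it was extracted from
    AnswerPattern pattern = aps.get(i);    // pattern that produced it
    // ... evaluate or rank the extraction here ...
}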
Use of info.ephyra.answerselection.AnswerPattern in project lucida by claritylab.
The class AnswerPatternFilter, method loadPatterns.
/**
 * Loads the answer patterns from a directory of PROPERTY files. The first
 * line of each file is the total number of passages used to assess the
 * patterns. It is followed by a list of pattern descriptors, along with
 * their number of correct and wrong applications. The format of the
 * descriptors is described in the documentation of the class
 * <code>AnswerPattern</code>.
 *
 * @param dir directory of the answer patterns
 * @return <code>true</code> iff the answer patterns were loaded successfully
 */
public static boolean loadPatterns(String dir) {
    File[] files = FileUtils.getFiles(dir);
    try {
        BufferedReader in;
        String prop, expr;
        int passages, correct, wrong;
        HashSet<AnswerPattern> patterns;
        for (File file : files) {
            MsgPrinter.printStatusMsg(" ...for " + file.getName());
            prop = file.getName();
            in = new BufferedReader(new FileReader(file));
            // total number of passages used to assess the patterns
            passages = Integer.parseInt(in.readLine().split(" ")[1]);
            nOfPassages.put(prop, passages);
            patterns = new HashSet<AnswerPattern>();
            while (in.ready()) {
                in.readLine();
                // pattern descriptor
                expr = in.readLine();
                // number of correct applications
                correct = Integer.parseInt(in.readLine().split(" ")[1]);
                // number of wrong applications
                wrong = Integer.parseInt(in.readLine().split(" ")[1]);
                try {
                    patterns.add(new AnswerPattern(expr, prop, correct, wrong));
                } catch (PatternSyntaxException pse) {
                    MsgPrinter.printErrorMsg("Problem loading pattern:\n" + prop + " " + expr);
                    MsgPrinter.printErrorMsg(pse.getMessage());
                }
            }
            props.put(prop, patterns);
            in.close();
        }
        MsgPrinter.printStatusMsg(" ...done");
    } catch (IOException e) {
        return false;
    }
    return true;
}
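Based on the parsing loop above and the write format in savePatterns() below, a PROPERTY file looks roughly like this; the descriptor lines are placeholders, since the actual descriptor syntax is documented in AnswerPattern, and all counts are invented:

#passages: 500

<pattern descriptor, see AnswerPattern>
#correct: 42
#incorrect: 3

<pattern descriptor, see AnswerPattern>
#correct: 7
#incorrect: 12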
Use of info.ephyra.answerselection.AnswerPattern in project lucida by claritylab.
The class AnswerPatternFilter, method savePatterns.
/**
 * Saves the answer patterns to resource files. A separate file is created
 * for each PROPERTY. The first line is the total number of passages used to
 * assess the answer patterns. It is followed by a list of pattern
 * descriptors along with their number of correct and wrong applications.
 *
 * @param dir directory of the answer patterns
 * @return <code>true</code> iff the answer patterns were saved successfully
 */
public static boolean savePatterns(String dir) {
    File file;
    PrintWriter out;
    try {
        for (String prop : props.keySet()) {
            // sort answer patterns
            HashSet<AnswerPattern> ps = props.get(prop);
            AnswerPattern[] patterns = ps.toArray(new AnswerPattern[ps.size()]);
            Arrays.sort(patterns);
            file = new File(dir + "/" + prop);
            out = new PrintWriter(file, "UTF-8");
            // total number of passages used to assess the patterns
            out.println("#passages: " + nOfPassages.get(prop));
            for (int i = patterns.length - 1; i >= 0; i--) {
                out.println();
                // pattern descriptor
                out.println(patterns[i].getDesc());
                // number of correct applications
                out.println("#correct: " + patterns[i].getCorrect());
                // number of wrong applications
                out.println("#incorrect: " + patterns[i].getWrong());
            }
            out.close();
        }
    } catch (IOException e) {
        return false;
    }
    return true;
}
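A hypothetical round trip through the two static methods; the directory path is invented:

String dir = "res/answerpatterns";    // made-up directory
if (AnswerPatternFilter.savePatterns(dir) && AnswerPatternFilter.loadPatterns(dir))
    MsgPrinter.printStatusMsg("Answer patterns saved and reloaded.");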
Use of info.ephyra.answerselection.AnswerPattern in project lucida by claritylab.
The class AnswerPatternFilter, method dropLowConfidence.
/**
 * Drops answer patterns whose confidence is below
 * <code>confidenceThresh</code>; patterns with a confidence at or above the
 * threshold are kept.
 *
 * @param confidenceThresh the confidence threshold
 */
public static void dropLowConfidence(float confidenceThresh) {
    // for each PROPERTY
    for (String prop : props.keySet()) {
        HashSet<AnswerPattern> patterns = props.get(prop);
        HashSet<AnswerPattern> remaining = new HashSet<AnswerPattern>();
        // check the threshold for each answer pattern
        for (AnswerPattern pattern : patterns)
            if (pattern.getConfidence() >= confidenceThresh)
                remaining.add(pattern);
        props.put(prop, remaining);
    }
}
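A standalone sketch of the same thresholding idea. The formula correct / (correct + wrong) is an assumption about how AnswerPattern.getConfidence() is defined, and the stand-in class and values below are invented:

import java.util.*;

public class ConfidenceFilterDemo {
    // stand-in for AnswerPattern; the confidence formula is an assumption
    record PatternStats(String desc, int correct, int wrong) {
        double confidence() { return (double) correct / (correct + wrong); }
    }

    public static void main(String[] args) {
        Set<PatternStats> patterns = new HashSet<>(List.of(
                new PatternStats("pattern A", 42, 3),    // confidence ~0.93, kept
                new PatternStats("pattern B", 2, 18)));  // confidence 0.10, dropped
        float confidenceThresh = 0.5f;
        Set<PatternStats> remaining = new HashSet<>();
        for (PatternStats p : patterns)
            if (p.confidence() >= confidenceThresh)
                remaining.add(p);
        System.out.println(remaining.size());    // prints 1
    }
}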