Search in sources :

Example 1 with AnswerPattern

use of info.ephyra.answerselection.AnswerPattern in project lucida by claritylab.

the class AnswerPatternFilter method assessPatterns.

/**
 * Assesses the answer patterns by applying them to the answer string of the
 * <code>Result</code> object and comparing the extracted answers to the
 * regular expression <code>regex</code>.
 * <p>
 * The passage counter of the PROPERTY of the result is incremented, the
 * PROPERTY objects are extracted via <code>extractPos</code>, and each
 * pattern that produced an extraction is credited with a correct or a wrong
 * application, depending on whether the extraction matches
 * <code>regex</code> (case-insensitively, as a whole).
 *
 * @param result <code>Result</code> object
 * @param regex regular expression that describes a correct answer
 */
public static void assessPatterns(Result result, String regex) {
    // increment the number of passages used to assess the patterns
    String prop = result.getQuery().getInterpretation().getProperty();
    Integer seen = nOfPassages.get(prop);
    nOfPassages.put(prop, (seen == null) ? 1 : seen + 1);
    // extract PROPERTY objects
    extractPos(result);
    // nothing was extracted, so there is nothing to assess (and the regular
    // expression is not compiled, exactly as before)
    if (extr.isEmpty())
        return;
    // compile the case-insensitive expression once instead of once per
    // extracted PROPERTY object
    java.util.regex.Pattern correctAnswer = java.util.regex.Pattern.compile("(?i)" + regex);
    // use the regular expression regex to assess the PROPERTY objects;
    // extr and aps are parallel lists filled by extractPos
    for (int i = 0; i < extr.size(); i++) {
        String po = extr.get(i);
        AnswerPattern ap = aps.get(i);
        if (correctAnswer.matcher(po).matches())
            ap.incCorrect();
        else
            ap.incWrong();
    }
}
Also used : AnswerPattern(info.ephyra.answerselection.AnswerPattern)

Example 2 with AnswerPattern

use of info.ephyra.answerselection.AnswerPattern in project lucida by claritylab.

the class AnswerPatternFilter method extractPos.

/**
 * Extracts PROPERTY objects from the answer string of a <code>Result</code>
 * by running every answer pattern known for the PROPERTY of the result over
 * each sentence of the answer.
 * <p>
 * The outcome is stored in the parallel lists <code>extr</code> (extracted
 * objects), <code>types</code> (their NE types), <code>sents</code> (the
 * original sentences) and <code>aps</code> (the patterns that matched). The
 * lists are reset on every call and stay empty if there are no patterns for
 * the PROPERTY.
 *
 * @param result a <code>Result</code> object
 */
private static void extractPos(Result result) {
    // start from empty result lists
    extr = new ArrayList<String>();
    types = new ArrayList<String[]>();
    sents = new ArrayList<String>();
    aps = new ArrayList<AnswerPattern>();

    // read the interpretation and the answer string
    QuestionInterpretation interpretation = result.getQuery().getInterpretation();
    String target = interpretation.getTarget();
    // CONTEXT objects are ignored, so an empty context is used instead of
    // interpretation.getContext()
    String[] context = new String[0];
    String prop = interpretation.getProperty();
    String answer = result.getAnswer();

    // look up the answer patterns for this PROPERTY
    HashSet<AnswerPattern> patterns = props.get(prop);
    if (patterns == null)
        return;

    // tokenize the interpretation
    target = NETagger.tokenizeWithSpaces(target);
    for (int c = 0; c < context.length; c++) {
        context[c] = NETagger.tokenizeWithSpaces(context[c]);
    }

    // split the answer into sentences and tokenize each of them
    String[] rawSentences = OpenNLP.sentDetect(answer);
    int sentenceCount = rawSentences.length;
    String[][] tokens = new String[sentenceCount][];
    String[] joined = new String[sentenceCount];
    for (int s = 0; s < sentenceCount; s++) {
        String[] sentenceTokens = NETagger.tokenize(rawSentences[s]);
        tokens[s] = sentenceTokens;
        joined[s] = StringUtils.concatWithSpaces(sentenceTokens);
    }

    // extract named entities for all sentences at once
    String[][][] nes = NETagger.extractNes(tokens);

    for (int s = 0; s < sentenceCount; s++) {
        // prepare the sentence for answer extraction; unusable sentences
        // are reported as null and skipped
        String prepared = prepSentence(joined[s], target, context, nes[s]);
        if (prepared == null) {
            continue;
        }
        String original = rawSentences[s];

        for (AnswerPattern pattern : patterns) {
            // apply the answer pattern
            String[] objects = pattern.apply(prepared);
            int found = objects.length;

            // determine the NE types while the objects still carry tags
            String[][] neTypes = new String[found][];
            for (int k = 0; k < found; k++) {
                neTypes[k] = getNeTypes(objects[k], pattern);
            }

            // replace tags and untokenize the PROPERTY objects
            for (int k = 0; k < found; k++) {
                objects[k] = OpenNLP.untokenize(replaceTags(objects[k]), original);
            }

            // store the PROPERTY objects, the sentences they were extracted
            // from, the patterns used to extract them and the NE types
            for (int k = 0; k < found; k++) {
                extr.add(objects[k]);
                types.add(neTypes[k]);
                sents.add(original);
                aps.add(pattern);
            }
        }
    }
}
Also used : AnswerPattern(info.ephyra.answerselection.AnswerPattern) QuestionInterpretation(info.ephyra.questionanalysis.QuestionInterpretation)

Example 3 with AnswerPattern

use of info.ephyra.answerselection.AnswerPattern in project lucida by claritylab.

the class AnswerPatternFilter method loadPatterns.

/**
 * Loads the answer patterns from a directory of PROPERTY files. The first
 * line of each file is the total number of passages used to assess the
 * patterns. It is followed by a list of pattern descriptors, along with
 * their number of correct and wrong applications. The format of the
 * descriptors is described in the documentation of the class
 * <code>AnswerPattern</code>.
 * <p>
 * Each record consists of four lines: a separator line, the pattern
 * descriptor, the number of correct applications and the number of wrong
 * applications. The files are read as UTF-8, which is the encoding used by
 * <code>savePatterns</code>. A descriptor that is not a valid pattern is
 * reported and skipped; a file that cannot be read or whose count lines are
 * malformed aborts loading.
 *
 * @param dir directory of the answer patterns
 * @return true, iff the answer patterns were loaded successfully
 */
public static boolean loadPatterns(String dir) {
    File[] files = FileUtils.getFiles(dir);
    try {
        for (File file : files) {
            MsgPrinter.printStatusMsg("  ...for " + file.getName());
            String prop = file.getName();
            // read with the same encoding that savePatterns writes
            BufferedReader in = new BufferedReader(new java.io.InputStreamReader(
                    new java.io.FileInputStream(file), "UTF-8"));
            try {
                // total number of passages used to assess the patterns
                int passages = parseCountLine(in.readLine(), file);
                nOfPassages.put(prop, passages);
                HashSet<AnswerPattern> patterns = new HashSet<AnswerPattern>();
                // every record starts with a separator line; its content is
                // ignored and its absence marks the end of the file
                while (in.readLine() != null) {
                    // pattern descriptor
                    String expr = in.readLine();
                    if (expr == null)
                        break; // trailing separator line without a record
                    // number of correct applications
                    int correct = parseCountLine(in.readLine(), file);
                    // number of wrong applications
                    int wrong = parseCountLine(in.readLine(), file);
                    try {
                        patterns.add(new AnswerPattern(expr, prop, correct, wrong));
                    } catch (PatternSyntaxException pse) {
                        MsgPrinter.printErrorMsg("Problem loading pattern:\n" + prop + " " + expr);
                        MsgPrinter.printErrorMsg(pse.getMessage());
                    }
                }
                props.put(prop, patterns);
            } finally {
                // release the file handle even if parsing failed
                in.close();
            }
        }
        MsgPrinter.printStatusMsg("  ...done");
    } catch (IOException e) {
        MsgPrinter.printErrorMsg("Could not load answer patterns from " + dir + ": " + e.getMessage());
        return false;
    }
    return true;
}

/**
 * Parses a line of the form <code>"#label: number"</code> and returns the
 * number, i.e. the second space-separated field.
 *
 * @param line the line to parse, or <code>null</code> at the end of the file
 * @param file the file the line was read from (used in error messages)
 * @return the parsed number
 * @throws IOException if the line is missing or malformed
 */
private static int parseCountLine(String line, File file) throws IOException {
    if (line == null)
        throw new IOException("Unexpected end of pattern file " + file);
    String[] fields = line.split(" ");
    if (fields.length < 2)
        throw new IOException("Malformed line in pattern file " + file + ": " + line);
    try {
        return Integer.parseInt(fields[1]);
    } catch (NumberFormatException nfe) {
        throw new IOException("Malformed number in pattern file " + file + ": " + line, nfe);
    }
}
Also used : AnswerPattern(info.ephyra.answerselection.AnswerPattern) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) IOException(java.io.IOException) File(java.io.File) PatternSyntaxException(java.util.regex.PatternSyntaxException)

Example 4 with AnswerPattern

use of info.ephyra.answerselection.AnswerPattern in project lucida by claritylab.

the class AnswerPatternFilter method savePatterns.

/**
 * Saves the answer patterns to resource files. A separate file is created
 * for each PROPERTY. The first line is the total number of passages used to
 * assess the answer patterns. It is followed by a list of pattern
 * descriptors along with their number of correct and wrong applications.
 * <p>
 * The patterns are written in descending order of their natural ordering
 * and the files are encoded as UTF-8.
 *
 * @param dir directory of the answer patterns
 * @return true, iff the answer patterns were saved successfully
 */
public static boolean savePatterns(String dir) {
    try {
        for (String prop : props.keySet()) {
            // sort answer patterns
            HashSet<AnswerPattern> ps = props.get(prop);
            AnswerPattern[] patterns = ps.toArray(new AnswerPattern[ps.size()]);
            Arrays.sort(patterns);
            File file = new File(dir + "/" + prop);
            PrintWriter out = new PrintWriter(file, "UTF-8");
            try {
                // total number of passages used to assess the patterns
                out.println("#passages: " + nOfPassages.get(prop));
                // iterate backwards to write the patterns in descending order
                for (int i = patterns.length - 1; i >= 0; i--) {
                    out.println();
                    // pattern descriptor
                    out.println(patterns[i].getDesc());
                    // number of correct applications
                    out.println("#correct: " + patterns[i].getCorrect());
                    // number of wrong applications
                    out.println("#incorrect: " + patterns[i].getWrong());
                }
            } finally {
                // always release the file handle
                out.close();
            }
            // PrintWriter never throws on write errors but records them;
            // without this check a failed write would be reported as success
            if (out.checkError())
                return false;
        }
    } catch (IOException e) {
        return false;
    }
    return true;
}
Also used : AnswerPattern(info.ephyra.answerselection.AnswerPattern) IOException(java.io.IOException) File(java.io.File) PrintWriter(java.io.PrintWriter)

Example 5 with AnswerPattern

use of info.ephyra.answerselection.AnswerPattern in project lucida by claritylab.

the class AnswerPatternFilter method dropLowConfidence.

/**
 * Filters the answer patterns of every PROPERTY by confidence: a pattern is
 * kept if its confidence is at least <code>confidenceThresh</code>, all
 * other patterns are dropped.
 *
 * @param confidenceThresh the confidence threshold
 */
public static void dropLowConfidence(float confidenceThresh) {
    for (String prop : props.keySet()) {
        HashSet<AnswerPattern> candidates = props.get(prop);
        // collect the survivors in a fresh set and swap it in afterwards;
        // only the value of an existing key is replaced
        HashSet<AnswerPattern> kept = new HashSet<AnswerPattern>();
        for (AnswerPattern candidate : candidates) {
            boolean confident = candidate.getConfidence() >= confidenceThresh;
            if (confident) {
                kept.add(candidate);
            }
        }
        props.put(prop, kept);
    }
}
Also used : AnswerPattern(info.ephyra.answerselection.AnswerPattern) HashSet(java.util.HashSet)

Aggregations

AnswerPattern (info.ephyra.answerselection.AnswerPattern)7 File (java.io.File)2 IOException (java.io.IOException)2 HashSet (java.util.HashSet)2 QuestionInterpretation (info.ephyra.questionanalysis.QuestionInterpretation)1 BufferedReader (java.io.BufferedReader)1 FileReader (java.io.FileReader)1 PrintWriter (java.io.PrintWriter)1 PatternSyntaxException (java.util.regex.PatternSyntaxException)1