Search in sources :

Example 1 with Term

use of edu.cmu.lti.javelin.qa.Term in project lucida by claritylab.

the class EnglishFeatureExtractor method addSyntacticFeatures.

/**
 * Adds the syntactic binary features to the given instance:
 * <ul>
 *   <li>MAIN_VERB.&lt;verb&gt; : the head word of the parse tree, replaced by its
 *       WordNet verb lemma when one is found, or "-" when the tree has no head word
 *   <li>WH_DET.+ : added when one of the wh-patterns, followed by the focus text,
 *       matches the whole question
 *   <li>FOCUS_ADJ.&lt;word&gt; : the head word of the first RB/JJ preterminal that is
 *       preceded by a WRB preterminal, if any
 * </ul>
 * If <code>parseTree</code> is null an error is logged and no feature is added.
 *
 * @param instance the instance to which features are added
 * @param terms the terms of the question; their texts are joined with single spaces
 * @param parseTree the syntactic parse of the question, may be null
 * @param focusTerm the focus term of the question, may be null
 */
private static void addSyntacticFeatures(MutableInstance instance, List<Term> terms, String parseTree, Term focusTerm) {
    if (parseTree == null) {
        log.error("Syntactic parse of the question is null.");
        return;
    }
    Tree tree = TreeHelper.buildTree(parseTree, Tree.ENGLISH);

    // MAIN_VERB
    TreeHelper.markHeadNode(tree);
    String mainVerb = tree.getHeadWord();
    if (mainVerb == null) {
        // No head word: skip the dictionary lookup instead of relying on it to fail.
        mainVerb = "-";
    } else {
        try {
            IndexWord word = Dictionary.getInstance().lookupIndexWord(POS.VERB, mainVerb);
            String lemma = (word != null) ? word.getLemma() : null;
            if (lemma != null) {
                mainVerb = lemma;
            }
        } catch (Exception e) {
            // Best effort: fall back to the surface form of the head word.
            log.warn("Failed to get lemma for verb '" + mainVerb + "'", e);
        }
    }
    instance.addBinary(new Feature("MAIN_VERB" + "." + mainVerb));

    // WH_DET
    if (focusTerm != null && focusTerm.getText() != null) {
        StringBuilder questionBuilder = new StringBuilder();
        for (Term term : terms) {
            questionBuilder.append(term.getText()).append(' ');
        }
        String question = questionBuilder.toString().trim();
        // The focus is literal question text, not a regular expression: quote it so that
        // metacharacters (e.g. "c++", "(", "?") neither break compilation nor alter the match.
        String quotedFocus = Pattern.quote(focusTerm.getText());
        for (String ptrn : whPtrns) {
            Matcher m = Pattern.compile(ptrn + SPACE_PTRN + quotedFocus + REST_PTRN).matcher(question);
            if (m.matches()) {
                instance.addBinary(new Feature("WH_DET" + ".+"));
                break;
            }
        }
    }

    // FOCUS_ADJ
    Tree focusNode = TreeHelper.findFirstPreterminalWithPrecedingPreterminal(tree, "RB|JJ", "WRB");
    if (focusNode != null) {
        instance.addBinary(new Feature("FOCUS_ADJ" + "." + focusNode.getHeadWord()));
    }
}
Also used : Matcher(java.util.regex.Matcher) Tree(edu.cmu.lti.chineseNLP.util.Tree) Term(edu.cmu.lti.javelin.qa.Term) IndexWord(net.didion.jwnl.data.IndexWord) Feature(edu.cmu.minorthird.classify.Feature)

Example 2 with Term

use of edu.cmu.lti.javelin.qa.Term in project lucida by claritylab.

the class EnglishFeatureExtractor method createInstance.

/**
 * Builds an Instance for a question given as a list of Terms plus its
 * syntactic parse. The instance is populated with binary features by three
 * helpers, in this order:
 *
 * <p>Word-level features:
 * <ul>
 *   <li>UNIGRAM : individual words in the question
 *   <li>BIGRAM : pairs of adjacent words in the question
 *   <li>WH_WORD : the wh-word in the question if one exists
 *   <li>OF_HEAD : a head word found in front of "of &lt;focus&gt;"
 * </ul>
 *
 * <p>Syntactic features:
 * <ul>
 *   <li>MAIN_VERB : the syntactic head of the sentence, as defined in
 *   {@link edu.cmu.lti.chineseNLP.util.TreeHelper TreeHelper}
 *   <li>FOCUS_ADJ : the adjective following a wh-word (e.g. 'long' in 'How long is it?')
 *   <li>WH_DET : whether or not the wh-word is the determiner of a noun phrase, as in 'which printer'
 * </ul>
 *
 * <p>Semantic features:
 * <ul>
 *   <li>FOCUS_TYPE : the semantic type of the focus word
 * </ul>
 *
 * @param terms the terms of the question
 * @param parseTree the syntactic parse tree of the question
 * @return the populated instance, whose source is the space-joined terms
 */
public Instance createInstance(List<Term> terms, String parseTree) {
    // NOTE(review): the instance source is built from Term.toString(), whereas the
    // feature helpers use Term.getText() - confirm the two agree.
    StringBuilder joined = new StringBuilder();
    for (Term t : terms) {
        joined.append(String.valueOf(t)).append(' ');
    }
    MutableInstance result = new MutableInstance(joined.toString().trim());

    // Locate the focus word in the parsed question.
    log.debug("Parse: " + parseTree);
    Term focusTerm = FocusFinder.findFocusTerm(TreeHelper.buildTree(parseTree, Tree.ENGLISH));
    if (focusTerm != null) {
        log.debug("Focus: " + focusTerm.getText());
    }

    addWordLevelFeatures(result, terms, focusTerm);
    addSyntacticFeatures(result, terms, parseTree, focusTerm);
    addSemanticFeatures(result, focusTerm);
    return result;
}
Also used : MutableInstance(edu.cmu.minorthird.classify.MutableInstance) Tree(edu.cmu.lti.chineseNLP.util.Tree) Term(edu.cmu.lti.javelin.qa.Term)

Example 3 with Term

use of edu.cmu.lti.javelin.qa.Term in project lucida by claritylab.

the class EnglishFeatureExtractor method addWordLevelFeatures.

/**
 * Adds the word-level binary features to the given instance:
 * <ul>
 *   <li>UNIGRAM.&lt;word&gt; : one per term; whitespace inside a term is replaced by
 *       "_" and a term without text becomes "-"
 *   <li>BIGRAM.&lt;word1&gt;-&lt;word2&gt; : one per pair of adjacent terms
 *   <li>WH_WORD.&lt;wh&gt; : capture group 1 of the first wh-pattern that matches,
 *       tried first anchored at the start of the question and then anywhere in it
 *   <li>OF_HEAD.&lt;word&gt; : the first entry of OF_HEAD_WORDS found (optionally
 *       pluralised with "s") directly in front of " of &lt;focus text&gt;"
 * </ul>
 *
 * @param instance the instance to which features are added
 * @param terms the terms of the question
 * @param focus the focus term of the question; when it is null or has no text,
 *        no OF_HEAD feature is added
 */
private static void addWordLevelFeatures(MutableInstance instance, List<Term> terms, Term focus) {
    String[] words = new String[terms.size()];
    StringBuilder questionBuilder = new StringBuilder();
    for (int i = 0; i < terms.size(); i++) {
        String text = terms.get(i).getText();
        words[i] = (text != null) ? text.replaceAll("\\s+", "_") : "-";
        questionBuilder.append(text).append(' ');
    }
    String question = questionBuilder.toString().trim();

    // UNIGRAM
    for (int i = 0; i < words.length; i++) {
        instance.addBinary(new Feature("UNIGRAM" + "." + words[i]));
    }

    // BIGRAM
    for (int i = 0; i < words.length - 1; i++) {
        instance.addBinary(new Feature("BIGRAM" + "." + words[i] + "-" + words[i + 1]));
    }

    // WH_WORD
    String whWord = null;
    // first look at sentence beginning
    for (String ptrn : whPtrns) {
        Matcher m = Pattern.compile("^" + ptrn + REST_PTRN).matcher(question);
        if (m.matches()) {
            whWord = m.group(1).toLowerCase().replaceAll("\\s+", "_");
            instance.addBinary(new Feature("WH_WORD" + "." + whWord));
            break;
        }
    }
    if (whWord == null) {
        // then look anywhere in the sentence
        for (String ptrn : whPtrns) {
            Matcher m = Pattern.compile(ptrn + REST_PTRN).matcher(question);
            if (m.find()) {
                whWord = m.group(1).toLowerCase().replaceAll("\\s+", "_");
                instance.addBinary(new Feature("WH_WORD" + "." + whWord));
                break;
            }
        }
    }

    // OF_HEAD
    // Without focus text there is nothing to anchor on; previously a null text was
    // concatenated as the literal "null" and could match spuriously.
    if (focus == null || focus.getText() == null) {
        return;
    }
    // The focus is literal question text, not a regular expression: quote it so that
    // metacharacters neither break pattern compilation nor alter the match.
    String quotedFocus = Pattern.quote(focus.getText());
    for (String word : OF_HEAD_WORDS) {
        Matcher m = Pattern.compile(word + "s? of " + quotedFocus).matcher(question);
        if (m.find()) {
            instance.addBinary(new Feature("OF_HEAD" + "." + word));
            break;
        }
    }
}
Also used : Matcher(java.util.regex.Matcher) Term(edu.cmu.lti.javelin.qa.Term) Feature(edu.cmu.minorthird.classify.Feature)

Example 4 with Term

use of edu.cmu.lti.javelin.qa.Term in project lucida by claritylab.

the class FeatureExtractor method createInstance.

/**
 * Convenience method that tokenizes the given question by whitespace, creates
 * Terms, and calls {@link #createInstance(List, String)}.
 *
 * <p>Empty tokens are skipped, so a question that is empty or starts with
 * whitespace does not produce a Term with empty text.
 *
 * @param question the question to create an Instance from
 * @param parseTree the syntactic parse tree of the question
 * @return the Instance returned by {@link #createInstance(List, String)}
 */
public Instance createInstance(String question, String parseTree) {
    List<Term> terms = new ArrayList<Term>();
    for (String token : question.split("\\s+")) {
        // split() yields an empty first element for leading whitespace and for "".
        if (token.length() == 0) {
            continue;
        }
        terms.add(new Term(0, 0, token));
    }
    return createInstance(terms, parseTree);
}
Also used : ArrayList(java.util.ArrayList) Term(edu.cmu.lti.javelin.qa.Term)

Example 5 with Term

use of edu.cmu.lti.javelin.qa.Term in project lucida by claritylab.

the class FocusFinder method findFocusTerm.

/**
 * Joins the given Terms into a question string, parses it with
 * {@code StanfordParser.parse}, and searches the resulting parse tree for
 * the focus node.
 *
 * <p>Any exception raised along the way is printed to standard error and
 * reported to the caller as a null result.
 *
 * @param terms The list of Terms in the question.
 * @return a new Term holding the leaves of the focus node, with its POS set to
 *         the node's label, or null if no focus node exists or an error occurred
 */
public static Term findFocusTerm(List<Term> terms) {
    try {
        StringBuilder sentence = new StringBuilder();
        for (Term term : terms) {
            sentence.append(String.valueOf(term)).append(' ');
        }
        Tree parsed = TreeHelper.buildTree(StanfordParser.parse(sentence.toString()), Tree.ENGLISH);
        Tree focusNode = findFocusNode(parsed);
        if (focusNode == null) {
            return null;
        }
        Term focusTerm = new Term(0, 0, TreeHelper.getLeaves(focusNode));
        focusTerm.setPOS(focusNode.getLabel());
        return focusTerm;
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
Also used : Tree(edu.cmu.lti.chineseNLP.util.Tree) Term(edu.cmu.lti.javelin.qa.Term)

Aggregations

Term (edu.cmu.lti.javelin.qa.Term)11 Tree (edu.cmu.lti.chineseNLP.util.Tree)6 MutableInstance (edu.cmu.minorthird.classify.MutableInstance)3 ArrayList (java.util.ArrayList)3 Feature (edu.cmu.minorthird.classify.Feature)2 IOException (java.io.IOException)2 Matcher (java.util.regex.Matcher)2 Instance (edu.cmu.minorthird.classify.Instance)1 List (java.util.List)1 IndexWord (net.didion.jwnl.data.IndexWord)1