Search in sources :

Example 1 with IndexWord

use of net.didion.jwnl.data.IndexWord in project lucida by claritylab.

the class EnglishFeatureExtractor method addSyntacticFeatures.

/**
 * Adds syntactic features derived from the question's parse tree to the instance.
 * <p>
 * Up to three binary features are added:
 * <ul>
 * <li><code>MAIN_VERB.&lt;verb&gt;</code> - the head word of the parse tree, replaced by its
 * WordNet verb lemma when one is found, or <code>-</code> if there is no head word</li>
 * <li><code>WH_DET.+</code> - added if one of the wh-patterns, followed by the focus term,
 * matches the question text</li>
 * <li><code>FOCUS_ADJ.&lt;word&gt;</code> - the head word of the first RB/JJ preterminal that
 * is preceded by a WRB preterminal, if any</li>
 * </ul>
 * Nothing is added (and an error is logged) if the parse tree is <code>null</code>.
 *
 * @param instance the instance that receives the features
 * @param terms the terms of the question, joined by single spaces to rebuild the question text
 * @param parseTree syntactic parse of the question
 * @param focusTerm the focus term, may be <code>null</code>
 */
private static void addSyntacticFeatures(MutableInstance instance, List<Term> terms, String parseTree, Term focusTerm) {
    if (parseTree == null) {
        log.error("Syntactic parse of the question is null.");
        return;
    }
    Tree tree = TreeHelper.buildTree(parseTree, Tree.ENGLISH);
    // MAIN_VERB
    TreeHelper.markHeadNode(tree);
    String mainVerb = tree.getHeadWord();
    try {
        IndexWord word = Dictionary.getInstance().lookupIndexWord(POS.VERB, mainVerb);
        String lemma = null;
        if (word != null) {
            lemma = word.getLemma();
        }
        if (lemma != null) {
            mainVerb = lemma;
        }
    } catch (Exception e) {
        // best effort: keep the unlemmatized head word if the lookup fails
        log.warn("Failed to get lemma for verb '" + mainVerb + "'", e);
    }
    if (mainVerb == null) {
        mainVerb = "-";
    }
    instance.addBinary(new Feature("MAIN_VERB" + "." + mainVerb));
    // WH_DET
    if (focusTerm != null && focusTerm.getText() != null) {
        // the focus is literal text, not a pattern: quote it so that regex
        // metacharacters in the term can neither break nor alter the expression
        String quotedFocus = Pattern.quote(focusTerm.getText());
        StringBuilder questionBuilder = new StringBuilder();
        for (Term term : terms) {
            questionBuilder.append(term.getText()).append(' ');
        }
        String question = questionBuilder.toString().trim();
        for (String ptrn : whPtrns) {
            Matcher m = Pattern.compile(ptrn + SPACE_PTRN + quotedFocus + REST_PTRN).matcher(question);
            if (m.matches()) {
                instance.addBinary(new Feature("WH_DET" + ".+"));
                break;
            }
        }
    }
    // FOCUS_ADJ
    Tree focusNode = TreeHelper.findFirstPreterminalWithPrecedingPreterminal(tree, "RB|JJ", "WRB");
    if (focusNode != null) {
        instance.addBinary(new Feature("FOCUS_ADJ" + "." + focusNode.getHeadWord()));
    }
}
Also used : Matcher(java.util.regex.Matcher) Tree(edu.cmu.lti.chineseNLP.util.Tree) Term(edu.cmu.lti.javelin.qa.Term) IndexWord(net.didion.jwnl.data.IndexWord) Feature(edu.cmu.minorthird.classify.Feature)

Example 2 with IndexWord

use of net.didion.jwnl.data.IndexWord in project lucida by claritylab.

the class WordNet method isCompoundWord.

/**
 * Checks if the word exists in WordNet. Supports multi-token terms.
 * <p>
 * Returns <code>false</code> if no dictionary is available, if the word contains
 * special characters other than '.', or if the WordNet lookup fails.
 *
 * @param word a word
 * @return <code>true</code> iff the word is in WordNet
 */
public static boolean isCompoundWord(String word) {
    if (dict == null) {
        return false;
    }
    // do not look up words with special characters other than '.'
    if (word.matches(".*?[^\\w\\s\\.].*+")) {
        return false;
    }
    IndexWordSet indexWordSet;
    try {
        indexWordSet = dict.lookupAllIndexWords(word);
    } catch (JWNLException e) {
        // a failed lookup means the word could not be confirmed; previously this
        // fell through with a null set and crashed with a NullPointerException
        return false;
    }
    if (indexWordSet == null) {
        return false;
    }
    // ensure that the word, and not just a substring, was found in WordNet
    int wordTokens = word.split("\\s", -1).length;
    int wordDots = word.split("\\.", -1).length;
    for (IndexWord indexWord : indexWordSet.getIndexWordArray()) {
        String lemma = indexWord.getLemma();
        int lemmaTokens = lemma.split("\\s", -1).length;
        int lemmaDots = lemma.split("\\.", -1).length;
        if (wordTokens == lemmaTokens && wordDots == lemmaDots) {
            return true;
        }
    }
    return false;
}
Also used : IndexWordSet(net.didion.jwnl.data.IndexWordSet) JWNLException(net.didion.jwnl.JWNLException) IndexWord(net.didion.jwnl.data.IndexWord)

Example 3 with IndexWord

use of net.didion.jwnl.data.IndexWord in project lucida by claritylab.

the class WordNet method getCommonSynset.

/**
 * Retrieves the first sense (sense number 1) that the dictionary lists for a word.
 *
 * @param word the word to look up
 * @param pos the part of speech to look the word up under
 * @return the synset, or <code>null</code> if there is no dictionary, no entry
 *         for the word, or the lookup threw a <code>JWNLException</code>
 */
private static Synset getCommonSynset(String word, POS pos) {
    if (dict != null) {
        try {
            IndexWord entry = dict.lookupIndexWord(pos, word);
            return (entry == null) ? null : entry.getSense(1);
        } catch (JWNLException ignored) {
            // a failed lookup is reported to the caller as null
        }
    }
    return null;
}
Also used : Synset(net.didion.jwnl.data.Synset) JWNLException(net.didion.jwnl.JWNLException) IndexWord(net.didion.jwnl.data.IndexWord)

Example 4 with IndexWord

use of net.didion.jwnl.data.IndexWord in project lucida by claritylab.

the class WordNet method isCompoundNoun.

/**
 * Tests whether the dictionary lists the given term as a noun. Terms made up
 * of several tokens are supported.
 *
 * @param word the term to check
 * @return <code>true</code> only if a noun entry was found whose lemma has the
 *         same number of whitespace-separated and dot-separated parts as the term
 */
public static boolean isCompoundNoun(String word) {
    // nothing to look up without a dictionary; terms containing special
    // characters other than '.' are rejected without a lookup
    if (dict == null || word.matches(".*?[^\\w\\s\\.].*+")) {
        return false;
    }
    IndexWord nounEntry;
    try {
        nounEntry = dict.lookupIndexWord(POS.NOUN, word);
    } catch (JWNLException ignored) {
        // a failed lookup counts as "not found"
        return false;
    }
    if (nounEntry == null) {
        return false;
    }
    String lemma = nounEntry.getLemma();
    // compare part counts so that a hit on a mere substring of the term is rejected
    boolean sameTokenCount = word.split("\\s", -1).length == lemma.split("\\s", -1).length;
    boolean sameDotCount = word.split("\\.", -1).length == lemma.split("\\.", -1).length;
    return sameTokenCount && sameDotCount;
}
Also used : JWNLException(net.didion.jwnl.JWNLException) IndexWord(net.didion.jwnl.data.IndexWord)

Example 5 with IndexWord

use of net.didion.jwnl.data.IndexWord in project lucida by claritylab.

the class WordNet method getLemma.

/**
 * Returns the lemma the dictionary stores for a word, with underscores
 * replaced by spaces.
 *
 * @param word the word to look up
 * @param pos the part of speech to look the word up under
 * @return the lemma, or <code>null</code> if there is no dictionary, no entry
 *         for the word, or the lookup threw a <code>JWNLException</code>
 */
public static String getLemma(String word, POS pos) {
    if (dict == null) {
        return null;
    }
    IndexWord entry;
    try {
        entry = dict.lookupIndexWord(pos, word);
    } catch (JWNLException ignored) {
        // a failed lookup is reported to the caller as null
        return null;
    }
    // the stored lemma uses '_' where the returned form uses a space
    return (entry == null) ? null : entry.getLemma().replace("_", " ");
}
Also used : JWNLException(net.didion.jwnl.JWNLException) IndexWord(net.didion.jwnl.data.IndexWord)

Aggregations

IndexWord (net.didion.jwnl.data.IndexWord)7 JWNLException (net.didion.jwnl.JWNLException)4 Tree (edu.cmu.lti.chineseNLP.util.Tree)2 ArrayList (java.util.ArrayList)2 Matcher (java.util.regex.Matcher)2 Synset (net.didion.jwnl.data.Synset)2 Term (edu.cmu.lti.javelin.qa.Term)1 Feature (edu.cmu.minorthird.classify.Feature)1 IndexWordSet (net.didion.jwnl.data.IndexWordSet)1