use of net.didion.jwnl.data.IndexWord in project lucida by claritylab.
the class EnglishFeatureExtractor method addSyntacticFeatures.
/**
 * Adds syntactic features derived from the parse tree of a question to the
 * given instance:
 * <ul>
 * <li><code>MAIN_VERB.&lt;verb&gt;</code>: head word of the parse tree, replaced
 * by its WordNet verb lemma when one is found, or "-" if there is no head
 * word</li>
 * <li><code>WH_DET.+</code>: added if one of the wh-patterns, directly followed
 * by the focus text, matches the question</li>
 * <li><code>FOCUS_ADJ.&lt;word&gt;</code>: head word of the first "RB|JJ"
 * preterminal that is preceded by a "WRB" preterminal, if any</li>
 * </ul>
 * If the parse tree string is <code>null</code>, an error is logged and no
 * features are added.
 *
 * @param instance instance the binary features are added to
 * @param terms terms of the question; their texts are joined with single
 *        spaces to rebuild the question string
 * @param parseTree syntactic parse of the question, may be <code>null</code>
 * @param focusTerm focus term of the question, may be <code>null</code>
 */
private static void addSyntacticFeatures(MutableInstance instance, List<Term> terms, String parseTree, Term focusTerm) {
    if (parseTree == null) {
        log.error("Syntactic parse of the question is null.");
        return;
    }
    Tree tree = TreeHelper.buildTree(parseTree, Tree.ENGLISH);

    // MAIN_VERB
    TreeHelper.markHeadNode(tree);
    String mainVerb = tree.getHeadWord();
    if (mainVerb != null) {
        // only consult WordNet when there is a head word to look up
        try {
            IndexWord word = Dictionary.getInstance().lookupIndexWord(POS.VERB, mainVerb);
            String lemma = (word != null) ? word.getLemma() : null;
            if (lemma != null)
                mainVerb = lemma;
        } catch (Exception e) {
            // best effort: fall back to the unlemmatized head word
            log.warn("Failed to get lemma for verb '" + mainVerb + "'", e);
        }
    }
    if (mainVerb == null)
        mainVerb = "-";
    instance.addBinary(new Feature("MAIN_VERB" + "." + mainVerb));

    // WH_DET
    if (focusTerm != null && focusTerm.getText() != null) {
        // the focus is literal text, so it must be quoted before it is
        // embedded in a regular expression
        String quotedFocus = Pattern.quote(focusTerm.getText());
        StringBuilder joined = new StringBuilder();
        for (Term term : terms)
            joined.append(term.getText()).append(' ');
        String question = joined.toString().trim();
        for (String ptrn : whPtrns) {
            Matcher m = Pattern.compile(ptrn + SPACE_PTRN + quotedFocus + REST_PTRN).matcher(question);
            if (m.matches()) {
                instance.addBinary(new Feature("WH_DET" + ".+"));
                break;
            }
        }
    }

    // FOCUS_ADJ
    Tree focusNode = TreeHelper.findFirstPreterminalWithPrecedingPreterminal(tree, "RB|JJ", "WRB");
    if (focusNode != null)
        instance.addBinary(new Feature("FOCUS_ADJ" + "." + focusNode.getHeadWord()));
}
use of net.didion.jwnl.data.IndexWord in project lucida by claritylab.
the class WordNet method isCompoundWord.
/**
 * Checks if the word exists in WordNet. Supports multi-token terms.
 * <p>
 * A match is only accepted if the lemma of a returned index word has the
 * same number of whitespace-separated tokens and the same number of dots as
 * the given word, to ensure that the word itself, and not just a substring,
 * was found.
 *
 * @param word a word
 * @return <code>true</code> iff the word is in WordNet, <code>false</code>
 *         also if no dictionary is available, the word contains special
 *         characters other than '.', or the lookup fails
 */
public static boolean isCompoundWord(String word) {
    if (dict == null)
        return false;
    // do not look up words with special characters other than '.'
    if (word.matches(".*?[^\\w\\s\\.].*+"))
        return false;

    IndexWordSet indexWordSet;
    try {
        indexWordSet = dict.lookupAllIndexWords(word);
    } catch (JWNLException e) {
        // a failed lookup means the word could not be confirmed in WordNet
        return false;
    }
    if (indexWordSet == null)
        return false;

    // ensure that the word, and not just a substring, was found in WordNet
    int wordTokens = word.split("\\s", -1).length;
    int wordDots = word.split("\\.", -1).length;
    for (IndexWord indexWord : indexWordSet.getIndexWordArray()) {
        String lemma = indexWord.getLemma();
        int lemmaTokens = lemma.split("\\s", -1).length;
        int lemmaDots = lemma.split("\\.", -1).length;
        if (wordTokens == lemmaTokens && wordDots == lemmaDots)
            return true;
    }
    return false;
}
use of net.didion.jwnl.data.IndexWord in project lucida by claritylab.
the class WordNet method getCommonSynset.
/**
 * Retrieves the first (most common) sense of a word for the given part of
 * speech.
 *
 * @param word a word
 * @param pos its part of speech
 * @return synset or <code>null</code> if no dictionary is available, the word
 *         is not found, or the lookup failed
 */
private static Synset getCommonSynset(String word, POS pos) {
    if (dict == null)
        return null;
    try {
        IndexWord entry = dict.lookupIndexWord(pos, word);
        return (entry != null) ? entry.getSense(1) : null;
    } catch (JWNLException ignored) {
        // a failed lookup is reported to the caller as null
        return null;
    }
}
use of net.didion.jwnl.data.IndexWord in project lucida by claritylab.
the class WordNet method isCompoundNoun.
/**
 * Tests whether WordNet lists the word as a noun. Multi-token terms are
 * supported.
 *
 * @param word a word
 * @return <code>true</code> iff the word is a noun
 */
public static boolean isCompoundNoun(String word) {
    if (dict == null)
        return false;
    // words with special characters other than '.' are never looked up
    if (word.matches(".*?[^\\w\\s\\.].*+"))
        return false;

    IndexWord nounEntry;
    try {
        nounEntry = dict.lookupIndexWord(POS.NOUN, word);
    } catch (JWNLException ignored) {
        // a failed lookup is treated like a missing entry
        return false;
    }
    if (nounEntry == null)
        return false;

    // the lemma must have as many tokens and dots as the word, otherwise
    // only a substring of the word was found in WordNet
    String lemma = nounEntry.getLemma();
    boolean sameTokenCount = word.split("\\s", -1).length == lemma.split("\\s", -1).length;
    boolean sameDotCount = word.split("\\.", -1).length == lemma.split("\\.", -1).length;
    return sameTokenCount && sameDotCount;
}
use of net.didion.jwnl.data.IndexWord in project lucida by claritylab.
the class WordNet method getLemma.
/**
 * Retrieves the lemma of a word for the given part of speech, with
 * underscores replaced by spaces.
 *
 * @param word a word
 * @param pos its part of speech
 * @return lemma or <code>null</code> if no dictionary is available, the word
 *         is not found, or the lookup failed
 */
public static String getLemma(String word, POS pos) {
    if (dict == null)
        return null;

    IndexWord entry;
    try {
        entry = dict.lookupIndexWord(pos, word);
    } catch (JWNLException ignored) {
        // a failed lookup is reported to the caller as null
        return null;
    }
    return (entry == null) ? null : entry.getLemma().replace("_", " ");
}
Aggregations