Use of net.didion.jwnl.data.IndexWordSet in project lucida by claritylab.
From the class WordNet, method isCompoundWord.
/**
 * Checks if the word exists in WordNet. Supports multi-token terms.
 * <p>
 * A lookup result is accepted only if its lemma has the same number of
 * whitespace-separated tokens and the same number of '.'-separated parts as
 * the word, so that a hit on a mere substring of the word is not counted.
 *
 * @param word a word
 * @return <code>true</code> iff the word is in WordNet; <code>false</code> if
 *         no dictionary is available, the word is <code>null</code> or
 *         contains special characters other than '.', or the lookup fails
 */
public static boolean isCompoundWord(String word) {
    if (dict == null || word == null) {
        return false;
    }
    // do not look up words with special characters other than '.'
    if (word.matches(".*?[^\\w\\s\\.].*+")) {
        return false;
    }

    IndexWordSet indexWordSet;
    try {
        indexWordSet = dict.lookupAllIndexWords(word);
    } catch (JWNLException e) {
        // A failed lookup means the word cannot be confirmed; report "not
        // found" instead of dereferencing a null result set further down.
        return false;
    }
    if (indexWordSet == null) {
        return false;
    }

    // ensure that the word, and not just a substring, was found in WordNet
    // (limit -1 keeps trailing empty strings, so trailing separators count)
    int wordTokens = word.split("\\s", -1).length;
    int wordDots = word.split("\\.", -1).length;
    for (IndexWord indexWord : indexWordSet.getIndexWordArray()) {
        String lemma = indexWord.getLemma();
        int lemmaTokens = lemma.split("\\s", -1).length;
        int lemmaDots = lemma.split("\\.", -1).length;
        if (wordTokens == lemmaTokens && wordDots == lemmaDots) {
            return true;
        }
    }
    return false;
}
Aggregations