use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in project cogcomp-nlp by CogComp.
the class WordNetFeatureExtractor method getWordFeatures.
/**
 * Extracts WordNet-based features for the token at {@code tokenPosition}.
 *
 * <p>The token is lower-cased and trimmed, its part-of-speech tag is mapped to a WordNet
 * {@code POS}, and the corresponding index word is looked up. Which features are emitted is
 * controlled by {@code featureClasses}. For every sense of the word the "all senses" features
 * are added; for the first sense returned by {@code getSenses()} the "first sense" features
 * are added as well (before the all-senses features of that same sense).
 *
 * @param ta the text annotation containing the token
 * @param tokenPosition index of the token in {@code ta}
 * @return the extracted features; an empty set if the POS tag has no WordNet counterpart, and
 *         only the "exists" features (possibly none) if WordNet has no entry for the token
 * @throws EdisonException if anything goes wrong while querying WordNet; the original
 *         exception is attached as the cause
 */
@Override
public Set<Feature> getWordFeatures(TextAnnotation ta, int tokenPosition) throws EdisonException {
    // Locale.ROOT keeps lower-casing independent of the JVM's default locale (under a Turkish
    // locale "I" would otherwise become a dotless i and the lookup key would change).
    String token = ta.getToken(tokenPosition).toLowerCase(java.util.Locale.ROOT).trim();
    String pos = WordHelpers.getPOS(ta, tokenPosition);
    POS wnPOS = WordNetHelper.getWNPOS(pos);
    if (wnPOS == null) {
        return new LinkedHashSet<>();
    }
    try {
        IndexWord iw = wnManager.getIndexWord(wnPOS, token);
        // Insertion-ordered so that the resulting feature order is deterministic.
        Set<String> feats = new LinkedHashSet<>();
        if (iw != null && this.featureClasses.contains(WordNetFeatureClass.existsEntry)) {
            feats.add("exists");
            if (POSUtils.isPOSNoun(pos)) {
                feats.add("nn+exists");
            } else if (POSUtils.isPOSVerb(pos)) {
                feats.add("vb+exists");
            } else if (POSUtils.isPOSAdjective(pos)) {
                feats.add("adj+exists");
            } else if (POSUtils.isPOSAdverb(pos)) {
                feats.add("adv+exists");
            }
        }
        if (iw == null) {
            // No WordNet entry: nothing beyond the (empty) "exists" features can be produced.
            return FeatureUtilities.getFeatures(feats);
        }
        if (featureClasses.contains(WordNetFeatureClass.lemma)) {
            feats.add("lemma:" + iw.getLemma());
        }
        boolean first = true;
        for (Synset synset : iw.getSenses()) {
            if (first) {
                first = false;
                // Features restricted to the first sense in the order WordNet returns them.
                addSynsetFeature(feats, synset, WordNetFeatureClass.synsetsFirstSense, "syns1:");
                addLexFileNameFeature(feats, synset, WordNetFeatureClass.lexicographerFileNamesFirstSense, "lex-file1:");
                addVerbFrameFeature(feats, synset, WordNetFeatureClass.verbFramesFirstSense, "verb-frame1:");
                addSynonymFeature(feats, synset, WordNetFeatureClass.synonymsFirstSense, "syn1:");
                addRelatedWordsFeatures(feats, synset, PointerType.HYPERNYM, WordNetFeatureClass.hypernymsFirstSense, "hyp1:");
                addRelatedWordsFeatures(feats, synset, PointerType.PART_HOLONYM, WordNetFeatureClass.partHolonymsFirstSense, "part-holo1:");
                addRelatedWordsFeatures(feats, synset, PointerType.SUBSTANCE_HOLONYM, WordNetFeatureClass.substanceHolonymsFirstSense, "subs-holo1:");
                addRelatedWordsFeatures(feats, synset, PointerType.MEMBER_HOLONYM, WordNetFeatureClass.memberHolonymsFirstSense, "mem-holo1:");
                addRelatedWordsLexFileFeatures(feats, synset, PointerType.HYPERNYM, WordNetFeatureClass.hypernymFirstSenseLexicographerFileNames, "hyp1-lex-file:");
                addRelatedWordsLexFileFeatures(feats, synset, PointerType.PART_HOLONYM, WordNetFeatureClass.partHolonymsFirstSenseLexicographerFileNames, "part-holo1-lex-file:");
                addRelatedWordsLexFileFeatures(feats, synset, PointerType.SUBSTANCE_HOLONYM, WordNetFeatureClass.substanceHolonymsFirstSenseLexicographerFileNames, "subst-holo1-lex-file:");
                addRelatedWordsLexFileFeatures(feats, synset, PointerType.MEMBER_HOLONYM, WordNetFeatureClass.memberHolonymsFirstSenseLexicographerFileNames, "mem-holo1-lex-file:");
                addPointerFeature(feats, synset, WordNetFeatureClass.pointersFirstSense, "ptrs1:");
            }
            // Features accumulated over every sense (including the first one).
            addSynsetFeature(feats, synset, WordNetFeatureClass.synsetsAllSenses, "syns:");
            addLexFileNameFeature(feats, synset, WordNetFeatureClass.lexicographerFileNamesAllSenses, "lex-file:");
            addVerbFrameFeature(feats, synset, WordNetFeatureClass.verbFramesAllSenses, "vb-frame:");
            addSynonymFeature(feats, synset, WordNetFeatureClass.synonymsAllSenses, "syn:");
            addRelatedWordsFeatures(feats, synset, PointerType.HYPERNYM, WordNetFeatureClass.hypernymsAllSenses, "hyp:");
            addRelatedWordsFeatures(feats, synset, PointerType.PART_HOLONYM, WordNetFeatureClass.partHolonymsAllSenses, "part-holo:");
            addRelatedWordsFeatures(feats, synset, PointerType.SUBSTANCE_HOLONYM, WordNetFeatureClass.substanceHolonymsAllSenses, "subst-holo:");
            addRelatedWordsFeatures(feats, synset, PointerType.MEMBER_HOLONYM, WordNetFeatureClass.memberHolonymsAllSenses, "mem-holo:");
            addRelatedWordsLexFileFeatures(feats, synset, PointerType.HYPERNYM, WordNetFeatureClass.hypernymAllSensesLexicographerFileNames, "hyp-lex-file:");
            addRelatedWordsLexFileFeatures(feats, synset, PointerType.PART_HOLONYM, WordNetFeatureClass.partHolonymsAllSensesLexicographerFileNames, "part-holo-lex-file:");
            addRelatedWordsLexFileFeatures(feats, synset, PointerType.SUBSTANCE_HOLONYM, WordNetFeatureClass.substanceHolonymsAllSensesLexicographerFileNames, "subst-holo-lex-file:");
            addRelatedWordsLexFileFeatures(feats, synset, PointerType.MEMBER_HOLONYM, WordNetFeatureClass.memberHolonymsAllSensesLexicographerFileNames, "mem-holo-lex-file:");
            addPointerFeature(feats, synset, WordNetFeatureClass.pointersAllSenses, "ptrs:");
        }
        return FeatureUtilities.getFeatures(feats);
    } catch (Exception ex) {
        // Keep the original message format but attach the cause so the underlying stack trace
        // is not lost. NOTE(review): assumes EdisonException(String) leaves the cause unset,
        // otherwise initCause would throw IllegalStateException - confirm.
        EdisonException wrapped = new EdisonException("Error accessing WordNet: " + ex.getMessage());
        wrapped.initCause(ex);
        throw wrapped;
    }
}
use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in project cogcomp-nlp by CogComp.
the class BrownClusterFeatureExtractor method getWordFeatures.
/**
 * Returns the Brown-cluster features for the word at {@code wordPosition}.
 *
 * <p>The cluster view is generated and added to {@code ta} if it is not present yet. Features
 * are then collected from every constituent of that view covering the token.
 *
 * @param ta the text annotation containing the word
 * @param wordPosition index of the token in {@code ta}
 * @return the union of the features produced by {@code getBrownClusters} for each covering
 *         constituent, in iteration order
 * @throws EdisonException if the view generator fails; the {@code AnnotatorException} is
 *         attached as the cause
 */
@Override
public Set<Feature> getWordFeatures(TextAnnotation ta, int wordPosition) throws EdisonException {
    lazyLoadClusters(brownClustersFile);
    String viewName = viewGenerator.getViewName();
    if (!ta.hasView(viewName)) {
        synchronized (BrownClusterFeatureExtractor.class) {
            // Re-check under the lock: another thread may have added the view between the
            // unsynchronized check above and acquiring the lock.
            if (!ta.hasView(viewName)) {
                View generated;
                try {
                    generated = viewGenerator.getView(ta);
                } catch (AnnotatorException e) {
                    // Propagate with the cause attached instead of printing the stack trace.
                    // NOTE(review): assumes EdisonException(String) leaves the cause unset,
                    // otherwise initCause would throw IllegalStateException - confirm.
                    EdisonException wrapped = new EdisonException(e.getMessage());
                    wrapped.initCause(e);
                    throw wrapped;
                }
                ta.addView(viewName, generated);
            }
        }
    }
    SpanLabelView view = (SpanLabelView) ta.getView(viewName);
    String word = ta.getToken(wordPosition);
    // A word can carry several Brown clusters, so every constituent covering the token is
    // used rather than only the first label of the view.
    Set<Feature> features = new LinkedHashSet<>();
    for (Constituent c : view.getConstituentsCoveringToken(wordPosition)) {
        features.addAll(getBrownClusters(word, c.getLabel()));
    }
    return features;
}
use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in project cogcomp-nlp by CogComp.
the class CorelexFeatureExtractor method loadDataFromClassPath.
/**
 * Loads the CORLEX lemma-to-type table from the classpath resource {@code CORLEX_FILE} into
 * {@code data}. Does nothing if {@code data} already contains entries.
 *
 * <p>Empty lines and lines starting with {@code #} are skipped. A line contributes an entry
 * only if splitting it on a tab yields exactly two parts; other lines are ignored.
 *
 * @throws EdisonException if the resource cannot be found; the {@code FileNotFoundException}
 *         is attached as the cause
 */
private static synchronized void loadDataFromClassPath() throws EdisonException {
    if (data.size() > 0) {
        return;
    }
    // Announce the load before touching the resource so a failure is preceded by context.
    log.info("Loading CORLEX from {}", CORLEX_FILE);
    List<String> lines;
    try {
        lines = LineIO.readFromClasspath(CORLEX_FILE);
    } catch (FileNotFoundException e) {
        // NOTE(review): assumes EdisonException(String) leaves the cause unset, otherwise
        // initCause would throw IllegalStateException - confirm.
        EdisonException wrapped = new EdisonException("CORLEX not found in class path at " + CORLEX_FILE);
        wrapped.initCause(e);
        throw wrapped;
    }
    for (String line : lines) {
        if (line.length() == 0 || line.startsWith("#")) {
            continue;
        }
        String[] parts = line.split("\t");
        if (parts.length == 2) {
            String lemma = parts[0].trim();
            // The type string is interned before being stored.
            String type = parts[1].trim().intern();
            data.put(lemma, type);
        }
    }
    log.info("Finished loading CORLEX. Found {} nouns", data.size());
}
use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in project cogcomp-nlp by CogComp.
the class ParsePhraseTypeOnly method getFeatures.
/**
 * Produces a single discrete feature holding the label of the parse phrase that the tree view
 * named {@code parseViewname} returns for the given constituent.
 *
 * @param c the constituent to look up in the parse tree view
 * @return a set containing one feature (the phrase label), or an empty set if
 *         {@code getParsePhrase} returns {@code null}
 * @throws EdisonException wrapping any exception raised by {@code getParsePhrase}
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TreeView parseTree = (TreeView) c.getTextAnnotation().getView(parseViewname);

    Constituent parsePhrase;
    try {
        parsePhrase = parseTree.getParsePhrase(c);
    } catch (Exception cause) {
        throw new EdisonException(cause);
    }

    Set<Feature> result = new LinkedHashSet<>();
    if (parsePhrase == null) {
        return result;
    }
    result.add(DiscreteFeature.create(parsePhrase.getLabel()));
    return result;
}
use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in project cogcomp-nlp by CogComp.
the class RogetThesaurusFeatures method loadFromClassPath.
// Legacy loading path: locates the resource named by fileName on the classpath and hands the
// first match to loadWithURL. Skipped entirely when the loaded flag is already set.
// NOTE(review): the flag is not updated here; presumably loadWithURL sets it - confirm.
// Throws EdisonException when no matching classpath resource is found.
private synchronized void loadFromClassPath() throws Exception {
    if (!loaded) {
        List<URL> candidates = IOUtils.lsResources(RogetThesaurusFeatures.class, fileName);
        if (candidates.isEmpty()) {
            throw new EdisonException("Cannot find " + fileName + " in the classpath");
        }
        URL firstMatch = candidates.get(0);
        loadWithURL(firstMatch);
    }
}
Aggregations