Search in sources :

Example 36 with EdisonException

use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in project cogcomp-nlp by CogComp.

the class WordNetFeatureExtractor method getWordFeatures.

/**
 * Extracts WordNet-based features for the token at {@code tokenPosition}.
 *
 * <p>The token is lower-cased and trimmed, its part-of-speech tag is mapped to a WordNet
 * {@code POS}, and the matching index word is looked up through {@code wnManager}. Which
 * features are produced is controlled by the entries of {@code featureClasses}:
 * an "exists" marker (optionally POS-qualified), the lemma, and, for every sense of the
 * index word, the features contributed by the {@code add*Feature} helpers. The first sense
 * additionally contributes the "first sense" variants of those features.
 *
 * @param ta the text annotation that contains the token
 * @param tokenPosition index of the token within {@code ta}
 * @return the extracted features; an empty set when the token's POS tag has no WordNet
 *         counterpart
 * @throws EdisonException if anything fails while querying WordNet; the original failure is
 *         attached as the cause
 */
@Override
public Set<Feature> getWordFeatures(TextAnnotation ta, int tokenPosition) throws EdisonException {
    // Locale.ROOT keeps lower-casing independent of the JVM default locale (e.g. the Turkish
    // dotless i), which would otherwise make the WordNet lookup miss.
    String token = ta.getToken(tokenPosition).toLowerCase(java.util.Locale.ROOT).trim();
    String pos = WordHelpers.getPOS(ta, tokenPosition);
    POS wnPOS = WordNetHelper.getWNPOS(pos);
    if (wnPOS == null) {
        return new LinkedHashSet<>();
    }
    try {
        IndexWord iw = wnManager.getIndexWord(wnPOS, token);
        Set<String> feats = new LinkedHashSet<>();
        if (this.featureClasses.contains(WordNetFeatureClass.existsEntry) && iw != null) {
            feats.add("exists");
            if (POSUtils.isPOSNoun(pos)) {
                feats.add("nn+exists");
            } else if (POSUtils.isPOSVerb(pos)) {
                feats.add("vb+exists");
            } else if (POSUtils.isPOSAdjective(pos)) {
                feats.add("adj+exists");
            } else if (POSUtils.isPOSAdverb(pos)) {
                feats.add("adv+exists");
            }
        }
        // No WordNet entry: nothing beyond what has been collected so far can be added.
        if (iw == null) {
            return FeatureUtilities.getFeatures(feats);
        }
        if (featureClasses.contains(WordNetFeatureClass.lemma)) {
            feats.add("lemma:" + iw.getLemma());
        }
        boolean first = true;
        for (Synset synset : iw.getSenses()) {
            if (first) {
                // Only the first sense returned by getSenses() feeds the "*FirstSense" features.
                first = false;
                addSynsetFeature(feats, synset, WordNetFeatureClass.synsetsFirstSense, "syns1:");
                addLexFileNameFeature(feats, synset, WordNetFeatureClass.lexicographerFileNamesFirstSense, "lex-file1:");
                addVerbFrameFeature(feats, synset, WordNetFeatureClass.verbFramesFirstSense, "verb-frame1:");
                addSynonymFeature(feats, synset, WordNetFeatureClass.synonymsFirstSense, "syn1:");
                addRelatedWordsFeatures(feats, synset, PointerType.HYPERNYM, WordNetFeatureClass.hypernymsFirstSense, "hyp1:");
                addRelatedWordsFeatures(feats, synset, PointerType.PART_HOLONYM, WordNetFeatureClass.partHolonymsFirstSense, "part-holo1:");
                addRelatedWordsFeatures(feats, synset, PointerType.SUBSTANCE_HOLONYM, WordNetFeatureClass.substanceHolonymsFirstSense, "subs-holo1:");
                addRelatedWordsFeatures(feats, synset, PointerType.MEMBER_HOLONYM, WordNetFeatureClass.memberHolonymsFirstSense, "mem-holo1:");
                addRelatedWordsLexFileFeatures(feats, synset, PointerType.HYPERNYM, WordNetFeatureClass.hypernymFirstSenseLexicographerFileNames, "hyp1-lex-file:");
                addRelatedWordsLexFileFeatures(feats, synset, PointerType.PART_HOLONYM, WordNetFeatureClass.partHolonymsFirstSenseLexicographerFileNames, "part-holo1-lex-file:");
                addRelatedWordsLexFileFeatures(feats, synset, PointerType.SUBSTANCE_HOLONYM, WordNetFeatureClass.substanceHolonymsFirstSenseLexicographerFileNames, "subst-holo1-lex-file:");
                addRelatedWordsLexFileFeatures(feats, synset, PointerType.MEMBER_HOLONYM, WordNetFeatureClass.memberHolonymsFirstSenseLexicographerFileNames, "mem-holo1-lex-file:");
                addPointerFeature(feats, synset, WordNetFeatureClass.pointersFirstSense, "ptrs1:");
            }
            // Every sense (including the first) feeds the "*AllSenses" features.
            addSynsetFeature(feats, synset, WordNetFeatureClass.synsetsAllSenses, "syns:");
            addLexFileNameFeature(feats, synset, WordNetFeatureClass.lexicographerFileNamesAllSenses, "lex-file:");
            addVerbFrameFeature(feats, synset, WordNetFeatureClass.verbFramesAllSenses, "vb-frame:");
            addSynonymFeature(feats, synset, WordNetFeatureClass.synonymsAllSenses, "syn:");
            addRelatedWordsFeatures(feats, synset, PointerType.HYPERNYM, WordNetFeatureClass.hypernymsAllSenses, "hyp:");
            addRelatedWordsFeatures(feats, synset, PointerType.PART_HOLONYM, WordNetFeatureClass.partHolonymsAllSenses, "part-holo:");
            addRelatedWordsFeatures(feats, synset, PointerType.SUBSTANCE_HOLONYM, WordNetFeatureClass.substanceHolonymsAllSenses, "subst-holo:");
            addRelatedWordsFeatures(feats, synset, PointerType.MEMBER_HOLONYM, WordNetFeatureClass.memberHolonymsAllSenses, "mem-holo:");
            addRelatedWordsLexFileFeatures(feats, synset, PointerType.HYPERNYM, WordNetFeatureClass.hypernymAllSensesLexicographerFileNames, "hyp-lex-file:");
            addRelatedWordsLexFileFeatures(feats, synset, PointerType.PART_HOLONYM, WordNetFeatureClass.partHolonymsAllSensesLexicographerFileNames, "part-holo-lex-file:");
            addRelatedWordsLexFileFeatures(feats, synset, PointerType.SUBSTANCE_HOLONYM, WordNetFeatureClass.substanceHolonymsAllSensesLexicographerFileNames, "subst-holo-lex-file:");
            addRelatedWordsLexFileFeatures(feats, synset, PointerType.MEMBER_HOLONYM, WordNetFeatureClass.memberHolonymsAllSensesLexicographerFileNames, "mem-holo-lex-file:");
            addPointerFeature(feats, synset, WordNetFeatureClass.pointersAllSenses, "ptrs:");
        }
        return FeatureUtilities.getFeatures(feats);
    } catch (Exception ex) {
        // Keep the message callers already see, but attach the original exception so its
        // type and stack trace are not lost.
        // NOTE(review): relies on EdisonException(String) leaving the cause unset — confirm.
        EdisonException wrapped = new EdisonException("Error accessing WordNet: " + ex.getMessage());
        wrapped.initCause(ex);
        throw wrapped;
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) JWNLException(net.didion.jwnl.JWNLException) FileNotFoundException(java.io.FileNotFoundException) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException)

Example 37 with EdisonException

use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in project cogcomp-nlp by CogComp.

the class BrownClusterFeatureExtractor method getWordFeatures.

/**
 * Produces Brown-cluster features for the word at {@code wordPosition}.
 *
 * <p>If {@code ta} does not yet carry the view named by {@code viewGenerator}, the view is
 * generated and added first. Features are then collected from the label of every
 * constituent of that view covering the token.
 *
 * @param ta the text annotation that contains the word
 * @param wordPosition index of the token within {@code ta}
 * @return the union of the features returned by {@code getBrownClusters} for each covering
 *         constituent's label
 * @throws EdisonException if the view cannot be generated; the underlying
 *         {@code AnnotatorException} is attached as the cause
 */
@Override
public Set<Feature> getWordFeatures(TextAnnotation ta, int wordPosition) throws EdisonException {
    lazyLoadClusters(brownClustersFile);
    String viewName = viewGenerator.getViewName();
    if (!ta.hasView(viewName)) {
        synchronized (BrownClusterFeatureExtractor.class) {
            // Re-check under the lock: another thread may have added the view while this
            // one was waiting, in which case generating it again would be wasted work.
            if (!ta.hasView(viewName)) {
                View generated;
                try {
                    generated = viewGenerator.getView(ta);
                } catch (AnnotatorException e) {
                    // Same message as before, but with the cause attached instead of
                    // printing the stack trace to stderr and discarding it.
                    // NOTE(review): relies on EdisonException(String) leaving the cause unset — confirm.
                    EdisonException wrapped = new EdisonException(e.getMessage());
                    wrapped.initCause(e);
                    throw wrapped;
                }
                ta.addView(viewName, generated);
            }
        }
    }
    SpanLabelView view = (SpanLabelView) ta.getView(viewName);
    String word = ta.getToken(wordPosition);
    Set<Feature> features = new LinkedHashSet<>();
    // A word can have multiple Brown clusters, so every constituent covering the token is
    // consulted rather than only the first label for the position.
    for (Constituent c : view.getConstituentsCoveringToken(wordPosition)) {
        features.addAll(getBrownClusters(word, c.getLabel()));
    }
    return features;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 38 with EdisonException

use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in project cogcomp-nlp by CogComp.

the class CorelexFeatureExtractor method loadDataFromClassPath.

/**
 * Loads the CORLEX table from the classpath resource {@code CORLEX_FILE} into {@code data}.
 *
 * <p>Does nothing when {@code data} already has entries. Empty lines and lines starting
 * with {@code #} are skipped. Every remaining line that splits into exactly two
 * tab-separated parts is stored as (trimmed first part, trimmed and interned second part);
 * lines with any other number of parts are ignored.
 *
 * @throws EdisonException if the resource cannot be found on the classpath; the underlying
 *         {@code FileNotFoundException} is attached as the cause
 */
private static synchronized void loadDataFromClassPath() throws EdisonException {
    if (data.size() > 0) {
        return;
    }
    List<String> lines;
    try {
        lines = LineIO.readFromClasspath(CORLEX_FILE);
    } catch (FileNotFoundException e) {
        // Attach the original exception so the failing lookup is not lost.
        // NOTE(review): relies on EdisonException(String) leaving the cause unset — confirm.
        EdisonException wrapped = new EdisonException("CORLEX not found in class path at " + CORLEX_FILE);
        wrapped.initCause(e);
        throw wrapped;
    }
    log.info("Loading CORLEX from {}", CORLEX_FILE);
    for (String line : lines) {
        if (line.isEmpty() || line.startsWith("#")) {
            continue;
        }
        String[] parts = line.split("\t");
        if (parts.length == 2) {
            String lemma = parts[0].trim();
            String type = parts[1].trim().intern();
            data.put(lemma, type);
        }
    }
    log.info("Finished loading CORLEX. Found {} nouns", data.size());
}
Also used : FileNotFoundException(java.io.FileNotFoundException) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException)

Example 39 with EdisonException

use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in project cogcomp-nlp by CogComp.

the class ParsePhraseTypeOnly method getFeatures.

/**
 * Emits the label of the parse phrase associated with {@code c} as a single discrete
 * feature.
 *
 * @param c the constituent to look up in the parse view named {@code parseViewname}
 * @return a set holding one feature built from the phrase label, or an empty set when
 *         {@code getParsePhrase} returns {@code null}
 * @throws EdisonException wrapping any exception raised by the parse phrase lookup
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final TreeView parseTree = (TreeView) c.getTextAnnotation().getView(parseViewname);
    final Constituent parsePhrase;
    try {
        parsePhrase = parseTree.getParsePhrase(c);
    } catch (Exception failure) {
        throw new EdisonException(failure);
    }
    final Set<Feature> result = new LinkedHashSet<>();
    if (parsePhrase == null) {
        return result;
    }
    result.add(DiscreteFeature.create(parsePhrase.getLabel()));
    return result;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException)

Example 40 with EdisonException

use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in project cogcomp-nlp by CogComp.

the class RogetThesaurusFeatures method loadFromClassPath.

// Old way: loading the resources from the classpath.
// Returns immediately when `loaded` is already set; otherwise lists the classpath
// resources matching `fileName` and hands the first one to loadWithURL. Throws an
// EdisonException when no matching resource is found.
private synchronized void loadFromClassPath() throws Exception {
    if (!loaded) {
        final List<URL> candidates = IOUtils.lsResources(RogetThesaurusFeatures.class, fileName);
        if (candidates.isEmpty()) {
            throw new EdisonException("Cannot find " + fileName + " in the classpath");
        }
        loadWithURL(candidates.get(0));
    }
}
Also used : EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) URL(java.net.URL)

Aggregations

EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)41 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)22 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)22 Feature (edu.illinois.cs.cogcomp.edison.features.Feature)17 DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature)15 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)13 LinkedHashSet (java.util.LinkedHashSet)12 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)8 WordNetFeatureExtractor (edu.illinois.cs.cogcomp.edison.features.factory.WordNetFeatureExtractor)8 HashSet (java.util.HashSet)7 Test (org.junit.Test)6 Set (java.util.Set)5 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)4 ArrayList (java.util.ArrayList)4 RealFeature (edu.illinois.cs.cogcomp.edison.features.RealFeature)3 SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)2 FileNotFoundException (java.io.FileNotFoundException)2 IOException (java.io.IOException)2 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)1 BrownClusterFeatureExtractor (edu.illinois.cs.cogcomp.edison.features.factory.BrownClusterFeatureExtractor)1