Search in sources :

Example 56 with Feature

use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.

the class WordTypeInformation method getFeatures.

/**
 * Generates word-type features (all-capitalized / all-digits / all-non-letters) for the token
 * forms in a window around the given constituent.
 *
 * <p>This feature extractor assumes that the TOKENS view has been generated in the
 * constituent's TextAnnotation. The window forms are obtained from {@code getwindowkfrom} with
 * {@code k = 2}; per the original note this is the [-2, +2] window of forms (original text),
 * and entries that are {@code null} are skipped.
 *
 * <p>For window index {@code i}, three discrete features are emitted:
 * {@code WordTypeInformation:c<i>(..)} (every character is upper case),
 * {@code WordTypeInformation:d<i>(..)} (every character is a digit) and
 * {@code WordTypeInformation:p<i>(..)} (no character is a letter).
 *
 * <p>NOTE(review): the non-letter feature previously reused the {@code "c"} id of the
 * capitalization feature, so the two collided inside the result set. It now uses its own
 * {@code "p"} id; models trained against the colliding names must be retrained.
 *
 * @param c the constituent (a single word/token) to extract features for
 * @return the features in insertion order
 * @throws EdisonException declared by the extractor contract
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    TOKENS = ta.getView(ViewNames.TOKENS);
    // We can assume that the constituent in this case is a Word(Token) described by the LBJ
    // chunk definition
    int startspan = c.getStartSpan();
    int endspan = c.getEndSpan();
    // Window size handed to the helper.
    int k = 2;
    // All our constituents are words(tokens)
    String[] forms = getwindowkfrom(TOKENS, startspan, endspan, k);
    String classifier = "WordTypeInformation";
    Set<Feature> __result = new LinkedHashSet<Feature>();
    for (int i = 0; i < forms.length; i++) {
        String form = forms[i];
        if (form == null) {
            continue;
        }
        // All three flags start true and are cleared by the first offending character, so an
        // empty form reports true for each of them (unchanged from the original behaviour).
        boolean allCapitalized = true, allDigits = true, allNonLetters = true;
        for (int j = 0; j < form.length(); ++j) {
            char ch = form.charAt(j);
            allCapitalized &= Character.isUpperCase(ch);
            allDigits &= Character.isDigit(ch);
            allNonLetters &= !Character.isLetter(ch);
        }
        __result.add(new DiscreteFeature(classifier + ":" + ("c" + i) + "(" + allCapitalized + ")"));
        __result.add(new DiscreteFeature(classifier + ":" + ("d" + i) + "(" + allDigits + ")"));
        // Distinct id so this feature no longer collides with the "c" (capitalized) feature.
        __result.add(new DiscreteFeature(classifier + ":" + ("p" + i) + "(" + allNonLetters + ")"));
    }
    return __result;
}
Also used : DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) RealFeature(edu.illinois.cs.cogcomp.edison.features.RealFeature)

Example 57 with Feature

use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.

the class Affixes method getFeatures.

/**
 * Emits prefix and suffix features for the surface form of the given constituent.
 *
 * <p>Prefixes of length 3 and 4 and suffixes of length 1 through 4 are produced, each only
 * when the surface form is strictly longer than the affix. Feature names have the shape
 * {@code Affixes:p|(<prefix>)} and {@code Affixes:s|(<suffix>)}.
 *
 * @param c the constituent whose surface form is inspected
 * @return the affix features in insertion order
 * @throws EdisonException declared by the extractor contract
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final String classifier = "Affixes";
    TextAnnotation annotation = c.getTextAnnotation();
    // The TOKENS view is fetched and stored on the instance, exactly as before.
    TOKENS = annotation.getView(ViewNames.TOKENS);
    Set<Feature> features = new LinkedHashSet<Feature>();
    final String surface = c.getSurfaceForm();
    final int length = surface.length();
    // An affix of size n is only taken when length > n, i.e. n <= length - 1.
    final int longestAffix = Math.min(4, length - 1);

    for (int size = 3; size <= longestAffix; size++) {
        String prefix = surface.substring(0, size);
        features.add(new DiscreteFeature(classifier + ":" + "p|" + "(" + prefix + ")"));
    }

    for (int size = 1; size <= longestAffix; size++) {
        String suffix = surface.substring(length - size);
        features.add(new DiscreteFeature(classifier + ":" + "s|" + "(" + suffix + ")"));
    }

    return features;
}
Also used : DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) RealFeature(edu.illinois.cs.cogcomp.edison.features.RealFeature)

Example 58 with Feature

use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.

the class ChunkWindowThreeBefore method getFeatures.

/**
 * Generates discrete features from the chunk labels of the tokens preceding the constituent.
 *
 * <p>This feature extractor assumes that the TOKENS view and the SHALLOW_PARSE view have been
 * generated in the constituent's TextAnnotation. The preceding tokens are obtained from
 * {@code getwordskfrom} with {@code k = -2} (the two words before, per the original note).
 * For the token at window position {@code i} the feature
 * {@code ChunkWindowThreeBefore:<i>(<label>)} is emitted, using the first label that covers
 * the token's span.
 *
 * <p>A token with no covering chunk label is skipped with a warning instead of raising an
 * {@link IndexOutOfBoundsException}; its window position is still consumed so the ids of the
 * remaining features stay positional.
 *
 * @param c the constituent (a single word/token) to extract features for
 * @return the features in insertion order; empty when there are no preceding tokens
 * @throws EdisonException declared by the extractor contract
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    String classifier = "ChunkWindowThreeBefore";
    TextAnnotation ta = c.getTextAnnotation();
    TOKENS = ta.getView(ViewNames.TOKENS);
    SHALLOW_PARSE = ta.getView(ViewNames.SHALLOW_PARSE);
    // We can assume that the constituent in this case is a Word(Token) described by the LBJ
    // chunk definition
    int startspan = c.getStartSpan();
    int endspan = c.getEndSpan();
    // All our constituents are words(tokens)
    // two words before
    int k = -2;
    List<Constituent> wordstwobefore = getwordskfrom(TOKENS, startspan, endspan, k);
    Set<Feature> result = new LinkedHashSet<Feature>();
    int i = 0;
    for (Constituent token : wordstwobefore) {
        // Position within the window; advanced for every token, labelled or not.
        int position = i++;
        // Should only be one chunk label for each token
        List<String> chunkLabels =
                SHALLOW_PARSE.getLabelsCoveringSpan(token.getStartSpan(), token.getEndSpan());
        if (chunkLabels.isEmpty()) {
            logger.warn("Token has no Chunk Label; skipping window position " + position + ".");
            continue;
        }
        if (chunkLabels.size() > 1) {
            logger.warn("Error token has more than one POS tag or Chunk Label.");
        }
        String __value = "(" + chunkLabels.get(0) + ")";
        String __id = classifier + ":" + position;
        result.add(new DiscreteFeature(__id + __value));
    }
    return result;
}
Also used : DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) RealFeature(edu.illinois.cs.cogcomp.edison.features.RealFeature) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 59 with Feature

use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.

the class PreExtractor method countFeatures.

/**
 * Extracts the features of one instance, counts them in the lexicon and caches the resulting
 * feature vector on the instance.
 *
 * <p>Features come from the model's feature extractor ({@code modelInfo.fex}). While holding
 * the lexicon lock, each feature that the lexicon does not yet contain is either previewed
 * (when {@code addNewFeatures} is set) or dropped. Every remaining feature is looked up,
 * counted, and its id and value recorded.
 *
 * @param x The predicate to extract features from.
 * @throws EdisonException
 */
public void countFeatures(SenseInstance x) throws EdisonException {
    ModelInfo modelInfo = manager.getModelInfo();
    Set<Feature> extracted = modelInfo.fex.getFeatures(x.getConstituent());

    List<Integer> featureIds = new ArrayList<>();
    List<Float> featureValues = new ArrayList<>();

    synchronized (lexicon) {
        for (Feature feature : extracted) {
            String name = feature.getName();
            // This is the only place where a new feature can be added to the lexicon.
            if (!lexicon.contains(name)) {
                if (!addNewFeatures) {
                    // Unknown feature and the lexicon is not being extended: ignore it.
                    continue;
                }
                lexicon.previewFeature(name);
            }
            int id = lexicon.lookupId(name);
            lexicon.countFeature(id);
            featureIds.add(id);
            featureValues.add(feature.getValue());
        }
    }

    FeatureVector vector = new FeatureVector(
            ArrayUtilities.asIntArray(featureIds), ArrayUtilities.asFloatArray(featureValues));
    x.cacheFeatureVector(vector);
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) FeatureVector(edu.illinois.cs.cogcomp.sl.util.FeatureVector) ModelInfo(edu.illinois.cs.cogcomp.verbsense.core.ModelInfo) Feature(edu.illinois.cs.cogcomp.edison.features.Feature)

Example 60 with Feature

use of edu.illinois.cs.cogcomp.edison.features.Feature in project cogcomp-nlp by CogComp.

the class WordBigrams method getFeatures.

/**
 * Produces surface-form unigram and bigram features over the tokens covering the instance.
 *
 * <p>The TOKENS constituents covering the instance's span are sorted with
 * {@code TextAnnotationUtilities.constituentStartComparator}, then passed to
 * {@code FeatureNGramUtility.getNgramsOrdered} for n = 1 and n = 2, using each token's
 * surface form as the n-gram element.
 *
 * @param instance the constituent to extract features for
 * @return unigram features followed by bigram features, in insertion order
 * @throws EdisonException declared by the extractor contract
 */
@Override
public Set<Feature> getFeatures(Constituent instance) throws EdisonException {
    Set<Feature> ngramFeatures = new LinkedHashSet<>();

    View tokenView = instance.getTextAnnotation().getView(ViewNames.TOKENS);
    int spanStart = instance.getStartSpan();
    int spanEnd = instance.getEndSpan();
    List<Constituent> covering = tokenView.getConstituentsCoveringSpan(spanStart, spanEnd);
    covering.sort(TextAnnotationUtilities.constituentStartComparator);

    // Maps every token to its surface text for the n-gram builder.
    ITransformer<Constituent, String> toSurfaceForm = new ITransformer<Constituent, String>() {

        public String transform(Constituent token) {
            return token.getSurfaceForm();
        }
    };

    // Unigrams first, then bigrams.
    for (int n = 1; n <= 2; n++) {
        ngramFeatures.addAll(FeatureNGramUtility.getNgramsOrdered(covering, n, toSurfaceForm));
    }
    return ngramFeatures;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) ITransformer(edu.illinois.cs.cogcomp.core.transformers.ITransformer) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Aggregations

Feature (edu.illinois.cs.cogcomp.edison.features.Feature)71 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)48 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)44 DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature)41 LinkedHashSet (java.util.LinkedHashSet)24 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)22 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)17 Test (org.junit.Test)13 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)12 HashSet (java.util.HashSet)11 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)10 ArrayList (java.util.ArrayList)9 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)8 RealFeature (edu.illinois.cs.cogcomp.edison.features.RealFeature)8 Set (java.util.Set)6 TokenLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView)5 POSBaseLineCounter (edu.illinois.cs.cogcomp.edison.utilities.POSBaseLineCounter)5 POSMikheevCounter (edu.illinois.cs.cogcomp.edison.utilities.POSMikheevCounter)5 ModelInfo (edu.illinois.cs.cogcomp.verbsense.core.ModelInfo)3 List (java.util.List)3