Search in sources :

Example 11 with DiscreteFeature

use of edu.illinois.cs.cogcomp.edison.features.DiscreteFeature in project cogcomp-nlp by CogComp.

the class LabeledDepFeatureGenerator method ChunkConj.

/**
 * Builds four conjunction features over the chunk and POS strings of a head/dependent pair.
 *
 * Two features combine only the chunk strings, two combine chunk and POS strings; each pair is
 * emitted once with the arc direction ({@code head < dep}) and once with the signed arc length
 * ({@code head - dep}). Every feature name ends with {@code deprel}.
 *
 * @param head index of the head token in {@code sent}
 * @param dep index of the dependent token in {@code sent}
 * @param sent sentence instance providing the {@code strChunk} and {@code strPos} arrays
 * @param deprel dependency relation label appended to every feature name
 * @return the set of generated features
 */
private Set<Feature> ChunkConj(int head, int dep, DepInst sent, String deprel) {
    final String headChunk = sent.strChunk[head] + " ";
    final String headPos = sent.strPos[head] + " ";
    final String depChunk = sent.strChunk[dep] + " ";
    final String depPos = sent.strPos[dep] + " ";

    // Shared leading parts: chunk-only and chunk+POS variants.
    final String chunkOnly = "POSChunk: " + headChunk + depChunk;
    final String chunkAndPos = "POSChunk: " + headChunk + headPos + depChunk + depPos;

    // Shared trailing parts: arc direction and arc length, each followed by the relation label.
    final String directionTail = "Arc-dir: " + (head < dep) + " " + deprel;
    final String lengthTail = "Arc-length " + (head - dep) + " " + deprel;

    Set<Feature> generated = new HashSet<>();
    generated.add(new DiscreteFeature(chunkOnly + directionTail));
    generated.add(new DiscreteFeature(chunkOnly + lengthTail));
    generated.add(new DiscreteFeature(chunkAndPos + directionTail));
    generated.add(new DiscreteFeature(chunkAndPos + lengthTail));
    return generated;
}
Also used : DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) HashSet(java.util.HashSet)

Example 12 with DiscreteFeature

use of edu.illinois.cs.cogcomp.edison.features.DiscreteFeature in project cogcomp-nlp by CogComp.

the class Affixes method getFeatures.

/**
 * Generates prefix and suffix features from the surface form of a constituent.
 *
 * Prefixes of length 3 and 4 and suffixes of length 1 through 4 are emitted, but only for
 * lengths strictly shorter than the surface form. Feature names have the form
 * {@code Affixes:p|(prefix)} and {@code Affixes:s|(suffix)}.
 *
 * @param c the constituent whose surface form is examined
 * @return the affix features in insertion order (prefixes first, then suffixes)
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    // Stores the TOKENS view in the TOKENS field; it is not otherwise used in this method.
    TOKENS = ta.getView(ViewNames.TOKENS);

    Set<Feature> affixes = new LinkedHashSet<Feature>();
    final String surface = c.getSurfaceForm();
    final int surfaceLength = surface.length();

    // Prefixes: the length guard is part of the loop condition, since once a length is
    // too long every larger length is too.
    for (int n = 3; n <= 4 && n < surfaceLength; ++n) {
        String prefix = surface.substring(0, n);
        affixes.add(new DiscreteFeature("Affixes" + ":" + "p|" + "(" + prefix + ")"));
    }

    // Suffixes of length 1..4, same strict-length guard.
    for (int n = 1; n <= 4 && n < surfaceLength; ++n) {
        String suffix = surface.substring(surfaceLength - n);
        affixes.add(new DiscreteFeature("Affixes" + ":" + "s|" + "(" + suffix + ")"));
    }
    return affixes;
}
Also used : DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) RealFeature(edu.illinois.cs.cogcomp.edison.features.RealFeature)

Example 13 with DiscreteFeature

use of edu.illinois.cs.cogcomp.edison.features.DiscreteFeature in project cogcomp-nlp by CogComp.

the class POSWindow method getFeatures.

/**
 * Generates {@code tag_form} features for a window of up to two tokens on either side of
 * every token covered by the constituent.
 *
 * The tag source and feature-name label depend on two flags:
 * when {@code isPOSFromCounting} is false the tag is read from the POS view and the label is
 * {@code POSWindow_POS}; otherwise the tag comes from {@code counter.tag} and the label is
 * {@code POSWindow_BaselinePOS} if {@code isBaseLineCounting} is set, else
 * {@code POSWindow_MikheevPOS}.
 *
 * @param c the constituent whose token span is scanned
 * @return the generated features in insertion order
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final String prefix = "POSWindow";
    TextAnnotation ta = c.getTextAnnotation();
    final int tokenCount = ta.getTokens().length;
    final int spanStart = c.getStartSpan();
    final int spanEnd = c.getEndSpan();

    // Decide once which tag source and label apply; neither depends on the token index.
    final boolean tagFromView = !isPOSFromCounting;
    final String classifier;
    if (tagFromView) {
        classifier = prefix + "_" + "POS";
    } else if (isBaseLineCounting) {
        classifier = prefix + "_" + "BaselinePOS";
    } else {
        classifier = prefix + "_" + "MikheevPOS";
    }

    Set<Feature> features = new LinkedHashSet<>();
    for (int center = spanStart; center < spanEnd; center++) {
        // Step back at most two tokens without going below index 0, and forward at most two
        // without passing the last token. The inner min/max leave an already out-of-range
        // center untouched, matching a step-by-step walk.
        final int windowStart = Math.max(center - 2, Math.min(center, 0));
        final int windowEnd = Math.min(center + 2, Math.max(center, tokenCount - 1));

        // The window always contains at least the center token, so the view is needed
        // whenever tags are read from it.
        final TokenLabelView posView =
                tagFromView ? (TokenLabelView) ta.getView(ViewNames.POS) : null;

        for (int pos = windowStart; pos <= windowEnd; pos++) {
            String form = ta.getToken(pos);
            String tag = tagFromView ? posView.getLabel(pos) : counter.tag(pos, ta);
            features.add(new DiscreteFeature(classifier + ":" + tag + "_" + form));
        }
    }
    return features;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) TokenLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Feature(edu.illinois.cs.cogcomp.edison.features.Feature)

Example 14 with DiscreteFeature

use of edu.illinois.cs.cogcomp.edison.features.DiscreteFeature in project cogcomp-nlp by CogComp.

the class POSMikheevFeatureExtractor method getFeatures.

/**
 * Generates one {@code MikheevPOS:tag_form} feature per token covered by the constituent,
 * where the tag is obtained from {@code counter.tag}.
 *
 * @param c the constituent whose token span is scanned
 * @return the generated features in insertion order
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    int tokenIndex = c.getStartSpan();
    final int spanEnd = c.getEndSpan();

    Set<Feature> features = new LinkedHashSet<>();
    while (tokenIndex < spanEnd) {
        String form = ta.getToken(tokenIndex);
        String tag = counter.tag(tokenIndex, ta);
        String featureName = "MikheevPOS" + ":" + tag + "_" + form;
        features.add(new DiscreteFeature(featureName));
        tokenIndex++;
    }
    return features;
}
Also used : DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Feature(edu.illinois.cs.cogcomp.edison.features.Feature)

Example 15 with DiscreteFeature

use of edu.illinois.cs.cogcomp.edison.features.DiscreteFeature in project cogcomp-nlp by CogComp.

the class WordTypeInformation method getFeatures.

/**
 * Emits character-type features for the forms returned by {@code getwindowkfrom} for this
 * constituent's span (requested with k = 2; presumably a [-2, +2] token window, to be
 * confirmed against that helper). Requires the TOKENS view on the constituent's
 * TextAnnotation.
 *
 * For every non-null form three features are added, in this order: whether all characters
 * are upper case ({@code c<i>}), whether all are digits ({@code d<i>}), and whether none is
 * a letter (also {@code c<i>}). An empty form yields {@code true} for all three.
 *
 * @param c the constituent, assumed to be a single word (token)
 * @return the generated features in insertion order
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    TOKENS = ta.getView(ViewNames.TOKENS);

    final int k = 2;
    String[] forms = getwindowkfrom(TOKENS, c.getStartSpan(), c.getEndSpan(), k);

    final String classifier = "WordTypeInformation";
    Set<Feature> result = new LinkedHashSet<Feature>();
    for (int i = 0; i < forms.length; i++) {
        final String form = forms[i];
        if (form == null) {
            continue;
        }

        boolean allCapitalized = true;
        boolean allDigits = true;
        boolean allNonLetters = true;
        for (char ch : form.toCharArray()) {
            allCapitalized &= Character.isUpperCase(ch);
            allDigits &= Character.isDigit(ch);
            allNonLetters &= !Character.isLetter(ch);
        }

        result.add(new DiscreteFeature(classifier + ":c" + i + "(" + allCapitalized + ")"));
        result.add(new DiscreteFeature(classifier + ":d" + i + "(" + allDigits + ")"));
        // NOTE(review): this reuses the "c" id of the all-capitalized feature, so the two
        // are indistinguishable by name. Left as-is because existing models/tests may depend
        // on these names; confirm whether a distinct id was intended.
        result.add(new DiscreteFeature(classifier + ":c" + i + "(" + allNonLetters + ")"));
    }
    return result;
}
Also used : DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) RealFeature(edu.illinois.cs.cogcomp.edison.features.RealFeature)

Aggregations

- DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature): 19
- Feature (edu.illinois.cs.cogcomp.edison.features.Feature): 19
- TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation): 15
- LinkedHashSet (java.util.LinkedHashSet): 7
- RealFeature (edu.illinois.cs.cogcomp.edison.features.RealFeature): 6
- TokenLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView): 5
- HashSet (java.util.HashSet): 4
- Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent): 3
- EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException): 3
- View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View): 2
- IOException (java.io.IOException): 2