Search in sources :

Example 1 with DiscreteFeature

use of edu.illinois.cs.cogcomp.edison.features.DiscreteFeature in project cogcomp-nlp by CogComp.

the class LabelTwoAfter method getFeatures.

/**
 * Builds one discrete feature for every token position in the constituent's span shifted two
 * positions to the right ({@code [start + 2, end + 2)}).
 *
 * <p>The feature namespace depends on the extractor's configuration flags:
 * {@code LabelTwoAfter_POS} when {@code isPOSFromCounting} is false (tag read from the
 * {@code ViewNames.POS} view), otherwise {@code LabelTwoAfter_BaselinePOS} or
 * {@code LabelTwoAfter_MikheevPOS} depending on {@code isBaseLineCounting} (tag obtained from
 * {@code counter.tag(i, ta)} in both cases). Positions at or beyond the number of tokens yield a
 * feature made of the namespace followed by a colon only.
 *
 * @param c the constituent whose right-hand context is examined
 * @return the features in the order they were generated
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final String prefix = "LabelTwoAfter";
    final TextAnnotation ta = c.getTextAnnotation();
    final int tokenCount = ta.getTokens().length;
    final int windowStart = c.getStartSpan() + 2;
    final int windowEnd = c.getEndSpan() + 2;
    final Set<Feature> features = new LinkedHashSet<>();

    int position = windowStart;
    while (position < windowEnd) {
        // Choose the feature namespace for this position from the configuration flags.
        final boolean tagFromPosView = !isPOSFromCounting;
        final String classifier;
        if (tagFromPosView) {
            classifier = prefix + "_" + "POS";
        } else if (isBaseLineCounting) {
            classifier = prefix + "_" + "BaselinePOS";
        } else {
            classifier = prefix + "_" + "MikheevPOS";
        }

        if (position >= tokenCount) {
            // The shifted window runs past the last token: emit the namespace with no value.
            features.add(new DiscreteFeature(classifier + ":"));
        } else {
            final String form;
            final String tag;
            if (tagFromPosView) {
                TokenLabelView posView = (TokenLabelView) ta.getView(ViewNames.POS);
                form = ta.getToken(position);
                tag = posView.getLabel(position);
            } else {
                // Both counting-based namespaces obtain their tag from the same counter.
                form = ta.getToken(position);
                tag = counter.tag(position, ta);
            }
            features.add(new DiscreteFeature(classifier + ":" + tag + "_" + form));
        }
        position++;
    }
    return features;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) TokenLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Feature(edu.illinois.cs.cogcomp.edison.features.Feature)

Example 2 with DiscreteFeature

use of edu.illinois.cs.cogcomp.edison.features.DiscreteFeature in project cogcomp-nlp by CogComp.

the class MixedChunkWindowTwoBeforePOSWindowThreeBefore method getFeatures.

/**
 * Combines the SHALLOW_PARSE (chunk) labels of the two tokens before the constituent with POS
 * tags of the preceding token and of the constituent itself.
 *
 * <p>This extractor assumes that the TOKENS, POS and SHALLOW_PARSE views have been generated in
 * the constituent's TextAnnotation. Three discrete features are produced, all prefixed with
 * {@code MixedChunkWindowTwoBeforePOSWindowThreeBefore:}:
 * <ul>
 *   <li>{@code ll(label0_label1)}: chunk labels of the two preceding tokens;</li>
 *   <li>{@code lt1(label0_tag1)}: first chunk label with the second token's POS tag;</li>
 *   <li>{@code lt2label1_tag2}: second chunk label with the constituent's own POS tag.</li>
 * </ul>
 *
 * @param c the constituent to extract features for; treated as a single word (token)
 * @return the three features in insertion order, or {@code null} when
 *         {@code getwordskfrom} does not return exactly two preceding tokens
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    // A failed view lookup is allowed to propagate. Swallowing it would leave these fields
    // holding null or the views of a previously processed TextAnnotation, and the code below
    // would then fail later or emit features computed from the wrong text.
    TOKENS = ta.getView(ViewNames.TOKENS);
    POS = ta.getView(ViewNames.POS);
    SHALLOW_PARSE = ta.getView(ViewNames.SHALLOW_PARSE);

    // We can assume that the constituent in this case is a Word(Token) described by the LBJ
    // chunk definition
    int startspan = c.getStartSpan();
    int endspan = c.getEndSpan();

    // All our constituents are words(tokens); fetch the two words before this one.
    int k = -2;
    List<Constituent> wordstwobefore = getwordskfrom(TOKENS, startspan, endspan, k);
    if (wordstwobefore.size() != 2) {
        // NOTE(review): callers must handle this null; kept for backward compatibility.
        return null;
    }

    // tags[0..1] / labels[0..1] belong to the two preceding tokens, tags[2] to the constituent.
    String[] tags = new String[3];
    String[] labels = new String[2];
    int i = 0;
    for (Constituent token : wordstwobefore) {
        // Should only be one POS tag and one chunk label for each token
        List<String> posTags = POS.getLabelsCoveringSpan(token.getStartSpan(), token.getEndSpan());
        List<String> chunkLabels = SHALLOW_PARSE.getLabelsCoveringSpan(token.getStartSpan(), token.getEndSpan());
        if (posTags.size() != 1 || chunkLabels.size() != 1) {
            // The condition also covers a token with no label at all, so say so.
            logger.warn("Error token does not have exactly one POS tag or Chunk Label.");
        }
        labels[i] = chunkLabels.get(0);
        tags[i] = posTags.get(0);
        i++;
    }
    tags[i] = POS.getLabelsCoveringSpan(startspan, endspan).get(0);

    Set<Feature> result = new LinkedHashSet<Feature>();
    String classifier = "MixedChunkWindowTwoBeforePOSWindowThreeBefore";

    String name = classifier + ":" + "ll" + "(" + (labels[0] + "_" + labels[1]) + ")";
    logger.info(name);
    result.add(new DiscreteFeature(name));

    name = classifier + ":" + "lt1" + "(" + (labels[0] + "_" + tags[1]) + ")";
    logger.info(name);
    result.add(new DiscreteFeature(name));

    // The lt2 value has no surrounding parentheses in the original feature naming; it is kept
    // as is so that existing feature names do not change.
    name = classifier + ":" + "lt2" + (labels[1] + "_" + tags[2]);
    logger.info(name);
    result.add(new DiscreteFeature(name));

    return result;
}
Also used : DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) RealFeature(edu.illinois.cs.cogcomp.edison.features.RealFeature) IOException(java.io.IOException) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 3 with DiscreteFeature

use of edu.illinois.cs.cogcomp.edison.features.DiscreteFeature in project cogcomp-nlp by CogComp.

the class LabelTwoBefore method getFeatures.

/**
 * Builds one discrete feature for every token position in the constituent's span shifted two
 * positions to the left ({@code [start - 2, end - 2)}).
 *
 * <p>The feature namespace depends on the extractor's configuration flags:
 * {@code LabelTwoBefore_POS} when {@code isPOSFromCounting} is false (tag read from the
 * {@code ViewNames.POS} view), otherwise {@code LabelTwoBefore_BaselinePOS} or
 * {@code LabelTwoBefore_MikheevPOS} depending on {@code isBaseLineCounting} (tag obtained from
 * {@code counter.tag(i, ta)} in both cases). Negative positions yield a feature made of the
 * namespace followed by a colon only.
 *
 * @param c the constituent whose left-hand context is examined
 * @return the features in the order they were generated
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final String prefix = "LabelTwoBefore";
    final TextAnnotation ta = c.getTextAnnotation();
    final int windowStart = c.getStartSpan() - 2;
    final int windowEnd = c.getEndSpan() - 2;
    final Set<Feature> features = new LinkedHashSet<>();

    for (int position = windowStart; position < windowEnd; position++) {
        // Choose the feature namespace for this position from the configuration flags.
        final boolean tagFromPosView = !isPOSFromCounting;
        final String classifier;
        if (tagFromPosView) {
            classifier = prefix + "_" + "POS";
        } else if (isBaseLineCounting) {
            classifier = prefix + "_" + "BaselinePOS";
        } else {
            classifier = prefix + "_" + "MikheevPOS";
        }

        if (position < 0) {
            // The shifted window starts before the first token: emit the namespace with no value.
            features.add(new DiscreteFeature(classifier + ":"));
            continue;
        }

        final String form;
        final String tag;
        if (tagFromPosView) {
            TokenLabelView posView = (TokenLabelView) ta.getView(ViewNames.POS);
            form = ta.getToken(position);
            tag = posView.getLabel(position);
        } else {
            // Both counting-based namespaces obtain their tag from the same counter.
            form = ta.getToken(position);
            tag = counter.tag(position, ta);
        }
        features.add(new DiscreteFeature(classifier + ":" + tag + "_" + form));
    }
    return features;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) TokenLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Feature(edu.illinois.cs.cogcomp.edison.features.Feature)

Example 4 with DiscreteFeature

use of edu.illinois.cs.cogcomp.edison.features.DiscreteFeature in project cogcomp-nlp by CogComp.

the class POSBaseLineFeatureExtractor method getFeatures.

/**
 * Emits one {@code BaseLinePOS:<tag>_<token>} feature for each token position covered by the
 * constituent, where the tag is whatever {@code counter.tag(i, ta)} returns for that position.
 *
 * @param c the constituent whose tokens are tagged
 * @return the features in token order
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final String featurePrefix = "BaseLinePOS" + ":";
    final TextAnnotation annotation = c.getTextAnnotation();
    int tokenIndex = c.getStartSpan();
    final int spanEnd = c.getEndSpan();
    final Set<Feature> extracted = new LinkedHashSet<>();
    while (tokenIndex < spanEnd) {
        final String surface = annotation.getToken(tokenIndex);
        final String predictedTag = counter.tag(tokenIndex, annotation);
        extracted.add(new DiscreteFeature(featurePrefix + predictedTag + "_" + surface));
        tokenIndex++;
    }
    return extracted;
}
Also used : DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Feature(edu.illinois.cs.cogcomp.edison.features.Feature)

Example 5 with DiscreteFeature

use of edu.illinois.cs.cogcomp.edison.features.DiscreteFeature in project cogcomp-nlp by CogComp.

the class POSWindowTwo method getFeatures.

/**
 * Produces one discrete feature per non-null entry of the tag window that
 * {@code getwindowtagskfrom} returns for a window size of two around the constituent.
 *
 * <p>This extractor assumes that the TOKENS and POS views have been generated in the
 * constituent's TextAnnotation. Each feature is named {@code POSWindowTwo:<index>(<tag>)},
 * where {@code index} is the entry's position in the returned array; every feature name is
 * also logged at info level.
 *
 * @param c the constituent to extract features for; treated as a single word (token)
 * @return the features in window order; entries whose tag is {@code null} are skipped
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final TextAnnotation ta = c.getTextAnnotation();
    View tokenView = null;
    View posView = null;
    try {
        tokenView = ta.getView(ViewNames.TOKENS);
        posView = ta.getView(ViewNames.POS);
    } catch (Exception e) {
        // NOTE(review): the failure is only printed; the views stay null and are still handed
        // to getwindowtagskfrom below. Confirm that helper tolerates null views.
        e.printStackTrace();
    }

    // We can assume that the constituent in this case is a Word(Token) described by the LBJ
    // chunk definition, so the window is taken two words before and after it.
    final int startspan = c.getStartSpan();
    final int endspan = c.getEndSpan();
    final int windowSize = 2;
    final String[] windowTags = getwindowtagskfrom(tokenView, posView, startspan, endspan, windowSize);

    final String classifier = "POSWindowTwo";
    final Set<Feature> result = new LinkedHashSet<Feature>();
    int slot = 0;
    for (String tag : windowTags) {
        if (tag != null) {
            final String name = classifier + ":" + slot + "(" + tag + ")";
            logger.info(name);
            result.add(new DiscreteFeature(name));
        }
        // The slot advances for skipped entries too, so indices stay aligned with the window.
        slot++;
    }
    return result;
}
Also used : DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) RealFeature(edu.illinois.cs.cogcomp.edison.features.RealFeature) IOException(java.io.IOException) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException)

Aggregations

DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature): 19, Feature (edu.illinois.cs.cogcomp.edison.features.Feature): 19, TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation): 15, LinkedHashSet (java.util.LinkedHashSet): 7, RealFeature (edu.illinois.cs.cogcomp.edison.features.RealFeature): 6, TokenLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView): 5, HashSet (java.util.HashSet): 4, Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent): 3, EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException): 3, View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View): 2, IOException (java.io.IOException): 2