Search in sources :

Example 11 with EdisonException

use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in project cogcomp-nlp by CogComp.

the class ParseLabelIdentifier method getFeatures.

/**
 * Emits a single discrete feature named after this extractor's {@code label} field when the
 * label of the parse phrase covering the constituent is accepted by {@code isLabelValid}.
 *
 * @param c the constituent whose parse phrase label is inspected
 * @return a set holding the one feature when the label is accepted, otherwise an empty set
 * @throws EdisonException if looking up the parse phrase or its label fails for any reason
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TreeView parseTree = (TreeView) c.getTextAnnotation().getView(parseViewName);

    String phraseLabel;
    try {
        phraseLabel = parseTree.getParsePhrase(c).getLabel();
    } catch (Exception cause) {
        // Any lookup failure is reported through the extractor's declared exception type.
        throw new EdisonException(cause);
    }

    boolean accepted = isLabelValid(phraseLabel);
    Set<Feature> result = new LinkedHashSet<>();
    if (accepted) {
        // The emitted feature carries the configured label, not the phrase label itself.
        result.add(DiscreteFeature.create(label));
    }
    return result;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException)

Example 12 with EdisonException

use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in project cogcomp-nlp by CogComp.

the class SyntacticFrame method getFeatures.

/**
 * Builds three frame features for an argument constituent from the parse-tree neighbourhood
 * of its predicate: one plain, one prefixed with {@code "general:"} and one prefixed with
 * {@code "lemma:"}.
 *
 * <p>The predicate is the source of the constituent's first incoming relation. The strings are
 * assembled from the children of the predicate's parent node, preceded (when that parent is
 * not the root) by the siblings that come before it.
 *
 * @param c the argument constituent
 * @return the three features, or an empty set when {@code c} has no incoming relations
 * @throws EdisonException if the parse phrase for the predicate or the argument cannot be
 *         obtained
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    Set<Feature> features = new LinkedHashSet<>();
    List<Relation> incoming = c.getIncomingRelations();
    if (incoming.isEmpty()) {
        return features;
    }

    Constituent pred = incoming.get(0).getSource();
    TextAnnotation ta = c.getTextAnnotation();
    TreeView parse = (TreeView) ta.getView(parseViewName);

    Constituent predicatePhrase;
    Constituent argPhrase;
    try {
        predicatePhrase = parse.getParsePhrase(pred);
        argPhrase = parse.getParsePhrase(c);
    } catch (Exception cause) {
        throw new EdisonException(cause);
    }

    Constituent vp = TreeView.getParent(predicatePhrase);
    StringBuffer plain = new StringBuffer();
    StringBuffer general = new StringBuffer();
    StringBuffer lemma = new StringBuffer();

    // Siblings that precede the predicate's parent node, in order.
    if (!TreeView.isRoot(vp)) {
        for (Relation edge : TreeView.getParent(vp).getOutgoingRelations()) {
            Constituent sibling = edge.getTarget();
            if (sibling == vp) {
                break;
            }
            addToFeature(sibling, argPhrase, plain, general, lemma);
        }
    }

    // Children of the predicate's parent; the predicate itself is marked with "v-" / its lemma.
    for (Relation edge : vp.getOutgoingRelations()) {
        Constituent child = edge.getTarget();
        if (!child.getSpan().equals(predicatePhrase.getSpan())) {
            addToFeature(child, argPhrase, plain, general, lemma);
            continue;
        }
        plain.append("v-");
        general.append("v-");
        lemma.append(WordHelpers.getLemma(ta, child.getStartSpan())).append("-");
    }

    features.add(DiscreteFeature.create(plain.toString()));
    features.add(DiscreteFeature.create("general:" + general));
    features.add(DiscreteFeature.create("lemma:" + lemma));
    return features;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Relation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation) TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException)

Example 13 with EdisonException

use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in project cogcomp-nlp by CogComp.

the class MixedChunkWindowTwoBeforePOSWindowThreeBefore method getFeatures.

/**
 * Combines the chunk labels of the two preceding tokens with POS tags into three discrete
 * features.
 *
 * <p>This extractor expects the TOKENS, POS and SHALLOW_PARSE views to be present on the
 * constituent's TextAnnotation. It uses the constituent's own POS tag as well as the POS tags
 * and SHALLOW_PARSE (chunk) labels of the two tokens returned by
 * {@code getwordskfrom(TOKENS, start, end, -2)}.
 *
 * <p>The looked-up views are stored in the {@code TOKENS}, {@code POS} and
 * {@code SHALLOW_PARSE} fields on every call.
 * NOTE(review): that shared state presumably makes this method unsafe for concurrent use —
 * confirm against the field declarations.
 *
 * @param c the constituent (assumed to be a single token) to extract features for
 * @return the three features, or {@code null} when exactly two preceding tokens are not
 *         available; the {@code null} is kept for backward compatibility with callers
 * @throws EdisonException if any of the required views cannot be obtained
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    try {
        TOKENS = ta.getView(ViewNames.TOKENS);
        POS = ta.getView(ViewNames.POS);
        SHALLOW_PARSE = ta.getView(ViewNames.SHALLOW_PARSE);
    } catch (Exception e) {
        // Previously this was only printed, after which the method went on to use view
        // fields that were either null or left over from an earlier call.
        throw new EdisonException(e);
    }
    // We can assume that the constituent in this case is a Word(Token) described by the LBJ
    // chunk definition
    int startspan = c.getStartSpan();
    int endspan = c.getEndSpan();
    // All our constituents are words(tokens)
    // words two before
    int k = -2;
    List<Constituent> wordstwobefore = getwordskfrom(TOKENS, startspan, endspan, k);
    if (wordstwobefore.size() != 2) {
        return null;
    }
    String[] tags = new String[3];
    String[] labels = new String[2];
    int i = 0;
    for (Constituent token : wordstwobefore) {
        // Should only be one POS tag for each token
        List<String> POS_tag = POS.getLabelsCoveringSpan(token.getStartSpan(), token.getEndSpan());
        List<String> Chunk_label = SHALLOW_PARSE.getLabelsCoveringSpan(token.getStartSpan(), token.getEndSpan());
        if (POS_tag.size() != 1 || Chunk_label.size() != 1) {
            logger.warn("Error token has more than one POS tag or Chunk Label.");
        }
        labels[i] = Chunk_label.get(0);
        tags[i] = POS_tag.get(0);
        i++;
    }
    // i == 2 here: the last tag slot belongs to the constituent itself.
    tags[i] = POS.getLabelsCoveringSpan(startspan, endspan).get(0);
    Set<Feature> __result = new LinkedHashSet<>();
    String classifier = "MixedChunkWindowTwoBeforePOSWindowThreeBefore";
    // Chunk label of token -2 paired with chunk label of token -1.
    String __id = classifier + ":" + "ll";
    String __value = "(" + (labels[0] + "_" + labels[1]) + ")";
    logger.info(__id + __value);
    __result.add(new DiscreteFeature(__id + __value));
    // Chunk label of token -2 paired with POS tag of token -1.
    __id = classifier + ":" + "lt1";
    __value = "(" + (labels[0] + "_" + tags[1]) + ")";
    logger.info(__id + __value);
    __result.add(new DiscreteFeature(__id + __value));
    // Chunk label of token -1 paired with the constituent's own POS tag. Unlike the two
    // features above, this value is not wrapped in parentheses; the string is left as-is
    // because changing it would change the emitted feature name.
    __id = classifier + ":" + "lt2";
    __value = "" + (labels[1] + "_" + tags[2]);
    logger.info(__id + __value);
    __result.add(new DiscreteFeature(__id + __value));
    return __result;
}
Also used : DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) RealFeature(edu.illinois.cs.cogcomp.edison.features.RealFeature) IOException(java.io.IOException) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 14 with EdisonException

use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in project cogcomp-nlp by CogComp.

the class CurrencyIndicator method getFeatures.

/**
 * Returns the {@code CURRENCY} feature when a currency span inside the constituent is
 * accompanied by a number.
 *
 * <p>On first use the currency data is loaded via {@code loadCurrency(gzip, true)}, guarded by
 * the {@code loaded} flag and a lock on this instance. If the TextAnnotation does not yet have
 * the {@code VIEW_NAME} view, it is added with {@code addCurrencyView}.
 *
 * @param c the constituent to inspect
 * @return a set containing {@code CURRENCY}, or an empty set when no match is found
 * @throws EdisonException if loading the currency data or adding the currency view fails
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    try {
        if (!loaded) {
            synchronized (this) {
                // now its changed to be loaded from datastore.
                // NOTE(review): the unsynchronized first read is only safe if `loaded` is
                // volatile — confirm against the field declaration.
                if (!loaded) {
                    loadCurrency(gzip, true);
                }
            }
        }
    } catch (Exception ex) {
        throw new EdisonException(ex);
    }
    TextAnnotation ta = c.getTextAnnotation();
    if (!ta.hasView(VIEW_NAME)) {
        try {
            addCurrencyView(ta);
        } catch (Exception e) {
            // Previously this was only printed, and the view that failed to be added was
            // then read immediately below. Report the real cause instead.
            throw new EdisonException(e);
        }
    }
    SpanLabelView view = (SpanLabelView) ta.getView(VIEW_NAME);
    Set<Feature> features = new LinkedHashSet<>();
    for (Constituent cc : view.where(Queries.containedInConstituent(c))) {
        if (cc.getEndSpan() == c.getEndSpan()) {
            // NOTE(review): in this branch cc ends where c ends, so the condition below
            // amounts to cc.getStartSpan() - 1 > cc.getEndSpan(), which looks unreachable
            // for a well-formed span. Possibly c.getStartSpan() was intended — confirm
            // before changing, since it alters which features are produced.
            if (cc.getStartSpan() - 1 > c.getEndSpan()) {
                // check if this is a number
                if (WordLists.NUMBERS.contains(ta.getToken(cc.getStartSpan() - 1).toLowerCase())) {
                    features.add(CURRENCY);
                    break;
                }
            }
        } else if (WordFeatureExtractorFactory.numberNormalizer.getWordFeatures(ta, cc.getEndSpan()).size() > 0) {
            // The token at the currency span's end index produced number features.
            features.add(CURRENCY);
            break;
        }
    }
    return features;
}
Also used : EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) URISyntaxException(java.net.URISyntaxException) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 15 with EdisonException

use of edu.illinois.cs.cogcomp.edison.utilities.EdisonException in project cogcomp-nlp by CogComp.

the class TestBrownClusterFeatureExtractor method test.

/**
 * Runs three Brown-cluster feature extractors over every token of a fixed sentence, checks
 * that the {@code BROWN_CLUSTERS + "_wiki"} view was added, and compares the sorted,
 * comma-joined feature strings against {@code expectedOutput}.
 */
@Test
public final void test() {
    int[] prefixLengths = { 4, 6, 10, 20 };
    BrownClusterFeatureExtractor defaultExtractor = BrownClusterFeatureExtractor.instance1000;

    BrownClusterFeatureExtractor bllipExtractor = null;
    try {
        bllipExtractor = new BrownClusterFeatureExtractor("bllip", "brownBllipClusters", prefixLengths);
    } catch (EdisonException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    BrownClusterFeatureExtractor wikiExtractor = null;
    try {
        wikiExtractor = new BrownClusterFeatureExtractor("wiki", "brown-english-wikitext.case-intact.txt-c1000-freq10-v3.txt", prefixLengths);
    } catch (EdisonException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    TokenizerTextAnnotationBuilder builder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation ta = builder.createTextAnnotation("test", "test", "This test sentence has Joynt and Lieberknecht and Fibonnaci in it " + "just to exercise possible brown cluster hits in resources used by NER.");

    // Collect the features of all three extractors for every token.
    Set<Feature> collected = new HashSet<>();
    for (int wordIndex = 0; wordIndex < ta.size(); ++wordIndex) {
        try {
            collected.addAll(defaultExtractor.getWordFeatures(ta, wordIndex));
            collected.addAll(bllipExtractor.getWordFeatures(ta, wordIndex));
            collected.addAll(wikiExtractor.getWordFeatures(ta, wordIndex));
        } catch (EdisonException e) {
            e.printStackTrace();
            fail(e.getMessage());
        }
    }
    assertTrue(ta.hasView(ViewNames.BROWN_CLUSTERS + "_wiki"));

    // Sort the string forms so the comparison does not depend on HashSet iteration order.
    String[] sortedFeatures = new String[collected.size()];
    int next = 0;
    for (Feature feature : collected) {
        sortedFeatures[next] = feature.toString();
        next++;
    }
    Arrays.sort(sortedFeatures);
    String actualOutput = StringUtils.join(",", sortedFeatures);
    assertEquals(expectedOutput, actualOutput);
}
Also used : TokenizerTextAnnotationBuilder(edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder) StatefulTokenizer(edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) BrownClusterFeatureExtractor(edu.illinois.cs.cogcomp.edison.features.factory.BrownClusterFeatureExtractor) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException): 41; Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent): 22; TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation): 22; Feature (edu.illinois.cs.cogcomp.edison.features.Feature): 17; DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature): 15; TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView): 13; LinkedHashSet (java.util.LinkedHashSet): 12; Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation): 8; WordNetFeatureExtractor (edu.illinois.cs.cogcomp.edison.features.factory.WordNetFeatureExtractor): 8; HashSet (java.util.HashSet): 7; Test (org.junit.Test): 6; Set (java.util.Set): 5; View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View): 4; ArrayList (java.util.ArrayList): 4; RealFeature (edu.illinois.cs.cogcomp.edison.features.RealFeature): 3; SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView): 2; FileNotFoundException (java.io.FileNotFoundException): 2; IOException (java.io.IOException): 2; AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException): 1; BrownClusterFeatureExtractor (edu.illinois.cs.cogcomp.edison.features.factory.BrownClusterFeatureExtractor): 1