Search in sources :

Example 46 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class ContextFeatureExtractor method getFeatures.

/**
 * Collects features from the tokens in a window around the given constituent.
 *
 * <p>The window extends {@code contextSize} tokens to either side of the constituent and is
 * clamped to the bounds of the text. Every generator is applied to a one-token constituent at
 * each window position, and each resulting feature is prefixed with {@code "context"}, an
 * optional relative index, {@code ":"} and the generator's name.
 *
 * @param c the constituent whose surrounding tokens are examined
 * @return the prefixed features, in the order they were produced
 * @throws EdisonException if one of the underlying generators fails
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    int spanStart = c.getStartSpan();
    int spanEnd = c.getEndSpan();
    // Clamp the context window so it never runs past either end of the text.
    int windowStart = Math.max(0, spanStart - contextSize);
    int windowEnd = Math.min(ta.size(), spanEnd + contextSize);
    Set<Feature> collected = new LinkedHashSet<>();
    for (int pos = windowStart; pos < windowEnd; pos++) {
        boolean insideSpan = spanStart <= pos && pos < spanEnd;
        if (ignoreConstituent && insideSpan) {
            continue;
        }
        // The preamble depends only on the token position, so build it once per token.
        String preamble = "context";
        if (specifyIndex) {
            String offset;
            if (pos < spanStart) {
                // Tokens to the left get negative offsets: -1 is adjacent to the span.
                offset = String.valueOf(pos - spanStart);
            } else if (pos >= spanEnd) {
                // Tokens to the right get positive offsets: 1 is adjacent to the span.
                offset = String.valueOf(pos - spanEnd + 1);
            } else {
                // Tokens inside the span are not given a numeric offset.
                offset = "*";
            }
            preamble += offset;
        }
        preamble += ":";
        for (FeatureExtractor extractor : this.generators) {
            Constituent neighbor = new Constituent("TMP", "TMP", ta, pos, pos + 1);
            for (Feature feat : extractor.getFeatures(neighbor)) {
                collected.add(feat.prefixWith(preamble + extractor.getName()));
            }
        }
    }
    return collected;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 47 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class CurrencyIndicator method getFeatures.

/**
 * Emits the {@code CURRENCY} feature when the constituent contains a currency mention that is
 * adjacent to a number.
 *
 * <p>The currency resource is loaded on first use, and the currency view is added to the text
 * annotation if it is not already present. For each currency constituent inside {@code c}:
 * <ul>
 *   <li>if it ends where {@code c} ends, the token immediately before it (when that token is
 *       still inside {@code c}) is checked against {@code WordLists.NUMBERS};</li>
 *   <li>otherwise the token immediately after it is checked with the number normalizer.</li>
 * </ul>
 *
 * @param c the constituent to inspect
 * @return a set holding {@code CURRENCY} if a match was found, otherwise an empty set
 * @throws EdisonException if the currency resource cannot be loaded or the currency view cannot
 *         be added
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    try {
        if (!loaded) {
            synchronized (this) {
                // now its changed to be loaded from datastore.
                if (!loaded) {
                    loadCurrency(gzip, true);
                }
            }
        }
    } catch (Exception ex) {
        throw new EdisonException(ex);
    }
    TextAnnotation ta = c.getTextAnnotation();
    if (!ta.hasView(VIEW_NAME)) {
        try {
            addCurrencyView(ta);
        } catch (Exception e) {
            // Without the view the lookup below cannot succeed, so report the failure
            // to the caller instead of printing it and carrying on.
            throw new EdisonException(e);
        }
    }
    SpanLabelView view = (SpanLabelView) ta.getView(VIEW_NAME);
    Set<Feature> features = new LinkedHashSet<>();
    for (Constituent cc : view.where(Queries.containedInConstituent(c))) {
        if (cc.getEndSpan() == c.getEndSpan()) {
            // The currency mention closes the constituent, so look at the token before it.
            // Only do so when that token still lies inside c (this also keeps the index
            // non-negative). The previous bound compared against c.getEndSpan() and could
            // never be satisfied.
            int precedingToken = cc.getStartSpan() - 1;
            if (precedingToken >= c.getStartSpan()) {
                // check if this is a number
                if (WordLists.NUMBERS.contains(ta.getToken(precedingToken).toLowerCase())) {
                    features.add(CURRENCY);
                    break;
                }
            }
        } else if (WordFeatureExtractorFactory.numberNormalizer.getWordFeatures(ta, cc.getEndSpan()).size() > 0) {
            // The token right after the currency mention is recognised as a number.
            features.add(CURRENCY);
            break;
        }
    }
    return features;
}
Also used : EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) URISyntaxException(java.net.URISyntaxException) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 48 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class TestBrownClusterFeatureExtractor method test.

/**
 * Runs three Brown-cluster feature extractors over every token of a fixed sentence and
 * compares the sorted, comma-joined feature strings with {@code expectedOutput}.
 */
@Test
public final void test() {
    final int[] prefixLengths = { 4, 6, 10, 20 };
    BrownClusterFeatureExtractor defaultExtractor = BrownClusterFeatureExtractor.instance1000;
    BrownClusterFeatureExtractor bllipExtractor = null;
    BrownClusterFeatureExtractor wikiExtractor = null;
    try {
        bllipExtractor = new BrownClusterFeatureExtractor("bllip", "brownBllipClusters", prefixLengths);
        wikiExtractor = new BrownClusterFeatureExtractor("wiki", "brown-english-wikitext.case-intact.txt-c1000-freq10-v3.txt", prefixLengths);
    } catch (EdisonException e) {
        // fail() throws, so a construction error aborts the test right here.
        e.printStackTrace();
        fail(e.getMessage());
    }

    TokenizerTextAnnotationBuilder builder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    String sentence = "This test sentence has Joynt and Lieberknecht and Fibonnaci in it "
            + "just to exercise possible brown cluster hits in resources used by NER.";
    TextAnnotation ta = builder.createTextAnnotation("test", "test", sentence);

    Set<Feature> feats = new HashSet<>();
    for (int tokenId = 0; tokenId < ta.size(); tokenId++) {
        try {
            feats.addAll(defaultExtractor.getWordFeatures(ta, tokenId));
            feats.addAll(bllipExtractor.getWordFeatures(ta, tokenId));
            feats.addAll(wikiExtractor.getWordFeatures(ta, tokenId));
        } catch (EdisonException e) {
            e.printStackTrace();
            fail(e.getMessage());
        }
    }
    assertTrue(ta.hasView(ViewNames.BROWN_CLUSTERS + "_wiki"));

    // Sort the rendered features so the comparison does not depend on set iteration order.
    String[] rendered = new String[feats.size()];
    int next = 0;
    for (Feature feature : feats) {
        rendered[next] = feature.toString();
        next++;
    }
    Arrays.sort(rendered);
    String actualOutput = StringUtils.join(",", rendered);
    assertEquals(expectedOutput, actualOutput);
}
Also used : TokenizerTextAnnotationBuilder(edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder) StatefulTokenizer(edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) BrownClusterFeatureExtractor(edu.illinois.cs.cogcomp.edison.features.factory.BrownClusterFeatureExtractor) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 49 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class TestSrlNomIdentifier method test.

/**
 * Compares the manifest-driven nom-identifier feature extractor with {@code SrlNomIdentifier}
 * on a handful of constituents.
 *
 * <p>Only in and out relations in the SRL_VERB view are used for the purpose of testing.
 *
 * @throws Exception if the annotation, the manifest file or the feature extractor cannot be set up
 */
public final void test() throws Exception {
    logger.info("Nom_Identifier Feature Extractor");

    String[] viewsToAdd = { ViewNames.POS, ViewNames.LEMMA, ViewNames.SHALLOW_PARSE, ViewNames.PARSE_GOLD, ViewNames.SRL_VERB, ViewNames.PARSE_STANFORD, ViewNames.NER_CONLL };
    TextAnnotation ta = DummyTextAnnotationGenerator.generateAnnotatedTextAnnotation(viewsToAdd, true, 3);
    ta.addView(ClauseViewGenerator.STANFORD);
    ta.addView(PseudoParse.STANFORD);
    logger.info("This textannotation annotates the text: \n" + ta.getText());

    // Gather the constituents to test from two spans of the SRL_VERB view.
    View srlVerbView = ta.getView("SRL_VERB");
    List<Constituent> candidates = srlVerbView.getConstituentsCoveringSpan(10, 13);
    candidates.addAll(srlVerbView.getConstituentsCoveringSpan(26, 27));

    // Build the extractor described by the .fex manifest, registering the custom
    // extractors and transformers under the names used in this setup.
    String manifestPath = Constant.prefix + "/Nom/Identifier/nom-identifier.fex";
    FeatureManifest featureManifest = new FeatureManifest(new FileInputStream(manifestPath));
    FeatureManifest.setFeatureExtractor("hyphen-argument-feature", FeatureGenerators.hyphenTagFeature);
    FeatureManifest.setTransformer("parse-left-sibling", FeatureGenerators.getParseLeftSibling(ViewNames.PARSE_STANFORD));
    FeatureManifest.setTransformer("parse-right-sibling", FeatureGenerators.getParseRightSibling(ViewNames.PARSE_STANFORD));
    FeatureManifest.setFeatureExtractor("pp-features", FeatureGenerators.ppFeatures(ViewNames.PARSE_STANFORD));
    FeatureManifest.setFeatureExtractor("projected-path", new ProjectedPath(ViewNames.PARSE_STANFORD));
    featureManifest.useCompressedName();
    featureManifest.setVariable("*default-parser*", ViewNames.PARSE_STANFORD);
    FeatureExtractor fex = featureManifest.createFex();

    SrlNomIdentifier ni = new SrlNomIdentifier();
    for (Constituent candidate : candidates) {
        assertTrue(SRLFeaturesComparator.isEqual(candidate, fex, ni));
    }
}
Also used : FeatureExtractor(edu.illinois.cs.cogcomp.edison.features.FeatureExtractor) FeatureManifest(edu.illinois.cs.cogcomp.edison.features.manifest.FeatureManifest) ProjectedPath(edu.illinois.cs.cogcomp.edison.features.lrec.ProjectedPath) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) FileInputStream(java.io.FileInputStream)

Example 50 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class AnnotatorLazyInitTest method testLazy.

/**
 * Checks that an annotator configured with the lazy-initialization flag reports itself as
 * uninitialized after construction and as initialized once a view has been requested.
 */
@Test
public void testLazy() {
    Properties props = new Properties();
    props.setProperty(SimpleGazetteerAnnotatorConfigurator.PATH_TO_DICTIONARIES.key, "/testgazetteers/");
    props.setProperty(SimpleGazetteerAnnotatorConfigurator.PHRASE_LENGTH.key, "6");
    props.setProperty(SimpleGazetteerAnnotatorConfigurator.IS_LAZILY_INITIALIZED.key, SimpleGazetteerAnnotatorConfigurator.TRUE);

    SimpleGazetteerAnnotator sga = null;
    try {
        sga = new SimpleGazetteerAnnotator(new ResourceManager(props));
    } catch (IOException | URISyntaxException e) {
        // fail() throws, so sga is never dereferenced while still null.
        e.printStackTrace();
        fail(e.getMessage());
    }

    // Before the first request: not initialized; each dictionary is either absent or non-empty.
    assertFalse(sga.isInitialized());
    assertTrue(sga.dictionaries == null || sga.dictionaries.size() > 0);
    assertTrue(sga.dictionariesIgnoreCase == null || sga.dictionariesIgnoreCase.size() > 0);

    TextAnnotation ta = tab.createTextAnnotation("The CIA has no London headquarters, though General Electric does.");
    try {
        sga.getView(ta);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    // After the first request: the view exists and the annotator reports itself initialized.
    assertTrue(ta.hasView(sga.getViewName()));
    assertTrue(sga.isInitialized());
    assertTrue(sga.dictionaries == null || sga.dictionaries.size() > 0);
    assertTrue(sga.dictionariesIgnoreCase == null || sga.dictionariesIgnoreCase.size() > 0);
    assertTrue(ta.hasView(sga.getViewName()));
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) Properties(java.util.Properties) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Test(org.junit.Test)

Aggregations

TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)218 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)95 Test (org.junit.Test)65 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)49 Feature (edu.illinois.cs.cogcomp.edison.features.Feature)48 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)29 DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature)28 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)25 ArrayList (java.util.ArrayList)23 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)22 LinkedHashSet (java.util.LinkedHashSet)21 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)16 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)16 FeatureExtractor (edu.illinois.cs.cogcomp.edison.features.FeatureExtractor)16 ProjectedPath (edu.illinois.cs.cogcomp.edison.features.lrec.ProjectedPath)16 FeatureManifest (edu.illinois.cs.cogcomp.edison.features.manifest.FeatureManifest)16 FileInputStream (java.io.FileInputStream)16 TokenLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView)14 SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)12 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)11