Search in sources :

Example 1 with BrownClusterFeatureExtractor

Use of edu.illinois.cs.cogcomp.edison.features.factory.BrownClusterFeatureExtractor in the project cogcomp-nlp by CogComp.

The method test of the class TestBrownClusterFeatureExtractor.

/**
 * Runs three {@link BrownClusterFeatureExtractor}s over every token index of a sample
 * sentence and compares the combined features with {@code expectedOutput}.
 *
 * <p>The extractors are the shared {@code instance1000} plus two built here with the names
 * "bllip" and "wiki". All features are pooled into one set, rendered with
 * {@code toString()}, sorted, and joined with commas before the comparison. The test also
 * asserts that the text annotation has the view named
 * {@code ViewNames.BROWN_CLUSTERS + "_wiki"} after extraction.
 *
 * <p>Any {@link EdisonException} raised during construction or extraction is printed and
 * turned into a test failure carrying the exception message.
 */
@Test
public final void test() {
    final int[] clusterPrefixLengths = { 4, 6, 10, 20 };
    final BrownClusterFeatureExtractor sharedExtractor = BrownClusterFeatureExtractor.instance1000;

    // Both custom extractors are built in order; a failure in either aborts the test via fail().
    BrownClusterFeatureExtractor bllipExtractor = null;
    BrownClusterFeatureExtractor wikiExtractor = null;
    try {
        bllipExtractor = new BrownClusterFeatureExtractor("bllip", "brownBllipClusters", clusterPrefixLengths);
        wikiExtractor = new BrownClusterFeatureExtractor("wiki", "brown-english-wikitext.case-intact.txt-c1000-freq10-v3.txt", clusterPrefixLengths);
    } catch (EdisonException constructionError) {
        constructionError.printStackTrace();
        fail(constructionError.getMessage());
    }

    final String sentence = "This test sentence has Joynt and Lieberknecht and Fibonnaci in it just to exercise possible brown cluster hits in resources used by NER.";
    final StatefulTokenizer tokenizer = new StatefulTokenizer();
    final TokenizerTextAnnotationBuilder annotationBuilder = new TokenizerTextAnnotationBuilder(tokenizer);
    final TextAnnotation annotation = annotationBuilder.createTextAnnotation("test", "test", sentence);

    // Pool the features of all three extractors for each token index.
    final Set<Feature> collected = new HashSet<>();
    int tokenIndex = 0;
    while (tokenIndex < annotation.size()) {
        try {
            collected.addAll(sharedExtractor.getWordFeatures(annotation, tokenIndex));
            collected.addAll(bllipExtractor.getWordFeatures(annotation, tokenIndex));
            collected.addAll(wikiExtractor.getWordFeatures(annotation, tokenIndex));
        } catch (EdisonException extractionError) {
            extractionError.printStackTrace();
            fail(extractionError.getMessage());
        }
        tokenIndex++;
    }

    assertTrue(annotation.hasView(ViewNames.BROWN_CLUSTERS + "_wiki"));

    // Sort the rendered features so the comparison does not depend on set iteration order.
    final String[] rendered = new String[collected.size()];
    int next = 0;
    for (Feature feature : collected) {
        rendered[next] = feature.toString();
        next++;
    }
    Arrays.sort(rendered);

    assertEquals(expectedOutput, StringUtils.join(",", rendered));
}
Also used: TokenizerTextAnnotationBuilder (edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder), StatefulTokenizer (edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer), EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException), TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation), Feature (edu.illinois.cs.cogcomp.edison.features.Feature), BrownClusterFeatureExtractor (edu.illinois.cs.cogcomp.edison.features.factory.BrownClusterFeatureExtractor), HashSet (java.util.HashSet), Test (org.junit.Test)

Aggregations

TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation): 1, Feature (edu.illinois.cs.cogcomp.edison.features.Feature): 1, BrownClusterFeatureExtractor (edu.illinois.cs.cogcomp.edison.features.factory.BrownClusterFeatureExtractor): 1, EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException): 1, StatefulTokenizer (edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer): 1, TokenizerTextAnnotationBuilder (edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder): 1, HashSet (java.util.HashSet): 1, Test (org.junit.Test): 1