Use of edu.illinois.cs.cogcomp.edison.features.factory.BrownClusterFeatureExtractor in the cogcomp-nlp project by CogComp.
From the class TestBrownClusterFeatureExtractor, method test.
/**
 * Runs three Brown cluster feature extractors over every token of a sample sentence and
 * compares the combined output against {@code expectedOutput}.
 *
 * <p>The extractors are the shared {@code instance1000} plus two built here from the
 * "bllip" and "wiki" cluster resources with prefix lengths 4, 6, 10 and 20. The collected
 * features are de-duplicated in a set, rendered with {@code toString()}, sorted, and joined
 * with commas before the comparison. The test also checks that the text annotation ends up
 * with the view named {@code ViewNames.BROWN_CLUSTERS + "_wiki"} (presumably added by the
 * "wiki" extractor -- confirm against the extractor implementation).
 */
@Test
public final void test() {
    int[] clusterPrefixLengths = new int[] { 4, 6, 10, 20 };

    BrownClusterFeatureExtractor defaultExtractor = BrownClusterFeatureExtractor.instance1000;

    // Each custom extractor is built in its own try block so a failure is reported
    // for the specific resource that could not be set up.
    BrownClusterFeatureExtractor bllipExtractor = null;
    try {
        bllipExtractor =
                new BrownClusterFeatureExtractor("bllip", "brownBllipClusters", clusterPrefixLengths);
    } catch (EdisonException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    BrownClusterFeatureExtractor wikiExtractor = null;
    try {
        wikiExtractor =
                new BrownClusterFeatureExtractor(
                        "wiki",
                        "brown-english-wikitext.case-intact.txt-c1000-freq10-v3.txt",
                        clusterPrefixLengths);
    } catch (EdisonException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    TokenizerTextAnnotationBuilder annotationBuilder =
            new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation ta =
            annotationBuilder.createTextAnnotation(
                    "test",
                    "test",
                    "This test sentence has Joynt and Lieberknecht and Fibonnaci in it "
                            + "just to exercise possible brown cluster hits in resources used by NER.");

    // A set collapses features that several extractors (or several tokens) produce identically.
    Set<Feature> collectedFeatures = new HashSet<>();
    for (int tokenId = 0; tokenId < ta.size(); ++tokenId) {
        try {
            collectedFeatures.addAll(defaultExtractor.getWordFeatures(ta, tokenId));
            collectedFeatures.addAll(bllipExtractor.getWordFeatures(ta, tokenId));
            collectedFeatures.addAll(wikiExtractor.getWordFeatures(ta, tokenId));
        } catch (EdisonException e) {
            e.printStackTrace();
            fail(e.getMessage());
        }
    }

    assertTrue(ta.hasView(ViewNames.BROWN_CLUSTERS + "_wiki"));

    // HashSet iteration order is unspecified, so sort the rendered features to get a
    // stable string to compare against the expected value.
    String[] renderedFeatures = new String[collectedFeatures.size()];
    int next = 0;
    for (Feature feature : collectedFeatures) {
        renderedFeatures[next] = feature.toString();
        next++;
    }
    Arrays.sort(renderedFeatures);

    String actualOutput = StringUtils.join(",", renderedFeatures);
    assertEquals(expectedOutput, actualOutput);
}
Aggregations