Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
Class ContextFeatureExtractor, method getFeatures.
/**
 * Extracts features from the tokens in a window around a constituent.
 *
 * <p>The window reaches {@code contextSize} tokens to either side of the constituent and is
 * clamped to the bounds of the text annotation. For every token in the window, each extractor in
 * {@code generators} is run on a temporary one-token constituent, and every resulting feature is
 * prefixed with {@code "context"}, an optional relative index, {@code ":"} and the extractor's
 * name.
 *
 * @param c the constituent whose surrounding tokens are examined
 * @return the prefixed features, in insertion order
 * @throws EdisonException if one of the underlying extractors fails
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final TextAnnotation ta = c.getTextAnnotation();
    final int spanStart = c.getStartSpan();
    final int spanEnd = c.getEndSpan();

    // Clamp the window to [0, ta.size()].
    final int windowStart = Math.max(0, spanStart - contextSize);
    final int windowEnd = Math.min(ta.size(), spanEnd + contextSize);

    final Set<Feature> collected = new LinkedHashSet<>();
    for (int tokenId = windowStart; tokenId < windowEnd; tokenId++) {
        final boolean insideSpan = spanStart <= tokenId && tokenId < spanEnd;
        if (ignoreConstituent && insideSpan) {
            continue;
        }

        // The prefix depends only on the token position, so build it once per token.
        final StringBuilder prefix = new StringBuilder("context");
        if (specifyIndex) {
            if (tokenId < spanStart) {
                // Tokens to the left get a negative offset: -1 is immediately before the span.
                prefix.append(tokenId - spanStart);
            } else if (tokenId >= spanEnd) {
                // Tokens to the right get a positive offset: 1 is immediately after the span.
                prefix.append(tokenId - spanEnd + 1);
            } else {
                // Tokens inside the span carry no numeric offset.
                prefix.append("*");
            }
        }
        prefix.append(":");
        final String preamble = prefix.toString();

        for (FeatureExtractor f : this.generators) {
            final Constituent neighbor = new Constituent("TMP", "TMP", ta, tokenId, tokenId + 1);
            for (Feature feat : f.getFeatures(neighbor)) {
                collected.add(feat.prefixWith(preamble + f.getName()));
            }
        }
    }
    return collected;
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
Class CurrencyIndicator, method getFeatures.
/**
 * Returns a set containing {@code CURRENCY} when a constituent of the {@code VIEW_NAME} view
 * inside {@code c} qualifies under the checks below, and an empty set otherwise.
 *
 * <p>The currency lexicon is loaded on first use, and the {@code VIEW_NAME} view is added to the
 * text annotation if it is not already present.
 *
 * @param c the constituent to inspect
 * @return a set holding {@code CURRENCY}, or an empty set
 * @throws EdisonException if the currency lexicon cannot be loaded or the currency view cannot
 *         be added to the text annotation
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    try {
        if (!loaded) {
            synchronized (this) {
                // The lexicon is now loaded from the datastore; re-check the flag under the lock.
                if (!loaded) {
                    loadCurrency(gzip, true);
                }
            }
        }
    } catch (Exception ex) {
        throw new EdisonException(ex);
    }

    TextAnnotation ta = c.getTextAnnotation();
    if (!ta.hasView(VIEW_NAME)) {
        try {
            addCurrencyView(ta);
        } catch (Exception e) {
            // Do not swallow the failure: the view lookup below needs the view to exist, so
            // surface the real cause to the caller instead of printing it and carrying on.
            throw new EdisonException(e);
        }
    }

    SpanLabelView view = (SpanLabelView) ta.getView(VIEW_NAME);
    Set<Feature> features = new LinkedHashSet<>();
    for (Constituent cc : view.where(Queries.containedInConstituent(c))) {
        if (cc.getEndSpan() == c.getEndSpan()) {
            // NOTE(review): if containedInConstituent only yields constituents lying within c,
            // then cc.getStartSpan() - 1 can never exceed c.getEndSpan() and this branch is
            // dead. The bound was presumably meant to be c.getStartSpan() -- confirm before
            // changing, since it alters which features are produced.
            if (cc.getStartSpan() - 1 > c.getEndSpan()) {
                // check if the token just before the currency mention is a number word
                if (WordLists.NUMBERS.contains(ta.getToken(cc.getStartSpan() - 1).toLowerCase())) {
                    features.add(CURRENCY);
                    break;
                }
            }
        } else if (WordFeatureExtractorFactory.numberNormalizer.getWordFeatures(ta, cc.getEndSpan()).size() > 0) {
            // The token right after the currency mention yields number-normalizer features.
            features.add(CURRENCY);
            break;
        }
    }
    return features;
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
Class TestBrownClusterFeatureExtractor, method test.
/**
 * Runs three Brown-cluster feature extractors over every token of a short sentence, checks that
 * the "_wiki" Brown-cluster view was added, and compares the sorted, comma-joined feature
 * strings with {@code expectedOutput}.
 */
@Test
public final void test() {
    final int[] prefixLengths = { 4, 6, 10, 20 };
    final BrownClusterFeatureExtractor defaultExtractor = BrownClusterFeatureExtractor.instance1000;

    // Any construction failure aborts the test via fail(), so one handler covers both.
    BrownClusterFeatureExtractor bllipExtractor = null;
    BrownClusterFeatureExtractor wikiExtractor = null;
    try {
        bllipExtractor = new BrownClusterFeatureExtractor("bllip", "brownBllipClusters", prefixLengths);
        wikiExtractor = new BrownClusterFeatureExtractor("wiki", "brown-english-wikitext.case-intact.txt-c1000-freq10-v3.txt", prefixLengths);
    } catch (EdisonException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    final TokenizerTextAnnotationBuilder builder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    final String sentence = "This test sentence has Joynt and Lieberknecht and Fibonnaci in it "
            + "just to exercise possible brown cluster hits in resources used by NER.";
    final TextAnnotation ta = builder.createTextAnnotation("test", "test", sentence);

    final Set<Feature> collected = new HashSet<>();
    try {
        for (int tokenId = 0; tokenId < ta.size(); tokenId++) {
            collected.addAll(defaultExtractor.getWordFeatures(ta, tokenId));
            collected.addAll(bllipExtractor.getWordFeatures(ta, tokenId));
            collected.addAll(wikiExtractor.getWordFeatures(ta, tokenId));
        }
    } catch (EdisonException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    assertTrue(ta.hasView(ViewNames.BROWN_CLUSTERS + "_wiki"));

    // Render the features as strings and sort them so the comparison is order-independent.
    final String[] rendered = new String[collected.size()];
    int next = 0;
    for (Feature feature : collected) {
        rendered[next] = feature.toString();
        next++;
    }
    Arrays.sort(rendered);

    final String actualOutput = StringUtils.join(",", rendered);
    assertEquals(expectedOutput, actualOutput);
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
Class TestSrlNomIdentifier, method test.
/**
 * Checks that the feature extractor built from the {@code nom-identifier.fex} manifest and
 * {@code SrlNomIdentifier} are judged equal by {@code SRLFeaturesComparator} on a handful of
 * constituents.
 *
 * <p>Only in and out relations in the SRL_VERB view are used for the purpose of testing.
 *
 * @throws Exception if the manifest file cannot be read or feature extraction fails
 */
public final void test() throws Exception {
    logger.info("Nom_Identifier Feature Extractor");
    String[] viewsToAdd = { ViewNames.POS, ViewNames.LEMMA, ViewNames.SHALLOW_PARSE, ViewNames.PARSE_GOLD, ViewNames.SRL_VERB, ViewNames.PARSE_STANFORD, ViewNames.NER_CONLL };
    TextAnnotation ta = DummyTextAnnotationGenerator.generateAnnotatedTextAnnotation(viewsToAdd, true, 3);
    ta.addView(ClauseViewGenerator.STANFORD);
    ta.addView(PseudoParse.STANFORD);
    logger.info("This textannotation annotates the text: \n" + ta.getText());

    // Test constituents are those of the SRL_VERB view covering the two token spans below.
    View srlVerbView = ta.getView("SRL_VERB");
    List<Constituent> testlist = srlVerbView.getConstituentsCoveringSpan(10, 13);
    testlist.addAll(srlVerbView.getConstituentsCoveringSpan(26, 27));

    String fileName = Constant.prefix + "/Nom/Identifier/nom-identifier.fex";
    FeatureExtractor fex;
    // Close the manifest stream deterministically. It stays open until createFex() returns,
    // in case the manifest reads from it after construction.
    try (FileInputStream manifestStream = new FileInputStream(fileName)) {
        FeatureManifest featureManifest = new FeatureManifest(manifestStream);
        FeatureManifest.setFeatureExtractor("hyphen-argument-feature", FeatureGenerators.hyphenTagFeature);
        FeatureManifest.setTransformer("parse-left-sibling", FeatureGenerators.getParseLeftSibling(ViewNames.PARSE_STANFORD));
        FeatureManifest.setTransformer("parse-right-sibling", FeatureGenerators.getParseRightSibling(ViewNames.PARSE_STANFORD));
        FeatureManifest.setFeatureExtractor("pp-features", FeatureGenerators.ppFeatures(ViewNames.PARSE_STANFORD));
        FeatureManifest.setFeatureExtractor("projected-path", new ProjectedPath(ViewNames.PARSE_STANFORD));
        featureManifest.useCompressedName();
        featureManifest.setVariable("*default-parser*", ViewNames.PARSE_STANFORD);
        fex = featureManifest.createFex();
    }

    SrlNomIdentifier ni = new SrlNomIdentifier();
    for (Constituent test : testlist) {
        assertTrue(SRLFeaturesComparator.isEqual(test, fex, ni));
    }
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
Class AnnotatorLazyInitTest, method testLazy.
/**
 * Builds a {@code SimpleGazetteerAnnotator} with the lazy-initialization property set to
 * {@code TRUE}, asserts that it reports itself uninitialized until {@code getView} is called,
 * and asserts that afterwards it is initialized and the text annotation holds its view.
 */
@Test
public void testLazy() {
    final Properties props = new Properties();
    props.setProperty(SimpleGazetteerAnnotatorConfigurator.PATH_TO_DICTIONARIES.key, "/testgazetteers/");
    props.setProperty(SimpleGazetteerAnnotatorConfigurator.PHRASE_LENGTH.key, "6");
    props.setProperty(SimpleGazetteerAnnotatorConfigurator.IS_LAZILY_INITIALIZED.key, SimpleGazetteerAnnotatorConfigurator.TRUE);

    SimpleGazetteerAnnotator sga = null;
    try {
        sga = new SimpleGazetteerAnnotator(new ResourceManager(props));
    } catch (IOException | URISyntaxException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    // Before the first annotation request: not initialized; each dictionary collection is
    // either absent or non-empty.
    assertFalse(sga.isInitialized());
    assertTrue(null == sga.dictionaries || sga.dictionaries.size() > 0);
    assertTrue(null == sga.dictionariesIgnoreCase || sga.dictionariesIgnoreCase.size() > 0);

    final TextAnnotation ta = tab.createTextAnnotation("The CIA has no London headquarters, though General Electric does.");
    try {
        sga.getView(ta);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    // After the first annotation request: the view exists and the annotator is initialized.
    assertTrue(ta.hasView(sga.getViewName()));
    assertTrue(sga.isInitialized());
    assertTrue(null == sga.dictionaries || sga.dictionaries.size() > 0);
    assertTrue(null == sga.dictionariesIgnoreCase || sga.dictionariesIgnoreCase.size() > 0);
    assertTrue(ta.hasView(sga.getViewName()));
}
Aggregations