use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.View in project cogcomp-nlp by CogComp.
the class PosWordConjunctionSizeTwoWindowSizeTwo method getFeatures.
/**
 * Builds conjunctions of POS tag and word form over a window of {@code k = 2} tokens around
 * the given constituent.
 *
 * <p>This extractor requires the {@code TOKENS} and {@code POS} views to be present on the
 * constituent's {@link TextAnnotation}. For every position in the window it emits one feature
 * per n-gram length (1 and 2). Each feature is a {@link DiscreteFeature} whose name is
 * {@code PosWordConjunctionSizeTwoWindowSizeTwo:<offset>_<n>(<tag>-<form>[_<tag>-<form>])}.
 *
 * @param c the constituent to extract features for; the original code assumes it is a single
 *        word (token)
 * @return the features in insertion order
 * @throws EdisonException if the {@code TOKENS} or {@code POS} view cannot be retrieved
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();

    View tokensView;
    View posView;
    try {
        tokensView = ta.getView(ViewNames.TOKENS);
        posView = ta.getView(ViewNames.POS);
    } catch (RuntimeException e) {
        // Fail loudly instead of printing the trace and continuing with null views, which
        // would only defer the failure to the window helpers below.
        EdisonException wrapped = new EdisonException(
                "Unable to retrieve the TOKENS/POS views required by "
                        + "PosWordConjunctionSizeTwoWindowSizeTwo: " + e.getMessage());
        wrapped.initCause(e);
        throw wrapped;
    }

    // We can assume that the constituent in this case is a Word(Token) described by the LBJ
    // chunk definition
    int startspan = c.getStartSpan();
    int endspan = c.getEndSpan();

    // All our constituents are words(tokens)
    // words two before & after
    int k = 2;
    int window = 2;

    String[] forms = getWindowK(tokensView, startspan, endspan, k);
    String[] tags = getWindowKTags(posView, startspan, endspan, k);

    String classifier = "PosWordConjunctionSizeTwoWindowSizeTwo";
    Set<Feature> result = new LinkedHashSet<>();

    // j + 1 is the n-gram length, so it goes from 1 to 2.
    for (int j = 0; j < k; j++) {
        for (int i = 0; i < tags.length; i++) {
            StringBuilder f = new StringBuilder();
            // Join up to j + 1 consecutive "tag-form" pairs starting at i; the n-gram is
            // truncated when it would run past the end of the window array.
            for (int context = 0; context <= j && i + context < tags.length; context++) {
                if (context != 0) {
                    f.append("_");
                }
                f.append(tags[i + context]);
                f.append("-");
                f.append(forms[i + context]);
            }
            // 2 is the center object in the array so (i - window) should go from -2 to +2
            // (with 0 being the center) -- this presumes the helpers return 2k + 1 entries.
            String id = classifier + ":" + ((i - window) + "_" + (j + 1));
            String value = "(" + f + ")";
            result.add(new DiscreteFeature(id + value));
        }
    }
    return result;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.View in project cogcomp-nlp by CogComp.
the class NERAnnotatorTest method testResults.
/**
 * Verifies that every constituent of the view produced for {@code TEST_INPUT} has a string
 * form contained in {@code entities}.
 *
 * <p>TODO: MS removed @Test annotation as this test currently fails, but benchmark
 * performance is good.
 */
public void testResults() {
    TextAnnotation annotation = tab.createTextAnnotation(TEST_INPUT);

    View nerView = null;
    try {
        nerView = getView(annotation);
    } catch (AnnotatorException ex) {
        ex.printStackTrace();
        // fail() aborts the test, so nerView is never dereferenced while null.
        fail(ex.getMessage());
    }

    for (Constituent candidate : nerView.getConstituents()) {
        String entityText = candidate.toString();
        boolean isExpected = entities.contains(entityText);
        assertTrue("No entity named \"" + entityText + "\"", isExpected);
    }
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.View in project cogcomp-nlp by CogComp.
the class NerOntonotesTest method testOntonotesNer.
/**
 * Runs the OntoNotes NER annotator (built with empty {@link Properties}) over
 * {@code TEST_INPUT} and checks that the resulting view contains exactly four constituents.
 */
@Test
public void testOntonotesNer() {
    TextAnnotationBuilder tab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    Properties props = new Properties();
    NERAnnotator nerOntonotes = NerAnnotatorManager.buildNerAnnotator(new ResourceManager(props), ViewNames.NER_ONTONOTES);
    TextAnnotation taOnto = tab.createTextAnnotation("", "", TEST_INPUT);
    try {
        nerOntonotes.getView(taOnto);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    View v = taOnto.getView(nerOntonotes.getViewName());
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes the
    // failure message report the expected and observed counts the right way round.
    assertEquals(4, v.getConstituents().size());
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.View in project cogcomp-nlp by CogComp.
the class TextAnnotationMapDBHandlerTest method updateTextAnnotation.
/**
 * Exercises {@code mapDBHandler.updateTextAnnotation}: a view added to a stored
 * TextAnnotation must be visible after an update and re-read, and removing it again followed
 * by another update must make it disappear.
 */
@Test
public void updateTextAnnotation() throws Exception {
    final String addedViewName = "TEST_VIEW";

    // Seed the dataset and read the stored copy back.
    TextAnnotation seed = DummyTextAnnotationGenerator.generateAnnotatedTextAnnotation(false, 2);
    mapDBHandler.addTextAnnotation(testDataset, seed);
    TextAnnotation stored = mapDBHandler.getDataset(testDataset).next();

    // Attach a new view to the stored copy.
    stored.addView(addedViewName, new View(addedViewName, "TEST", stored, 0.0));
    assertTrue(stored.hasView(addedViewName));

    // Push the change to the DB and confirm a fresh read sees it.
    mapDBHandler.updateTextAnnotation(stored);
    TextAnnotation afterAdd = mapDBHandler.getDataset(testDataset).next();
    assertTrue(afterAdd.hasView(addedViewName));

    // Undo the change, push again, and confirm the view is gone.
    afterAdd.removeView(addedViewName);
    mapDBHandler.updateTextAnnotation(afterAdd);
    TextAnnotation afterRemove = mapDBHandler.getTextAnnotation(afterAdd);
    assertFalse(afterRemove.hasView(addedViewName));
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.View in project cogcomp-nlp by CogComp.
the class WordBigrams method getFeatures.
/**
 * Extracts ordered n-gram features of sizes 1 and 2 over surface forms.
 *
 * <p>The constituents are those the {@code TOKENS} view returns from
 * {@code getConstituentsCoveringSpan} for the instance's span, sorted with
 * {@code TextAnnotationUtilities.constituentStartComparator} before the n-grams are built.
 *
 * @param instance the constituent whose span selects the tokens
 * @return the size-1 features followed by the size-2 features, in insertion order
 * @throws EdisonException declared by the overridden method's signature
 */
@Override
public Set<Feature> getFeatures(Constituent instance) throws EdisonException {
    View tokenView = instance.getTextAnnotation().getView(ViewNames.TOKENS);
    List<Constituent> covered =
            tokenView.getConstituentsCoveringSpan(instance.getStartSpan(), instance.getEndSpan());
    covered.sort(TextAnnotationUtilities.constituentStartComparator);

    // Maps each token constituent to the string used as its n-gram element.
    ITransformer<Constituent, String> toSurfaceForm = new ITransformer<Constituent, String>() {
        public String transform(Constituent token) {
            return token.getSurfaceForm();
        }
    };

    Set<Feature> ngrams = new LinkedHashSet<>();
    for (int size = 1; size <= 2; size++) {
        ngrams.addAll(FeatureNGramUtility.getNgramsOrdered(covered, size, toSurfaceForm));
    }
    return ngrams;
}
Aggregations