use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.View in project cogcomp-nlp by CogComp.
the class ChunkEmbedding method getFeatures.
/**
 * Extracts features describing how the constituents of the view named {@code viewName}
 * are positioned relative to {@code c}.
 *
 * <p>Two groups of features are produced, in this order:
 * <ol>
 *   <li>the number of view constituents contained in {@code c}, as a real-valued feature
 *       plus a bucketed discrete feature (1, 2, many); nothing is emitted when the count
 *       is zero;</li>
 *   <li>whatever {@code FeatureUtilities.getFeatures} derives from the counter filled by
 *       the successive {@code addFeatures} calls, one call per positional predicate.</li>
 * </ol>
 *
 * @param c the constituent to describe
 * @return the features in insertion order
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    View chunkView = c.getTextAnnotation().getView(viewName);
    Set<Feature> result = new LinkedHashSet<>();

    // Count the view constituents that fall inside c.
    IQueryable<Constituent> contained = chunkView.where(Queries.containedInConstituent(c));
    int chunkCount = ((QueryableList<Constituent>) contained).size();

    if (chunkCount >= 1) {
        result.add(RealFeature.create("nchnks", chunkCount));
        // Bucketed version of the same count.
        switch (chunkCount) {
            case 1:
                result.add(DiscreteFeature.create("nchnks-th:1"));
                break;
            case 2:
                result.add(DiscreteFeature.create("nchnks-th:2"));
                break;
            default:
                result.add(DiscreteFeature.create("nchnks-th:many"));
                break;
        }
    }

    // One addFeatures call per positional relation; all calls share the same counter.
    // NOTE(review): addFeatures is defined outside this block -- presumably it tallies
    // prefixed entries into the counter; confirm against its definition.
    Counter<String> tally = new Counter<>();
    addFeatures(result, chunkView, Queries.containedInConstituentExclusive(c), tally, "cont-in");
    addFeatures(result, chunkView, Queries.containsConstituent(c), tally, "contains");
    addFeatures(result, chunkView, Queries.sameSpanAsConstituent(c), tally, "=span");
    addFeatures(result, chunkView, Queries.exclusivelyOverlaps(c), tally, "ex-ovlp");
    addFeatures(result, chunkView, Queries.hasOverlap(c), tally, "has-ovlp");
    // Disabled relation:
    // addFeatures(result, chunkView, Queries.hasNoOverlap(c), tally, "has-no-overlap");
    addFeatures(result, chunkView, Queries.sameStartSpanAs(c), tally, "=start");
    addFeatures(result, chunkView, Queries.sameEndSpanAs(c), tally, "=end");

    result.addAll(FeatureUtilities.getFeatures(tally));
    return result;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.View in project cogcomp-nlp by CogComp.
the class IllinoisLemmatizer method addView.
/**
 * Creates the lemma view for {@code textAnnotation} via {@code createLemmaView} and adds it
 * to the annotation under the name returned by {@code getViewName()}.
 *
 * @param textAnnotation the annotation that receives the lemma view
 * @throws AnnotatorException if {@code createLemmaView} throws an {@link IOException}; the
 *         original exception is attached as the cause so its stack trace is not lost
 */
@Override
public void addView(TextAnnotation textAnnotation) throws AnnotatorException {
    final View lemmaView;
    try {
        lemmaView = this.createLemmaView(textAnnotation);
    } catch (IOException e) {
        // NOTE(review): the message names getView() although this method is addView();
        // kept unchanged in case callers or logs match on it.
        String msg = NAME + ".getView(): caught IOException trying to create view: " + e.getMessage();
        AnnotatorException failure = new AnnotatorException(msg);
        // Attach the cause through initCause: only the single-String constructor of
        // AnnotatorException is known to exist from this code.
        failure.initCause(e);
        throw failure;
    }
    textAnnotation.addView(getViewName(), lemmaView);
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.View in project cogcomp-nlp by CogComp.
the class LemmatizerTATest method testCreateWnLemmaView.
/**
 * Verifies that {@code lem.createLemmaView(inputTa)} yields a view and that selected
 * constituents of the LEMMA view on {@code inputTa} carry the expected lemma labels.
 *
 * <p>Also checks that the first POS constituent spans tokens [0, 1).
 */
@Test
public void testCreateWnLemmaView() {
    View lemmaView = null;
    try {
        lemmaView = lem.createLemmaView(inputTa);
    } catch (IOException e) {
        // fail() only carries the message, so print the trace for diagnosis.
        e.printStackTrace();
        fail(e.getMessage());
    }

    Constituent firstPos = inputTa.getView(ViewNames.POS).getConstituents().get(0);
    assertEquals(0, firstPos.getStartSpan());
    assertEquals(1, firstPos.getEndSpan());

    // The lemmatizer must have produced a view.
    assertTrue(lemmaView != null);

    // NOTE(review): labels are read from the LEMMA view already attached to inputTa, not
    // from the freshly created lemmaView -- confirm this is intended.
    List<Constituent> lemmas = inputTa.getView(ViewNames.LEMMA).getConstituents();

    // Token index -> expected lemma (expected value first, per the JUnit convention, so
    // failure messages report expected/actual the right way round).
    // NOTE(review): the surface forms at these indices depend on the fixture text, which is
    // not visible here.
    assertEquals("the", lemmas.get(0).getLabel());
    assertEquals("cia", lemmas.get(1).getLabel());
    assertEquals("think", lemmas.get(2).getLabel());
    assertEquals("have", lemmas.get(6).getLabel());
    assertEquals("be", lemmas.get(15).getLabel());
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.View in project cogcomp-nlp by CogComp.
the class PropbankReaderTest method testParsedViews.
/**
 * Reads section "00" of the bundled Penn Treebank / PropBank test resources and checks
 * that every constituent of every available view has a non-negative start character
 * offset that is strictly smaller than its end offset, that exactly three documents are
 * read, and that each view name was seen in all three of them.
 */
@Test
public void testParsedViews() throws Exception {
    final int expectedDocuments = 3;

    PropbankReader reader = new PropbankReader(
            "src/test/resources/edu/illinois/cs/cogcomp/nlp/corpusreaders/pennTreeBank_3",
            "src/test/resources/edu/illinois/cs/cogcomp/nlp/corpusreaders/propBank_1",
            new String[] { "00" },
            ViewNames.SRL_VERB,
            true);

    // Number of documents in which each view name occurred.
    Counter<String> documentsPerView = new Counter<>();
    int documentsRead = 0;

    for (; reader.hasNext(); documentsRead++) {
        TextAnnotation annotation = reader.next();
        for (String name : annotation.getAvailableViews()) {
            for (Constituent constituent : annotation.getView(name)) {
                assertTrue(
                        "Constituents in " + name + " should have valid start character offset",
                        constituent.getStartCharOffset() >= 0);
                assertTrue(
                        "Constituents in " + name + " should have valid character offsets",
                        constituent.getStartCharOffset() < constituent.getEndCharOffset());
            }
            documentsPerView.incrementCount(name);
        }
    }

    assertEquals(expectedDocuments, documentsRead);
    for (String name : documentsPerView.getSortedItems()) {
        assertEquals("ViewName_" + name, expectedDocuments, documentsPerView.getCount(name), 0);
    }
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.View in project cogcomp-nlp by CogComp.
the class ProtobufSerializerTest method testSerializerWithCharOffsets.
/**
 * Round-trips {@code ta}, extended with a "rhyme" view holding two multi-label
 * constituents and one multi-label relation, through the protocol-buffer serializer, then
 * converts the parsed result to JSON and checks it with
 * {@code JsonSerializerTest.verifySerializedJSONObject}.
 */
@Test
public void testSerializerWithCharOffsets() throws Exception {
    final String rhymeViewName = "rhyme";
    View rhymes = new View(rhymeViewName, "test", ta, 0.4);

    String[] labels = { "eeny", "meeny", "miny", "mo" };
    double[] scores = { 0.15, 0.15, 0.3, 0.4 };

    // Initial label -> score assignment, index for index.
    Map<String, Double> labelScores = new TreeMap<>();
    int idx = 0;
    while (idx < labels.length) {
        labelScores.put(labels[idx], scores[idx]);
        ++idx;
    }
    Constituent first = new Constituent(labelScores, rhymeViewName, ta, 2, 4);
    rhymes.addConstituent(first);

    // no constraint on scores -- don't have to sum to 1.0
    // Re-score labels 3..1 with the mirrored score; label 0 keeps its earlier value.
    // The same map instance is passed to the second constituent.
    idx = labels.length - 1;
    while (idx > 0) {
        labelScores.put(labels[idx], scores[3 - idx]);
        --idx;
    }
    Constituent second = new Constituent(labelScores, rhymeViewName, ta, 2, 4);
    rhymes.addConstituent(second);

    // Relation between the two constituents with its own label distribution.
    Map<String, Double> relationScores = new TreeMap<>();
    relationScores.put("Yes", 0.8);
    relationScores.put("No", 0.2);
    rhymes.addRelation(new Relation(relationScores, first, second));

    ta.addView(rhymeViewName, rhymes);

    // Serialize to protocol buffers format, then parse the bytes back.
    TextAnnotationImpl.TextAnnotationProto written = ProtobufSerializer.writeTextAnnotation(ta);
    byte[] wireBytes = written.toByteArray();
    TextAnnotationImpl.TextAnnotationProto reread =
            TextAnnotationImpl.TextAnnotationProto.parseFrom(wireBytes);
    TextAnnotation roundTripped = ProtobufSerializer.readTextAnnotation(reread);

    // Convert to JSON and verify content.
    String json = SerializationHelper.serializeToJson(roundTripped, true);
    JsonObject parsedJson = (JsonObject) new JsonParser().parse(json);
    JsonSerializerTest.verifySerializedJSONObject(parsedJson, ta);
}
Aggregations