Use of com.joliciel.talismane.tokeniser.StringAttribute in the project talismane by joliciel-informatique.
Class RegexMarkerFilterTest, method testTag.
/**
 * Annotates a text containing two {@code <skip>...</skip>} spans with a
 * {@link RawTextRegexAnnotator} carrying the attribute {@code TAG1=x}, then
 * checks that exactly two {@link StringAttribute} annotations are produced.
 * Each annotation is expected to start at the opening {@code <skip>} and end
 * right after the matching {@code </skip>}, and to carry key {@code TAG1}
 * with value {@code x}.
 */
@Test
public void testTag() throws Exception {
    RawTextRegexAnnotator filter = new RawTextRegexAnnotator(RawTextMarkType.TAG, "<skip>(.*?)</skip>", 0, 1000);
    filter.setAttribute(new StringAttribute("TAG1", "x"));

    AnnotatedText text = new AnnotatedText("J'ai du <skip>skip me</skip>mal à le croire.<skip>skip this</skip>");
    filter.annotate(text);
    LOG.debug(text.getAnnotations().toString());

    // Expected offsets, expressed as the length of the text preceding each boundary.
    int[] expectedStarts = {
        "J'ai du ".length(),
        "J'ai du <skip>skip me</skip>mal à le croire.".length()
    };
    int[] expectedEnds = {
        "J'ai du <skip>skip me</skip>".length(),
        "J'ai du <skip>skip me</skip>mal à le croire.<skip>skip this</skip>".length()
    };

    List<Annotation<StringAttribute>> annotations = text.getAnnotations(StringAttribute.class);
    assertEquals(2, annotations.size());

    for (int index = 0; index < annotations.size(); index++) {
        Annotation<StringAttribute> annotation = annotations.get(index);
        assertEquals(expectedStarts[index], annotation.getStart());
        assertEquals(expectedEnds[index], annotation.getEnd());
        // Both annotations must carry the attribute configured on the filter.
        assertEquals("TAG1", annotation.getData().getKey());
        assertEquals("x", annotation.getData().getValue());
    }
}
Aggregations