use of com.joliciel.talismane.Annotation in project talismane by joliciel-informatique.
the class RawTextTest method testGetDetectedSentences.
/**
 * Exercises {@code RawText.getDetectedSentences()} on a text containing a
 * {@code <sent/>} marker: a sentence-break marker and a skip marker are placed
 * on the raw text, sentence boundaries are then added to the processed text,
 * and the test checks that boundaries, sentences and sentence-level
 * annotations all report offsets relative to the original raw text.
 */
@Test
public void testGetDetectedSentences() throws Exception {
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    final Config config = ConfigFactory.load();
    final String sessionId = "test";
    final String[] noLabels = new String[0];

    final String rawContent = "Sentence 1<sent/>Sentence 2. Sentence 3. Sentence 4.";
    final String processedContent = "Sentence 1 Sentence 2. Sentence 3. Sentence 4.";
    RawText rawText = new RawText(rawContent, true, sessionId);

    // Span of the "<sent/>" marker inside the raw text.
    final int markerStart = "Sentence 1".length();
    final int markerEnd = "Sentence 1<sent/>".length();

    // Simulate a filter: mark the "<sent/>" span as a sentence break...
    System.out.println("we add a sentence break annotation (as if it was added by a filter)");
    List<Annotation<RawTextSentenceBreakMarker>> breakMarkers = new ArrayList<>();
    breakMarkers.add(new Annotation<>(markerStart, markerEnd, new RawTextSentenceBreakMarker("me"), noLabels));
    rawText.addAnnotations(breakMarkers);

    // ...and mark the very same span with a skip marker.
    List<Annotation<RawTextSkipMarker>> skipMarkers = new ArrayList<>();
    skipMarkers.add(new Annotation<>(markerStart, markerEnd, new RawTextSkipMarker("me"), noLabels));
    rawText.addAnnotations(skipMarkers);

    System.out.println("textBlock text: " + rawText.getText());
    System.out.println("textBlock annotations: " + rawText.getAnnotations().toString());

    // The processed text is expected to hold a single space where the marked span was.
    AnnotatedText processed = rawText.getProcessedText();
    assertEquals(processedContent, processed.getText());

    // Simulate a sentence detector: add two boundaries, expressed in processed-text offsets.
    System.out.println("add sentence boundaries to the processed text (as if they were added by a sentence detector)");
    final int endOfSentence2 = "Sentence 1 Sentence 2.".length();
    final int endOfSentence3 = "Sentence 1 Sentence 2. Sentence 3.".length();
    List<Annotation<SentenceBoundary>> processedBoundaries = new ArrayList<>();
    processedBoundaries.add(new Annotation<>("Sentence 1".length(), endOfSentence2, new SentenceBoundary(), noLabels));
    processedBoundaries.add(new Annotation<>(endOfSentence2, endOfSentence3, new SentenceBoundary(), noLabels));
    processed.addAnnotations(processedBoundaries);
    assertEquals(processedContent, processed.getText());

    System.out.println("textBlock text: " + rawText.getText());
    System.out.println("textBlock annotations: " + rawText.getAnnotations().toString());

    // The raw text must be unchanged, and must now expose the two boundaries
    // at offsets expressed relative to the raw text.
    assertEquals(rawContent, rawText.getText());
    List<Annotation<SentenceBoundary>> rawBoundaries = rawText.getAnnotations(SentenceBoundary.class);
    assertEquals(2, rawBoundaries.size());
    Annotation<SentenceBoundary> firstBoundary = rawBoundaries.get(0);
    assertEquals("Sentence 1<sent/>".length(), firstBoundary.getStart());
    assertEquals("Sentence 1<sent/>Sentence 2.".length(), firstBoundary.getEnd());
    Annotation<SentenceBoundary> secondBoundary = rawBoundaries.get(1);
    assertEquals("Sentence 1<sent/>Sentence 2.".length(), secondBoundary.getStart());
    assertEquals("Sentence 1<sent/>Sentence 2. Sentence 3.".length(), secondBoundary.getEnd());

    List<Sentence> sentences = rawText.getDetectedSentences();
    System.out.println("sentences: " + sentences.toString());
    assertEquals(4, sentences.size());

    // Expected text of each sentence, paired with the raw-text offset of its first character.
    final String[] expectedTexts = { "Sentence 1", "Sentence 2.", "Sentence 3.", "Sentence 4." };
    final int[] expectedOffsets = {
        "".length(),
        "Sentence 1<sent/>".length(),
        "Sentence 1<sent/>Sentence 2. ".length(),
        "Sentence 1<sent/>Sentence 2. Sentence 3. ".length()
    };
    for (int idx = 0; idx < expectedTexts.length; idx++) {
        Sentence current = sentences.get(idx);
        assertEquals(expectedTexts[idx], current.getText());
        assertEquals(expectedOffsets[idx], current.getOriginalIndex(0));
    }

    // An annotation added to a sentence (in sentence-relative offsets) must
    // show up on the raw text at raw-text offsets.
    Sentence lastSentence = sentences.get(3);
    List<Annotation<String>> sentenceAnnotations = new ArrayList<>();
    sentenceAnnotations.add(new Annotation<String>("Sentence ".length(), "Sentence 4".length(), "four", noLabels));
    lastSentence.addAnnotations(sentenceAnnotations);

    List<Annotation<String>> rawAnnotations = rawText.getAnnotations(String.class);
    assertEquals(1, rawAnnotations.size());
    Annotation<String> mapped = rawAnnotations.get(0);
    assertEquals("Sentence 1<sent/>Sentence 2. Sentence 3. Sentence ".length(), mapped.getStart());
    assertEquals("Sentence 1<sent/>Sentence 2. Sentence 3. Sentence 4".length(), mapped.getEnd());
}
use of com.joliciel.talismane.Annotation in project talismane by joliciel-informatique.
the class RegexMarkerFilterTest method testApply.
/**
 * Verifies that a {@code SKIP} annotator built from the pattern
 * {@code <skip>.*?</skip>} adds two {@code RawTextSkipMarker} annotations,
 * each spanning one complete {@code <skip>...</skip>} element of the text.
 */
@Test
public void testApply() throws Exception {
    RawTextRegexAnnotator filter = new RawTextRegexAnnotator(RawTextMarkType.SKIP, "<skip>.*?</skip>", 0, 1000);
    AnnotatedText text = new AnnotatedText("J'ai du <skip>skip me</skip>mal à le croire.<skip>skip me</skip>");
    filter.annotate(text);
    LOG.debug(text.getAnnotations().toString());

    List<Annotation<RawTextSkipMarker>> skips = text.getAnnotations(RawTextSkipMarker.class);
    assertEquals(2, skips.size());

    // Both annotations are checked by index so that neither set of
    // assertions can be skipped silently.
    Annotation<RawTextSkipMarker> firstSkip = skips.get(0);
    assertEquals("J'ai du ".length(), firstSkip.getStart());
    assertEquals("J'ai du <skip>skip me</skip>".length(), firstSkip.getEnd());

    Annotation<RawTextSkipMarker> secondSkip = skips.get(1);
    assertEquals("J'ai du <skip>skip me</skip>mal à le croire.".length(), secondSkip.getStart());
    assertEquals("J'ai du <skip>skip me</skip>mal à le croire.<skip>skip me</skip>".length(), secondSkip.getEnd());
}
use of com.joliciel.talismane.Annotation in project talismane by joliciel-informatique.
the class RegexMarkerFilterTest method testTag.
/**
 * Verifies that a {@code TAG} annotator carrying the attribute
 * {@code TAG1=x} adds two {@code StringAttribute} annotations, one per
 * {@code <skip>...</skip>} element, each with key {@code TAG1} and value
 * {@code x}.
 */
@Test
public void testTag() throws Exception {
    RawTextRegexAnnotator filter = new RawTextRegexAnnotator(RawTextMarkType.TAG, "<skip>(.*?)</skip>", 0, 1000);
    filter.setAttribute(new StringAttribute("TAG1", "x"));
    AnnotatedText text = new AnnotatedText("J'ai du <skip>skip me</skip>mal à le croire.<skip>skip this</skip>");
    filter.annotate(text);
    LOG.debug(text.getAnnotations().toString());

    List<Annotation<StringAttribute>> attributes = text.getAnnotations(StringAttribute.class);
    assertEquals(2, attributes.size());

    // Expected {start, end} of each tagged element, in document order.
    final int[][] expectedSpans = {
        { "J'ai du ".length(), "J'ai du <skip>skip me</skip>".length() },
        { "J'ai du <skip>skip me</skip>mal à le croire.".length(),
            "J'ai du <skip>skip me</skip>mal à le croire.<skip>skip this</skip>".length() }
    };
    for (int idx = 0; idx < expectedSpans.length; idx++) {
        Annotation<StringAttribute> tagged = attributes.get(idx);
        assertEquals(expectedSpans[idx][0], tagged.getStart());
        assertEquals(expectedSpans[idx][1], tagged.getEnd());
        assertEquals("TAG1", tagged.getData().getKey());
        assertEquals("x", tagged.getData().getValue());
    }
}
use of com.joliciel.talismane.Annotation in project talismane by joliciel-informatique.
the class RegexMarkerFilterTest method testApplyWithReplacement.
/**
 * Verifies that a {@code REPLACE} annotator with replacement
 * {@code Skipped:$1} adds two {@code RawTextReplaceMarker} annotations, one
 * per {@code <skip>...</skip>} element, whose insertion text is
 * {@code Skipped:} followed by the element's content.
 */
@Test
public void testApplyWithReplacement() throws Exception {
    RawTextRegexAnnotator filter = new RawTextRegexAnnotator(RawTextMarkType.REPLACE, "<skip>(.*?)</skip>", 0, 1000);
    filter.setReplacement("Skipped:$1");
    AnnotatedText text = new AnnotatedText("J'ai du <skip>skip me</skip>mal à le croire.<skip>skip this</skip>");
    filter.annotate(text);
    LOG.debug(text.getAnnotations().toString());

    List<Annotation<RawTextReplaceMarker>> replaces = text.getAnnotations(RawTextReplaceMarker.class);
    assertEquals(2, replaces.size());

    // Both annotations are checked by index so that neither set of
    // assertions can be skipped silently.
    Annotation<RawTextReplaceMarker> firstReplace = replaces.get(0);
    assertEquals("J'ai du ".length(), firstReplace.getStart());
    assertEquals("J'ai du <skip>skip me</skip>".length(), firstReplace.getEnd());
    assertEquals("Skipped:skip me", firstReplace.getData().getInsertionText());

    Annotation<RawTextReplaceMarker> secondReplace = replaces.get(1);
    assertEquals("J'ai du <skip>skip me</skip>mal à le croire.".length(), secondReplace.getStart());
    assertEquals("J'ai du <skip>skip me</skip>mal à le croire.<skip>skip this</skip>".length(), secondReplace.getEnd());
    assertEquals("Skipped:skip this", secondReplace.getData().getInsertionText());
}
use of com.joliciel.talismane.Annotation in project talismane by joliciel-informatique.
the class RegexMarkerFilterTest method testUnaryOperatorsStop.
/**
 * Verifies that a {@code STOP} annotator matching the literal {@code <skip>}
 * adds two {@code RawTextMarker} annotations of type {@code STOP}, each
 * spanning one opening {@code <skip>} tag of the text.
 */
@Test
public void testUnaryOperatorsStop() throws Exception {
    RawTextRegexAnnotator filter = new RawTextRegexAnnotator(RawTextMarkType.STOP, "<skip>", 0, 1000);
    AnnotatedText text = new AnnotatedText("J'ai du <skip>skip me</skip>mal à le croire.<skip>skip this</skip>");
    filter.annotate(text);
    LOG.debug(text.getAnnotations().toString());

    List<Annotation<RawTextMarker>> markers = text.getAnnotations(RawTextMarker.class);
    assertEquals(2, markers.size());

    // First opening tag.
    Annotation<RawTextMarker> firstStop = markers.get(0);
    assertEquals(RawTextMarkType.STOP, firstStop.getData().getType());
    assertEquals("J'ai du ".length(), firstStop.getStart());
    assertEquals("J'ai du <skip>".length(), firstStop.getEnd());

    // Second opening tag.
    Annotation<RawTextMarker> secondStop = markers.get(1);
    assertEquals(RawTextMarkType.STOP, secondStop.getData().getType());
    assertEquals("J'ai du <skip>skip me</skip>mal à le croire.".length(), secondStop.getStart());
    assertEquals("J'ai du <skip>skip me</skip>mal à le croire.<skip>".length(), secondStop.getEnd());
}
Aggregations