use of com.joliciel.talismane.rawText.RawTextMarker.RawTextSkipMarker in project talismane by joliciel-informatique.
the class NewlineEndOfSentenceMarkerTest method testApply.
/**
 * Checks that {@link NewlineEndOfSentenceMarker} adds one sentence-break
 * annotation and one skip annotation per newline, for each of the three
 * newline conventions: CRLF, bare CR and bare LF.
 *
 * <p>
 * The third scenario previously re-used the bare CR input, so bare LF
 * newlines were never exercised; it now uses {@code "1\n2\n"}.
 */
@Test
public void testApply() throws Exception {
  NewlineEndOfSentenceMarker filter = new NewlineEndOfSentenceMarker(1000);

  // CRLF: each newline spans two characters.
  assertNewlineMarkers(filter, "1\r\n2\r\n", new int[][] { { 1, 3 }, { 4, 6 } });

  // Bare CR: each newline spans a single character.
  assertNewlineMarkers(filter, "1\r2\r", new int[][] { { 1, 2 }, { 3, 4 } });

  // Bare LF: same offsets as bare CR.
  assertNewlineMarkers(filter, "1\n2\n", new int[][] { { 1, 2 }, { 3, 4 } });
}

/**
 * Annotates {@code rawText} with {@code filter} and asserts that the
 * sentence-break and skip annotations both match {@code expectedSpans}.
 *
 * @param filter
 *          the annotator under test
 * @param rawText
 *          the text to annotate
 * @param expectedSpans
 *          one {@code {start, end}} pair per expected newline, in order
 */
private void assertNewlineMarkers(NewlineEndOfSentenceMarker filter, String rawText, int[][] expectedSpans) throws Exception {
  AnnotatedText text = new AnnotatedText(rawText);
  filter.annotate(text);
  LOG.debug(text.getAnnotations().toString());

  List<Annotation<RawTextSentenceBreakMarker>> sentenceBreaks = text.getAnnotations(RawTextSentenceBreakMarker.class);
  assertEquals(expectedSpans.length, sentenceBreaks.size());
  List<Annotation<RawTextSkipMarker>> skips = text.getAnnotations(RawTextSkipMarker.class);
  assertEquals(expectedSpans.length, skips.size());

  for (int i = 0; i < expectedSpans.length; i++) {
    int expectedStart = expectedSpans[i][0];
    int expectedEnd = expectedSpans[i][1];
    assertEquals(expectedStart, sentenceBreaks.get(i).getStart());
    assertEquals(expectedEnd, sentenceBreaks.get(i).getEnd());
    assertEquals(expectedStart, skips.get(i).getStart());
    assertEquals(expectedEnd, skips.get(i).getEnd());
  }
}
use of com.joliciel.talismane.rawText.RawTextMarker.RawTextSkipMarker in project talismane by joliciel-informatique.
the class RawTextTest method testGetProcessedText.
/**
 * Builds a {@link RawText} containing two skip annotations and one replace
 * annotation, and checks that the processed text has the skipped spans
 * removed and the replaced span substituted.
 */
@Test
public void testGetProcessedText() throws Exception {
  // Point the configuration library at the test configuration and reload it.
  System.setProperty("config.file", "src/test/resources/test.conf");
  ConfigFactory.invalidateCaches();
  final Config config = ConfigFactory.load();

  final String sessionId = "test";
  String[] labels = new String[0];

  String text = "1 2 3<skip>skip</skip> 4<skip>skip</skip> five";
  RawText rawText = new RawText(text, true, sessionId);

  // Offsets of the two <skip>...</skip> spans inside the raw text.
  int firstSkipStart = "1 2 3".length();
  int firstSkipEnd = "1 2 3<skip>skip</skip>".length();
  int secondSkipStart = "1 2 3<skip>skip</skip> 4".length();
  int secondSkipEnd = "1 2 3<skip>skip</skip> 4<skip>skip</skip>".length();

  Annotation<RawTextSkipMarker> firstSkip = new Annotation<>(firstSkipStart, firstSkipEnd, new RawTextSkipMarker("me"), labels);
  Annotation<RawTextSkipMarker> secondSkip = new Annotation<>(secondSkipStart, secondSkipEnd, new RawTextSkipMarker("me"), labels);
  List<Annotation<RawTextSkipMarker>> skips = new ArrayList<>();
  skips.add(firstSkip);
  skips.add(secondSkip);
  rawText.addAnnotations(skips);

  // The trailing word "five" is replaced by "5".
  int replaceStart = "1 2 3<skip>skip</skip> 4<skip>skip</skip> ".length();
  int replaceEnd = "1 2 3<skip>skip</skip> 4<skip>skip</skip> five".length();
  Annotation<RawTextReplaceMarker> fiveToDigit = new Annotation<>(replaceStart, replaceEnd, new RawTextReplaceMarker("me", "5"), labels);
  List<Annotation<RawTextReplaceMarker>> replaces = new ArrayList<>();
  replaces.add(fiveToDigit);
  rawText.addAnnotations(replaces);

  AnnotatedText processedTextBlock = rawText.getProcessedText();
  assertEquals("1 2 3 4 5", processedTextBlock.getText());
}
use of com.joliciel.talismane.rawText.RawTextMarker.RawTextSkipMarker in project talismane by joliciel-informatique.
the class RawTextTest method testNoSentenceAnnotationLocation.
/**
 * Adds two no-sentence-break annotations and one skip annotation to a
 * {@link RawText}, then checks that the no-sentence-break annotations of the
 * processed text are located at the offsets expected once the skipped span
 * has been removed.
 */
@Test
public void testNoSentenceAnnotationLocation() throws Exception {
  // Point the configuration library at the test configuration and reload it.
  System.setProperty("config.file", "src/test/resources/test.conf");
  ConfigFactory.invalidateCaches();
  final Config config = ConfigFactory.load();

  final String sessionId = "test";
  String[] labels = new String[0];

  String text = "Mr. Jones and <skip/>Mrs. Smith.";
  RawText textBlock = new RawText(text, true, sessionId);

  List<Annotation<RawTextNoSentenceBreakMarker>> noSentenceBreaks = new ArrayList<>();
  System.out.println("we add no sentence break annotations (as if they were added by a filter)");

  // "Mr." at the very start of the raw text.
  int mrStart = "".length();
  int mrEnd = "Mr.".length();
  Annotation<RawTextNoSentenceBreakMarker> mrMarker = new Annotation<>(mrStart, mrEnd, new RawTextNoSentenceBreakMarker("me"), labels);
  noSentenceBreaks.add(mrMarker);

  // "Mrs." located right after the <skip/> tag in the raw text.
  int mrsStart = "Mr. Jones and <skip/>".length();
  int mrsEnd = "Mr. Jones and <skip/>Mrs.".length();
  Annotation<RawTextNoSentenceBreakMarker> mrsMarker = new Annotation<>(mrsStart, mrsEnd, new RawTextNoSentenceBreakMarker("me"), labels);
  noSentenceBreaks.add(mrsMarker);
  textBlock.addAnnotations(noSentenceBreaks);

  // The <skip/> tag itself is marked to be skipped.
  int skipStart = "Mr. Jones and ".length();
  int skipEnd = "Mr. Jones and <skip/>".length();
  Annotation<RawTextSkipMarker> skipTag = new Annotation<>(skipStart, skipEnd, new RawTextSkipMarker("me"), labels);
  List<Annotation<RawTextSkipMarker>> skips = new ArrayList<>();
  skips.add(skipTag);
  textBlock.addAnnotations(skips);

  System.out.println("textBlock text: " + textBlock.getText());
  System.out.println("textBlock annotations: " + textBlock.getAnnotations().toString());

  AnnotatedText processedTextBlock = textBlock.getProcessedText();
  assertEquals("Mr. Jones and Mrs. Smith.", processedTextBlock.getText());

  // The no-sentence-break annotations must sit at the right offsets in the
  // processed text, i.e. with the skipped tag no longer counted.
  noSentenceBreaks = processedTextBlock.getAnnotations(RawTextNoSentenceBreakMarker.class);
  System.out.println("Processed annotations: " + noSentenceBreaks);
  assertEquals(2, noSentenceBreaks.size());

  Annotation<RawTextNoSentenceBreakMarker> processedMr = noSentenceBreaks.get(0);
  assertEquals("".length(), processedMr.getStart());
  assertEquals("Mr.".length(), processedMr.getEnd());

  Annotation<RawTextNoSentenceBreakMarker> processedMrs = noSentenceBreaks.get(1);
  assertEquals("Mr. Jones and ".length(), processedMrs.getStart());
  assertEquals("Mr. Jones and Mrs.".length(), processedMrs.getEnd());
}
use of com.joliciel.talismane.rawText.RawTextMarker.RawTextSkipMarker in project talismane by joliciel-informatique.
the class RegexMarkerFilterTest method testApplyWithGroup.
/**
 * Checks that a {@link RawTextRegexAnnotator} configured with capture group 1
 * of {@code <skip>(.*?)</skip>} produces skip annotations covering only the
 * group content (the text between the tags), not the tags themselves.
 *
 * <p>
 * Both annotations are verified. The previous loop compared its counter to
 * {@code 2} for the second element, a value never reached with a
 * two-element list, so the second annotation was silently left unchecked.
 */
@Test
public void testApplyWithGroup() throws Exception {
  RawTextRegexAnnotator filter = new RawTextRegexAnnotator(RawTextMarkType.SKIP, "<skip>(.*?)</skip>", 1, 1000);
  AnnotatedText text = new AnnotatedText("J'ai du <skip>skip me</skip>mal à le croire.<skip>skip me</skip>");
  filter.annotate(text);
  LOG.debug(text.getAnnotations().toString());

  List<Annotation<RawTextSkipMarker>> skips = text.getAnnotations(RawTextSkipMarker.class);
  assertEquals(2, skips.size());

  // First match: content of the first <skip> element.
  Annotation<RawTextSkipMarker> firstSkip = skips.get(0);
  assertEquals("J'ai du <skip>".length(), firstSkip.getStart());
  assertEquals("J'ai du <skip>skip me".length(), firstSkip.getEnd());

  // Second match: content of the trailing <skip> element.
  Annotation<RawTextSkipMarker> secondSkip = skips.get(1);
  assertEquals("J'ai du <skip>skip me</skip>mal à le croire.<skip>".length(), secondSkip.getStart());
  assertEquals("J'ai du <skip>skip me</skip>mal à le croire.<skip>skip me".length(), secondSkip.getEnd());
}
use of com.joliciel.talismane.rawText.RawTextMarker.RawTextSkipMarker in project talismane by joliciel-informatique.
the class RollingTextBlockTest method testNoSentenceAnnotationLocation.
@Test
public void testNoSentenceAnnotationLocation() throws Exception {
System.setProperty("config.file", "src/test/resources/test.conf");
ConfigFactory.invalidateCaches();
final Config config = ConfigFactory.load();
final String sessionId = "test";
String[] labels = new String[0];
// String text = "I see Mr. Jones and <skip/>Mrs. Smith.";
RollingTextBlock textBlock = new RollingTextBlock(true, null, sessionId);
textBlock = textBlock.roll("I see ");
textBlock = textBlock.roll("Mr. Jones ");
textBlock = textBlock.roll("and <sk");
AnnotatedText rawText = textBlock.getRawTextBlock();
System.out.println("rawText text: " + rawText.getText());
List<Annotation<RawTextNoSentenceBreakMarker>> noSentenceBreaks = new ArrayList<>();
System.out.println("we add no sentence break annotations (as if they were added by a filter)");
noSentenceBreaks.add(new Annotation<>("".length(), "Mr.".length(), new RawTextNoSentenceBreakMarker("me"), labels));
rawText.addAnnotations(noSentenceBreaks);
System.out.println("textBlock text: " + textBlock.getText());
System.out.println("textBlock annotations: " + textBlock.getAnnotations().toString());
textBlock = textBlock.roll("ip/>Mrs.");
rawText = textBlock.getRawTextBlock();
System.out.println("rawText text: " + rawText.getText());
List<Annotation<RawTextSkipMarker>> skips = new ArrayList<>();
skips.add(new Annotation<>("and ".length(), "and <skip/>".length(), new RawTextSkipMarker("me"), labels));
rawText.addAnnotations(skips);
AnnotatedText processedTextBlock = textBlock.getProcessedText();
assertEquals("I see Mr. Jones and ", processedTextBlock.getText());
// ensure that the no sentence break text got added at the right place
// in the processed text
noSentenceBreaks = processedTextBlock.getAnnotations(RawTextNoSentenceBreakMarker.class);
System.out.println("Processed annotations: " + noSentenceBreaks);
assertEquals(1, noSentenceBreaks.size());
assertEquals("I see ".length(), noSentenceBreaks.get(0).getStart());
assertEquals("I see Mr.".length(), noSentenceBreaks.get(0).getEnd());
textBlock = textBlock.roll(" Smith.");
rawText = textBlock.getRawTextBlock();
System.out.println("rawText text: " + rawText.getText());
noSentenceBreaks = new ArrayList<>();
noSentenceBreaks.add(new Annotation<>("ip/>".length(), "ip/>Mrs.".length(), new RawTextNoSentenceBreakMarker("me"), labels));
rawText.addAnnotations(noSentenceBreaks);
System.out.println("textBlock text: " + textBlock.getText());
System.out.println("textBlock annotations: " + textBlock.getAnnotations().toString());
textBlock = textBlock.roll("");
System.out.println("textBlock text: " + textBlock.getText());
System.out.println("textBlock annotations: " + textBlock.getAnnotations().toString());
processedTextBlock = textBlock.getProcessedText();
assertEquals("and Mrs. Smith.", processedTextBlock.getText());
// ensure that the no sentence break text got added at the right place
// in the processed text
noSentenceBreaks = processedTextBlock.getAnnotations(RawTextNoSentenceBreakMarker.class);
System.out.println("Processed annotations: " + noSentenceBreaks);
assertEquals(1, noSentenceBreaks.size());
assertEquals("and ".length(), noSentenceBreaks.get(0).getStart());
assertEquals("and Mrs.".length(), noSentenceBreaks.get(0).getEnd());
}
Aggregations