Example use of com.joliciel.talismane.rawText.RawTextMarker.RawTextSentenceBreakMarker in the project talismane by joliciel-informatique.
The example is taken from the class SentenceDetectorTest, method testDetectSentences2.
/**
 * Runs sentence detection over a text whose analysis window starts after the
 * already-processed prefix {@code "Before analysis. "}.
 *
 * <p>Fixture, as set up below:
 * <ul>
 * <li>a stub {@link DecisionMaker} that answers {@code IS_BOUNDARY} with
 * probability 1.0 for every decision, and an empty feature set;</li>
 * <li>a no-sentence-break marker over each of the two {@code "Mr."} spans;</li>
 * <li>a pre-existing {@link SentenceBoundary} covering {@code "Before analysis."};</li>
 * <li>a sentence-break marker over the newline that follows {@code "Jones"}.</li>
 * </ul>
 *
 * <p>The test expects exactly two detected boundaries (after the newline and
 * after the final {@code '?'}) and four {@link SentenceBoundary} annotations
 * on the text once detection has run.
 */
@Test
public void testDetectSentences2() throws Exception {
  System.setProperty("config.file", "src/test/resources/test.conf");
  ConfigFactory.invalidateCaches();
  // The returned Config was never read here, so the unused local is gone; the
  // call itself is kept — presumably later code relies on the configuration
  // having been (re)loaded after the cache invalidation. TODO confirm.
  ConfigFactory.load();
  final String sessionId = "test";

  // Stub decision maker: every candidate is a boundary, with full confidence.
  DecisionMaker decisionMaker = new DecisionMaker() {
    @Override
    public ScoringStrategy<ClassificationSolution> getDefaultScoringStrategy() {
      return new GeometricMeanScoringStrategy();
    }

    @Override
    public List<Decision> decide(List<FeatureResult<?>> featureResults) {
      List<Decision> decisions = new ArrayList<>();
      decisions.add(new Decision(SentenceDetectorOutcome.IS_BOUNDARY.name(), 1.0));
      return decisions;
    }
  };

  String[] labels = new String[0];
  Set<SentenceDetectorFeature<?>> features = new HashSet<>();
  SentenceDetector sentenceDetector = new SentenceDetector(decisionMaker, features, sessionId);

  String text = "Before analysis. Hello Mr. Jones\nHow are you, Mr. Jones? After";

  // Character offsets into the text, each written as the length of the prefix
  // that ends at that position so the intent stays readable.
  final int existingBoundaryEnd = "Before analysis.".length();
  final int analysisStart = "Before analysis. ".length();
  final int firstMrStart = "Before analysis. Hello ".length();
  final int firstMrEnd = "Before analysis. Hello Mr.".length();
  final int newlineStart = "Before analysis. Hello Mr. Jones".length();
  final int newlineEnd = "Before analysis. Hello Mr. Jones\n".length();
  final int secondMrStart = "Before analysis. Hello Mr. Jones\nHow are you, ".length();
  final int secondMrEnd = "Before analysis. Hello Mr. Jones\nHow are you, Mr.".length();
  final int questionEnd = "Before analysis. Hello Mr. Jones\nHow are you, Mr. Jones?".length();
  final int textEnd = text.length();

  AnnotatedText annotatedText = new AnnotatedText(text, analysisStart, textEnd);

  // The two "Mr." spans are marked as not allowed to break a sentence.
  List<Annotation<RawTextNoSentenceBreakMarker>> noSentenceBreakMarkers = new ArrayList<>();
  noSentenceBreakMarkers.add(new Annotation<>(firstMrStart, firstMrEnd, new RawTextNoSentenceBreakMarker("me"), labels));
  noSentenceBreakMarkers.add(new Annotation<>(secondMrStart, secondMrEnd, new RawTextNoSentenceBreakMarker("me"), labels));
  annotatedText.addAnnotations(noSentenceBreakMarkers);

  // A boundary that already exists for the text preceding the analysis window.
  List<Annotation<SentenceBoundary>> existingBoundaries = new ArrayList<>();
  existingBoundaries.add(new Annotation<>(0, existingBoundaryEnd, new SentenceBoundary(), labels));
  annotatedText.addAnnotations(existingBoundaries);

  // An explicit sentence-break marker over the newline character.
  List<Annotation<RawTextSentenceBreakMarker>> sentenceBreaks = new ArrayList<>();
  sentenceBreaks.add(new Annotation<>(newlineStart, newlineEnd, new RawTextSentenceBreakMarker("me"), labels));
  annotatedText.addAnnotations(sentenceBreaks);

  List<Integer> guessedBoundaries = sentenceDetector.detectSentences(annotatedText);

  assertEquals(2, guessedBoundaries.size());
  assertEquals(newlineEnd, guessedBoundaries.get(0).intValue());
  assertEquals(questionEnd, guessedBoundaries.get(1).intValue());

  List<Annotation<SentenceBoundary>> sentenceBoundaries = annotatedText.getAnnotations(SentenceBoundary.class);
  assertEquals(4, sentenceBoundaries.size());

  // 1) the pre-existing boundary, expected unchanged
  assertEquals(0, sentenceBoundaries.get(0).getStart());
  assertEquals(existingBoundaryEnd, sentenceBoundaries.get(0).getEnd());

  // 2) from the start of the analysis window to just after the newline
  assertEquals(analysisStart, sentenceBoundaries.get(1).getStart());
  assertEquals(newlineEnd, sentenceBoundaries.get(1).getEnd());

  // 3) from just after the newline to just after the question mark
  assertEquals(newlineEnd, sentenceBoundaries.get(2).getStart());
  assertEquals(questionEnd, sentenceBoundaries.get(2).getEnd());

  // 4) from just after the question mark to the end of the text
  assertEquals(questionEnd, sentenceBoundaries.get(3).getStart());
  assertEquals(textEnd, sentenceBoundaries.get(3).getEnd());
}
Aggregations