Search in sources :

Example 6 with RawTextSentenceBreakMarker

use of com.joliciel.talismane.rawText.RawTextMarker.RawTextSentenceBreakMarker in project talismane by joliciel-informatique.

the class SentenceDetectorTest method testDetectSentences2.

@Test
public void testDetectSentences2() throws Exception {
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    final Config config = ConfigFactory.load();
    final String sessionId = "test";
    DecisionMaker decisionMaker = new DecisionMaker() {

        @Override
        public ScoringStrategy<ClassificationSolution> getDefaultScoringStrategy() {
            return new GeometricMeanScoringStrategy();
        }

        @Override
        public List<Decision> decide(List<FeatureResult<?>> featureResults) {
            List<Decision> decisions = new ArrayList<>();
            Decision decision = new Decision(SentenceDetectorOutcome.IS_BOUNDARY.name(), 1.0);
            decisions.add(decision);
            return decisions;
        }
    };
    String[] labels = new String[0];
    Set<SentenceDetectorFeature<?>> features = new HashSet<>();
    SentenceDetector sentenceDetector = new SentenceDetector(decisionMaker, features, sessionId);
    String text = "Before analysis. Hello Mr. Jones\nHow are you, Mr. Jones? After";
    AnnotatedText annotatedText = new AnnotatedText(text, "Before analysis. ".length(), text.length());
    List<Annotation<RawTextNoSentenceBreakMarker>> noSentenceBreakMarkers = new ArrayList<>();
    noSentenceBreakMarkers.add(new Annotation<>("Before analysis. Hello ".length(), "Before analysis. Hello Mr.".length(), new RawTextNoSentenceBreakMarker("me"), labels));
    noSentenceBreakMarkers.add(new Annotation<>("Before analysis. Hello Mr. Jones\nHow are you, ".length(), "Before analysis. Hello Mr. Jones\nHow are you, Mr.".length(), new RawTextNoSentenceBreakMarker("me"), labels));
    annotatedText.addAnnotations(noSentenceBreakMarkers);
    List<Annotation<SentenceBoundary>> existingBoundaries = new ArrayList<>();
    existingBoundaries.add(new Annotation<>("".length(), "Before analysis.".length(), new SentenceBoundary(), labels));
    annotatedText.addAnnotations(existingBoundaries);
    List<Annotation<RawTextSentenceBreakMarker>> sentenceBreaks = new ArrayList<>();
    sentenceBreaks.add(new Annotation<>("Before analysis. Hello Mr. Jones".length(), "Before analysis. Hello Mr. Jones\n".length(), new RawTextSentenceBreakMarker("me"), labels));
    annotatedText.addAnnotations(sentenceBreaks);
    List<Integer> guessedBoundaries = sentenceDetector.detectSentences(annotatedText);
    assertEquals(2, guessedBoundaries.size());
    assertEquals("Before analysis. Hello Mr. Jones\n".length(), guessedBoundaries.get(0).intValue());
    assertEquals("Before analysis. Hello Mr. Jones\nHow are you, Mr. Jones?".length(), guessedBoundaries.get(1).intValue());
    List<Annotation<SentenceBoundary>> sentenceBoundaries = annotatedText.getAnnotations(SentenceBoundary.class);
    System.out.println(sentenceBoundaries.toString());
    assertEquals(4, sentenceBoundaries.size());
    assertEquals("".length(), sentenceBoundaries.get(0).getStart());
    assertEquals("Before analysis.".length(), sentenceBoundaries.get(0).getEnd());
    assertEquals("Before analysis. ".length(), sentenceBoundaries.get(1).getStart());
    assertEquals("Before analysis. Hello Mr. Jones\n".length(), sentenceBoundaries.get(1).getEnd());
    assertEquals("Before analysis. Hello Mr. Jones\n".length(), sentenceBoundaries.get(2).getStart());
    assertEquals("Before analysis. Hello Mr. Jones\nHow are you, Mr. Jones?".length(), sentenceBoundaries.get(2).getEnd());
    assertEquals("Before analysis. Hello Mr. Jones\nHow are you, Mr. Jones?".length(), sentenceBoundaries.get(3).getStart());
    assertEquals("Before analysis. Hello Mr. Jones\nHow are you, Mr. Jones? After".length(), sentenceBoundaries.get(3).getEnd());
}
Also used : SentenceDetectorFeature(com.joliciel.talismane.sentenceDetector.features.SentenceDetectorFeature) Config(com.typesafe.config.Config) ArrayList(java.util.ArrayList) GeometricMeanScoringStrategy(com.joliciel.talismane.machineLearning.GeometricMeanScoringStrategy) RawTextNoSentenceBreakMarker(com.joliciel.talismane.rawText.RawTextMarker.RawTextNoSentenceBreakMarker) ArrayList(java.util.ArrayList) List(java.util.List) HashSet(java.util.HashSet) AnnotatedText(com.joliciel.talismane.AnnotatedText) DecisionMaker(com.joliciel.talismane.machineLearning.DecisionMaker) ClassificationSolution(com.joliciel.talismane.machineLearning.ClassificationSolution) Decision(com.joliciel.talismane.machineLearning.Decision) Annotation(com.joliciel.talismane.Annotation) RawTextSentenceBreakMarker(com.joliciel.talismane.rawText.RawTextMarker.RawTextSentenceBreakMarker) TalismaneTest(com.joliciel.talismane.TalismaneTest) Test(org.junit.Test)

Aggregations

Annotation (com.joliciel.talismane.Annotation)6 RawTextSentenceBreakMarker (com.joliciel.talismane.rawText.RawTextMarker.RawTextSentenceBreakMarker)6 AnnotatedText (com.joliciel.talismane.AnnotatedText)5 ArrayList (java.util.ArrayList)5 TalismaneTest (com.joliciel.talismane.TalismaneTest)4 RawTextSkipMarker (com.joliciel.talismane.rawText.RawTextMarker.RawTextSkipMarker)4 Config (com.typesafe.config.Config)4 Test (org.junit.Test)4 RawTextNoSentenceBreakMarker (com.joliciel.talismane.rawText.RawTextMarker.RawTextNoSentenceBreakMarker)3 Decision (com.joliciel.talismane.machineLearning.Decision)2 DecisionMaker (com.joliciel.talismane.machineLearning.DecisionMaker)2 SentenceBoundary (com.joliciel.talismane.sentenceDetector.SentenceBoundary)2 SentenceDetectorFeature (com.joliciel.talismane.sentenceDetector.features.SentenceDetectorFeature)2 HashSet (java.util.HashSet)2 List (java.util.List)2 Matcher (java.util.regex.Matcher)2 Annotator (com.joliciel.talismane.Annotator)1 TalismaneException (com.joliciel.talismane.TalismaneException)1 ClassificationModel (com.joliciel.talismane.machineLearning.ClassificationModel)1 ClassificationSolution (com.joliciel.talismane.machineLearning.ClassificationSolution)1