Search in sources :

Example 6 with RawTextNoSentenceBreakMarker

Use of com.joliciel.talismane.rawText.RawTextMarker.RawTextNoSentenceBreakMarker in the talismane project by joliciel-informatique.

In class SentenceDetectorTest, method testDetectSentences.

/**
 * Runs the sentence detector with a stub {@link DecisionMaker} that answers
 * {@code IS_BOUNDARY} with a value of 1.0 for every request, and with no features.
 * <p>
 * Two {@link RawTextNoSentenceBreakMarker} annotations are placed over the two
 * "Mr." spans of the sample text. The test then expects exactly two sentence
 * breaks (after "Jones." and after "Jones?") and two matching
 * {@code SentenceBoundary} annotations on the annotated text.
 */
@Test
public void testDetectSentences() throws Exception {
    // Point the config library at the test configuration and reload it.
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    final Config config = ConfigFactory.load();
    final String sessionId = "test";

    // Character offsets into the sample text, each expressed as the length of the prefix that precedes it.
    final String text = "Before analysis. Hello Mr. Jones. How are you, Mr. Jones? After analysis.";
    final int textStart = "".length();
    final int analysisStart = "Before analysis. ".length();
    final int firstMrStart = "Before analysis. Hello ".length();
    final int firstMrEnd = "Before analysis. Hello Mr.".length();
    final int firstSentenceEnd = "Before analysis. Hello Mr. Jones.".length();
    final int secondMrStart = "Before analysis. Hello Mr. Jones. How are you, ".length();
    final int secondMrEnd = "Before analysis. Hello Mr. Jones. How are you, Mr.".length();
    final int secondSentenceEnd = "Before analysis. Hello Mr. Jones. How are you, Mr. Jones?".length();

    // Stub that always returns a single IS_BOUNDARY decision, whatever feature results it receives.
    final DecisionMaker alwaysBoundary = new DecisionMaker() {

        @Override
        public ScoringStrategy<ClassificationSolution> getDefaultScoringStrategy() {
            return new GeometricMeanScoringStrategy();
        }

        @Override
        public List<Decision> decide(List<FeatureResult<?>> featureResults) {
            final List<Decision> single = new ArrayList<>();
            single.add(new Decision(SentenceDetectorOutcome.IS_BOUNDARY.name(), 1.0));
            return single;
        }
    };

    final String[] noLabels = {};
    final Set<SentenceDetectorFeature<?>> noFeatures = new HashSet<>();
    final SentenceDetector detector = new SentenceDetector(alwaysBoundary, noFeatures, sessionId);

    // NOTE(review): the two offsets presumably delimit the portion of the text to analyse - confirm against AnnotatedText.
    final AnnotatedText annotated = new AnnotatedText(text, analysisStart, secondSentenceEnd);

    // One no-sentence-break marker per "Mr." span.
    final int[][] mrSpans = { { firstMrStart, firstMrEnd }, { secondMrStart, secondMrEnd } };
    final List<Annotation<RawTextNoSentenceBreakMarker>> noBreakMarkers = new ArrayList<>();
    for (int[] span : mrSpans) {
        noBreakMarkers.add(new Annotation<>(span[0], span[1], new RawTextNoSentenceBreakMarker("me"), noLabels));
    }
    annotated.addAnnotations(noBreakMarkers);

    // The detector's return value: one break after each real sentence end.
    final List<Integer> breaks = detector.detectSentences(annotated);
    assertEquals(2, breaks.size());
    assertEquals(firstSentenceEnd, breaks.get(0).intValue());
    assertEquals(secondSentenceEnd, breaks.get(1).intValue());

    // The same boundaries must also have been recorded as annotations on the text.
    final List<Annotation<SentenceBoundary>> boundaries = annotated.getAnnotations(SentenceBoundary.class);
    assertEquals(2, boundaries.size());

    final Annotation<SentenceBoundary> firstBoundary = boundaries.get(0);
    assertEquals(textStart, firstBoundary.getStart());
    assertEquals(firstSentenceEnd, firstBoundary.getEnd());

    final Annotation<SentenceBoundary> secondBoundary = boundaries.get(1);
    assertEquals(firstSentenceEnd, secondBoundary.getStart());
    assertEquals(secondSentenceEnd, secondBoundary.getEnd());
}
Also used : SentenceDetectorFeature(com.joliciel.talismane.sentenceDetector.features.SentenceDetectorFeature) AnnotatedText(com.joliciel.talismane.AnnotatedText) Config(com.typesafe.config.Config) ArrayList(java.util.ArrayList) DecisionMaker(com.joliciel.talismane.machineLearning.DecisionMaker) ClassificationSolution(com.joliciel.talismane.machineLearning.ClassificationSolution) GeometricMeanScoringStrategy(com.joliciel.talismane.machineLearning.GeometricMeanScoringStrategy) Decision(com.joliciel.talismane.machineLearning.Decision) Annotation(com.joliciel.talismane.Annotation) RawTextNoSentenceBreakMarker(com.joliciel.talismane.rawText.RawTextMarker.RawTextNoSentenceBreakMarker) ArrayList(java.util.ArrayList) List(java.util.List) HashSet(java.util.HashSet) TalismaneTest(com.joliciel.talismane.TalismaneTest) Test(org.junit.Test)

Aggregations

Annotation (com.joliciel.talismane.Annotation)6 RawTextNoSentenceBreakMarker (com.joliciel.talismane.rawText.RawTextMarker.RawTextNoSentenceBreakMarker)6 ArrayList (java.util.ArrayList)6 AnnotatedText (com.joliciel.talismane.AnnotatedText)5 Config (com.typesafe.config.Config)5 TalismaneTest (com.joliciel.talismane.TalismaneTest)4 Test (org.junit.Test)4 Decision (com.joliciel.talismane.machineLearning.Decision)3 DecisionMaker (com.joliciel.talismane.machineLearning.DecisionMaker)3 RawTextSentenceBreakMarker (com.joliciel.talismane.rawText.RawTextMarker.RawTextSentenceBreakMarker)3 RawTextSkipMarker (com.joliciel.talismane.rawText.RawTextMarker.RawTextSkipMarker)3 SentenceDetectorFeature (com.joliciel.talismane.sentenceDetector.features.SentenceDetectorFeature)3 HashSet (java.util.HashSet)3 List (java.util.List)3 ClassificationSolution (com.joliciel.talismane.machineLearning.ClassificationSolution)2 GeometricMeanScoringStrategy (com.joliciel.talismane.machineLearning.GeometricMeanScoringStrategy)2 Matcher (java.util.regex.Matcher)2 Annotator (com.joliciel.talismane.Annotator)1 TalismaneException (com.joliciel.talismane.TalismaneException)1 ClassificationModel (com.joliciel.talismane.machineLearning.ClassificationModel)1