use of com.joliciel.talismane.machineLearning.DecisionMaker in project jochre by urieli.
the class RecursiveShapeSplitterTest method testSplitShape.
/**
* Make sure we get 5 equally weighted sequences in the case of a 50/50 prob for
* splitting each time.
*/
@SuppressWarnings("unchecked")
@Test
public void testSplitShape() throws Exception {
System.setProperty("config.file", "src/test/resources/test.conf");
ConfigFactory.invalidateCaches();
Config config = ConfigFactory.load();
JochreSession jochreSession = new JochreSession(config);
BufferedImage originalImage = new BufferedImage(256, 256, BufferedImage.TYPE_INT_RGB);
final JochreImage jochreImage = new JochreImage(originalImage, jochreSession);
final Shape shape = new Shape(jochreImage, 0, 0, 63, 15, jochreSession);
shape.setBaseLine(12);
shape.setMeanLine(4);
final Shape shape1 = new Shape(jochreImage, 0, 0, 31, 15, jochreSession);
shape1.setBaseLine(12);
shape1.setMeanLine(4);
final Shape shape2 = new Shape(jochreImage, 32, 0, 63, 15, jochreSession);
shape2.setBaseLine(12);
shape2.setMeanLine(4);
final SplitCandidateFinder splitCandidateFinder = mock(SplitCandidateFinder.class);
final DecisionMaker decisionMaker = mock(DecisionMaker.class);
Split split = new Split(shape, jochreSession);
split.setPosition(31);
List<Split> splits = new ArrayList<>();
splits.add(split);
when(splitCandidateFinder.findSplitCandidates(shape)).thenReturn(splits);
Decision yesDecision = new Decision(SplitOutcome.DO_SPLIT.name(), 0.5);
Decision noDecision = new Decision(SplitOutcome.DO_NOT_SPLIT.name(), 0.5);
List<Decision> decisions = new ArrayList<>();
decisions.add(yesDecision);
decisions.add(noDecision);
when(decisionMaker.decide(anyList())).thenReturn(decisions);
Split split1 = new Split(shape1, jochreSession);
split1.setPosition(15);
List<Split> splits1 = new ArrayList<>();
splits1.add(split1);
when(splitCandidateFinder.findSplitCandidates(shape1)).thenReturn(splits1);
Split split2 = new Split(shape2, jochreSession);
split2.setPosition(15);
List<Split> splits2 = new ArrayList<>();
splits2.add(split2);
when(splitCandidateFinder.findSplitCandidates(shape2)).thenReturn(splits2);
Set<SplitFeature<?>> splitFeatures = new TreeSet<>();
RecursiveShapeSplitter splitter = new RecursiveShapeSplitter(splitCandidateFinder, splitFeatures, decisionMaker, jochreSession);
splitter.setBeamWidth(10);
splitter.setMaxDepth(2);
splitter.setMinWidthRatio(1.0);
List<ShapeSequence> shapeSequences = splitter.split(shape);
assertEquals(5, shapeSequences.size());
for (ShapeSequence shapeSequence : shapeSequences) {
assertEquals(1.0, shapeSequence.getScore(), 0.0001);
}
}
use of com.joliciel.talismane.machineLearning.DecisionMaker in project talismane by joliciel-informatique.
the class SentenceDetectorTest method testDetectSentences2.
@Test
public void testDetectSentences2() throws Exception {
System.setProperty("config.file", "src/test/resources/test.conf");
ConfigFactory.invalidateCaches();
final Config config = ConfigFactory.load();
final String sessionId = "test";
DecisionMaker decisionMaker = new DecisionMaker() {
@Override
public ScoringStrategy<ClassificationSolution> getDefaultScoringStrategy() {
return new GeometricMeanScoringStrategy();
}
@Override
public List<Decision> decide(List<FeatureResult<?>> featureResults) {
List<Decision> decisions = new ArrayList<>();
Decision decision = new Decision(SentenceDetectorOutcome.IS_BOUNDARY.name(), 1.0);
decisions.add(decision);
return decisions;
}
};
String[] labels = new String[0];
Set<SentenceDetectorFeature<?>> features = new HashSet<>();
SentenceDetector sentenceDetector = new SentenceDetector(decisionMaker, features, sessionId);
String text = "Before analysis. Hello Mr. Jones\nHow are you, Mr. Jones? After";
AnnotatedText annotatedText = new AnnotatedText(text, "Before analysis. ".length(), text.length());
List<Annotation<RawTextNoSentenceBreakMarker>> noSentenceBreakMarkers = new ArrayList<>();
noSentenceBreakMarkers.add(new Annotation<>("Before analysis. Hello ".length(), "Before analysis. Hello Mr.".length(), new RawTextNoSentenceBreakMarker("me"), labels));
noSentenceBreakMarkers.add(new Annotation<>("Before analysis. Hello Mr. Jones\nHow are you, ".length(), "Before analysis. Hello Mr. Jones\nHow are you, Mr.".length(), new RawTextNoSentenceBreakMarker("me"), labels));
annotatedText.addAnnotations(noSentenceBreakMarkers);
List<Annotation<SentenceBoundary>> existingBoundaries = new ArrayList<>();
existingBoundaries.add(new Annotation<>("".length(), "Before analysis.".length(), new SentenceBoundary(), labels));
annotatedText.addAnnotations(existingBoundaries);
List<Annotation<RawTextSentenceBreakMarker>> sentenceBreaks = new ArrayList<>();
sentenceBreaks.add(new Annotation<>("Before analysis. Hello Mr. Jones".length(), "Before analysis. Hello Mr. Jones\n".length(), new RawTextSentenceBreakMarker("me"), labels));
annotatedText.addAnnotations(sentenceBreaks);
List<Integer> guessedBoundaries = sentenceDetector.detectSentences(annotatedText);
assertEquals(2, guessedBoundaries.size());
assertEquals("Before analysis. Hello Mr. Jones\n".length(), guessedBoundaries.get(0).intValue());
assertEquals("Before analysis. Hello Mr. Jones\nHow are you, Mr. Jones?".length(), guessedBoundaries.get(1).intValue());
List<Annotation<SentenceBoundary>> sentenceBoundaries = annotatedText.getAnnotations(SentenceBoundary.class);
System.out.println(sentenceBoundaries.toString());
assertEquals(4, sentenceBoundaries.size());
assertEquals("".length(), sentenceBoundaries.get(0).getStart());
assertEquals("Before analysis.".length(), sentenceBoundaries.get(0).getEnd());
assertEquals("Before analysis. ".length(), sentenceBoundaries.get(1).getStart());
assertEquals("Before analysis. Hello Mr. Jones\n".length(), sentenceBoundaries.get(1).getEnd());
assertEquals("Before analysis. Hello Mr. Jones\n".length(), sentenceBoundaries.get(2).getStart());
assertEquals("Before analysis. Hello Mr. Jones\nHow are you, Mr. Jones?".length(), sentenceBoundaries.get(2).getEnd());
assertEquals("Before analysis. Hello Mr. Jones\nHow are you, Mr. Jones?".length(), sentenceBoundaries.get(3).getStart());
assertEquals("Before analysis. Hello Mr. Jones\nHow are you, Mr. Jones? After".length(), sentenceBoundaries.get(3).getEnd());
}
use of com.joliciel.talismane.machineLearning.DecisionMaker in project talismane by joliciel-informatique.
the class SentenceDetectorTest method testDetectSentences.
@Test
public void testDetectSentences() throws Exception {
System.setProperty("config.file", "src/test/resources/test.conf");
ConfigFactory.invalidateCaches();
final Config config = ConfigFactory.load();
final String sessionId = "test";
DecisionMaker decisionMaker = new DecisionMaker() {
@Override
public ScoringStrategy<ClassificationSolution> getDefaultScoringStrategy() {
return new GeometricMeanScoringStrategy();
}
@Override
public List<Decision> decide(List<FeatureResult<?>> featureResults) {
List<Decision> decisions = new ArrayList<>();
Decision decision = new Decision(SentenceDetectorOutcome.IS_BOUNDARY.name(), 1.0);
decisions.add(decision);
return decisions;
}
};
String[] labels = new String[0];
Set<SentenceDetectorFeature<?>> features = new HashSet<>();
SentenceDetector sentenceDetector = new SentenceDetector(decisionMaker, features, sessionId);
String text = "Before analysis. Hello Mr. Jones. How are you, Mr. Jones? After analysis.";
AnnotatedText annotatedText = new AnnotatedText(text, "Before analysis. ".length(), "Before analysis. Hello Mr. Jones. How are you, Mr. Jones?".length());
List<Annotation<RawTextNoSentenceBreakMarker>> noSentenceBreakMarkers = new ArrayList<>();
noSentenceBreakMarkers.add(new Annotation<>("Before analysis. Hello ".length(), "Before analysis. Hello Mr.".length(), new RawTextNoSentenceBreakMarker("me"), labels));
noSentenceBreakMarkers.add(new Annotation<>("Before analysis. Hello Mr. Jones. How are you, ".length(), "Before analysis. Hello Mr. Jones. How are you, Mr.".length(), new RawTextNoSentenceBreakMarker("me"), labels));
annotatedText.addAnnotations(noSentenceBreakMarkers);
List<Integer> sentenceBreaks = sentenceDetector.detectSentences(annotatedText);
assertEquals(2, sentenceBreaks.size());
assertEquals("Before analysis. Hello Mr. Jones.".length(), sentenceBreaks.get(0).intValue());
assertEquals("Before analysis. Hello Mr. Jones. How are you, Mr. Jones?".length(), sentenceBreaks.get(1).intValue());
List<Annotation<SentenceBoundary>> sentenceBoundaries = annotatedText.getAnnotations(SentenceBoundary.class);
assertEquals(2, sentenceBoundaries.size());
assertEquals("".length(), sentenceBoundaries.get(0).getStart());
assertEquals("Before analysis. Hello Mr. Jones.".length(), sentenceBoundaries.get(0).getEnd());
assertEquals("Before analysis. Hello Mr. Jones.".length(), sentenceBoundaries.get(1).getStart());
assertEquals("Before analysis. Hello Mr. Jones. How are you, Mr. Jones?".length(), sentenceBoundaries.get(1).getEnd());
}
use of com.joliciel.talismane.machineLearning.DecisionMaker in project jochre by urieli.
the class RecursiveShapeSplitterTest method testSplitShapeNoSplitMoreLikely.
@SuppressWarnings("unchecked")
@Test
public void testSplitShapeNoSplitMoreLikely() throws Exception {
System.setProperty("config.file", "src/test/resources/test.conf");
ConfigFactory.invalidateCaches();
Config config = ConfigFactory.load();
JochreSession jochreSession = new JochreSession(config);
BufferedImage originalImage = new BufferedImage(256, 256, BufferedImage.TYPE_INT_RGB);
final JochreImage jochreImage = new JochreImage(originalImage, jochreSession);
final Shape shape = new Shape(jochreImage, 0, 0, 63, 15, jochreSession);
shape.setBaseLine(12);
shape.setMeanLine(4);
final Shape shape1 = new Shape(jochreImage, 0, 0, 31, 15, jochreSession);
shape1.setBaseLine(12);
shape1.setMeanLine(4);
final Shape shape2 = new Shape(jochreImage, 32, 0, 63, 15, jochreSession);
shape2.setBaseLine(12);
shape2.setMeanLine(4);
final SplitCandidateFinder splitCandidateFinder = mock(SplitCandidateFinder.class);
final DecisionMaker decisionMaker = mock(DecisionMaker.class);
Split split = new Split(shape, jochreSession);
split.setPosition(31);
List<Split> splits = new ArrayList<>();
splits.add(split);
when(splitCandidateFinder.findSplitCandidates(shape)).thenReturn(splits);
Decision yesDecision = new Decision(SplitOutcome.DO_SPLIT.name(), 0.4);
Decision noDecision = new Decision(SplitOutcome.DO_NOT_SPLIT.name(), 0.6);
List<Decision> decisions = new ArrayList<>();
decisions.add(yesDecision);
decisions.add(noDecision);
when(decisionMaker.decide(anyList())).thenReturn(decisions);
Split split1 = new Split(shape1, jochreSession);
split1.setPosition(15);
List<Split> splits1 = new ArrayList<>();
splits1.add(split1);
when(splitCandidateFinder.findSplitCandidates(shape1)).thenReturn(splits1);
Split split2 = new Split(shape2, jochreSession);
split2.setPosition(15);
List<Split> splits2 = new ArrayList<>();
splits2.add(split2);
when(splitCandidateFinder.findSplitCandidates(shape2)).thenReturn(splits2);
Set<SplitFeature<?>> splitFeatures = new TreeSet<>();
RecursiveShapeSplitter splitter = new RecursiveShapeSplitter(splitCandidateFinder, splitFeatures, decisionMaker, jochreSession);
splitter.setBeamWidth(10);
splitter.setMaxDepth(2);
splitter.setMinWidthRatio(1.0);
List<ShapeSequence> shapeSequences = splitter.split(shape);
assertEquals(5, shapeSequences.size());
int i = 0;
double prob = 1.0;
double twoThirds = 0.4 / 0.6;
LOG.debug("twoThirds: " + twoThirds);
for (ShapeSequence shapeSequence : shapeSequences) {
LOG.debug("sequence " + i + " decisions:");
for (Decision decision : shapeSequence.getDecisions()) LOG.debug("" + decision.getProbability());
if (i == 0) {
prob = 1.0;
assertEquals(prob, shapeSequence.getScore(), 0.0001);
assertEquals(1, shapeSequence.size());
} else if (i == 1) {
prob = 1.0 * twoThirds;
assertEquals(prob, shapeSequence.getScore(), 0.0001);
assertEquals(2, shapeSequence.size());
} else if (i == 2) {
prob = 1.0 * twoThirds * twoThirds;
assertEquals(prob, shapeSequence.getScore(), 0.0001);
assertEquals(3, shapeSequence.size());
} else if (i == 3) {
prob = 1.0 * twoThirds * twoThirds;
assertEquals(prob, shapeSequence.getScore(), 0.0001);
assertEquals(3, shapeSequence.size());
} else if (i == 4) {
prob = 1.0 * twoThirds * twoThirds * twoThirds;
assertEquals(prob, shapeSequence.getScore(), 0.0001);
assertEquals(4, shapeSequence.size());
}
i++;
}
}
use of com.joliciel.talismane.machineLearning.DecisionMaker in project jochre by urieli.
the class RecursiveShapeSplitterTest method testSplitShapeSplitMoreLikely.
/**
* If a split is always more likely (e.g. 60% likelihood), ensure the shape
* sequences are ordered correctly.
*/
@SuppressWarnings("unchecked")
@Test
public void testSplitShapeSplitMoreLikely() throws Exception {
System.setProperty("config.file", "src/test/resources/test.conf");
ConfigFactory.invalidateCaches();
Config config = ConfigFactory.load();
JochreSession jochreSession = new JochreSession(config);
BufferedImage originalImage = new BufferedImage(256, 256, BufferedImage.TYPE_INT_RGB);
final JochreImage jochreImage = new JochreImage(originalImage, jochreSession);
final Shape shape = new Shape(jochreImage, 0, 0, 63, 15, jochreSession);
shape.setBaseLine(12);
shape.setMeanLine(4);
final Shape shape1 = new Shape(jochreImage, 0, 0, 31, 15, jochreSession);
shape1.setBaseLine(12);
shape1.setMeanLine(4);
final Shape shape2 = new Shape(jochreImage, 32, 0, 63, 15, jochreSession);
shape2.setBaseLine(12);
shape2.setMeanLine(4);
final SplitCandidateFinder splitCandidateFinder = mock(SplitCandidateFinder.class);
final DecisionMaker decisionMaker = mock(DecisionMaker.class);
Split split = new Split(shape, jochreSession);
split.setPosition(31);
List<Split> splits = new ArrayList<>();
splits.add(split);
when(splitCandidateFinder.findSplitCandidates(shape)).thenReturn(splits);
Decision yesDecision = new Decision(SplitOutcome.DO_SPLIT.name(), 0.6);
Decision noDecision = new Decision(SplitOutcome.DO_NOT_SPLIT.name(), 0.4);
List<Decision> decisions = new ArrayList<>();
decisions.add(yesDecision);
decisions.add(noDecision);
when(decisionMaker.decide(anyList())).thenReturn(decisions);
Split split1 = new Split(shape1, jochreSession);
split1.setPosition(15);
List<Split> splits1 = new ArrayList<>();
splits1.add(split1);
when(splitCandidateFinder.findSplitCandidates(shape1)).thenReturn(splits1);
Split split2 = new Split(shape2, jochreSession);
split2.setPosition(15);
List<Split> splits2 = new ArrayList<>();
splits2.add(split2);
when(splitCandidateFinder.findSplitCandidates(shape2)).thenReturn(splits2);
Set<SplitFeature<?>> splitFeatures = new TreeSet<>();
RecursiveShapeSplitter splitter = new RecursiveShapeSplitter(splitCandidateFinder, splitFeatures, decisionMaker, jochreSession);
splitter.setBeamWidth(10);
splitter.setMaxDepth(2);
splitter.setMinWidthRatio(1.0);
List<ShapeSequence> shapeSequences = splitter.split(shape);
assertEquals(5, shapeSequences.size());
int i = 0;
for (ShapeSequence shapeSequence : shapeSequences) {
LOG.debug("sequence " + i + " shapes:");
for (ShapeInSequence shapeInSequence : shapeSequence) {
Shape oneShape = shapeInSequence.getShape();
LOG.debug("Shape: " + oneShape.getLeft() + "," + oneShape.getRight());
}
LOG.debug("" + shapeSequence.getScore());
i++;
}
i = 0;
double prob = 1.0;
double twoThirds = 0.4 / 0.6;
LOG.debug("twoThirds: " + twoThirds);
for (ShapeSequence shapeSequence : shapeSequences) {
LOG.debug("sequence " + i + " decisions:");
for (Decision decision : shapeSequence.getDecisions()) LOG.debug("" + decision.getProbability());
if (i == 0) {
prob = 1.0;
assertEquals(prob, shapeSequence.getScore(), 0.0001);
assertEquals(4, shapeSequence.size());
} else if (i == 1) {
prob = 1.0 * twoThirds;
assertEquals(prob, shapeSequence.getScore(), 0.0001);
assertEquals(3, shapeSequence.size());
} else if (i == 2) {
prob = 1.0 * twoThirds;
assertEquals(prob, shapeSequence.getScore(), 0.0001);
assertEquals(3, shapeSequence.size());
} else if (i == 3) {
prob = 1.0 * twoThirds * twoThirds;
assertEquals(prob, shapeSequence.getScore(), 0.0001);
assertEquals(2, shapeSequence.size());
} else if (i == 4) {
prob = 1.0 * twoThirds * twoThirds * twoThirds;
assertEquals(prob, shapeSequence.getScore(), 0.0001);
assertEquals(1, shapeSequence.size());
}
i++;
}
}
Aggregations