Search in sources :

Example 16 with Decision

use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.

the class PatternEventStream method getTaggedTokens.

/**
 * Builds one tagged token for every token returned by
 * {@code tokenSequence.listWithWhiteSpace()}.
 * <p>
 * A token whose start index appears in {@code tokenSplits} is tagged
 * {@link TokeniserOutcome#SEPARATE}; every other token is tagged
 * {@link TokeniserOutcome#JOIN}.
 *
 * @param tokenSequence the sequence whose tokens are tagged
 * @param tokenSplits   start indexes of the tokens to tag as SEPARATE
 * @return the tagged tokens, in the iteration order of the sequence
 */
public List<TaggedToken<TokeniserOutcome>> getTaggedTokens(TokenSequence tokenSequence, List<Integer> tokenSplits) {
    List<TaggedToken<TokeniserOutcome>> result = new ArrayList<TaggedToken<TokeniserOutcome>>();
    for (Token current : tokenSequence.listWithWhiteSpace()) {
        boolean startsAtSplit = tokenSplits.contains(current.getStartIndex());
        TokeniserOutcome label = startsAtSplit ? TokeniserOutcome.SEPARATE : TokeniserOutcome.JOIN;
        // The decision stores the outcome by name; it is read back through valueOf.
        Decision labelDecision = new Decision(label.name());
        TokeniserOutcome decidedOutcome = TokeniserOutcome.valueOf(labelDecision.getOutcome());
        TaggedToken<TokeniserOutcome> tagged = new TaggedToken<>(current, labelDecision, decidedOutcome);
        result.add(tagged);
    }
    return result;
}
Also used : TaggedToken(com.joliciel.talismane.tokeniser.TaggedToken) ArrayList(java.util.ArrayList) TaggedToken(com.joliciel.talismane.tokeniser.TaggedToken) Token(com.joliciel.talismane.tokeniser.Token) TokeniserOutcome(com.joliciel.talismane.tokeniser.TokeniserOutcome) Decision(com.joliciel.talismane.machineLearning.Decision)

Example 17 with Decision

use of com.joliciel.talismane.machineLearning.Decision in project jochre by urieli.

the class RecursiveShapeSplitterTest method testSplitShape.

/**
 * With a decision maker that always answers 50/50 between splitting and not
 * splitting, the splitter is expected to return 5 sequences, each of which is
 * asserted to carry a score of 1.0.
 */
@SuppressWarnings("unchecked")
@Test
public void testSplitShape() throws Exception {
    // Load the test configuration from scratch.
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    JochreSession jochreSession = new JochreSession(ConfigFactory.load());

    BufferedImage blankImage = new BufferedImage(256, 256, BufferedImage.TYPE_INT_RGB);
    final JochreImage jochreImage = new JochreImage(blankImage, jochreSession);

    // The shape handed to the splitter, plus the two shapes stubbed below.
    final Shape parentShape = new Shape(jochreImage, 0, 0, 63, 15, jochreSession);
    parentShape.setBaseLine(12);
    parentShape.setMeanLine(4);
    final Shape firstPart = new Shape(jochreImage, 0, 0, 31, 15, jochreSession);
    firstPart.setBaseLine(12);
    firstPart.setMeanLine(4);
    final Shape secondPart = new Shape(jochreImage, 32, 0, 63, 15, jochreSession);
    secondPart.setBaseLine(12);
    secondPart.setMeanLine(4);

    // Each of the three shapes offers exactly one split candidate.
    final SplitCandidateFinder splitCandidateFinder = mock(SplitCandidateFinder.class);

    Split parentSplit = new Split(parentShape, jochreSession);
    parentSplit.setPosition(31);
    List<Split> parentSplits = new ArrayList<>();
    parentSplits.add(parentSplit);
    when(splitCandidateFinder.findSplitCandidates(parentShape)).thenReturn(parentSplits);

    Split firstPartSplit = new Split(firstPart, jochreSession);
    firstPartSplit.setPosition(15);
    List<Split> firstPartSplits = new ArrayList<>();
    firstPartSplits.add(firstPartSplit);
    when(splitCandidateFinder.findSplitCandidates(firstPart)).thenReturn(firstPartSplits);

    Split secondPartSplit = new Split(secondPart, jochreSession);
    secondPartSplit.setPosition(15);
    List<Split> secondPartSplits = new ArrayList<>();
    secondPartSplits.add(secondPartSplit);
    when(splitCandidateFinder.findSplitCandidates(secondPart)).thenReturn(secondPartSplits);

    // The decision maker always answers with an even split / don't-split choice.
    final DecisionMaker decisionMaker = mock(DecisionMaker.class);
    List<Decision> evenDecisions = new ArrayList<>();
    evenDecisions.add(new Decision(SplitOutcome.DO_SPLIT.name(), 0.5));
    evenDecisions.add(new Decision(SplitOutcome.DO_NOT_SPLIT.name(), 0.5));
    when(decisionMaker.decide(anyList())).thenReturn(evenDecisions);

    Set<SplitFeature<?>> noFeatures = new TreeSet<>();
    RecursiveShapeSplitter splitter = new RecursiveShapeSplitter(splitCandidateFinder, noFeatures, decisionMaker, jochreSession);
    splitter.setBeamWidth(10);
    splitter.setMaxDepth(2);
    splitter.setMinWidthRatio(1.0);

    List<ShapeSequence> sequences = splitter.split(parentShape);

    assertEquals(5, sequences.size());
    for (ShapeSequence sequence : sequences) {
        assertEquals(1.0, sequence.getScore(), 0.0001);
    }
}
Also used : JochreImage(com.joliciel.jochre.graphics.JochreImage) Shape(com.joliciel.jochre.graphics.Shape) Config(com.typesafe.config.Config) ArrayList(java.util.ArrayList) DecisionMaker(com.joliciel.talismane.machineLearning.DecisionMaker) SplitFeature(com.joliciel.jochre.boundaries.features.SplitFeature) BufferedImage(java.awt.image.BufferedImage) Decision(com.joliciel.talismane.machineLearning.Decision) TreeSet(java.util.TreeSet) JochreSession(com.joliciel.jochre.JochreSession) Test(org.junit.Test)

Example 18 with Decision

use of com.joliciel.talismane.machineLearning.Decision in project jochre by urieli.

the class LetterByLetterBoundaryDetector method findBoundaries.

/**
 * Builds the candidate shape sequences for a group of shapes.
 * <p>
 * The shapes are processed in order. For each shape, the sequences found so far
 * (at most {@code beamWidth} of them, taken from the head of the previous heap)
 * are extended with every split alternative of the shape. When a shape merger
 * is configured, the last shape of the history may additionally be merged with
 * the first shape of the alternative.
 *
 * @param group the group whose shapes are analysed
 * @return at most {@code beamWidth} sequences, polled from the head of the
 *         final heap
 */
@Override
public List<ShapeSequence> findBoundaries(GroupOfShapes group) {
    // find the possible shape sequences that make up this group
    ShapeSequence emptySequence = new ShapeSequence();
    PriorityQueue<ShapeSequence> heap = new PriorityQueue<ShapeSequence>();
    heap.add(emptySequence);
    for (Shape shape : group.getShapes()) {
        PriorityQueue<ShapeSequence> previousHeap = heap;
        heap = new PriorityQueue<ShapeSequence>();
        // check if shape is wide enough to bother with
        double widthRatio = (double) shape.getWidth() / (double) shape.getXHeight();
        double heightRatio = (double) shape.getHeight() / (double) shape.getXHeight();
        // Splitting/merging shapes as required
        List<ShapeSequence> splitSequences = null;
        if (this.shapeSplitter != null && widthRatio >= minWidthRatioForSplit && heightRatio >= minHeightRatioForSplit) {
            splitSequences = shapeSplitter.split(shape);
        } else {
            // create a sequence containing only this shape
            ShapeSequence singleShapeSequence = new ShapeSequence();
            singleShapeSequence.addShape(shape);
            splitSequences = new ArrayList<ShapeSequence>();
            splitSequences.add(singleShapeSequence);
        }
        // limit the breadth to K
        int maxSequences = Math.min(previousHeap.size(), this.beamWidth);
        for (int j = 0; j < maxSequences; j++) {
            ShapeSequence history = previousHeap.poll();
            for (ShapeSequence splitSequence : splitSequences) {
                ShapeInSequence previousShapeInSequence = null;
                Shape previousShape = null;
                if (history.size() > 0) {
                    previousShapeInSequence = history.get(history.size() - 1);
                    previousShape = previousShapeInSequence.getShape();
                }
                ShapeInSequence firstShapeInSequence = splitSequence.get(0);
                Shape firstShape = firstShapeInSequence.getShape();
                double mergeProb = 0;
                if (this.shapeMerger != null && previousShape != null) {
                    // NOTE(review): the size pre-check pairs the previous shape with the whole
                    // (unsplit) shape, whereas checkMerge below receives the first split shape
                    // - confirm this is intended.
                    ShapePair mergeCandidate = new ShapePair(previousShape, shape);
                    double mergeCandidateWidthRatio = (double) mergeCandidate.getWidth() / (double) mergeCandidate.getXHeight();
                    double mergeCandidateDistanceRatio = (double) mergeCandidate.getInnerDistance() / (double) mergeCandidate.getXHeight();
                    if (mergeCandidateWidthRatio <= maxWidthRatioForMerge && mergeCandidateDistanceRatio <= maxDistanceRatioForMerge) {
                        mergeProb = shapeMerger.checkMerge(previousShape, firstShape);
                    }
                }
                if (mergeProb > 0) {
                    // Merge alternative: replace the last shape of the history by the merged shape.
                    Shape mergedShape = shapeMerger.merge(previousShape, firstShape);
                    ShapeSequence mergedSequence = new ShapeSequence(history);
                    mergedSequence.remove(mergedSequence.size() - 1);
                    List<Shape> originalShapesForMerge = new ArrayList<Shape>();
                    originalShapesForMerge.addAll(previousShapeInSequence.getOriginalShapes());
                    originalShapesForMerge.addAll(firstShapeInSequence.getOriginalShapes());
                    mergedSequence.addShape(mergedShape, originalShapesForMerge);
                    // the first shape of the split sequence is already part of the merged shape
                    boolean isFirstShape = true;
                    for (ShapeInSequence splitShape : splitSequence) {
                        if (!isFirstShape)
                            mergedSequence.add(splitShape);
                        isFirstShape = false;
                    }
                    Decision mergeDecision = new Decision(MergeOutcome.DO_MERGE.name(), mergeProb);
                    mergedSequence.addDecision(mergeDecision);
                    for (Decision splitDecision : splitSequence.getDecisions()) {
                        mergedSequence.addDecision(splitDecision);
                    }
                    // Enqueue only once the sequence is complete: a PriorityQueue positions an
                    // element when it is added and does not re-order it if it is modified
                    // afterwards. This also matches the non-merge branch below.
                    heap.add(mergedSequence);
                }
                if (mergeProb < 1) {
                    // Non-merge alternative: append the split sequence to the history as is.
                    ShapeSequence totalSequence = new ShapeSequence(history);
                    if (mergeProb > 0) {
                        Decision mergeDecision = new Decision(MergeOutcome.DO_NOT_MERGE.name(), 1 - mergeProb);
                        totalSequence.addDecision(mergeDecision);
                    }
                    for (Decision splitDecision : splitSequence.getDecisions()) {
                        totalSequence.addDecision(splitDecision);
                    }
                    for (ShapeInSequence splitShape : splitSequence) {
                        totalSequence.add(splitShape);
                    }
                    heap.add(totalSequence);
                }
            } // next split sequence for this shape
        } // next history from previous heap
    } // next shape in group
    // keep at most beamWidth sequences from the head of the final heap
    List<ShapeSequence> result = new ArrayList<ShapeSequence>();
    while (result.size() < this.beamWidth && !heap.isEmpty()) {
        result.add(heap.poll());
    }
    return result;
}
Also used : Shape(com.joliciel.jochre.graphics.Shape) ArrayList(java.util.ArrayList) PriorityQueue(java.util.PriorityQueue) Decision(com.joliciel.talismane.machineLearning.Decision)

Example 19 with Decision

use of com.joliciel.talismane.machineLearning.Decision in project jochre by urieli.

the class ShapeMerger method checkMerge.

/**
 * Given two sequential shapes, returns the probability of a merge.
 * <p>
 * Every merge feature is checked against the pair, the non-null results are
 * passed to the decision maker, and the probability of the
 * {@code DO_MERGE} decision is returned.
 *
 * @param shape1 the first shape of the candidate pair
 * @param shape2 the second shape of the candidate pair
 * @return the probability attached to the {@code DO_MERGE} decision, or 0.0 if
 *         the decision maker returned no such decision
 */
public double checkMerge(Shape shape1, Shape shape2) {
    ShapePair mergeCandidate = new ShapePair(shape1, shape2);
    if (LOG.isTraceEnabled())
        LOG.trace("mergeCandidate: " + mergeCandidate);
    List<FeatureResult<?>> featureResults = new ArrayList<FeatureResult<?>>();
    // analyse features
    for (MergeFeature<?> feature : mergeFeatures) {
        RuntimeEnvironment env = new RuntimeEnvironment();
        FeatureResult<?> featureResult = feature.check(mergeCandidate, env);
        if (featureResult != null) {
            featureResults.add(featureResult);
            if (LOG.isTraceEnabled()) {
                LOG.trace(featureResult.toString());
            }
        }
    }
    List<Decision> decisions = decisionMaker.decide(featureResults);
    // Decision outcomes are strings (the enum constant's name), so compare against
    // the name: a String never equals an enum constant.
    final String mergeOutcomeName = MergeOutcome.DO_MERGE.name();
    double yesProb = 0.0;
    for (Decision decision : decisions) {
        if (mergeOutcomeName.equals(decision.getOutcome())) {
            yesProb = decision.getProbability();
            break;
        }
    }
    if (LOG.isTraceEnabled()) {
        LOG.trace("yesProb: " + yesProb);
    }
    return yesProb;
}
Also used : RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) ArrayList(java.util.ArrayList) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult) Decision(com.joliciel.talismane.machineLearning.Decision)

Example 20 with Decision

use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.

the class PosTagRegexBasedCorpusReader method convertToPosTaggedToken.

/**
 * Converts one corpus line into a pos-tagged token and appends it to the given
 * pos-tag sequence.
 * <p>
 * The token is the one found at {@code index} in the sequence's token
 * sequence; the pos-tag is looked up in the session's pos-tag set using the
 * line's POSTAG element. The POSTAG_COMMENT element and the lexical entry are
 * copied onto the new token when the line has them.
 *
 * @param corpusLine     the line providing the pos-tag, comment and lexical entry
 * @param posTagSequence the sequence providing the token and receiving the result
 * @param index          position of the token in the token sequence
 * @param currentFile    file reported in the error message; may be null
 * @return the pos-tagged token that was added to {@code posTagSequence}
 * @throws TalismaneException if the pos-tag is unknown to the pos-tag set
 */
protected PosTaggedToken convertToPosTaggedToken(CorpusLine corpusLine, PosTagSequence posTagSequence, int index, File currentFile) throws TalismaneException {
    Token token = posTagSequence.getTokenSequence().get(index);
    PosTagSet tagSet = TalismaneSession.get(sessionId).getPosTagSet();

    PosTag resolvedTag;
    try {
        resolvedTag = tagSet.getPosTag(corpusLine.getElement(CorpusElement.POSTAG));
    } catch (UnknownPosTagException upte) {
        String path = currentFile == null ? "" : currentFile.getPath();
        throw new TalismaneException("Unknown posTag, " + path + ", on line " + corpusLine.getLineNumber() + ": " + corpusLine.getElement(CorpusElement.POSTAG));
    }

    PosTaggedToken tagged = new PosTaggedToken(token, new Decision(resolvedTag.getCode()), sessionId);
    if (LOG.isTraceEnabled()) {
        LOG.trace(tagged.toString());
    }

    if (corpusLine.hasElement(CorpusElement.POSTAG_COMMENT)) {
        tagged.setComment(corpusLine.getElement(CorpusElement.POSTAG_COMMENT));
    }

    // attach the lexical entry when the line provides one
    if (corpusLine.getLexicalEntry() != null) {
        List<LexicalEntry> entries = new ArrayList<>(1);
        entries.add(corpusLine.getLexicalEntry());
        tagged.setLexicalEntries(entries);
    }

    posTagSequence.addPosTaggedToken(tagged);
    return tagged;
}
Also used : TalismaneException(com.joliciel.talismane.TalismaneException) ArrayList(java.util.ArrayList) Token(com.joliciel.talismane.tokeniser.Token) LexicalEntry(com.joliciel.talismane.lexicon.LexicalEntry) Decision(com.joliciel.talismane.machineLearning.Decision)

Aggregations

Decision (com.joliciel.talismane.machineLearning.Decision)37 ArrayList (java.util.ArrayList)24 Config (com.typesafe.config.Config)15 TreeSet (java.util.TreeSet)15 RuntimeEnvironment (com.joliciel.talismane.machineLearning.features.RuntimeEnvironment)13 Token (com.joliciel.talismane.tokeniser.Token)12 Test (org.junit.Test)12 Sentence (com.joliciel.talismane.rawText.Sentence)11 TokenSequence (com.joliciel.talismane.tokeniser.TokenSequence)11 List (java.util.List)11 FeatureResult (com.joliciel.talismane.machineLearning.features.FeatureResult)10 TalismaneTest (com.joliciel.talismane.TalismaneTest)9 DecisionMaker (com.joliciel.talismane.machineLearning.DecisionMaker)9 PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence)8 PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken)8 TalismaneException (com.joliciel.talismane.TalismaneException)7 WeightedOutcome (com.joliciel.talismane.utils.WeightedOutcome)7 HashSet (java.util.HashSet)7 Shape (com.joliciel.jochre.graphics.Shape)6 HashMap (java.util.HashMap)6