Search in sources :

Example 21 with Decision

use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.

the class ParseConfiguration method addDependency.

/**
 * Create a dependency arc between the given head and dependent, register it
 * in the current configuration, and assign it a probability derived from the
 * decisions taken since the previous arc was scored.
 * <p>
 * The probability is the exponential of the summed log-probabilities of all
 * decisions from {@code lastProbApplied} onwards. When
 * {@code useGeometricMeanForProbs} is set and at least one decision was
 * consumed, the sum is first divided by the number of decisions consumed.
 * Afterwards {@code lastProbApplied} is moved to the end of the decision
 * list.
 *
 * @param head
 *          the governing token of the new arc
 * @param dependent
 *          the governed token of the new arc
 * @param label
 *          the dependency label of the new arc
 * @param transition
 *          the transition generating this dependency; recorded against the
 *          dependent's index
 * @return the newly created arc, with its probability set
 * @throws CircularDependencyException
 *           if this would create a circular dependency
 */
public DependencyArc addDependency(PosTaggedToken head, PosTaggedToken dependent, String label, Transition transition) throws CircularDependencyException {
    DependencyArc arc = new DependencyArc(head, dependent, label);
    if (LOG.isTraceEnabled()) {
        LOG.trace("Adding arc " + arc + " with transition " + transition);
    }

    this.addDependency(arc);
    this.dependentTransitionMap[dependent.getIndex()] = transition;

    if (LOG.isTraceEnabled()) {
        LOG.trace("Prob for " + arc.toString());
    }

    // Accumulate in log space over the decisions not yet attributed to an arc.
    double logSum = 0.0;
    int consumed = 0;
    int index = lastProbApplied;
    while (index < this.decisions.size()) {
        Decision current = decisions.get(index);
        logSum += current.getProbabilityLog();
        if (LOG.isTraceEnabled()) {
            LOG.trace(current.getOutcome() + ", *= " + current.getProbability());
        }
        consumed++;
        index++;
    }

    // Dividing the log sum by the count yields the geometric mean once exponentiated.
    if (useGeometricMeanForProbs && consumed > 0) {
        logSum /= consumed;
    }

    arc.setProbability(Math.exp(logSum));
    this.lastProbApplied = this.decisions.size();

    if (LOG.isTraceEnabled()) {
        LOG.trace("prob=" + arc.getProbability());
    }
    return arc;
}
Also used : Decision(com.joliciel.talismane.machineLearning.Decision)

Example 22 with Decision

use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.

the class LinearSVMOneVsRestDecisionMaker method decide.

/**
 * Produce one decision per outcome for the given feature results, using one
 * binary linear SVM model per outcome (one-vs-rest).
 * <p>
 * If no features could be prepared for the current context, every outcome
 * receives the same uniform probability {@code 1.0 / outcomes.size()}.
 * Otherwise, each model is asked for its probability estimates and the
 * probability of the model's positive label (label value 1) is assigned to
 * the outcome at the same position as the model.
 *
 * @param featureResults
 *          the feature results describing the current context
 * @return the decisions, in the order imposed by a {@link TreeSet} of
 *         {@code Decision}
 */
@Override
public List<Decision> decide(List<FeatureResult<?>> featureResults) {
    List<Feature> featureList = LinearSVMUtils.prepareData(featureResults, featureIndexMap);
    TreeSet<Decision> outcomeSet = new TreeSet<Decision>();
    if (featureList.isEmpty()) {
        LOG.info("No features for current context.");
        // Must be a floating-point division: the integer expression
        // 1 / outcomes.size() evaluates to 0 as soon as there is more than
        // one outcome, which gave every decision a probability of zero.
        double uniformProb = 1.0 / outcomes.size();
        for (String outcome : outcomes) {
            outcomeSet.add(new Decision(outcome, uniformProb));
        }
    } else {
        Feature[] instance = featureList.toArray(new Feature[0]);
        int i = 0;
        for (Model model : models) {
            // Locate the position of the positive class (label 1) in this
            // binary model; it defaults to 0 if label 1 is not found.
            int[] labels = model.getLabels();
            int myLabel = 0;
            for (int j = 0; j < labels.length; j++) {
                if (labels[j] == 1) {
                    myLabel = j;
                }
            }
            double[] probabilities = new double[2];
            Linear.predictProbability(model, instance, probabilities);
            // NOTE(review): assumes models and outcomes are aligned by index - confirm.
            outcomeSet.add(new Decision(outcomes.get(i), probabilities[myLabel]));
            i++;
        }
    }
    return new ArrayList<Decision>(outcomeSet);
}
Also used : Feature(de.bwaldvogel.liblinear.Feature) Decision(com.joliciel.talismane.machineLearning.Decision) TreeSet(java.util.TreeSet) Model(de.bwaldvogel.liblinear.Model)

Example 23 with Decision

use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.

the class StandoffReader method hasNextSentence.

/**
 * Indicate whether another sentence is available, building its parse
 * configuration on demand.
 * <p>
 * Nothing is built when a positive maximum sentence count has been reached,
 * when a configuration is already pending, or when all sentences have been
 * consumed. Otherwise the next sentence's tokens are joined into a text
 * (spaces inserted where the linguistic rules request them), the sentence is
 * annotated, each token is pos-tagged with the tag given in the standoff
 * data, and a dependency is added for each token that has an entry in
 * {@code relationMap}. Punctuation tokens without a relation are attached to
 * the closest preceding non-punctuation token, provided
 * {@code punctuationDepLabel} is set.
 *
 * @return {@code true} if a parse configuration is available
 * @throws TalismaneException
 *           if a relation refers to a token id that is not in the sentence
 * @throws IOException
 *           declared by the overridden method
 */
@Override
public boolean hasNextSentence() throws TalismaneException, IOException {
    boolean limitReached = this.getMaxSentenceCount() > 0 && sentenceCount >= this.getMaxSentenceCount();
    boolean mustLoad = !limitReached && configuration == null && sentenceIndex < sentences.size();
    if (!mustLoad) {
        // either we've reached the end, or a configuration is already waiting
        return configuration != null;
    }

    List<StandoffToken> sentenceTokens = sentences.get(sentenceIndex++);
    LinguisticRules linguisticRules = TalismaneSession.get(sessionId).getLinguisticRules();
    if (linguisticRules == null) {
        throw new RuntimeException("Linguistic rules have not been set.");
    }

    // Rebuild the raw sentence text from the individual tokens.
    String sentenceText = "";
    for (StandoffToken current : sentenceTokens) {
        String word = current.text;
        if (linguisticRules.shouldAddSpace(sentenceText, word)) {
            sentenceText = sentenceText + " ";
        }
        sentenceText = sentenceText + word;
    }

    Sentence sentence = new Sentence(sentenceText, sessionId);
    for (SentenceAnnotator annotator : TalismaneSession.get(sessionId).getSentenceAnnotators()) {
        annotator.annotate(sentence);
    }

    PretokenisedSequence tokenSequence = new PretokenisedSequence(sentence, sessionId);
    PosTagSequence posTagSequence = new PosTagSequence(tokenSequence);

    // Pos-tag every token and remember it by its standoff id.
    Map<String, PosTaggedToken> tokensById = new HashMap<String, PosTaggedToken>();
    for (StandoffToken current : sentenceTokens) {
        Token token = tokenSequence.addToken(current.text);
        Decision tagDecision = new Decision(current.posTag.getCode());
        PosTaggedToken taggedToken = new PosTaggedToken(token, tagDecision, sessionId);
        if (LOG.isTraceEnabled()) {
            LOG.trace(taggedToken.toString());
        }
        taggedToken.setComment(current.comment);
        posTagSequence.addPosTaggedToken(taggedToken);
        tokensById.put(current.id, taggedToken);
        LOG.debug("Found token " + current.id + ", " + taggedToken);
    }
    tokenSequence.setWithRoot(true);

    configuration = new ParseConfiguration(posTagSequence);
    for (StandoffToken current : sentenceTokens) {
        StandoffRelation relation = relationMap.get(current.id);
        if (relation != null) {
            PosTaggedToken head = tokensById.get(relation.fromToken);
            if (head == null) {
                throw new TalismaneException("No token found for head id: " + relation.fromToken);
            }
            PosTaggedToken dependent = tokensById.get(relation.toToken);
            if (dependent == null) {
                throw new TalismaneException("No token found for dependent id: " + relation.toToken);
            }
            DependencyArc arc = configuration.addDependency(head, dependent, relation.label, null);
            arc.setComment(relation.comment);
        } else if (current.posTag.getOpenClassIndicator() == PosTagOpenClassIndicator.PUNCTUATION && punctuationDepLabel != null) {
            // Attach unrelated punctuation to the closest preceding non-punctuation token.
            PosTaggedToken dependent = tokensById.get(current.id);
            for (int i = dependent.getIndex() - 1; i >= 0; i--) {
                PosTaggedToken candidate = posTagSequence.get(i);
                if (candidate.getTag().getOpenClassIndicator() != PosTagOpenClassIndicator.PUNCTUATION) {
                    configuration.addDependency(candidate, dependent, punctuationDepLabel, null);
                    break;
                }
            }
        }
    }

    return configuration != null;
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) TalismaneException(com.joliciel.talismane.TalismaneException) PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) Token(com.joliciel.talismane.tokeniser.Token) Decision(com.joliciel.talismane.machineLearning.Decision) ParseConfiguration(com.joliciel.talismane.parser.ParseConfiguration) PretokenisedSequence(com.joliciel.talismane.tokeniser.PretokenisedSequence) LinguisticRules(com.joliciel.talismane.LinguisticRules) SentenceAnnotator(com.joliciel.talismane.sentenceAnnotators.SentenceAnnotator) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) DependencyArc(com.joliciel.talismane.parser.DependencyArc) Sentence(com.joliciel.talismane.rawText.Sentence)

Example 24 with Decision

use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.

the class LinearSVMDecisionMaker method decide.

/**
 * Produce one decision per outcome for the given feature results, using a
 * single multi-class linear SVM model.
 * <p>
 * If no features could be prepared for the current context, every outcome
 * receives the same uniform probability {@code 1.0 / outcomes.size()}.
 * Otherwise the model's probability estimates are computed and the i-th
 * estimate is assigned to the i-th outcome.
 *
 * @param featureResults
 *          the feature results describing the current context
 * @return the decisions, in the order imposed by a {@link TreeSet} of
 *         {@code Decision}
 */
@Override
public List<Decision> decide(List<FeatureResult<?>> featureResults) {
    List<Feature> featureList = LinearSVMUtils.prepareData(featureResults, featureIndexMap);
    TreeSet<Decision> outcomeSet = new TreeSet<Decision>();
    if (featureList.isEmpty()) {
        LOG.info("No features for current context.");
        // Must be a floating-point division: the integer expression
        // 1 / outcomes.size() evaluates to 0 as soon as there is more than
        // one outcome, which gave every decision a probability of zero.
        double uniformProb = 1.0 / outcomes.size();
        for (String outcome : outcomes) {
            outcomeSet.add(new Decision(outcome, uniformProb));
        }
    } else {
        Feature[] instance = featureList.toArray(new Feature[0]);
        int labelCount = model.getLabels().length;
        double[] probabilities = new double[labelCount];
        Linear.predictProbability(model, instance, probabilities);
        // NOTE(review): assumes outcomes are indexed in the same order as the
        // model's probability estimates - confirm against model training.
        for (int i = 0; i < labelCount; i++) {
            outcomeSet.add(new Decision(outcomes.get(i), probabilities[i]));
        }
    }
    return new ArrayList<Decision>(outcomeSet);
}
Also used : TreeSet(java.util.TreeSet) Feature(de.bwaldvogel.liblinear.Feature) Decision(com.joliciel.talismane.machineLearning.Decision)

Example 25 with Decision

use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.

the class SentenceDetectorTest method testDetectSentences2.

/**
 * Detect sentences in a text whose analysis starts after an already-known
 * first sentence, with "no sentence break" markers covering both "Mr."
 * occurrences and a forced sentence break on the newline. The stub decision
 * maker always answers IS_BOUNDARY with probability 1.0. Expects two guessed
 * boundaries and four sentence boundary annotations overall.
 */
@Test
public void testDetectSentences2() throws Exception {
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    final Config config = ConfigFactory.load();
    final String sessionId = "test";

    // Stub decision maker: every candidate is a boundary.
    DecisionMaker decisionMaker = new DecisionMaker() {

        @Override
        public ScoringStrategy<ClassificationSolution> getDefaultScoringStrategy() {
            return new GeometricMeanScoringStrategy();
        }

        @Override
        public List<Decision> decide(List<FeatureResult<?>> featureResults) {
            List<Decision> result = new ArrayList<>();
            result.add(new Decision(SentenceDetectorOutcome.IS_BOUNDARY.name(), 1.0));
            return result;
        }
    };

    String[] labels = new String[0];
    Set<SentenceDetectorFeature<?>> features = new HashSet<>();
    SentenceDetector sentenceDetector = new SentenceDetector(decisionMaker, features, sessionId);

    String text = "Before analysis. Hello Mr. Jones\nHow are you, Mr. Jones? After";

    // Offsets reused throughout the test, expressed as prefix lengths of the text.
    final int firstSentenceEnd = "Before analysis.".length();
    final int analysisStart = "Before analysis. ".length();
    final int lineBreakStart = "Before analysis. Hello Mr. Jones".length();
    final int lineBreakEnd = "Before analysis. Hello Mr. Jones\n".length();
    final int questionEnd = "Before analysis. Hello Mr. Jones\nHow are you, Mr. Jones?".length();

    AnnotatedText annotatedText = new AnnotatedText(text, analysisStart, text.length());

    // Both occurrences of "Mr." must not be treated as sentence ends.
    List<Annotation<RawTextNoSentenceBreakMarker>> noSentenceBreakMarkers = new ArrayList<>();
    noSentenceBreakMarkers.add(new Annotation<>("Before analysis. Hello ".length(), "Before analysis. Hello Mr.".length(), new RawTextNoSentenceBreakMarker("me"), labels));
    noSentenceBreakMarkers.add(new Annotation<>("Before analysis. Hello Mr. Jones\nHow are you, ".length(), "Before analysis. Hello Mr. Jones\nHow are you, Mr.".length(), new RawTextNoSentenceBreakMarker("me"), labels));
    annotatedText.addAnnotations(noSentenceBreakMarkers);

    // The sentence preceding the analysis start is already known.
    List<Annotation<SentenceBoundary>> existingBoundaries = new ArrayList<>();
    existingBoundaries.add(new Annotation<>("".length(), firstSentenceEnd, new SentenceBoundary(), labels));
    annotatedText.addAnnotations(existingBoundaries);

    // The newline is marked as a sentence break.
    List<Annotation<RawTextSentenceBreakMarker>> sentenceBreaks = new ArrayList<>();
    sentenceBreaks.add(new Annotation<>(lineBreakStart, lineBreakEnd, new RawTextSentenceBreakMarker("me"), labels));
    annotatedText.addAnnotations(sentenceBreaks);

    List<Integer> guessedBoundaries = sentenceDetector.detectSentences(annotatedText);
    assertEquals(2, guessedBoundaries.size());
    assertEquals(lineBreakEnd, guessedBoundaries.get(0).intValue());
    assertEquals(questionEnd, guessedBoundaries.get(1).intValue());

    List<Annotation<SentenceBoundary>> sentenceBoundaries = annotatedText.getAnnotations(SentenceBoundary.class);
    System.out.println(sentenceBoundaries.toString());
    assertEquals(4, sentenceBoundaries.size());

    Annotation<SentenceBoundary> first = sentenceBoundaries.get(0);
    assertEquals("".length(), first.getStart());
    assertEquals(firstSentenceEnd, first.getEnd());

    Annotation<SentenceBoundary> second = sentenceBoundaries.get(1);
    assertEquals(analysisStart, second.getStart());
    assertEquals(lineBreakEnd, second.getEnd());

    Annotation<SentenceBoundary> third = sentenceBoundaries.get(2);
    assertEquals(lineBreakEnd, third.getStart());
    assertEquals(questionEnd, third.getEnd());

    Annotation<SentenceBoundary> fourth = sentenceBoundaries.get(3);
    assertEquals(questionEnd, fourth.getStart());
    assertEquals("Before analysis. Hello Mr. Jones\nHow are you, Mr. Jones? After".length(), fourth.getEnd());
}
Also used : SentenceDetectorFeature(com.joliciel.talismane.sentenceDetector.features.SentenceDetectorFeature) Config(com.typesafe.config.Config) ArrayList(java.util.ArrayList) GeometricMeanScoringStrategy(com.joliciel.talismane.machineLearning.GeometricMeanScoringStrategy) RawTextNoSentenceBreakMarker(com.joliciel.talismane.rawText.RawTextMarker.RawTextNoSentenceBreakMarker) ArrayList(java.util.ArrayList) List(java.util.List) HashSet(java.util.HashSet) AnnotatedText(com.joliciel.talismane.AnnotatedText) DecisionMaker(com.joliciel.talismane.machineLearning.DecisionMaker) ClassificationSolution(com.joliciel.talismane.machineLearning.ClassificationSolution) Decision(com.joliciel.talismane.machineLearning.Decision) Annotation(com.joliciel.talismane.Annotation) RawTextSentenceBreakMarker(com.joliciel.talismane.rawText.RawTextMarker.RawTextSentenceBreakMarker) TalismaneTest(com.joliciel.talismane.TalismaneTest) Test(org.junit.Test)

Aggregations

Decision (com.joliciel.talismane.machineLearning.Decision): 37; ArrayList (java.util.ArrayList): 24; Config (com.typesafe.config.Config): 15; TreeSet (java.util.TreeSet): 15; RuntimeEnvironment (com.joliciel.talismane.machineLearning.features.RuntimeEnvironment): 13; Token (com.joliciel.talismane.tokeniser.Token): 12; Test (org.junit.Test): 12; Sentence (com.joliciel.talismane.rawText.Sentence): 11; TokenSequence (com.joliciel.talismane.tokeniser.TokenSequence): 11; List (java.util.List): 11; FeatureResult (com.joliciel.talismane.machineLearning.features.FeatureResult): 10; TalismaneTest (com.joliciel.talismane.TalismaneTest): 9; DecisionMaker (com.joliciel.talismane.machineLearning.DecisionMaker): 9; PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence): 8; PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken): 8; TalismaneException (com.joliciel.talismane.TalismaneException): 7; WeightedOutcome (com.joliciel.talismane.utils.WeightedOutcome): 7; HashSet (java.util.HashSet): 7; Shape (com.joliciel.jochre.graphics.Shape): 6; HashMap (java.util.HashMap): 6