Search in sources:

Example 11 with Decision

use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.

In the class LexicalAttributeFeatureTest, method testCheckInternalMultipleEntries:

/**
 * Verifies that a {@code LexicalAttributeFeature} asking for the {@code Person} attribute
 * yields several outcomes for a single pos-tagged token.
 * <p>
 * The fixture is the sentence "je demande" with the token "demande" tagged "V"; the test
 * expects exactly two outcomes, each being either "1" or "3". The lexicon providing those
 * entries is presumably configured through {@code testWithLex.conf} (not visible here).
 */
@Test
public void testCheckInternalMultipleEntries() throws Exception {
    // Point the configuration library at the lexicon-enabled test configuration.
    System.setProperty("config.file", "src/test/resources/testWithLex.conf");
    ConfigFactory.invalidateCaches();
    final Config config = ConfigFactory.load();

    final String sessionId = "test";
    final String text = "je demande";
    final String word = "demande";
    final int wordStart = "je ".length();
    final int wordEnd = text.length();

    // Build the token under test and tag it as a verb with full confidence.
    final Sentence sentence = new Sentence(text, sessionId);
    final TokenSequence tokens = new TokenSequence(sentence, sessionId);
    final Token verbToken = new Token(word, tokens, 1, wordStart, wordEnd, sessionId);
    final Decision verbDecision = new Decision("V", 1.0);
    final PosTaggedToken posTaggedToken = new PosTaggedToken(verbToken, verbDecision, sessionId);

    // Address function that always resolves to the pos-tagged token built above.
    final PosTaggedTokenAddressFunction<PosTaggerContext> fixedAddress = new AbstractPosTaggedTokenAddressFunction() {

        @Override
        protected FeatureResult<PosTaggedTokenWrapper> checkInternal(PosTaggerContext context, RuntimeEnvironment env) {
            return this.generateResult(posTaggedToken);
        }
    };

    final StringLiteralFeature<PosTaggedTokenWrapper> attributeName = new StringLiteralFeature<>(LexicalAttribute.Person.name());
    final LexicalAttributeFeature<PosTaggerContext> feature = new LexicalAttributeFeature<>(fixedAddress, attributeName);

    final PosTaggerContext context = new PosTaggerContextImpl(verbToken, new PosTagSequence(tokens));
    final RuntimeEnvironment env = new RuntimeEnvironment();

    final List<WeightedOutcome<String>> outcomes = feature.checkInternal(context, env).getOutcome();
    System.out.println(outcomes);

    // Every returned person value must be first or third person...
    for (WeightedOutcome<String> weighted : outcomes) {
        final String person = weighted.getOutcome();
        final boolean firstOrThird = "1".equals(person) || "3".equals(person);
        assertTrue(firstOrThird);
    }
    // ...and there must be exactly two of them.
    assertEquals(2, outcomes.size());
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) Config(com.typesafe.config.Config) StringLiteralFeature(com.joliciel.talismane.machineLearning.features.StringLiteralFeature) WeightedOutcome(com.joliciel.talismane.utils.WeightedOutcome) PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) Token(com.joliciel.talismane.tokeniser.Token) PosTaggerContext(com.joliciel.talismane.posTagger.PosTaggerContext) Decision(com.joliciel.talismane.machineLearning.Decision) PosTaggerContextImpl(com.joliciel.talismane.posTagger.PosTaggerContextImpl) PosTagSequence(com.joliciel.talismane.posTagger.PosTagSequence) List(java.util.List) Sentence(com.joliciel.talismane.rawText.Sentence) TokenSequence(com.joliciel.talismane.tokeniser.TokenSequence) TalismaneTest(com.joliciel.talismane.TalismaneTest) Test(org.junit.Test)

Example 12 with Decision

use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.

In the class MaxentDetailedAnalysisWriter, method onAnalyse:

/**
 * Writes a detailed, human-readable trace of one analysis event to {@code writer}.
 * <p>
 * The report contains, in order: the event itself, one entry per feature result (written by
 * {@code writeFeatureResult}, which is handed the per-outcome totals map and presumably
 * accumulates into it), a table of per-outcome totals (log value, exponentiated value and
 * the exponentiated value normalised over all outcomes), and finally the probability of
 * every outcome in {@code outcomeList} as found in {@code outcomes}.
 *
 * @param event
 *            the analysed event; only its {@code toString()} is written
 * @param featureResults
 *            the feature results to report; a result whose outcome is a {@link List} is
 *            treated as a list of weighted string outcomes and reported once per element
 * @param outcomes
 *            the decisions whose probabilities are reported; an outcome of
 *            {@code outcomeList} with no matching decision is reported with weight 0.0
 * @throws IOException
 *             if writing to the underlying writer fails
 */
@Override
public void onAnalyse(Object event, List<FeatureResult<?>> featureResults, Collection<Decision> outcomes) throws IOException {
    // Column layouts. The value columns use a plain right-aligned "%15s": the '#' flag is
    // only valid for %s when the argument implements Formattable, and java.util.Formatter
    // throws FormatFlagsConversionMismatchException when it is applied to an ordinary String.
    final String labelColumn = "%1$-30s";
    final String valueColumn = "%1$15s";

    // Every outcome starts from the same log-space prior, log(1/n).
    Map<String, Double> outcomeTotals = new TreeMap<String, Double>();
    double uniformPrior = Math.log(1 / (double) outcomeList.size());
    for (String outcome : outcomeList) {
        outcomeTotals.put(outcome, uniformPrior);
    }

    writer.append("####### Event: " + event.toString() + "\n");
    writer.append("### Feature results:\n");
    for (FeatureResult<?> featureResult : featureResults) {
        if (featureResult.getOutcome() instanceof List) {
            // The cast is unchecked: any List outcome is assumed to hold WeightedOutcome<String>.
            @SuppressWarnings("unchecked")
            FeatureResult<List<WeightedOutcome<String>>> stringCollectionResult = (FeatureResult<List<WeightedOutcome<String>>>) featureResult;
            for (WeightedOutcome<String> stringOutcome : stringCollectionResult.getOutcome()) {
                String featureName = featureResult.getTrainingName() + "|" + featureResult.getTrainingOutcome(stringOutcome.getOutcome());
                String featureOutcome = stringOutcome.getOutcome();
                double value = stringOutcome.getWeight();
                this.writeFeatureResult(featureName, featureOutcome, value, outcomeTotals);
            }
        } else {
            // Only double-valued features carry their own value; everything else counts as 1.0.
            double value = 1.0;
            if (featureResult.getFeature() instanceof DoubleFeature) {
                value = (Double) featureResult.getOutcome();
            }
            this.writeFeatureResult(featureResult.getTrainingName(), featureResult.getOutcome().toString(), value, outcomeTotals);
        }
    }

    writer.append("### Outcome totals:\n");
    writer.append("# Uniform prior: " + uniformPrior + " (=1/" + outcomeList.size() + ")\n");

    // Normalisation constant: the sum of the exponentiated totals over all outcomes.
    double grandTotal = 0;
    for (String outcome : outcomeList) {
        double total = outcomeTotals.get(outcome);
        double expTotal = Math.exp(total);
        grandTotal += expTotal;
    }

    writer.append(String.format(labelColumn, "outcome") + String.format(valueColumn, "total(log)") + String.format(valueColumn, "total")
            + String.format(valueColumn, "normalised") + "\n");
    for (String outcome : outcomeList) {
        double total = outcomeTotals.get(outcome);
        double expTotal = Math.exp(total);
        writer.append(String.format(labelColumn, outcome) + String.format(valueColumn, decFormat.format(total))
                + String.format(valueColumn, decFormat.format(expTotal)) + String.format(valueColumn, decFormat.format(expTotal / grandTotal)) + "\n");
    }
    writer.append("\n");

    // Index the supplied decisions by outcome so that each known outcome can be looked up.
    Map<String, Double> outcomeWeights = new TreeMap<String, Double>();
    for (Decision decision : outcomes) {
        outcomeWeights.put(decision.getOutcome(), decision.getProbability());
    }

    writer.append("### Outcome list:\n");
    // The TreeSet orders entries by WeightedOutcome's natural ordering and keeps only one of
    // any entries that compare as equal.
    // NOTE(review): confirm WeightedOutcome.compareTo never returns 0 for distinct outcomes
    // with equal weights, otherwise such outcomes are silently dropped from this list.
    Set<WeightedOutcome<String>> weightedOutcomes = new TreeSet<WeightedOutcome<String>>();
    for (String outcome : outcomeList) {
        Double weightObj = outcomeWeights.get(outcome);
        double weight = (weightObj == null ? 0.0 : weightObj.doubleValue());
        WeightedOutcome<String> weightedOutcome = new WeightedOutcome<String>(outcome, weight);
        weightedOutcomes.add(weightedOutcome);
    }
    for (WeightedOutcome<String> weightedOutcome : weightedOutcomes) {
        writer.append(String.format(labelColumn, weightedOutcome.getOutcome()) + String.format(valueColumn, decFormat.format(weightedOutcome.getWeight())) + "\n");
    }
    writer.append("\n");
    writer.flush();
}
Also used : WeightedOutcome(com.joliciel.talismane.utils.WeightedOutcome) TreeMap(java.util.TreeMap) DoubleFeature(com.joliciel.talismane.machineLearning.features.DoubleFeature) Decision(com.joliciel.talismane.machineLearning.Decision) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) List(java.util.List) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult)

Example 13 with Decision

use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.

In the class OpenNLPDecisionMaker, method decide:

/**
 * Evaluates the model on the given feature results and returns one {@link Decision} per
 * model outcome.
 * <p>
 * The feature results are first flattened into parallel context-name and weight lists by
 * {@code OpenNLPDecisionMaker.prepareData}, then passed to {@code model.eval} as arrays.
 * The resulting decisions are collected in a {@link TreeSet}, so the returned list follows
 * {@code Decision}'s natural ordering (presumably by probability — confirm against
 * {@code Decision.compareTo}) and keeps only one of any decisions that compare as equal.
 *
 * @param featureResults
 *            the feature results to evaluate
 * @return the decisions, in the order produced by the sorted set
 */
@Override
public List<Decision> decide(List<FeatureResult<?>> featureResults) {
    final List<String> contextNames = new ArrayList<String>();
    final List<Float> contextWeights = new ArrayList<Float>();
    OpenNLPDecisionMaker.prepareData(featureResults, contextNames, contextWeights);

    // The model takes plain arrays: copy names directly and unbox the weights one by one.
    final String[] contexts = contextNames.toArray(new String[contextNames.size()]);
    final float[] weights = new float[contextWeights.size()];
    for (int w = 0; w < weights.length; w++) {
        weights[w] = contextWeights.get(w);
    }

    final double[] probs = model.eval(contexts, weights);

    // Pair each probability with the outcome label at the same index.
    final TreeSet<Decision> ranked = new TreeSet<Decision>();
    for (int o = 0; o < probs.length; o++) {
        ranked.add(new Decision(model.getOutcome(o), probs[o]));
    }
    return new ArrayList<Decision>(ranked);
}
Also used : ArrayList(java.util.ArrayList) Decision(com.joliciel.talismane.machineLearning.Decision) TreeSet(java.util.TreeSet)

Example 14 with Decision

use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.

In the class PerceptronDecisionMaker, method decide:

/**
 * Scores every outcome for the given feature results and converts the raw perceptron scores
 * into a probability distribution.
 * <p>
 * Two conversions are supported, selected by {@code getPerceptronScoring()}:
 * <ul>
 * <li>{@code normalisedExponential}: {@code e^(x/absmax) / sum(e^(x/absmax))}, where
 * {@code absmax} is the largest absolute score, but never less than 1;</li>
 * <li>otherwise: scores are shifted, if necessary, so that the smallest one equals 1, then
 * divided by their sum.</li>
 * </ul>
 * The decisions are collected in a {@link TreeSet}, so the returned list follows
 * {@code Decision}'s natural ordering and keeps only one of any decisions that compare as
 * equal.
 *
 * @param featureResults
 *            the feature results to score
 * @return one decision per model outcome, carrying both the raw score and the probability
 */
@Override
public List<Decision> decide(List<FeatureResult<?>> featureResults) {
    List<Integer> featureIndexList = new ArrayList<Integer>();
    List<Double> featureValueList = new ArrayList<Double>();
    modelParameters.prepareData(featureResults, featureIndexList, featureValueList);
    double[] results = this.predict(featureIndexList, featureValueList);
    double[] probs = new double[results.length];

    if (this.getPerceptronScoring() == PerceptronScoring.normalisedExponential) {
        // e^(x/absmax)/sum(e^(x/absmax))
        // where x/absmax is in [-1,1]
        // e^(x/absmax) is in [1/e,e]
        double absoluteMax = 1;
        for (int i = 0; i < results.length; i++) {
            if (Math.abs(results[i]) > absoluteMax)
                absoluteMax = Math.abs(results[i]);
        }
        for (int i = 0; i < results.length; i++) {
            probs[i] = Math.exp(results[i] / absoluteMax);
        }
    } else {
        // make all results >= 1
        double min = Double.MAX_VALUE;
        for (int i = 0; i < results.length; i++) {
            if (results[i] < min)
                min = results[i];
        }
        // Shift only when the smallest score is below 1; otherwise use the scores as they
        // are. Every entry of probs must be assigned here: leaving it at its default of 0.0
        // (as happened previously whenever no score was negative) makes the total 0 and
        // turns every probability into NaN.
        boolean needsShift = min < 1;
        for (int i = 0; i < results.length; i++) {
            probs[i] = needsShift ? (results[i] - min) + 1 : results[i];
        }
    }

    // Divide by the total to get a probability distribution. Each value is strictly
    // positive in both branches, so the total is positive whenever there is any outcome.
    double total = 0.0;
    for (int i = 0; i < probs.length; i++) {
        total += probs[i];
    }
    for (int i = 0; i < probs.length; i++) {
        probs[i] /= total;
    }

    // Pair each outcome label with the score and probability at the same index.
    int i = 0;
    TreeSet<Decision> outcomeSet = new TreeSet<Decision>();
    for (String outcome : modelParameters.getOutcomes()) {
        Decision decision = new Decision(outcome, results[i], probs[i]);
        outcomeSet.add(decision);
        i++;
    }
    List<Decision> decisions = new ArrayList<Decision>(outcomeSet);
    return decisions;
}
Also used : ArrayList(java.util.ArrayList) Decision(com.joliciel.talismane.machineLearning.Decision) TreeSet(java.util.TreeSet)

Example 15 with Decision

use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.

In the class TokenisedAtomicTokenSequenceTest, method testGetTokenSequence:

/**
 * Verifies that {@code TokenisedAtomicTokenSequence.inferTokenSequence()} rebuilds the
 * expected tokens from a per-atomic-token list of SEPARATE/JOIN outcomes.
 * <p>
 * The sentence "Je n'ai pas encore l'ourang-outan." is split into default tokens
 * (whitespace included), each of which is tagged with a hand-written outcome. The inferred
 * sequence must contain exactly seven tokens: "Je", "n'", "ai", "pas encore", "l'",
 * "ourang-outan" and ".".
 */
@Test
public void testGetTokenSequence() throws Exception {
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    final Config config = ConfigFactory.load();

    final String sessionId = "test";
    final Sentence sentence = new Sentence("Je n'ai pas encore l'ourang-outan.", sessionId);

    // One outcome per default token; the trailing comment names the atomic token each
    // outcome is presumably applied to ("_" stands for whitespace).
    final TokeniserOutcome[] outcomePerAtom = {
        TokeniserOutcome.SEPARATE, // Je
        TokeniserOutcome.SEPARATE, // _
        TokeniserOutcome.SEPARATE, // n
        TokeniserOutcome.JOIN, // '
        TokeniserOutcome.SEPARATE, // ai
        TokeniserOutcome.SEPARATE, // _
        TokeniserOutcome.SEPARATE, // pas
        TokeniserOutcome.JOIN, // _
        TokeniserOutcome.JOIN, // encore
        TokeniserOutcome.SEPARATE, // _
        TokeniserOutcome.SEPARATE, // l
        TokeniserOutcome.JOIN, // '
        TokeniserOutcome.SEPARATE, // ourang
        TokeniserOutcome.JOIN, // -
        TokeniserOutcome.JOIN, // outan
        TokeniserOutcome.SEPARATE // .
    };

    final TokenisedAtomicTokenSequence atomicTokenSequence = new TokenisedAtomicTokenSequence(sentence, sessionId);
    final TokenSequence defaultTokens = new TokenSequence(sentence, sessionId);
    defaultTokens.findDefaultTokens();

    // Tag every default token with its outcome, round-tripping the outcome through a Decision.
    int atomIndex = 0;
    for (Token atom : defaultTokens.listWithWhiteSpace()) {
        final Decision decision = new Decision(outcomePerAtom[atomIndex].name());
        atomIndex++;
        final TokeniserOutcome tag = TokeniserOutcome.valueOf(decision.getOutcome());
        atomicTokenSequence.add(new TaggedToken<>(atom, decision, tag));
    }

    final TokenSequence newTokenSequence = atomicTokenSequence.inferTokenSequence();
    LOG.debug(newTokenSequence.toString());

    final String[] expectedTexts = { "Je", "n'", "ai", "pas encore", "l'", "ourang-outan", "." };
    int position = 0;
    for (Token inferred : newTokenSequence) {
        // Tokens beyond the expected ones are not compared here; the size check below
        // reports a surplus.
        if (position < expectedTexts.length) {
            assertEquals(expectedTexts[position], inferred.getAnalyisText());
        }
        position++;
    }
    assertEquals(7, newTokenSequence.size());
}
Also used : Config(com.typesafe.config.Config) Sentence(com.joliciel.talismane.rawText.Sentence) Decision(com.joliciel.talismane.machineLearning.Decision) TalismaneTest(com.joliciel.talismane.TalismaneTest) Test(org.junit.Test)

Aggregations

Decision (com.joliciel.talismane.machineLearning.Decision): 37, ArrayList (java.util.ArrayList): 24, Config (com.typesafe.config.Config): 15, TreeSet (java.util.TreeSet): 15, RuntimeEnvironment (com.joliciel.talismane.machineLearning.features.RuntimeEnvironment): 13, Token (com.joliciel.talismane.tokeniser.Token): 12, Test (org.junit.Test): 12, Sentence (com.joliciel.talismane.rawText.Sentence): 11, TokenSequence (com.joliciel.talismane.tokeniser.TokenSequence): 11, List (java.util.List): 11, FeatureResult (com.joliciel.talismane.machineLearning.features.FeatureResult): 10, TalismaneTest (com.joliciel.talismane.TalismaneTest): 9, DecisionMaker (com.joliciel.talismane.machineLearning.DecisionMaker): 9, PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence): 8, PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken): 8, TalismaneException (com.joliciel.talismane.TalismaneException): 7, WeightedOutcome (com.joliciel.talismane.utils.WeightedOutcome): 7, HashSet (java.util.HashSet): 7, Shape (com.joliciel.jochre.graphics.Shape): 6, HashMap (java.util.HashMap): 6