Search in sources :

Example 11 with FeatureResult

use of com.joliciel.talismane.machineLearning.features.FeatureResult in project talismane by joliciel-informatique.

the class MaxentDetailedAnalysisWriter method onAnalyse.

/**
 * Writes a detailed, human-readable report for one analysed event to {@code writer}.
 * <p>
 * The report has three sections: the individual feature results, the per-outcome
 * totals (log value, exponentiated value and the exponentiated value normalised over
 * all outcomes), and the list of outcomes with the probabilities found in
 * {@code outcomes}. The writer is flushed at the end.
 * <p>
 * (non-Javadoc) see com.joliciel.talismane.maxent.MaxentObserver#onAnalyse
 *
 * @param event          the event being analysed; only its {@code toString()} is written
 * @param featureResults the feature results to report; a result whose outcome is a
 *                       {@code List} is treated as a list of weighted string outcomes
 * @param outcomes       the decisions whose probabilities are listed; outcomes in
 *                       {@code outcomeList} without a decision are listed with weight 0.0
 * @throws IOException if writing to the underlying writer fails
 */
@Override
public void onAnalyse(Object event, List<FeatureResult<?>> featureResults, Collection<Decision> outcomes) throws IOException {
    // Column layouts: outcome names are left-justified in 30 characters, values are
    // right-justified in 15. The value column used to be "%1$#15s", but the '#' flag is
    // only legal for %s when the argument implements Formattable; with plain Strings
    // java.util.Formatter throws FormatFlagsConversionMismatchException.
    final String nameColumn = "%1$-30s";
    final String valueColumn = "%1$15s";

    // Every outcome starts from the log of a uniform prior, log(1/n).
    Map<String, Double> outcomeTotals = new TreeMap<String, Double>();
    double uniformPrior = Math.log(1 / (double) outcomeList.size());
    for (String outcome : outcomeList) {
        outcomeTotals.put(outcome, uniformPrior);
    }

    writer.append("####### Event: " + event.toString() + "\n");
    writer.append("### Feature results:\n");
    for (FeatureResult<?> featureResult : featureResults) {
        if (featureResult.getOutcome() instanceof List) {
            // A list outcome is reported as one line per weighted string outcome.
            @SuppressWarnings("unchecked")
            FeatureResult<List<WeightedOutcome<String>>> stringCollectionResult = (FeatureResult<List<WeightedOutcome<String>>>) featureResult;
            for (WeightedOutcome<String> stringOutcome : stringCollectionResult.getOutcome()) {
                String featureName = featureResult.getTrainingName() + "|" + featureResult.getTrainingOutcome(stringOutcome.getOutcome());
                String featureOutcome = stringOutcome.getOutcome();
                double value = stringOutcome.getWeight();
                // NOTE(review): writeFeatureResult presumably also accumulates into outcomeTotals - confirm.
                this.writeFeatureResult(featureName, featureOutcome, value, outcomeTotals);
            }
        } else {
            // Non-double features are reported with a value of 1.0.
            double value = 1.0;
            if (featureResult.getFeature() instanceof DoubleFeature) {
                value = (Double) featureResult.getOutcome();
            }
            this.writeFeatureResult(featureResult.getTrainingName(), featureResult.getOutcome().toString(), value, outcomeTotals);
        }
    }

    writer.append("### Outcome totals:\n");
    // The value printed is the log of the prior; the "(=1/n)" suffix shows the prior itself.
    writer.append("# Uniform prior: " + uniformPrior + " (=1/" + outcomeList.size() + ")\n");

    // Sum of exponentiated totals, used below to normalise each outcome.
    double grandTotal = 0;
    for (String outcome : outcomeList) {
        double total = outcomeTotals.get(outcome);
        grandTotal += Math.exp(total);
    }

    writer.append(String.format(nameColumn, "outcome") + String.format(valueColumn, "total(log)") + String.format(valueColumn, "total")
            + String.format(valueColumn, "normalised") + "\n");
    for (String outcome : outcomeList) {
        double total = outcomeTotals.get(outcome);
        double expTotal = Math.exp(total);
        writer.append(String.format(nameColumn, outcome) + String.format(valueColumn, decFormat.format(total))
                + String.format(valueColumn, decFormat.format(expTotal)) + String.format(valueColumn, decFormat.format(expTotal / grandTotal)) + "\n");
    }
    writer.append("\n");

    // Probability of each decided outcome, keyed by outcome name.
    Map<String, Double> outcomeWeights = new TreeMap<String, Double>();
    for (Decision decision : outcomes) {
        outcomeWeights.put(decision.getOutcome(), decision.getProbability());
    }

    writer.append("### Outcome list:\n");
    // The TreeSet iterates in WeightedOutcome's natural ordering.
    Set<WeightedOutcome<String>> weightedOutcomes = new TreeSet<WeightedOutcome<String>>();
    for (String outcome : outcomeList) {
        Double weightObj = outcomeWeights.get(outcome);
        double weight = (weightObj == null ? 0.0 : weightObj.doubleValue());
        weightedOutcomes.add(new WeightedOutcome<String>(outcome, weight));
    }
    for (WeightedOutcome<String> weightedOutcome : weightedOutcomes) {
        writer.append(String.format(nameColumn, weightedOutcome.getOutcome()) + String.format(valueColumn, decFormat.format(weightedOutcome.getWeight())) + "\n");
    }
    writer.append("\n");
    writer.flush();
}
Also used : WeightedOutcome(com.joliciel.talismane.utils.WeightedOutcome) TreeMap(java.util.TreeMap) DoubleFeature(com.joliciel.talismane.machineLearning.features.DoubleFeature) Decision(com.joliciel.talismane.machineLearning.Decision) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) List(java.util.List) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult)

Example 12 with FeatureResult

use of com.joliciel.talismane.machineLearning.features.FeatureResult in project jochre by urieli.

the class JochreMergeEventStream method next.

/**
 * Builds the classification event for the current merge candidate.
 * <p>
 * Every merge feature is checked against the candidate and the non-null results are
 * collected. The outcome is derived from the letters of the two shapes: a "|" prefix
 * on the first letter and/or a "|" suffix on the second letter, combined with an empty
 * counterpart, marks the pair as one that should be merged. The yes/no counters are
 * updated and the candidate is cleared so that {@code hasNext} can fetch the next one.
 *
 * @return the event for the current candidate, or {@code null} if {@code hasNext()}
 *         returns false
 */
@Override
public ClassificationEvent next() {
    if (!this.hasNext()) {
        return null;
    }

    LOG.debug("next event, " + mergeCandidate.getFirstShape() + ", " + mergeCandidate.getSecondShape());

    // analyse features
    List<FeatureResult<?>> results = new ArrayList<>();
    for (MergeFeature<?> mergeFeature : mergeFeatures) {
        FeatureResult<?> result = mergeFeature.check(mergeCandidate, new RuntimeEnvironment());
        if (result == null) {
            continue;
        }
        results.add(result);
        if (LOG.isTraceEnabled()) {
            LOG.trace(result.toString());
        }
    }

    String firstLetter = mergeCandidate.getFirstShape().getLetter();
    String secondLetter = mergeCandidate.getSecondShape().getLetter();
    boolean shouldMerge;
    if (firstLetter.startsWith("|")) {
        // first shape opens a split letter: merge if the second is empty or closes it
        shouldMerge = secondLetter.length() == 0 || secondLetter.endsWith("|");
    } else {
        // second shape closes a split letter: merge only if the first is empty
        shouldMerge = secondLetter.endsWith("|") && firstLetter.length() == 0;
    }

    MergeOutcome outcome = shouldMerge ? MergeOutcome.DO_MERGE : MergeOutcome.DO_NOT_MERGE;
    if (shouldMerge) {
        yesCount++;
    } else {
        noCount++;
    }
    LOG.debug("Outcome: " + outcome);

    ClassificationEvent event = new ClassificationEvent(results, outcome.name());

    // set mergeCandidate to null so that hasNext can retrieve the next
    // one.
    this.mergeCandidate = null;
    return event;
}
Also used : RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) ArrayList(java.util.ArrayList) ClassificationEvent(com.joliciel.talismane.machineLearning.ClassificationEvent) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult)

Example 13 with FeatureResult

use of com.joliciel.talismane.machineLearning.features.FeatureResult in project jochre by urieli.

the class ShapeMerger method checkMerge.

/**
 * Given two sequential shapes, returns the probability of a merge.
 * <p>
 * All merge features are checked against the pair, the non-null results are handed to
 * the decision maker, and the probability of the first decision whose outcome is
 * {@code DO_MERGE} is returned.
 *
 * @param shape1 the first shape of the candidate pair
 * @param shape2 the second shape of the candidate pair
 * @return the probability attached to the {@code DO_MERGE} decision, or 0.0 if the
 *         decision maker returned no such decision
 */
public double checkMerge(Shape shape1, Shape shape2) {
    ShapePair mergeCandidate = new ShapePair(shape1, shape2);
    if (LOG.isTraceEnabled()) {
        LOG.trace("mergeCandidate: " + mergeCandidate);
    }

    // analyse features
    List<FeatureResult<?>> featureResults = new ArrayList<FeatureResult<?>>();
    for (MergeFeature<?> feature : mergeFeatures) {
        RuntimeEnvironment env = new RuntimeEnvironment();
        FeatureResult<?> featureResult = feature.check(mergeCandidate, env);
        if (featureResult != null) {
            featureResults.add(featureResult);
            if (LOG.isTraceEnabled()) {
                LOG.trace(featureResult.toString());
            }
        }
    }

    List<Decision> decisions = decisionMaker.decide(featureResults);

    // Decision outcomes are strings (training events are built from MergeOutcome.name()),
    // so compare against the enum's name: String.equals(enumConstant) is never true and
    // would leave the probability at 0.0 for every pair.
    final String mergeOutcomeName = MergeOutcome.DO_MERGE.name();
    double yesProb = 0.0;
    for (Decision decision : decisions) {
        if (mergeOutcomeName.equals(decision.getOutcome())) {
            yesProb = decision.getProbability();
            break;
        }
    }

    if (LOG.isTraceEnabled()) {
        LOG.trace("yesProb: " + yesProb);
    }
    return yesProb;
}
Also used : RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) ArrayList(java.util.ArrayList) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult) Decision(com.joliciel.talismane.machineLearning.Decision)

Example 14 with FeatureResult

use of com.joliciel.talismane.machineLearning.features.FeatureResult in project talismane by joliciel-informatique.

the class PosTagEventStream method next.

/**
 * Builds the classification event for the next pos-tagged token of the current
 * sentence.
 * <p>
 * The token's tag code is the classification; every pos-tagger feature is checked
 * against a context made of the token and the current history, and the non-null
 * results form the event. The token is then added to the history, and the current
 * sentence is cleared once its last token has been consumed.
 *
 * @return the event for the next token, or {@code null} if {@code hasNext()} returns
 *         false
 */
@Override
public ClassificationEvent next() throws TalismaneException, IOException {
    if (!this.hasNext()) {
        return null;
    }

    PosTaggedToken taggedToken = currentSentence.get(currentIndex++);
    String classification = taggedToken.getTag().getCode();
    if (LOG.isDebugEnabled()) {
        LOG.debug("next event, token: " + taggedToken.getToken().getAnalyisText() + " : " + classification);
    }

    PosTaggerContext context = new PosTaggerContextImpl(taggedToken.getToken(), currentHistory);
    List<FeatureResult<?>> results = new ArrayList<FeatureResult<?>>();
    for (PosTaggerFeature<?> feature : posTaggerFeatures) {
        FeatureResult<?> result = feature.check(context, new RuntimeEnvironment());
        if (result != null) {
            results.add(result);
        }
    }

    if (LOG.isTraceEnabled()) {
        LOG.trace("Token: " + taggedToken.getToken().getAnalyisText());
        // sorted, de-duplicated view of the feature results for stable trace output
        SortedSet<String> sortedResults = results.stream().map(Object::toString).collect(Collectors.toCollection(TreeSet::new));
        for (String line : sortedResults) {
            LOG.trace(line);
        }
    }

    ClassificationEvent event = new ClassificationEvent(results, classification);
    currentHistory.addPosTaggedToken(taggedToken);

    // last token consumed: drop the sentence
    if (currentIndex == currentSentence.size()) {
        currentSentence = null;
    }
    return event;
}
Also used : Logger(org.slf4j.Logger) SortedSet(java.util.SortedSet) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) IOException(java.io.IOException) ClassificationEvent(com.joliciel.talismane.machineLearning.ClassificationEvent) Collectors(java.util.stream.Collectors) TreeSet(java.util.TreeSet) TalismaneException(com.joliciel.talismane.TalismaneException) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) PosTaggerFeature(com.joliciel.talismane.posTagger.features.PosTaggerFeature) List(java.util.List) ClassificationEventStream(com.joliciel.talismane.machineLearning.ClassificationEventStream) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult) Map(java.util.Map) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) ArrayList(java.util.ArrayList) TreeSet(java.util.TreeSet) ClassificationEvent(com.joliciel.talismane.machineLearning.ClassificationEvent) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult)

Example 15 with FeatureResult

use of com.joliciel.talismane.machineLearning.features.FeatureResult in project talismane by joliciel-informatique.

the class LanguageDetectorEventStream method next.

/**
 * Builds the classification event for the next text supplied by the corpus reader.
 * <p>
 * Every language-detector feature is checked against the text and the non-null
 * results are collected; the classification is the language tag of the text's
 * language.
 *
 * @return the event pairing the collected feature results with the language tag
 */
@Override
public ClassificationEvent next() throws TalismaneException {
    LanguageTaggedText taggedText = this.corpusReader.nextText();

    List<FeatureResult<?>> results = new ArrayList<FeatureResult<?>>();
    for (LanguageDetectorFeature<?> detectorFeature : features) {
        FeatureResult<?> result = detectorFeature.check(taggedText.getText(), new RuntimeEnvironment());
        if (result == null) {
            continue;
        }
        results.add(result);
    }

    String classification = taggedText.getLanguage().toLanguageTag();

    if (LOG.isTraceEnabled()) {
        for (FeatureResult<?> result : results) {
            LOG.trace(result.toString());
        }
        LOG.trace("classification: " + classification);
    }

    return new ClassificationEvent(results, classification);
}
Also used : RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) ArrayList(java.util.ArrayList) ClassificationEvent(com.joliciel.talismane.machineLearning.ClassificationEvent) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult)

Aggregations

FeatureResult (com.joliciel.talismane.machineLearning.features.FeatureResult) 22, ArrayList (java.util.ArrayList) 22, RuntimeEnvironment (com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) 18, List (java.util.List) 14, Decision (com.joliciel.talismane.machineLearning.Decision) 11, TreeSet (java.util.TreeSet) 10, TalismaneException (com.joliciel.talismane.TalismaneException) 9, ClassificationEvent (com.joliciel.talismane.machineLearning.ClassificationEvent) 9, IOException (java.io.IOException) 9, Map (java.util.Map) 8, Set (java.util.Set) 8, SortedSet (java.util.SortedSet) 8, Collectors (java.util.stream.Collectors) 8, Logger (org.slf4j.Logger) 8, LoggerFactory (org.slf4j.LoggerFactory) 8, TokenSequence (com.joliciel.talismane.tokeniser.TokenSequence) 5, WeightedOutcome (com.joliciel.talismane.utils.WeightedOutcome) 5, TreeMap (java.util.TreeMap) 5, TalismaneSession (com.joliciel.talismane.TalismaneSession) 4, ClassificationEventStream (com.joliciel.talismane.machineLearning.ClassificationEventStream) 4