Search in sources :

Example 1 with Decoder

use of zemberek.morphology.ambiguity.PerceptronAmbiguityResolver.Decoder in project zemberek-nlp by ahmetaa.

The method train of the class PerceptronAmbiguityResolverTrainer.

/**
 * Runs {@code iterationCount} training passes over {@code trainingSet} and returns a resolver
 * built from {@code averagedWeights}.
 *
 * <p>Each pass shuffles the training set, decodes every non-empty sentence with a decoder backed
 * by {@code weights}, and calls {@code updateModel} whenever the decoded parse differs from the
 * sentence's best analysis. At the end of a pass every feature key in {@code averagedWeights} is
 * passed to {@code updateAveragedWeights} and stamped in {@code counts} with the running example
 * count, after which the development set is evaluated via {@code test}.
 *
 * @param trainingSet sentences to train on; shuffled in place at the start of every pass
 * @param devSet data set handed to {@code test} after every pass
 * @param iterationCount number of passes; zero or a negative value performs no pass
 * @return a resolver over {@code averagedWeights} with a fresh {@code FeatureExtractor(false)}
 * @throws IllegalStateException if a decoded parse differs in size from the sentence's best
 *     analysis
 */
public PerceptronAmbiguityResolver train(DataSet trainingSet, DataSet devSet, int iterationCount) {
    FeatureExtractor featureExtractor = new FeatureExtractor(false);
    Decoder pathDecoder = new Decoder(weights, featureExtractor);
    // Running count of non-empty sentences seen so far; it is never reset between passes.
    int seenSentences = 0;
    int pass = 0;
    while (pass < iterationCount) {
        Log.info("Iteration:" + pass);
        trainingSet.shuffle();
        for (SentenceAnalysis example : trainingSet.sentences) {
            if (example.size() != 0) {
                seenSentences++;
                DecodeResult decoded = pathDecoder.bestPath(example.ambiguousAnalysis());
                // Only a prediction that differs from the best analysis triggers a model update.
                if (!example.bestAnalysis().equals(decoded.bestParse)) {
                    if (example.bestAnalysis().size() != decoded.bestParse.size()) {
                        String message = "Best parse result must have same amount of tokens with Correct parse." + " \nCorrect = " + example.bestAnalysis() + " \nBest = " + decoded.bestParse;
                        throw new IllegalStateException(message);
                    }
                    IntValueMap<String> goldCounts = featureExtractor.extractFeatureCounts(example.bestAnalysis());
                    IntValueMap<String> predictedCounts = featureExtractor.extractFeatureCounts(decoded.bestParse);
                    updateModel(goldCounts, predictedCounts, seenSentences);
                }
            }
        }
        // End of pass: refresh every averaged feature and record the example count it was updated at.
        for (String featureKey : averagedWeights) {
            updateAveragedWeights(featureKey, seenSentences);
            counts.put(featureKey, seenSentences);
        }
        Log.info("Testing development set.");
        PerceptronAmbiguityResolver snapshot = new PerceptronAmbiguityResolver(averagedWeights, featureExtractor);
        test(devSet, snapshot);
        pass++;
    }
    return new PerceptronAmbiguityResolver(averagedWeights, new FeatureExtractor(false));
}
Also used: DecodeResult (zemberek.morphology.ambiguity.PerceptronAmbiguityResolver.DecodeResult), FeatureExtractor (zemberek.morphology.ambiguity.PerceptronAmbiguityResolver.FeatureExtractor), Decoder (zemberek.morphology.ambiguity.PerceptronAmbiguityResolver.Decoder), SentenceAnalysis (zemberek.morphology.analysis.SentenceAnalysis)

Aggregations

DecodeResult (zemberek.morphology.ambiguity.PerceptronAmbiguityResolver.DecodeResult): 1, Decoder (zemberek.morphology.ambiguity.PerceptronAmbiguityResolver.Decoder): 1, FeatureExtractor (zemberek.morphology.ambiguity.PerceptronAmbiguityResolver.FeatureExtractor): 1, SentenceAnalysis (zemberek.morphology.analysis.SentenceAnalysis): 1