Use of zemberek.morphology.ambiguity.PerceptronAmbiguityResolver.DecodeResult in the project zemberek-nlp by ahmetaa.
From the class PerceptronAmbiguityResolverTrainer, method train.
/**
 * Runs perceptron training over {@code trainingSet} for {@code iterationCount} passes and
 * returns a resolver built from the averaged weights.
 *
 * <p>On every pass {@code shuffle()} is called on the training set, each non-empty sentence is
 * decoded with the current weights, and {@code updateModel} is invoked only when the decoded
 * path differs from the sentence's reference analysis. At the end of the pass every feature in
 * the averaged weights is updated with the running example count, and the averaged model is
 * evaluated on {@code devSet} through {@code test}.
 *
 * @param trainingSet sentences used for the weight updates
 * @param devSet sentences handed to {@code test} after every pass
 * @param iterationCount number of passes over the training set
 * @return a resolver backed by the averaged weights and a new {@code FeatureExtractor(false)}
 * @throws IllegalStateException if a decoded path differs from the reference analysis and
 *     does not have the same number of tokens
 */
public PerceptronAmbiguityResolver train(DataSet trainingSet, DataSet devSet, int iterationCount) {
  FeatureExtractor featureExtractor = new FeatureExtractor(false);
  Decoder trainingDecoder = new Decoder(weights, featureExtractor);

  // Counts non-empty sentences across all passes; it is never reset between passes and is
  // passed to updateModel / updateAveragedWeights as the current example index.
  int seenExamples = 0;

  int pass = 0;
  while (pass < iterationCount) {
    Log.info("Iteration:" + pass);
    trainingSet.shuffle();

    for (SentenceAnalysis sentence : trainingSet.sentences) {
      if (sentence.size() == 0) {
        continue;
      }
      seenExamples++;

      DecodeResult decoded = trainingDecoder.bestPath(sentence.ambiguousAnalysis());
      List<SingleAnalysis> reference = sentence.bestAnalysis();

      // Weights change only when the decoder disagrees with the reference analysis.
      if (!reference.equals(decoded.bestParse)) {
        if (reference.size() != decoded.bestParse.size()) {
          throw new IllegalStateException(
              "Best parse result must have same amount of tokens with Correct parse."
                  + " \nCorrect = " + reference
                  + " \nBest = " + decoded.bestParse);
        }
        IntValueMap<String> referenceCounts = featureExtractor.extractFeatureCounts(reference);
        IntValueMap<String> decodedCounts =
            featureExtractor.extractFeatureCounts(decoded.bestParse);
        updateModel(referenceCounts, decodedCounts, seenExamples);
      }
    }

    // Bring every averaged feature up to the current example count before evaluating.
    for (String feature : averagedWeights) {
      updateAveragedWeights(feature, seenExamples);
      counts.put(feature, seenExamples);
    }

    Log.info("Testing development set.");
    PerceptronAmbiguityResolver devResolver =
        new PerceptronAmbiguityResolver(averagedWeights, featureExtractor);
    test(devSet, devResolver);

    pass++;
  }

  return new PerceptronAmbiguityResolver(averagedWeights, new FeatureExtractor(false));
}
Use of zemberek.morphology.ambiguity.PerceptronAmbiguityResolver.DecodeResult in the project zemberek-nlp by ahmetaa.
From the class PerceptronAmbiguityResolverTrainer, method test.
/**
 * Decodes every non-empty sentence of {@code set} with {@code resolver} and logs the elapsed
 * time in milliseconds together with the token-level accuracy.
 *
 * <p>A token counts as a hit when the decoded analysis at a position equals the sentence's
 * reference analysis at the same position. When no token is evaluated the accuracy is computed
 * as {@code 0.0 / 0} and is therefore logged as NaN.
 *
 * @param set data set whose sentences are evaluated
 * @param resolver resolver whose decoder produces the path compared against the reference
 * @throws IllegalStateException if a decoded path does not have the same number of tokens as
 *     the reference analysis of its sentence
 */
public static void test(DataSet set, PerceptronAmbiguityResolver resolver) {
  int hit = 0;
  int total = 0;
  Stopwatch sw = Stopwatch.createStarted();
  for (SentenceAnalysis sentence : set.sentences) {
    // Same guard as the training loop: an empty sentence has no tokens to score, so it is
    // not handed to the decoder at all.
    if (sentence.size() == 0) {
      continue;
    }
    DecodeResult result = resolver.getDecoder().bestPath(sentence.ambiguousAnalysis());
    List<SingleAnalysis> bestExpected = sentence.bestAnalysis();

    // Positional comparison below is only meaningful when both paths have the same length.
    // Without this check a longer decoded path fails with an opaque IndexOutOfBoundsException
    // and a shorter one silently drops expected tokens from the total.
    if (bestExpected.size() != result.bestParse.size()) {
      throw new IllegalStateException(
          "Best parse result must have same amount of tokens with Correct parse."
              + " \nCorrect = " + bestExpected
              + " \nBest = " + result.bestParse);
    }

    int i = 0;
    for (SingleAnalysis bestActual : result.bestParse) {
      if (bestExpected.get(i).equals(bestActual)) {
        hit++;
      }
      total++;
      i++;
    }
  }
  Log.info("Elapsed: " + sw.elapsed(TimeUnit.MILLISECONDS));
  Log.info("Word count:" + total + " hit=" + hit
      + String.format(Locale.ENGLISH, " Accuracy:%f", hit * 1.0 / total));
}
Aggregations