Use of zemberek.core.data.Weights in the project zemberek-nlp by ahmetaa.
From the class PerceptronAmbiguityResolverEvaluation, method main.
/**
 * Trains a perceptron ambiguity resolver, stores it as a text model and as a compressed model,
 * and runs the test routine against each stored form.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Build a {@link TurkishMorphology} from the listed dictionary resources.</li>
 *   <li>Load every training file into one {@link DataSet}; load the dev file separately.</li>
 *   <li>Train with the training and dev sets (the literal {@code 7} is passed as the third
 *       argument of {@code train} - presumably the iteration count; confirm against the
 *       trainer).</li>
 *   <li>Prune near-zero weights and save the model as text.</li>
 *   <li>Reload the text model, write its compressed form, and test the reloaded resolver.</li>
 *   <li>Load the compressed model and test it on the same test file.</li>
 * </ol>
 *
 * <p>All input and output locations are hard-coded in the method body.
 *
 * @param args not used
 * @throws IOException declared for the file loading and saving calls made here
 */
public static void main(String[] args) throws IOException {
  // Locations of the corpus files and of the two model outputs.
  Path corpusRoot = Paths.get("/media/ahmetaa/depo/ambiguity");
  Path devFile = corpusRoot.resolve("sak.dev");
  Path textModelFile = Paths.get("morphology/src/main/resources/tr/ambiguity/model");
  Path compressedModelFile =
      Paths.get("morphology/src/main/resources/tr/ambiguity/model-compressed");

  List<Path> trainingFiles = Lists.newArrayList(
      Paths.get("data/gold/gold1.txt"),
      corpusRoot.resolve("www.aljazeera.com.tr-rule-result.txt"),
      corpusRoot.resolve("wowturkey.com-rule-result.txt"),
      corpusRoot.resolve("open-subtitles-tr-2018-rule-result.txt"),
      corpusRoot.resolve("sak.train"),
      corpusRoot.resolve("www.haberturk.com-rule-result.txt"),
      corpusRoot.resolve("www.cnnturk.com-rule-result.txt"));

  RootLexicon lexicon = RootLexicon.builder()
      .addTextDictionaryResources(
          "tr/master-dictionary.dict",
          "tr/non-tdk.dict",
          "tr/proper.dict",
          "tr/proper-from-corpus.dict",
          "tr/abbreviations.dict",
          "tr/person-names.dict")
      .build();
  TurkishMorphology morphology = TurkishMorphology.create(lexicon);

  // Merge all training files, in list order, into a single data set.
  DataSet trainingSet = new DataSet();
  for (Path trainingFile : trainingFiles) {
    DataSet loaded = DataSet.load(trainingFile, morphology);
    trainingSet.add(loaded);
  }
  DataSet devSet = DataSet.load(devFile, morphology);

  PerceptronAmbiguityResolverTrainer trainer = new PerceptronAmbiguityResolverTrainer(morphology);
  PerceptronAmbiguityResolver trained = trainer.train(trainingSet, devSet, 7);

  // Prune before saving so the text model on disk is the pruned one.
  Weights trainedWeights = (Weights) trained.getModel();
  trainedWeights.pruneNearZeroWeights();
  trainedWeights.saveAsText(textModelFile);

  System.out.println("Load model and test");
  PerceptronAmbiguityResolver fromText = PerceptronAmbiguityResolver.fromModelFile(textModelFile);
  Path testFile = corpusRoot.resolve("sak.test");
  // The compressed model is produced from the reloaded text model, not from the in-memory one.
  Weights reloadedWeights = (Weights) fromText.getModel();
  reloadedWeights.compress().serialize(compressedModelFile);
  PerceptronAmbiguityResolverTrainer.test(testFile, morphology, fromText);

  System.out.println("Load compressed model and test");
  PerceptronAmbiguityResolver fromCompressed =
      PerceptronAmbiguityResolver.fromModelFile(compressedModelFile);
  PerceptronAmbiguityResolverTrainer.test(testFile, morphology, fromCompressed);
}
Use of zemberek.core.data.Weights in the project zemberek-nlp by ahmetaa.
From the class PerceptronNer, method loadModel.
/**
 * Loads the per-class models found under {@code modelRoot} and builds a {@link PerceptronNer}
 * from them.
 *
 * <p>The directory is walked to depth 1, so nested directories are not searched. Only entries
 * whose file name ends with {@code ".ner.model"} are read. Each one is loaded with
 * {@code ClassModel.load} and stored under the loaded model's {@code id}; if two files yield
 * the same id, the one processed later replaces the earlier one.
 *
 * @param modelRoot directory that holds the {@code *.ner.model} files
 * @param morphology morphology instance passed on to the {@link PerceptronNer} constructor
 * @return a {@link PerceptronNer} built from the loaded class models
 * @throws IOException if walking {@code modelRoot} fails (presumably also when
 *     {@code ClassModel.load} fails - confirm against that method)
 */
public static PerceptronNer loadModel(Path modelRoot, TurkishMorphology morphology) throws IOException {
  List<Path> files;
  // Files.walk holds open directory handles until the stream is closed, so close it as soon
  // as the matching paths are collected. The type is fully qualified to avoid a new import.
  try (java.util.stream.Stream<Path> entries = Files.walk(modelRoot, 1)) {
    files = entries
        .filter(s -> s.toFile().getName().endsWith(".ner.model"))
        .collect(Collectors.toList());
  }

  Map<String, ClassModel> weightsMap = new HashMap<>();
  for (Path file : files) {
    ClassModel weights = ClassModel.load(file);
    weightsMap.put(weights.id, weights);
  }
  return new PerceptronNer(weightsMap, morphology);
}
Use of zemberek.core.data.Weights in the project zemberek-nlp by ahmetaa.
From the class PerceptronNerTrainer, method averageWeights.
/**
 * Rewrites, in place, the sparse weights of every class model in {@code model}.
 *
 * <p>For each key present in a class's weights, the stored value becomes
 * {@code value - averages[key] / count}, where {@code count} is the entry of {@code counts}
 * for that class id. The models in {@code averages} are only read.
 *
 * @param averages per-class models whose weights supply the value that is divided by the count
 * @param model per-class models whose weights are updated
 * @param counts per-class divisor, looked up by class id
 */
private static void averageWeights(Map<String, ClassModel> averages, Map<String, ClassModel> model, IntValueMap<String> counts) {
  for (Map.Entry<String, ClassModel> classEntry : model.entrySet()) {
    String classId = classEntry.getKey();
    // sparseWeights is cast to Weights on both sides to use its get/put and key iteration.
    Weights current = (Weights) classEntry.getValue().sparseWeights;
    Weights accumulated = (Weights) averages.get(classId).sparseWeights;
    for (String feature : current) {
      // Division binds tighter than subtraction: only the accumulated value is divided.
      current.put(feature, current.get(feature) - accumulated.get(feature) / counts.get(classId));
    }
  }
}
Aggregations