Use of zemberek.core.embeddings.SubWordHashProvider in the project zemberek-nlp by ahmetaa.
Class FastTextClassifierTrainer, method train.
/**
 * Trains a fastText classifier on the given corpus using the settings held by {@code builder}.
 *
 * <p>The builder values (loss type, dimension, word n-gram order, thread count, epoch count,
 * learning rate, context window size, sub-word hash provider, minimum word count and
 * quantization cut-off) are copied into a supervised {@link Args} instance before training.
 *
 * @param corpus path of the training corpus handed to the underlying {@link FastTextTrainer}.
 * @return a {@link FastTextClassifier} wrapping the result of the training run.
 * @throws RuntimeException if the underlying trainer throws; the original exception is kept as
 *     the cause.
 */
public FastTextClassifier train(Path corpus) {
  Args args = Args.forSupervised();
  args.loss = builder.type == LossType.SOFTMAX
      ? loss_name.softmax
      : loss_name.hierarchicalSoftmax;
  args.dim = builder.dimension;
  args.wordNgrams = builder.wordNgramOrder;
  args.thread = builder.threadCount;
  args.epoch = builder.epochCount;
  args.lr = builder.learningRate;
  args.ws = builder.contextWindowSize;

  // The character n-gram range is taken from the provider itself so that
  // minn/maxn always agree with the provider stored in the args.
  SubWordHashProvider subWordHashProvider = builder.subWordHashProvider;
  args.subWordHashProvider = subWordHashProvider;
  args.minn = subWordHashProvider.getMinN();
  args.maxn = subWordHashProvider.getMaxN();

  args.minCount = builder.minWordCount;
  args.cutoff = builder.quantizationCutOff;

  FastTextTrainer trainer = new FastTextTrainer(args);
  // for catching and forwarding progress events.
  trainer.getEventBus().register(this);

  try {
    return new FastTextClassifier(trainer.train(corpus));
  } catch (Exception e) {
    // Rethrow with context only. The cause carries the full stack trace, so printing it
    // here as well would report the same failure twice and bypass the caller's logging.
    throw new RuntimeException("FastText classifier training failed for corpus: " + corpus, e);
  }
}
Aggregations