Use of zemberek.ner.NerDataSet in project zemberek-nlp by ahmetaa.
Class EvaluateNer, method run.
/**
 * Evaluates named-entity recognition output against a reference data set.
 *
 * <p>When no hypothesis file is configured ({@code hypothesisPath == null}), the model stored
 * under {@code modelRoot} is loaded and applied to the reference data to produce the hypothesis.
 * Otherwise the hypothesis is loaded from {@code hypothesisPath}. A detailed report is written to
 * {@code eval-report} inside the output directory and a summary is logged.
 *
 * @throws Exception propagated from argument validation, data/model loading or report generation
 */
@Override
public void run() throws Exception {
  // NOTE(review): presumably prepares outDir, which is used below — confirm in the base class.
  initializeOutputDir();

  // Validate whichever hypothesis source is going to be used.
  if (hypothesisPath == null) {
    IOUtil.checkDirectoryArgument(modelRoot, "Model Root");
  } else {
    // Fixed: this used to validate referencePath, so a bad hypothesis path was never caught here.
    IOUtil.checkFileArgument(hypothesisPath, "Hypothesis File");
  }
  IOUtil.checkFileArgument(referencePath, "Reference File");

  NerDataSet hypothesis;
  NerDataSet reference = NerDataSet.load(referencePath, annotationStyle);
  Log.info("Reference :");
  Log.info(reference.info());

  if (hypothesisPath == null) {
    // No hypothesis file: generate the hypothesis by running the model on the reference data.
    TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
    PerceptronNer ner = PerceptronNer.loadModel(modelRoot, morphology);
    Stopwatch sw = Stopwatch.createStarted();
    hypothesis = ner.evaluate(reference);
    double secs = sw.elapsed(TimeUnit.MILLISECONDS) / 1000d;
    Log.info("NER is applied to reference data in %.4f seconds.", secs);
  } else {
    hypothesis = NerDataSet.load(hypothesisPath, annotationStyle);
  }
  Log.info("Hypothesis :");
  Log.info(hypothesis.info());

  Path reportPath = outDir.resolve("eval-report");
  PerceptronNerTrainer.evaluationReport(reference, hypothesis, reportPath);
  TestResult result = PerceptronNerTrainer.collectEvaluationData(reference, hypothesis);
  Log.info("Evaluation Result:");
  Log.info(result.dump());
  Log.info("Detailed evaluation report is written in %s", reportPath);
}
Use of zemberek.ner.NerDataSet in project zemberek-nlp by ahmetaa.
Class TrainNerModel, method run.
/**
 * Trains a perceptron NER model from the configured training file and saves it twice under the
 * output directory: as text in {@code model} and compressed in {@code model-compressed}.
 *
 * <p>A development set is loaded and handed to the trainer only when {@code developmentPath} is
 * set; otherwise {@code null} is passed in its place. Log output is also sent to
 * {@code train-log} in the output directory.
 *
 * @throws Exception propagated from argument validation, data loading, training or model saving
 */
@Override
public void run() throws Exception {
  initializeOutputDir();
  IOUtil.checkFileArgument(trainDataPath, "Training file");

  // Everything this command produces lives under the output directory.
  final Path trainLog = outDir.resolve("train-log");
  final Path textModelDir = outDir.resolve("model");
  final Path compressedModelDir = outDir.resolve("model-compressed");
  Log.addFileHandler(trainLog);

  final boolean hasDevelopmentData = developmentPath != null;
  if (hasDevelopmentData) {
    IOUtil.checkFileArgument(developmentPath, "Development file");
  }

  final NerDataSet trainData = NerDataSet.load(trainDataPath, annotationStyle);
  Log.info("Training set information:");
  Log.info(trainData.info());

  // Stays null when no development file was supplied.
  NerDataSet developmentData = null;
  if (hasDevelopmentData) {
    developmentData = NerDataSet.load(developmentPath, annotationStyle);
    Log.info("Development set information:");
    Log.info(developmentData.info());
  }

  final TurkishMorphology turkishMorphology = TurkishMorphology.createWithDefaults();
  Log.info("------------ Training Started --------------------");
  final PerceptronNerTrainer trainer = new PerceptronNerTrainer(turkishMorphology);
  final PerceptronNer trainedModel =
      trainer.train(trainData, developmentData, iterationCount, learningRate);

  // Create both target directories first, then write the text and the compressed model.
  Files.createDirectories(textModelDir);
  Files.createDirectories(compressedModelDir);
  trainedModel.saveModelAsText(textModelDir);
  trainedModel.saveModelCompressed(compressedModelDir);

  Log.info("Text model is created in %s", textModelDir);
  Log.info("Compressed model is created in %s", compressedModelDir);
}
Use of zemberek.ner.NerDataSet in project zemberek-nlp by ahmetaa.
Class GenerateNerModel, method main.
/**
 * Example: trains a NER model from bracket-annotated data and saves it as text.
 *
 * <p>Reads {@code ner-train} and {@code ner-test} from the working directory, logs information
 * about both sets, trains a {@link PerceptronNer} and writes the text model to {@code my-model}.
 *
 * @param args command-line arguments; not used
 * @throws IOException propagated from data loading, directory creation or model saving
 */
public static void main(String[] args) throws IOException {
  // You will need ner-train and ner-test files to run this example.
  Path trainPath = Paths.get("ner-train");
  Path testPath = Paths.get("ner-test");
  Path modelRoot = Paths.get("my-model");

  // Training hyper-parameters are named here so the description below cannot drift from the
  // values actually used (the old comment claimed 7 iterations while 13 were run).
  final int iterationCount = 13;
  final float learningRate = 0.1f;

  NerDataSet trainingSet = NerDataSet.load(trainPath, AnnotationStyle.BRACKET);
  // Prints information about the training set.
  Log.info(trainingSet.info());

  NerDataSet testSet = NerDataSet.load(testPath, AnnotationStyle.BRACKET);
  Log.info(testSet.info());

  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();

  // Training occurs here. Result is a PerceptronNer instance.
  // There will be 13 iterations with a 0.1 learning rate; the test set is passed as the
  // trainer's second data set argument.
  PerceptronNer ner = new PerceptronNerTrainer(morphology)
      .train(trainingSet, testSet, iterationCount, learningRate);

  Files.createDirectories(modelRoot);
  ner.saveModelAsText(modelRoot);
}
Aggregations