use of zemberek.ner.PerceptronNer in project zemberek-nlp by ahmetaa.
the class EvaluateNer method run.
/**
 * Evaluates a NER hypothesis against a reference data set and logs the result.
 *
 * <p>When {@code hypothesisPath} is {@code null}, the model found under {@code modelRoot} is
 * loaded and applied to the reference data to produce the hypothesis. Otherwise the hypothesis
 * is loaded from {@code hypothesisPath} using the same {@code annotationStyle} as the reference.
 * A report is requested at {@code eval-report} under the output directory.
 *
 * @throws Exception propagated from argument checks, data or model loading, or report generation
 */
@Override
public void run() throws Exception {
  initializeOutputDir();

  if (hypothesisPath == null) {
    // No hypothesis file was given, so a model is needed to generate one.
    IOUtil.checkDirectoryArgument(modelRoot, "Model Root");
  } else {
    // Validate the hypothesis file itself. Previously the reference path was checked here,
    // so a missing or invalid hypothesis file was only detected later during loading.
    IOUtil.checkFileArgument(hypothesisPath, "Hypothesis File");
  }
  IOUtil.checkFileArgument(referencePath, "Reference File");

  NerDataSet hypothesis;
  NerDataSet reference = NerDataSet.load(referencePath, annotationStyle);
  Log.info("Reference :");
  Log.info(reference.info());

  if (hypothesisPath == null) {
    TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
    PerceptronNer ner = PerceptronNer.loadModel(modelRoot, morphology);
    // Only the application of the model is timed, not model loading.
    Stopwatch sw = Stopwatch.createStarted();
    hypothesis = ner.evaluate(reference);
    double secs = sw.elapsed(TimeUnit.MILLISECONDS) / 1000d;
    Log.info("NER is applied to reference data in %.4f seconds.", secs);
  } else {
    hypothesis = NerDataSet.load(hypothesisPath, annotationStyle);
  }
  Log.info("Hypothesis :");
  Log.info(hypothesis.info());

  Path reportPath = outDir.resolve("eval-report");
  PerceptronNerTrainer.evaluationReport(reference, hypothesis, reportPath);
  TestResult result = PerceptronNerTrainer.collectEvaluationData(reference, hypothesis);
  Log.info("Evaluation Result:");
  Log.info(result.dump());
  Log.info("Detailed evaluation report is written in %s", reportPath);
}
use of zemberek.ner.PerceptronNer in project zemberek-nlp by ahmetaa.
the class FindNamedEntities method run.
/**
 * Finds named entities in the sentences of the input file and writes the annotated sentences.
 *
 * <p>The input is read as UTF-8, split into sentences, and each sentence is normalized and
 * tokenized before being passed to the NER model loaded from {@code modelRoot}. One annotated
 * sentence per line is written, formatted with {@code annotationStyle}, to a file named after
 * the input file with a {@code .ne} suffix inside the output directory. Token count and
 * throughput are logged at the end.
 *
 * @throws Exception propagated from argument checks, file access or model loading
 */
@Override
public void run() throws Exception {
  initializeOutputDir();
  IOUtil.checkDirectoryArgument(modelRoot, "Model Root");
  IOUtil.checkFileArgument(inputPath, "Input File");

  final Path outputFile = outDir.resolve(inputPath.toFile().getName() + ".ne");

  final List<String> inputLines = Files.readAllLines(inputPath, StandardCharsets.UTF_8);
  final List<String> extracted = TurkishSentenceExtractor.DEFAULT.fromParagraphs(inputLines);
  Log.info("There are %d lines and about %d sentences", inputLines.size(), extracted.size());

  final PerceptronNer recognizer =
      PerceptronNer.loadModel(modelRoot, TurkishMorphology.createWithDefaults());

  // Timing covers normalization, tokenization, entity detection and writing.
  final Stopwatch timer = Stopwatch.createStarted();
  int tokenCount = 0;
  try (PrintWriter writer = new PrintWriter(outputFile.toFile(), "UTF-8")) {
    for (String rawSentence : extracted) {
      // Normalization order: apostrophes, then quotes/hyphens, then spaces/soft hyphens.
      String normalized = TextUtil.normalizeSpacesAndSoftHyphens(
          TextUtil.normalizeQuotesHyphens(
              TextUtil.normalizeApostrophes(rawSentence)));
      List<String> tokens = TurkishTokenizer.DEFAULT.tokenizeToStrings(normalized);
      tokenCount += tokens.size();
      NerSentence tagged = recognizer.findNamedEntities(normalized, tokens);
      writer.println(tagged.getAsTrainingSentence(annotationStyle));
    }
  }
  final double elapsedSeconds = timer.elapsed(TimeUnit.MILLISECONDS) / 1000d;

  Log.info("Token count = %s", tokenCount);
  Log.info("File processed in %.4f seconds.", elapsedSeconds);
  Log.info("Speed = %.2f tokens/sec", tokenCount / elapsedSeconds);
  Log.info("Result is written in %s", outputFile);
}
use of zemberek.ner.PerceptronNer in project zemberek-nlp by ahmetaa.
the class TrainNerModel method run.
/**
 * Trains a NER model from the training data and saves it in text and compressed form.
 *
 * <p>Outputs are placed under the output directory: {@code model} for the text model,
 * {@code model-compressed} for the compressed model and {@code train-log} for the log file
 * handler. The development set is optional; when {@code developmentPath} is {@code null},
 * {@code null} is passed to the trainer in its place.
 *
 * @throws Exception propagated from argument checks, data loading, training or model saving
 */
@Override
public void run() throws Exception {
  initializeOutputDir();
  IOUtil.checkFileArgument(trainDataPath, "Training file");

  final Path textModelDir = outDir.resolve("model");
  final Path compressedModelDir = outDir.resolve("model-compressed");
  final Path trainLog = outDir.resolve("train-log");
  Log.addFileHandler(trainLog);

  // The development file is validated before any data is loaded.
  if (developmentPath != null) {
    IOUtil.checkFileArgument(developmentPath, "Development file");
  }

  final NerDataSet trainingData = NerDataSet.load(trainDataPath, annotationStyle);
  Log.info("Training set information:");
  Log.info(trainingData.info());

  NerDataSet developmentData = null;
  if (developmentPath != null) {
    developmentData = NerDataSet.load(developmentPath, annotationStyle);
    Log.info("Development set information:");
    Log.info(developmentData.info());
  }

  final TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  Log.info("------------ Training Started --------------------");
  final PerceptronNerTrainer trainer = new PerceptronNerTrainer(morphology);
  final PerceptronNer trained =
      trainer.train(trainingData, developmentData, iterationCount, learningRate);

  // Both target directories are created before either model is written.
  Files.createDirectories(textModelDir);
  Files.createDirectories(compressedModelDir);
  trained.saveModelAsText(textModelDir);
  trained.saveModelCompressed(compressedModelDir);

  Log.info("Text model is created in %s", textModelDir);
  Log.info("Compressed model is created in %s", compressedModelDir);
}
use of zemberek.ner.PerceptronNer in project zemberek-nlp by ahmetaa.
the class UseNer method main.
/**
 * Loads a NER model from the {@code my-model} directory, runs it on a sample Turkish sentence
 * and prints each named entity found to standard output.
 *
 * @param args command line arguments; not used
 * @throws IOException declared for the model loading step
 */
public static void main(String[] args) throws IOException {
  // Expects a previously generated model in the my-model directory.
  Path modelDirectory = Paths.get("my-model");
  PerceptronNer recognizer =
      PerceptronNer.loadModel(modelDirectory, TurkishMorphology.createWithDefaults());

  NerSentence analyzed = recognizer.findNamedEntities("Ali Kaan yarın İstanbul'a gidecek.");

  // One entity per output line.
  analyzed.getNamedEntities().forEach(System.out::println);
}
use of zemberek.ner.PerceptronNer in project zemberek-nlp by ahmetaa.
the class GenerateNerModel method main.
/**
 * Trains a NER model from the {@code ner-train} file, using {@code ner-test} as the second data
 * set handed to the trainer, and saves the result as a text model in {@code my-model}.
 *
 * <p>Both data files are read with {@code AnnotationStyle.BRACKET}.
 *
 * @param args command line arguments; not used
 * @throws IOException declared for the data loading and model writing steps
 */
public static void main(String[] args) throws IOException {
  // You will need ner-train and ner-test files to run this example.
  Path trainPath = Paths.get("ner-train");
  Path testPath = Paths.get("ner-test");
  Path modelRoot = Paths.get("my-model");

  NerDataSet trainingSet = NerDataSet.load(trainPath, AnnotationStyle.BRACKET);
  // Prints information about the training set.
  Log.info(trainingSet.info());

  NerDataSet testSet = NerDataSet.load(testPath, AnnotationStyle.BRACKET);
  Log.info(testSet.info());

  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();

  // Named so the description below cannot drift from the values actually used
  // (the earlier comment said 7 iterations while the call passed 13).
  final int iterationCount = 13;
  final float learningRate = 0.1f;

  // Training occurs here. Result is a PerceptronNer instance.
  // There will be 13 iterations with 0.1 learning rate.
  PerceptronNer ner = new PerceptronNerTrainer(morphology)
      .train(trainingSet, testSet, iterationCount, learningRate);

  Files.createDirectories(modelRoot);
  ner.saveModelAsText(modelRoot);
}
Aggregations