Search in sources :

Example 1 with PerceptronNer

use of zemberek.ner.PerceptronNer in project zemberek-nlp by ahmetaa.

the class EvaluateNer method run.

/**
 * Compares a hypothesis NER data set against a reference data set and writes an
 * evaluation report to {@code eval-report} under the output directory.
 *
 * <p>When {@code hypothesisPath} is {@code null}, a model is loaded from {@code modelRoot}
 * and applied to the reference data to produce the hypothesis. Otherwise the hypothesis
 * is loaded from {@code hypothesisPath}.
 *
 * @throws Exception if argument validation, data loading or report generation fails.
 */
@Override
public void run() throws Exception {
    initializeOutputDir();
    if (hypothesisPath == null) {
        IOUtil.checkDirectoryArgument(modelRoot, "Model Root");
    } else {
        // Validate the hypothesis file itself; the reference file is validated just below.
        IOUtil.checkFileArgument(hypothesisPath, "Hypothesis File");
    }
    IOUtil.checkFileArgument(referencePath, "Reference File");
    NerDataSet hypothesis;
    NerDataSet reference = NerDataSet.load(referencePath, annotationStyle);
    Log.info("Reference :");
    Log.info(reference.info());
    if (hypothesisPath == null) {
        // No hypothesis given: generate one by running the model on the reference set.
        TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
        PerceptronNer ner = PerceptronNer.loadModel(modelRoot, morphology);
        Stopwatch sw = Stopwatch.createStarted();
        hypothesis = ner.evaluate(reference);
        double secs = sw.elapsed(TimeUnit.MILLISECONDS) / 1000d;
        Log.info("NER is applied to reference data in %.4f seconds.", secs);
    } else {
        hypothesis = NerDataSet.load(hypothesisPath, annotationStyle);
    }
    Log.info("Hypothesis :");
    Log.info(hypothesis.info());
    Path reportPath = outDir.resolve("eval-report");
    PerceptronNerTrainer.evaluationReport(reference, hypothesis, reportPath);
    TestResult result = PerceptronNerTrainer.collectEvaluationData(reference, hypothesis);
    Log.info("Evaluation Result:");
    Log.info(result.dump());
    Log.info("Detailed evaluation report is written in %s", reportPath);
}
Also used : Path(java.nio.file.Path) Stopwatch(com.google.common.base.Stopwatch) NerDataSet(zemberek.ner.NerDataSet) PerceptronNer(zemberek.ner.PerceptronNer) TestResult(zemberek.ner.PerceptronNerTrainer.TestResult) TurkishMorphology(zemberek.morphology.TurkishMorphology)

Example 2 with PerceptronNer

use of zemberek.ner.PerceptronNer in project zemberek-nlp by ahmetaa.

the class FindNamedEntities method run.

/**
 * Reads the input file, extracts sentences, applies the NER model loaded from
 * {@code modelRoot} to each normalized sentence and writes the annotated sentences to
 * a file named after the input with a {@code .ne} suffix under the output directory.
 * Token count, elapsed time and throughput are logged at the end.
 *
 * @throws Exception if argument validation, reading, model loading or writing fails.
 */
@Override
public void run() throws Exception {
    initializeOutputDir();
    IOUtil.checkDirectoryArgument(modelRoot, "Model Root");
    IOUtil.checkFileArgument(inputPath, "Input File");

    Path outputFile = outDir.resolve(inputPath.toFile().getName() + ".ne");
    List<String> inputLines = Files.readAllLines(inputPath, StandardCharsets.UTF_8);
    List<String> extracted = TurkishSentenceExtractor.DEFAULT.fromParagraphs(inputLines);
    Log.info("There are %d lines and about %d sentences", inputLines.size(), extracted.size());

    PerceptronNer ner = PerceptronNer.loadModel(modelRoot, TurkishMorphology.createWithDefaults());
    Stopwatch timer = Stopwatch.createStarted();
    int totalTokens = 0;
    try (PrintWriter writer = new PrintWriter(outputFile.toFile(), "UTF-8")) {
        for (String raw : extracted) {
            // Normalization order: apostrophes, then quotes/hyphens, then spaces/soft hyphens.
            String normalized = TextUtil.normalizeSpacesAndSoftHyphens(
                    TextUtil.normalizeQuotesHyphens(
                            TextUtil.normalizeApostrophes(raw)));
            List<String> tokens = TurkishTokenizer.DEFAULT.tokenizeToStrings(normalized);
            totalTokens += tokens.size();
            writer.println(
                    ner.findNamedEntities(normalized, tokens).getAsTrainingSentence(annotationStyle));
        }
    }

    double elapsedSeconds = timer.elapsed(TimeUnit.MILLISECONDS) / 1000d;
    Log.info("Token count = %s", totalTokens);
    Log.info("File processed in %.4f seconds.", elapsedSeconds);
    Log.info("Speed = %.2f tokens/sec", totalTokens / elapsedSeconds);
    Log.info("Result is written in %s", outputFile);
}
Also used : Path(java.nio.file.Path) NerSentence(zemberek.ner.NerSentence) Stopwatch(com.google.common.base.Stopwatch) PerceptronNer(zemberek.ner.PerceptronNer) TurkishMorphology(zemberek.morphology.TurkishMorphology) PrintWriter(java.io.PrintWriter)

Example 3 with PerceptronNer

use of zemberek.ner.PerceptronNer in project zemberek-nlp by ahmetaa.

the class TrainNerModel method run.

/**
 * Trains a NER model from the training file (and the optional development file) and
 * saves it in both text and compressed form under the output directory, in the
 * {@code model} and {@code model-compressed} sub-directories. Log output is also
 * directed to {@code train-log} in the output directory.
 *
 * @throws Exception if argument validation, data loading, training or saving fails.
 */
@Override
public void run() throws Exception {
    initializeOutputDir();
    IOUtil.checkFileArgument(trainDataPath, "Training file");

    Path textModelDir = outDir.resolve("model");
    Path compressedModelDir = outDir.resolve("model-compressed");
    Path trainLog = outDir.resolve("train-log");
    Log.addFileHandler(trainLog);

    // The development set is optional; validate its path only when one was given.
    boolean hasDevelopmentSet = developmentPath != null;
    if (hasDevelopmentSet) {
        IOUtil.checkFileArgument(developmentPath, "Development file");
    }

    NerDataSet trainData = NerDataSet.load(trainDataPath, annotationStyle);
    Log.info("Training set information:");
    Log.info(trainData.info());

    NerDataSet devData = null;
    if (hasDevelopmentSet) {
        devData = NerDataSet.load(developmentPath, annotationStyle);
        Log.info("Development set information:");
        Log.info(devData.info());
    }

    TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
    Log.info("------------ Training Started --------------------");
    PerceptronNerTrainer trainer = new PerceptronNerTrainer(morphology);
    PerceptronNer trained = trainer.train(trainData, devData, iterationCount, learningRate);

    Files.createDirectories(textModelDir);
    Files.createDirectories(compressedModelDir);
    trained.saveModelAsText(textModelDir);
    trained.saveModelCompressed(compressedModelDir);
    Log.info("Text model is created in %s", textModelDir);
    Log.info("Compressed model is created in %s", compressedModelDir);
}
Also used : Path(java.nio.file.Path) NerDataSet(zemberek.ner.NerDataSet) PerceptronNer(zemberek.ner.PerceptronNer) PerceptronNerTrainer(zemberek.ner.PerceptronNerTrainer) TurkishMorphology(zemberek.morphology.TurkishMorphology)

Example 4 with PerceptronNer

use of zemberek.ner.PerceptronNer in project zemberek-nlp by ahmetaa.

the class UseNer method main.

/**
 * Loads a NER model from the {@code my-model} directory, runs it on a sample sentence
 * and prints each named entity found to standard output.
 *
 * @param args not used.
 * @throws IOException declared by the original example; presumably raised when the
 *     model cannot be read.
 */
public static void main(String[] args) throws IOException {
    // assumes you generated a model in my-model directory.
    Path modelDir = Paths.get("my-model");
    PerceptronNer ner = PerceptronNer.loadModel(modelDir, TurkishMorphology.createWithDefaults());

    String input = "Ali Kaan yarın İstanbul'a gidecek.";
    NerSentence analyzed = ner.findNamedEntities(input);
    for (NamedEntity entity : analyzed.getNamedEntities()) {
        System.out.println(entity);
    }
}
Also used : Path(java.nio.file.Path) NerSentence(zemberek.ner.NerSentence) NamedEntity(zemberek.ner.NamedEntity) PerceptronNer(zemberek.ner.PerceptronNer) TurkishMorphology(zemberek.morphology.TurkishMorphology)

Example 5 with PerceptronNer

use of zemberek.ner.PerceptronNer in project zemberek-nlp by ahmetaa.

the class GenerateNerModel method main.

/**
 * Trains a NER model from the {@code ner-train} file, using {@code ner-test} as the
 * second data set passed to the trainer, and saves the result as a text model in the
 * {@code my-model} directory.
 *
 * @param args not used.
 * @throws IOException declared by the original example; presumably raised when data
 *     cannot be read or the model cannot be written.
 */
public static void main(String[] args) throws IOException {
    // you will need ner-train and ner-test files to run this example.
    Path trainPath = Paths.get("ner-train");
    Path testPath = Paths.get("ner-test");
    Path modelRoot = Paths.get("my-model");
    NerDataSet trainingSet = NerDataSet.load(trainPath, AnnotationStyle.BRACKET);
    // prints information
    Log.info(trainingSet.info());
    NerDataSet testSet = NerDataSet.load(testPath, AnnotationStyle.BRACKET);
    Log.info(testSet.info());
    TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
    // Training occurs here. Result is a PerceptronNer instance.
    // Named locals keep the training parameters and their description in sync:
    // 13 iterations with a 0.1 learning rate.
    int iterationCount = 13;
    float learningRate = 0.1f;
    PerceptronNer ner = new PerceptronNerTrainer(morphology)
            .train(trainingSet, testSet, iterationCount, learningRate);
    Files.createDirectories(modelRoot);
    ner.saveModelAsText(modelRoot);
}
Also used : Path(java.nio.file.Path) NerDataSet(zemberek.ner.NerDataSet) PerceptronNer(zemberek.ner.PerceptronNer) PerceptronNerTrainer(zemberek.ner.PerceptronNerTrainer) TurkishMorphology(zemberek.morphology.TurkishMorphology)

Aggregations

Path (java.nio.file.Path)5 TurkishMorphology (zemberek.morphology.TurkishMorphology)5 PerceptronNer (zemberek.ner.PerceptronNer)5 NerDataSet (zemberek.ner.NerDataSet)3 Stopwatch (com.google.common.base.Stopwatch)2 NerSentence (zemberek.ner.NerSentence)2 PerceptronNerTrainer (zemberek.ner.PerceptronNerTrainer)2 PrintWriter (java.io.PrintWriter)1 NamedEntity (zemberek.ner.NamedEntity)1 TestResult (zemberek.ner.PerceptronNerTrainer.TestResult)1