Use of zemberek.ner.NerSentence in the project zemberek-nlp by ahmetaa.
Shown here: the run method of the class FindNamedEntities.
/**
 * Annotates every sentence of the input file with named entities and writes the result,
 * one sentence per line, to a file named after the input file with a {@code .ne} suffix
 * inside the output directory.
 *
 * <p>The input is read as UTF-8 and split into sentences. Each sentence is passed through
 * the apostrophe, quote/hyphen and space/soft-hyphen normalizers and tokenized before it
 * is handed to the NER model loaded from {@code modelRoot}. The reported time covers only
 * the annotation loop; reading the input and loading the model happen before the
 * stopwatch is started.
 *
 * @throws Exception if any of the steps it calls throws, or if writing the output file
 *     failed.
 */
@Override
public void run() throws Exception {
  initializeOutputDir();
  IOUtil.checkDirectoryArgument(modelRoot, "Model Root");
  IOUtil.checkFileArgument(inputPath, "Input File");
  Path out = outDir.resolve(inputPath.toFile().getName() + ".ne");

  List<String> lines = Files.readAllLines(inputPath, StandardCharsets.UTF_8);
  List<String> sentences = TurkishSentenceExtractor.DEFAULT.fromParagraphs(lines);
  Log.info("There are %d lines and about %d sentences", lines.size(), sentences.size());

  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  PerceptronNer ner = PerceptronNer.loadModel(modelRoot, morphology);

  Stopwatch sw = Stopwatch.createStarted();
  int tokenCount = 0;
  try (PrintWriter pw = new PrintWriter(out.toFile(), "UTF-8")) {
    for (String sentence : sentences) {
      sentence = TextUtil.normalizeApostrophes(sentence);
      sentence = TextUtil.normalizeQuotesHyphens(sentence);
      sentence = TextUtil.normalizeSpacesAndSoftHyphens(sentence);
      List<String> words = TurkishTokenizer.DEFAULT.tokenizeToStrings(sentence);
      tokenCount += words.size();
      NerSentence result = ner.findNamedEntities(sentence, words);
      pw.println(result.getAsTrainingSentence(annotationStyle));
    }
    // PrintWriter never throws on a failed write; it only records the failure.
    // Flush and inspect that state so a truncated output file is not reported as success.
    if (pw.checkError()) {
      throw new java.io.IOException("Failed to write named entity output to " + out);
    }
  }

  double secs = sw.elapsed(TimeUnit.MILLISECONDS) / 1000d;
  Log.info("Token count = %s", tokenCount);
  Log.info("File processed in %.4f seconds.", secs);
  Log.info("Speed = %.2f tokens/sec", tokenCount / secs);
  Log.info("Result is written in %s", out);
}
Use of zemberek.ner.NerSentence in the project zemberek-nlp by ahmetaa.
Shown here: the main method of the class UseNer.
/**
 * Loads a NER model from the {@code my-model} directory, runs it on a sample sentence
 * and prints each named entity it reports to standard output.
 *
 * @param args command line arguments; not used.
 * @throws IOException declared for the model loading and recognition calls.
 */
public static void main(String[] args) throws IOException {
  // A model must already exist in the "my-model" directory before running this.
  Path modelDirectory = Paths.get("my-model");
  PerceptronNer recognizer =
      PerceptronNer.loadModel(modelDirectory, TurkishMorphology.createWithDefaults());

  String sampleText = "Ali Kaan yarın İstanbul'a gidecek.";
  NerSentence analyzed = recognizer.findNamedEntities(sampleText);

  // Print the entities in the order the result lists them.
  for (NamedEntity entity : analyzed.getNamedEntities()) {
    System.out.println(entity);
  }
}
Aggregations