Use of edu.stanford.nlp.process.Morphology in the CoreNLP project by stanfordnlp.
From the class MorphaAnnotator, method annotate:
/**
 * Walks every token of every sentence in {@code annotation} and hands it,
 * together with its text and part-of-speech tag, to {@code addLemma}.
 *
 * <p>A fresh {@link Morphology} instance is created for each call and shared
 * by all tokens processed during that call.
 *
 * @param annotation the document to process; must contain
 *        {@code CoreAnnotations.SentencesAnnotation}
 * @throws RuntimeException if {@code annotation} has no sentences annotation
 */
@Override
public void annotate(Annotation annotation) {
  if (VERBOSE) {
    log.info("Finding lemmas ...");
  }
  final Morphology lemmatizer = new Morphology();

  // Guard clause: without sentences there is nothing to iterate over.
  if (!annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
    throw new RuntimeException("Unable to find words/tokens in: " + annotation);
  }

  for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      // presumably addLemma stores its result under LemmaAnnotation on the token -- verify in addLemma
      addLemma(
          lemmatizer,
          CoreAnnotations.LemmaAnnotation.class,
          token,
          token.get(CoreAnnotations.TextAnnotation.class),
          token.get(CoreAnnotations.PartOfSpeechAnnotation.class));
    }
  }
}
Use of edu.stanford.nlp.process.Morphology in the CoreNLP project by stanfordnlp.
From the class ParserGrammar, method lemmatize:
/**
 * Tags the given tokens and runs each tagged word through a {@link Morphology}
 * stemmer, returning one {@link CoreLabel} per tagged word.
 *
 * <p>English only: the {@code Morphology} class used here is hard coded and
 * is English-only.
 *
 * <p>Tags come from the tagger returned by {@code loadTagger()} when the
 * {@code preTag} test option is set; otherwise the tokens are parsed and the
 * tags are read off the tagged yield of the resulting tree.
 *
 * @param tokens the words to lemmatize
 * @return a new list of labels, each carrying the word and tag of one tagged
 *         token after {@code Morphology.stem(label)} has been applied to it
 */
public List<CoreLabel> lemmatize(List<? extends HasWord> tokens) {
  final List<TaggedWord> taggedWords = getOp().testOptions.preTag
      ? loadTagger().apply(tokens)
      : parse(tokens).taggedYield();

  final Morphology stemmer = new Morphology();
  final List<CoreLabel> result = Generics.newArrayList();
  for (TaggedWord taggedWord : taggedWords) {
    CoreLabel entry = new CoreLabel();
    entry.setWord(taggedWord.word());
    entry.setTag(taggedWord.tag());
    // stem(...) works on the label in place; its result is not read from a return value here.
    stemmer.stem(entry);
    result.add(entry);
  }
  return result;
}
Use of edu.stanford.nlp.process.Morphology in the CoreNLP project by stanfordnlp.
From the class MaxentTagger, method tagCoreLabelsOrHasWords:
/**
 * Tags a sentence, choosing the code path from the runtime type of its first
 * element.
 *
 * <p>If the sentence is non-empty and its first element is a {@link CoreLabel},
 * the list is cast via {@code castCoreLabels}, passed to {@code tagCoreLabels},
 * optionally lemmatized, and that cast list is returned. In every other case
 * (including an empty sentence) the result of {@code tagSentence(sentence, false)}
 * is returned and {@code morpha}/{@code outputLemmas} are ignored.
 *
 * @param sentence the words to tag
 * @param morpha the {@link Morphology} to lemmatize with; may be {@code null},
 *        in which case a new instance is created when lemmas are requested
 * @param outputLemmas whether to lemmatize (CoreLabel path only)
 * @return the tagged sentence
 */
public List<? extends HasWord> tagCoreLabelsOrHasWords(List<? extends HasWord> sentence, Morphology morpha, boolean outputLemmas) {
  // Anything that is not a non-empty list led by a CoreLabel takes the plain tagging path.
  if (sentence.isEmpty() || !(sentence.get(0) instanceof CoreLabel)) {
    return tagSentence(sentence, false);
  }

  final List<CoreLabel> labels = castCoreLabels(sentence);
  tagCoreLabels(labels);
  if (outputLemmas) {
    // NOTE(review): the original comment here hinted that a fresh instance is
    // created for thread-safety reasons -- confirm whether Morphology objects
    // may be shared across threads.
    final Morphology lemmatizer = (morpha != null) ? morpha : new Morphology();
    lemmatize(labels, lemmatizer);
  }
  return labels;
}
Use of edu.stanford.nlp.process.Morphology in the CoreNLP project by stanfordnlp.
From the class MorphologyTest, method testDash:
/**
 * Smoke test: stems the token {@code "b-"}. Nothing is asserted on the result,
 * so the test presumably only checks that a trailing dash does not make
 * {@code stem} throw.
 */
public void testDash() {
  new Morphology().stem("b-");
}
Use of edu.stanford.nlp.process.Morphology in the CoreNLP project by stanfordnlp.
From the class MaxentTagger, method runTaggerStdin:
/**
 * Reads text from {@code reader} one line at a time, tags every sentence found
 * on each line, and writes the result to {@code writer}.
 *
 * <p>Each input line is wrapped in its own {@code DocumentPreprocessor}, so
 * sentence splitting never spans a line break. After every sentence a line
 * separator is written and the writer is flushed. For the {@code XML} and
 * {@code INLINE_XML} output styles the output is wrapped in an XML declaration
 * and a {@code <pos>} element.
 *
 * <p>Only the time spent inside {@code tagAndOutputSentence} is accumulated;
 * the total is passed, together with the word count, to
 * {@code printErrWordsPerSec} at the end.
 *
 * @param reader source of the text to tag; read until {@code readLine()} returns {@code null}
 * @param writer destination for the tagged output
 * @param outputStyle the output format to produce
 * @throws IOException if reading from {@code reader} or writing to {@code writer} fails
 */
public void runTaggerStdin(BufferedReader reader, BufferedWriter writer, OutputStyle outputStyle) throws IOException {
  final TokenizerFactory<? extends HasWord> tokenizerFactory = chooseTokenizerFactory();

  final boolean verbose = config.getOutputVerbosity();
  final boolean withLemmas = config.getOutputLemmas();
  // A Morphology is only needed (and only created) when lemmas were requested.
  final Morphology morpha = withLemmas ? new Morphology() : null;

  final boolean xmlOutput = (outputStyle == OutputStyle.XML || outputStyle == OutputStyle.INLINE_XML);
  if (xmlOutput) {
    writer.write("<?xml version=\"1.0\" encoding=\"" + config.getEncoding() + "\"?>\n");
    writer.write("<pos>\n");
  }

  // The configured keyword "newline" stands for an actual line-feed delimiter.
  String delimiter = config.getSentenceDelimiter();
  if ("newline".equals(delimiter)) {
    delimiter = "\n";
  }

  // Running totals handed to printErrWordsPerSec once the input is exhausted.
  long elapsedMillis = 0;
  int wordCount = 0;
  int sentenceCount = 0;

  // readLine() returns null at end of input, which terminates the loop.
  for (String line = reader.readLine(); line != null; line = reader.readLine()) {
    // Everything goes through a document preprocessor, one per input line.
    final DocumentPreprocessor preprocessor = new DocumentPreprocessor(new StringReader(line));
    preprocessor.setTokenizerFactory(tokenizerFactory);
    preprocessor.setSentenceDelimiter(delimiter);
    if (config.keepEmptySentences()) {
      preprocessor.setKeepEmptySentences(true);
    }

    for (List<HasWord> sentence : preprocessor) {
      wordCount += sentence.size();
      final Timing timer = new Timing();
      tagAndOutputSentence(sentence, withLemmas, morpha, outputStyle, verbose, sentenceCount, "", writer);
      elapsedMillis += timer.stop();
      writer.newLine();
      writer.flush();
      sentenceCount++;
    }
  }

  if (xmlOutput) {
    writer.write("</pos>\n");
  }
  writer.flush();
  printErrWordsPerSec(elapsedMillis, wordCount);
}
Aggregations