Search in sources :

Example 1 with Morphology

use of edu.stanford.nlp.process.Morphology in project CoreNLP by stanfordnlp.

the class MorphaAnnotator method annotate.

/**
 * Adds a lemma annotation to every token of every sentence in the given annotation.
 *
 * <p>For each token, its text and part-of-speech tag are read and handed to
 * {@code addLemma} together with a single {@link Morphology} instance created for this call.
 *
 * @param annotation the annotation to process; must contain
 *     {@code CoreAnnotations.SentencesAnnotation}
 * @throws RuntimeException if the annotation has no {@code SentencesAnnotation} key
 */
@Override
public void annotate(Annotation annotation) {
    if (VERBOSE) {
        log.info("Finding lemmas ...");
    }
    Morphology morphology = new Morphology();
    // Guard clause: without sentences there is nothing to lemmatize.
    if (!annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
        throw new RuntimeException("Unable to find words/tokens in: " + annotation);
    }
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            addLemma(
                    morphology,
                    CoreAnnotations.LemmaAnnotation.class,
                    token,
                    token.get(CoreAnnotations.TextAnnotation.class),
                    token.get(CoreAnnotations.PartOfSpeechAnnotation.class));
        }
    }
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) Morphology(edu.stanford.nlp.process.Morphology) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 2 with Morphology

use of edu.stanford.nlp.process.Morphology in project CoreNLP by stanfordnlp.

the class ParserGrammar method lemmatize.

/**
 * Tags the given tokens and returns one stemmed {@link CoreLabel} per tagged word.
 *
 * <p>Only works on English, as it is hard coded for using the
 * Morphology class, which is English-only.
 *
 * <p>When {@code testOptions.preTag} is set the tags come from the tagger returned by
 * {@code loadTagger()}; otherwise the tokens are parsed and the tagged yield of the
 * resulting tree is used.
 *
 * @param tokens the words to tag and lemmatize
 * @return a new list holding one label per tagged word, each with its word and tag set
 *     and then passed through {@code Morphology.stem}
 */
public List<CoreLabel> lemmatize(List<? extends HasWord> tokens) {
    // Pick the source of POS tags: the pre-tagger if configured, the parser otherwise.
    final List<TaggedWord> taggedWords = getOp().testOptions.preTag
            ? loadTagger().apply(tokens)
            : parse(tokens).taggedYield();

    final Morphology stemmer = new Morphology();
    final List<CoreLabel> result = Generics.newArrayList();
    for (TaggedWord taggedWord : taggedWords) {
        final CoreLabel lemmaLabel = new CoreLabel();
        lemmaLabel.setWord(taggedWord.word());
        lemmaLabel.setTag(taggedWord.tag());
        stemmer.stem(lemmaLabel);
        result.add(lemmaLabel);
    }
    return result;
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) CoreLabel(edu.stanford.nlp.ling.CoreLabel) TaggedWord(edu.stanford.nlp.ling.TaggedWord) Morphology(edu.stanford.nlp.process.Morphology) Tree(edu.stanford.nlp.trees.Tree) List(java.util.List)

Example 3 with Morphology

use of edu.stanford.nlp.process.Morphology in project CoreNLP by stanfordnlp.

the class MaxentTagger method tagCoreLabelsOrHasWords.

/**
 * Tags a sentence, choosing the code path from the runtime type of its first element.
 *
 * <p>If the sentence is non-empty and its first element is a {@link CoreLabel}, the
 * sentence is cast to core labels, tagged via {@code tagCoreLabels}, optionally
 * lemmatized, and returned. Otherwise the result of {@code tagSentence(sentence, false)}
 * is returned.
 *
 * @param sentence the words to tag
 * @param morpha the Morphology to use for lemmatizing; may be {@code null}, in which case
 *     a new one is created when lemmas are requested
 * @param outputLemmas whether to lemmatize the core labels after tagging (only consulted
 *     on the CoreLabel path)
 * @return the tagged core labels, or the tagged-word list from {@code tagSentence}
 */
public List<? extends HasWord> tagCoreLabelsOrHasWords(List<? extends HasWord> sentence, Morphology morpha, boolean outputLemmas) {
    final boolean startsWithCoreLabel = !sentence.isEmpty() && sentence.get(0) instanceof CoreLabel;
    if (!startsWithCoreLabel) {
        // Empty input or plain HasWord elements: fall back to the generic tagger.
        return tagSentence(sentence, false);
    }

    final List<CoreLabel> labels = castCoreLabels(sentence);
    tagCoreLabels(labels);
    if (outputLemmas) {
        // No Morphology supplied by the caller: create a fresh one for this call.
        final Morphology lemmatizer = (morpha != null) ? morpha : new Morphology();
        lemmatize(labels, lemmatizer);
    }
    return labels;
}
Also used : Morphology(edu.stanford.nlp.process.Morphology)

Example 4 with Morphology

use of edu.stanford.nlp.process.Morphology in project CoreNLP by stanfordnlp.

the class MorphologyTest method testDash.

/**
 * Smoke test: stems the string {@code "b-"} with a fresh Morphology.
 * There are no assertions; the test passes as long as the call does not throw.
 */
public void testDash() {
    new Morphology().stem("b-");
}
Also used : Morphology(edu.stanford.nlp.process.Morphology)

Example 5 with Morphology

use of edu.stanford.nlp.process.Morphology in project CoreNLP by stanfordnlp.

the class MaxentTagger method runTaggerStdin.

/**
 * Reads text line by line from {@code reader}, tags each sentence found on a line and
 * writes the result to {@code writer}.
 *
 * <p>Each input line is run through its own {@code DocumentPreprocessor}, configured with
 * the chosen tokenizer factory and the configured sentence delimiter (the value
 * {@code "newline"} is mapped to {@code "\n"}). Every sentence is passed to
 * {@code tagAndOutputSentence}, followed by a newline and a flush. For the {@code XML}
 * and {@code INLINE_XML} output styles the output is wrapped in an XML declaration and a
 * {@code <pos>} element. When the input is exhausted, the accumulated timing and word
 * count are handed to {@code printErrWordsPerSec}.
 *
 * @param reader source of the input lines; reading stops at end of stream
 * @param writer destination for the tagged output
 * @param outputStyle the output format
 * @throws IOException if reading from {@code reader} or writing to {@code writer} fails
 */
public void runTaggerStdin(BufferedReader reader, BufferedWriter writer, OutputStyle outputStyle) throws IOException {
    final TokenizerFactory<? extends HasWord> tokenizerFactory = chooseTokenizerFactory();

    // Running totals reported at the end.
    long elapsedMillis = 0;
    int wordCount = 0;
    int sentenceCount = 0;

    final boolean verbose = config.getOutputVerbosity();
    final boolean withLemmas = config.getOutputLemmas();
    // A Morphology is only needed when lemmas are part of the output.
    Morphology morpha = null;
    if (withLemmas) {
        morpha = new Morphology();
    }

    final boolean xmlOutput = outputStyle == OutputStyle.XML || outputStyle == OutputStyle.INLINE_XML;
    if (xmlOutput) {
        writer.write("<?xml version=\"1.0\" encoding=\"" + config.getEncoding() + "\"?>\n");
        writer.write("<pos>\n");
    }

    String delimiter = config.getSentenceDelimiter();
    if ("newline".equals(delimiter)) {
        delimiter = "\n";
    }

    // readLine() returns null at end of input, which terminates the loop.
    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
        // Everything goes through a document preprocessor, one per input line.
        final DocumentPreprocessor preprocessor = new DocumentPreprocessor(new StringReader(line));
        preprocessor.setTokenizerFactory(tokenizerFactory);
        preprocessor.setSentenceDelimiter(delimiter);
        if (config.keepEmptySentences()) {
            preprocessor.setKeepEmptySentences(true);
        }

        for (List<HasWord> sentence : preprocessor) {
            wordCount += sentence.size();
            final Timing timer = new Timing();
            tagAndOutputSentence(sentence, withLemmas, morpha, outputStyle, verbose, sentenceCount, "", writer);
            elapsedMillis += timer.stop();
            writer.newLine();
            writer.flush();
            sentenceCount++;
        }
    }

    if (xmlOutput) {
        writer.write("</pos>\n");
    }
    writer.flush();
    printErrWordsPerSec(elapsedMillis, wordCount);
}
Also used : Morphology(edu.stanford.nlp.process.Morphology) Timing(edu.stanford.nlp.util.Timing) DocumentPreprocessor(edu.stanford.nlp.process.DocumentPreprocessor)

Aggregations

Morphology (edu.stanford.nlp.process.Morphology): 7, CoreLabel (edu.stanford.nlp.ling.CoreLabel): 2, TaggedWord (edu.stanford.nlp.ling.TaggedWord): 2, Timing (edu.stanford.nlp.util.Timing): 2, CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 1, HasLemma (edu.stanford.nlp.ling.HasLemma): 1, HasTag (edu.stanford.nlp.ling.HasTag): 1, HasWord (edu.stanford.nlp.ling.HasWord): 1, Label (edu.stanford.nlp.ling.Label): 1, DocumentPreprocessor (edu.stanford.nlp.process.DocumentPreprocessor): 1, Tree (edu.stanford.nlp.trees.Tree): 1, CoreMap (edu.stanford.nlp.util.CoreMap): 1, MulticoreWrapper (edu.stanford.nlp.util.concurrent.MulticoreWrapper): 1, List (java.util.List): 1