Search in sources :

Example 16 with TaggedWord

use of edu.stanford.nlp.ling.TaggedWord in project CoreNLP by stanfordnlp.

the class TSVTaggedFileReaderTest method testError.

/**
 * Checks that reading the file produced by {@code createBrokenFile()} fails
 * with an {@link IllegalArgumentException}.
 *
 * The test fails if the reader produces any sentence, and also if the reader
 * finishes without producing a sentence or throwing. The second case
 * previously passed unnoticed because the only failure path was inside the
 * loop body.
 *
 * @throws IOException declared for the file/record helper calls
 */
public void testError() throws IOException {
    File file = createBrokenFile();
    TaggedFileRecord record = createRecord(file, "tagColumn=0,wordColumn=1,");
    final String failure = "Should have thrown an error " + " reading a file with no tags";
    try {
        for (List<TaggedWord> sentence : record.reader()) {
            // A sentence came back, so the reader accepted the broken input.
            throw new AssertionError(failure);
        }
    } catch (IllegalArgumentException expected) {
        // The reader rejected the broken file, which is the outcome under test.
        return;
    }
    // No sentence and no exception: the reader silently accepted the file.
    throw new AssertionError(failure);
}
Also used : TaggedWord(edu.stanford.nlp.ling.TaggedWord) File(java.io.File)

Example 17 with TaggedWord

use of edu.stanford.nlp.ling.TaggedWord in project CoreNLP by stanfordnlp.

the class TSVTaggedFileReaderTest method testReadNormal.

/**
 * Reads the file from {@code createTestFile()} with an empty option string and
 * checks the sentence count, the length of the first sentence, and then every
 * word followed by every tag against the expected tables below.
 */
public void testReadNormal() throws IOException {
    // Expected contents, one row per sentence.
    final String[][] expectedWords = { { "A", "B", "C" }, { "D", "E" }, { "F" } };
    final String[][] expectedTags = { { "1", "2", "3" }, { "4", "5" }, { "6" } };

    TaggedFileRecord record = createRecord(createTestFile(), "");
    List<List<TaggedWord>> sentences = new ArrayList<>();
    for (List<TaggedWord> read : record.reader()) {
        sentences.add(read);
    }

    assertEquals(3, sentences.size());
    // Only the first sentence's length is checked explicitly.
    assertEquals(3, sentences.get(0).size());

    // All words first, in reading order ...
    for (int s = 0; s < expectedWords.length; s++) {
        for (int w = 0; w < expectedWords[s].length; w++) {
            assertEquals(expectedWords[s][w], sentences.get(s).get(w).word());
        }
    }
    // ... then all tags, in the same order.
    for (int s = 0; s < expectedTags.length; s++) {
        for (int w = 0; w < expectedTags[s].length; w++) {
            assertEquals(expectedTags[s][w], sentences.get(s).get(w).tag());
        }
    }
}
Also used : TaggedWord(edu.stanford.nlp.ling.TaggedWord) ArrayList(java.util.ArrayList) List(java.util.List) File(java.io.File)

Example 18 with TaggedWord

use of edu.stanford.nlp.ling.TaggedWord in project CoreNLP by stanfordnlp.

the class TestClassifier method test.

/**
   * Test on a file that already contains the correct tags (original note:
   * "when init'ing from trees").
   *
   * Resets {@code numSentences} and {@code confusionMatrix}, optionally opens
   * the ".words", ".un.dict" and ".words.top" output files under
   * {@code saveRoot}, then tags every sentence from {@code fileRecord} and
   * passes each result to {@code processResults}. When
   * {@code config.getNThreads()} is not 1 the sentences go through a
   * {@link MulticoreWrapper}; otherwise they are tagged one at a time in this
   * thread.
   *
   * The output files are closed in a {@code finally} block so they are
   * released even if reading, tagging or result processing throws.
   *
   * TODO: Add the ability to have a second transformer to transform output back; possibly combine this method
   * with method below
   *
   * @throws IOException declared by the original method; propagated from the calls made here
   */
private void test() throws IOException {
    numSentences = 0;
    confusionMatrix = new ConfusionMatrix<>();
    PrintFile pf = null;
    PrintFile pf1 = null;
    PrintFile pf3 = null;
    try {
        if (writeWords) {
            pf = new PrintFile(saveRoot + ".words");
        }
        if (writeUnknDict) {
            pf1 = new PrintFile(saveRoot + ".un.dict");
        }
        if (writeTopWords) {
            pf3 = new PrintFile(saveRoot + ".words.top");
        }
        boolean verboseResults = config.getVerboseResults();
        if (config.getNThreads() != 1) {
            MulticoreWrapper<List<TaggedWord>, TestSentence> wrapper = new MulticoreWrapper<>(config.getNThreads(), new TestSentenceProcessor(maxentTagger));
            for (List<TaggedWord> taggedSentence : fileRecord.reader()) {
                wrapper.put(taggedSentence);
                // Drain whatever results are already available while feeding input.
                while (wrapper.peek()) {
                    processResults(wrapper.poll(), pf, pf1, pf3, verboseResults);
                }
            }
            wrapper.join();
            // Collect the results that were still pending before join().
            while (wrapper.peek()) {
                processResults(wrapper.poll(), pf, pf1, pf3, verboseResults);
            }
        } else {
            for (List<TaggedWord> taggedSentence : fileRecord.reader()) {
                TestSentence testS = new TestSentence(maxentTagger);
                testS.setCorrectTags(taggedSentence);
                testS.tagSentence(taggedSentence, false);
                processResults(testS, pf, pf1, pf3, verboseResults);
            }
        }
    } finally {
        // Close whichever outputs were opened, on both success and failure.
        if (pf != null) {
            pf.close();
        }
        if (pf1 != null) {
            pf1.close();
        }
        if (pf3 != null) {
            pf3.close();
        }
    }
}
Also used : TaggedWord(edu.stanford.nlp.ling.TaggedWord) MulticoreWrapper(edu.stanford.nlp.util.concurrent.MulticoreWrapper) PrintFile(edu.stanford.nlp.io.PrintFile) List(java.util.List)

Example 19 with TaggedWord

use of edu.stanford.nlp.ling.TaggedWord in project CoreNLP by stanfordnlp.

the class TaggerDemo method main.

/**
 * Tags every sentence of a text file and prints each tagged sentence to
 * standard output.
 *
 * @param args exactly two entries: the tagger model file and the file to tag;
 *             with any other count a usage message is logged and the method returns
 * @throws Exception propagated from loading the model, reading the file or tagging
 */
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        log.info("usage: java TaggerDemo modelFile fileToTag");
        return;
    }
    MaxentTagger tagger = new MaxentTagger(args[0]);
    List<List<HasWord>> sentences;
    // try-with-resources closes the input file once tokenization has finished,
    // including when tokenization throws.
    // NOTE(review): FileReader decodes with the platform default charset; left unchanged here.
    try (BufferedReader reader = new BufferedReader(new FileReader(args[1]))) {
        sentences = MaxentTagger.tokenizeText(reader);
    }
    for (List<HasWord> sentence : sentences) {
        List<TaggedWord> tSentence = tagger.tagSentence(sentence);
        System.out.println(SentenceUtils.listToString(tSentence, false));
    }
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) TaggedWord(edu.stanford.nlp.ling.TaggedWord) MaxentTagger(edu.stanford.nlp.tagger.maxent.MaxentTagger) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) List(java.util.List)

Example 20 with TaggedWord

use of edu.stanford.nlp.ling.TaggedWord in project CoreNLP by stanfordnlp.

the class TaggerDemo2 method main.

/**
 * Tags every sentence of a UTF-8 text file using a PTB tokenizer factory and
 * writes the tagged sentences to standard output as UTF-8. It then tags one
 * hard-coded sentence and prints each word whose tag starts with "JJ".
 *
 * @param args exactly two entries: the tagger model file and the file to tag;
 *             with any other count a usage message is logged and the method returns
 * @throws Exception propagated from loading the model, reading the file or tagging
 */
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        log.info("usage: java TaggerDemo2 modelFile fileToTag");
        return;
    }
    MaxentTagger tagger = new MaxentTagger(args[0]);
    TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep");
    // try-with-resources closes the input file and flushes/closes the writer
    // on failure as well as on success. As in the original, closing the
    // writer also closes System.out.
    try (BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
         PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"))) {
        DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
        documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
        for (List<HasWord> sentence : documentPreprocessor) {
            List<TaggedWord> tSentence = tagger.tagSentence(sentence);
            pw.println(SentenceUtils.listToString(tSentence, false));
        }
        // print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence.
        List<HasWord> sent = SentenceUtils.toWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");
        List<TaggedWord> taggedSent = tagger.tagSentence(sent);
        for (TaggedWord tw : taggedSent) {
            if (tw.tag().startsWith("JJ")) {
                pw.println(tw.word());
            }
        }
    }
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) CoreLabelTokenFactory(edu.stanford.nlp.process.CoreLabelTokenFactory) InputStreamReader(java.io.InputStreamReader) FileInputStream(java.io.FileInputStream) CoreLabel(edu.stanford.nlp.ling.CoreLabel) TaggedWord(edu.stanford.nlp.ling.TaggedWord) MaxentTagger(edu.stanford.nlp.tagger.maxent.MaxentTagger) BufferedReader(java.io.BufferedReader) OutputStreamWriter(java.io.OutputStreamWriter) DocumentPreprocessor(edu.stanford.nlp.process.DocumentPreprocessor) PrintWriter(java.io.PrintWriter)

Aggregations

TaggedWord (edu.stanford.nlp.ling.TaggedWord): 43; HasWord (edu.stanford.nlp.ling.HasWord): 9; CoreLabel (edu.stanford.nlp.ling.CoreLabel): 5; DocumentPreprocessor (edu.stanford.nlp.process.DocumentPreprocessor): 5; MaxentTagger (edu.stanford.nlp.tagger.maxent.MaxentTagger): 5; Tree (edu.stanford.nlp.trees.Tree): 5; ArrayList (java.util.ArrayList): 5; Label (edu.stanford.nlp.ling.Label): 4; WordTag (edu.stanford.nlp.ling.WordTag): 4; List (java.util.List): 4; HasTag (edu.stanford.nlp.ling.HasTag): 3; TaggedFileRecord (edu.stanford.nlp.tagger.io.TaggedFileRecord): 3; File (java.io.File): 3; StringReader (java.io.StringReader): 3; Word (edu.stanford.nlp.ling.Word): 2; Morphology (edu.stanford.nlp.process.Morphology): 2; ClassicCounter (edu.stanford.nlp.stats.ClassicCounter): 2; GrammaticalStructure (edu.stanford.nlp.trees.GrammaticalStructure): 2; BufferedReader (java.io.BufferedReader): 2; IOException (java.io.IOException): 2