Search in sources :

Example 11 with TaggedWord

use of edu.stanford.nlp.ling.TaggedWord in project CoreNLP by stanfordnlp.

the class AbstractUnknownWordModelTrainer method train.

/**
 * Trains on a single tree: records the tree via {@code incrementTreesRead(weight)} and
 * then trains on each tagged word of the tree's yield in order.
 *
 * @param tree the tree whose tagged yield is used for training
 * @param weight the weight passed along for the tree and for each of its words
 */
@Override
public final void train(Tree tree, double weight) {
    incrementTreesRead(weight);
    List<TaggedWord> taggedWords = tree.taggedYield();
    // Zero-based index of the current word within the yield.
    int position = 0;
    for (TaggedWord taggedWord : taggedWords) {
        train(taggedWord, position++, weight);
    }
}
Also used : TaggedWord(edu.stanford.nlp.ling.TaggedWord)

Example 12 with TaggedWord

use of edu.stanford.nlp.ling.TaggedWord in project CoreNLP by stanfordnlp.

the class ChineseMaxentLexicon method testOnTreebank.

/**
 * Tags every test word and counts how often the basic category of the guessed tag
 * equals the word's own (gold) tag.
 *
 * @param testWords the gold-tagged words to evaluate on
 * @return a two-element array: index 0 is the number of words seen, index 1 is the
 *         number of words whose guessed tag matched the gold tag
 */
private int[] testOnTreebank(Collection<TaggedWord> testWords) {
    int total = 0;
    int correct = 0;
    for (TaggedWord testWord : testWords) {
        String expected = testWord.tag();
        String predicted = ctlp.basicCategory(getTag(testWord.word()));
        total++;
        if (expected.equals(predicted)) {
            correct++;
        }
    }
    return new int[] { total, correct };
}
Also used : TaggedWord(edu.stanford.nlp.ling.TaggedWord)

Example 13 with TaggedWord

use of edu.stanford.nlp.ling.TaggedWord in project CoreNLP by stanfordnlp.

the class ChineseMaxentLexicon method main.

/**
 * Trains a {@code ChineseMaxentLexicon} on one set of treebank files and prints its
 * tagging accuracy on another set.
 *
 * <p>Arguments:
 * <ol>
 *   <li>{@code args[0]} - treebank path, loaded for both training and testing</li>
 *   <li>{@code args[1]} - file number ranges selecting the training files</li>
 *   <li>{@code args[2]} - file number ranges selecting the test files</li>
 *   <li>{@code args[3]} - optional integer feature level; {@code DEFAULT_FEATURE_LEVEL}
 *       is used when it is absent</li>
 * </ol>
 *
 * @param args command-line arguments as described above
 * @throws IllegalArgumentException if fewer than three arguments are given
 * @throws NumberFormatException if {@code args[3]} is present but not an integer
 */
public static void main(String[] args) {
    // Fail early with a usage message instead of an ArrayIndexOutOfBoundsException below.
    if (args.length < 3) {
        throw new IllegalArgumentException("Expected at least 3 arguments but got " + args.length + ". Usage: ChineseMaxentLexicon treebankPath trainFileRanges testFileRanges [featureLevel]");
    }
    TreebankLangParserParams tlpParams = new ChineseTreebankParserParams();
    Options op = new Options(tlpParams);
    TreeAnnotator ta = new TreeAnnotator(tlpParams.headFinder(), tlpParams, op);
    log.info("Reading Trees...");
    FileFilter trainFilter = new NumberRangesFileFilter(args[1], true);
    Treebank trainTreebank = tlpParams.memoryTreebank();
    trainTreebank.loadPath(args[0], trainFilter);
    log.info("Annotating trees...");
    Collection<Tree> trainTrees = new ArrayList<>();
    for (Tree tree : trainTreebank) {
        trainTrees.add(ta.transformTree(tree));
    }
    // Drop the reference to the raw treebank so it can be collected; only the
    // annotated trees are needed from here on.
    trainTreebank = null;
    log.info("Training lexicon...");
    Index<String> wordIndex = new HashIndex<>();
    Index<String> tagIndex = new HashIndex<>();
    int featureLevel = DEFAULT_FEATURE_LEVEL;
    if (args.length > 3) {
        featureLevel = Integer.parseInt(args[3]);
    }
    ChineseMaxentLexicon lex = new ChineseMaxentLexicon(op, wordIndex, tagIndex, featureLevel);
    lex.initializeTraining(trainTrees.size());
    lex.train(trainTrees);
    lex.finishTraining();
    log.info("Testing");
    FileFilter testFilter = new NumberRangesFileFilter(args[2], true);
    Treebank testTreebank = tlpParams.memoryTreebank();
    testTreebank.loadPath(args[0], testFilter);
    // Flatten all test trees into one list of gold-tagged words.
    List<TaggedWord> testWords = new ArrayList<>();
    for (Tree t : testTreebank) {
        testWords.addAll(t.taggedYield());
    }
    int[] totalAndCorrect = lex.testOnTreebank(testWords);
    log.info("done.");
    // Index 0 is the total word count, index 1 the number tagged correctly.
    System.out.println(totalAndCorrect[1] + " correct out of " + totalAndCorrect[0] + " -- ACC: " + ((double) totalAndCorrect[1]) / totalAndCorrect[0]);
}
Also used : NumberRangesFileFilter(edu.stanford.nlp.io.NumberRangesFileFilter) Treebank(edu.stanford.nlp.trees.Treebank) TaggedWord(edu.stanford.nlp.ling.TaggedWord) Tree(edu.stanford.nlp.trees.Tree) TreebankLanguagePack(edu.stanford.nlp.trees.TreebankLanguagePack)

Example 14 with TaggedWord

use of edu.stanford.nlp.ling.TaggedWord in project CoreNLP by stanfordnlp.

the class ChineseUnknownWordModel method main.

/**
 * Manual smoke test. Prints whether a few sample strings match the proper-name,
 * number and date patterns, then prints the results of some {@code TaggedWord}
 * and {@code WordTag} counter-lookup and equality checks.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    System.out.println("Testing unknown matching");

    String nameSample = "刘·革命";
    System.out.println(nameSample.matches(properNameMatch) ? "hooray names!" : "Uh-oh names!");

    // All number samples are checked against the same pattern, in this order.
    String[] numberSamples = { "3000", "百分之四十三点二", "百分之三十八点六" };
    for (String numberSample : numberSamples) {
        System.out.println(numberSample.matches(numberMatch) ? "hooray numbers!" : "Uh-oh numbers!");
    }

    String dateSample = "三月";
    System.out.println(dateSample.matches(dateMatch) ? "hooray dates!" : "Uh-oh dates!");

    System.out.println("Testing tagged word");
    ClassicCounter<TaggedWord> counter = new ClassicCounter<>();
    TaggedWord counted = new TaggedWord("w", "t");
    counter.incrementCount(counted);
    // Same word as the counted entry, but with a different tag.
    TaggedWord retagged = new TaggedWord("w", "t2");
    System.out.println(counter.containsKey(retagged));
    System.out.println(counted.equals(retagged));

    WordTag countedAsWordTag = toWordTag(counted);
    WordTag retaggedAsWordTag = toWordTag(retagged);
    WordTag directWordTag = new WordTag("w", "t2");
    System.out.println(countedAsWordTag.equals(retaggedAsWordTag));
    System.out.println(retaggedAsWordTag.equals(directWordTag));
}
Also used : TaggedWord(edu.stanford.nlp.ling.TaggedWord) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) WordTag(edu.stanford.nlp.ling.WordTag)

Example 15 with TaggedWord

use of edu.stanford.nlp.ling.TaggedWord in project CoreNLP by stanfordnlp.

the class TSVTaggedFileReaderTest method testError.

/**
 * Checks that reading the file produced by {@code createBrokenFile()} with the given
 * column description does not yield any sentence: iteration is expected to fail with
 * an {@link IllegalArgumentException}, and yielding a sentence raises an
 * {@link AssertionError}. Note that a reader producing no sentences at all also
 * passes.
 *
 * @throws IOException declared for the file-creating helper
 */
public void testError() throws IOException {
    TaggedFileRecord brokenRecord = createRecord(createBrokenFile(), "tagColumn=0,wordColumn=1,");
    try {
        for (List<TaggedWord> unexpectedSentence : brokenRecord.reader()) {
            // Reaching the loop body means a sentence was read successfully.
            throw new AssertionError("Should have thrown an error " + " reading a file with no tags");
        }
    } catch (IllegalArgumentException expected) {
        // Expected outcome; nothing to do.
    }
}
Also used : TaggedWord(edu.stanford.nlp.ling.TaggedWord) File(java.io.File)

Aggregations

TaggedWord (edu.stanford.nlp.ling.TaggedWord) 43, HasWord (edu.stanford.nlp.ling.HasWord) 9, CoreLabel (edu.stanford.nlp.ling.CoreLabel) 5, DocumentPreprocessor (edu.stanford.nlp.process.DocumentPreprocessor) 5, MaxentTagger (edu.stanford.nlp.tagger.maxent.MaxentTagger) 5, Tree (edu.stanford.nlp.trees.Tree) 5, ArrayList (java.util.ArrayList) 5, Label (edu.stanford.nlp.ling.Label) 4, WordTag (edu.stanford.nlp.ling.WordTag) 4, List (java.util.List) 4, HasTag (edu.stanford.nlp.ling.HasTag) 3, TaggedFileRecord (edu.stanford.nlp.tagger.io.TaggedFileRecord) 3, File (java.io.File) 3, StringReader (java.io.StringReader) 3, Word (edu.stanford.nlp.ling.Word) 2, Morphology (edu.stanford.nlp.process.Morphology) 2, ClassicCounter (edu.stanford.nlp.stats.ClassicCounter) 2, GrammaticalStructure (edu.stanford.nlp.trees.GrammaticalStructure) 2, BufferedReader (java.io.BufferedReader) 2, IOException (java.io.IOException) 2