Search in sources :

Example 1 with WordTag

use of edu.stanford.nlp.ling.WordTag in project CoreNLP by stanfordnlp.

the class Morphology method apply.

/**
 * Processes the argument according to its runtime type.
 *
 * @param in the object to process
 * @return for a {@code WordTag}, a new {@code WordTag} built from the result of
 *     {@code lemmatize} on its word and its original tag; for a {@code Word}, the result of
 *     {@code stem}; for anything else, {@code in} itself, unchanged
 */
@Override
public Object apply(Object in) {
    if (in instanceof WordTag) {
        WordTag tagged = (WordTag) in;
        String pos = tagged.tag();
        String lemma = lemmatize(tagged.word(), pos, lexer, lexer.option(1));
        return new WordTag(lemma, pos);
    } else if (in instanceof Word) {
        return stem((Word) in);
    } else {
        // Unsupported types are passed through untouched.
        return in;
    }
}
Also used : Word(edu.stanford.nlp.ling.Word) WordTag(edu.stanford.nlp.ling.WordTag)

Example 2 with WordTag

use of edu.stanford.nlp.ling.WordTag in project CoreNLP by stanfordnlp.

the class ChineseUnknownWordModel method main.

/**
 * Manual smoke test. Prints whether a handful of sample strings match the proper-name, number
 * and date patterns, then prints several containment/equality checks on {@code TaggedWord} and
 * {@code WordTag} objects that share a word but differ in tag.
 *
 * @param args unused
 */
public static void main(String[] args) {
    System.out.println("Testing unknown matching");

    String nameSample = "刘·革命";
    System.out.println(nameSample.matches(properNameMatch) ? "hooray names!" : "Uh-oh names!");

    // One plain-digit sample followed by two written-out percentage samples.
    String[] numberSamples = { "3000", "百分之四十三点二", "百分之三十八点六" };
    for (String numberSample : numberSamples) {
        System.out.println(numberSample.matches(numberMatch) ? "hooray numbers!" : "Uh-oh numbers!");
    }

    String dateSample = "三月";
    System.out.println(dateSample.matches(dateMatch) ? "hooray dates!" : "Uh-oh dates!");

    System.out.println("Testing tagged word");
    ClassicCounter<TaggedWord> counter = new ClassicCounter<>();
    TaggedWord first = new TaggedWord("w", "t");
    counter.incrementCount(first);
    TaggedWord second = new TaggedWord("w", "t2");
    System.out.println(counter.containsKey(second));
    System.out.println(first.equals(second));

    WordTag firstAsWordTag = toWordTag(first);
    WordTag secondAsWordTag = toWordTag(second);
    WordTag directWordTag = new WordTag("w", "t2");
    System.out.println(firstAsWordTag.equals(secondAsWordTag));
    System.out.println(secondAsWordTag.equals(directWordTag));
}
Also used : TaggedWord(edu.stanford.nlp.ling.TaggedWord) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) WordTag(edu.stanford.nlp.ling.WordTag)

Example 3 with WordTag

use of edu.stanford.nlp.ling.WordTag in project CoreNLP by stanfordnlp.

the class PairsHolderTest method testPairsHolder.

/**
 * Fills a {@code PairsHolder} with ten ("girl", "NN") pairs, then checks that
 * (1) a {@code History} added to the tagger experiments' history table is looked up under the
 * same index it was added with, and
 * (2) an {@code Extractor} constructed with {@code (0, false)} extracts "girl" from that history.
 */
public void testPairsHolder() {
    PairsHolder pairsHolder = new PairsHolder();
    for (int i = 0; i < 10; i++) {
        pairsHolder.add(new WordTag("girl", "NN"));
    }
    MaxentTagger maxentTagger = new MaxentTagger();
    maxentTagger.init(null);
    //maxentTagger.pairs = pairsHolder;
    History h = new History(0, 5, 3, pairsHolder, maxentTagger.extractors);
    TaggerExperiments te = new TaggerExperiments(maxentTagger);
    int x = te.getHistoryTable().add(h);
    //int x = maxentTagger.tHistories.add(h);
    int y = te.getHistoryTable().getIndex(h);
    //int y = maxentTagger.tHistories.getIndex(h);
    assertEquals("Failing to get same index for history", x, y);
    Extractor e = new Extractor(0, false);
    String k = e.extract(h);
    // assertEquals takes (message, expected, actual): the literal is the expected value, so that
    // a failure reports "expected:<girl> but was:<...>" rather than the reverse.
    assertEquals("Extractor didn't find stored word", "girl", k);
}
Also used : WordTag(edu.stanford.nlp.ling.WordTag)

Example 4 with WordTag

use of edu.stanford.nlp.ling.WordTag in project CoreNLP by stanfordnlp.

the class ReadDataTagged method loadFile.

/**
 * Reads every sentence supplied by {@code reader} and records it in this object's training data.
 *
 * <p>For each sentence: if {@code maxentTagger.wordFunction} is non-null, every word is first
 * mapped through it; each non-null tagged word is registered in {@code maxentTagger.tagTokens};
 * an end-of-sentence word/tag ({@code Tagger.EOS_WORD}, {@code Tagger.EOS_TAG}) is appended; and
 * for every position (including the end-of-sentence one) a {@code History}, a {@code WordTag}
 * pair and a {@code DataWordTag} are created and the word/tag count is incremented in
 * {@code wordTagCounts}. Running totals and min/max sentence lengths are logged.
 *
 * @param reader source of tagged sentences; its {@code filename()} is used in log messages
 * @param wordTagCounts per-word tag counters, updated in place
 */
private void loadFile(TaggedFileReader reader, Map<String, IntCounter<String>> wordTagCounts) {
    log.info("Loading tagged words from " + reader.filename());
    List<String> sentenceWords = new ArrayList<>();
    List<String> sentenceTags = new ArrayList<>();
    int sentencesRead = 0;
    int wordsRead = 0;
    int longest = Integer.MIN_VALUE;
    int shortest = Integer.MAX_VALUE;
    for (List<TaggedWord> rawSentence : reader) {
        List<TaggedWord> sentence = rawSentence;
        if (maxentTagger.wordFunction != null) {
            // Normalize each word through the tagger's word function, keeping its tag.
            List<TaggedWord> mapped = new ArrayList<>(rawSentence.size());
            for (TaggedWord original : rawSentence) {
                mapped.add(new TaggedWord(maxentTagger.wordFunction.apply(original.word()), original.tag()));
            }
            sentence = mapped;
        }
        for (TaggedWord tw : sentence) {
            if (tw == null) {
                continue;
            }
            sentenceWords.add(tw.word());
            sentenceTags.add(tw.tag());
            if (!maxentTagger.tagTokens.containsKey(tw.tag())) {
                maxentTagger.tagTokens.put(tw.tag(), Generics.<String>newHashSet());
            }
            maxentTagger.tagTokens.get(tw.tag()).add(tw.word());
        }
        final int length = sentence.size();
        longest = Math.max(length, longest);
        shortest = Math.min(length, shortest);
        sentenceWords.add(Tagger.EOS_WORD);
        sentenceTags.add(Tagger.EOS_TAG);
        numElements += length + 1;
        // Visit every position of the sentence plus the trailing end-of-sentence marker.
        for (int pos = 0; pos <= length; pos++) {
            // The History is built before the pair for this position is appended to pairs.
            History history = new History(totalWords + totalSentences, totalWords + totalSentences + length, totalWords + totalSentences + pos, pairs, maxentTagger.extractors);
            String currentTag = sentenceTags.get(pos);
            String currentWord = sentenceWords.get(pos);
            pairs.add(new WordTag(currentWord, currentTag));
            int tagIndex = maxentTagger.addTag(currentTag);
            v.add(new DataWordTag(history, tagIndex, currentTag));
            IntCounter<String> countsForWord = wordTagCounts.get(currentWord);
            if (countsForWord == null) {
                countsForWord = new IntCounter<>();
                wordTagCounts.put(currentWord, countsForWord);
            }
            countsForWord.incrementCount(currentTag, 1);
        }
        totalSentences++;
        totalWords += length;
        sentencesRead++;
        wordsRead += length;
        sentenceWords.clear();
        sentenceTags.clear();
        if ((sentencesRead % 100000) == 0) {
            log.info("Read " + sentencesRead + " sentences, min " + shortest + " words, max " + longest + " words ... [still reading]");
        }
    }
    log.info("Read " + wordsRead + " words from " + reader.filename() + " [done].");
    log.info("Read " + sentencesRead + " sentences, min " + shortest + " words, max " + longest + " words.");
}
Also used : TaggedWord(edu.stanford.nlp.ling.TaggedWord) ArrayList(java.util.ArrayList) WordTag(edu.stanford.nlp.ling.WordTag)

Example 5 with WordTag

use of edu.stanford.nlp.ling.WordTag in project CoreNLP by stanfordnlp.

the class CollocationFinder method getStemmedWordTagsFromTree.

/**
   * Collects one stemmed WordTag per entry of the tree's tagged yield.
   *
   * @param t a tree
   * @return a list holding, in tagged-yield order, the result of
   * {@code Morphology.stemStatic(word, tag)} for each tagged word of the tree.
   */
private static List<WordTag> getStemmedWordTagsFromTree(Tree t) {
    List<WordTag> result = Generics.newArrayList();
    for (TaggedWord tagged : t.taggedYield()) {
        result.add(Morphology.stemStatic(tagged.word(), tagged.tag()));
    }
    return result;
}
Also used : TaggedWord(edu.stanford.nlp.ling.TaggedWord) WordTag(edu.stanford.nlp.ling.WordTag)

Aggregations

WordTag (edu.stanford.nlp.ling.WordTag): 8, TaggedWord (edu.stanford.nlp.ling.TaggedWord): 4, Word (edu.stanford.nlp.ling.Word): 1, ClassicCounter (edu.stanford.nlp.stats.ClassicCounter): 1, ArrayList (java.util.ArrayList): 1