Use of edu.stanford.nlp.ling.WordTag in project CoreNLP by stanfordnlp.
Class Morphology, method apply.
/**
 * Processes the argument according to its runtime type.
 *
 * <p>A {@code WordTag} yields a new {@code WordTag} whose word is the result of
 * {@code lemmatize} on the original word and tag, and whose tag is unchanged.
 * A {@code Word} is handed to {@code stem}. Any other object is returned as is.
 *
 * @param in the object to process
 * @return a new {@code WordTag}, the result of {@code stem}, or {@code in} itself
 */
@Override
public Object apply(Object in) {
  if (in instanceof WordTag) {
    final WordTag tagged = (WordTag) in;
    final String pos = tagged.tag();
    return new WordTag(lemmatize(tagged.word(), pos, lexer, lexer.option(1)), pos);
  } else if (in instanceof Word) {
    return stem((Word) in);
  } else {
    // Unsupported types pass through unchanged.
    return in;
  }
}
Use of edu.stanford.nlp.ling.WordTag in project CoreNLP by stanfordnlp.
Class ChineseUnknownWordModel, method main.
/**
 * Ad-hoc console check. Prints whether a handful of sample strings match the
 * proper-name, number and date patterns, then prints the results of a few
 * {@code TaggedWord} / {@code WordTag} lookups and equality comparisons.
 *
 * @param args ignored
 */
public static void main(String[] args) {
  System.out.println("Testing unknown matching");

  String nameSample = "刘·革命";
  System.out.println(nameSample.matches(properNameMatch) ? "hooray names!" : "Uh-oh names!");

  String digitSample = "3000";
  System.out.println(digitSample.matches(numberMatch) ? "hooray numbers!" : "Uh-oh numbers!");

  String percentSampleA = "百分之四十三点二";
  System.out.println(percentSampleA.matches(numberMatch) ? "hooray numbers!" : "Uh-oh numbers!");

  String percentSampleB = "百分之三十八点六";
  System.out.println(percentSampleB.matches(numberMatch) ? "hooray numbers!" : "Uh-oh numbers!");

  String monthSample = "三月";
  System.out.println(monthSample.matches(dateMatch) ? "hooray dates!" : "Uh-oh dates!");

  System.out.println("Testing tagged word");
  ClassicCounter<TaggedWord> counter = new ClassicCounter<>();
  TaggedWord firstTagged = new TaggedWord("w", "t");
  counter.incrementCount(firstTagged);
  TaggedWord secondTagged = new TaggedWord("w", "t2");
  // Same word, different tag: show how the counter lookup and equals() treat them.
  System.out.println(counter.containsKey(secondTagged));
  System.out.println(firstTagged.equals(secondTagged));

  WordTag firstConverted = toWordTag(firstTagged);
  WordTag secondConverted = toWordTag(secondTagged);
  WordTag direct = new WordTag("w", "t2");
  System.out.println(firstConverted.equals(secondConverted));
  System.out.println(secondConverted.equals(direct));
}
Use of edu.stanford.nlp.ling.WordTag in project CoreNLP by stanfordnlp.
Class PairsHolderTest, method testPairsHolder.
/**
 * Fills a {@code PairsHolder} with ten identical ("girl", "NN") pairs, builds a
 * {@code History} over it, and checks two things:
 * <ul>
 *   <li>the index returned when the history is added to the history table equals
 *       the index returned when it is looked up again;</li>
 *   <li>an {@code Extractor(0, false)} applied to the history yields "girl".</li>
 * </ul>
 */
public void testPairsHolder() {
  PairsHolder pairsHolder = new PairsHolder();
  for (int i = 0; i < 10; i++) {
    pairsHolder.add(new WordTag("girl", "NN"));
  }

  MaxentTagger maxentTagger = new MaxentTagger();
  maxentTagger.init(null);
  //maxentTagger.pairs = pairsHolder;
  History h = new History(0, 5, 3, pairsHolder, maxentTagger.extractors);
  TaggerExperiments te = new TaggerExperiments(maxentTagger);

  int x = te.getHistoryTable().add(h);
  //int x = maxentTagger.tHistories.add(h);
  int y = te.getHistoryTable().getIndex(h);
  //int y = maxentTagger.tHistories.getIndex(h);
  assertEquals("Failing to get same index for history", x, y);

  Extractor e = new Extractor(0, false);
  String k = e.extract(h);
  // Expected value goes first so a failure reports "expected girl but was <k>".
  assertEquals("Extractor didn't find stored word", "girl", k);
}
Use of edu.stanford.nlp.ling.WordTag in project CoreNLP by stanfordnlp.
Class ReadDataTagged, method loadFile.
/**
 * Reads every sentence supplied by {@code reader} and records it in this
 * object's training structures.
 *
 * <p>For each sentence, after applying {@code maxentTagger.wordFunction} to the
 * words when one is set, the method:
 * <ul>
 *   <li>registers each non-null token's word under its tag in
 *       {@code maxentTagger.tagTokens};</li>
 *   <li>appends an end-of-sentence word/tag pair;</li>
 *   <li>for every position (including the end-of-sentence one) creates a
 *       {@code History}, adds the word/tag pair to {@code pairs}, adds a
 *       {@code DataWordTag} to {@code v}, and increments the tag count for
 *       that word in {@code wordTagCounts};</li>
 *   <li>updates the running totals and logs progress every 100000 sentences.</li>
 * </ul>
 *
 * @param reader source of tagged sentences; its filename is used in log output
 * @param wordTagCounts per-word tag counters, updated in place
 */
private void loadFile(TaggedFileReader reader, Map<String, IntCounter<String>> wordTagCounts) {
  log.info("Loading tagged words from " + reader.filename());

  // Scratch buffers holding the current sentence; cleared after each sentence.
  List<String> words = new ArrayList<>();
  List<String> tags = new ArrayList<>();

  int numSentences = 0;
  int numWords = 0;
  int maxLen = Integer.MIN_VALUE;
  int minLen = Integer.MAX_VALUE;

  for (List<TaggedWord> sentence : reader) {
    if (maxentTagger.wordFunction != null) {
      // Rewrite each word through the configured word function, keeping its tag.
      List<TaggedWord> mapped = new ArrayList<>(sentence.size());
      for (TaggedWord original : sentence) {
        mapped.add(new TaggedWord(maxentTagger.wordFunction.apply(original.word()), original.tag()));
      }
      sentence = mapped;
    }

    for (TaggedWord tw : sentence) {
      if (tw == null) {
        continue;
      }
      words.add(tw.word());
      tags.add(tw.tag());
      if (!maxentTagger.tagTokens.containsKey(tw.tag())) {
        maxentTagger.tagTokens.put(tw.tag(), Generics.<String>newHashSet());
      }
      maxentTagger.tagTokens.get(tw.tag()).add(tw.word());
    }

    final int sentenceLength = sentence.size();
    maxLen = Math.max(maxLen, sentenceLength);
    minLen = Math.min(minLen, sentenceLength);

    words.add(Tagger.EOS_WORD);
    tags.add(Tagger.EOS_TAG);
    numElements = numElements + sentenceLength + 1;

    // One entry per token plus one for the end-of-sentence marker.
    final int entryCount = sentenceLength + 1;
    for (int i = 0; i < entryCount; i++) {
      // The History is built before the pair for this position is added to pairs.
      History h = new History(totalWords + totalSentences, totalWords + totalSentences + sentenceLength, totalWords + totalSentences + i, pairs, maxentTagger.extractors);
      String tag = tags.get(i);
      String word = words.get(i);
      pairs.add(new WordTag(word, tag));
      int y = maxentTagger.addTag(tag);
      v.add(new DataWordTag(h, y, tag));

      IntCounter<String> countsForWord = wordTagCounts.get(word);
      if (countsForWord == null) {
        countsForWord = new IntCounter<>();
        wordTagCounts.put(word, countsForWord);
      }
      countsForWord.incrementCount(tag, 1);
    }

    totalSentences++;
    totalWords += sentenceLength;
    numSentences++;
    numWords += sentenceLength;
    words.clear();
    tags.clear();

    if ((numSentences % 100000) == 0) {
      log.info("Read " + numSentences + " sentences, min " + minLen + " words, max " + maxLen + " words ... [still reading]");
    }
  }

  log.info("Read " + numWords + " words from " + reader.filename() + " [done].");
  log.info("Read " + numSentences + " sentences, min " + minLen + " words, max " + maxLen + " words.");
}
Use of edu.stanford.nlp.ling.WordTag in project CoreNLP by stanfordnlp.
Class CollocationFinder, method getStemmedWordTagsFromTree.
/**
 * Builds the list of stemmed word/tag pairs for a tree.
 *
 * @param t a tree
 * @return one {@code WordTag} per element of {@code t.taggedYield()}, in the
 *         same order, each obtained by passing that element's word and tag to
 *         {@code Morphology.stemStatic}
 */
private static List<WordTag> getStemmedWordTagsFromTree(Tree t) {
  List<WordTag> result = Generics.newArrayList();
  for (TaggedWord leaf : t.taggedYield()) {
    result.add(Morphology.stemStatic(leaf.word(), leaf.tag()));
  }
  return result;
}
Aggregations