use of edu.stanford.nlp.ling.TaggedWord in project CoreNLP by stanfordnlp.
the class AbstractUnknownWordModelTrainer method train.
/**
 * Trains on one tree: first registers the tree via {@code incrementTreesRead(weight)},
 * then hands every tagged word of the tree's yield to the per-word {@code train}
 * overload together with its zero-based position in that yield.
 *
 * @param tree the tree whose tagged yield is trained on
 * @param weight the weight forwarded to {@code incrementTreesRead} and to each per-word call
 */
@Override
public final void train(Tree tree, double weight) {
  incrementTreesRead(weight);
  List<TaggedWord> taggedWords = tree.taggedYield();
  int position = 0;
  for (TaggedWord taggedWord : taggedWords) {
    // Post-increment: the current word gets the current position, then we advance.
    train(taggedWord, position++, weight);
  }
}
use of edu.stanford.nlp.ling.TaggedWord in project CoreNLP by stanfordnlp.
the class ChineseMaxentLexicon method testOnTreebank.
/**
 * Scores this lexicon against gold-tagged words. For each word the guessed tag is
 * {@code ctlp.basicCategory(getTag(word))}, and it counts as correct when it equals
 * the word's own tag.
 *
 * @param testWords the gold-tagged words to evaluate
 * @return a two-element array: index 0 holds the number of words examined and
 *     index 1 the number whose guessed tag equalled the gold tag
 */
private int[] testOnTreebank(Collection<TaggedWord> testWords) {
  int total = 0;
  int correct = 0;
  for (TaggedWord gold : testWords) {
    String expected = gold.tag();
    String predicted = ctlp.basicCategory(getTag(gold.word()));
    total++;
    if (expected.equals(predicted)) {
      correct++;
    }
  }
  return new int[] {total, correct};
}
use of edu.stanford.nlp.ling.TaggedWord in project CoreNLP by stanfordnlp.
the class ChineseMaxentLexicon method main.
/**
 * Trains a {@code ChineseMaxentLexicon} on one selection of treebank files and prints its
 * tagging accuracy on another selection.
 *
 * <p>Arguments:
 * <ol>
 *   <li>{@code args[0]} - path the treebank is loaded from, for both training and testing</li>
 *   <li>{@code args[1]} - number ranges handed to the file filter that selects training files</li>
 *   <li>{@code args[2]} - number ranges handed to the file filter that selects test files</li>
 *   <li>{@code args[3]} - optional integer feature level; {@code DEFAULT_FEATURE_LEVEL} if absent</li>
 * </ol>
 *
 * <p>If fewer than three arguments are given, a usage line is printed to standard error and
 * the method returns without doing any work.
 *
 * @param args command-line arguments as described above
 * @throws NumberFormatException if {@code args[3]} is present but is not an integer
 */
public static void main(String[] args) {
  // Validate up front so a missing argument gives a usage hint rather than an
  // ArrayIndexOutOfBoundsException part-way through setup.
  if (args.length < 3) {
    System.err.println("usage: java ChineseMaxentLexicon treebankPath trainRanges testRanges [featureLevel]");
    return;
  }

  TreebankLangParserParams tlpParams = new ChineseTreebankParserParams();
  Options op = new Options(tlpParams);
  TreeAnnotator ta = new TreeAnnotator(tlpParams.headFinder(), tlpParams, op);

  log.info("Reading Trees...");
  FileFilter trainFilter = new NumberRangesFileFilter(args[1], true);
  Treebank trainTreebank = tlpParams.memoryTreebank();
  trainTreebank.loadPath(args[0], trainFilter);

  log.info("Annotating trees...");
  Collection<Tree> trainTrees = new ArrayList<>();
  for (Tree tree : trainTreebank) {
    trainTrees.add(ta.transformTree(tree));
  }
  // Drop the reference to the raw treebank so it can be collected; only the
  // annotated trees are needed from here on.
  trainTreebank = null;

  log.info("Training lexicon...");
  Index<String> wordIndex = new HashIndex<>();
  Index<String> tagIndex = new HashIndex<>();
  int featureLevel = DEFAULT_FEATURE_LEVEL;
  if (args.length > 3) {
    featureLevel = Integer.parseInt(args[3]);
  }
  ChineseMaxentLexicon lex = new ChineseMaxentLexicon(op, wordIndex, tagIndex, featureLevel);
  lex.initializeTraining(trainTrees.size());
  lex.train(trainTrees);
  lex.finishTraining();

  log.info("Testing");
  FileFilter testFilter = new NumberRangesFileFilter(args[2], true);
  Treebank testTreebank = tlpParams.memoryTreebank();
  testTreebank.loadPath(args[0], testFilter);
  List<TaggedWord> testWords = new ArrayList<>();
  for (Tree t : testTreebank) {
    testWords.addAll(t.taggedYield());
  }
  int[] totalAndCorrect = lex.testOnTreebank(testWords);
  log.info("done.");
  // Floating-point division: with zero test words this prints NaN rather than throwing.
  System.out.println(totalAndCorrect[1] + " correct out of " + totalAndCorrect[0] + " -- ACC: " + ((double) totalAndCorrect[1]) / totalAndCorrect[0]);
}
use of edu.stanford.nlp.ling.TaggedWord in project CoreNLP by stanfordnlp.
the class ChineseUnknownWordModel method main.
/**
 * Manual smoke test. Prints whether a handful of sample strings match the
 * {@code properNameMatch}, {@code numberMatch} and {@code dateMatch} patterns, then prints
 * the results of a few lookups and equality checks on {@code TaggedWord} and
 * {@code WordTag} objects that share a word but differ in tag.
 *
 * @param args ignored
 */
public static void main(String[] args) {
  System.out.println("Testing unknown matching");

  String properName = "刘·革命";
  System.out.println(properName.matches(properNameMatch) ? "hooray names!" : "Uh-oh names!");

  // One Arabic-digit sample followed by two written-out percentages.
  String[] numerals = {"3000", "百分之四十三点二", "百分之三十八点六"};
  for (String numeral : numerals) {
    System.out.println(numeral.matches(numberMatch) ? "hooray numbers!" : "Uh-oh numbers!");
  }

  String date = "三月";
  System.out.println(date.matches(dateMatch) ? "hooray dates!" : "Uh-oh dates!");

  System.out.println("Testing tagged word");
  ClassicCounter<TaggedWord> counts = new ClassicCounter<>();
  TaggedWord first = new TaggedWord("w", "t");
  counts.incrementCount(first);
  // Same word as the counted entry, different tag.
  TaggedWord second = new TaggedWord("w", "t2");
  System.out.println(counts.containsKey(second));
  System.out.println(first.equals(second));

  WordTag firstAsWordTag = toWordTag(first);
  WordTag secondAsWordTag = toWordTag(second);
  WordTag builtDirectly = new WordTag("w", "t2");
  System.out.println(firstAsWordTag.equals(secondAsWordTag));
  System.out.println(secondAsWordTag.equals(builtDirectly));
}
use of edu.stanford.nlp.ling.TaggedWord in project CoreNLP by stanfordnlp.
the class TSVTaggedFileReaderTest method testError.
/**
 * Verifies that reading the file produced by {@code createBrokenFile()} with the column
 * spec {@code "tagColumn=0,wordColumn=1,"} is rejected with an
 * {@link IllegalArgumentException}.
 *
 * <p>The test fails if the reader hands back any sentence before raising that exception,
 * and also if the reader finishes without raising it at all. Without that second check,
 * an empty, exception-free read would pass silently.
 *
 * @throws IOException declared for the file-creating helpers this test calls
 */
public void testError() throws IOException {
  File file = createBrokenFile();
  TaggedFileRecord record = createRecord(file, "tagColumn=0,wordColumn=1,");
  try {
    // Iterating is what drives the reader; getting even one sentence back is a failure.
    for (List<TaggedWord> sentence : record.reader()) {
      throw new AssertionError("Should have thrown an error " + " reading a file with no tags");
    }
  } catch (IllegalArgumentException expected) {
    // The reader rejected the broken file, which is the outcome under test.
    return;
  }
  // Reached only when the reader yielded nothing and raised nothing: still a failure.
  throw new AssertionError("Should have thrown an error " + " reading a file with no tags");
}
Aggregations