Example usage of edu.stanford.nlp.ling.TaggedWord from the CoreNLP project by stanfordnlp: the testError method of the TSVTaggedFileReaderTest class.
/** Reading a file whose columns carry no tags must fail with IllegalArgumentException. */
public void testError() throws IOException {
    File brokenFile = createBrokenFile();
    TaggedFileRecord brokenRecord = createRecord(brokenFile, "tagColumn=0,wordColumn=1,");
    try {
        // Any successfully produced sentence means the reader failed to detect
        // the malformed input, so fail the test immediately.
        for (List<TaggedWord> ignored : brokenRecord.reader()) {
            throw new AssertionError("Should have thrown an error " + " reading a file with no tags");
        }
    } catch (IllegalArgumentException e) {
        // expected: the broken file must trigger this exception during iteration
    }
}
Example usage of edu.stanford.nlp.ling.TaggedWord from the CoreNLP project by stanfordnlp: the testReadNormal method of the TSVTaggedFileReaderTest class.
/**
 * Reads the well-formed test file with default options and checks every
 * word and tag against the expected contents, sentence by sentence.
 */
public void testReadNormal() throws IOException {
    File file = createTestFile();
    TaggedFileRecord record = createRecord(file, "");
    // Collect every sentence the reader produces.
    List<List<TaggedWord>> sentences = new ArrayList<>();
    for (List<TaggedWord> sentence : record.reader()) {
        sentences.add(sentence);
    }
    // Expected words and tags, grouped by sentence, mirroring createTestFile().
    String[][] expectedWords = { { "A", "B", "C" }, { "D", "E" }, { "F" } };
    String[][] expectedTags  = { { "1", "2", "3" }, { "4", "5" }, { "6" } };
    assertEquals(expectedWords.length, sentences.size());
    for (int i = 0; i < expectedWords.length; ++i) {
        // Unlike the original hand-written assertions, this also verifies the
        // lengths of the second and third sentences.
        assertEquals(expectedWords[i].length, sentences.get(i).size());
        for (int j = 0; j < expectedWords[i].length; ++j) {
            assertEquals(expectedWords[i][j], sentences.get(i).get(j).word());
            assertEquals(expectedTags[i][j], sentences.get(i).get(j).tag());
        }
    }
}
Example usage of edu.stanford.nlp.ling.TaggedWord from the CoreNLP project by stanfordnlp: the test method of the TestClassifier class.
/**
 * Test on a file containing correct tags already. Tags every sentence from
 * {@code fileRecord}, comparing the tagger's output against the gold tags and
 * accumulating results (confusion matrix, sentence count) via processResults.
 * Runs multi-threaded through MulticoreWrapper when config.getNThreads() != 1.
 *
 * TODO: Add the ability to have a second transformer to transform output back; possibly combine this method
 * with method below
 *
 * @throws IOException if reading the tagged file or writing debug output fails
 */
private void test() throws IOException {
    numSentences = 0;
    confusionMatrix = new ConfusionMatrix<>();
    // Optional debug outputs, enabled by configuration flags.
    PrintFile pf = null;
    PrintFile pf1 = null;
    PrintFile pf3 = null;
    try {
        if (writeWords)
            pf = new PrintFile(saveRoot + ".words");
        if (writeUnknDict)
            pf1 = new PrintFile(saveRoot + ".un.dict");
        if (writeTopWords)
            pf3 = new PrintFile(saveRoot + ".words.top");
        boolean verboseResults = config.getVerboseResults();
        if (config.getNThreads() != 1) {
            // Parallel path: feed sentences to the wrapper and drain completed
            // results as they become available, then drain the remainder after join.
            MulticoreWrapper<List<TaggedWord>, TestSentence> wrapper = new MulticoreWrapper<>(config.getNThreads(), new TestSentenceProcessor(maxentTagger));
            for (List<TaggedWord> taggedSentence : fileRecord.reader()) {
                wrapper.put(taggedSentence);
                while (wrapper.peek()) {
                    processResults(wrapper.poll(), pf, pf1, pf3, verboseResults);
                }
            }
            wrapper.join();
            while (wrapper.peek()) {
                processResults(wrapper.poll(), pf, pf1, pf3, verboseResults);
            }
        } else {
            // Single-threaded path: tag each sentence in place.
            for (List<TaggedWord> taggedSentence : fileRecord.reader()) {
                TestSentence testS = new TestSentence(maxentTagger);
                testS.setCorrectTags(taggedSentence);
                testS.tagSentence(taggedSentence, false);
                processResults(testS, pf, pf1, pf3, verboseResults);
            }
        }
    } finally {
        // Close debug outputs even when reading/tagging throws (the original
        // leaked them on the failure path).
        if (pf != null)
            pf.close();
        if (pf1 != null)
            pf1.close();
        if (pf3 != null)
            pf3.close();
    }
}
Example usage of edu.stanford.nlp.ling.TaggedWord from the CoreNLP project by stanfordnlp: the main method of the TaggerDemo class.
/**
 * Demo entry point: loads a tagger model, tags every sentence of the given
 * file, and prints the tagged sentences to stdout.
 *
 * @param args args[0] = path to tagger model, args[1] = path of file to tag
 */
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        log.info("usage: java TaggerDemo modelFile fileToTag");
        return;
    }
    MaxentTagger tagger = new MaxentTagger(args[0]);
    // try-with-resources: the original never closed this reader.
    // NOTE(review): FileReader uses the platform default charset pre-Java 18 —
    // consider an explicit UTF-8 reader as in TaggerDemo2.
    try (BufferedReader reader = new BufferedReader(new FileReader(args[1]))) {
        List<List<HasWord>> sentences = MaxentTagger.tokenizeText(reader);
        for (List<HasWord> sentence : sentences) {
            List<TaggedWord> tSentence = tagger.tagSentence(sentence);
            System.out.println(SentenceUtils.listToString(tSentence, false));
        }
    }
}
Example usage of edu.stanford.nlp.ling.TaggedWord from the CoreNLP project by stanfordnlp: the main method of the TaggerDemo2 class.
/**
 * Demo entry point: loads a tagger model, tags every sentence of the given
 * UTF-8 file via DocumentPreprocessor, prints the tagged sentences, then tags
 * one hard-coded sentence and prints only its adjectives (JJ* tags).
 *
 * @param args args[0] = path to tagger model, args[1] = path of file to tag
 */
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        log.info("usage: java TaggerDemo2 modelFile fileToTag");
        return;
    }
    MaxentTagger tagger = new MaxentTagger(args[0]);
    // Keep otherwise-untokenizable characters instead of dropping them.
    TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep");
    // try-with-resources closes both streams on every path; the original only
    // closed pw on success and never closed r at all.
    try (BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
         PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"))) {
        DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
        documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
        for (List<HasWord> sentence : documentPreprocessor) {
            List<TaggedWord> tSentence = tagger.tagSentence(sentence);
            pw.println(SentenceUtils.listToString(tSentence, false));
        }
        // print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence.
        List<HasWord> sent = SentenceUtils.toWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");
        List<TaggedWord> taggedSent = tagger.tagSentence(sent);
        for (TaggedWord tw : taggedSent) {
            if (tw.tag().startsWith("JJ")) {
                pw.println(tw.word());
            }
        }
    }
}
Aggregations