Use of edu.stanford.nlp.ling.BasicDocument in project CoreNLP by stanfordnlp.
In the class WordToTaggedWordProcessor, method main.
/**
 * This will print out some text, recognizing tags. It can be used to
 * test tag breaking. <br> Usage: <code>
 * java edu.stanford.nlp.process.WordToTaggedWordProcessor fileOrUrl
 * </code>
 * <p>
 * An argument starting with {@code http://} or {@code https://} is loaded as a
 * URL and run through a {@code StripTagsProcessor} before further processing;
 * any other argument is opened as a local file. The resulting document is
 * passed through a {@code WordToTaggedWordProcessor} and each element is
 * printed on its own line, prefixed with its zero-based index.
 * <p>
 * If the argument count is wrong, a usage message is printed and the method
 * returns. Any exception raised while loading or processing is caught and its
 * stack trace is printed.
 *
 * @param args Command line argument: a file or URL
 */
public static void main(String[] args) {
  if (args.length != 1) {
    System.out.println("usage: java edu.stanford.nlp.process.WordToTaggedWordProcessor fileOrUrl");
    // Return rather than System.exit(0) so a programmatic caller's JVM is not terminated.
    return;
  }
  String filename = args[0];
  // Treat both plain and TLS web addresses as URLs; everything else is a file path.
  boolean isUrl = filename.startsWith("http://") || filename.startsWith("https://");
  try {
    Document<HasWord, Word, Word> d;
    if (isUrl) {
      Document<HasWord, Word, Word> dpre = new BasicDocument<HasWord>().init(new URL(filename));
      // Remote content goes through the tag stripper before tagging.
      DocumentProcessor<Word, Word, HasWord, Word> notags = new StripTagsProcessor<>();
      d = notags.processDocument(dpre);
    } else {
      d = new BasicDocument<HasWord>().init(new File(filename));
    }
    DocumentProcessor<Word, HasWord, HasWord, Word> proc = new WordToTaggedWordProcessor<>();
    Document<HasWord, Word, HasWord> sentd = proc.processDocument(d);
    int i = 0;
    for (HasWord w : sentd) {
      System.out.println(i + ": " + w);
      i++;
    }
  } catch (Exception e) {
    // Command-line boundary: report the failure instead of propagating it.
    e.printStackTrace();
  }
}
Use of edu.stanford.nlp.ling.BasicDocument in project CoreNLP by stanfordnlp.
In the class PTBEscapingProcessor, method main.
/**
 * This will do the escaping on an input file. Input file should already be tokenized,
 * with tokens separated by whitespace. <br>
 * Usage: java edu.stanford.nlp.process.PTBEscapingProcessor fileOrUrl
 * <p>
 * An argument starting with {@code http://} or {@code https://} is loaded as a
 * URL and run through a {@code StripTagsProcessor} before escaping; any other
 * argument is opened as a local file. In both cases the input is tokenized with
 * the {@code WhitespaceTokenizer} factory. The document is then passed through
 * a {@code PTBEscapingProcessor} and each resulting word is printed on its own
 * line.
 * <p>
 * If the argument count is wrong, a usage message is printed and the method
 * returns. Any exception raised while loading or processing is caught and its
 * stack trace is printed.
 *
 * @param args Command line argument: a file or URL
 */
public static void main(String[] args) {
  if (args.length != 1) {
    System.out.println("usage: java edu.stanford.nlp.process.PTBEscapingProcessor fileOrUrl");
    return;
  }
  String filename = args[0];
  // Treat both plain and TLS web addresses as URLs; everything else is a file path.
  boolean isUrl = filename.startsWith("http://") || filename.startsWith("https://");
  try {
    // initialized below
    Document<String, Word, Word> d;
    if (isUrl) {
      Document<String, Word, Word> dpre = new BasicDocument<String>(WhitespaceTokenizer.factory()).init(new URL(filename));
      // Remote content goes through the tag stripper before escaping.
      DocumentProcessor<Word, Word, String, Word> notags = new StripTagsProcessor<>();
      d = notags.processDocument(dpre);
    } else {
      d = new BasicDocument<String>(WhitespaceTokenizer.factory()).init(new File(filename));
    }
    DocumentProcessor<Word, HasWord, String, Word> proc = new PTBEscapingProcessor<>();
    Document<String, Word, HasWord> newD = proc.processDocument(d);
    for (HasWord word : newD) {
      System.out.println(word);
    }
  } catch (Exception e) {
    // Command-line boundary: report the failure instead of propagating it.
    e.printStackTrace();
  }
}
Aggregations