Use of edu.stanford.nlp.sequences.PlainTextDocumentReaderAndWriter.OutputStyle in project CoreNLP by stanfordnlp.
From the class MaxentTagger: method tagFromXML (InputStream overload).
/**
 * Tags the text found inside the given XML elements of {@code input} and
 * writes the transformed document to {@code writer}.
 *
 * Unlike running runTagger in XML mode, which discards the surrounding
 * structure and returns tagged plain text, this method keeps the XML
 * structure outside of the elements being tagged.
 *
 * The SAX handler is chosen from the configured output format: XML and
 * INLINE_XML use {@code TransformXML.NoEscapingSAXInterface}, while
 * SLASH_TAGS and TSV use {@code TransformXML.SAXInterface}.
 *
 * @param input   stream holding the XML document to process
 * @param writer  destination for the transformed output
 * @param xmlTags names of the XML elements whose text should be tagged
 * @throws RuntimeException if the configured output format is none of the
 *         four styles handled here
 */
public void tagFromXML(InputStream input, Writer writer, String... xmlTags) {
  final OutputStyle style = OutputStyle.fromShortName(config.getOutputFormat());
  final TransformXML<String> transformer = new TransformXML<>();

  switch (style) {
    case SLASH_TAGS:
    case TSV:
      // Plain-text style output: use the regular SAX handler.
      transformer.transformXML(
          xmlTags,
          new TaggerWrapper(this),
          input,
          writer,
          new TransformXML.SAXInterface<>());
      return;

    case XML:
    case INLINE_XML:
      // XML style output: use the non-escaping SAX handler.
      transformer.transformXML(
          xmlTags,
          new TaggerWrapper(this),
          input,
          writer,
          new TransformXML.NoEscapingSAXInterface<>());
      return;

    default:
      throw new RuntimeException("Unexpected format " + style);
  }
}
Use of edu.stanford.nlp.sequences.PlainTextDocumentReaderAndWriter.OutputStyle in project CoreNLP by stanfordnlp.
From the class MaxentTagger: method runTagger.
/**
 * Runs the tagger when we're in TAG mode.
 * In this mode, the config contains either the name of the file to
 * tag or stdin. That file or input is then tagged.
 *
 * If the config supplies XML element names (anything other than a single
 * entry equal to the string "null"), the work is handed to tagFromXML()
 * and nothing else is done here.
 *
 * Otherwise output goes to the configured output file, or to System.out
 * when no output file is configured. Output and all input (file or stdin)
 * use the configured character encoding. The reader and writer are closed
 * before returning, even on failure.
 *
 * @throws IOException if opening or processing the input or output fails,
 *         including when the configured encoding is not supported
 */
private void runTagger() throws IOException, ClassNotFoundException, NoSuchMethodException, IllegalAccessException, java.lang.reflect.InvocationTargetException {
  String[] xmlInput = config.getXMLInput();
  // A lone "null" entry means there are no XML tags to process
  // (presumably the config's placeholder for "not set" -- confirm against TaggerConfig).
  if (xmlInput.length > 1 || (xmlInput.length == 1 && !xmlInput[0].equals("null"))) {
    tagFromXML();
    return;
  }

  BufferedWriter writer = null;
  BufferedReader br = null;
  try {
    String outFile = config.getOutputFile();
    if (!outFile.isEmpty()) {
      writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFile), config.getEncoding()));
    } else {
      writer = new BufferedWriter(new OutputStreamWriter(System.out, config.getEncoding()));
    }

    // Now determine if we're tagging from stdin or from a file,
    // and construct a reader accordingly.
    boolean stdin = config.useStdin();
    OutputStyle outputStyle = OutputStyle.fromShortName(config.getOutputFormat());
    if (!stdin) {
      String filename = config.getFile();
      if (formatPattern.matcher(filename).find()) {
        // The file name carries a format description; let TaggedFileRecord build the reader.
        TaggedFileRecord record = TaggedFileRecord.createRecord(config, filename);
        runTagger(record.reader(), writer, outputStyle);
      } else {
        br = IOUtils.readerFromString(filename, config.getEncoding());
        runTagger(br, writer, config.getTagInside(), outputStyle);
      }
    } else {
      log.info("Type some text to tag, then EOF.");
      log.info(" (For EOF, use Return, Ctrl-D on Unix; Enter, Ctrl-Z, Enter on Windows.)");
      // Decode stdin with the configured encoding, matching the file reader and
      // the output writer, instead of silently using the platform default charset.
      br = new BufferedReader(new InputStreamReader(System.in, config.getEncoding()));
      runTaggerStdin(br, writer, outputStyle);
    }
  } finally {
    IOUtils.closeIgnoringExceptions(br);
    IOUtils.closeIgnoringExceptions(writer);
  }
}
Use of edu.stanford.nlp.sequences.PlainTextDocumentReaderAndWriter.OutputStyle in project CoreNLP by stanfordnlp.
From the class MaxentTagger: method tagFromXML (Reader overload).
/**
 * Tags the text found inside the given XML elements read from
 * {@code input} and writes the transformed document to {@code writer}.
 *
 * The SAX handler depends on the configured output format: XML and
 * INLINE_XML use {@code TransformXML.NoEscapingSAXInterface}, while
 * SLASH_TAGS and TSV use {@code TransformXML.SAXInterface}.
 *
 * @param input   reader supplying the XML document to process
 * @param writer  destination for the transformed output
 * @param xmlTags names of the XML elements whose text should be tagged
 * @throws RuntimeException if the configured output format is none of the
 *         four styles handled here
 */
public void tagFromXML(Reader input, Writer writer, String... xmlTags) {
  final OutputStyle style = OutputStyle.fromShortName(config.getOutputFormat());
  final TransformXML<String> transformer = new TransformXML<>();

  switch (style) {
    case SLASH_TAGS:
    case TSV:
      // Plain-text style output: use the regular SAX handler.
      transformer.transformXML(
          xmlTags,
          new TaggerWrapper(this),
          input,
          writer,
          new TransformXML.SAXInterface<>());
      return;

    case XML:
    case INLINE_XML:
      // XML style output: use the non-escaping SAX handler.
      transformer.transformXML(
          xmlTags,
          new TaggerWrapper(this),
          input,
          writer,
          new TransformXML.NoEscapingSAXInterface<>());
      return;

    default:
      throw new RuntimeException("Unexpected format " + style);
  }
}
Aggregations