use of javax.xml.transform.sax.SAXTransformerFactory in project jackrabbit by apache.
the class ExportDocViewTest method doTestExportDocView.
/**
 * Tests session.exportDocumentView with the different argument possibilities.
 * <p>
 * The flag withHandler decides which overload is called: the one taking a
 * ContentHandler or the one taking an OutputStream. In the ContentHandler
 * case a {@link TransformerHandler} obtained from a
 * {@link SAXTransformerFactory} is used, with its result directed at the
 * output file. In both cases the test node is exported to the file defined
 * in the setUp. The exported file is then read back via readDocument and
 * the resulting document is compared with the test node by compareTree.
 *
 * @param withHandler if true, the export method requiring a ContentHandler
 *                    as argument is called; otherwise the stream variant
 * @param skipBinary  passed through unchanged to session.exportDocumentView
 *                    (per the JCR API: whether binary property values are
 *                    skipped during export)
 * @param noRecurse   passed through unchanged to session.exportDocumentView
 *                    (per the JCR API: whether the subtree below the node
 *                    is left out of the export)
 */
public void doTestExportDocView(boolean withHandler, boolean skipBinary, boolean noRecurse) throws RepositoryException, IOException, SAXException, TransformerException {
    // Remember the flags in fields; they are not read again in this method.
    this.skipBinary = skipBinary;
    this.noRecurse = noRecurse;
    this.withHandler = withHandler;

    BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(file));
    try {
        if (withHandler) {
            SAXTransformerFactory stf = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
            TransformerHandler th = stf.newTransformerHandler();
            th.setResult(new StreamResult(os));
            session.exportDocumentView(testPath, th, skipBinary, noRecurse);
        } else {
            session.exportDocumentView(testPath, os, skipBinary, noRecurse);
        }
    } finally {
        // Closing also flushes the buffered output before the file is read back.
        os.close();
    }

    // build the DOM tree
    InputStream in = new BufferedInputStream(new FileInputStream(file));
    try {
        doc = readDocument(in);
        compareTree();
    } finally {
        // Release the file handle even if reading or the comparison fails;
        // previously this stream was never closed.
        in.close();
    }
}
use of javax.xml.transform.sax.SAXTransformerFactory in project jackrabbit-oak by apache.
the class HtmlRepresentation method startResponse.
/**
 * Sets up the servlet response for UTF-8 encoded HTML and returns an
 * XHTML content handler whose SAX events are serialized to the response
 * output stream.
 *
 * @param response servlet response to write the HTML document to
 * @param title    value stored under {@code Metadata.TITLE} in the metadata
 *                 handed to the returned handler
 * @return handler wrapping a serializer bound to the response output stream
 * @throws IOException if the response output stream cannot be obtained, or
 *                     wrapping a TransformerConfigurationException raised
 *                     while setting up the serializer
 */
private XHTMLContentHandler startResponse(HttpServletResponse response, String title) throws IOException {
    try {
        response.setContentType("text/html");
        response.setCharacterEncoding("UTF-8");

        // Identity transformer used purely as an HTML serializer.
        TransformerHandler serializer =
                ((SAXTransformerFactory) SAXTransformerFactory.newInstance()).newTransformerHandler();
        Transformer output = serializer.getTransformer();
        output.setOutputProperty(OutputKeys.METHOD, "html");
        output.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setResult(new StreamResult(response.getOutputStream()));

        Metadata pageMetadata = new Metadata();
        pageMetadata.set(Metadata.TITLE, title);
        return new XHTMLContentHandler(serializer, pageMetadata);
    } catch (TransformerConfigurationException cause) {
        throw new IOException(cause);
    }
}
use of javax.xml.transform.sax.SAXTransformerFactory in project lucene-solr by apache.
the class TikaEntityProcessor method getHtmlHandler.
/**
 * Creates a content handler that serializes incoming SAX events as HTML to
 * the given writer.
 * <p>
 * The returned decorator drops every start/end event whose local name is
 * "head", replaces the XHTML namespace URI with {@code null} before
 * forwarding element events, and ignores prefix mapping events.
 *
 * @param writer destination of the serialized HTML
 * @return decorated handler writing to {@code writer}
 * @throws TransformerConfigurationException if the transformer handler
 *                                           cannot be created
 */
private static ContentHandler getHtmlHandler(Writer writer) throws TransformerConfigurationException {
    TransformerHandler serializer =
            ((SAXTransformerFactory) TransformerFactory.newInstance()).newTransformerHandler();
    serializer.getTransformer().setOutputProperty(OutputKeys.METHOD, "html");
    serializer.setResult(new StreamResult(writer));

    return new ContentHandlerDecorator(serializer) {
        @Override
        public void startElement(String uri, String localName, String name, Attributes atts) throws SAXException {
            // Elements named "head" are not forwarded.
            if ("head".equals(localName)) {
                return;
            }
            String forwardedUri = XHTMLContentHandler.XHTML.equals(uri) ? null : uri;
            super.startElement(forwardedUri, localName, name, atts);
        }

        @Override
        public void endElement(String uri, String localName, String name) throws SAXException {
            // Mirror startElement: skip "head", strip the XHTML namespace.
            if ("head".equals(localName)) {
                return;
            }
            String forwardedUri = XHTMLContentHandler.XHTML.equals(uri) ? null : uri;
            super.endElement(forwardedUri, localName, name);
        }

        @Override
        public void startPrefixMapping(String prefix, String uri) {
            // intentionally empty: prefix mappings are not forwarded
        }

        @Override
        public void endPrefixMapping(String prefix) {
            // intentionally empty: prefix mappings are not forwarded
        }
    };
}
use of javax.xml.transform.sax.SAXTransformerFactory in project tika by apache.
the class HtmlParserTest method makeHtmlTransformer.
/**
 * Builds a ContentHandler that turns SAX events into textual HTML and
 * writes the result to {@code writer} (typically a StringWriter).
 * <p>
 * The serializer is configured with output method "html", no indentation
 * and encoding "utf-8".
 *
 * @param writer where to write the resulting HTML text
 * @return ContentHandler suitable for passing to parse() methods
 * @throws Exception if the underlying transformer handler cannot be set up
 */
private ContentHandler makeHtmlTransformer(Writer writer) throws Exception {
    TransformerHandler serializer =
            ((SAXTransformerFactory) SAXTransformerFactory.newInstance()).newTransformerHandler();

    Transformer output = serializer.getTransformer();
    output.setOutputProperty(OutputKeys.METHOD, "html");
    output.setOutputProperty(OutputKeys.INDENT, "no");
    output.setOutputProperty(OutputKeys.ENCODING, "utf-8");

    serializer.setResult(new StreamResult(writer));
    return serializer;
}
use of javax.xml.transform.sax.SAXTransformerFactory in project tika by apache.
the class OutlookParserTest method testOutlookHTMLfromRTF.
/**
 * Parses test-outlook2003.msg with an AutoDetectParser, serializes the SAX
 * output as indented XML into a string, and checks that the expected
 * markup, link text and link targets are present and that exactly one
 * body element was emitted.
 */
@Test
public void testOutlookHTMLfromRTF() throws Exception {
    Parser autoDetect = new AutoDetectParser();
    Metadata msgMetadata = new Metadata();

    // Serialize the parser's SAX events into an in-memory string
    // (output method "xml", indentation enabled).
    StringWriter markup = new StringWriter();
    TransformerHandler serializer =
            ((SAXTransformerFactory) SAXTransformerFactory.newInstance()).newTransformerHandler();
    Transformer output = serializer.getTransformer();
    output.setOutputProperty(OutputKeys.METHOD, "xml");
    output.setOutputProperty(OutputKeys.INDENT, "yes");
    serializer.setResult(new StreamResult(markup));

    try (InputStream msg = OutlookParserTest.class.getResourceAsStream("/test-documents/test-outlook2003.msg")) {
        autoDetect.parse(msg, serializer, msgMetadata, new ParseContext());
    }

    // Collapse whitespace directly after <p> so the assertions below do
    // not depend on how the serializer indented the paragraph content.
    String content = markup.toString().replaceAll("<p>\\s+", "<p>");

    // Markup that should only show up if the HTML body was processed
    assertContains("<dd>New Outlook User</dd>", content);
    assertContains("designed <i>to help you", content);
    assertContains("<p><a href=\"http://r.office.microsoft.com/r/rlidOutlookWelcomeMail10?clid=1033\">Cached Exchange Mode</a>", content);

    // Newsletter link: surrounding text first, then the target itself
    assertContains("sign up for a free subscription", content);
    assertContains("Office Newsletter", content);
    assertContains("newsletter will be sent to you", content);
    assertContains("http://r.office.microsoft.com/r/rlidNewsletterSignUp?clid=1033", content);

    // Exactly one opening and one closing body tag, i.e. no nested html docs
    assertEquals(2, content.split("<body>").length);
    assertEquals(2, content.split("<\\/body>").length);
}
Aggregations