use of javax.xml.transform.sax.TransformerHandler in project tika by apache.
the class OOXMLParserTest method testEmbeddedPDF.
// TIKA-989:
/**
 * Parses a .docx test document with {@link OOXMLParser}, serializes the SAX
 * events as unindented XML, and verifies that the text runs and the embedded
 * object placeholders all appear, in document order.
 */
@Test
public void testEmbeddedPDF() throws Exception {
    StringWriter output = new StringWriter();
    SAXTransformerFactory saxFactory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
    TransformerHandler xmlHandler = saxFactory.newTransformerHandler();
    xmlHandler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
    xmlHandler.getTransformer().setOutputProperty(OutputKeys.INDENT, "no");
    xmlHandler.setResult(new StreamResult(output));

    Metadata metadata = new Metadata();
    try (InputStream docx = OOXMLParserTest.class.getResourceAsStream("/test-documents/testWORD_embedded_pdf.docx")) {
        new OOXMLParser().parse(docx, xmlHandler, metadata, new ParseContext());
    }

    String xml = output.toString();

    // Markers listed in the order they are expected to occur in the output.
    String[] expectedInOrder = {
        "Here is the pdf file:",
        "<div class=\"embedded\" id=\"rId5\"/>",
        "Bye Bye",
        "<div class=\"embedded\" id=\"rId6\"/>",
        "Bye for real."
    };

    // First pass: every marker must be present somewhere in the output.
    int[] offsets = new int[expectedInOrder.length];
    for (int idx = 0; idx < expectedInOrder.length; idx++) {
        offsets[idx] = xml.indexOf(expectedInOrder[idx]);
        assertTrue(offsets[idx] != -1);
    }

    // Second pass: the first occurrence of each marker must precede the next one.
    for (int idx = 1; idx < offsets.length; idx++) {
        assertTrue(offsets[idx - 1] < offsets[idx]);
    }
}
use of javax.xml.transform.sax.TransformerHandler in project tika by apache.
the class OutlookParserTest method testOutlookHTMLVersion.
/**
 * Parses a Chinese-language .msg test file through {@link AutoDetectParser},
 * serializes the SAX events as indented XML, and checks the emitted markup
 * and the extracted metadata.
 */
@Test
public void testOutlookHTMLVersion() throws Exception {
    Metadata metadata = new Metadata();
    Parser autoParser = new AutoDetectParser();

    // Capture the parser's SAX output as an XML string.
    StringWriter output = new StringWriter();
    SAXTransformerFactory saxFactory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
    TransformerHandler xmlHandler = saxFactory.newTransformerHandler();
    xmlHandler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
    xmlHandler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
    xmlHandler.setResult(new StreamResult(output));

    try (InputStream msg = OutlookParserTest.class.getResourceAsStream("/test-documents/testMSG_chinese.msg")) {
        autoParser.parse(msg, xmlHandler, metadata, new ParseContext());
    }
    String content = output.toString();

    // Fragments expected in the output when the HTML version of the body was processed.
    String[] expectedFragments = {
        "<dd>tests.chang@fengttt.com</dd>",
        "<p>Alfresco MSG format testing",
        "<li>1",
        "<li>2"
    };
    for (String fragment : expectedFragments) {
        assertContains(fragment, content);
    }

    // Exactly one opening and one closing body tag: no nested html documents.
    int piecesAroundBodyOpen = content.split("<body>").length;
    assertEquals(2, piecesAroundBodyOpen);
    int piecesAroundBodyClose = content.split("<\\/body>").length;
    assertEquals(2, piecesAroundBodyClose);

    // The Chinese text must survive both in the metadata and in the body.
    assertContains("張毓倫", metadata.get(TikaCoreProperties.CREATOR));
    assertContains("陳惠珍", content);

    assertEquals("tests.chang@fengttt.com", metadata.get(Message.MESSAGE_TO_EMAIL));
    assertEquals("Tests Chang@FT (張毓倫)", metadata.get(Office.MAPI_FROM_REPRESENTING_NAME));
    assertEquals("/O=FT GROUP/OU=FT/CN=RECIPIENTS/CN=LYDIACHANG", metadata.get(Office.MAPI_FROM_REPRESENTING_EMAIL));
}
use of javax.xml.transform.sax.TransformerHandler in project tika by apache.
the class OutlookParserTest method testOutlookForwarded.
/**
 * Parses a forwarded .msg test file through {@link AutoDetectParser} and
 * verifies that the serialized XML contains exactly one opening and one
 * closing body tag, i.e. no nested documents.
 */
@Test
public void testOutlookForwarded() throws Exception {
    Metadata metadata = new Metadata();
    Parser autoParser = new AutoDetectParser();

    // Capture the parser's SAX output as an indented XML string.
    StringWriter output = new StringWriter();
    SAXTransformerFactory saxFactory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
    TransformerHandler xmlHandler = saxFactory.newTransformerHandler();
    xmlHandler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
    xmlHandler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
    xmlHandler.setResult(new StreamResult(output));

    try (InputStream msg = OutlookParserTest.class.getResourceAsStream("/test-documents/testMSG_forwarded.msg")) {
        autoParser.parse(msg, xmlHandler, metadata, new ParseContext());
    }

    // Splitting on a tag that occurs once yields two pieces.
    String content = output.toString();
    int piecesAroundBodyOpen = content.split("<body>").length;
    assertEquals(2, piecesAroundBodyOpen);
    int piecesAroundBodyClose = content.split("<\\/body>").length;
    assertEquals(2, piecesAroundBodyClose);
}
use of javax.xml.transform.sax.TransformerHandler in project sling by apache.
the class SlingTransformer method createTransformerHandler.
/**
 * Builds a {@link TransformerHandler} from the stylesheet supplied by
 * {@code getXsltSource()}.
 *
 * <p>The stylesheet is read through an {@link XMLReader} whose content handler
 * is a {@link TemplatesHandler}; the resulting templates are then used to
 * create the transformer handler.
 *
 * @return a new transformer handler for the parsed stylesheet
 * @throws Exception if the stylesheet cannot be read, parsed, or compiled
 */
private TransformerHandler createTransformerHandler() throws Exception {
    SAXTransformerFactory saxFactory = (SAXTransformerFactory) TransformerFactory.newInstance();

    // Feed the stylesheet's SAX events into a templates handler.
    TemplatesHandler stylesheetCollector = saxFactory.newTemplatesHandler();
    XMLReader stylesheetReader = XMLReaderFactory.createXMLReader();
    stylesheetReader.setContentHandler(stylesheetCollector);
    stylesheetReader.parse(new InputSource(getXsltSource()));

    // Create transformer handler from the templates collected above.
    return saxFactory.newTransformerHandler(stylesheetCollector.getTemplates());
}
use of javax.xml.transform.sax.TransformerHandler in project sling by apache.
the class SimpleXmlSerializationManager method buildSerializationData.
/**
 * Serializes the String-valued properties of a resource into an indented XML
 * document held in memory.
 *
 * <p>The document consists of one {@code TAG_RESOURCE} element containing one
 * {@code TAG_PROPERTY} element per String property. Each property element
 * carries the property name in its {@code ATT_PROPERTY_NAME} attribute and the
 * property value as text. Properties are written in key order.
 *
 * <p>Properties whose value is not a String, including {@code null} values,
 * are not serialized; a diagnostic line is written to {@code System.err} for
 * each of them.
 *
 * @param contentSyncRoot not used by this implementation
 * @param resource the resource to serialize; may be {@code null}
 * @return the serialization data, or {@code null} when {@code resource} is
 *         {@code null} or has no properties
 * @throws SerializationException declared by the interface; this
 *         implementation currently wraps transformer and SAX failures in a
 *         {@link RuntimeException} instead
 */
@Override
public SerializationData buildSerializationData(File contentSyncRoot, ResourceProxy resource) throws SerializationException {
    if (resource == null) {
        return null;
    }
    Map<String, Object> content = resource.getProperties();
    if (content == null || content.isEmpty()) {
        return null;
    }
    try {
        SAXTransformerFactory f = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
        ByteArrayOutputStream result = new ByteArrayOutputStream();
        StreamResult sr = new StreamResult(result);
        TransformerHandler handler = f.newTransformerHandler();
        Transformer t = handler.getTransformer();
        t.setOutputProperty(OutputKeys.INDENT, "yes");
        handler.setResult(sr);
        handler.startDocument();
        startElement(handler, TAG_RESOURCE);
        // Copy into a TreeMap so properties are emitted in a stable, sorted order.
        Set<Entry<String, Object>> entrySet = new TreeMap<>(content).entrySet();
        for (Map.Entry<String, Object> property : entrySet) {
            Object value = property.getValue();
            if (value instanceof String) {
                String tagName = property.getKey();
                String tagValue = (String) value;
                AttributesImpl attributes = new AttributesImpl();
                attributes.addAttribute("", ATT_PROPERTY_NAME, ATT_PROPERTY_NAME, null, tagName);
                handler.startElement("", TAG_PROPERTY, TAG_PROPERTY, attributes);
                handler.characters(tagValue.toCharArray(), 0, tagValue.length());
                handler.endElement("", TAG_PROPERTY, TAG_PROPERTY);
            } else {
                // TODO multi-valued properties, other primitives
                // A null value also lands here ("null instanceof String" is false), so the
                // type must be resolved null-safely; calling getClass() on it would throw
                // a NullPointerException and abort the whole serialization.
                Object valueType = (value == null) ? null : value.getClass();
                System.err.println("Can't yet handle property " + property.getKey() + " of type " + valueType);
            }
        }
        endElement(handler, TAG_RESOURCE);
        handler.endDocument();
        // TODO - also add the serialization type
        return new SerializationData(resource.getPath(), CONTENT_XML, result.toByteArray(), null);
    } catch (TransformerConfigurationException | TransformerFactoryConfigurationError | SAXException e) {
        // TODO proper exception handling
        throw new RuntimeException(e);
    }
}
Aggregations