Use of org.xwiki.officeimporter.document.XHTMLOfficeDocument in project xwiki-platform by xwiki.
Class DefaultXHTMLOfficeDocumentBuilder, method build.
/**
 * Builds an {@link XHTMLOfficeDocument} from an office file: the file is converted to HTML by the office
 * server's converter, the HTML is cleaned, and any embedded images reported by the cleaner are added to the
 * remaining conversion artifacts.
 *
 * @param officeFileStream the content of the office file, handed to the converter under the cleaned file name
 * @param officeFileName the name of the office file; accents are stripped before it is given to the converter
 * @param reference the document reference, serialized and passed to the HTML cleaner as "targetDocument"
 * @param filterStyles when {@code true} the cleaner receives the "filterStyles" parameter set to "strict"
 * @return the cleaned XHTML document together with the artifacts other than the HTML output itself
 * @throws OfficeImporterException if the office converter fails
 */
@Override
public XHTMLOfficeDocument build(InputStream officeFileStream, String officeFileName, DocumentReference reference, boolean filterStyles) throws OfficeImporterException {
    // Accents seem to cause issues in some conditions.
    // See https://jira.xwiki.org/browse/XWIKI-14692
    String sanitizedFileName = StringUtils.stripAccents(officeFileName);

    // The single input handed to the converter, keyed by its (accent-free) file name.
    Map<String, InputStream> sources = new HashMap<String, InputStream>();
    sources.put(sanitizedFileName, officeFileStream);

    // The office converter uses the output file name extension to determine the output format/syntax.
    String htmlFileName = StringUtils.substringBeforeLast(sanitizedFileName, ".") + ".html";

    Map<String, byte[]> convertedFiles;
    try {
        convertedFiles = this.officeServer.getConverter().convert(sources, sanitizedFileName, htmlFileName);
    } catch (OfficeConverterException ex) {
        // The error message reports the original name, not the accent-stripped one.
        throw new OfficeImporterException(
            String.format("Error while converting document [%s] into html.", officeFileName), ex);
    }

    // Parameters driving the HTML cleaning.
    Map<String, String> cleaningParameters = new HashMap<String, String>();
    cleaningParameters.put("targetDocument", this.entityReferenceSerializer.serialize(reference));
    // Extract the images that are embedded through the Data URI scheme and add them to the other artifacts so
    // that they end up as attachments.
    cleaningParameters.put("attachEmbeddedImages", "true");
    if (filterStyles) {
        cleaningParameters.put("filterStyles", "strict");
    }

    HTMLCleanerConfiguration cleanerConfiguration = this.officeHtmlCleaner.getDefaultConfiguration();
    cleanerConfiguration.setParameters(cleaningParameters);

    // The HTML output is taken out of the artifacts: only the remaining entries are returned to the caller.
    // NOTE(review): remove() yields null when the converter produced no entry under the output name; confirm
    // that getReader copes with that.
    byte[] htmlBytes = convertedFiles.remove(htmlFileName);
    Reader htmlReader = getReader(htmlBytes);
    Document cleanedDocument = this.officeHtmlCleaner.clean(htmlReader, cleanerConfiguration);

    // Images extracted by the cleaner, if any, are exposed as user data on the cleaned document.
    @SuppressWarnings("unchecked")
    Map<String, byte[]> extractedImages = (Map<String, byte[]>) cleanedDocument.getUserData("embeddedImages");
    if (extractedImages != null) {
        convertedFiles.putAll(extractedImages);
    }

    return new XHTMLOfficeDocument(cleanedDocument, convertedFiles);
}
Use of org.xwiki.officeimporter.document.XHTMLOfficeDocument in project xwiki-platform by xwiki.
Class DefaultXHTMLOfficeDocumentBuilderTest, method testXHTMLOfficeDocumentBuilding.
/**
 * Verifies that building an XHTML office document passes the expected parameters to the HTML cleaner, returns
 * the cleaned document, and exposes the converter artifacts minus the HTML output plus the embedded images.
 */
@Test
public void testXHTMLOfficeDocumentBuilding() throws Exception {
    String officeFileName = "file.odt";
    String htmlFileName = "file.html";

    // Target document and the serialized form the cleaner is expected to receive.
    DocumentReference targetReference = new DocumentReference("wiki", Arrays.asList("Path", "To"), "Page");
    when(this.entityReferenceSerializer.serialize(targetReference)).thenReturn("wiki:Path.To.Page");

    // Converter output: the HTML result plus one extra artifact.
    byte[] textArtifact = "Text content".getBytes();
    Map<String, byte[]> convertedFiles = new HashMap<String, byte[]>();
    convertedFiles.put(htmlFileName, "HTML content".getBytes());
    convertedFiles.put("file.txt", textArtifact);

    InputStream officeInput = new ByteArrayInputStream("office content".getBytes());
    when(this.officeConverter.convert(Collections.singletonMap(officeFileName, officeInput), officeFileName,
        htmlFileName)).thenReturn(convertedFiles);

    // The cleaned document reports one embedded image through its user data.
    byte[] imageArtifact = "Image content".getBytes();
    Map<String, byte[]> extractedImages = Collections.singletonMap("image.png", imageArtifact);
    Document cleanedDocument = mock(Document.class);
    when(cleanedDocument.getUserData("embeddedImages")).thenReturn(extractedImages);

    HTMLCleanerConfiguration cleanerConfiguration = mock(HTMLCleanerConfiguration.class);
    when(this.officeHTMLCleaner.getDefaultConfiguration()).thenReturn(cleanerConfiguration);
    when(this.officeHTMLCleaner.clean(any(Reader.class), eq(cleanerConfiguration))).thenReturn(cleanedDocument);

    // Cleaner parameters expected when style filtering is requested.
    Map<String, String> expectedParameters = new HashMap<String, String>();
    expectedParameters.put("targetDocument", "wiki:Path.To.Page");
    expectedParameters.put("attachEmbeddedImages", "true");
    expectedParameters.put("filterStyles", "strict");

    XHTMLOfficeDocument builtDocument =
        this.mocker.getComponentUnderTest().build(officeInput, officeFileName, targetReference, true);

    verify(cleanerConfiguration).setParameters(expectedParameters);
    assertEquals(cleanedDocument, builtDocument.getContentDocument());

    // The HTML output must be gone from the artifacts, and the embedded image must have been added.
    Map<String, byte[]> expectedArtifacts = new HashMap<String, byte[]>();
    expectedArtifacts.put("file.txt", textArtifact);
    expectedArtifacts.put("image.png", imageArtifact);
    assertEquals(expectedArtifacts, builtDocument.getArtifacts());
}
Use of org.xwiki.officeimporter.document.XHTMLOfficeDocument in project xwiki-platform by xwiki.
Class DefaultXDOMOfficeDocumentBuilder, method build.
/**
 * Builds an {@link XDOMOfficeDocument} by stripping the HTML envelope from the given XHTML office document,
 * serializing it and parsing the result with the XHTML parser.
 *
 * @param xhtmlOfficeDocument the XHTML office document whose content and artifacts are used
 * @return an XDOM office document holding the parsed content and the artifacts of the input document
 * @throws OfficeImporterException if the XHTML content cannot be parsed
 */
@Override
public XDOMOfficeDocument build(XHTMLOfficeDocument xhtmlOfficeDocument) throws OfficeImporterException {
    Document contentDocument = xhtmlOfficeDocument.getContentDocument();
    // Operates on the content document held by the input before it is serialized below.
    HTMLUtils.stripHTMLEnvelope(contentDocument);

    XDOM parsedContent;
    try {
        String serializedXhtml = HTMLUtils.toString(contentDocument);
        parsedContent = this.xHtmlParser.parse(new StringReader(serializedXhtml));
    } catch (ParseException ex) {
        throw new OfficeImporterException("Error: Could not parse xhtml office content.", ex);
    }

    return new XDOMOfficeDocument(parsedContent, xhtmlOfficeDocument.getArtifacts(), this.componentManager);
}
Aggregations