use of org.xwiki.xml.html.HTMLCleanerConfiguration in project xwiki-platform by xwiki.
the class DefaultXHTMLOfficeDocumentBuilder method build.
/**
 * Converts an office file to HTML with the office server converter, cleans the HTML and wraps the result.
 *
 * @param officeFileStream the content of the office file
 * @param officeFileName the name of the office file; its accent-stripped form is used for the conversion
 * @param reference the target document, serialized into the {@code targetDocument} cleaning parameter
 * @param filterStyles when {@code true} the {@code filterStyles} cleaning parameter is set to {@code strict}
 * @return the cleaned XHTML document together with the remaining conversion artifacts and any embedded images
 * @throws OfficeImporterException if the office converter fails
 */
@Override
public XHTMLOfficeDocument build(InputStream officeFileStream, String officeFileName, DocumentReference reference, boolean filterStyles) throws OfficeImporterException {
    // Accents seem to cause issues in some conditions, so work with an accent-free file name.
    // See https://jira.xwiki.org/browse/XWIKI-14692
    String sanitizedFileName = StringUtils.stripAccents(officeFileName);

    // The office converter uses the output file name extension to determine the output format/syntax.
    String htmlFileName = StringUtils.substringBeforeLast(sanitizedFileName, ".") + ".html";

    // Invoke the office document converter on the single input file.
    Map<String, InputStream> converterInput = new HashMap<String, InputStream>();
    converterInput.put(sanitizedFileName, officeFileStream);
    Map<String, byte[]> conversionResult;
    try {
        conversionResult = this.officeServer.getConverter().convert(converterInput, sanitizedFileName, htmlFileName);
    } catch (OfficeConverterException ex) {
        // Report the original (not the accent-stripped) file name to the caller.
        throw new OfficeImporterException(
            String.format("Error while converting document [%s] into html.", officeFileName), ex);
    }

    // Prepare the parameters for HTML cleaning.
    Map<String, String> cleaningParameters = new HashMap<String, String>();
    cleaningParameters.put("targetDocument", this.entityReferenceSerializer.serialize(reference));
    // Extract the images that are embedded through the Data URI scheme and add them to the other artifacts so
    // that they end up as attachments.
    cleaningParameters.put("attachEmbeddedImages", "true");
    if (filterStyles) {
        cleaningParameters.put("filterStyles", "strict");
    }

    // Parse and clean the HTML output; the HTML entry is taken out of the artifacts in the process.
    HTMLCleanerConfiguration cleanerConfiguration = this.officeHtmlCleaner.getDefaultConfiguration();
    cleanerConfiguration.setParameters(cleaningParameters);
    byte[] htmlBytes = conversionResult.remove(htmlFileName);
    Reader htmlReader = getReader(htmlBytes);
    Document cleanedDocument = this.officeHtmlCleaner.clean(htmlReader, cleanerConfiguration);

    // Images collected during cleaning are exposed as user data on the cleaned document.
    @SuppressWarnings("unchecked")
    Map<String, byte[]> extractedImages = (Map<String, byte[]>) cleanedDocument.getUserData("embeddedImages");
    if (extractedImages != null) {
        conversionResult.putAll(extractedImages);
    }

    // Return a new XHTMLOfficeDocument instance.
    return new XHTMLOfficeDocument(cleanedDocument, conversionResult);
}
use of org.xwiki.xml.html.HTMLCleanerConfiguration in project xwiki-platform by xwiki.
the class OfficeHTMLCleaner method clean.
/**
 * Cleans the given HTML using the default configuration with the {@code filterStyles} parameter set to
 * {@code strict}.
 *
 * @param originalHtmlContent the HTML content to clean
 * @return the document produced by the two-argument {@code clean} method
 */
@Override
public Document clean(Reader originalHtmlContent) {
    HTMLCleanerConfiguration strictConfiguration = getDefaultConfiguration();
    // Special parameter read by the cleaning filters.
    strictConfiguration.setParameters(Collections.singletonMap("filterStyles", "strict"));
    return clean(originalHtmlContent, strictConfiguration);
}
use of org.xwiki.xml.html.HTMLCleanerConfiguration in project xwiki-platform by xwiki.
the class DefaultHTMLCleaner method clean.
/**
 * Cleans the given HTML by running the specific filters (ordered by ascending priority) followed by the
 * default filters of the underlying cleaner, then serializes the resulting document.
 *
 * @param dirtyHTML the HTML to clean
 * @return the string form of the cleaned document
 */
@Override
public String clean(String dirtyHTML) {
    // Sort the list of specific filters based on their priority. Integer.compare is used instead of
    // subtracting the priorities because the subtraction can overflow and yield a wrong ordering.
    Collections.sort(specificFilters, new Comparator<HTMLFilter>() {
        @Override
        public int compare(HTMLFilter alice, HTMLFilter bob) {
            return Integer.compare(alice.getPriority(), bob.getPriority());
        }
    });
    // We have to remove or replace the HTML elements that were added by the WYSIWYG editor only for internal
    // reasons, before any cleaning filter is applied. Otherwise cleaning filters might transform these
    // WYSIWYG-specific HTML elements making their removal difficult. We cannot transform the WYSIWYG output on the
    // client side because the editor is a widget that can be used independently inside or outside an HTML form and
    // thus it doesn't know when its current value is submitted.
    HTMLCleanerConfiguration config = cleaner.getDefaultConfiguration();
    // Specific filters go first, the default filters of the cleaner come after them.
    List<org.xwiki.xml.html.filter.HTMLFilter> filters = new ArrayList<org.xwiki.xml.html.filter.HTMLFilter>();
    filters.addAll(specificFilters);
    filters.addAll(config.getFilters());
    config.setFilters(filters);
    Document document = cleaner.clean(new StringReader(dirtyHTML), config);
    return HTMLUtils.toString(document);
}
use of org.xwiki.xml.html.HTMLCleanerConfiguration in project xwiki-platform by xwiki.
the class DefaultPresentationBuilderTest method build.
/**
 * Builds a presentation from a mocked conversion result and checks the artifacts, the base meta data and the
 * generated gallery macro.
 */
@Test
public void build() throws Exception {
    // Target document: its reference is serialized and its syntax is XWiki 2.1.
    DocumentReference targetReference = new DocumentReference("wiki", Arrays.asList("Path", "To"), "Page");
    when(this.entityReferenceSerializer.serialize(targetReference)).thenReturn("wiki:Path.To.Page");
    DocumentModelBridge targetDocument = mock(DocumentModelBridge.class);
    DocumentAccessBridge documentAccessBridge = this.mocker.getInstance(DocumentAccessBridge.class);
    when(documentAccessBridge.getTranslatedDocumentInstance(targetReference)).thenReturn(targetDocument);
    when(targetDocument.getSyntax()).thenReturn(Syntax.XWIKI_2_1);

    // Office converter: two slides, each with an image, an image page and a text page.
    InputStream presentationStream = new ByteArrayInputStream("Presentation content".getBytes());
    byte[] slideZero = "first slide".getBytes();
    byte[] slideOne = "second slide".getBytes();
    Map<String, byte[]> convertedFiles = new HashMap<String, byte[]>();
    convertedFiles.put("img0.jpg", slideZero);
    convertedFiles.put("img0.html", new byte[0]);
    convertedFiles.put("text0.html", new byte[0]);
    convertedFiles.put("img1.jpg", slideOne);
    convertedFiles.put("img1.html", new byte[0]);
    convertedFiles.put("text1.html", new byte[0]);
    when(this.officeConverter.convert(Collections.singletonMap("file.odp", presentationStream), "file.odp",
        "img0.html")).thenReturn(convertedFiles);

    // HTML cleaner: returns a minimal document when given the expected presentation HTML.
    HTMLCleanerConfiguration cleanerConfiguration = mock(HTMLCleanerConfiguration.class);
    when(this.officeHTMLCleaner.getDefaultConfiguration()).thenReturn(cleanerConfiguration);
    Document cleanedDocument = XMLUtils.createDOMDocument();
    cleanedDocument.appendChild(cleanedDocument.createElement("html"));
    String expectedHTML = "<p><img src=\"file-slide0.jpg\"/></p><p><img src=\"file-slide1.jpg\"/></p>";
    when(this.officeHTMLCleaner.clean(any(Reader.class), eq(cleanerConfiguration)))
        .then(returnMatchingDocument(expectedHTML, cleanedDocument));

    // XHTML parser: always yields the same (empty) gallery content.
    XDOM galleryXDOM = new XDOM(Collections.<Block>emptyList());
    when(this.xhtmlParser.parse(any(Reader.class))).thenReturn(galleryXDOM);

    XDOMOfficeDocument builtDocument =
        this.mocker.getComponentUnderTest().build(presentationStream, "file.odp", targetReference);

    // The cleaner must have been configured with the serialized target document.
    verify(cleanerConfiguration).setParameters(Collections.singletonMap("targetDocument", "wiki:Path.To.Page"));

    // Only the slide images are kept, under their new names.
    Map<String, byte[]> expectedSlides = new HashMap<String, byte[]>();
    expectedSlides.put("file-slide0.jpg", slideZero);
    expectedSlides.put("file-slide1.jpg", slideOne);
    assertEquals(expectedSlides, builtDocument.getArtifacts());
    assertEquals("wiki:Path.To.Page", builtDocument.getContentDocument().getMetaData().getMetaData(MetaData.BASE));

    // The content is a single gallery macro wrapping the parsed gallery content.
    List<ExpandedMacroBlock> galleryMacros = builtDocument.getContentDocument()
        .getBlocks(new ClassBlockMatcher(ExpandedMacroBlock.class), Block.Axes.CHILD);
    assertEquals(1, galleryMacros.size());
    ExpandedMacroBlock galleryMacro = galleryMacros.get(0);
    assertEquals("gallery", galleryMacro.getId());
    assertEquals(galleryXDOM, galleryMacro.getChildren().get(0));
}
use of org.xwiki.xml.html.HTMLCleanerConfiguration in project xwiki-platform by xwiki.
the class PdfExportImpl method convertToStrictXHtml.
/**
 * Runs the given HTML through the HTML cleaner, with the {@code uniqueId} filter appended to the default
 * filters, and returns the serialized result.
 *
 * @param input the source HTML to process
 * @return the cleaned up source
 */
private String convertToStrictXHtml(String input) {
    LOGGER.debug("Cleaning HTML:\n{}", input);

    HTMLCleaner htmlCleaner = Utils.getComponent(HTMLCleaner.class);
    HTMLCleanerConfiguration cleanerConfiguration = htmlCleaner.getDefaultConfiguration();

    // Start from a copy of the default filters and append the "uniqueId" filter at the end.
    List<HTMLFilter> filterChain = new ArrayList<HTMLFilter>();
    filterChain.addAll(cleanerConfiguration.getFilters());
    filterChain.add(Utils.getComponent(HTMLFilter.class, "uniqueId"));
    cleanerConfiguration.setFilters(filterChain);

    StringReader source = new StringReader(input);
    String cleanedXHtml = HTMLUtils.toString(htmlCleaner.clean(source, cleanerConfiguration));

    LOGGER.debug("Cleaned XHTML:\n{}", cleanedXHtml);
    return cleanedXHtml;
}
Aggregations