use of org.xwiki.xml.html.HTMLCleanerConfiguration in project xwiki-platform by xwiki.
the class MiscWysiwygCleaningTest method testParagraphsWithNamespaces.
/**
 * Checks that a paragraph written with a namespace prefix ({@code <w:p>}) is reported as a single {@code p}
 * element when the cleaner is configured with namespace awareness set to {@code "false"}.
 */
@Test
public void testParagraphsWithNamespaces() {
    // NOTE(review): the configuration is taken from officeHTMLCleaner while the cleaning itself is performed by
    // wysiwygHTMLCleaner - confirm that mixing the two cleaners is intentional.
    HTMLCleanerConfiguration cleanerConfiguration = this.officeHTMLCleaner.getDefaultConfiguration();
    cleanerConfiguration.setParameters(
        Collections.singletonMap(HTMLCleanerConfiguration.NAMESPACES_AWARE, "false"));

    // The prefixed paragraph is wrapped in the shared header/footer before being cleaned.
    StringReader input = new StringReader(header + "<w:p>paragraph</w:p>" + footer);
    Document cleaned = wysiwygHTMLCleaner.clean(input, cleanerConfiguration);

    // Exactly one un-prefixed paragraph must be present in the cleaned document.
    Assert.assertEquals(1, cleaned.getElementsByTagName("p").getLength());
}
use of org.xwiki.xml.html.HTMLCleanerConfiguration in project xwiki-platform by xwiki.
the class DefaultXHTMLOfficeDocumentBuilderTest method testXHTMLOfficeDocumentBuilding.
/**
 * Builds an XHTML office document from a stubbed conversion and checks three things: the parameters pushed to
 * the cleaner configuration, the returned content document, and the artifacts exposed by the result.
 */
@Test
public void testXHTMLOfficeDocumentBuilding() throws Exception {
    // Target document and the string the serializer is stubbed to produce for it.
    DocumentReference targetReference = new DocumentReference("wiki", Arrays.asList("Path", "To"), "Page");
    when(this.entityReferenceSerializer.serialize(targetReference)).thenReturn("wiki:Path.To.Page");

    // The office converter is stubbed to produce one HTML file and one text file.
    InputStream officeInput = new ByteArrayInputStream("office content".getBytes());
    Map<String, byte[]> convertedFiles = new HashMap<String, byte[]>();
    convertedFiles.put("file.html", "HTML content".getBytes());
    convertedFiles.put("file.txt", "Text content".getBytes());
    when(this.officeConverter.convert(Collections.singletonMap("file.odt", officeInput), "file.odt",
        "file.html")).thenReturn(convertedFiles);

    // The cleaner is stubbed to return a document that carries one embedded image as user data.
    Map<String, byte[]> images = Collections.singletonMap("image.png", "Image content".getBytes());
    Document cleanedDocument = mock(Document.class);
    when(cleanedDocument.getUserData("embeddedImages")).thenReturn(images);
    HTMLCleanerConfiguration cleanerConfiguration = mock(HTMLCleanerConfiguration.class);
    when(this.officeHTMLCleaner.getDefaultConfiguration()).thenReturn(cleanerConfiguration);
    when(this.officeHTMLCleaner.clean(any(Reader.class), eq(cleanerConfiguration))).thenReturn(cleanedDocument);

    XHTMLOfficeDocument built =
        this.mocker.getComponentUnderTest().build(officeInput, "file.odt", targetReference, true);

    // The builder must have configured the cleaner with exactly these parameters.
    Map<String, String> expectedParameters = new HashMap<String, String>();
    expectedParameters.put("filterStyles", "strict");
    expectedParameters.put("attachEmbeddedImages", "true");
    expectedParameters.put("targetDocument", "wiki:Path.To.Page");
    verify(cleanerConfiguration).setParameters(expectedParameters);

    assertEquals(cleanedDocument, built.getContentDocument());

    // Expected artifacts: the converted text file plus the embedded image; "file.html" is not expected here.
    Map<String, byte[]> expectedArtifacts = new HashMap<String, byte[]>();
    expectedArtifacts.put("image.png", images.get("image.png"));
    expectedArtifacts.put("file.txt", convertedFiles.get("file.txt"));
    assertEquals(expectedArtifacts, built.getArtifacts());
}
use of org.xwiki.xml.html.HTMLCleanerConfiguration in project xwiki-platform by xwiki.
the class LineBreakOfficeCleaningTest method checkLineBreakReplacements.
/**
 * Utility method that cleans an HTML fragment with the office cleaner and asserts how many {@code <br/>} and
 * {@code <div>} elements the cleaned document contains. Only tag names are counted; attributes such as the
 * {@code class} of a {@code <div>} are not inspected.
 *
 * @param html the HTML content, which is wrapped between the shared header and footer before cleaning.
 * @param expectedBrCount expected number of {@code <br/>} elements after cleaning.
 * @param expectedDivCount expected number of {@code <div>} elements after cleaning (presumably the empty-line
 *            replacements for line breaks - only the tag name is verified).
 */
private void checkLineBreakReplacements(String html, int expectedBrCount, int expectedDivCount) {
    final String targetDocument = "Import.Test";

    // Let the mocked resolver answer for the target document used in the cleaning parameters below.
    getMockery().checking(new Expectations() {
        {
            allowing(mockDocumentReferenceResolver).resolve(targetDocument);
            will(returnValue(new DocumentReference("wiki", "Import", "Test")));
        }
    });

    HTMLCleanerConfiguration cleanerConfiguration = this.officeHTMLCleaner.getDefaultConfiguration();
    cleanerConfiguration.setParameters(Collections.singletonMap("targetDocument", targetDocument));

    StringReader input = new StringReader(header + html + footer);
    Document cleaned = officeHTMLCleaner.clean(input, cleanerConfiguration);

    // Line breaks are checked first, then divs.
    Assert.assertEquals(expectedBrCount, cleaned.getElementsByTagName("br").getLength());
    Assert.assertEquals(expectedDivCount, cleaned.getElementsByTagName("div").getLength());
}
use of org.xwiki.xml.html.HTMLCleanerConfiguration in project xwiki-platform by xwiki.
the class DefaultPresentationBuilder method cleanPresentationHTML.
/**
 * Runs the presentation HTML through the office HTML cleaner, strips the HTML envelope from the cleaned document
 * and serializes what is left to a string. The serialized target document reference is passed to the cleaner as
 * the {@code targetDocument} parameter; per this method's contract, the cleaning is done mainly so that the slide
 * image URLs point to the corresponding attachments.
 *
 * @param dirtyHTML the HTML to be cleaned
 * @param targetDocumentReference the document where the slide images will be attached
 * @return the cleaned HTML
 */
protected String cleanPresentationHTML(String dirtyHTML, DocumentReference targetDocumentReference) {
    HTMLCleanerConfiguration cleanerConfiguration = this.officeHTMLCleaner.getDefaultConfiguration();

    // Tell the cleaner which document the content is targeted at.
    String serializedTarget = this.entityReferenceSerializer.serialize(targetDocumentReference);
    cleanerConfiguration.setParameters(Collections.singletonMap("targetDocument", serializedTarget));

    StringReader input = new StringReader(dirtyHTML);
    Document cleaned = this.officeHTMLCleaner.clean(input, cleanerConfiguration);

    // Drop the envelope before turning the document back into text.
    HTMLUtils.stripHTMLEnvelope(cleaned);
    return HTMLUtils.toString(cleaned);
}
use of org.xwiki.xml.html.HTMLCleanerConfiguration in project xwiki-platform by xwiki.
the class OfficeHTMLCleaner method getDefaultConfiguration.
/**
 * {@inheritDoc}
 * <p>
 * Starts from the configuration of the default HTML cleaner and appends the office cleaning filters after the
 * filters that configuration already contains.
 */
@Override
public HTMLCleanerConfiguration getDefaultConfiguration() {
    HTMLCleanerConfiguration officeConfiguration = this.defaultHtmlCleaner.getDefaultConfiguration();

    // Work on a copy of the default filter list, then append the office filters in their fixed order.
    List<HTMLFilter> allFilters = new ArrayList<HTMLFilter>(officeConfiguration.getFilters());
    allFilters.add(this.stripperFilter);
    allFilters.add(this.styleFilter);
    allFilters.add(this.redundancyFilter);
    allFilters.add(this.paragraphFilter);
    allFilters.add(this.imageFilter);
    allFilters.add(this.anchorFilter);
    allFilters.add(this.listFilter);
    allFilters.add(this.tableFilter);
    allFilters.add(this.lineBreakFilter);

    officeConfiguration.setFilters(allFilters);
    return officeConfiguration;
}
Aggregations