Search in sources :

Example 6 with HTMLCleanerConfiguration

use of org.xwiki.xml.html.HTMLCleanerConfiguration in project xwiki-platform by xwiki.

the class MiscWysiwygCleaningTest method testParagraphsWithNamespaces.

/**
 * Verifies that a namespace-prefixed paragraph ({@code <w:p>}) is found as a plain {@code p} element after the
 * content has been cleaned with the {@code NAMESPACES_AWARE} parameter set to {@code "false"}.
 */
@Test
public void testParagraphsWithNamespaces() {
    // NOTE(review): the configuration is taken from officeHTMLCleaner while the cleaning itself is performed by
    // wysiwygHTMLCleaner - confirm that this combination is intentional.
    HTMLCleanerConfiguration cleanerConfig = this.officeHTMLCleaner.getDefaultConfiguration();
    cleanerConfig.setParameters(Collections.singletonMap(HTMLCleanerConfiguration.NAMESPACES_AWARE, "false"));

    StringReader input = new StringReader(header + "<w:p>paragraph</w:p>" + footer);
    Document cleaned = wysiwygHTMLCleaner.clean(input, cleanerConfig);

    // Exactly one paragraph must be reachable through its un-prefixed tag name.
    Assert.assertEquals(1, cleaned.getElementsByTagName("p").getLength());
}
Also used : NodeList(org.w3c.dom.NodeList) StringReader(java.io.StringReader) Document(org.w3c.dom.Document) HTMLCleanerConfiguration(org.xwiki.xml.html.HTMLCleanerConfiguration) Test(org.junit.Test)

Example 7 with HTMLCleanerConfiguration

use of org.xwiki.xml.html.HTMLCleanerConfiguration in project xwiki-platform by xwiki.

the class DefaultXHTMLOfficeDocumentBuilderTest method testXHTMLOfficeDocumentBuilding.

/**
 * Checks the document produced by the component under test for an office file: the cleaner configuration must
 * receive the expected parameters, the cleaned document must become the content document, and the artifacts must
 * hold the text file returned by the converter plus the image stored as user data on the cleaned document (the
 * HTML file itself is not expected among the artifacts).
 */
@Test
public void testXHTMLOfficeDocumentBuilding() throws Exception {
    // Target document and its serialized form.
    DocumentReference targetReference = new DocumentReference("wiki", Arrays.asList("Path", "To"), "Page");
    when(this.entityReferenceSerializer.serialize(targetReference)).thenReturn("wiki:Path.To.Page");

    // The converter is stubbed to return an HTML file and a text file for the office input.
    InputStream officeInput = new ByteArrayInputStream("office content".getBytes());
    byte[] textContent = "Text content".getBytes();
    Map<String, byte[]> converted = new HashMap<>();
    converted.put("file.html", "HTML content".getBytes());
    converted.put("file.txt", textContent);
    when(this.officeConverter.convert(Collections.singletonMap("file.odt", officeInput), "file.odt", "file.html"))
        .thenReturn(converted);

    // The cleaned document carries one embedded image as user data.
    byte[] imageContent = "Image content".getBytes();
    Map<String, byte[]> images = Collections.singletonMap("image.png", imageContent);
    Document cleanedDocument = mock(Document.class);
    when(cleanedDocument.getUserData("embeddedImages")).thenReturn(images);

    // The cleaner hands out a mocked configuration and returns the mocked document for it.
    HTMLCleanerConfiguration cleanerConfig = mock(HTMLCleanerConfiguration.class);
    when(this.officeHTMLCleaner.getDefaultConfiguration()).thenReturn(cleanerConfig);
    when(this.officeHTMLCleaner.clean(any(Reader.class), eq(cleanerConfig))).thenReturn(cleanedDocument);

    XHTMLOfficeDocument built =
        this.mocker.getComponentUnderTest().build(officeInput, "file.odt", targetReference, true);

    // The cleaner must have been configured with exactly these parameters.
    Map<String, String> expectedParameters = new HashMap<>();
    expectedParameters.put("targetDocument", "wiki:Path.To.Page");
    expectedParameters.put("attachEmbeddedImages", "true");
    expectedParameters.put("filterStyles", "strict");
    verify(cleanerConfig).setParameters(expectedParameters);

    assertEquals(cleanedDocument, built.getContentDocument());

    // The same byte[] instances are reused here: arrays compare by reference inside Map.equals.
    Map<String, byte[]> expectedArtifacts = new HashMap<>();
    expectedArtifacts.put("file.txt", textContent);
    expectedArtifacts.put("image.png", imageContent);
    assertEquals(expectedArtifacts, built.getArtifacts());
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) HashMap(java.util.HashMap) InputStream(java.io.InputStream) XHTMLOfficeDocument(org.xwiki.officeimporter.document.XHTMLOfficeDocument) Reader(java.io.Reader) Document(org.w3c.dom.Document) DocumentReference(org.xwiki.model.reference.DocumentReference) HTMLCleanerConfiguration(org.xwiki.xml.html.HTMLCleanerConfiguration) Test(org.junit.Test)

Example 8 with HTMLCleanerConfiguration

use of org.xwiki.xml.html.HTMLCleanerConfiguration in project xwiki-platform by xwiki.

the class LineBreakOfficeCleaningTest method checkLineBreakReplacements.

/**
 * Utility method that cleans the given HTML fragment (wrapped between the test header and footer) with the office
 * HTML cleaner and checks how many {@code <br/>} and {@code <div>} elements remain, i.e. whether line breaks were
 * converted to {@code <div class="wikimodel-emptyline"/>} elements as expected.
 *
 * @param html the html content.
 * @param expectedBrCount expected count of {@code <br/>} elements after cleaning.
 * @param expectedDivCount expected count of {@code <div>} elements after cleaning.
 */
private void checkLineBreakReplacements(String html, int expectedBrCount, int expectedDivCount) {
    // Let the mocked resolver map the target document name used below to a reference.
    getMockery().checking(new Expectations() {

        {
            allowing(mockDocumentReferenceResolver).resolve("Import.Test");
            will(returnValue(new DocumentReference("wiki", "Import", "Test")));
        }
    });

    HTMLCleanerConfiguration cleanerConfig = this.officeHTMLCleaner.getDefaultConfiguration();
    cleanerConfig.setParameters(Collections.singletonMap("targetDocument", "Import.Test"));

    String fullHtml = header + html + footer;
    Document cleaned = officeHTMLCleaner.clean(new StringReader(fullHtml), cleanerConfig);

    Assert.assertEquals(expectedBrCount, cleaned.getElementsByTagName("br").getLength());
    Assert.assertEquals(expectedDivCount, cleaned.getElementsByTagName("div").getLength());
}
Also used : Expectations(org.jmock.Expectations) NodeList(org.w3c.dom.NodeList) StringReader(java.io.StringReader) Document(org.w3c.dom.Document) DocumentReference(org.xwiki.model.reference.DocumentReference) HTMLCleanerConfiguration(org.xwiki.xml.html.HTMLCleanerConfiguration)

Example 9 with HTMLCleanerConfiguration

use of org.xwiki.xml.html.HTMLCleanerConfiguration in project xwiki-platform by xwiki.

the class DefaultPresentationBuilder method cleanPresentationHTML.

/**
 * Runs the presentation HTML through the office HTML cleaner, passing the serialized target document reference as
 * the {@code targetDocument} cleaning parameter, then strips the HTML envelope from the result and serializes it.
 * Calling this method is mainly needed so that the slide image URLs are updated to point to the corresponding
 * attachments.
 *
 * @param dirtyHTML the HTML to be cleaned
 * @param targetDocumentReference the document where the slide images will be attached
 * @return the cleaned HTML
 */
protected String cleanPresentationHTML(String dirtyHTML, DocumentReference targetDocumentReference) {
    HTMLCleanerConfiguration cleanerConfig = this.officeHTMLCleaner.getDefaultConfiguration();
    String serializedTarget = this.entityReferenceSerializer.serialize(targetDocumentReference);
    cleanerConfig.setParameters(Collections.singletonMap("targetDocument", serializedTarget));

    StringReader input = new StringReader(dirtyHTML);
    Document cleaned = this.officeHTMLCleaner.clean(input, cleanerConfig);

    // Strip the HTML envelope before serializing the document back to a string.
    HTMLUtils.stripHTMLEnvelope(cleaned);
    return HTMLUtils.toString(cleaned);
}
Also used : StringReader(java.io.StringReader) Document(org.w3c.dom.Document) XDOMOfficeDocument(org.xwiki.officeimporter.document.XDOMOfficeDocument) HTMLCleanerConfiguration(org.xwiki.xml.html.HTMLCleanerConfiguration)

Example 10 with HTMLCleanerConfiguration

use of org.xwiki.xml.html.HTMLCleanerConfiguration in project xwiki-platform by xwiki.

the class OfficeHTMLCleaner method getDefaultConfiguration.

/**
 * Builds the default configuration of this cleaner: the configuration of the default HTML cleaner with the office
 * cleaning filters appended after the filters it already contains.
 *
 * @return the default HTML cleaner configuration, with its filter list extended by the office cleaning filters
 */
@Override
public HTMLCleanerConfiguration getDefaultConfiguration() {
    HTMLCleanerConfiguration defaults = this.defaultHtmlCleaner.getDefaultConfiguration();

    // Start from a copy of the default filters, then append the office filters in their fixed order.
    List<HTMLFilter> combined = new ArrayList<HTMLFilter>(defaults.getFilters());
    combined.add(this.stripperFilter);
    combined.add(this.styleFilter);
    combined.add(this.redundancyFilter);
    combined.add(this.paragraphFilter);
    combined.add(this.imageFilter);
    combined.add(this.anchorFilter);
    combined.add(this.listFilter);
    combined.add(this.tableFilter);
    combined.add(this.lineBreakFilter);

    defaults.setFilters(combined);
    return defaults;
}
Also used : ArrayList(java.util.ArrayList) HTMLFilter(org.xwiki.xml.html.filter.HTMLFilter) HTMLCleanerConfiguration(org.xwiki.xml.html.HTMLCleanerConfiguration)

Aggregations

HTMLCleanerConfiguration (org.xwiki.xml.html.HTMLCleanerConfiguration)11 Document (org.w3c.dom.Document)7 StringReader (java.io.StringReader)5 ArrayList (java.util.ArrayList)4 ByteArrayInputStream (java.io.ByteArrayInputStream)3 InputStream (java.io.InputStream)3 Reader (java.io.Reader)3 HashMap (java.util.HashMap)3 Test (org.junit.Test)3 DocumentReference (org.xwiki.model.reference.DocumentReference)3 HTMLFilter (org.xwiki.xml.html.filter.HTMLFilter)3 NodeList (org.w3c.dom.NodeList)2 XDOMOfficeDocument (org.xwiki.officeimporter.document.XDOMOfficeDocument)2 XHTMLOfficeDocument (org.xwiki.officeimporter.document.XHTMLOfficeDocument)2 InputStreamReader (java.io.InputStreamReader)1 Map (java.util.Map)1 Expectations (org.jmock.Expectations)1 DocumentAccessBridge (org.xwiki.bridge.DocumentAccessBridge)1 DocumentModelBridge (org.xwiki.bridge.DocumentModelBridge)1 OfficeImporterException (org.xwiki.officeimporter.OfficeImporterException)1