Search in sources :

Example 1 with IMicroNode

use of com.helger.xml.microdom.IMicroNode in project ph-css by phax.

In class MainFetchW3C_CSSTests, method _fetch:

/**
 * Downloads the CSS files referenced by the index page below the given base URL.
 * <p>
 * Reads {@code sURL + "index.html"}, patches it so that it can be parsed as XML (drops the
 * first line and closes every {@code <link>} line), collects the {@code href} of every
 * {@code <a>} element that ends with ".xml", replaces ".xml" with ".css" in that name, and
 * writes the content fetched from {@code sURL + name} to a file of that name in
 * {@code sDestDir}. CSS files that cannot be fetched are reported on stdout and skipped.
 *
 * @param sURL
 *        base URL; "index.html" and the CSS file names are appended to it verbatim
 * @param sDestDir
 *        directory the fetched files are written to
 * @throws MalformedURLException
 *         declared for the URLResource construction (presumably when the concatenated URL is
 *         malformed)
 * @throws IllegalStateException
 *         if the index page cannot be read, is empty, or cannot be parsed as XML
 */
private static void _fetch(final String sURL, final String sDestDir) throws MalformedURLException {
    final ICommonsList<String> aCSSFilenames = new CommonsArrayList<>();
    System.out.println("Fetching from " + sURL);
    final ICommonsList<String> aIndex = StreamHelper.readStreamLines(new URLResource(sURL + "index.html"), StandardCharsets.UTF_8);
    // Fail with a clear message instead of an NPE / IndexOutOfBoundsException below
    if (aIndex == null || aIndex.isEmpty())
        throw new IllegalStateException("Failed to read index.html from " + sURL);
    {
        // Remove doctype
        aIndex.remove(0);
        // Fix HTML to be XML
        for (int i = 0; i < aIndex.size(); ++i) {
            final String sLine = aIndex.get(i);
            if (sLine.contains("<link"))
                aIndex.set(i, sLine + "</link>");
        }
    }
    final IMicroDocument aDoc = MicroReader.readMicroXML(StringHelper.getImploded('\n', aIndex));
    if (aDoc == null)
        throw new IllegalStateException("Failed to parse index.html from " + sURL + " as XML");
    MicroVisitor.visit(aDoc, new DefaultHierarchyVisitorCallback<IMicroNode>() {

        @Override
        public EHierarchyVisitorReturn onItemBeforeChildren(final IMicroNode aItem) {
            if (aItem.isElement()) {
                final IMicroElement e = (IMicroElement) aItem;
                if (e.getTagName().equals("a")) {
                    final String sHref = e.getAttributeValue("href");
                    // Anchors without an href attribute (e.g. named anchors) yield null here
                    if (sHref != null && sHref.endsWith(".xml"))
                        aCSSFilenames.add(StringHelper.replaceAll(sHref, ".xml", ".css"));
                }
            }
            return EHierarchyVisitorReturn.CONTINUE;
        }
    });
    System.out.println("Fetching a total of " + aCSSFilenames.size() + " files");
    int i = 0;
    for (final String sCSSFilename : aCSSFilenames) {
        System.out.println("  " + (++i) + ".: " + sCSSFilename);
        final String sContent = StreamHelper.getAllBytesAsString(new URLResource(sURL + sCSSFilename), StandardCharsets.UTF_8);
        if (sContent == null) {
            // Nothing could be read for this file - report it and go on with the remaining ones
            System.out.println("    Failed to fetch " + sCSSFilename + " - skipping");
            continue;
        }
        SimpleFileIO.writeFile(new File(sDestDir, sCSSFilename), sContent, StandardCharsets.UTF_8);
    }
}
Also used : URLResource(com.helger.commons.io.resource.URLResource) IMicroElement(com.helger.xml.microdom.IMicroElement) IMicroNode(com.helger.xml.microdom.IMicroNode) EHierarchyVisitorReturn(com.helger.commons.hierarchy.visit.EHierarchyVisitorReturn) IMicroDocument(com.helger.xml.microdom.IMicroDocument) File(java.io.File) CommonsArrayList(com.helger.commons.collection.impl.CommonsArrayList)

Example 2 with IMicroNode

use of com.helger.xml.microdom.IMicroNode in project ph-schematron by phax.

In class SchematronHelper, method _recursiveResolveAllSchematronIncludes:

/**
 * Replaces every Schematron include element below {@code eRoot} with the content it refers to.
 * <p>
 * For each descendant element in the Schematron namespace whose local name is
 * {@code CSchematronXML.ELEMENT_INCLUDE}, the {@code href} attribute is split at the first
 * '#' into a resource reference and an optional anchor. The reference is resolved relative to
 * {@code aResource} and parsed. Either the document element (no anchor) or the element whose
 * {@code id} attribute equals the anchor is taken; if no element matches, a warning is emitted
 * and the document element is used. That element is detached, checked for the Schematron
 * namespace, resolved recursively and finally put in place of the include element.
 *
 * @param eRoot
 *        element whose descendants are searched for includes; nothing is done if {@code null}
 * @param aResource
 *        resource {@code eRoot} was read from; base for resolving includes and the resource
 *        reported to the error handler
 * @param aSettings
 *        SAX reader settings passed on when parsing included resources; may be {@code null}
 * @param aErrorHandler
 *        receives all errors and warnings
 * @return {@link ESuccess#FAILURE} as soon as one include has no href, cannot be resolved,
 *         read or parsed, has the wrong namespace, or fails recursively;
 *         {@link ESuccess#SUCCESS} otherwise
 */
@SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE")
@Nonnull
private static ESuccess _recursiveResolveAllSchematronIncludes(@Nonnull final IMicroElement eRoot, @Nonnull final IReadableResource aResource, @Nullable final ISAXReaderSettings aSettings, @Nonnull final IPSErrorHandler aErrorHandler) {
    if (eRoot != null) {
        final DefaultSchematronIncludeResolver aIncludeResolver = new DefaultSchematronIncludeResolver(aResource);
        for (final IMicroElement aElement : eRoot.getAllChildElementsRecursive()) {
            if (CSchematron.NAMESPACE_SCHEMATRON.equals(aElement.getNamespaceURI()) && aElement.getLocalName().equals(CSchematronXML.ELEMENT_INCLUDE)) {
                String sHref = aElement.getAttributeValue(CSchematronXML.ATTR_HREF);
                if (sHref == null) {
                    // An include without href cannot be resolved - report it like any other
                    // include problem instead of failing with a NullPointerException below
                    aErrorHandler.error(aResource, null, "Failed to resolve include because the '" + CSchematronXML.ATTR_HREF + "' attribute is missing", null);
                    return ESuccess.FAILURE;
                }
                try {
                    // Split "resource#anchor" into its two parts
                    final int nHashIndex = sHref.indexOf('#');
                    String sAnchor = null;
                    if (nHashIndex >= 0) {
                        sAnchor = sHref.substring(nHashIndex + 1);
                        sHref = sHref.substring(0, nHashIndex);
                    }
                    final IReadableResource aIncludeRes = aIncludeResolver.getResolvedSchematronResource(sHref);
                    if (aIncludeRes == null) {
                        aErrorHandler.error(aResource, null, "Failed to resolve include '" + sHref + "'", null);
                        return ESuccess.FAILURE;
                    }
                    if (s_aLogger.isDebugEnabled())
                        s_aLogger.debug("Resolved '" + sHref + "' relative to '" + aIncludeResolver.getBaseHref() + "' as '" + aIncludeRes.getPath() + "'");
                    // Read XML to be included
                    final IMicroDocument aIncludedDoc = MicroReader.readMicroXML(aIncludeRes, aSettings);
                    if (aIncludedDoc == null) {
                        aErrorHandler.error(aResource, null, "Failed to parse include " + aIncludeRes, null);
                        return ESuccess.FAILURE;
                    }
                    IMicroElement aIncludedContent;
                    if (sAnchor == null) {
                        // no anchor present - include the whole document
                        // Return the document element
                        aIncludedContent = aIncludedDoc.getDocumentElement();
                    } else {
                        final String sFinalAnchor = sAnchor;
                        final Wrapper<IMicroElement> aMatch = new Wrapper<>();
                        // Also include the root element in the search
                        ChildrenProviderHierarchyVisitor.visitFrom(aIncludedDoc.getDocumentElement(), new DefaultHierarchyVisitorCallback<IMicroNode>() {

                            @Override
                            public EHierarchyVisitorReturn onItemBeforeChildren(final IMicroNode aItem) {
                                if (aItem.isElement()) {
                                    final IMicroElement aCurElement = (IMicroElement) aItem;
                                    final String sID = aCurElement.getAttributeValue("id");
                                    // The visit is not stopped on a hit, so the last element
                                    // visited with a matching id is the one that is kept
                                    if (sFinalAnchor.equals(sID))
                                        aMatch.set(aCurElement);
                                }
                                return EHierarchyVisitorReturn.CONTINUE;
                            }
                        }, true);
                        aIncludedContent = aMatch.get();
                        if (aIncludedContent == null) {
                            aErrorHandler.warn(aResource, null, "Failed to resolve an element with the ID '" + sAnchor + "' in " + aIncludeRes + "! Therefore including the whole document!");
                            aIncludedContent = aIncludedDoc.getDocumentElement();
                        }
                    }
                    // Important to detach from parent!
                    aIncludedContent.detachFromParent();
                    // Check for correct namespace URI of included content
                    if (!CSchematron.NAMESPACE_SCHEMATRON.equals(aIncludedContent.getNamespaceURI())) {
                        aErrorHandler.error(aResource, null, "The included resource " + aIncludeRes + " contains the wrong XML namespace URI '" + aIncludedContent.getNamespaceURI() + "' but was expected to have '" + CSchematron.NAMESPACE_SCHEMATRON + "'", null);
                        return ESuccess.FAILURE;
                    }
                    // Check that not a whole Schema but only a part is included
                    if (CSchematronXML.ELEMENT_SCHEMA.equals(aIncludedContent.getLocalName())) {
                        aErrorHandler.warn(aResource, null, "The included resource " + aIncludeRes + " seems to be a complete schema. To includes parts of a schema the respective element must be the root element of the included resource.");
                    }
                    // Recursive resolve includes
                    if (_recursiveResolveAllSchematronIncludes(aIncludedContent, aIncludeRes, aSettings, aErrorHandler).isFailure())
                        return ESuccess.FAILURE;
                    // Now replace "include" element with content in MicroDOM
                    aElement.getParent().replaceChild(aElement, aIncludedContent);
                } catch (final IOException ex) {
                    aErrorHandler.error(aResource, null, "Failed to read include '" + sHref + "'", ex);
                    return ESuccess.FAILURE;
                }
            }
        }
    }
    return ESuccess.SUCCESS;
}
Also used : Wrapper(com.helger.commons.wrapper.Wrapper) IMicroElement(com.helger.xml.microdom.IMicroElement) IReadableResource(com.helger.commons.io.resource.IReadableResource) IMicroNode(com.helger.xml.microdom.IMicroNode) IMicroDocument(com.helger.xml.microdom.IMicroDocument) IOException(java.io.IOException) DefaultHierarchyVisitorCallback(com.helger.commons.hierarchy.visit.DefaultHierarchyVisitorCallback) DefaultSchematronIncludeResolver(com.helger.schematron.resolve.DefaultSchematronIncludeResolver) Nonnull(javax.annotation.Nonnull) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings)

Aggregations

IMicroDocument (com.helger.xml.microdom.IMicroDocument)2 IMicroElement (com.helger.xml.microdom.IMicroElement)2 IMicroNode (com.helger.xml.microdom.IMicroNode)2 CommonsArrayList (com.helger.commons.collection.impl.CommonsArrayList)1 DefaultHierarchyVisitorCallback (com.helger.commons.hierarchy.visit.DefaultHierarchyVisitorCallback)1 EHierarchyVisitorReturn (com.helger.commons.hierarchy.visit.EHierarchyVisitorReturn)1 IReadableResource (com.helger.commons.io.resource.IReadableResource)1 URLResource (com.helger.commons.io.resource.URLResource)1 Wrapper (com.helger.commons.wrapper.Wrapper)1 DefaultSchematronIncludeResolver (com.helger.schematron.resolve.DefaultSchematronIncludeResolver)1 SuppressFBWarnings (edu.umd.cs.findbugs.annotations.SuppressFBWarnings)1 File (java.io.File)1 IOException (java.io.IOException)1 Nonnull (javax.annotation.Nonnull)1