use of com.helger.xml.microdom.IMicroNode in project ph-css by phax.
the class MainFetchW3C_CSSTests method _fetch.
/**
 * Downloads the CSS files referenced from the <code>index.html</code> page
 * located below the passed base URL and stores them in the destination
 * directory.
 * <p>
 * The index page is read line by line, its first line (the doctype) is
 * dropped and every line containing a <code>&lt;link</code> gets a closing
 * <code>&lt;/link&gt;</code> appended so that the page can be parsed as XML.
 * Every <code>&lt;a href="....xml"&gt;</code> found in the parsed document is
 * mapped to the file name with a <code>.css</code> extension, which is then
 * fetched relative to the base URL.
 *
 * @param sURL
 *        Base URL; <code>index.html</code> and the CSS file names are
 *        appended directly, so it is expected to end with a slash.
 * @param sDestDir
 *        Directory the fetched CSS files are written to.
 * @throws MalformedURLException
 *         If one of the concatenated URLs is not a valid URL.
 */
private static void _fetch(final String sURL, final String sDestDir) throws MalformedURLException {
  final ICommonsList<String> aCSSFilenames = new CommonsArrayList<>();
  System.out.println("Fetching from " + sURL);
  final ICommonsList<String> aIndex = StreamHelper.readStreamLines(new URLResource(sURL + "index.html"), StandardCharsets.UTF_8);
  {
    // Remove doctype
    aIndex.remove(0);
    // Fix HTML to be XML: unclosed <link> elements would break the XML parser
    for (int i = 0; i < aIndex.size(); ++i) {
      final String sLine = aIndex.get(i);
      if (sLine.contains("<link"))
        aIndex.set(i, sLine + "</link>");
    }
  }
  final IMicroDocument aDoc = MicroReader.readMicroXML(StringHelper.getImploded('\n', aIndex));
  MicroVisitor.visit(aDoc, new DefaultHierarchyVisitorCallback<IMicroNode>() {
    @Override
    public EHierarchyVisitorReturn onItemBeforeChildren(final IMicroNode aItem) {
      if (aItem.isElement()) {
        final IMicroElement e = (IMicroElement) aItem;
        if (e.getTagName().equals("a")) {
          // May be null for anchors that carry no href attribute
          final String sHref = e.getAttributeValue("href");
          if (sHref != null && sHref.endsWith(".xml")) {
            // Swap only the trailing extension; ".xml" elsewhere in the
            // reference must stay untouched
            aCSSFilenames.add(sHref.substring(0, sHref.length() - ".xml".length()) + ".css");
          }
        }
      }
      return EHierarchyVisitorReturn.CONTINUE;
    }
  });
  System.out.println("Fetching a total of " + aCSSFilenames.size() + " files");
  int i = 0;
  for (final String sCSSFilename : aCSSFilenames) {
    System.out.println(" " + (++i) + ".: " + sCSSFilename);
    final String sContent = StreamHelper.getAllBytesAsString(new URLResource(sURL + sCSSFilename), StandardCharsets.UTF_8);
    SimpleFileIO.writeFile(new File(sDestDir, sCSSFilename), sContent, StandardCharsets.UTF_8);
  }
}
use of com.helger.xml.microdom.IMicroNode in project ph-schematron by phax.
the class SchematronHelper method _recursiveResolveAllSchematronIncludes.
/**
 * Replaces every Schematron <code>include</code> element below the passed
 * root element with the content it references, descending recursively into
 * the included content.
 * <p>
 * The <code>href</code> of an include may carry a <code>#anchor</code>
 * suffix. In that case the element of the included document whose
 * <code>id</code> attribute equals the anchor is included; if no such
 * element exists a warning is emitted and the whole document element is
 * included instead.
 *
 * @param eRoot
 *        The element whose descendants are searched for include elements.
 * @param aResource
 *        The resource <code>eRoot</code> belongs to. Used as the base for
 *        resolving include references and as the location in error reports.
 * @param aSettings
 *        Optional reader settings passed on when parsing included
 *        resources.
 * @param aErrorHandler
 *        Receives all errors and warnings.
 * @return {@link ESuccess#FAILURE} as soon as one include has no
 *         <code>href</code>, cannot be resolved, read or parsed, or has the
 *         wrong namespace URI; {@link ESuccess#SUCCESS} otherwise.
 */
@SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE")
@Nonnull
private static ESuccess _recursiveResolveAllSchematronIncludes(@Nonnull final IMicroElement eRoot, @Nonnull final IReadableResource aResource, @Nullable final ISAXReaderSettings aSettings, @Nonnull final IPSErrorHandler aErrorHandler) {
  if (eRoot != null) {
    final DefaultSchematronIncludeResolver aIncludeResolver = new DefaultSchematronIncludeResolver(aResource);
    for (final IMicroElement aElement : eRoot.getAllChildElementsRecursive()) {
      if (CSchematron.NAMESPACE_SCHEMATRON.equals(aElement.getNamespaceURI()) && aElement.getLocalName().equals(CSchematronXML.ELEMENT_INCLUDE)) {
        String sHref = aElement.getAttributeValue(CSchematronXML.ATTR_HREF);
        if (sHref == null) {
          // An include without a reference cannot be resolved; report it
          // instead of failing with a NullPointerException below
          aErrorHandler.error(aResource, null, "Include element is missing the '" + CSchematronXML.ATTR_HREF + "' attribute", null);
          return ESuccess.FAILURE;
        }
        try {
          // Split an optional "#anchor" suffix from the reference
          final int nHashIndex = sHref.indexOf('#');
          String sAnchor = null;
          if (nHashIndex >= 0) {
            sAnchor = sHref.substring(nHashIndex + 1);
            sHref = sHref.substring(0, nHashIndex);
          }
          final IReadableResource aIncludeRes = aIncludeResolver.getResolvedSchematronResource(sHref);
          if (aIncludeRes == null) {
            aErrorHandler.error(aResource, null, "Failed to resolve include '" + sHref + "'", null);
            return ESuccess.FAILURE;
          }
          if (s_aLogger.isDebugEnabled())
            s_aLogger.debug("Resolved '" + sHref + "' relative to '" + aIncludeResolver.getBaseHref() + "' as '" + aIncludeRes.getPath() + "'");
          // Read XML to be included
          final IMicroDocument aIncludedDoc = MicroReader.readMicroXML(aIncludeRes, aSettings);
          if (aIncludedDoc == null) {
            aErrorHandler.error(aResource, null, "Failed to parse include " + aIncludeRes, null);
            return ESuccess.FAILURE;
          }
          IMicroElement aIncludedContent;
          if (sAnchor == null) {
            // no anchor present - include the whole document
            // Return the document element
            aIncludedContent = aIncludedDoc.getDocumentElement();
          } else {
            final String sFinalAnchor = sAnchor;
            final Wrapper<IMicroElement> aMatch = new Wrapper<>();
            // Also include the root element in the search.
            // The visitor never stops early, so if several elements share
            // the ID the last one visited is used.
            ChildrenProviderHierarchyVisitor.visitFrom(aIncludedDoc.getDocumentElement(), new DefaultHierarchyVisitorCallback<IMicroNode>() {
              @Override
              public EHierarchyVisitorReturn onItemBeforeChildren(final IMicroNode aItem) {
                if (aItem.isElement()) {
                  final IMicroElement aCurElement = (IMicroElement) aItem;
                  final String sID = aCurElement.getAttributeValue("id");
                  if (sFinalAnchor.equals(sID))
                    aMatch.set(aCurElement);
                }
                return EHierarchyVisitorReturn.CONTINUE;
              }
            }, true);
            aIncludedContent = aMatch.get();
            if (aIncludedContent == null) {
              aErrorHandler.warn(aResource, null, "Failed to resolve an element with the ID '" + sAnchor + "' in " + aIncludeRes + "! Therefore including the whole document!");
              aIncludedContent = aIncludedDoc.getDocumentElement();
            }
          }
          // Important to detach from parent!
          aIncludedContent.detachFromParent();
          // Check for correct namespace URI of included content
          if (!CSchematron.NAMESPACE_SCHEMATRON.equals(aIncludedContent.getNamespaceURI())) {
            aErrorHandler.error(aResource, null, "The included resource " + aIncludeRes + " contains the wrong XML namespace URI '" + aIncludedContent.getNamespaceURI() + "' but was expected to have '" + CSchematron.NAMESPACE_SCHEMATRON + "'", null);
            return ESuccess.FAILURE;
          }
          // Check that not a whole Schema but only a part is included
          if (CSchematronXML.ELEMENT_SCHEMA.equals(aIncludedContent.getLocalName())) {
            aErrorHandler.warn(aResource, null, "The included resource " + aIncludeRes + " seems to be a complete schema. To includes parts of a schema the respective element must be the root element of the included resource.");
          }
          // Recursive resolve includes
          if (_recursiveResolveAllSchematronIncludes(aIncludedContent, aIncludeRes, aSettings, aErrorHandler).isFailure())
            return ESuccess.FAILURE;
          // Now replace "include" element with content in MicroDOM
          aElement.getParent().replaceChild(aElement, aIncludedContent);
        } catch (final IOException ex) {
          aErrorHandler.error(aResource, null, "Failed to read include '" + sHref + "'", ex);
          return ESuccess.FAILURE;
        }
      }
    }
  }
  return ESuccess.SUCCESS;
}
Aggregations