use of org.w3c.dom.DocumentType in project nokogiri by sparklemotion.
the class XmlDtd method newEmpty.
/**
 * Builds a {@code Nokogiri::XML::DTD} wrapper around the doctype node of {@code doc}.
 * <p>
 * When the document has no doctype yet, one is created from the given name and
 * identifiers and appended to the document; otherwise the existing doctype node
 * is wrapped as-is. In both cases the wrapper's {@code name}, {@code pubId} and
 * {@code sysId} fields are set from the Ruby arguments passed in.
 *
 * @param runtime     the Ruby runtime used to allocate the wrapper
 * @param doc         the DOM document that owns (or will own) the doctype
 * @param name        Ruby value holding the doctype name
 * @param external_id Ruby value holding the public identifier
 * @param system_id   Ruby value holding the system identifier
 * @return the newly allocated DTD wrapper
 */
public static XmlDtd newEmpty(Ruby runtime, Document doc, IRubyObject name, IRubyObject external_id, IRubyObject system_id) {
    DocumentType docType = doc.getDoctype();
    if (docType == null) {
        // No doctype on the document yet: create one and attach it.
        docType = doc.getImplementation().createDocumentType(
            NokogiriHelpers.rubyStringToString(name),
            NokogiriHelpers.rubyStringToString(external_id),
            NokogiriHelpers.rubyStringToString(system_id));
        doc.appendChild(docType);
    }

    // FIXME: what if the document had a doc type, why are we here ?
    XmlDtd wrapper = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD"));
    wrapper.setNode(runtime, docType);
    wrapper.name = name;
    wrapper.pubId = external_id;
    wrapper.sysId = system_id;
    return wrapper;
}
use of org.w3c.dom.DocumentType in project nokogiri by sparklemotion.
the class XmlDtd method accept.
/**
 * Presents the owning document's doctype node to the save visitor
 * (an {@code enter} immediately followed by a {@code leave}).
 *
 * @param context the current thread context (not used by this method)
 * @param visitor the visitor that receives the doctype node
 */
@Override
public void accept(ThreadContext context, SaveContextVisitor visitor) {
    // Because nekoDTD is used to parse the DTD, the wrapped node might be an
    // ElementImpl rather than a DocumentType. An external subset doesn't need
    // to show up in the output, so this method only looks at the owner
    // document's doctype.
    final DocumentType ownerDocType = node.getOwnerDocument().getDoctype();
    visitor.enter(ownerDocType);
    visitor.leave(ownerDocType);
}
use of org.w3c.dom.DocumentType in project nokogiri by sparklemotion.
the class DOM2DTM method getUnparsedEntityURI.
/**
 * The getUnparsedEntityURI function returns the URI of the unparsed
 * entity with the specified name in the same document as the context
 * node (see [3.3 Unparsed Entities]). It returns the empty string if
 * there is no such entity.
 * <p>
 * XML processors may choose to use the System Identifier (if one
 * is provided) to resolve the entity, rather than the URI in the
 * Public Identifier. The details are dependent on the processor, and
 * we would have to support some form of plug-in resolver to handle
 * this properly. Currently, we simply return the System Identifier if
 * present, and hope that it is a usable URI or that our caller can
 * map it to one.
 * TODO: Resolve Public Identifiers... or consider changing function name.
 * <p>
 * If we find a relative URI
 * reference, XML expects it to be resolved in terms of the base URI
 * of the document. The DOM doesn't do that for us, and it isn't
 * entirely clear whether that should be done here; currently that's
 * pushed up to a higher level of our application. (Note that DOM Level
 * 1 didn't store the document's base URI.)
 * TODO: Consider resolving Relative URIs.
 * <p>
 * (The DOM's statement that "An XML processor may choose to
 * completely expand entities before the structure model is passed
 * to the DOM" refers only to parsed entities, not unparsed, and hence
 * doesn't affect this function.)
 *
 * @param name A string containing the Entity Name of the unparsed
 * entity.
 *
 * @return String containing the URI of the Unparsed Entity, or an
 * empty string if no such entity exists, if the entity is a parsed
 * entity, or if the entity carries neither a system nor a public
 * identifier. Never {@code null}.
 */
public String getUnparsedEntityURI(String name) {
    final String notFound = "";

    // The root is either the document itself or a node inside one.
    Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE) ? (Document) m_root : m_root.getOwnerDocument();
    if (null == doc) {
        return notFound;
    }

    DocumentType doctype = doc.getDoctype();
    if (null == doctype) {
        return notFound;
    }

    NamedNodeMap entities = doctype.getEntities();
    if (null == entities) {
        return notFound;
    }

    Entity entity = (Entity) entities.getNamedItem(name);
    if (null == entity) {
        return notFound;
    }

    // Only entities declared with a notation are unparsed.
    if (null == entity.getNotationName()) {
        return notFound;
    }

    // The draft says: "The XSLT processor may use the public
    // identifier to generate a URI for the entity instead of the URI
    // specified in the system identifier. If the XSLT processor does
    // not use the public identifier to generate the URI, it must use
    // the system identifier; if the system identifier is a relative
    // URI, it must be resolved into an absolute URI using the URI of
    // the resource containing the entity declaration as the base
    // URI [RFC2396]."
    // So I'm falling a bit short here: the system identifier should be
    // resolved to an absolute URL, but that's hard to do from here.
    String url = entity.getSystemId();
    if (null == url) {
        url = entity.getPublicId();
    }

    // Both identifiers can be absent on a DOM-built entity; honour the
    // documented contract by returning the empty string instead of null.
    return (null == url) ? notFound : url;
}
use of org.w3c.dom.DocumentType in project generator by mybatis.
the class XmlFileMergerJaxp method getMergedSource.
/**
 * Merges a newly generated XML document into an existing XML file and
 * returns the pretty-printed result.
 * <p>
 * Both inputs are parsed with external DTD/schema access disabled, secure
 * processing enabled and a {@code NullEntityResolver} installed. The merge
 * then works on the existing document in place:
 * <ol>
 * <li>the root element's attributes are replaced by those of the new root;</li>
 * <li>child nodes of the existing root that {@code isGeneratedNode} accepts,
 * plus a white-space node directly preceding each of them, are removed;</li>
 * <li>the children of the new root are imported and placed before the first
 * remaining child of the existing root (or appended if none remains),
 * skipping a trailing white-space-only node.</li>
 * </ol>
 *
 * @param newFile          source of the newly generated document
 * @param existingFile     source of the document already on disk
 * @param existingFileName name of the existing file, used in the error message
 * @return the result of {@code prettyPrint} applied to the merged existing document
 * @throws IOException                  if either source cannot be read
 * @throws SAXException                 if either source cannot be parsed
 * @throws ParserConfigurationException if the XML parser cannot be configured
 * @throws ShellException               if either document has no DOCTYPE, or the
 *                                      DOCTYPE names of the two documents differ
 */
public static String getMergedSource(InputSource newFile, InputSource existingFile, String existingFileName) throws IOException, SAXException, ParserConfigurationException, ShellException {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
    factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
    factory.setExpandEntityReferences(false);
    factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
    DocumentBuilder builder = factory.newDocumentBuilder();
    builder.setEntityResolver(new NullEntityResolver());

    Document existingDocument = builder.parse(existingFile);
    Document newDocument = builder.parse(newFile);

    // Document.getDoctype() returns null when the document has no DOCTYPE
    // declaration. Such a file cannot be verified to be in the same format,
    // so it is reported the same way as a DOCTYPE name mismatch rather than
    // failing with a NullPointerException.
    DocumentType newDocType = newDocument.getDoctype();
    DocumentType existingDocType = existingDocument.getDoctype();
    if (newDocType == null || existingDocType == null
            || !newDocType.getName().equals(existingDocType.getName())) {
        throw new ShellException(getString("Warning.12", existingFileName)); //$NON-NLS-1$
    }

    Element existingRootElement = existingDocument.getDocumentElement();
    Element newRootElement = newDocument.getDocumentElement();

    // reconcile the root element attributes -
    // take all attributes from the new element and add to the existing
    // element

    // remove all attributes from the existing root element
    // (iterate backwards because the attribute map shrinks as we remove)
    NamedNodeMap attributes = existingRootElement.getAttributes();
    int attributeCount = attributes.getLength();
    for (int i = attributeCount - 1; i >= 0; i--) {
        Node node = attributes.item(i);
        existingRootElement.removeAttribute(node.getNodeName());
    }

    // add attributes from the new root node to the old root node
    attributes = newRootElement.getAttributes();
    attributeCount = attributes.getLength();
    for (int i = 0; i < attributeCount; i++) {
        Node node = attributes.item(i);
        existingRootElement.setAttribute(node.getNodeName(), node.getNodeValue());
    }

    // remove the old generated elements and any
    // white space before the old nodes
    // (collect first, delete afterwards, so the child list is not modified
    // while it is being walked)
    List<Node> nodesToDelete = new ArrayList<>();
    NodeList children = existingRootElement.getChildNodes();
    int length = children.getLength();
    for (int i = 0; i < length; i++) {
        Node node = children.item(i);
        if (isGeneratedNode(node)) {
            nodesToDelete.add(node);
        } else if (isWhiteSpace(node) && isGeneratedNode(children.item(i + 1))) {
            // NOTE(review): for the last child, item(i + 1) is null;
            // isGeneratedNode is presumably null-tolerant - confirm.
            nodesToDelete.add(node);
        }
    }

    for (Node node : nodesToDelete) {
        existingRootElement.removeChild(node);
    }

    // add the new generated elements
    children = newRootElement.getChildNodes();
    length = children.getLength();
    Node firstChild = existingRootElement.getFirstChild();
    for (int i = 0; i < length; i++) {
        Node node = children.item(i);
        // don't add the last node if it is only white space
        if (i == length - 1 && isWhiteSpace(node)) {
            break;
        }

        Node newNode = existingDocument.importNode(node, true);
        if (firstChild == null) {
            existingRootElement.appendChild(newNode);
        } else {
            existingRootElement.insertBefore(newNode, firstChild);
        }
    }

    // pretty print the result
    return prettyPrint(existingDocument);
}
use of org.w3c.dom.DocumentType in project jsoup by jhy.
the class W3CDomTest method convertsGoogle.
/**
 * Converts a saved Google results page from jsoup to a W3C DOM and checks the
 * html element, the doctype node, and an XML serialise/re-parse round trip.
 */
@Test
public void convertsGoogle() throws IOException {
    File fixture = ParseTest.getFile("/htmltests/google-ipod.html.gz");
    org.jsoup.nodes.Document jsoupDoc = Jsoup.parse(fixture, "UTF8");

    W3CDom converter = new W3CDom();
    Document w3cDoc = converter.fromJsoup(jsoupDoc);

    // The html element is the second child of the document.
    Node htmlNode = w3cDoc.getChildNodes().item(1);
    assertNull(htmlNode.getNamespaceURI());
    assertEquals("html", htmlNode.getLocalName());
    assertEquals("html", htmlNode.getNodeName());

    // The doctype is the first child and is the same object getDoctype() returns.
    DocumentType docType = w3cDoc.getDoctype();
    Node firstChild = w3cDoc.getChildNodes().item(0);
    assertSame(docType, firstChild);
    assertEquals("html", docType.getName());

    // Serialise to XML and parse it back.
    String serialized = W3CDom.asString(w3cDoc, W3CDom.OutputXml());
    assertTrue(serialized.contains("ipod"));

    Document reparsed = parseXml(serialized, true);
    assertEquals("Images", reparsed.getElementsByTagName("a").item(0).getTextContent());
}
Aggregations