use of org.apache.xerces.parsers.DOMParser in project nutch by apache.
the class TestNodeWalker method testSkipChildren.
/**
 * Checks that {@code NodeWalker.skipChildren()} keeps the text below a
 * {@code ul} element out of the walk.
 *
 * The WEBPAGE fixture is parsed once and walked twice. The first walk
 * collects the value of every text node and must contain the UL content;
 * the second walk calls {@code skipChildren()} on every node named "ul"
 * (case-insensitive) and must not contain it.
 */
@Test
public void testSkipChildren() {
    DOMParser parser = new DOMParser();
    try {
        parser.setFeature("http://xml.org/sax/features/validation", false);
        parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        parser.parse(new InputSource(new ByteArrayInputStream(WEBPAGE.getBytes())));
    } catch (Exception e) {
        // Fail right here with the real cause. Carrying on would only
        // produce an unrelated failure when the document is walked below.
        throw new AssertionError("Unable to parse the WEBPAGE fixture", e);
    }

    // Pass 1: plain walk, gather the text of every text node.
    StringBuilder sb = new StringBuilder();
    NodeWalker walker = new NodeWalker(parser.getDocument());
    while (walker.hasNext()) {
        Node currentNode = walker.nextNode();
        short nodeType = currentNode.getNodeType();
        if (nodeType == Node.TEXT_NODE) {
            String text = currentNode.getNodeValue();
            // Collapse each whitespace run to a single space.
            text = text.replaceAll("\\s+", " ");
            sb.append(text);
        }
    }
    Assert.assertTrue("UL Content can NOT be found in the node", findSomeUlContent(sb.toString()));

    // Pass 2: same walk, but ask the walker to skip the children of every "ul" node.
    StringBuilder sbSkip = new StringBuilder();
    NodeWalker walkerSkip = new NodeWalker(parser.getDocument());
    while (walkerSkip.hasNext()) {
        Node currentNode = walkerSkip.nextNode();
        String nodeName = currentNode.getNodeName();
        short nodeType = currentNode.getNodeType();
        if ("ul".equalsIgnoreCase(nodeName)) {
            walkerSkip.skipChildren();
        }
        if (nodeType == Node.TEXT_NODE) {
            String text = currentNode.getNodeValue();
            text = text.replaceAll("\\s+", " ");
            sbSkip.append(text);
        }
    }
    Assert.assertFalse("UL Content can be found in the node", findSomeUlContent(sbSkip.toString()));
}
use of org.apache.xerces.parsers.DOMParser in project mondrian by pentaho.
the class XmlUtil method getParser.
// ////////////////////////////////////////////////////////////////////////
// parse
// ////////////////////////////////////////////////////////////////////////
/**
 * Creates and configures a new (non-cached) DOM parser.
 *
 * Schema validation and validation are both switched on when
 * {@code validate} is true or a schema location is supplied; otherwise
 * both are switched off. Deferred node expansion is always disabled and
 * namespace processing is always enabled.
 *
 * @param schemaLocationPropertyValue value for the schema-location
 *        property, or null for none; backslashes are replaced by forward
 *        slashes before it is set on the parser
 * @param entityResolver resolver installed on the parser
 * @param validate whether to validate even without a schema location
 * @return the newly created parser
 * @throws SAXNotRecognizedException if the parser does not recognize a
 *         feature or property
 * @throws SAXNotSupportedException if the parser does not support a
 *         requested feature or property value
 */
public static DOMParser getParser(String schemaLocationPropertyValue, EntityResolver entityResolver, boolean validate) throws SAXNotRecognizedException, SAXNotSupportedException {
    final boolean schemaLocationGiven = schemaLocationPropertyValue != null;
    final boolean validating = validate || schemaLocationGiven;

    final DOMParser domParser = new DOMParser();
    domParser.setEntityResolver(entityResolver);
    domParser.setErrorHandler(new SaxErrorHandler());

    domParser.setFeature(DEFER_NODE_EXPANSION, false);
    domParser.setFeature(NAMESPACES_FEATURE_ID, true);
    domParser.setFeature(SCHEMA_VALIDATION_FEATURE_ID, validating);
    domParser.setFeature(VALIDATION_FEATURE_ID, validating);

    if (schemaLocationGiven) {
        final String normalizedLocation = schemaLocationPropertyValue.replace('\\', '/');
        domParser.setProperty(SCHEMA_LOCATION, normalizedLocation);
    }
    return domParser;
}
use of org.apache.xerces.parsers.DOMParser in project cogtool by cogtool.
the class ImportCogToolXML method importXML.
/**
 * Parses the XML read from {@code input} with a Xerces DOM parser and hands
 * the resulting document to {@code parseFile}.
 *
 * @param input source of the XML text
 * @param imageDirPath path used for loading images; stored in
 *        {@code directoryPath}
 * @param taskParent passed through to {@code parseFile}
 * @param modelGen stored in {@code modelGenerator}
 * @return {@code true} whenever the method returns normally
 * @throws IOException declared by the signature; raised by the parser
 * @throws SAXException declared by the signature; raised by the parser
 * @throws GraphicsUtil.ImageException if {@code failedImages} is not empty
 *         once {@code parseFile} has run; the message lists every entry
 */
public boolean importXML(Reader input, String imageDirPath, TaskParent taskParent, CognitiveModelGenerator modelGen) throws IOException, SAXException {
    // Record the collaborators the traversal relies on before parsing starts.
    modelGenerator = modelGen;
    directoryPath = imageDirPath;

    // Build the DOM and traverse it.
    DOMParser xmlParser = new DOMParser();
    xmlParser.parse(new InputSource(input));
    parseFile(xmlParser.getDocument(), taskParent);

    if (failedImages.size() == 0) {
        return true;
    }

    // Some images could not be loaded: report all of them in one message.
    StringBuilder report = new StringBuilder("Failed to load the following images:");
    for (String imageName : failedImages) {
        report.append(System.getProperty("file.separator")).append(imageName);
    }
    throw new GraphicsUtil.ImageException(report.toString());
}
use of org.apache.xerces.parsers.DOMParser in project ACS by ACS-Community.
the class EbeDocument method load.
/**
 * Loads the data from the selected path and rebuilds this object's nodes.
 *
 * The file at {@code path} is parsed into {@code doc}. The first element
 * named after {@code EbeDocument.getClassType()} is read into this object,
 * every Completion element becomes a {@code Completion} node, and every
 * Error element becomes an {@code Error} node holding one {@code Member}
 * per child named "Member".
 *
 * @throws IllegalStateException if the file cannot be parsed; the existing
 *         nodes are left untouched in that case
 */
public void load() {
    if (parser == null) {
        parser = new DOMParser();
    }
    try {
        parser.parse(path);
        doc = parser.getDocument();
    } catch (Exception e) {
        // Do not continue with a missing or stale document: report the
        // failure with its cause before any state has been cleared.
        throw new IllegalStateException("Unable to parse " + path, e);
    }
    nodes.clear();
    // Only one Type
    Node no = doc.getElementsByTagName(EbeDocument.getClassType().name).item(0);
    readNode(no, this);
    // Check for Completions
    NodeList completionNodes = doc.getElementsByTagName(Completion.getClassType().name);
    for (int i = 0; i < completionNodes.getLength(); i++) {
        Node coNo = completionNodes.item(i);
        Completion coObj = new Completion();
        readNode(coNo, coObj);
        this.putNode(coObj);
    }
    // Check for Errors and their Members
    NodeList errorNodes = doc.getElementsByTagName(Error.getClassType().name);
    for (int i = 0; i < errorNodes.getLength(); i++) {
        Node erNo = errorNodes.item(i);
        Error erObj = new Error();
        readNode(erNo, erObj);
        for (Node meNo = erNo.getFirstChild(); meNo != null; meNo = meNo.getNextSibling()) {
            // Plain equality: the name is a literal, no regex is needed.
            if ("Member".equals(meNo.getNodeName())) {
                Member meObj = new Member();
                readNode(meNo, meObj);
                erObj.putMember(meObj);
            }
        }
        this.putNode(erObj);
    }
}
use of org.apache.xerces.parsers.DOMParser in project ACS by ACS-Community.
the class XmlSeeker method getXmls.
/**
 * Gets the XML files, among those the file finder returns for the added
 * dirs, whose first {@code Type} element has an {@code xsi:schemaLocation}
 * attribute equal to the given xsd filename.
 *
 * Files that yield no document, have no {@code Type} element, or whose
 * {@code Type} element lacks the attribute are skipped.
 *
 * @param xsd the filename of the xsd to seek inside the XML files
 * @return the list of matching XML files (elements are {@code File} objects)
 */
public ArrayList getXmls(String xsd) {
    ArrayList<File> files = new ArrayList<File>();
    File[] dirArr = new File[dirs.size()];
    dirs.toArray(dirArr);
    AcsFileFinder fileFinder = new AcsFileFinder(dirArr, this, null);
    File[] fileArr = fileFinder.getAllFiles();
    for (File candidate : fileArr) {
        DOMParser dp = new DOMParser();
        try {
            dp.parse(candidate.getAbsolutePath());
        } catch (Exception e) {
            e.printStackTrace();
        }
        Document doc = dp.getDocument();
        // A failed parse can leave the parser without a document.
        if (doc == null) {
            continue;
        }
        // Hack to support both Errors and Logs
        // TODO: Please do this thing in a generic way
        Node typeNode = doc.getElementsByTagName("Type").item(0);
        // Type node may not even exist
        if (typeNode == null) {
            continue;
        }
        Node schema = typeNode.getAttributes().getNamedItem("xsi:schemaLocation");
        // The attribute is optional: getNamedItem returns null when it is absent.
        if (schema != null && schema.getNodeValue().contentEquals(xsd)) {
            files.add(candidate);
        }
    }
    return files;
}
Aggregations