use of org.exist.dom.memtree.SAXAdapter in project exist by eXist-db.
the class TestCase method loadVarFromURI.
/**
 * Parses the XML file at the given path into an in-memory document.
 *
 * <p>If the first parse fails with the parser's "Content is not allowed in prolog."
 * error (treated here as a sign of a leading byte-order mark), the file is re-read
 * as a string, any leading non-word characters before the first {@code <} are
 * stripped, and the parse is retried once.</p>
 *
 * @param context the XQuery context handed to the {@link SAXAdapter}
 * @param uri path of the file to load; it is opened as a local file, not as a URL
 * @return the parsed document, with its document URI set to the absolute file path
 * @throws IOException if the parser cannot be set up, the file cannot be read,
 *         or the content cannot be parsed (the SAX error is attached as the cause)
 */
public NodeImpl loadVarFromURI(XQueryContext context, String uri) throws IOException {
    SAXAdapter adapter = new SAXAdapter(context);
    SAXParserFactory factory = ExistSAXParserFactory.getSAXParserFactory();
    factory.setNamespaceAware(true);
    XMLReader xr;
    try {
        SAXParser parser = factory.newSAXParser();
        xr = parser.getXMLReader();
        xr.setContentHandler(adapter);
        xr.setProperty(Namespaces.SAX_LEXICAL_HANDLER, adapter);
    } catch (Exception e) {
        throw new IOException(e);
    }
    // The uri is read through FileInputStream, i.e. it is a file system path rather than a URL.
    try (final InputStreamReader isr = new InputStreamReader(new FileInputStream(uri), "UTF-8")) {
        InputSource src = new InputSource(isr);
        xr.parse(src);
        adapter.getDocument().setDocumentURI(new File(uri).getAbsoluteFile().toString());
        return (NodeImpl) adapter.getDocument();
    } catch (SAXException e) {
        // workaround BOM
        // Constant-first equals: a SAXException may carry no message at all, and a
        // NullPointerException here would mask the real parse failure.
        if ("Content is not allowed in prolog.".equals(e.getMessage())) {
            try {
                String xml = readFileAsString(Paths.get(uri));
                xml = xml.trim().replaceFirst("^([\\W]+)<", "<");
                InputSource src = new InputSource(new StringReader(xml));
                // NOTE(review): the same adapter is reused for the retry; this assumes a new
                // startDocument event resets whatever the failed parse left behind - confirm.
                xr.parse(src);
                adapter.getDocument().setDocumentURI(new File(uri).getAbsoluteFile().toString());
                return (NodeImpl) adapter.getDocument();
            } catch (SAXException e1) {
                // Report the original error as before, but keep the retry failure for diagnosis.
                e.addSuppressed(e1);
                throw new IOException(e);
            }
        }
        throw new IOException(e);
    }
}
use of org.exist.dom.memtree.SAXAdapter in project exist by eXist-db.
the class CollectionTest method asInMemoryDocument.
/**
 * Builds an in-memory document from a string of XML.
 *
 * @param doc the XML text to parse
 * @return the document produced by the {@link SAXAdapter}
 * @throws XPathException if the parser cannot be created or the text cannot be parsed
 */
private DocumentImpl asInMemoryDocument(final String doc) throws XPathException {
    try {
        final SAXAdapter adapter = new SAXAdapter();
        final XMLReader parser = saxParserFactory.newSAXParser().getXMLReader();

        // the adapter receives both content and lexical events
        parser.setContentHandler(adapter);
        parser.setProperty(Namespaces.SAX_LEXICAL_HANDLER, adapter);

        try (final Reader xmlText = new StringReader(doc)) {
            final InputSource source = new InputSource(xmlText);
            parser.parse(source);
        }

        return adapter.getDocument();
    } catch (final ParserConfigurationException | SAXException | IOException cause) {
        throw new XPathException("Unable to parse document", cause);
    }
}
use of org.exist.dom.memtree.SAXAdapter in project exist by eXist-db.
the class GetIndexStatistics method eval.
/**
 * Serializes the index statistics into an in-memory document and returns
 * the first child of that document.
 *
 * @param args the function arguments (not read by this implementation)
 * @param contextSequence the context sequence (not read by this implementation)
 * @return the first child node of the generated document, or the empty
 *         sequence when no index is registered under {@code IndexStatistics.ID}
 * @throws XPathException if a SAX error occurs while the statistics are written
 */
public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
    final IndexStatistics statistics = (IndexStatistics) context.getBroker()
            .getBrokerPool()
            .getIndexManager()
            .getIndexById(IndexStatistics.ID);

    // module may not be enabled
    if (statistics == null) {
        return Sequence.EMPTY_SEQUENCE;
    }

    final SAXAdapter receiver = new SAXAdapter(context);
    try {
        // the index only emits its own content, so the document events are framed here
        receiver.startDocument();
        statistics.toSAX(receiver);
        receiver.endDocument();
    } catch (final SAXException saxError) {
        throw new XPathException(this, "Error caught while retrieving statistics: " + saxError.getMessage(), saxError);
    }

    final DocumentImpl result = (DocumentImpl) receiver.getDocument();
    return (NodeImpl) result.getFirstChild();
}
use of org.exist.dom.memtree.SAXAdapter in project exist by eXist-db.
the class RESTServer method parseXML.
/**
 * Parses a string of XML into an in-memory tree and returns its document element.
 *
 * <p>An XML reader is borrowed from the broker pool's parser pool and is handed
 * back in all cases. Parsing is driven through the given namespace extractor,
 * which is configured with the borrowed reader as its parent and forwards to
 * the {@link SAXAdapter} as its content handler.</p>
 *
 * @param pool the broker pool that supplies the parser pool
 * @param content the XML text to parse
 * @param nsExtractor the filter through which the parse is run
 * @return the document element of the parsed content
 * @throws SAXException declared for the parser calls made in this method
 * @throws IOException declared for the parser calls made in this method
 */
private ElementImpl parseXML(final BrokerPool pool, final String content, final NamespaceExtractor nsExtractor) throws SAXException, IOException {
    final InputSource source = new InputSource(new StringReader(content));
    final XMLReaderPool readers = pool.getParserPool();

    XMLReader borrowed = null;
    try {
        borrowed = readers.borrowXMLReader();

        final SAXAdapter builder = new SAXAdapter();
        nsExtractor.setContentHandler(builder);
        // lexical events bypass the extractor and go straight from the reader to the adapter
        borrowed.setProperty(Namespaces.SAX_LEXICAL_HANDLER, builder);
        nsExtractor.setParent(borrowed);
        nsExtractor.parse(source);

        final Document parsed = builder.getDocument();
        return (ElementImpl) parsed.getDocumentElement();
    } finally {
        // only hand back a reader that was actually obtained
        if (borrowed != null) {
            readers.returnXMLReader(borrowed);
        }
    }
}
use of org.exist.dom.memtree.SAXAdapter in project exist by eXist-db.
the class ModuleUtils method htmlToXHtml.
/**
 * Converts HTML into an in-memory XML document by running it through the
 * HTML-to-XML parser obtained from the broker configuration.
 *
 * @param context
 *     The Context of the calling XQuery; it is also set on the returned document
 * @param srcHtml
 *     The InputSource supplying the HTML
 * @param parserFeatures
 *     Features to set on the parser, may be null
 * @param parserProperties
 *     Properties to set on the parser, may be null
 *
 * @return An in-memory Document holding the XML form of the HTML
 * @throws SAXException if no HTML to XML parser is configured, or in case of a SAX error
 * @throws IOException if the configured parser could not be obtained, or in case of
 *     an error reading the input source
 */
public static DocumentImpl htmlToXHtml(final XQueryContext context, final InputSource srcHtml, final Map<String, Boolean> parserFeatures, final Map<String, String> parserProperties) throws IOException, SAXException {
    // look up the configured HTML parser used to turn the HTML content into XML
    final Optional<Either<Throwable, XMLReader>> configuredParser = HtmlToXmlParser.getHtmlToXmlParser(context.getBroker().getConfiguration());
    if (!configuredParser.isPresent()) {
        throw new SAXException("There is no HTML to XML parser configured in conf.xml");
    }

    final Either<Throwable, XMLReader> parserOrFailure = configuredParser.get();
    if (parserOrFailure.isLeft()) {
        final String msg = "Unable to parse HTML to XML please ensure the parser is configured in conf.xml and is present on the classpath";
        final Throwable cause = parserOrFailure.left().get();
        LOG.error(msg, cause);
        throw new IOException(msg, cause);
    }

    final XMLReader htmlReader = parserOrFailure.right().get();

    if (parserFeatures != null) {
        for (final Map.Entry<String, Boolean> feature : parserFeatures.entrySet()) {
            htmlReader.setFeature(feature.getKey(), feature.getValue());
        }
    }

    if (parserProperties != null) {
        for (final Map.Entry<String, String> property : parserProperties.entrySet()) {
            htmlReader.setProperty(property.getKey(), property.getValue());
        }
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("Converting HTML to XML using: {}", htmlReader.getClass().getName());
    }

    final SAXAdapter builder = new SAXAdapter();
    // Bad HTML may repeat an attribute name on one element; tolerate that
    // instead of failing. The last attribute value wins.
    builder.setReplaceAttributeFlag(true);
    htmlReader.setContentHandler(builder);
    htmlReader.setProperty(Namespaces.SAX_LEXICAL_HANDLER, builder);
    htmlReader.parse(srcHtml);

    // the result is eXist's in-memory DOM implementation
    final DocumentImpl xhtml = (DocumentImpl) builder.getDocument();
    xhtml.setContext(context);
    return xhtml;
}
Aggregations