Search in sources :

Example 16 with ContentHandler

use of org.xml.sax.ContentHandler in project nokogiri by sparklemotion.

the class DOM2DTMExt method dispatchToEvents.

/**
 * Replays the subtree behind a node handle as SAX events on the supplied handler.
 *
 * <p>The shared walker is used when it has no content handler attached; otherwise a
 * fresh {@code TreeWalker} is created for this call. Whichever walker is used has its
 * content handler reset to {@code null} before this method returns.
 *
 * @param nodeHandle The node ID.
 * @param ch A non-null reference to a ContentHandler.
 *
 * @throws org.xml.sax.SAXException
 */
public void dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch) throws org.xml.sax.SAXException {
    final TreeWalker sharedWalker = m_walker;
    // A walker that already carries a handler is left untouched; a separate one is used instead.
    final TreeWalker walker = (sharedWalker.getContentHandler() == null) ? sharedWalker : new TreeWalker(null);
    walker.setContentHandler(ch);
    try {
        walker.traverseFragment(getNode(nodeHandle));
    } finally {
        // Detach the caller's handler whether or not the traversal succeeded.
        walker.setContentHandler(null);
    }
}
Also used : Node(org.w3c.dom.Node) TreeWalker(org.apache.xml.utils.TreeWalker) ContentHandler(org.xml.sax.ContentHandler)

Example 17 with ContentHandler

use of org.xml.sax.ContentHandler in project camel by apache.

the class StAXEndpoint method doStart.

/**
 * Starts the endpoint and installs a {@code StAXProcessor} built from the configured
 * {@code contentHandlerClass} value.
 *
 * <p>When the value is a reference parameter (as decided by
 * {@code EndpointHelper.isReferenceParameter}), the handler instance is resolved via
 * {@code EndpointHelper.resolveReferenceParameter} using the value minus its first
 * character. Otherwise the value is resolved to a class through the context's class
 * resolver and the class itself is handed to the processor.
 *
 * @throws Exception propagated from the superclass start or from either resolution step
 */
@Override
protected void doStart() throws Exception {
    super.doStart();
    final Processor processor;
    if (!EndpointHelper.isReferenceParameter(contentHandlerClass)) {
        // Plain class name: resolve the type and hand the class to the processor.
        Class<ContentHandler> handlerType = getCamelContext().getClassResolver().resolveMandatoryClass(contentHandlerClass, ContentHandler.class);
        processor = new StAXProcessor(handlerType);
    } else {
        // Reference form: drop the leading marker character and resolve the handler instance.
        ContentHandler resolvedHandler = EndpointHelper.resolveReferenceParameter(getCamelContext(), contentHandlerClass.substring(1), ContentHandler.class, true);
        processor = new StAXProcessor(resolvedHandler);
    }
    setProcessor(processor);
}
Also used : Processor(org.apache.camel.Processor) ContentHandler(org.xml.sax.ContentHandler)

Example 18 with ContentHandler

use of org.xml.sax.ContentHandler in project camel by apache.

the class TikaProducer method doParse.

/**
 * Parses the exchange's in-message body (read as an {@code InputStream}) with the
 * configured parser, copies the resulting metadata onto the exchange via
 * {@code convertMetadataToHeaders}, and returns the stream the content handler was
 * created with.
 *
 * @param exchange the exchange whose in-message body is parsed
 * @return the {@code ByteArrayOutputStream} passed to {@code getContentHandler}
 * @throws TikaException declared by this method; see the parser for the exact conditions
 * @throws IOException declared by this method
 * @throws SAXException declared by this method
 * @throws TransformerConfigurationException declared by this method
 */
private Object doParse(Exchange exchange) throws TikaException, IOException, SAXException, TransformerConfigurationException {
    final InputStream body = exchange.getIn().getBody(InputStream.class);
    final OutputStream output = new ByteArrayOutputStream();
    final ContentHandler handler = getContentHandler(this.tikaConfiguration, output);

    // Register the configured parser in the parse context under Parser.class.
    final ParseContext parseContext = new ParseContext();
    parseContext.set(Parser.class, this.parser);

    final Metadata extracted = new Metadata();
    this.parser.parse(body, handler, extracted, parseContext);
    convertMetadataToHeaders(extracted, exchange);
    return output;
}
Also used : InputStream(java.io.InputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) OutputStream(java.io.OutputStream) ParseContext(org.apache.tika.parser.ParseContext) Metadata(org.apache.tika.metadata.Metadata) BodyContentHandler(org.apache.tika.sax.BodyContentHandler) BoilerpipeContentHandler(org.apache.tika.parser.html.BoilerpipeContentHandler) ContentHandler(org.xml.sax.ContentHandler) ExpandedTitleContentHandler(org.apache.tika.sax.ExpandedTitleContentHandler)

Example 19 with ContentHandler

use of org.xml.sax.ContentHandler in project j2objc by google.

the class TransformerIdentityImpl method transform.

/**
 * Process the source tree to the output result.
 *
 * <p>An "empty" source (a StreamSource with no system ID, stream or reader; a SAXSource
 * with no InputSource or XMLReader; a DOMSource with no node) is first replaced by a
 * DOMSource wrapping a newly created empty document. A DOMSource is then walked directly.
 * Any other source is converted to an InputSource and parsed with an XMLReader that has
 * this object installed as its content handler. If {@code m_outputStream} is non-null
 * when processing ends, it is closed and cleared.
 *
 * @param source  The input for the source tree.
 *
 * @param outputTarget The output target.
 *
 * @throws TransformerException If an unrecoverable error occurs
 * during the course of the transformation.
 * @throws IllegalArgumentException If the DOMSource carries no node when the DOM branch
 * is reached.
 */
public void transform(Source source, Result outputTarget) throws TransformerException {
    createResultContentHandler(outputTarget);
    /*
     * According to JAXP1.2, new SAXSource()/StreamSource()
     * should create an empty input tree, with a default root node.
     * new DOMSource() creates an empty document using DocumentBuilder.
     * newDocument(); Use DocumentBuilder.newDocument() for all 3 situations,
     * since there is no clear spec. how to create an empty tree when
     * both SAXSource() and StreamSource() are used.
     */
    if ((source instanceof StreamSource && source.getSystemId() == null && ((StreamSource) source).getInputStream() == null && ((StreamSource) source).getReader() == null) || (source instanceof SAXSource && ((SAXSource) source).getInputSource() == null && ((SAXSource) source).getXMLReader() == null) || (source instanceof DOMSource && ((DOMSource) source).getNode() == null)) {
        try {
            DocumentBuilderFactory builderF = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = builderF.newDocumentBuilder();
            String systemID = source.getSystemId();
            source = new DOMSource(builder.newDocument());
            // Copy system ID from original, empty Source to new Source
            if (systemID != null) {
                source.setSystemId(systemID);
            }
        } catch (ParserConfigurationException e) {
            // Keep the original message but also chain the cause so the stack trace is not lost.
            throw new TransformerException(e.getMessage(), e);
        }
    }
    try {
        if (source instanceof DOMSource) {
            DOMSource dsource = (DOMSource) source;
            m_systemID = dsource.getSystemId();
            Node dNode = dsource.getNode();
            if (null != dNode) {
                // An attribute node is emitted as bare character data, so the document
                // start/end events are issued here around it.
                final boolean isAttribute = dNode.getNodeType() == Node.ATTRIBUTE_NODE;
                try {
                    if (isAttribute) {
                        this.startDocument();
                    }
                    try {
                        if (isAttribute) {
                            String data = dNode.getNodeValue();
                            char[] chars = data.toCharArray();
                            characters(chars, 0, chars.length);
                        } else {
                            org.apache.xml.serializer.TreeWalker walker;
                            walker = new org.apache.xml.serializer.TreeWalker(this, m_systemID);
                            walker.traverse(dNode);
                        }
                    } finally {
                        if (isAttribute) {
                            this.endDocument();
                        }
                    }
                } catch (SAXException se) {
                    throw new TransformerException(se);
                }
                return;
            } else {
                String messageStr = XSLMessages.createMessage(XSLTErrorResources.ER_ILLEGAL_DOMSOURCE_INPUT, null);
                throw new IllegalArgumentException(messageStr);
            }
        }
        InputSource xmlSource = SAXSource.sourceToInputSource(source);
        if (null == xmlSource) {
            // "Can't transform a Source of type <class name>!"
            throw new TransformerException(XSLMessages.createMessage(XSLTErrorResources.ER_CANNOT_TRANSFORM_SOURCE_TYPE, new Object[] { source.getClass().getName() }));
        }
        if (null != xmlSource.getSystemId()) {
            m_systemID = xmlSource.getSystemId();
        }
        XMLReader reader = null;
        // True only when the reader came from XMLReaderManager and must be released again.
        boolean managedReader = false;
        try {
            if (source instanceof SAXSource) {
                reader = ((SAXSource) source).getXMLReader();
            }
            if (null == reader) {
                try {
                    reader = XMLReaderManager.getInstance().getXMLReader();
                    managedReader = true;
                } catch (SAXException se) {
                    throw new TransformerException(se);
                }
            } else {
                try {
                    reader.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
                } catch (org.xml.sax.SAXException se) {
                    // We don't care.
                }
            }
            // Get the input content handler, which will handle the
            // parse events and create the source tree.
            ContentHandler inputHandler = this;
            reader.setContentHandler(inputHandler);
            if (inputHandler instanceof org.xml.sax.DTDHandler) {
                reader.setDTDHandler((org.xml.sax.DTDHandler) inputHandler);
            }
            try {
                if (inputHandler instanceof org.xml.sax.ext.LexicalHandler) {
                    reader.setProperty("http://xml.org/sax/properties/lexical-handler", inputHandler);
                }
                if (inputHandler instanceof org.xml.sax.ext.DeclHandler) {
                    reader.setProperty("http://xml.org/sax/properties/declaration-handler", inputHandler);
                }
            } catch (org.xml.sax.SAXException se) {
                // Best effort: a reader that rejects these properties is still used.
            }
            try {
                if (inputHandler instanceof org.xml.sax.ext.LexicalHandler) {
                    reader.setProperty("http://xml.org/sax/handlers/LexicalHandler", inputHandler);
                }
                if (inputHandler instanceof org.xml.sax.ext.DeclHandler) {
                    reader.setProperty("http://xml.org/sax/handlers/DeclHandler", inputHandler);
                }
            } catch (org.xml.sax.SAXNotRecognizedException snre) {
                // Best effort: these alternate property names are optional.
            }
            reader.parse(xmlSource);
        } catch (org.apache.xml.utils.WrappedRuntimeException wre) {
            // Peel off every WrappedRuntimeException layer to reach the underlying cause.
            Throwable throwable = wre.getException();
            while (throwable instanceof org.apache.xml.utils.WrappedRuntimeException) {
                throwable = ((org.apache.xml.utils.WrappedRuntimeException) throwable).getException();
            }
            // Report the unwrapped cause (previously the unwrapping result was discarded);
            // fall back to the wrapper itself if there is nothing inside it.
            throw new TransformerException((throwable != null) ? throwable : wre);
        } catch (org.xml.sax.SAXException se) {
            throw new TransformerException(se);
        } catch (IOException ioe) {
            throw new TransformerException(ioe);
        } finally {
            if (managedReader) {
                XMLReaderManager.getInstance().releaseXMLReader(reader);
            }
        }
    } finally {
        if (null != m_outputStream) {
            try {
                m_outputStream.close();
            } catch (IOException ioe) {
                // Best-effort close; a failure here is deliberately not reported.
            }
            m_outputStream = null;
        }
    }
}
Also used : DOMSource(javax.xml.transform.dom.DOMSource) InputSource(org.xml.sax.InputSource) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) Node(org.w3c.dom.Node) ContentHandler(org.xml.sax.ContentHandler) SAXException(org.xml.sax.SAXException) DeclHandler(org.xml.sax.ext.DeclHandler) LexicalHandler(org.xml.sax.ext.LexicalHandler) ParserConfigurationException(javax.xml.parsers.ParserConfigurationException) TransformerException(javax.xml.transform.TransformerException) XMLReader(org.xml.sax.XMLReader) StreamSource(javax.xml.transform.stream.StreamSource) IOException(java.io.IOException) SAXSource(javax.xml.transform.sax.SAXSource) DocumentBuilder(javax.xml.parsers.DocumentBuilder) DTDHandler(org.xml.sax.DTDHandler)

Example 20 with ContentHandler

use of org.xml.sax.ContentHandler in project j2objc by google.

the class TransformerImpl method transformToRTF.

/**
 * Given a stylesheet element, create a result tree fragment from its
 * contents.
 *
 * <p>For the duration of the call the transformer's serialization handler is swapped
 * for a new {@code ToXMLSAXHandler} that forwards to the DTM's content handler; the
 * previous handler is put back before returning, whether or not an error occurred.
 *
 * @param templateParent The template element that holds the fragment.
 * @param dtmFrag The DTM to write the RTF into
 * @return the NodeHandle for the root node of the resulting RTF.
 *
 * @throws TransformerException
 * @xsl.usage advanced
 */
private int transformToRTF(ElemTemplateElement templateParent, DTM dtmFrag) throws TransformerException {
    final ContentHandler fragmentSink = dtmFrag.getContentHandler();

    // Remember the handler in effect on entry so the outer finally can reinstate it.
    final SerializationHandler previousHandler = this.m_serializationHandler;

    // Build the handler that feeds the RTF and make it the current one.
    final ToSAXHandler fragmentSerializer = new ToXMLSAXHandler();
    fragmentSerializer.setContentHandler(fragmentSink);
    fragmentSerializer.setTransformer(this);
    m_serializationHandler = fragmentSerializer;
    final SerializationHandler active = fragmentSerializer;

    // The fragment's root handle is read late: in SAX2RTFDTM it isn't available until
    // after the startDocument has been issued.
    int rootHandle;
    try {
        active.startDocument();
        // startDocument is "bottlenecked" in the handler. Flush so it is acted upon
        // immediately and the DTM is marked in-progress; that keeps further RTF activity
        // triggered by the xsl:variable's body from bashing this DTM.
        active.flushPending();
        try {
            // Run the child templates, then make sure everything is flushed.
            executeChildTemplates(templateParent, true);
            active.flushPending();
            // The document ID may not exist until startDocument has been flushed, and
            // may be invalid again once the document is closed, so fetch it just before
            // the end.
            rootHandle = dtmFrag.getDocument();
        } finally {
            active.endDocument();
        }
    } catch (org.xml.sax.SAXException se) {
        throw new TransformerException(se);
    } finally {
        // Restore the previous result tree handler.
        this.m_serializationHandler = previousHandler;
    }
    return rootHandle;
}
Also used : ToSAXHandler(org.apache.xml.serializer.ToSAXHandler) SAXException(org.xml.sax.SAXException) SerializationHandler(org.apache.xml.serializer.SerializationHandler) XPathContext(org.apache.xpath.XPathContext) ContentHandler(org.xml.sax.ContentHandler) ToXMLSAXHandler(org.apache.xml.serializer.ToXMLSAXHandler) TransformerException(javax.xml.transform.TransformerException)

Aggregations

ContentHandler (org.xml.sax.ContentHandler): 351; BodyContentHandler (org.apache.tika.sax.BodyContentHandler): 229; Metadata (org.apache.tika.metadata.Metadata): 228; InputStream (java.io.InputStream): 210; Test (org.junit.Test): 208; ParseContext (org.apache.tika.parser.ParseContext): 163; Parser (org.apache.tika.parser.Parser): 105; TikaTest (org.apache.tika.TikaTest): 103; AutoDetectParser (org.apache.tika.parser.AutoDetectParser): 102; TikaInputStream (org.apache.tika.io.TikaInputStream): 75; ByteArrayInputStream (java.io.ByteArrayInputStream): 63; SAXException (org.xml.sax.SAXException): 40; IOException (java.io.IOException): 34; TeeContentHandler (org.apache.tika.sax.TeeContentHandler): 27; TikaException (org.apache.tika.exception.TikaException): 24; ExcelParserTest (org.apache.tika.parser.microsoft.ExcelParserTest): 24; WordParserTest (org.apache.tika.parser.microsoft.WordParserTest): 24; AttributesImpl (org.xml.sax.helpers.AttributesImpl): 21; XHTMLContentHandler (org.apache.tika.sax.XHTMLContentHandler): 20; InputSource (org.xml.sax.InputSource): 20