Search in sources :

Example 1 with BoundedInputStream

Use of nu.validator.io.BoundedInputStream in the project "validator" (by validator).

From the class PrudentHttpEntityResolver, method resolveEntity.

/**
 * Resolves an external entity by fetching it over HTTP(S) and wrapping the
 * response body in a {@link TypedInputSource}.
 *
 * <p>Steps performed, in order:
 * <ol>
 * <li>If {@code requestsLeft} is non-negative, it is decremented; when it has
 * already reached zero an {@link IOException} is thrown.</li>
 * <li>{@code systemId} is parsed as a URL; only the {@code http} and
 * {@code https} schemes are accepted.</li>
 * <li>A GET request is sent with {@code User-Agent}, {@code Accept},
 * {@code Accept-Encoding: gzip} and (when the current request carries the
 * corresponding attribute) {@code Accept-Language} headers.</li>
 * <li>A non-200 status is rejected unless the
 * {@code ignore-response-status} request attribute is set.</li>
 * <li>The declared content length and the actual stream are both checked
 * against {@code sizeLimit} when it is non-negative.</li>
 * <li>The {@code X-UA-Compatible} and {@code Content-Security-Policy}
 * response headers are validated and problems are reported to the error
 * handler, if one is set.</li>
 * </ol>
 *
 * <p>The returned byte stream releases the HTTP connection when it is
 * closed, finalized, or when reading fails. If this method itself fails
 * after the request object was created, the request is aborted and its
 * connection released before the exception is rethrown.
 *
 * @param publicId the public identifier, used only for error reporting and
 *        for building the input source
 * @param systemId the URL to fetch
 * @return a {@link TypedInputSource} whose byte stream yields the
 *         (gzip-decoded, size-bounded) response body
 * @throws SAXException if the scheme is unsupported, the URL is rejected by
 *         {@code HttpGet}, or the declared size exceeds the limit
 * @throws IOException if the URL cannot be parsed, the request budget is
 *         exhausted, the port is out of range, or the transfer fails
 * @see org.xml.sax.EntityResolver#resolveEntity(java.lang.String,
 *      java.lang.String)
 */
@Override
public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
    // A negative requestsLeft means "unlimited".
    if (requestsLeft > -1) {
        if (requestsLeft == 0) {
            throw new IOException("Number of permitted HTTP requests exceeded.");
        }
        requestsLeft--;
    }
    HttpGet m = null;
    try {
        URL url = null;
        try {
            url = URL.parse(systemId);
        } catch (GalimatiasParseException e) {
            IOException ioe = new IOException(e.getMessage(), e);
            SAXParseException spe = new SAXParseException(e.getMessage(), publicId, systemId, -1, -1, ioe);
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw ioe;
        }
        String scheme = url.scheme();
        if (!("http".equals(scheme) || "https".equals(scheme))) {
            String msg = "Unsupported URI scheme: \u201C" + scheme + "\u201D.";
            SAXParseException spe = new SAXParseException(msg, publicId, systemId, -1, -1, new IOException(msg));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw spe;
        }
        // From here on use the serialized form of the parsed URL.
        systemId = url.toString();
        try {
            m = new HttpGet(systemId);
        } catch (IllegalArgumentException e) {
            SAXParseException spe = new SAXParseException(e.getMessage(), publicId, systemId, -1, -1, new IOException(e.getMessage(), e));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw spe;
        }
        m.setHeader("User-Agent", userAgent);
        m.setHeader("Accept", buildAccept());
        m.setHeader("Accept-Encoding", "gzip");
        if (request != null) {
            Object acceptLanguage = request.getAttribute("http://validator.nu/properties/accept-language");
            if (acceptLanguage != null) {
                m.setHeader("Accept-Language", (String) acceptLanguage);
            }
        }
        log4j.info(systemId);
        try {
            if (url.port() > 65535) {
                throw new IOException("Port number must be less than 65536.");
            }
        } catch (NumberFormatException e) {
            throw new IOException("Port number must be less than 65536.");
        }
        HttpResponse response = client.execute(m);
        boolean ignoreResponseStatus = false;
        if (request != null) {
            Object ignoreStatus = request.getAttribute("http://validator.nu/properties/ignore-response-status");
            if (ignoreStatus != null) {
                ignoreResponseStatus = (boolean) ignoreStatus;
            }
        }
        int statusCode = response.getStatusLine().getStatusCode();
        if (statusCode != 200 && !ignoreResponseStatus) {
            String msg = "HTTP resource not retrievable." + " The HTTP status from the remote server was: " + statusCode + ".";
            SAXParseException spe = new SAXParseException(msg, publicId, m.getURI().toString(), -1, -1, new SystemIdIOException(m.getURI().toString(), msg));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw new ResourceNotRetrievableException(String.format("%s: %s", m.getURI().toString(), msg));
        }
        HttpEntity entity = response.getEntity();
        // Reject early when the server already declares an oversized body.
        long len = entity.getContentLength();
        if (sizeLimit > -1 && len > sizeLimit) {
            SAXParseException spe = new SAXParseException("Resource size exceeds limit.", publicId, m.getURI().toString(), -1, -1, new StreamBoundException("Resource size exceeds limit."));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw spe;
        }
        TypedInputSource is;
        org.apache.http.Header ct = response.getFirstHeader("Content-Type");
        String contentType = null;
        final String baseUri = m.getURI().toString();
        if (ct != null) {
            contentType = ct.getValue();
        }
        is = contentTypeParser.buildTypedInputSource(baseUri, publicId, contentType);
        Header cl = response.getFirstHeader("Content-Language");
        if (cl != null) {
            is.setLanguage(cl.getValue().trim());
        }
        Header xuac = response.getFirstHeader("X-UA-Compatible");
        if (xuac != null) {
            String val = xuac.getValue().trim();
            if (!"ie=edge".equalsIgnoreCase(val)) {
                SAXParseException spe = new SAXParseException("X-UA-Compatible HTTP header must have the value \u201CIE=edge\u201D," + " was \u201C" + val + "\u201D.", publicId, systemId, -1, -1);
                // errorHandler is optional everywhere else in this method;
                // guard here too so a missing handler cannot cause an NPE.
                if (errorHandler != null) {
                    errorHandler.error(spe);
                }
            }
        }
        Header csp = response.getFirstHeader("Content-Security-Policy");
        if (csp != null) {
            try {
                ContentSecurityPolicy.THE_INSTANCE.checkValid(csp.getValue().trim());
            } catch (DatatypeException e) {
                if (errorHandler != null) {
                    SAXParseException spe = new SAXParseException("Content-Security-Policy HTTP header: " + e.getMessage(), publicId, systemId, -1, -1);
                    // Only Html5DatatypeException carries the warning flag;
                    // any other DatatypeException is reported as an error
                    // instead of failing with a ClassCastException.
                    boolean warningOnly = e instanceof Html5DatatypeException && ((Html5DatatypeException) e).isWarning();
                    if (warningOnly) {
                        errorHandler.warning(spe);
                    } else {
                        errorHandler.error(spe);
                    }
                }
            }
        }
        final HttpGet meth = m;
        InputStream stream = entity.getContent();
        // Bound the raw (possibly compressed) bytes ...
        if (sizeLimit > -1) {
            stream = new BoundedInputStream(stream, sizeLimit, baseUri);
        }
        Header ce = response.getFirstHeader("Content-Encoding");
        if (ce != null) {
            String val = ce.getValue().trim();
            if ("gzip".equalsIgnoreCase(val) || "x-gzip".equalsIgnoreCase(val)) {
                stream = new GZIPInputStream(stream);
                // ... and bound the decompressed bytes as well.
                if (sizeLimit > -1) {
                    stream = new BoundedInputStream(stream, sizeLimit, baseUri);
                }
            }
        }
        is.setByteStream(new ObservableInputStream(stream, new StreamObserver() {

            private final Logger log4j = Logger.getLogger("nu.validator.xml.PrudentEntityResolver.StreamObserver");

            // Ensures the connection is released at most once.
            private boolean released = false;

            @Override
            public void closeCalled() {
                log4j.debug("closeCalled");
                if (!released) {
                    log4j.debug("closeCalled, not yet released");
                    released = true;
                    try {
                        meth.releaseConnection();
                    } catch (Exception e) {
                        log4j.debug("closeCalled, releaseConnection", e);
                    }
                }
            }

            @Override
            public void exceptionOccurred(Exception ex) throws IOException {
                if (!released) {
                    released = true;
                    try {
                        meth.abort();
                    } catch (Exception e) {
                        log4j.debug("exceptionOccurred, abort", e);
                    } finally {
                        try {
                            meth.releaseConnection();
                        } catch (Exception e) {
                            log4j.debug("exceptionOccurred, releaseConnection", e);
                        }
                    }
                }
                // Rethrow, attaching the base URI to plain IOExceptions.
                if (ex instanceof SystemIdIOException) {
                    throw (SystemIdIOException) ex;
                } else if (ex instanceof IOException) {
                    IOException ioe = (IOException) ex;
                    throw new SystemIdIOException(baseUri, ioe.getMessage(), ioe);
                } else if (ex instanceof RuntimeException) {
                    throw (RuntimeException) ex;
                } else {
                    throw new RuntimeException("API contract violation. Wrong exception type.", ex);
                }
            }

            @Override
            public void finalizerCalled() {
                if (!released) {
                    released = true;
                    try {
                        meth.abort();
                    } catch (Exception e) {
                        log4j.debug("finalizerCalled, abort", e);
                    } finally {
                        try {
                            meth.releaseConnection();
                        } catch (Exception e) {
                            log4j.debug("finalizerCalled, releaseConnection", e);
                        }
                    }
                }
            }
        }));
        return is;
    } catch (IOException | RuntimeException | SAXException e) {
        // Any failure before the stream is handed out: abort the request and
        // release its connection, logging (not propagating) cleanup errors.
        if (m != null) {
            try {
                m.abort();
            } catch (Exception ex) {
                log4j.debug("abort", ex);
            } finally {
                try {
                    m.releaseConnection();
                } catch (Exception ex) {
                    log4j.debug("releaseConnection", ex);
                }
            }
        }
        throw e;
    }
}
Also used : SystemIdIOException(nu.validator.io.SystemIdIOException) Html5DatatypeException(nu.validator.datatype.Html5DatatypeException) HttpEntity(org.apache.http.HttpEntity) HttpGet(org.apache.http.client.methods.HttpGet) Logger(org.apache.log4j.Logger) Header(org.apache.http.Header) URL(io.mola.galimatias.URL) StreamBoundException(nu.validator.io.StreamBoundException) SAXException(org.xml.sax.SAXException) GZIPInputStream(java.util.zip.GZIPInputStream) GalimatiasParseException(io.mola.galimatias.GalimatiasParseException) Html5DatatypeException(nu.validator.datatype.Html5DatatypeException) DatatypeException(org.relaxng.datatype.DatatypeException) SAXParseException(org.xml.sax.SAXParseException) StreamObserver(nu.validator.io.StreamObserver) GZIPInputStream(java.util.zip.GZIPInputStream) BoundedInputStream(nu.validator.io.BoundedInputStream) ObservableInputStream(nu.validator.io.ObservableInputStream) InputStream(java.io.InputStream) HttpResponse(org.apache.http.HttpResponse) SystemIdIOException(nu.validator.io.SystemIdIOException) IOException(java.io.IOException) SystemIdIOException(nu.validator.io.SystemIdIOException) Html5DatatypeException(nu.validator.datatype.Html5DatatypeException) KeyStoreException(java.security.KeyStoreException) DatatypeException(org.relaxng.datatype.DatatypeException) StreamBoundException(nu.validator.io.StreamBoundException) IOException(java.io.IOException) KeyManagementException(java.security.KeyManagementException) CertificateException(java.security.cert.CertificateException) SAXParseException(org.xml.sax.SAXParseException) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) SAXException(org.xml.sax.SAXException) GalimatiasParseException(io.mola.galimatias.GalimatiasParseException) ObservableInputStream(nu.validator.io.ObservableInputStream) Header(org.apache.http.Header) BoundedInputStream(nu.validator.io.BoundedInputStream)

Example 2 with BoundedInputStream

Use of nu.validator.io.BoundedInputStream in the project "validator" (by validator).

From the class ParseTreePrinter, method service.

/**
 * Handles one parse-tree-dump request.
 *
 * <p>A GET without a {@code doc} or {@code content} parameter is answered
 * with the HTML form. Otherwise the document is obtained from the
 * {@code doc} URL, from the {@code content} parameter, or from the POST
 * body; it is parsed with the HTML or XML parser depending on its type, and
 * the tree dump followed by the collected errors is written as plain text.
 * {@link SAXException}s and {@link IOException}s raised while loading or
 * parsing are written into the response body instead of being propagated.
 *
 * @throws IOException if writing the response fails
 */
public void service() throws IOException {
    request.setCharacterEncoding("utf-8");
    String content = null;
    String document = scrubUrl(request.getParameter("doc"));
    document = ("".equals(document)) ? null : document;
    try (Writer writer = new OutputStreamWriter(response.getOutputStream(), "UTF-8")) {
        // Nothing to parse on a bare GET: serve the input form.
        if (document == null && methodIsGet() && (content = request.getParameter("content")) == null) {
            response.setContentType("text/html; charset=utf-8");
            writer.write(FORM_HTML);
            writer.flush();
            return;
        }
        response.setContentType("text/plain; charset=utf-8");
        try {
            PrudentHttpEntityResolver entityResolver = new PrudentHttpEntityResolver(2048 * 1024, false, null);
            entityResolver.setAllowGenericXml(false);
            entityResolver.setAcceptAllKnownXmlTypes(false);
            entityResolver.setAllowHtml(true);
            entityResolver.setAllowXhtml(true);
            TypedInputSource documentInput;
            if (methodIsGet()) {
                if (content == null) {
                    documentInput = (TypedInputSource) entityResolver.resolveEntity(null, document);
                } else {
                    documentInput = new TypedInputSource(new StringReader(content));
                    if ("xml".equals(request.getParameter("parser"))) {
                        documentInput.setType("application/xhtml+xml");
                    } else {
                        documentInput.setType("text/html");
                    }
                }
            } else {
                // POST
                String postContentType = request.getContentType();
                if (postContentType == null) {
                    response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Content-Type missing");
                    return;
                } else if (postContentType.trim().toLowerCase(java.util.Locale.ROOT).startsWith("application/x-www-form-urlencoded")) {
                    // Locale.ROOT keeps the case folding independent of the
                    // JVM default locale (e.g. Turkish dotless i).
                    response.sendError(HttpServletResponse.SC_UNSUPPORTED_MEDIA_TYPE, "application/x-www-form-urlencoded not supported. Please use multipart/form-data.");
                    return;
                }
                long len = request.getContentLength();
                if (len > SIZE_LIMIT) {
                    throw new StreamBoundException("Resource size exceeds limit.");
                }
                ContentTypeParser contentTypeParser = new ContentTypeParser(null, false);
                contentTypeParser.setAllowGenericXml(false);
                contentTypeParser.setAcceptAllKnownXmlTypes(false);
                contentTypeParser.setAllowHtml(true);
                contentTypeParser.setAllowXhtml(true);
                documentInput = contentTypeParser.buildTypedInputSource(document, null, postContentType);
                // Unknown length (negative): enforce the limit while reading.
                documentInput.setByteStream(len < 0 ? new BoundedInputStream(request.getInputStream(), SIZE_LIMIT, document) : request.getInputStream());
                documentInput.setSystemId(request.getHeader("Content-Location"));
            }
            String type = documentInput.getType();
            XMLReader parser;
            if ("text/html".equals(type) || "text/html-sandboxed".equals(type)) {
                writer.write("HTML parser\n\n#document\n");
                parser = new nu.validator.htmlparser.sax.HtmlParser();
                parser.setProperty("http://validator.nu/properties/heuristics", Heuristics.ALL);
                parser.setProperty("http://validator.nu/properties/xml-policy", XmlViolationPolicy.ALLOW);
            } else if ("application/xhtml+xml".equals(type)) {
                writer.write("XML parser\n\n#document\n");
                parser = new SAXDriver();
                // Do not fetch external entities for untrusted XML input.
                parser.setFeature("http://xml.org/sax/features/external-general-entities", false);
                parser.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
                parser.setEntityResolver(new NullEntityResolver());
            } else {
                writer.write("Unsupported content type.\n");
                writer.flush();
                return;
            }
            TreeDumpContentHandler treeDumpContentHandler = new TreeDumpContentHandler(writer, false);
            ListErrorHandler listErrorHandler = new ListErrorHandler();
            parser.setContentHandler(treeDumpContentHandler);
            parser.setProperty("http://xml.org/sax/properties/lexical-handler", treeDumpContentHandler);
            parser.setErrorHandler(listErrorHandler);
            parser.parse(documentInput);
            writer.write("#errors\n");
            for (String err : listErrorHandler.getErrors()) {
                writer.write(err);
                writer.write('\n');
            }
        } catch (SAXException e) {
            // getMessage() may be null, and Writer.write(String) throws a
            // NullPointerException on null; fall back to toString().
            String message = e.getMessage();
            writer.write("SAXException:\n");
            writer.write(message != null ? message : e.toString());
            writer.write("\n");
        } catch (IOException e) {
            String message = e.getMessage();
            writer.write("IOException:\n");
            writer.write(message != null ? message : e.toString());
            writer.write("\n");
        } finally {
            writer.flush();
        }
    }
}
Also used : NullEntityResolver(nu.validator.xml.NullEntityResolver) TypedInputSource(nu.validator.xml.TypedInputSource) PrudentHttpEntityResolver(nu.validator.xml.PrudentHttpEntityResolver) IOException(java.io.IOException) StreamBoundException(nu.validator.io.StreamBoundException) ContentTypeParser(nu.validator.xml.ContentTypeParser) SAXException(org.xml.sax.SAXException) SAXDriver(nu.validator.gnu.xml.aelfred2.SAXDriver) BoundedInputStream(nu.validator.io.BoundedInputStream) StringReader(java.io.StringReader) OutputStreamWriter(java.io.OutputStreamWriter) Writer(java.io.Writer) OutputStreamWriter(java.io.OutputStreamWriter) XMLReader(org.xml.sax.XMLReader)

Example 3 with BoundedInputStream

Use of nu.validator.io.BoundedInputStream in the project "validator" (by validator).

From the class VerifierServletTransaction, method loadDocumentInput.

/**
 * Populates {@code documentInput} unless it has already been set.
 *
 * <p>For GET requests the document is resolved through
 * {@code entityResolver}; for POST requests the request body is used,
 * wrapped in a {@link BoundedInputStream} when the content length is not
 * known. When an {@code imageCollector} is present, a new
 * {@link BaseUriTracker} is created from the loaded input and handed to it.
 *
 * @throws SAXException if resolving the document reports a SAX error
 * @throws IOException if the document cannot be read or the declared
 *         content length exceeds {@code SIZE_LIMIT}
 */
protected void loadDocumentInput() throws SAXException, IOException {
    if (documentInput == null) {
        if (!methodIsGet) {
            // POST
            final long declaredLength = request.getContentLength();
            if (declaredLength > SIZE_LIMIT) {
                throw new StreamBoundException("Resource size exceeds limit.");
            }
            documentInput = contentTypeParser.buildTypedInputSource(document, null, postContentType);
            if (declaredLength < 0) {
                // Length unknown: enforce the limit while reading the body.
                documentInput.setByteStream(new BoundedInputStream(request.getInputStream(), SIZE_LIMIT, document));
            } else {
                documentInput.setByteStream(request.getInputStream());
            }
            documentInput.setSystemId(request.getHeader("Content-Location"));
        } else {
            documentInput = (TypedInputSource) entityResolver.resolveEntity(null, document);
            errorHandler.setLoggingOk(true);
        }
        if (imageCollector != null) {
            baseUriTracker = new BaseUriTracker(documentInput.getSystemId(), documentInput.getLanguage());
            imageCollector.initializeContext(baseUriTracker);
        }
    }
}
Also used : BaseUriTracker(nu.validator.xml.BaseUriTracker) BoundedInputStream(nu.validator.io.BoundedInputStream) StreamBoundException(nu.validator.io.StreamBoundException)

Aggregations

BoundedInputStream (nu.validator.io.BoundedInputStream): 3; StreamBoundException (nu.validator.io.StreamBoundException): 3; IOException (java.io.IOException): 2; SAXException (org.xml.sax.SAXException): 2; GalimatiasParseException (io.mola.galimatias.GalimatiasParseException): 1; URL (io.mola.galimatias.URL): 1; InputStream (java.io.InputStream): 1; OutputStreamWriter (java.io.OutputStreamWriter): 1; StringReader (java.io.StringReader): 1; Writer (java.io.Writer): 1; KeyManagementException (java.security.KeyManagementException): 1; KeyStoreException (java.security.KeyStoreException): 1; NoSuchAlgorithmException (java.security.NoSuchAlgorithmException): 1; CertificateException (java.security.cert.CertificateException): 1; GZIPInputStream (java.util.zip.GZIPInputStream): 1; Html5DatatypeException (nu.validator.datatype.Html5DatatypeException): 1; SAXDriver (nu.validator.gnu.xml.aelfred2.SAXDriver): 1; ObservableInputStream (nu.validator.io.ObservableInputStream): 1; StreamObserver (nu.validator.io.StreamObserver): 1; SystemIdIOException (nu.validator.io.SystemIdIOException): 1