Search in sources :

Example 1 with URL

Use of io.mola.galimatias.URL in the validator project (by validator).

In class PrudentHttpEntityResolver, method resolveEntity:

/**
 * Fetches the entity identified by <code>systemId</code> with an HTTP GET and
 * returns it as a typed input source.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>If a request budget is active (<code>requestsLeft &gt; -1</code>), it is
 * decremented; an exhausted budget fails with an <code>IOException</code>.</li>
 * <li><code>systemId</code> is parsed and normalized; only the
 * <code>http</code> and <code>https</code> schemes are accepted.</li>
 * <li>The GET request is sent with <code>User-Agent</code>,
 * <code>Accept</code>, <code>Accept-Encoding: gzip</code> and, when the
 * current request carries the corresponding attribute,
 * <code>Accept-Language</code>.</li>
 * <li>A status other than 200 is rejected unless the
 * ignore-response-status request attribute is set.</li>
 * <li>The declared content length is checked against <code>sizeLimit</code>
 * (when <code>sizeLimit &gt; -1</code>), and the body stream is bounded by the
 * same limit both before and after gzip decoding.</li>
 * <li>The <code>X-UA-Compatible</code> and
 * <code>Content-Security-Policy</code> response headers are checked and
 * problems are reported to <code>errorHandler</code>, if one is set.</li>
 * </ol>
 *
 * <p>The returned stream releases the HTTP connection when it is closed,
 * when it reports an exception, or when its finalizer hook runs. If this
 * method itself fails, the request is aborted and released before the
 * exception is rethrown.
 *
 * @param publicId the public identifier, used only for error reporting and
 *        for building the input source
 * @param systemId the URL to fetch
 * @return the input source wrapping the response body
 * @throws SAXException if the scheme is unsupported, the request cannot be
 *         built, the declared size exceeds the limit, or the error handler
 *         throws
 * @throws IOException if the request budget is exhausted, the URL cannot be
 *         parsed, the port is out of range, or the transfer fails
 * @see org.xml.sax.EntityResolver#resolveEntity(java.lang.String,
 *      java.lang.String)
 */
@Override
public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
    if (requestsLeft > -1) {
        if (requestsLeft == 0) {
            throw new IOException("Number of permitted HTTP requests exceeded.");
        } else {
            requestsLeft--;
        }
    }
    HttpGet m = null;
    try {
        URL url = null;
        try {
            url = URL.parse(systemId);
        } catch (GalimatiasParseException e) {
            IOException ioe = (IOException) new IOException(e.getMessage()).initCause(e);
            SAXParseException spe = new SAXParseException(e.getMessage(), publicId, systemId, -1, -1, ioe);
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw ioe;
        }
        String scheme = url.scheme();
        if (!("http".equals(scheme) || "https".equals(scheme))) {
            String msg = "Unsupported URI scheme: \u201C" + scheme + "\u201D.";
            SAXParseException spe = new SAXParseException(msg, publicId, systemId, -1, -1, new IOException(msg));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw spe;
        }
        // From here on work with the normalized form of the URL.
        systemId = url.toString();
        try {
            m = new HttpGet(systemId);
        } catch (IllegalArgumentException e) {
            SAXParseException spe = new SAXParseException(e.getMessage(), publicId, systemId, -1, -1, (IOException) new IOException(e.getMessage()).initCause(e));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw spe;
        }
        m.setHeader("User-Agent", userAgent);
        m.setHeader("Accept", buildAccept());
        m.setHeader("Accept-Encoding", "gzip");
        // Look the attribute up once instead of once for the test and once for the value.
        Object acceptLanguage = (request == null) ? null : request.getAttribute("http://validator.nu/properties/accept-language");
        if (acceptLanguage != null) {
            m.setHeader("Accept-Language", (String) acceptLanguage);
        }
        log4j.info(systemId);
        try {
            if (url.port() > 65535) {
                throw new IOException("Port number must be less than 65536.");
            }
        } catch (NumberFormatException e) {
            throw new IOException("Port number must be less than 65536.");
        }
        HttpResponse response = client.execute(m);
        boolean ignoreResponseStatus = false;
        if (request != null && request.getAttribute("http://validator.nu/properties/ignore-response-status") != null) {
            ignoreResponseStatus = (boolean) request.getAttribute("http://validator.nu/properties/ignore-response-status");
        }
        int statusCode = response.getStatusLine().getStatusCode();
        if (statusCode != 200 && !ignoreResponseStatus) {
            String msg = "HTTP resource not retrievable." + " The HTTP status from the remote server was: " + statusCode + ".";
            SAXParseException spe = new SAXParseException(msg, publicId, m.getURI().toString(), -1, -1, new SystemIdIOException(m.getURI().toString(), msg));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw new ResourceNotRetrievableException(String.format("%s: %s", m.getURI().toString(), msg));
        }
        HttpEntity entity = response.getEntity();
        long len = entity.getContentLength();
        // Early rejection based on the declared length; the stream is bounded again below.
        if (sizeLimit > -1 && len > sizeLimit) {
            SAXParseException spe = new SAXParseException("Resource size exceeds limit.", publicId, m.getURI().toString(), -1, -1, new StreamBoundException("Resource size exceeds limit."));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw spe;
        }
        TypedInputSource is;
        org.apache.http.Header ct = response.getFirstHeader("Content-Type");
        String contentType = null;
        final String baseUri = m.getURI().toString();
        if (ct != null) {
            contentType = ct.getValue();
        }
        is = contentTypeParser.buildTypedInputSource(baseUri, publicId, contentType);
        Header cl = response.getFirstHeader("Content-Language");
        if (cl != null) {
            is.setLanguage(cl.getValue().trim());
        }
        Header xuac = response.getFirstHeader("X-UA-Compatible");
        if (xuac != null) {
            String val = xuac.getValue().trim();
            if (!"ie=edge".equalsIgnoreCase(val)) {
                SAXParseException spe = new SAXParseException("X-UA-Compatible HTTP header must have the value \u201CIE=edge\u201D," + " was \u201C" + val + "\u201D.", publicId, systemId, -1, -1);
                // The handler is optional everywhere else in this method; guard here too
                // so a missing handler cannot turn a header diagnostic into an NPE.
                if (errorHandler != null) {
                    errorHandler.error(spe);
                }
            }
        }
        Header csp = response.getFirstHeader("Content-Security-Policy");
        if (csp != null) {
            try {
                ContentSecurityPolicy.THE_INSTANCE.checkValid(csp.getValue().trim());
            } catch (DatatypeException e) {
                SAXParseException spe = new SAXParseException("Content-Security-Policy HTTP header: " + e.getMessage(), publicId, systemId, -1, -1);
                // Only an Html5DatatypeException can be a warning; any other
                // DatatypeException is reported as an error instead of failing the cast.
                boolean warning = (e instanceof Html5DatatypeException) && ((Html5DatatypeException) e).isWarning();
                if (errorHandler != null) {
                    if (warning) {
                        errorHandler.warning(spe);
                    } else {
                        errorHandler.error(spe);
                    }
                }
            }
        }
        final HttpGet meth = m;
        InputStream stream = entity.getContent();
        if (sizeLimit > -1) {
            stream = new BoundedInputStream(stream, sizeLimit, baseUri);
        }
        Header ce = response.getFirstHeader("Content-Encoding");
        if (ce != null) {
            String val = ce.getValue().trim();
            if ("gzip".equalsIgnoreCase(val) || "x-gzip".equalsIgnoreCase(val)) {
                stream = new GZIPInputStream(stream);
                // Bound the decoded bytes as well as the encoded ones.
                if (sizeLimit > -1) {
                    stream = new BoundedInputStream(stream, sizeLimit, baseUri);
                }
            }
        }
        is.setByteStream(new ObservableInputStream(stream, new StreamObserver() {

            private final Logger log4j = Logger.getLogger("nu.validator.xml.PrudentEntityResolver.StreamObserver");

            // Set once the connection has been handed back, so it is released only once.
            private boolean released = false;

            @Override
            public void closeCalled() {
                log4j.debug("closeCalled");
                if (!released) {
                    log4j.debug("closeCalled, not yet released");
                    released = true;
                    try {
                        meth.releaseConnection();
                    } catch (Exception e) {
                        log4j.debug("closeCalled, releaseConnection", e);
                    }
                }
            }

            @Override
            public void exceptionOccurred(Exception ex) throws IOException {
                if (!released) {
                    released = true;
                    try {
                        meth.abort();
                    } catch (Exception e) {
                        log4j.debug("exceptionOccurred, abort", e);
                    } finally {
                        try {
                            meth.releaseConnection();
                        } catch (Exception e) {
                            log4j.debug("exceptionOccurred, releaseConnection", e);
                        }
                    }
                }
                // Rethrow, attaching the base URI to plain IOExceptions.
                if (ex instanceof SystemIdIOException) {
                    throw (SystemIdIOException) ex;
                } else if (ex instanceof IOException) {
                    IOException ioe = (IOException) ex;
                    throw new SystemIdIOException(baseUri, ioe.getMessage(), ioe);
                } else if (ex instanceof RuntimeException) {
                    throw (RuntimeException) ex;
                } else {
                    throw new RuntimeException("API contract violation. Wrong exception type.", ex);
                }
            }

            @Override
            public void finalizerCalled() {
                if (!released) {
                    released = true;
                    try {
                        meth.abort();
                    } catch (Exception e) {
                        log4j.debug("finalizerCalled, abort", e);
                    } finally {
                        try {
                            meth.releaseConnection();
                        } catch (Exception e) {
                            log4j.debug("finalizerCalled, releaseConnection", e);
                        }
                    }
                }
            }
        }));
        return is;
    } catch (IOException | RuntimeException | SAXException e) {
        // Any failure before the stream is handed to the caller: give the
        // connection back here, because nobody else will.
        if (m != null) {
            try {
                m.abort();
            } catch (Exception ex) {
                log4j.debug("abort", ex);
            } finally {
                try {
                    m.releaseConnection();
                } catch (Exception ex) {
                    log4j.debug("releaseConnection", ex);
                }
            }
        }
        throw e;
    }
}
Also used : SystemIdIOException(nu.validator.io.SystemIdIOException) Html5DatatypeException(nu.validator.datatype.Html5DatatypeException) HttpEntity(org.apache.http.HttpEntity) HttpGet(org.apache.http.client.methods.HttpGet) Logger(org.apache.log4j.Logger) Header(org.apache.http.Header) URL(io.mola.galimatias.URL) StreamBoundException(nu.validator.io.StreamBoundException) SAXException(org.xml.sax.SAXException) GZIPInputStream(java.util.zip.GZIPInputStream) GalimatiasParseException(io.mola.galimatias.GalimatiasParseException) Html5DatatypeException(nu.validator.datatype.Html5DatatypeException) DatatypeException(org.relaxng.datatype.DatatypeException) SAXParseException(org.xml.sax.SAXParseException) StreamObserver(nu.validator.io.StreamObserver) GZIPInputStream(java.util.zip.GZIPInputStream) BoundedInputStream(nu.validator.io.BoundedInputStream) ObservableInputStream(nu.validator.io.ObservableInputStream) InputStream(java.io.InputStream) HttpResponse(org.apache.http.HttpResponse) SystemIdIOException(nu.validator.io.SystemIdIOException) IOException(java.io.IOException) SystemIdIOException(nu.validator.io.SystemIdIOException) Html5DatatypeException(nu.validator.datatype.Html5DatatypeException) KeyStoreException(java.security.KeyStoreException) DatatypeException(org.relaxng.datatype.DatatypeException) StreamBoundException(nu.validator.io.StreamBoundException) IOException(java.io.IOException) KeyManagementException(java.security.KeyManagementException) CertificateException(java.security.cert.CertificateException) SAXParseException(org.xml.sax.SAXParseException) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) SAXException(org.xml.sax.SAXException) GalimatiasParseException(io.mola.galimatias.GalimatiasParseException) ObservableInputStream(nu.validator.io.ObservableInputStream) Header(org.apache.http.Header) BoundedInputStream(nu.validator.io.BoundedInputStream)

Example 2 with URL

Use of io.mola.galimatias.URL in the validator project (by validator).

In class DataUriEntityResolver, method resolveEntity:

/**
 * Resolves <code>data:</code> URIs itself and forwards every other system
 * identifier to the delegate resolver.
 *
 * <p>A <code>data:</code> identifier is parsed and normalized, decoded through
 * {@code DataUri}, and returned as a typed input source built from the data
 * URI's own content type. A parse failure is reported to the error handler
 * (when one is set) as a fatal error and then thrown.
 *
 * @param publicId the public identifier, passed through to the input source
 *        and to error reports
 * @param systemId the system identifier to resolve
 * @return the resolved input source
 * @throws SAXException if the <code>data:</code> URI cannot be parsed, or if
 *         the delegate throws
 * @throws IOException if the identifier is not a <code>data:</code> URI and
 *         no delegate is configured, or if decoding fails
 */
@Override
public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
    if (!DataUri.startsWithData(systemId)) {
        // Not ours: hand over to the delegate, or give up if there is none.
        if (delegate == null) {
            throw new IOException("Unsupported URI scheme.");
        }
        return delegate.resolveEntity(publicId, systemId);
    }

    final URL parsed;
    try {
        parsed = URL.parse(systemId);
    } catch (GalimatiasParseException e) {
        final String reason = e.getMessage();
        final IOException cause = new IOException(reason);
        cause.initCause(e);
        final SAXParseException fatal = new SAXParseException(reason, publicId, systemId, -1, -1, cause);
        if (errorHandler != null) {
            errorHandler.fatalError(fatal);
        }
        throw fatal;
    }

    // Continue with the normalized serialization of the URL.
    final String normalized = parsed.toString();
    final DataUri dataUri = new DataUri(normalized);
    final TypedInputSource source = contentTypeParser.buildTypedInputSource(normalized, publicId, dataUri.getContentType());
    source.setByteStream(dataUri.getInputStream());
    return source;
}
Also used : GalimatiasParseException(io.mola.galimatias.GalimatiasParseException) SAXParseException(org.xml.sax.SAXParseException) IOException(java.io.IOException) URL(io.mola.galimatias.URL) DataUri(nu.validator.io.DataUri)

Example 3 with URL

Use of io.mola.galimatias.URL in the validator project (by validator).

In class IriRef, method checkValid:

/**
 * Validates a URL literal.
 *
 * <p>The literal must not be empty after HTML-space trimming. It is then
 * split into scheme and tail:
 * <ul>
 * <li>With no recognized scheme, the literal is rejected if an absolute URL
 * or an http/https URL is required; otherwise it is parsed relative to the
 * placeholder base <code>http://example.org/foo/bar</code>.</li>
 * <li>With a scheme: well-known and <code>data</code> schemes are parsed as
 * is, <code>javascript</code> is not parsed at all, HTTP aliases are parsed
 * with the scheme replaced by <code>http:</code>, and any other scheme is
 * parsed with <code>x-</code> prepended to the whole literal.</li>
 * </ul>
 * A successfully parsed <code>data:</code> URL is additionally decoded to the
 * end of its stream so that decoding problems surface as datatype errors.
 *
 * @param literal the URL text to check
 * @throws DatatypeException if the literal is empty, violates the
 *         absolute/http(s) requirements, fails URL parsing, or is a
 *         <code>data:</code> URL that cannot be decoded
 */
@Override
public void checkValid(CharSequence literal) throws DatatypeException {
    String prefix = "";
    final int len = literal.length();
    final String text = literal.toString();
    if (reportValue()) {
        if (len < ELIDE_LIMIT) {
            prefix = "\u201c" + literal + "\u201d: ";
        } else {
            // Too long to quote in full: keep both ends, put an ellipsis between them.
            final int half = ELIDE_LIMIT / 2;
            StringBuilder elided = new StringBuilder(ELIDE_LIMIT + 1);
            elided.append(literal, 0, half);
            elided.append('\u2026');
            elided.append(literal, len - half, len);
            prefix = "\u201c" + elided.toString() + "\u201d: ";
        }
    }
    if ("".equals(trimHtmlSpaces(text))) {
        throw newDatatypeException("Must be non-empty.");
    }

    URL parsed = null;
    boolean isDataUrl = false;
    URLParsingSettings strict = URLParsingSettings.create().withErrorHandler(StrictErrorHandler.getInstance());
    try {
        CharSequencePair parts = splitScheme(literal);
        if (parts == null) {
            // no scheme or scheme is private
            if (isAbsolute()) {
                throw newDatatypeException("The string \u201c" + literal + "\u201d is not an absolute URL.");
            }
            if (mustBeHttpOrHttps()) {
                throw newDatatypeException("Must contain only" + " \u201chttp\u201d or \u201chttps\u201d URLs.");
            }
            // in this case, doc's actual base URL isn't relevant,
            // so just use http://example.org/foo/bar as base
            URL placeholderBase = URL.parse("http://example.org/foo/bar");
            parsed = URL.parse(strict, placeholderBase, text);
        } else {
            CharSequence scheme = parts.getHead();
            CharSequence rest = parts.getTail();
            if (mustBeHttpOrHttps() && !isHttpOrHttps(scheme)) {
                throw newDatatypeException("Must contain only" + " \u201chttp\u201d or \u201chttps\u201d URLs.");
            }
            if (isWellKnown(scheme)) {
                parsed = URL.parse(strict, text);
            } else if ("javascript".contentEquals(scheme)) {
                // Don't bother user with generic IRI syntax
                parsed = null;
            } else if ("data".contentEquals(scheme)) {
                isDataUrl = true;
                parsed = URL.parse(strict, text);
            } else if (isHttpAlias(scheme)) {
                String asHttp = new StringBuilder(5 + rest.length()).append("http:").append(rest).toString();
                parsed = URL.parse(strict, asHttp);
            } else {
                String prefixed = new StringBuilder(2 + literal.length()).append("x-").append(literal).toString();
                parsed = URL.parse(strict, prefixed);
            }
        }
    } catch (GalimatiasParseException e) {
        throw newDatatypeException(prefix + e.getMessage() + ".");
    }

    // Only a successfully parsed data: URL needs the extra decoding pass.
    if (parsed == null || !isDataUrl) {
        return;
    }
    try {
        DataUri dataUri = new DataUri(parsed);
        InputStream in = dataUri.getInputStream();
        int b;
        do {
            // drain the stream; only decoding errors matter here
            b = in.read();
        } while (b >= 0);
    } catch (DataUriException e) {
        throw newDatatypeException(e.getIndex(), e.getHead(), e.getLiteral(), e.getTail());
    } catch (IOException e) {
        String msg = e.getMessage();
        if (WARN && "Fragment is not allowed for data: URIs according to RFC 2397.".equals(msg)) {
            throw newDatatypeException(prefix + msg, WARN);
        }
        throw newDatatypeException(prefix + msg);
    }
}
Also used : InputStream(java.io.InputStream) IOException(java.io.IOException) URLParsingSettings(io.mola.galimatias.URLParsingSettings) URL(io.mola.galimatias.URL) GalimatiasParseException(io.mola.galimatias.GalimatiasParseException) DataUriException(nu.validator.io.DataUriException) DataUri(nu.validator.io.DataUri)

Example 4 with URL

Use of io.mola.galimatias.URL in the validator project (by validator).

In class BaseUriTracker, method push:

/**
 * Pushes a new entry onto the tracking stack, derived from the current top
 * entry and the given overrides.
 *
 * <ul>
 * <li><b>Language:</b> a non-null <code>language</code> that is empty or
 * passes the language datatype check becomes the new language and is marked
 * as specified; otherwise the language of the current top entry is
 * inherited.</li>
 * <li><b>Direction:</b> <code>RTL</code> and <code>LTR</code> set the flag
 * explicitly; any other value inherits it from the current top entry.</li>
 * <li><b>Base URL:</b> when both <code>relative</code> and the current base
 * are present, <code>relative</code> is resolved against the current base;
 * if that fails, or if either is absent, the current base is carried over
 * unchanged (which is <code>null</code> when there is no current base).</li>
 * </ul>
 *
 * @param relative the URL reference to resolve against the current base, or
 *        <code>null</code> to keep the current base
 * @param language the language tag to apply, or <code>null</code> to inherit
 * @param dir the direction to apply; must not be <code>null</code> because
 *        it is switched on
 */
private void push(String relative, String language, Direction dir) {
    String lang = "";
    boolean langSpecified = false;
    if (language != null) {
        try {
            if (!"".equals(language)) {
                Language.THE_INSTANCE.checkValid(language);
            }
            lang = language;
            langSpecified = true;
        } catch (DatatypeException ignored) {
            // An invalid language tag is treated as "not specified", so the
            // inherited language is used below. Appears intentional.
        }
    }
    Node curr = peek();
    URL base = curr.currentAbsolute;
    if (!langSpecified) {
        lang = curr.lang;
    }
    boolean rtl;
    switch (dir) {
        case RTL:
            rtl = true;
            break;
        case LTR:
            rtl = false;
            break;
        default:
            rtl = curr.rtl;
            break;
    }
    // Default: carry the current base over unchanged.
    URL newBase = base;
    if (relative != null && base != null) {
        try {
            newBase = base.resolve(relative);
        } catch (Exception e) {
            // Resolution failed (parse error or anything else): keep the current base.
            newBase = base;
        }
    }
    // NOTE(review): the previous code tried to parse a never-assigned (always
    // null) string when there was no base, which always threw and left the new
    // base null. That outcome is kept here explicitly. If a base-less absolute
    // `relative` was meant to be parsed, that needs a separate, deliberate change.
    // The second Node argument was likewise always null and stays null.
    stack.addLast(new Node(newBase, null, lang, langSpecified, rtl));
}
Also used : DatatypeException(org.relaxng.datatype.DatatypeException) GalimatiasParseException(io.mola.galimatias.GalimatiasParseException) URI(java.net.URI) URL(io.mola.galimatias.URL) SAXException(org.xml.sax.SAXException) GalimatiasParseException(io.mola.galimatias.GalimatiasParseException) DatatypeException(org.relaxng.datatype.DatatypeException)

Aggregations

GalimatiasParseException (io.mola.galimatias.GalimatiasParseException)4 URL (io.mola.galimatias.URL)4 IOException (java.io.IOException)3 InputStream (java.io.InputStream)2 DataUri (nu.validator.io.DataUri)2 DatatypeException (org.relaxng.datatype.DatatypeException)2 SAXException (org.xml.sax.SAXException)2 SAXParseException (org.xml.sax.SAXParseException)2 URLParsingSettings (io.mola.galimatias.URLParsingSettings)1 URI (java.net.URI)1 KeyManagementException (java.security.KeyManagementException)1 KeyStoreException (java.security.KeyStoreException)1 NoSuchAlgorithmException (java.security.NoSuchAlgorithmException)1 CertificateException (java.security.cert.CertificateException)1 GZIPInputStream (java.util.zip.GZIPInputStream)1 Html5DatatypeException (nu.validator.datatype.Html5DatatypeException)1 BoundedInputStream (nu.validator.io.BoundedInputStream)1 DataUriException (nu.validator.io.DataUriException)1 ObservableInputStream (nu.validator.io.ObservableInputStream)1 StreamBoundException (nu.validator.io.StreamBoundException)1