Search in sources :

Example 1 with ResourceNotRetrievableException

use of nu.validator.xml.PrudentHttpEntityResolver.ResourceNotRetrievableException in project validator by validator.

In the class VerifierServletTransaction, method validate.

/**
 * Runs one validation pass over the requested document and reports the
 * outcome through {@code errorHandler}.
 *
 * <p>Outline of what the method does, in order:
 * <ol>
 * <li>returns immediately when {@code willValidate()} is false;</li>
 * <li>flushes {@code out} when the output format is HTML or XHTML;</li>
 * <li>builds the entity-resolver chain (HTTP, then data: URIs, then the
 * local cache wrapper) and the Jing property map;</li>
 * <li>sets up the validator and parser, with a special path that wraps
 * {@code text/css} input in a fixed prolog/epilog and parses it in HTML
 * mode;</li>
 * <li>stacks the XML reader wrappers (wiretap, optional namespace dropping,
 * attribute permuting, optional outline building) and parses the input;</li>
 * <li>maps the exceptions raised during the above onto log entries or
 * {@code errorHandler} callbacks;</li>
 * <li>always calls {@code errorHandler.end(...)} and
 * {@code gatherStatistics()};</li>
 * <li>for HTML/XHTML output that did not hit an I/O or internal error,
 * emits the outline, details and statistics.</li>
 * </ol>
 *
 * @throws SAXException if flushing {@code out} fails (the
 *             {@code IOException} is wrapped). NOTE(review): the catch
 *             handlers, the finally block and the trailing emit calls sit
 *             outside the {@code catch (SAXException)} clause, so they
 *             presumably can propagate a SAXException as well — confirm
 *             against their declarations.
 */
@SuppressWarnings({ "deprecation", "unchecked" })
void validate() throws SAXException {
    if (!willValidate()) {
        return;
    }
    // Remembered up front; the catch blocks below clear this flag to suppress
    // the trailing outline/details/statistics output after a hard failure.
    boolean isHtmlOrXhtml = (outputFormat == OutputFormat.HTML || outputFormat == OutputFormat.XHTML);
    if (isHtmlOrXhtml) {
        try {
            out.flush();
        } catch (IOException e1) {
            throw new SAXException(e1);
        }
    }
    // Resolver chain: dataRes delegates to httpRes, and entityResolver wraps dataRes.
    httpRes = new PrudentHttpEntityResolver(SIZE_LIMIT, laxType, errorHandler, request);
    httpRes.setUserAgent(userAgent);
    dataRes = new DataUriEntityResolver(httpRes, laxType, errorHandler);
    contentTypeParser = new ContentTypeParser(errorHandler, laxType);
    entityResolver = new LocalCacheEntityResolver(dataRes);
    setAllowRnc(true);
    setAllowCss(true);
    try {
        this.errorHandler.start(document);
        // Property map handed to Jing; this object acts as the schema resolver.
        PropertyMapBuilder pmb = new PropertyMapBuilder();
        pmb.put(ValidateProperty.ERROR_HANDLER, errorHandler);
        pmb.put(ValidateProperty.ENTITY_RESOLVER, entityResolver);
        pmb.put(ValidateProperty.XML_READER_CREATOR, new VerifierServletXMLReaderCreator(errorHandler, entityResolver));
        pmb.put(ValidateProperty.SCHEMA_RESOLVER, this);
        RngProperty.CHECK_ID_IDREF.add(pmb);
        jingPropertyMap = pmb.toPropertyMap();
        tryToSetupValidator();
        // RNC is allowed only while the validator is being set up above.
        setAllowRnc(false);
        loadDocAndSetupParser();
        setErrorProfile();
        contentType = documentInput.getType();
        if ("text/css".equals(contentType)) {
            // CSS input: sandwich the stylesheet bytes between a fixed prolog
            // and epilog and run the result through the HTML parser.
            String charset = "UTF-8";
            if (documentInput.getEncoding() != null) {
                charset = documentInput.getEncoding();
            }
            List<InputStream> streams = new ArrayList<>();
            streams.add(new ByteArrayInputStream(CSS_CHECKING_PROLOG));
            streams.add(documentInput.getByteStream());
            streams.add(new ByteArrayInputStream(CSS_CHECKING_EPILOG));
            Enumeration<InputStream> e = Collections.enumeration(streams);
            documentInput.setByteStream(new SequenceInputStream(e));
            documentInput.setEncoding(charset);
            // NOTE(review): presumably compensates for a line added by the
            // prolog so reported line numbers match the original CSS — confirm.
            errorHandler.setLineOffset(-1);
            sourceCode.setIsCss();
            parser = ParserMode.HTML;
            // Second call, now with the parser mode forced to HTML.
            loadDocAndSetupParser();
        }
        reader.setErrorHandler(errorHandler);
        sourceCode.initialize(documentInput);
        // With no validator configured, normalization checking is forced on.
        if (validator == null) {
            checkNormalization = true;
        }
        if (checkNormalization) {
            reader.setFeature("http://xml.org/sax/features/unicode-normalization-checking", true);
        }
        // The wiretap forwards events to the source-location recorder and,
        // when one exists, to the base URI tracker as well.
        WiretapXMLReaderWrapper wiretap = new WiretapXMLReaderWrapper(reader);
        ContentHandler recorder = sourceCode.getLocationRecorder();
        if (baseUriTracker == null) {
            wiretap.setWiretapContentHander(recorder);
        } else {
            wiretap.setWiretapContentHander(new CombineContentHandler(recorder, baseUriTracker));
        }
        // The recorder is also used as the lexical handler (hence the cast).
        wiretap.setWiretapLexicalHandler((LexicalHandler) recorder);
        reader = wiretap;
        if (htmlParser != null) {
            htmlParser.addCharacterHandler(sourceCode);
            htmlParser.setMappingLangToXmlLang(true);
            htmlParser.setErrorHandler(errorHandler.getExactErrorHandler());
            htmlParser.setTreeBuilderErrorHandlerOverride(errorHandler);
            errorHandler.setHtml(true);
        } else if (xmlParser != null) {
            // this must be after wiretap!
            if (!filteredNamespaces.isEmpty()) {
                reader = new NamespaceDroppingXMLReaderWrapper(reader, filteredNamespaces);
            }
            xmlParser.setErrorHandler(errorHandler.getExactErrorHandler());
            xmlParser.lockErrorHandler();
        } else {
            // Neither parser was set up; treated as a programming error.
            throw new RuntimeException("Bug. Unreachable.");
        }
        // make
        reader = new AttributesPermutingXMLReaderWrapper(reader);
        // better
        if (charsetOverride != null) {
            // Warn about the override before applying it, quoting the old
            // encoding when the input declared one.
            String charset = documentInput.getEncoding();
            if (charset == null) {
                errorHandler.warning(new SAXParseException("Overriding document character encoding from none to \u201C" + charsetOverride + "\u201D.", null));
            } else {
                errorHandler.warning(new SAXParseException("Overriding document character encoding from \u201C" + charset + "\u201D to \u201C" + charsetOverride + "\u201D.", null));
            }
            documentInput.setEncoding(charsetOverride);
        }
        if (showOutline) {
            // Two wrappers differing only in the boolean argument.
            // NOTE(review): presumably one builds the structural outline and
            // the other the heading outline read back below — confirm.
            reader = new OutlineBuildingXMLReaderWrapper(reader, request, false);
            reader = new OutlineBuildingXMLReaderWrapper(reader, request, true);
        }
        reader.parse(documentInput);
        if (showOutline) {
            // The outlines are read back from request attributes after parsing;
            // the casts are unchecked.
            outline = (Deque<Section>) request.getAttribute("http://validator.nu/properties/document-outline");
            headingOutline = (Deque<Section>) request.getAttribute("http://validator.nu/properties/heading-outline");
        }
    } catch (CannotFindPresetSchemaException e) {
        // Intentionally empty in the original.
        // NOTE(review): presumably already reported where it was raised — confirm.
    } catch (ResourceNotRetrievableException e) {
        log4j.debug(e.getMessage());
    } catch (NonXmlContentTypeException e) {
        log4j.debug(e.getMessage());
    } catch (FatalSAXException e) {
        log4j.debug(e.getMessage());
    } catch (SocketTimeoutException e) {
        // Re-reported as a plain IOException carrying only the message; the
        // cause is explicitly null, so the timeout exception is not chained.
        errorHandler.ioError(new IOException(e.getMessage(), null));
    } catch (ConnectTimeoutException e) {
        errorHandler.ioError(new IOException(e.getMessage(), null));
    } catch (TooManyErrorsException e) {
        errorHandler.fatalError(e);
    } catch (SAXException e) {
        // The two known messages (cannotRecover, changingEncoding) are not
        // logged; every other SAXException is logged at debug level only.
        String msg = e.getMessage();
        if (!cannotRecover.equals(msg) && !changingEncoding.equals(msg)) {
            log4j.debug("SAXException: " + e.getMessage());
        }
    } catch (IOException e) {
        // Suppresses the trailing outline/details/statistics output.
        isHtmlOrXhtml = false;
        if (e.getCause() instanceof org.apache.http.TruncatedChunkException) {
            log4j.debug("TruncatedChunkException", e.getCause());
        } else {
            errorHandler.ioError(e);
        }
    } catch (IncorrectSchemaException e) {
        log4j.debug("IncorrectSchemaException", e);
        errorHandler.schemaError(e);
    } catch (RuntimeException e) {
        // Internal failure: log with context, report a generic message, and
        // suppress the trailing output.
        isHtmlOrXhtml = false;
        log4j.error("RuntimeException, doc: " + document + " schema: " + schemaUrls + " lax: " + laxType, e);
        errorHandler.internalError(e, "Oops. That was not supposed to happen. A bug manifested itself in the application internals. Unable to continue. Sorry. The admin was notified.");
    } catch (Error e) {
        // Same handling as RuntimeException, applied to java.lang.Error.
        isHtmlOrXhtml = false;
        log4j.error("Error, doc: " + document + " schema: " + schemaUrls + " lax: " + laxType, e);
        errorHandler.internalError(e, "Oops. That was not supposed to happen. A bug manifested itself in the application internals. Unable to continue. Sorry. The admin was notified.");
    } finally {
        // Runs on every path through the try block, including failures.
        errorHandler.end(successMessage(), failureMessage(), (String) request.getAttribute("http://validator.nu/properties/document-language"));
        gatherStatistics();
    }
    if (isHtmlOrXhtml) {
        // Only reached with the flag still set, i.e. HTML/XHTML output and no
        // IOException, RuntimeException or Error above.
        XhtmlOutlineEmitter outlineEmitter = new XhtmlOutlineEmitter(contentHandler, outline, headingOutline);
        outlineEmitter.emitHeadings();
        outlineEmitter.emit();
        emitDetails();
        StatsEmitter.emit(contentHandler, this);
    }
}
Also used : TooManyErrorsException(nu.validator.messages.TooManyErrorsException) WiretapXMLReaderWrapper(nu.validator.xml.WiretapXMLReaderWrapper) ArrayList(java.util.ArrayList) NonXmlContentTypeException(nu.validator.xml.ContentTypeParser.NonXmlContentTypeException) PrudentHttpEntityResolver(nu.validator.xml.PrudentHttpEntityResolver) CombineContentHandler(nu.validator.xml.CombineContentHandler) ContentHandler(org.xml.sax.ContentHandler) FatalSAXException(nu.validator.gnu.xml.aelfred2.FatalSAXException) SAXException(org.xml.sax.SAXException) ContentTypeParser(nu.validator.xml.ContentTypeParser) CombineContentHandler(nu.validator.xml.CombineContentHandler) NamespaceDroppingXMLReaderWrapper(nu.validator.xml.NamespaceDroppingXMLReaderWrapper) SAXParseException(org.xml.sax.SAXParseException) FatalSAXException(nu.validator.gnu.xml.aelfred2.FatalSAXException) ResourceNotRetrievableException(nu.validator.xml.PrudentHttpEntityResolver.ResourceNotRetrievableException) PropertyMapBuilder(com.thaiopensource.util.PropertyMapBuilder) DataUriEntityResolver(nu.validator.xml.DataUriEntityResolver) BoundedInputStream(nu.validator.io.BoundedInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) SequenceInputStream(java.io.SequenceInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) AttributesPermutingXMLReaderWrapper(nu.validator.xml.AttributesPermutingXMLReaderWrapper) IncorrectSchemaException(com.thaiopensource.validate.IncorrectSchemaException) IOException(java.io.IOException) Section(nu.validator.servlet.OutlineBuildingXMLReaderWrapper.Section) LocalCacheEntityResolver(nu.validator.localentities.LocalCacheEntityResolver) SocketTimeoutException(java.net.SocketTimeoutException) SequenceInputStream(java.io.SequenceInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) ConnectTimeoutException(org.apache.http.conn.ConnectTimeoutException)

Example 2 with ResourceNotRetrievableException

use of nu.validator.xml.PrudentHttpEntityResolver.ResourceNotRetrievableException in project validator by validator.

In the class SimpleDocumentValidator, method checkHttpURL.

/**
 * Checks a Web document.
 *
 * <p>Resolves {@code document} through a new {@code PrudentHttpEntityResolver},
 * copies a {@code charset} parameter from the reported content type onto the
 * input (if one is present), and then dispatches on the media type:
 * {@code text/css} goes to {@code checkAsCss}, {@code text/html} to
 * {@code checkAsHTML}, and everything else (including a missing content
 * type) to {@code checkAsXML}.
 *
 * <p>Side effect: replaces the JVM-wide default {@link CookieHandler} with an
 * accept-all {@link CookieManager}.
 *
 * @param document the URL of the document to check
 * @param userAgent the User-Agent value handed to the resolver
 * @param errorHandler the error handler handed to the resolver
 * @throws IOException if loading of the URL fails for some reason
 * @throws SAXException declared by the resolver and checker calls made here
 */
public void checkHttpURL(String document, String userAgent, ErrorHandler errorHandler) throws IOException, SAXException {
    CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
    validator.reset();
    httpRes = new PrudentHttpEntityResolver(-1, true, errorHandler);
    if (this.allowCss) {
        httpRes.setAllowCss(true);
    }
    httpRes.setAllowHtml(true);
    httpRes.setUserAgent(userAgent);
    try {
        documentInput = (TypedInputSource) httpRes.resolveEntity(null, document);
        documentInput.setSystemId(document);
        // A response without a usable Content-Type must not crash the check:
        // treat it as an empty type, which falls through to the XML branch.
        String contentType = documentInput.getType();
        if (contentType == null) {
            contentType = "";
        }
        final String charsetPrefix = "charset=";
        for (String param : contentType.replace(" ", "").split(";")) {
            // Parameter names are case-insensitive in HTTP media types.
            if (param.regionMatches(true, 0, charsetPrefix, 0, charsetPrefix.length())) {
                String charset = param.substring(charsetPrefix.length());
                // The value may be a quoted-string; drop the surrounding quotes.
                if (charset.length() >= 2 && charset.charAt(0) == '"' && charset.charAt(charset.length() - 1) == '"') {
                    charset = charset.substring(1, charset.length() - 1);
                }
                // An empty value carries no information; keep whatever
                // encoding the input already has.
                if (!charset.isEmpty()) {
                    documentInput.setEncoding(charset);
                }
                break;
            }
        }
        // Type and subtype are case-insensitive as well.
        if (contentType.regionMatches(true, 0, "text/css", 0, "text/css".length())) {
            checkAsCss(documentInput);
        } else if (contentType.regionMatches(true, 0, "text/html", 0, "text/html".length())) {
            checkAsHTML(documentInput);
        } else {
            checkAsXML(documentInput);
        }
    } catch (ResourceNotRetrievableException ignored) {
        // Deliberately not rethrown, as in the original code.
        // NOTE(review): presumably the resolver has already reported the
        // failure through errorHandler before throwing — confirm.
    }
}
Also used : PrudentHttpEntityResolver(nu.validator.xml.PrudentHttpEntityResolver) ResourceNotRetrievableException(nu.validator.xml.PrudentHttpEntityResolver.ResourceNotRetrievableException)

Aggregations

PrudentHttpEntityResolver (nu.validator.xml.PrudentHttpEntityResolver)2 ResourceNotRetrievableException (nu.validator.xml.PrudentHttpEntityResolver.ResourceNotRetrievableException)2 PropertyMapBuilder (com.thaiopensource.util.PropertyMapBuilder)1 IncorrectSchemaException (com.thaiopensource.validate.IncorrectSchemaException)1 ByteArrayInputStream (java.io.ByteArrayInputStream)1 FileInputStream (java.io.FileInputStream)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 SequenceInputStream (java.io.SequenceInputStream)1 SocketTimeoutException (java.net.SocketTimeoutException)1 ArrayList (java.util.ArrayList)1 FatalSAXException (nu.validator.gnu.xml.aelfred2.FatalSAXException)1 BoundedInputStream (nu.validator.io.BoundedInputStream)1 LocalCacheEntityResolver (nu.validator.localentities.LocalCacheEntityResolver)1 TooManyErrorsException (nu.validator.messages.TooManyErrorsException)1 Section (nu.validator.servlet.OutlineBuildingXMLReaderWrapper.Section)1 AttributesPermutingXMLReaderWrapper (nu.validator.xml.AttributesPermutingXMLReaderWrapper)1 CombineContentHandler (nu.validator.xml.CombineContentHandler)1 ContentTypeParser (nu.validator.xml.ContentTypeParser)1 NonXmlContentTypeException (nu.validator.xml.ContentTypeParser.NonXmlContentTypeException)1