Search in sources :

Example 1 with IncorrectSchemaException

use of com.thaiopensource.validate.IncorrectSchemaException in project validator by validator.

the class VerifierServletTransaction method documentMode.

/**
 * Handles the document mode / doctype notification: records which legacy
 * doctype markers were seen and, when no validator has been selected yet,
 * chooses a schema and attaches the validator's content handler to the
 * reader.
 *
 * <p>When the request parameter {@code sniffdoctype} is {@code "yes"}, a
 * recognized XHTML 1.0 / HTML 4.x public identifier selects the matching
 * preset schema. If sniffing is off, or sniffing did not yield a validator
 * (unrecognized or absent public identifier), the HTML5 schema is used and
 * {@code schemaIsDefault} is set.
 *
 * @param mode the reported document mode (not consulted here)
 * @param publicIdentifier the doctype public identifier, or {@code null}
 * @param systemIdentifier the doctype system identifier, or {@code null}
 * @throws SAXException propagated from the error handler or from the
 *         validator's content handler
 */
@Override
public void documentMode(DocumentMode mode, String publicIdentifier, String systemIdentifier) throws SAXException {
    if (systemIdentifier != null) {
        if ("about:legacy-compat".equals(systemIdentifier)) {
            aboutLegacyCompat = true;
            errorHandler.warning(new SAXParseException("Documents should not use" + " \u201cabout:legacy-compat\u201d," + " except if generated by legacy systems" + " that can't output the standard" + " \u201c<!DOCTYPE html>\u201d  doctype.", null));
        }
        if (systemIdentifier.contains("http://www.w3.org/TR/xhtml1")) {
            xhtml1Doctype = true;
        }
        if (systemIdentifier.contains("http://www.w3.org/TR/html4")) {
            html4Doctype = true;
        }
    }
    if (publicIdentifier != null) {
        if (publicIdentifier.contains("-//W3C//DTD HTML 4")) {
            html4Doctype = true;
        }
    }
    if (validator == null) {
        try {
            if ("yes".equals(request.getParameter("sniffdoctype"))) {
                if ("-//W3C//DTD XHTML 1.0 Transitional//EN".equals(publicIdentifier)) {
                    errorHandler.info("XHTML 1.0 Transitional doctype seen. Appendix C is not supported. Proceeding anyway for your convenience. The parser is still an HTML parser, so namespace processing is not performed and \u201Cxml:*\u201D attributes are not supported. Using the schema for " + getPresetLabel(XHTML1TRANSITIONAL_SCHEMA) + ".");
                    validator = validatorByDoctype(XHTML1TRANSITIONAL_SCHEMA);
                } else if ("-//W3C//DTD XHTML 1.0 Strict//EN".equals(publicIdentifier)) {
                    errorHandler.info("XHTML 1.0 Strict doctype seen. Appendix C is not supported. Proceeding anyway for your convenience. The parser is still an HTML parser, so namespace processing is not performed and \u201Cxml:*\u201D attributes are not supported. Using the schema for " + getPresetLabel(XHTML1STRICT_SCHEMA) + ".");
                    validator = validatorByDoctype(XHTML1STRICT_SCHEMA);
                } else if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)) {
                    errorHandler.info("HTML 4.01 Transitional doctype seen. Using the schema for " + getPresetLabel(XHTML1TRANSITIONAL_SCHEMA) + ".");
                    validator = validatorByDoctype(XHTML1TRANSITIONAL_SCHEMA);
                } else if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) {
                    errorHandler.info("HTML 4.01 Strict doctype seen. Using the schema for " + getPresetLabel(XHTML1STRICT_SCHEMA) + ".");
                    validator = validatorByDoctype(XHTML1STRICT_SCHEMA);
                } else if ("-//W3C//DTD HTML 4.0 Transitional//EN".equals(publicIdentifier)) {
                    errorHandler.info("Legacy HTML 4.0 Transitional doctype seen.  Please consider using HTML 4.01 Transitional instead. Proceeding anyway for your convenience with the schema for " + getPresetLabel(XHTML1TRANSITIONAL_SCHEMA) + ".");
                    validator = validatorByDoctype(XHTML1TRANSITIONAL_SCHEMA);
                } else if ("-//W3C//DTD HTML 4.0//EN".equals(publicIdentifier)) {
                    errorHandler.info("Legacy HTML 4.0 Strict doctype seen. Please consider using HTML 4.01 instead. Proceeding anyway for your convenience with the schema for " + getPresetLabel(XHTML1STRICT_SCHEMA) + ".");
                    validator = validatorByDoctype(XHTML1STRICT_SCHEMA);
                }
            }
            if (validator == null) {
                // Either sniffing is disabled, or it did not produce a
                // validator (e.g. "<!DOCTYPE html>" has no public identifier).
                // Without this fallback the sniffing case would leave
                // validator null and the dereference below would throw a
                // NullPointerException.
                schemaIsDefault = true;
                validator = validatorByDoctype(HTML5_SCHEMA);
            }
        } catch (IncorrectSchemaException | IOException e) {
            // At this point the schema comes from memory.
            throw new RuntimeException(e);
        }
        // Start the validator's handler and route subsequent reader events to it.
        ContentHandler ch = validator.getContentHandler();
        ch.setDocumentLocator(htmlParser.getDocumentLocator());
        ch.startDocument();
        reader.setContentHandler(ch);
    }
}
Also used : SAXParseException(org.xml.sax.SAXParseException) IncorrectSchemaException(com.thaiopensource.validate.IncorrectSchemaException) IOException(java.io.IOException) CombineContentHandler(nu.validator.xml.CombineContentHandler) ContentHandler(org.xml.sax.ContentHandler)

Example 2 with IncorrectSchemaException

use of com.thaiopensource.validate.IncorrectSchemaException in project validator by validator.

the class VerifierServletTransaction method validate.

/**
 * Runs one validation pass. Returns immediately when {@code willValidate()}
 * is false. Otherwise it builds the entity resolver chain and the Jing
 * property map, sets up the validator and parser, wraps the reader in a
 * series of XMLReader wrappers, parses {@code documentInput}, and finally
 * closes out the error handler and gathers statistics.
 *
 * Most exceptions raised while setting up or parsing are caught here and
 * either logged or forwarded to {@code errorHandler}; see the catch clauses.
 *
 * @throws SAXException if flushing {@code out} fails (wrapping the
 *         IOException); may also propagate from calls made in the
 *         catch/finally clauses and from the trailing emit step
 */
@SuppressWarnings({ "deprecation", "unchecked" })
void validate() throws SAXException {
    if (!willValidate()) {
        return;
    }
    // Decides whether the outline/details/stats are emitted after the
    // try block; several catch clauses below reset it to false.
    boolean isHtmlOrXhtml = (outputFormat == OutputFormat.HTML || outputFormat == OutputFormat.XHTML);
    if (isHtmlOrXhtml) {
        try {
            out.flush();
        } catch (IOException e1) {
            throw new SAXException(e1);
        }
    }
    // Resolver chain: each resolver is constructed around the previous one
    // (httpRes -> dataRes -> entityResolver).
    httpRes = new PrudentHttpEntityResolver(SIZE_LIMIT, laxType, errorHandler, request);
    httpRes.setUserAgent(userAgent);
    dataRes = new DataUriEntityResolver(httpRes, laxType, errorHandler);
    contentTypeParser = new ContentTypeParser(errorHandler, laxType);
    entityResolver = new LocalCacheEntityResolver(dataRes);
    // RNC is allowed only while the validator is being set up; it is
    // switched off again below before the document itself is loaded.
    setAllowRnc(true);
    setAllowCss(true);
    try {
        this.errorHandler.start(document);
        // Assemble jingPropertyMap: error handler, entity resolver, XML
        // reader creator, and this object as the schema resolver.
        PropertyMapBuilder pmb = new PropertyMapBuilder();
        pmb.put(ValidateProperty.ERROR_HANDLER, errorHandler);
        pmb.put(ValidateProperty.ENTITY_RESOLVER, entityResolver);
        pmb.put(ValidateProperty.XML_READER_CREATOR, new VerifierServletXMLReaderCreator(errorHandler, entityResolver));
        pmb.put(ValidateProperty.SCHEMA_RESOLVER, this);
        RngProperty.CHECK_ID_IDREF.add(pmb);
        jingPropertyMap = pmb.toPropertyMap();
        tryToSetupValidator();
        setAllowRnc(false);
        loadDocAndSetupParser();
        setErrorProfile();
        contentType = documentInput.getType();
        if ("text/css".equals(contentType)) {
            // CSS input: sandwich the document bytes between
            // CSS_CHECKING_PROLOG and CSS_CHECKING_EPILOG, force the HTML
            // parser mode, and set the parser up a second time.
            String charset = "UTF-8";
            if (documentInput.getEncoding() != null) {
                charset = documentInput.getEncoding();
            }
            List<InputStream> streams = new ArrayList<>();
            streams.add(new ByteArrayInputStream(CSS_CHECKING_PROLOG));
            streams.add(documentInput.getByteStream());
            streams.add(new ByteArrayInputStream(CSS_CHECKING_EPILOG));
            Enumeration<InputStream> e = Collections.enumeration(streams);
            documentInput.setByteStream(new SequenceInputStream(e));
            documentInput.setEncoding(charset);
            // NOTE(review): presumably compensates for the line added by
            // the prolog so reported line numbers match the CSS source — confirm.
            errorHandler.setLineOffset(-1);
            sourceCode.setIsCss();
            parser = ParserMode.HTML;
            loadDocAndSetupParser();
        }
        reader.setErrorHandler(errorHandler);
        sourceCode.initialize(documentInput);
        // With no validator in place, normalization checking is forced on.
        if (validator == null) {
            checkNormalization = true;
        }
        if (checkNormalization) {
            reader.setFeature("http://xml.org/sax/features/unicode-normalization-checking", true);
        }
        // Wrap the reader in a wiretap so the source-code location recorder
        // (combined with baseUriTracker when one exists) sees the events.
        WiretapXMLReaderWrapper wiretap = new WiretapXMLReaderWrapper(reader);
        ContentHandler recorder = sourceCode.getLocationRecorder();
        if (baseUriTracker == null) {
            wiretap.setWiretapContentHander(recorder);
        } else {
            wiretap.setWiretapContentHander(new CombineContentHandler(recorder, baseUriTracker));
        }
        wiretap.setWiretapLexicalHandler((LexicalHandler) recorder);
        reader = wiretap;
        // Exactly one of htmlParser / xmlParser is expected to be set by now;
        // configure whichever it is.
        if (htmlParser != null) {
            htmlParser.addCharacterHandler(sourceCode);
            htmlParser.setMappingLangToXmlLang(true);
            htmlParser.setErrorHandler(errorHandler.getExactErrorHandler());
            htmlParser.setTreeBuilderErrorHandlerOverride(errorHandler);
            errorHandler.setHtml(true);
        } else if (xmlParser != null) {
            // this must be after wiretap!
            if (!filteredNamespaces.isEmpty()) {
                reader = new NamespaceDroppingXMLReaderWrapper(reader, filteredNamespaces);
            }
            xmlParser.setErrorHandler(errorHandler.getExactErrorHandler());
            xmlParser.lockErrorHandler();
        } else {
            throw new RuntimeException("Bug. Unreachable.");
        }
        // make
        reader = new AttributesPermutingXMLReaderWrapper(reader);
        // better
        if (charsetOverride != null) {
            // Warn about the override (wording depends on whether the
            // document declared an encoding), then apply it.
            String charset = documentInput.getEncoding();
            if (charset == null) {
                errorHandler.warning(new SAXParseException("Overriding document character encoding from none to \u201C" + charsetOverride + "\u201D.", null));
            } else {
                errorHandler.warning(new SAXParseException("Overriding document character encoding from \u201C" + charset + "\u201D to \u201C" + charsetOverride + "\u201D.", null));
            }
            documentInput.setEncoding(charsetOverride);
        }
        if (showOutline) {
            // Two outline builders are stacked, differing only in the last
            // boolean argument; their results are read back from request
            // attributes after parsing (see below).
            reader = new OutlineBuildingXMLReaderWrapper(reader, request, false);
            reader = new OutlineBuildingXMLReaderWrapper(reader, request, true);
        }
        reader.parse(documentInput);
        if (showOutline) {
            outline = (Deque<Section>) request.getAttribute("http://validator.nu/properties/document-outline");
            headingOutline = (Deque<Section>) request.getAttribute("http://validator.nu/properties/heading-outline");
        }
    } catch (CannotFindPresetSchemaException e) {
        // Deliberately ignored here.
        // NOTE(review): presumably the thrower has already reported the
        // problem through errorHandler before throwing — confirm.
    } catch (ResourceNotRetrievableException e) {
        log4j.debug(e.getMessage());
    } catch (NonXmlContentTypeException e) {
        log4j.debug(e.getMessage());
    } catch (FatalSAXException e) {
        log4j.debug(e.getMessage());
    } catch (SocketTimeoutException e) {
        // Timeouts are reported as a fresh IOException carrying only the message.
        errorHandler.ioError(new IOException(e.getMessage(), null));
    } catch (ConnectTimeoutException e) {
        errorHandler.ioError(new IOException(e.getMessage(), null));
    } catch (TooManyErrorsException e) {
        errorHandler.fatalError(e);
    } catch (SAXException e) {
        // SAXExceptions whose message equals cannotRecover or
        // changingEncoding are not logged; everything else is logged at debug.
        String msg = e.getMessage();
        if (!cannotRecover.equals(msg) && !changingEncoding.equals(msg)) {
            log4j.debug("SAXException: " + e.getMessage());
        }
    } catch (IOException e) {
        // I/O failure: skip the trailing outline/details/stats output.
        isHtmlOrXhtml = false;
        if (e.getCause() instanceof org.apache.http.TruncatedChunkException) {
            log4j.debug("TruncatedChunkException", e.getCause());
        } else {
            errorHandler.ioError(e);
        }
    } catch (IncorrectSchemaException e) {
        log4j.debug("IncorrectSchemaException", e);
        errorHandler.schemaError(e);
    } catch (RuntimeException e) {
        // Unexpected failure: log with request context and report an
        // internal error; the trailing output is skipped.
        isHtmlOrXhtml = false;
        log4j.error("RuntimeException, doc: " + document + " schema: " + schemaUrls + " lax: " + laxType, e);
        errorHandler.internalError(e, "Oops. That was not supposed to happen. A bug manifested itself in the application internals. Unable to continue. Sorry. The admin was notified.");
    } catch (Error e) {
        isHtmlOrXhtml = false;
        log4j.error("Error, doc: " + document + " schema: " + schemaUrls + " lax: " + laxType, e);
        errorHandler.internalError(e, "Oops. That was not supposed to happen. A bug manifested itself in the application internals. Unable to continue. Sorry. The admin was notified.");
    } finally {
        // Always close out the error handler and gather statistics,
        // whatever happened above.
        errorHandler.end(successMessage(), failureMessage(), (String) request.getAttribute("http://validator.nu/properties/document-language"));
        gatherStatistics();
    }
    // Emitted only for HTML/XHTML output and only if no IOException,
    // RuntimeException or Error reset the flag above.
    if (isHtmlOrXhtml) {
        XhtmlOutlineEmitter outlineEmitter = new XhtmlOutlineEmitter(contentHandler, outline, headingOutline);
        outlineEmitter.emitHeadings();
        outlineEmitter.emit();
        emitDetails();
        StatsEmitter.emit(contentHandler, this);
    }
}
Also used : TooManyErrorsException(nu.validator.messages.TooManyErrorsException) WiretapXMLReaderWrapper(nu.validator.xml.WiretapXMLReaderWrapper) ArrayList(java.util.ArrayList) NonXmlContentTypeException(nu.validator.xml.ContentTypeParser.NonXmlContentTypeException) PrudentHttpEntityResolver(nu.validator.xml.PrudentHttpEntityResolver) CombineContentHandler(nu.validator.xml.CombineContentHandler) ContentHandler(org.xml.sax.ContentHandler) FatalSAXException(nu.validator.gnu.xml.aelfred2.FatalSAXException) SAXException(org.xml.sax.SAXException) ContentTypeParser(nu.validator.xml.ContentTypeParser) CombineContentHandler(nu.validator.xml.CombineContentHandler) NamespaceDroppingXMLReaderWrapper(nu.validator.xml.NamespaceDroppingXMLReaderWrapper) SAXParseException(org.xml.sax.SAXParseException) FatalSAXException(nu.validator.gnu.xml.aelfred2.FatalSAXException) ResourceNotRetrievableException(nu.validator.xml.PrudentHttpEntityResolver.ResourceNotRetrievableException) PropertyMapBuilder(com.thaiopensource.util.PropertyMapBuilder) DataUriEntityResolver(nu.validator.xml.DataUriEntityResolver) BoundedInputStream(nu.validator.io.BoundedInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) SequenceInputStream(java.io.SequenceInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) AttributesPermutingXMLReaderWrapper(nu.validator.xml.AttributesPermutingXMLReaderWrapper) IncorrectSchemaException(com.thaiopensource.validate.IncorrectSchemaException) IOException(java.io.IOException) Section(nu.validator.servlet.OutlineBuildingXMLReaderWrapper.Section) LocalCacheEntityResolver(nu.validator.localentities.LocalCacheEntityResolver) SocketTimeoutException(java.net.SocketTimeoutException) SequenceInputStream(java.io.SequenceInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) ConnectTimeoutException(org.apache.http.conn.ConnectTimeoutException)

Example 3 with IncorrectSchemaException

use of com.thaiopensource.validate.IncorrectSchemaException in project validator by validator.

the class VerifierServletTransaction method rootNamespace.

/**
 * Reacts to the namespace of the document's root element.
 *
 * <p>If no validator exists yet, the preset whose namespace equals
 * {@code namespace} is looked up and a validator is built from its schema
 * URLs; its content handler is then handed to
 * {@code bufferingRootNamespaceSniffer}. The first time this method runs it
 * also warns when {@code contentType} is a known content type whose expected
 * namespace differs from {@code namespace}.
 *
 * @param namespace the namespace of the root element
 * @param locator location passed along with the Content-Type warning
 * @throws SAXException a {@code CannotFindPresetSchemaException} when no
 *         preset matches, or whatever the error handler propagates
 */
void rootNamespace(String namespace, Locator locator) throws SAXException {
    if (validator == null) {
        // Find the first preset registered for this namespace.
        int presetIndex = -1;
        int candidate = 0;
        while (presetIndex == -1 && candidate < presetNamespaces.length) {
            if (namespace.equals(presetNamespaces[candidate])) {
                presetIndex = candidate;
            }
            candidate++;
        }
        if (presetIndex < 0) {
            // Report first, then abort with the dedicated exception type.
            errorHandler.schemaError(new SAXException("Cannot find preset schema for namespace: \u201C" + namespace + "\u201D."));
            throw new CannotFindPresetSchemaException();
        }
        String presetLabel = presetLabels[presetIndex];
        String presetSchemaUrls = presetUrls[presetIndex];
        errorHandler.info("Using the preset for " + presetLabel + " based on the root namespace.");
        try {
            validator = validatorByUrls(presetSchemaUrls);
        } catch (IncorrectSchemaException | IOException e) {
            // At this point the schema comes from memory.
            throw new RuntimeException(e);
        }
        if (bufferingRootNamespaceSniffer == null) {
            throw new RuntimeException("Bug! bufferingRootNamespaceSniffer was null.");
        }
        bufferingRootNamespaceSniffer.setContentHandler(validator.getContentHandler());
    }

    // The Content-Type consistency check runs only on the first call.
    if (rootNamespaceSeen) {
        return;
    }
    rootNamespaceSeen = true;
    if (contentType == null) {
        return;
    }
    int knownTypeIndex = Arrays.binarySearch(KNOWN_CONTENT_TYPES, contentType);
    if (knownTypeIndex < 0 || NAMESPACES_FOR_KNOWN_CONTENT_TYPES[knownTypeIndex].equals(namespace)) {
        // Unknown content type, or the namespace is the expected one.
        return;
    }
    String message;
    if ("".equals(namespace)) {
        message = "\u201C" + contentType + "\u201D is not an appropriate Content-Type for a document whose root element is not in a namespace.";
    } else {
        message = "\u201C" + contentType + "\u201D is not an appropriate Content-Type for a document whose root namespace is \u201C" + namespace + "\u201D.";
    }
    errorHandler.warning(new SAXParseException(message, locator));
}
Also used : SAXParseException(org.xml.sax.SAXParseException) IncorrectSchemaException(com.thaiopensource.validate.IncorrectSchemaException) IOException(java.io.IOException) FatalSAXException(nu.validator.gnu.xml.aelfred2.FatalSAXException) SAXException(org.xml.sax.SAXException)

Example 4 with IncorrectSchemaException

use of com.thaiopensource.validate.IncorrectSchemaException in project validator by validator.

the class VerifierServletTransaction method resolveSchema.

/**
 * Resolves a schema URL to a {@code Schema}.
 *
 * <p>A URL found in {@code preloadedSchemaUrls} yields the matching preloaded
 * schema, unless {@code options} contains
 * {@code WrapProperty.ATTRIBUTE_OWNER}: then a {@code CheckerSchema} is
 * rejected, and any other schema is loaded through the entity resolver like
 * a URL that was not preloaded.
 *
 * @param url the schema URL to resolve
 * @param options the property map passed on to the schema reader
 * @return the preloaded or freshly created schema
 * @throws SAXException propagated from the error handler, the entity
 *         resolver or the schema reader
 * @throws IOException propagated from the entity resolver or schema reader
 * @throws IncorrectSchemaException if a checker is requested as an attribute
 *         schema, or propagated from the schema reader
 */
@Override
public Schema resolveSchema(String url, PropertyMap options) throws SAXException, IOException, IncorrectSchemaException {
    int preloadedIndex = Arrays.binarySearch(preloadedSchemaUrls, url);
    if (preloadedIndex >= 0) {
        Schema preloaded = preloadedSchemas[preloadedIndex];
        if (!options.contains(WrapProperty.ATTRIBUTE_OWNER)) {
            return preloaded;
        }
        if (preloaded instanceof CheckerSchema) {
            errorHandler.error(new SAXParseException("A non-schema checker cannot be used as an attribute schema.", null, url, -1, -1));
            throw new IncorrectSchemaException();
        }
        // Attribute-owner request for a non-checker schema: continue below
        // and load it via the entity resolver instead of returning the
        // preloaded instance.
    }

    externalSchema = true;
    TypedInputSource schemaInput = (TypedInputSource) entityResolver.resolveEntity(null, url);
    // Compact-syntax RELAX NG gets its dedicated reader; everything else is
    // handed to the auto-detecting reader.
    boolean compactSyntax = "application/relax-ng-compact-syntax".equals(schemaInput.getType());
    SchemaReader schemaReader = compactSyntax ? CompactSchemaReader.getInstance() : new AutoSchemaReader();
    Schema created = schemaReader.createSchema(schemaInput, options);
    if (Statistics.STATISTICS != null && "com.thaiopensource.validate.schematron.SchemaImpl".equals(created.getClass().getName())) {
        externalSchematron = true;
    }
    return created;
}
Also used : CheckerSchema(nu.validator.checker.jing.CheckerSchema) CompactSchemaReader(com.thaiopensource.validate.rng.CompactSchemaReader) AutoSchemaReader(com.thaiopensource.validate.auto.AutoSchemaReader) SchemaReader(com.thaiopensource.validate.SchemaReader) TypedInputSource(nu.validator.xml.TypedInputSource) AutoSchemaReader(com.thaiopensource.validate.auto.AutoSchemaReader) SAXParseException(org.xml.sax.SAXParseException) Schema(com.thaiopensource.validate.Schema) CheckerSchema(nu.validator.checker.jing.CheckerSchema) IncorrectSchemaException(com.thaiopensource.validate.IncorrectSchemaException)

Aggregations

IncorrectSchemaException (com.thaiopensource.validate.IncorrectSchemaException)4 SAXParseException (org.xml.sax.SAXParseException)4 IOException (java.io.IOException)3 FatalSAXException (nu.validator.gnu.xml.aelfred2.FatalSAXException)2 CombineContentHandler (nu.validator.xml.CombineContentHandler)2 ContentHandler (org.xml.sax.ContentHandler)2 SAXException (org.xml.sax.SAXException)2 PropertyMapBuilder (com.thaiopensource.util.PropertyMapBuilder)1 Schema (com.thaiopensource.validate.Schema)1 SchemaReader (com.thaiopensource.validate.SchemaReader)1 AutoSchemaReader (com.thaiopensource.validate.auto.AutoSchemaReader)1 CompactSchemaReader (com.thaiopensource.validate.rng.CompactSchemaReader)1 ByteArrayInputStream (java.io.ByteArrayInputStream)1 FileInputStream (java.io.FileInputStream)1 InputStream (java.io.InputStream)1 SequenceInputStream (java.io.SequenceInputStream)1 SocketTimeoutException (java.net.SocketTimeoutException)1 ArrayList (java.util.ArrayList)1 CheckerSchema (nu.validator.checker.jing.CheckerSchema)1 BoundedInputStream (nu.validator.io.BoundedInputStream)1