Use of com.thaiopensource.validate.IncorrectSchemaException in the project validator by validator.
In the class VerifierServletTransaction, method documentMode.
/**
 * Receives the doctype identifiers reported by the HTML parser, records
 * legacy-doctype flags and, when no validator has been chosen yet, picks a
 * preset schema and attaches its content handler to the reader.
 *
 * <p>When the request parameter {@code sniffdoctype} is {@code "yes"}, the
 * public identifier is compared against the known XHTML 1.0 / HTML 4.x
 * doctypes. If sniffing is disabled, or it is enabled but the public
 * identifier matches none of them (for example a plain HTML5 doctype), the
 * HTML5 preset is used and {@code schemaIsDefault} is set. Previously the
 * unmatched case left {@code validator} null and failed with a
 * {@link NullPointerException} when the content handler was requested.
 *
 * @param mode the document mode reported by the parser (not used here)
 * @param publicIdentifier the doctype public identifier, may be {@code null}
 * @param systemIdentifier the doctype system identifier, may be {@code null}
 * @throws SAXException if reporting to the error handler or starting the
 *         validator's content handler fails
 */
@Override
public void documentMode(DocumentMode mode, String publicIdentifier, String systemIdentifier) throws SAXException {
    if (systemIdentifier != null) {
        if ("about:legacy-compat".equals(systemIdentifier)) {
            aboutLegacyCompat = true;
            errorHandler.warning(new SAXParseException("Documents should not use" + " \u201cabout:legacy-compat\u201d," + " except if generated by legacy systems" + " that can't output the standard" + " \u201c<!DOCTYPE html>\u201d doctype.", null));
        }
        if (systemIdentifier.contains("http://www.w3.org/TR/xhtml1")) {
            xhtml1Doctype = true;
        }
        if (systemIdentifier.contains("http://www.w3.org/TR/html4")) {
            html4Doctype = true;
        }
    }
    if (publicIdentifier != null && publicIdentifier.contains("-//W3C//DTD HTML 4")) {
        html4Doctype = true;
    }
    if (validator != null) {
        // A validator was already configured; nothing more to set up.
        return;
    }
    try {
        if ("yes".equals(request.getParameter("sniffdoctype"))) {
            if ("-//W3C//DTD XHTML 1.0 Transitional//EN".equals(publicIdentifier)) {
                errorHandler.info("XHTML 1.0 Transitional doctype seen. Appendix C is not supported. Proceeding anyway for your convenience. The parser is still an HTML parser, so namespace processing is not performed and \u201Cxml:*\u201D attributes are not supported. Using the schema for " + getPresetLabel(XHTML1TRANSITIONAL_SCHEMA) + ".");
                validator = validatorByDoctype(XHTML1TRANSITIONAL_SCHEMA);
            } else if ("-//W3C//DTD XHTML 1.0 Strict//EN".equals(publicIdentifier)) {
                errorHandler.info("XHTML 1.0 Strict doctype seen. Appendix C is not supported. Proceeding anyway for your convenience. The parser is still an HTML parser, so namespace processing is not performed and \u201Cxml:*\u201D attributes are not supported. Using the schema for " + getPresetLabel(XHTML1STRICT_SCHEMA) + ".");
                validator = validatorByDoctype(XHTML1STRICT_SCHEMA);
            } else if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)) {
                errorHandler.info("HTML 4.01 Transitional doctype seen. Using the schema for " + getPresetLabel(XHTML1TRANSITIONAL_SCHEMA) + ".");
                validator = validatorByDoctype(XHTML1TRANSITIONAL_SCHEMA);
            } else if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) {
                errorHandler.info("HTML 4.01 Strict doctype seen. Using the schema for " + getPresetLabel(XHTML1STRICT_SCHEMA) + ".");
                validator = validatorByDoctype(XHTML1STRICT_SCHEMA);
            } else if ("-//W3C//DTD HTML 4.0 Transitional//EN".equals(publicIdentifier)) {
                errorHandler.info("Legacy HTML 4.0 Transitional doctype seen. Please consider using HTML 4.01 Transitional instead. Proceeding anyway for your convenience with the schema for " + getPresetLabel(XHTML1TRANSITIONAL_SCHEMA) + ".");
                validator = validatorByDoctype(XHTML1TRANSITIONAL_SCHEMA);
            } else if ("-//W3C//DTD HTML 4.0//EN".equals(publicIdentifier)) {
                errorHandler.info("Legacy HTML 4.0 Strict doctype seen. Please consider using HTML 4.01 instead. Proceeding anyway for your convenience with the schema for " + getPresetLabel(XHTML1STRICT_SCHEMA) + ".");
                validator = validatorByDoctype(XHTML1STRICT_SCHEMA);
            }
        }
        if (validator == null) {
            // Either sniffing is off, or it was on but no legacy doctype
            // matched: use the default HTML5 preset instead of leaving the
            // validator unset (which used to cause a NullPointerException
            // just below).
            schemaIsDefault = true;
            validator = validatorByDoctype(HTML5_SCHEMA);
        }
    } catch (IncorrectSchemaException | IOException e) {
        // At this point the schema comes from memory.
        throw new RuntimeException(e);
    }
    ContentHandler ch = validator.getContentHandler();
    ch.setDocumentLocator(htmlParser.getDocumentLocator());
    ch.startDocument();
    reader.setContentHandler(ch);
}
Use of com.thaiopensource.validate.IncorrectSchemaException in the project validator by validator.
In the class VerifierServletTransaction, method validate.
/**
 * Runs one validation pass over the requested document and reports the
 * outcome through {@code errorHandler}.
 *
 * <p>Steps, in order: return early if {@code willValidate()} is false; flush
 * {@code out} for HTML/XHTML output; build the entity-resolver chain; set up
 * the Jing property map and try to set up the validator; load the document
 * and parser (re-loading with the HTML parser for {@code text/css} input,
 * which is wrapped in a prolog/epilog); stack the reader wrappers; parse.
 * Most failures are caught and either logged or routed to
 * {@code errorHandler}; {@code errorHandler.end(...)} and
 * {@code gatherStatistics()} always run. Outline, details and statistics are
 * emitted afterwards only for HTML/XHTML output and only if no
 * {@code IOException}, {@code RuntimeException} or {@code Error} was caught.
 *
 * @throws SAXException if flushing {@code out} fails (the
 *         {@code IOException} is wrapped); other sources are not visible
 *         from this method alone
 */
@SuppressWarnings({ "deprecation", "unchecked" })
void validate() throws SAXException {
if (!willValidate()) {
return;
}
// Remembered up front; cleared in some catch blocks below to suppress the
// trailing HTML/XHTML-only output.
boolean isHtmlOrXhtml = (outputFormat == OutputFormat.HTML || outputFormat == OutputFormat.XHTML);
if (isHtmlOrXhtml) {
try {
out.flush();
} catch (IOException e1) {
throw new SAXException(e1);
}
}
// Entity resolver chain: HTTP resolver -> data: URI resolver -> local cache.
httpRes = new PrudentHttpEntityResolver(SIZE_LIMIT, laxType, errorHandler, request);
httpRes.setUserAgent(userAgent);
dataRes = new DataUriEntityResolver(httpRes, laxType, errorHandler);
contentTypeParser = new ContentTypeParser(errorHandler, laxType);
entityResolver = new LocalCacheEntityResolver(dataRes);
// RNC is allowed only while the validator is being set up (switched off
// again right after tryToSetupValidator()); CSS stays allowed.
setAllowRnc(true);
setAllowCss(true);
try {
this.errorHandler.start(document);
// Properties handed to Jing: error handler, entity resolver, XML reader
// creator, and this object as the schema resolver.
PropertyMapBuilder pmb = new PropertyMapBuilder();
pmb.put(ValidateProperty.ERROR_HANDLER, errorHandler);
pmb.put(ValidateProperty.ENTITY_RESOLVER, entityResolver);
pmb.put(ValidateProperty.XML_READER_CREATOR, new VerifierServletXMLReaderCreator(errorHandler, entityResolver));
pmb.put(ValidateProperty.SCHEMA_RESOLVER, this);
RngProperty.CHECK_ID_IDREF.add(pmb);
jingPropertyMap = pmb.toPropertyMap();
tryToSetupValidator();
setAllowRnc(false);
loadDocAndSetupParser();
setErrorProfile();
contentType = documentInput.getType();
if ("text/css".equals(contentType)) {
// CSS input: sandwich the document bytes between CSS_CHECKING_PROLOG and
// CSS_CHECKING_EPILOG, then parse the result with the HTML parser.
String charset = "UTF-8";
if (documentInput.getEncoding() != null) {
charset = documentInput.getEncoding();
}
List<InputStream> streams = new ArrayList<>();
streams.add(new ByteArrayInputStream(CSS_CHECKING_PROLOG));
streams.add(documentInput.getByteStream());
streams.add(new ByteArrayInputStream(CSS_CHECKING_EPILOG));
Enumeration<InputStream> e = Collections.enumeration(streams);
documentInput.setByteStream(new SequenceInputStream(e));
documentInput.setEncoding(charset);
// NOTE(review): the -1 line offset presumably compensates for the prolog
// occupying one line - confirm against CSS_CHECKING_PROLOG.
errorHandler.setLineOffset(-1);
sourceCode.setIsCss();
parser = ParserMode.HTML;
// Second call: set the parser up again now that the mode is HTML.
loadDocAndSetupParser();
}
reader.setErrorHandler(errorHandler);
sourceCode.initialize(documentInput);
// With no validator configured at this point, normalization checking is
// forced on.
if (validator == null) {
checkNormalization = true;
}
if (checkNormalization) {
reader.setFeature("http://xml.org/sax/features/unicode-normalization-checking", true);
}
// Wiretap the reader so the source-location recorder (and the base URI
// tracker, when present) also receive the parse events.
WiretapXMLReaderWrapper wiretap = new WiretapXMLReaderWrapper(reader);
ContentHandler recorder = sourceCode.getLocationRecorder();
if (baseUriTracker == null) {
wiretap.setWiretapContentHander(recorder);
} else {
wiretap.setWiretapContentHander(new CombineContentHandler(recorder, baseUriTracker));
}
// The recorder is also used as the lexical handler, hence the cast.
wiretap.setWiretapLexicalHandler((LexicalHandler) recorder);
reader = wiretap;
// Exactly one of htmlParser / xmlParser is expected to be non-null here;
// the final else treats anything else as a bug.
if (htmlParser != null) {
htmlParser.addCharacterHandler(sourceCode);
htmlParser.setMappingLangToXmlLang(true);
htmlParser.setErrorHandler(errorHandler.getExactErrorHandler());
htmlParser.setTreeBuilderErrorHandlerOverride(errorHandler);
errorHandler.setHtml(true);
} else if (xmlParser != null) {
// this must be after wiretap!
if (!filteredNamespaces.isEmpty()) {
reader = new NamespaceDroppingXMLReaderWrapper(reader, filteredNamespaces);
}
xmlParser.setErrorHandler(errorHandler.getExactErrorHandler());
xmlParser.lockErrorHandler();
} else {
throw new RuntimeException("Bug. Unreachable.");
}
// make
reader = new AttributesPermutingXMLReaderWrapper(reader);
// better
// An explicit charset override replaces the document's encoding and is
// announced with a warning either way.
if (charsetOverride != null) {
String charset = documentInput.getEncoding();
if (charset == null) {
errorHandler.warning(new SAXParseException("Overriding document character encoding from none to \u201C" + charsetOverride + "\u201D.", null));
} else {
errorHandler.warning(new SAXParseException("Overriding document character encoding from \u201C" + charset + "\u201D to \u201C" + charsetOverride + "\u201D.", null));
}
documentInput.setEncoding(charsetOverride);
}
// Two outline builders are stacked, one per flag value; their results are
// read back from two request attributes after parsing.
// NOTE(review): the boolean presumably selects document vs. heading
// outline - confirm in OutlineBuildingXMLReaderWrapper.
if (showOutline) {
reader = new OutlineBuildingXMLReaderWrapper(reader, request, false);
reader = new OutlineBuildingXMLReaderWrapper(reader, request, true);
}
reader.parse(documentInput);
if (showOutline) {
outline = (Deque<Section>) request.getAttribute("http://validator.nu/properties/document-outline");
headingOutline = (Deque<Section>) request.getAttribute("http://validator.nu/properties/heading-outline");
}
} catch (CannotFindPresetSchemaException e) {
// Intentionally empty. NOTE(review): in the visible thrower
// (rootNamespace) the problem is reported via errorHandler.schemaError
// before this exception is raised - confirm for any other throwers.
} catch (ResourceNotRetrievableException e) {
log4j.debug(e.getMessage());
} catch (NonXmlContentTypeException e) {
log4j.debug(e.getMessage());
} catch (FatalSAXException e) {
log4j.debug(e.getMessage());
} catch (SocketTimeoutException e) {
// Timeouts are reported as I/O errors carrying only the message.
errorHandler.ioError(new IOException(e.getMessage(), null));
} catch (ConnectTimeoutException e) {
errorHandler.ioError(new IOException(e.getMessage(), null));
} catch (TooManyErrorsException e) {
errorHandler.fatalError(e);
} catch (SAXException e) {
// Only logged, and not even that when the message equals cannotRecover or
// changingEncoding.
String msg = e.getMessage();
if (!cannotRecover.equals(msg) && !changingEncoding.equals(msg)) {
log4j.debug("SAXException: " + e.getMessage());
}
} catch (IOException e) {
isHtmlOrXhtml = false;
if (e.getCause() instanceof org.apache.http.TruncatedChunkException) {
log4j.debug("TruncatedChunkException", e.getCause());
} else {
errorHandler.ioError(e);
}
} catch (IncorrectSchemaException e) {
log4j.debug("IncorrectSchemaException", e);
errorHandler.schemaError(e);
} catch (RuntimeException e) {
isHtmlOrXhtml = false;
log4j.error("RuntimeException, doc: " + document + " schema: " + schemaUrls + " lax: " + laxType, e);
errorHandler.internalError(e, "Oops. That was not supposed to happen. A bug manifested itself in the application internals. Unable to continue. Sorry. The admin was notified.");
} catch (Error e) {
isHtmlOrXhtml = false;
log4j.error("Error, doc: " + document + " schema: " + schemaUrls + " lax: " + laxType, e);
errorHandler.internalError(e, "Oops. That was not supposed to happen. A bug manifested itself in the application internals. Unable to continue. Sorry. The admin was notified.");
} finally {
// Always close the report and collect statistics, whatever happened above.
errorHandler.end(successMessage(), failureMessage(), (String) request.getAttribute("http://validator.nu/properties/document-language"));
gatherStatistics();
}
// HTML/XHTML output only, and only when no IOException, RuntimeException
// or Error was caught above: emit outlines, details and statistics.
if (isHtmlOrXhtml) {
XhtmlOutlineEmitter outlineEmitter = new XhtmlOutlineEmitter(contentHandler, outline, headingOutline);
outlineEmitter.emitHeadings();
outlineEmitter.emit();
emitDetails();
StatsEmitter.emit(contentHandler, this);
}
}
Use of com.thaiopensource.validate.IncorrectSchemaException in the project validator by validator.
In the class VerifierServletTransaction, method rootNamespace.
/**
 * Handles the namespace of the document's root element.
 *
 * <p>If no validator is configured yet, the namespace is looked up among the
 * preset namespaces; the matching preset's schema URLs are used to build the
 * validator, whose content handler is then handed to
 * {@code bufferingRootNamespaceSniffer}. On the first call only, a warning is
 * issued when the document's content type is a known one whose expected
 * namespace differs from the actual root namespace.
 *
 * @param namespace the root element's namespace URI (empty string when the
 *        root element is in no namespace)
 * @param locator location attached to the content-type warning
 * @throws SAXException on error-handler failure; in particular a
 *         {@code CannotFindPresetSchemaException} is thrown when no preset
 *         matches the namespace
 */
void rootNamespace(String namespace, Locator locator) throws SAXException {
    if (validator == null) {
        // Linear scan for the first preset whose namespace matches.
        int presetIndex = -1;
        int candidate = 0;
        while (presetIndex < 0 && candidate < presetNamespaces.length) {
            if (namespace.equals(presetNamespaces[candidate])) {
                presetIndex = candidate;
            }
            candidate++;
        }
        if (presetIndex < 0) {
            errorHandler.schemaError(new SAXException("Cannot find preset schema for namespace: \u201C" + namespace + "\u201D."));
            throw new CannotFindPresetSchemaException();
        }
        String presetLabel = presetLabels[presetIndex];
        String presetUrlList = presetUrls[presetIndex];
        errorHandler.info("Using the preset for " + presetLabel + " based on the root namespace.");
        try {
            validator = validatorByUrls(presetUrlList);
        } catch (IncorrectSchemaException | IOException e) {
            // At this point the schema comes from memory.
            throw new RuntimeException(e);
        }
        if (bufferingRootNamespaceSniffer == null) {
            throw new RuntimeException("Bug! bufferingRootNamespaceSniffer was null.");
        }
        bufferingRootNamespaceSniffer.setContentHandler(validator.getContentHandler());
    }
    // The content-type consistency check runs once per transaction.
    if (rootNamespaceSeen) {
        return;
    }
    rootNamespaceSeen = true;
    if (contentType == null) {
        return;
    }
    int knownTypeIndex = Arrays.binarySearch(KNOWN_CONTENT_TYPES, contentType);
    if (knownTypeIndex < 0 || NAMESPACES_FOR_KNOWN_CONTENT_TYPES[knownTypeIndex].equals(namespace)) {
        // Unknown content type, or the namespace is the expected one.
        return;
    }
    String mismatchMessage;
    if ("".equals(namespace)) {
        mismatchMessage = "\u201C" + contentType + "\u201D is not an appropriate Content-Type for a document whose root element is not in a namespace.";
    } else {
        mismatchMessage = "\u201C" + contentType + "\u201D is not an appropriate Content-Type for a document whose root namespace is \u201C" + namespace + "\u201D.";
    }
    errorHandler.warning(new SAXParseException(mismatchMessage, locator));
}
Use of com.thaiopensource.validate.IncorrectSchemaException in the project validator by validator.
In the class VerifierServletTransaction, method resolveSchema.
/**
 * Resolves a schema URL to a {@code Schema}.
 *
 * <p>A URL found among the preloaded schema URLs is answered from the
 * preloaded schemas, unless the options request an attribute-owner wrap: in
 * that case a {@code CheckerSchema} is rejected, and any other preloaded
 * schema is re-read through the entity resolver like an external one. Every
 * schema that is read this way marks the transaction as using an external
 * schema.
 *
 * @param url the schema URL to resolve
 * @param options the property map passed on to the schema reader
 * @return the preloaded or freshly created schema
 * @throws SAXException if reporting the error or reading the schema fails
 * @throws IOException if the schema cannot be read
 * @throws IncorrectSchemaException if a checker is requested as an attribute
 *         schema, or if the schema reader rejects the schema
 */
@Override
public Schema resolveSchema(String url, PropertyMap options) throws SAXException, IOException, IncorrectSchemaException {
    int preloadedIndex = Arrays.binarySearch(preloadedSchemaUrls, url);
    if (preloadedIndex >= 0) {
        Schema preloaded = preloadedSchemas[preloadedIndex];
        if (!options.contains(WrapProperty.ATTRIBUTE_OWNER)) {
            return preloaded;
        }
        if (preloaded instanceof CheckerSchema) {
            errorHandler.error(new SAXParseException("A non-schema checker cannot be used as an attribute schema.", null, url, -1, -1));
            throw new IncorrectSchemaException();
        }
        // Attribute-owner request for an ordinary preloaded schema:
        // continue below and read the schema from its URL.
    }
    externalSchema = true;
    TypedInputSource schemaInput = (TypedInputSource) entityResolver.resolveEntity(null, url);
    // Compact-syntax RELAX NG gets its dedicated reader; anything else is
    // left to the auto-detecting reader.
    SchemaReader schemaReader = "application/relax-ng-compact-syntax".equals(schemaInput.getType())
            ? CompactSchemaReader.getInstance()
            : new AutoSchemaReader();
    Schema created = schemaReader.createSchema(schemaInput, options);
    if (Statistics.STATISTICS != null) {
        String implementationName = created.getClass().getName();
        if ("com.thaiopensource.validate.schematron.SchemaImpl".equals(implementationName)) {
            externalSchematron = true;
        }
    }
    return created;
}
Aggregations