Use of nu.validator.xml.ContentTypeParser in the project validator by validator.
The class VerifierServletTransaction, method validate.
/**
* Runs the validation pass for this transaction.
*
* <p>In order, the method: returns early when {@code willValidate()} is
* false; builds the entity-resolver chain and the Jing property map; sets up
* the validator and loads the document; special-cases {@code text/css}
* input by wrapping it between a prolog and an epilog and re-loading it with
* the HTML parser; wraps the reader with the source-recording wiretap and
* further reader wrappers; parses the document; and finally, for HTML/XHTML
* output, emits the outline, details and statistics.
*
* <p>Most failures raised during setup or parsing are caught here and either
* logged or forwarded to {@code errorHandler}; {@code errorHandler.end(...)}
* and {@code gatherStatistics()} always run.
*
* @throws SAXException if flushing {@code out} fails (the
*         {@link IOException} is wrapped); NOTE(review): calls made on
*         {@code errorHandler} in the catch/finally blocks and the emitters
*         at the end may presumably also throw it -- confirm.
*/
@SuppressWarnings({ "deprecation", "unchecked" })
void validate() throws SAXException {
// Nothing to do when this request is not going to be validated.
if (!willValidate()) {
return;
}
// Remember whether the output is HTML/XHTML; only then is the trailing
// outline/details/statistics section emitted. The flag is cleared below
// on IOException, RuntimeException and Error.
boolean isHtmlOrXhtml = (outputFormat == OutputFormat.HTML || outputFormat == OutputFormat.XHTML);
if (isHtmlOrXhtml) {
try {
// Push out whatever has been written so far before starting the
// validation work (presumably so the client sees the page start
// early -- TODO confirm).
out.flush();
} catch (IOException e1) {
throw new SAXException(e1);
}
}
// Resolver chain: the HTTP resolver is wrapped by the data: URI resolver,
// which in turn is wrapped by the local-cache resolver.
httpRes = new PrudentHttpEntityResolver(SIZE_LIMIT, laxType, errorHandler, request);
httpRes.setUserAgent(userAgent);
dataRes = new DataUriEntityResolver(httpRes, laxType, errorHandler);
contentTypeParser = new ContentTypeParser(errorHandler, laxType);
entityResolver = new LocalCacheEntityResolver(dataRes);
// RNC is allowed only while the validator (schemas) is being set up; it is
// switched off again before the document itself is loaded (see below).
setAllowRnc(true);
setAllowCss(true);
try {
this.errorHandler.start(document);
// Build the property map handed to Jing: error handler, entity resolver,
// XML reader creator and this object as schema resolver.
PropertyMapBuilder pmb = new PropertyMapBuilder();
pmb.put(ValidateProperty.ERROR_HANDLER, errorHandler);
pmb.put(ValidateProperty.ENTITY_RESOLVER, entityResolver);
pmb.put(ValidateProperty.XML_READER_CREATOR, new VerifierServletXMLReaderCreator(errorHandler, entityResolver));
pmb.put(ValidateProperty.SCHEMA_RESOLVER, this);
RngProperty.CHECK_ID_IDREF.add(pmb);
jingPropertyMap = pmb.toPropertyMap();
tryToSetupValidator();
setAllowRnc(false);
loadDocAndSetupParser();
setErrorProfile();
contentType = documentInput.getType();
if ("text/css".equals(contentType)) {
// CSS input: sandwich the document bytes between CSS_CHECKING_PROLOG and
// CSS_CHECKING_EPILOG, force the HTML parser and load the document again.
// The encoding defaults to UTF-8 when the input declares none.
String charset = "UTF-8";
if (documentInput.getEncoding() != null) {
charset = documentInput.getEncoding();
}
List<InputStream> streams = new ArrayList<>();
streams.add(new ByteArrayInputStream(CSS_CHECKING_PROLOG));
streams.add(documentInput.getByteStream());
streams.add(new ByteArrayInputStream(CSS_CHECKING_EPILOG));
Enumeration<InputStream> e = Collections.enumeration(streams);
documentInput.setByteStream(new SequenceInputStream(e));
documentInput.setEncoding(charset);
// NOTE(review): the -1 offset presumably compensates for the line taken
// by the prolog in reported positions -- confirm.
errorHandler.setLineOffset(-1);
sourceCode.setIsCss();
parser = ParserMode.HTML;
loadDocAndSetupParser();
}
reader.setErrorHandler(errorHandler);
sourceCode.initialize(documentInput);
// Without a validator, normalization checking is always turned on.
if (validator == null) {
checkNormalization = true;
}
if (checkNormalization) {
reader.setFeature("http://xml.org/sax/features/unicode-normalization-checking", true);
}
// Tap the reader's events so the source-code location recorder (and the
// base URI tracker, when there is one) sees them as well.
WiretapXMLReaderWrapper wiretap = new WiretapXMLReaderWrapper(reader);
ContentHandler recorder = sourceCode.getLocationRecorder();
if (baseUriTracker == null) {
wiretap.setWiretapContentHander(recorder);
} else {
wiretap.setWiretapContentHander(new CombineContentHandler(recorder, baseUriTracker));
}
// The recorder is cast here, i.e. it is expected to implement LexicalHandler too.
wiretap.setWiretapLexicalHandler((LexicalHandler) recorder);
reader = wiretap;
// Exactly one of htmlParser / xmlParser is expected to be set at this point.
if (htmlParser != null) {
htmlParser.addCharacterHandler(sourceCode);
htmlParser.setMappingLangToXmlLang(true);
htmlParser.setErrorHandler(errorHandler.getExactErrorHandler());
htmlParser.setTreeBuilderErrorHandlerOverride(errorHandler);
errorHandler.setHtml(true);
} else if (xmlParser != null) {
// this must be after wiretap!
if (!filteredNamespaces.isEmpty()) {
reader = new NamespaceDroppingXMLReaderWrapper(reader, filteredNamespaces);
}
xmlParser.setErrorHandler(errorHandler.getExactErrorHandler());
xmlParser.lockErrorHandler();
} else {
throw new RuntimeException("Bug. Unreachable.");
}
// make
reader = new AttributesPermutingXMLReaderWrapper(reader);
// better
// An explicit charset override replaces the document's encoding; a warning
// stating the old and new encodings is reported first.
if (charsetOverride != null) {
String charset = documentInput.getEncoding();
if (charset == null) {
errorHandler.warning(new SAXParseException("Overriding document character encoding from none to \u201C" + charsetOverride + "\u201D.", null));
} else {
errorHandler.warning(new SAXParseException("Overriding document character encoding from \u201C" + charset + "\u201D to \u201C" + charsetOverride + "\u201D.", null));
}
documentInput.setEncoding(charsetOverride);
}
// When an outline is requested the reader is wrapped twice, once with the
// flag false and once with it true. NOTE(review): the flag presumably
// selects between the document outline and the heading outline read back
// from the request attributes after parsing -- confirm.
if (showOutline) {
reader = new OutlineBuildingXMLReaderWrapper(reader, request, false);
reader = new OutlineBuildingXMLReaderWrapper(reader, request, true);
}
reader.parse(documentInput);
if (showOutline) {
// Unchecked casts: the attributes are expected to hold Deque<Section> values.
outline = (Deque<Section>) request.getAttribute("http://validator.nu/properties/document-outline");
headingOutline = (Deque<Section>) request.getAttribute("http://validator.nu/properties/heading-outline");
}
} catch (CannotFindPresetSchemaException e) {
// Deliberately empty in the original code. NOTE(review): presumably the
// problem has already been reported where it was raised -- confirm.
} catch (ResourceNotRetrievableException e) {
log4j.debug(e.getMessage());
} catch (NonXmlContentTypeException e) {
log4j.debug(e.getMessage());
} catch (FatalSAXException e) {
log4j.debug(e.getMessage());
} catch (SocketTimeoutException e) {
// Timeouts are reported as I/O errors carrying only the message (no cause).
errorHandler.ioError(new IOException(e.getMessage(), null));
} catch (ConnectTimeoutException e) {
errorHandler.ioError(new IOException(e.getMessage(), null));
} catch (TooManyErrorsException e) {
errorHandler.fatalError(e);
} catch (SAXException e) {
// Only logged, and not even that when the message equals cannotRecover or
// changingEncoding.
String msg = e.getMessage();
if (!cannotRecover.equals(msg) && !changingEncoding.equals(msg)) {
log4j.debug("SAXException: " + e.getMessage());
}
} catch (IOException e) {
// Skip the trailing HTML/XHTML section after an I/O failure.
isHtmlOrXhtml = false;
if (e.getCause() instanceof org.apache.http.TruncatedChunkException) {
log4j.debug("TruncatedChunkException", e.getCause());
} else {
errorHandler.ioError(e);
}
} catch (IncorrectSchemaException e) {
log4j.debug("IncorrectSchemaException", e);
errorHandler.schemaError(e);
} catch (RuntimeException e) {
// Unexpected failure: log it with the document/schema context and report
// an internal error; the trailing HTML/XHTML section is skipped.
isHtmlOrXhtml = false;
log4j.error("RuntimeException, doc: " + document + " schema: " + schemaUrls + " lax: " + laxType, e);
errorHandler.internalError(e, "Oops. That was not supposed to happen. A bug manifested itself in the application internals. Unable to continue. Sorry. The admin was notified.");
} catch (Error e) {
isHtmlOrXhtml = false;
log4j.error("Error, doc: " + document + " schema: " + schemaUrls + " lax: " + laxType, e);
errorHandler.internalError(e, "Oops. That was not supposed to happen. A bug manifested itself in the application internals. Unable to continue. Sorry. The admin was notified.");
} finally {
// Always close the error handler's report and record statistics, whatever
// happened above.
errorHandler.end(successMessage(), failureMessage(), (String) request.getAttribute("http://validator.nu/properties/document-language"));
gatherStatistics();
}
// For HTML/XHTML output (and unless cleared by one of the catch blocks
// above), emit the heading outline, the outline, details and statistics.
if (isHtmlOrXhtml) {
XhtmlOutlineEmitter outlineEmitter = new XhtmlOutlineEmitter(contentHandler, outline, headingOutline);
outlineEmitter.emitHeadings();
outlineEmitter.emit();
emitDetails();
StatsEmitter.emit(contentHandler, this);
}
}
Use of nu.validator.xml.ContentTypeParser in the project validator by validator.
The class ParseTreePrinter, method service.
/**
 * Handles one parse-tree-printing request.
 *
 * <p>A GET request with neither a {@code doc} nor a {@code content}
 * parameter is answered with the HTML form ({@code FORM_HTML}). Otherwise
 * the document is obtained from the {@code doc} URL, from the
 * {@code content} parameter (GET) or from the request body (POST), parsed
 * with the HTML parser or the XML parser depending on its content type, and
 * the output of the {@code TreeDumpContentHandler} followed by an
 * {@code #errors} section is written as {@code text/plain}.
 *
 * <p>{@link SAXException}s and {@link IOException}s raised while resolving
 * or parsing the document are written to the response body rather than
 * propagated.
 *
 * @throws IOException if accessing the request or response fails outside
 *         the handled resolving/parsing section (for example when opening
 *         or flushing the response writer)
 */
public void service() throws IOException {
    request.setCharacterEncoding("utf-8");
    String content = null;
    String document = scrubUrl(request.getParameter("doc"));
    // Treat an empty "doc" parameter the same as a missing one.
    document = ("".equals(document)) ? null : document;
    try (Writer writer = new OutputStreamWriter(response.getOutputStream(), "UTF-8")) {
        // Note: "content" is only read (and assigned) for GET requests without a document URL.
        if (document == null && methodIsGet() && (content = request.getParameter("content")) == null) {
            response.setContentType("text/html; charset=utf-8");
            writer.write(FORM_HTML);
            writer.flush();
            return;
        }
        response.setContentType("text/plain; charset=utf-8");
        try {
            // Only HTML and XHTML are accepted; generic XML types are refused.
            PrudentHttpEntityResolver entityResolver = new PrudentHttpEntityResolver(2048 * 1024, false, null);
            entityResolver.setAllowGenericXml(false);
            entityResolver.setAcceptAllKnownXmlTypes(false);
            entityResolver.setAllowHtml(true);
            entityResolver.setAllowXhtml(true);
            TypedInputSource documentInput;
            if (methodIsGet()) {
                if (content == null) {
                    // GET with a document URL: fetch it through the resolver.
                    documentInput = (TypedInputSource) entityResolver.resolveEntity(null, document);
                } else {
                    // GET with inline content: the "parser" parameter picks the type.
                    documentInput = new TypedInputSource(new StringReader(content));
                    if ("xml".equals(request.getParameter("parser"))) {
                        documentInput.setType("application/xhtml+xml");
                    } else {
                        documentInput.setType("text/html");
                    }
                }
            } else {
                // POST
                String postContentType = request.getContentType();
                if (postContentType == null) {
                    response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Content-Type missing");
                    return;
                } else if (postContentType.trim().toLowerCase(java.util.Locale.ROOT).startsWith("application/x-www-form-urlencoded")) {
                    // Locale.ROOT keeps the comparison independent of the server's
                    // default locale (e.g. Turkish maps 'I' to a dotless 'ı').
                    response.sendError(HttpServletResponse.SC_UNSUPPORTED_MEDIA_TYPE, "application/x-www-form-urlencoded not supported. Please use multipart/form-data.");
                    return;
                }
                long len = request.getContentLength();
                if (len > SIZE_LIMIT) {
                    throw new StreamBoundException("Resource size exceeds limit.");
                }
                ContentTypeParser contentTypeParser = new ContentTypeParser(null, false);
                contentTypeParser.setAllowGenericXml(false);
                contentTypeParser.setAcceptAllKnownXmlTypes(false);
                contentTypeParser.setAllowHtml(true);
                contentTypeParser.setAllowXhtml(true);
                documentInput = contentTypeParser.buildTypedInputSource(document, null, postContentType);
                // When the length is unknown (negative), bound the stream explicitly.
                documentInput.setByteStream(len < 0 ? new BoundedInputStream(request.getInputStream(), SIZE_LIMIT, document) : request.getInputStream());
                documentInput.setSystemId(request.getHeader("Content-Location"));
            }
            String type = documentInput.getType();
            XMLReader parser;
            if ("text/html".equals(type) || "text/html-sandboxed".equals(type)) {
                writer.write("HTML parser\n\n#document\n");
                parser = new nu.validator.htmlparser.sax.HtmlParser();
                parser.setProperty("http://validator.nu/properties/heuristics", Heuristics.ALL);
                parser.setProperty("http://validator.nu/properties/xml-policy", XmlViolationPolicy.ALLOW);
            } else if ("application/xhtml+xml".equals(type)) {
                writer.write("XML parser\n\n#document\n");
                parser = new SAXDriver();
                // External entities are disabled and resolved to nothing.
                parser.setFeature("http://xml.org/sax/features/external-general-entities", false);
                parser.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
                parser.setEntityResolver(new NullEntityResolver());
            } else {
                writer.write("Unsupported content type.\n");
                writer.flush();
                return;
            }
            // The same handler receives both content and lexical events.
            TreeDumpContentHandler treeDumpContentHandler = new TreeDumpContentHandler(writer, false);
            ListErrorHandler listErrorHandler = new ListErrorHandler();
            parser.setContentHandler(treeDumpContentHandler);
            parser.setProperty("http://xml.org/sax/properties/lexical-handler", treeDumpContentHandler);
            parser.setErrorHandler(listErrorHandler);
            parser.parse(documentInput);
            writer.write("#errors\n");
            for (String err : listErrorHandler.getErrors()) {
                writer.write(err);
                writer.write('\n');
            }
        } catch (SAXException e) {
            writer.write("SAXException:\n");
            // String.valueOf guards against a null message, which would make
            // Writer.write(String) throw a NullPointerException.
            writer.write(String.valueOf(e.getMessage()));
            writer.write("\n");
        } catch (IOException e) {
            writer.write("IOException:\n");
            writer.write(String.valueOf(e.getMessage()));
            writer.write("\n");
        } finally {
            writer.flush();
        }
    }
}
Aggregations