use of nu.validator.xml.PrudentHttpEntityResolver in project validator by validator.
the class VerifierServletTransaction method validate.
/**
* Runs one validation pass over the requested document and reports the
* results through {@code errorHandler}.
*
* Does nothing when {@code willValidate()} is false. Otherwise it builds the
* entity-resolver chain, sets up the validator and the parser, wraps the
* reader with the recording/filtering wrappers, parses the document, and
* finally (for HTML/XHTML output only) emits the outline, details and stats.
*
* Most failures raised while parsing are caught here and either logged or
* forwarded to {@code errorHandler}; they do not propagate to the caller.
*
* @throws SAXException if flushing {@code out} fails before validation
* starts; NOTE(review): calls made outside the guarded section (the error
* handler calls in the catch/finally blocks and the emitters at the end)
* can presumably raise it as well - confirm against their signatures.
*/
@SuppressWarnings({ "deprecation", "unchecked" })
void validate() throws SAXException {
if (!willValidate()) {
return;
}
// Tracks whether the HTML/XHTML page tail should still be emitted at the
// end; it is cleared below when an IOException, RuntimeException or Error
// interrupts the run.
boolean isHtmlOrXhtml = (outputFormat == OutputFormat.HTML || outputFormat == OutputFormat.XHTML);
if (isHtmlOrXhtml) {
try {
out.flush();
} catch (IOException e1) {
throw new SAXException(e1);
}
}
// Resolver chain, innermost first: HTTP -> data: URIs -> local cache.
httpRes = new PrudentHttpEntityResolver(SIZE_LIMIT, laxType, errorHandler, request);
httpRes.setUserAgent(userAgent);
dataRes = new DataUriEntityResolver(httpRes, laxType, errorHandler);
contentTypeParser = new ContentTypeParser(errorHandler, laxType);
entityResolver = new LocalCacheEntityResolver(dataRes);
// RNC is allowed only while the validator is being set up; it is switched
// off again right before the document itself is loaded.
setAllowRnc(true);
setAllowCss(true);
try {
this.errorHandler.start(document);
// Property map handed to Jing for schema loading/validation.
PropertyMapBuilder pmb = new PropertyMapBuilder();
pmb.put(ValidateProperty.ERROR_HANDLER, errorHandler);
pmb.put(ValidateProperty.ENTITY_RESOLVER, entityResolver);
pmb.put(ValidateProperty.XML_READER_CREATOR, new VerifierServletXMLReaderCreator(errorHandler, entityResolver));
pmb.put(ValidateProperty.SCHEMA_RESOLVER, this);
RngProperty.CHECK_ID_IDREF.add(pmb);
jingPropertyMap = pmb.toPropertyMap();
tryToSetupValidator();
setAllowRnc(false);
loadDocAndSetupParser();
setErrorProfile();
contentType = documentInput.getType();
if ("text/css".equals(contentType)) {
// CSS input: sandwich the raw bytes between CSS_CHECKING_PROLOG and
// CSS_CHECKING_EPILOG, force the HTML parser mode, and set the parser up
// a second time for the wrapped stream.
String charset = "UTF-8";
if (documentInput.getEncoding() != null) {
charset = documentInput.getEncoding();
}
List<InputStream> streams = new ArrayList<>();
streams.add(new ByteArrayInputStream(CSS_CHECKING_PROLOG));
streams.add(documentInput.getByteStream());
streams.add(new ByteArrayInputStream(CSS_CHECKING_EPILOG));
Enumeration<InputStream> e = Collections.enumeration(streams);
documentInput.setByteStream(new SequenceInputStream(e));
documentInput.setEncoding(charset);
// NOTE(review): presumably compensates for the line added by the prolog
// so reported line numbers match the user's CSS - confirm.
errorHandler.setLineOffset(-1);
sourceCode.setIsCss();
parser = ParserMode.HTML;
loadDocAndSetupParser();
}
reader.setErrorHandler(errorHandler);
sourceCode.initialize(documentInput);
// Without a validator, normalization checking is always turned on.
if (validator == null) {
checkNormalization = true;
}
if (checkNormalization) {
reader.setFeature("http://xml.org/sax/features/unicode-normalization-checking", true);
}
// Tap the event stream so the source-location recorder (and the base URI
// tracker, when present) sees every event the reader produces.
WiretapXMLReaderWrapper wiretap = new WiretapXMLReaderWrapper(reader);
ContentHandler recorder = sourceCode.getLocationRecorder();
if (baseUriTracker == null) {
wiretap.setWiretapContentHander(recorder);
} else {
wiretap.setWiretapContentHander(new CombineContentHandler(recorder, baseUriTracker));
}
wiretap.setWiretapLexicalHandler((LexicalHandler) recorder);
reader = wiretap;
if (htmlParser != null) {
htmlParser.addCharacterHandler(sourceCode);
htmlParser.setMappingLangToXmlLang(true);
htmlParser.setErrorHandler(errorHandler.getExactErrorHandler());
htmlParser.setTreeBuilderErrorHandlerOverride(errorHandler);
errorHandler.setHtml(true);
} else if (xmlParser != null) {
// this must be after wiretap!
if (!filteredNamespaces.isEmpty()) {
reader = new NamespaceDroppingXMLReaderWrapper(reader, filteredNamespaces);
}
xmlParser.setErrorHandler(errorHandler.getExactErrorHandler());
xmlParser.lockErrorHandler();
} else {
throw new RuntimeException("Bug. Unreachable.");
}
// make
reader = new AttributesPermutingXMLReaderWrapper(reader);
// better
// A user-supplied charset override replaces whatever encoding the input
// declared; a warning records what was overridden.
if (charsetOverride != null) {
String charset = documentInput.getEncoding();
if (charset == null) {
errorHandler.warning(new SAXParseException("Overriding document character encoding from none to \u201C" + charsetOverride + "\u201D.", null));
} else {
errorHandler.warning(new SAXParseException("Overriding document character encoding from \u201C" + charset + "\u201D to \u201C" + charsetOverride + "\u201D.", null));
}
documentInput.setEncoding(charsetOverride);
}
// Two outline builders are stacked, differing only in their boolean flag;
// their results are read back from request attributes after parsing.
if (showOutline) {
reader = new OutlineBuildingXMLReaderWrapper(reader, request, false);
reader = new OutlineBuildingXMLReaderWrapper(reader, request, true);
}
reader.parse(documentInput);
if (showOutline) {
outline = (Deque<Section>) request.getAttribute("http://validator.nu/properties/document-outline");
headingOutline = (Deque<Section>) request.getAttribute("http://validator.nu/properties/heading-outline");
}
} catch (CannotFindPresetSchemaException e) {
// NOTE(review): silently ignored here - presumably already reported
// where it was raised; confirm.
} catch (ResourceNotRetrievableException e) {
log4j.debug(e.getMessage());
} catch (NonXmlContentTypeException e) {
log4j.debug(e.getMessage());
} catch (FatalSAXException e) {
log4j.debug(e.getMessage());
} catch (SocketTimeoutException e) {
// Timeouts are reported as I/O errors carrying only the message text.
errorHandler.ioError(new IOException(e.getMessage(), null));
} catch (ConnectTimeoutException e) {
errorHandler.ioError(new IOException(e.getMessage(), null));
} catch (TooManyErrorsException e) {
errorHandler.fatalError(e);
} catch (SAXException e) {
// Only log SAX failures whose message is neither of the two known
// messages held in cannotRecover and changingEncoding.
String msg = e.getMessage();
if (!cannotRecover.equals(msg) && !changingEncoding.equals(msg)) {
log4j.debug("SAXException: " + e.getMessage());
}
} catch (IOException e) {
isHtmlOrXhtml = false;
if (e.getCause() instanceof org.apache.http.TruncatedChunkException) {
log4j.debug("TruncatedChunkException", e.getCause());
} else {
errorHandler.ioError(e);
}
} catch (IncorrectSchemaException e) {
log4j.debug("IncorrectSchemaException", e);
errorHandler.schemaError(e);
} catch (RuntimeException e) {
isHtmlOrXhtml = false;
log4j.error("RuntimeException, doc: " + document + " schema: " + schemaUrls + " lax: " + laxType, e);
errorHandler.internalError(e, "Oops. That was not supposed to happen. A bug manifested itself in the application internals. Unable to continue. Sorry. The admin was notified.");
} catch (Error e) {
isHtmlOrXhtml = false;
log4j.error("Error, doc: " + document + " schema: " + schemaUrls + " lax: " + laxType, e);
errorHandler.internalError(e, "Oops. That was not supposed to happen. A bug manifested itself in the application internals. Unable to continue. Sorry. The admin was notified.");
} finally {
// Always close the message stream and record statistics, whatever the
// outcome of the run.
errorHandler.end(successMessage(), failureMessage(), (String) request.getAttribute("http://validator.nu/properties/document-language"));
gatherStatistics();
}
// Page tail for HTML/XHTML output; skipped when the flag was cleared by
// one of the catch blocks above.
if (isHtmlOrXhtml) {
XhtmlOutlineEmitter outlineEmitter = new XhtmlOutlineEmitter(contentHandler, outline, headingOutline);
outlineEmitter.emitHeadings();
outlineEmitter.emit();
emitDetails();
StatsEmitter.emit(contentHandler, this);
}
}
use of nu.validator.xml.PrudentHttpEntityResolver in project validator by validator.
the class Downloader method run.
/**
 * Worker loop that downloads documents listed on {@code in}.
 *
 * Each input line has the form {@code md5<TAB>url}. The URL is fetched as
 * HTML (at most 1 MiB) and stored gzip-compressed at
 * {@code rootDir/<md5 chars 0-1>/<md5 chars 2-3>/<md5>.gz}. On success a line
 * {@code md5<TAB>url<TAB>charset} is printed to {@code out}, where charset is
 * the literal {@code "null"} when unknown or when it contains a tab.
 *
 * Processing is best-effort: a line that is malformed, cannot be fetched or
 * cannot be written is skipped. The loop ends at end of input, or as soon as
 * the input itself can no longer be read.
 */
public void run() {
    for (;;) {
        String inLine;
        try {
            inLine = in.readLine();
        } catch (Exception e) {
            // The input source is broken; retrying the read would only spin
            // forever, so stop this worker.
            return;
        }
        if (inLine == null) {
            return;
        }
        try {
            int index = inLine.indexOf('\t');
            // The md5 must supply at least four characters for the two
            // directory levels; reject bad lines before touching the network.
            if (index < 4) {
                continue;
            }
            String md5 = inLine.substring(0, index);
            String url = inLine.substring(index + 1);

            PrudentHttpEntityResolver resolver = new PrudentHttpEntityResolver(1024 * 1024, false, null);
            resolver.setAcceptAllKnownXmlTypes(false);
            resolver.setAllowGenericXml(false);
            resolver.setAllowRnc(false);
            resolver.setAllowXhtml(false);
            resolver.setAllowHtml(true);

            InputSource is;
            try {
                is = resolver.resolveEntity(null, url);
            } catch (Exception e) {
                continue;
            }
            if (is == null) {
                continue;
            }

            String charset = is.getEncoding();
            if (charset == null || charset.indexOf('\t') != -1) {
                // Keep the output line strictly tab-separated.
                charset = "null";
            }

            File top = new File(rootDir, md5.substring(0, 2));
            synchronized (rootDir) {
                top.mkdir();
            }
            File second = new File(top, md5.substring(2, 4));
            synchronized (rootDir) {
                second.mkdir();
            }
            File outFile = new File(second, md5 + ".gz");

            InputStream inStream = is.getByteStream();
            // Both output streams are declared as resources so the file handle
            // is released even when the gzip wrapper or the copy fails.
            try (OutputStream fileOut = new FileOutputStream(outFile);
                    OutputStream outStream = new GZIPOutputStream(fileOut)) {
                IO.copy(inStream, outStream);
            } catch (Exception e) {
                // Never leave a truncated archive behind.
                outFile.delete();
                continue;
            } finally {
                if (inStream != null) {
                    inStream.close();
                }
            }
            out.println(md5 + '\t' + url + '\t' + charset);
        } catch (Exception e) {
            // Best effort: a failure on one entry must not stop the worker.
        }
    }
}
use of nu.validator.xml.PrudentHttpEntityResolver in project validator by validator.
the class ParseTreePrinter method service.
/**
 * Serves the parse-tree dump.
 *
 * A GET without {@code doc} and without {@code content} returns the HTML form.
 * Otherwise the input is taken from the {@code doc} URL, the {@code content}
 * parameter (GET) or the request body (POST), parsed with the HTML or the XML
 * parser depending on its content type, and the resulting tree followed by
 * the collected parse errors is written as plain text.
 *
 * SAX and I/O failures during processing are written into the response body
 * rather than propagated.
 *
 * @throws IOException if the response stream cannot be obtained or written
 */
public void service() throws IOException {
    request.setCharacterEncoding("utf-8");
    String content = null;
    String document = scrubUrl(request.getParameter("doc"));
    document = ("".equals(document)) ? null : document;
    try (Writer writer = new OutputStreamWriter(response.getOutputStream(), "UTF-8")) {
        // Note: "content" is only looked up when there is no document and the
        // request is a GET (short-circuit evaluation).
        if (document == null && methodIsGet() && (content = request.getParameter("content")) == null) {
            response.setContentType("text/html; charset=utf-8");
            writer.write(FORM_HTML);
            writer.flush();
            return;
        }
        response.setContentType("text/plain; charset=utf-8");
        try {
            PrudentHttpEntityResolver entityResolver = new PrudentHttpEntityResolver(2048 * 1024, false, null);
            entityResolver.setAllowGenericXml(false);
            entityResolver.setAcceptAllKnownXmlTypes(false);
            entityResolver.setAllowHtml(true);
            entityResolver.setAllowXhtml(true);
            TypedInputSource documentInput;
            if (methodIsGet()) {
                if (content == null) {
                    documentInput = (TypedInputSource) entityResolver.resolveEntity(null, document);
                } else {
                    documentInput = new TypedInputSource(new StringReader(content));
                    if ("xml".equals(request.getParameter("parser"))) {
                        documentInput.setType("application/xhtml+xml");
                    } else {
                        documentInput.setType("text/html");
                    }
                }
            } else {
                // POST
                String postContentType = request.getContentType();
                if (postContentType == null) {
                    response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Content-Type missing");
                    return;
                } else if (postContentType.trim().toLowerCase(java.util.Locale.ROOT).startsWith("application/x-www-form-urlencoded")) {
                    // Locale.ROOT keeps the comparison independent of the
                    // server's default locale (e.g. Turkish dotless i).
                    response.sendError(HttpServletResponse.SC_UNSUPPORTED_MEDIA_TYPE, "application/x-www-form-urlencoded not supported. Please use multipart/form-data.");
                    return;
                }
                long len = request.getContentLength();
                if (len > SIZE_LIMIT) {
                    throw new StreamBoundException("Resource size exceeds limit.");
                }
                ContentTypeParser contentTypeParser = new ContentTypeParser(null, false);
                contentTypeParser.setAllowGenericXml(false);
                contentTypeParser.setAcceptAllKnownXmlTypes(false);
                contentTypeParser.setAllowHtml(true);
                contentTypeParser.setAllowXhtml(true);
                documentInput = contentTypeParser.buildTypedInputSource(document, null, postContentType);
                // Unknown length (negative): enforce the limit while reading.
                documentInput.setByteStream(len < 0 ? new BoundedInputStream(request.getInputStream(), SIZE_LIMIT, document) : request.getInputStream());
                documentInput.setSystemId(request.getHeader("Content-Location"));
            }
            String type = documentInput.getType();
            XMLReader parser;
            if ("text/html".equals(type) || "text/html-sandboxed".equals(type)) {
                writer.write("HTML parser\n\n#document\n");
                parser = new nu.validator.htmlparser.sax.HtmlParser();
                parser.setProperty("http://validator.nu/properties/heuristics", Heuristics.ALL);
                parser.setProperty("http://validator.nu/properties/xml-policy", XmlViolationPolicy.ALLOW);
            } else if ("application/xhtml+xml".equals(type)) {
                writer.write("XML parser\n\n#document\n");
                parser = new SAXDriver();
                // External entities are disabled and resolved to nothing.
                parser.setFeature("http://xml.org/sax/features/external-general-entities", false);
                parser.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
                parser.setEntityResolver(new NullEntityResolver());
            } else {
                writer.write("Unsupported content type.\n");
                writer.flush();
                return;
            }
            TreeDumpContentHandler treeDumpContentHandler = new TreeDumpContentHandler(writer, false);
            ListErrorHandler listErrorHandler = new ListErrorHandler();
            parser.setContentHandler(treeDumpContentHandler);
            parser.setProperty("http://xml.org/sax/properties/lexical-handler", treeDumpContentHandler);
            parser.setErrorHandler(listErrorHandler);
            parser.parse(documentInput);
            writer.write("#errors\n");
            for (String err : listErrorHandler.getErrors()) {
                writer.write(err);
                writer.write('\n');
            }
        } catch (SAXException e) {
            // getMessage() may be null, and Writer.write(String) rejects null;
            // fall back to the exception's string form.
            String message = e.getMessage();
            writer.write("SAXException:\n");
            writer.write(message != null ? message : e.toString());
            writer.write("\n");
        } catch (IOException e) {
            String message = e.getMessage();
            writer.write("IOException:\n");
            writer.write(message != null ? message : e.toString());
            writer.write("\n");
        } finally {
            writer.flush();
        }
    }
}
use of nu.validator.xml.PrudentHttpEntityResolver in project validator by validator.
the class VerifierServletTransaction method service.
/**
 * Entry point for one validator request.
 *
 * Validates the request itself (method, content type, {@code out} format,
 * deny list, JSONP callback), collects the options from the request
 * parameters (message filters, user agent, charset override, namespace
 * filter, line offset, ...), selects the message emitter for the requested
 * output format and then starts validation. Invalid requests are answered
 * with {@code sendError} and processing stops.
 *
 * @throws ServletException declared for the servlet contract
 * @throws IOException if the response cannot be written
 */
void service() throws ServletException, IOException {
    this.methodIsGet = "GET".equals(request.getMethod()) || "HEAD".equals(request.getMethod());
    this.out = response.getOutputStream();
    try {
        request.setCharacterEncoding("utf-8");
    } catch (NoSuchMethodError e) {
        log4j.debug("Vintage Servlet API doesn't support setCharacterEncoding().", e);
    }
    if (!methodIsGet) {
        postContentType = request.getContentType();
        if (postContentType == null) {
            response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Content-Type missing");
            return;
        } else if (postContentType.trim().toLowerCase(java.util.Locale.ROOT).startsWith("application/x-www-form-urlencoded")) {
            // Locale.ROOT keeps the comparison independent of the server's
            // default locale.
            response.sendError(HttpServletResponse.SC_UNSUPPORTED_MEDIA_TYPE, "application/x-www-form-urlencoded not supported. Please use multipart/form-data.");
            return;
        }
    }

    // Output format; HTML when the parameter is absent.
    String outFormat = request.getParameter("out");
    if (outFormat == null) {
        outputFormat = OutputFormat.HTML;
    } else {
        if ("html".equals(outFormat)) {
            outputFormat = OutputFormat.HTML;
        } else if ("xhtml".equals(outFormat)) {
            outputFormat = OutputFormat.XHTML;
        } else if ("text".equals(outFormat)) {
            outputFormat = OutputFormat.TEXT;
        } else if ("gnu".equals(outFormat)) {
            outputFormat = OutputFormat.GNU;
        } else if ("xml".equals(outFormat)) {
            outputFormat = OutputFormat.XML;
        } else if ("json".equals(outFormat)) {
            outputFormat = OutputFormat.JSON;
        } else {
            response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Unsupported output format");
            return;
        }
    }

    // Document address: Content-Location (non-GET only), then "doc", then
    // "file"; an empty string counts as absent.
    if (!methodIsGet) {
        document = request.getHeader("Content-Location");
    }
    if (document == null) {
        document = request.getParameter("doc");
    }
    if (document == null) {
        document = request.getParameter("file");
    }
    document = ("".equals(document)) ? null : document;
    if (document != null) {
        for (String domain : DENY_LIST) {
            if (!"".equals(domain) && document.contains(domain)) {
                response.sendError(429, "Too many requests");
                return;
            }
        }
    }

    // JSONP callback: must be a valid identifier and not a reserved word.
    String callback = null;
    if (outputFormat == OutputFormat.JSON) {
        callback = request.getParameter("callback");
        if (callback != null) {
            Matcher m = JS_IDENTIFIER.matcher(callback);
            if (m.matches()) {
                if (Arrays.binarySearch(JS_RESERVED_WORDS, callback) >= 0) {
                    response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Callback is a reserved word.");
                    return;
                }
            } else {
                response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Callback is not a valid ECMA 262 IdentifierName.");
                return;
            }
        }
    }

    if (willValidate()) {
        response.setDateHeader("Expires", 0);
        response.setHeader("Cache-Control", "no-cache");
    } else if (outputFormat == OutputFormat.HTML || outputFormat == OutputFormat.XHTML) {
        response.setDateHeader("Last-Modified", lastModified);
    } else {
        response.sendError(HttpServletResponse.SC_BAD_REQUEST, "No input document");
        return;
    }
    setup();

    // Message filter: system filter, optionally extended by "filterpattern"
    // and by the patterns listed in the file at "filterurl". All parts are
    // joined as regex alternatives.
    String filterString = systemFilterString;
    String filterPatternParam = request.getParameter("filterpattern");
    if (filterPatternParam != null && !"".equals(filterPatternParam)) {
        if ("".equals(filterString)) {
            filterString = scrub(filterPatternParam);
        } else {
            filterString += "|" + scrub(filterPatternParam);
        }
    }
    String filterUrl = request.getParameter("filterurl");
    if (filterUrl != null && !"".equals(filterUrl)) {
        try {
            InputSource filterFile = (new PrudentHttpEntityResolver(-1, true, null)).resolveEntity(null, filterUrl);
            StringBuilder sb = new StringBuilder();
            // NOTE(review): the file is decoded with the platform default
            // charset, as before; consider an explicit charset.
            try (BufferedReader filterReader = new BufferedReader(new InputStreamReader(filterFile.getByteStream()))) {
                String line;
                String pipe = "";
                while ((line = filterReader.readLine()) != null) {
                    if (line.startsWith("#")) {
                        continue;
                    }
                    sb.append(pipe);
                    sb.append(line);
                    pipe = "|";
                }
            }
            if (sb.length() != 0) {
                // Same merge rule as for "filterpattern": replace an empty
                // filter, otherwise append as a further alternative. (A
                // leading "|" would add an empty alternative that matches
                // every message.)
                if ("".equals(filterString)) {
                    filterString = scrub(sb.toString());
                } else {
                    filterString += "|" + scrub(sb.toString());
                }
            }
        } catch (Exception e) {
            // The error response has been sent; nothing more may be written.
            response.sendError(500, e.getMessage());
            return;
        }
    }
    Pattern filterPattern = null;
    if (!"".equals(filterString)) {
        filterPattern = Pattern.compile(filterString);
    }

    if (request.getParameter("useragent") != null) {
        userAgent = scrub(request.getParameter("useragent"));
    } else {
        userAgent = USER_AGENT;
    }
    if (request.getParameter("acceptlanguage") != null) {
        request.setAttribute("http://validator.nu/properties/accept-language", scrub(request.getParameter("acceptlanguage")));
    }
    Object inputType = request.getAttribute("nu.validator.servlet.MultipartFormDataFilter.type");
    showSource = (request.getParameter("showsource") != null);
    // Text-area input always shows its source.
    showSource = (showSource || "textarea".equals(inputType));
    showOutline = (request.getParameter("showoutline") != null);
    if (request.getParameter("checkerrorpages") != null) {
        request.setAttribute("http://validator.nu/properties/ignore-response-status", true);
    }
    if (request.getParameter("showimagereport") != null) {
        imageCollector = new ImageCollector(sourceCode);
    }
    String charset = request.getParameter("charset");
    if (charset != null) {
        charset = scrub(charset.trim());
        if (!"".equals(charset)) {
            charsetOverride = charset;
        }
    }
    String nsfilter = request.getParameter("nsfilter");
    if (nsfilter != null) {
        for (String ns : SPACE.split(nsfilter)) {
            if (ns.length() > 0) {
                filteredNamespaces.add(ns);
            }
        }
    }
    boolean errorsOnly = ("error".equals(request.getParameter("level")));
    boolean asciiQuotes = (request.getParameter("asciiquotes") != null);
    int lineOffset = 0;
    String lineOffsetStr = request.getParameter("lineoffset");
    if (lineOffsetStr != null) {
        try {
            lineOffset = Integer.parseInt(lineOffsetStr);
        } catch (NumberFormatException e) {
            // Not a number: keep the default offset of 0.
        }
    }

    try {
        if (outputFormat == OutputFormat.HTML || outputFormat == OutputFormat.XHTML) {
            if (outputFormat == OutputFormat.HTML) {
                response.setContentType("text/html; charset=utf-8");
                contentHandler = new HtmlSerializer(out);
            } else {
                response.setContentType("application/xhtml+xml");
                contentHandler = new XmlSerializer(out);
            }
            emitter = new XhtmlSaxEmitter(contentHandler);
            errorHandler = new MessageEmitterAdapter(filterPattern, sourceCode, showSource, imageCollector, lineOffset, false, new XhtmlMessageEmitter(contentHandler));
            // The page emitter drives the rest of the response for these
            // formats; validate() is not called directly on this path.
            PageEmitter.emit(contentHandler, this);
        } else {
            if (outputFormat == OutputFormat.TEXT) {
                response.setContentType("text/plain; charset=utf-8");
                errorHandler = new MessageEmitterAdapter(filterPattern, sourceCode, showSource, null, lineOffset, false, new TextMessageEmitter(out, asciiQuotes));
            } else if (outputFormat == OutputFormat.GNU) {
                response.setContentType("text/plain; charset=utf-8");
                errorHandler = new MessageEmitterAdapter(filterPattern, sourceCode, showSource, null, lineOffset, false, new GnuMessageEmitter(out, asciiQuotes));
            } else if (outputFormat == OutputFormat.XML) {
                response.setContentType("application/xml");
                errorHandler = new MessageEmitterAdapter(filterPattern, sourceCode, showSource, null, lineOffset, false, new XmlMessageEmitter(new XmlSerializer(out)));
            } else if (outputFormat == OutputFormat.JSON) {
                if (callback == null) {
                    response.setContentType("application/json; charset=utf-8");
                } else {
                    response.setContentType("application/javascript; charset=utf-8");
                }
                errorHandler = new MessageEmitterAdapter(filterPattern, sourceCode, showSource, null, lineOffset, false, new JsonMessageEmitter(new nu.validator.json.Serializer(out), callback));
            } else {
                throw new RuntimeException("Unreachable.");
            }
            errorHandler.setErrorsOnly(errorsOnly);
            validate();
        }
    } catch (SAXException e) {
        log4j.debug("SAXException: " + e.getMessage());
    }
}
use of nu.validator.xml.PrudentHttpEntityResolver in project validator by validator.
the class SimpleDocumentValidator method checkHttpURL.
/**
 * Checks a Web document.
 *
 * Fetches {@code document} over HTTP, takes the character encoding from the
 * {@code charset} parameter of the returned content type when present, and
 * checks the document as CSS, HTML or XML depending on that content type.
 *
 * Note that this installs a process-wide default {@code CookieHandler} that
 * accepts all cookies.
 *
 * @param document address of the document to check
 * @param userAgent User-Agent value sent with the request
 * @param errorHandler receiver of the messages produced by the resolver
 * @throws IOException if loading of the URL fails for some reason
 * @throws SAXException if raised while resolving or checking the document
 */
public void checkHttpURL(String document, String userAgent, ErrorHandler errorHandler) throws IOException, SAXException {
    CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
    validator.reset();
    httpRes = new PrudentHttpEntityResolver(-1, true, errorHandler);
    if (this.allowCss) {
        httpRes.setAllowCss(true);
    }
    httpRes.setAllowHtml(true);
    httpRes.setUserAgent(userAgent);
    try {
        documentInput = (TypedInputSource) httpRes.resolveEntity(null, document);
        String contentType = documentInput.getType();
        documentInput.setSystemId(document);

        final String charsetKey = "charset=";
        for (String param : contentType.replace(" ", "").split(";")) {
            // Media type parameter names are case-insensitive.
            if (param.regionMatches(true, 0, charsetKey, 0, charsetKey.length())) {
                String value = param.substring(charsetKey.length());
                // The value may be a quoted string: charset="utf-8".
                if (value.length() >= 2 && value.startsWith("\"") && value.endsWith("\"")) {
                    value = value.substring(1, value.length() - 1);
                }
                if (!value.isEmpty()) {
                    documentInput.setEncoding(value);
                }
                break;
            }
        }

        if (contentType.startsWith("text/css")) {
            checkAsCss(documentInput);
        } else if (contentType.startsWith("text/html")) {
            checkAsHTML(documentInput);
        } else {
            checkAsXML(documentInput);
        }
    } catch (ResourceNotRetrievableException e) {
        // NOTE(review): intentionally not propagated - presumably the
        // resolver has already reported this through errorHandler; confirm.
    }
}
Aggregations