use of nu.validator.io.BoundedInputStream in project validator by validator.
the class PrudentHttpEntityResolver method resolveEntity.
/**
 * Fetches the resource identified by {@code systemId} over HTTP or HTTPS and
 * returns it as a typed input source.
 *
 * <p>When {@code requestsLeft} is greater than -1 it acts as a request budget:
 * each call decrements it, and a call made with a budget of zero fails. Only
 * the {@code http} and {@code https} schemes are accepted. The response must
 * have status 200 unless the request attribute
 * {@code http://validator.nu/properties/ignore-response-status} is set. When
 * {@code sizeLimit} is greater than -1, both the declared content length and
 * the bytes actually read (before and after gzip decoding) are bounded by it.
 *
 * <p>Problems are reported to {@code errorHandler} when one is configured; a
 * missing handler never prevents the method from completing or from throwing
 * the corresponding exception. On any failure the HTTP request is aborted and
 * its connection released before the exception propagates. On success the
 * connection is released by the observer attached to the returned stream.
 *
 * @param publicId the public identifier, used only for error reporting and
 *        for building the input source
 * @param systemId the URL to fetch
 * @return the input source (a {@code TypedInputSource}) whose byte stream
 *         reads the response body
 * @throws SAXException if the scheme is unsupported, the request cannot be
 *         built, the response has no body, or the declared size exceeds the
 *         limit
 * @throws IOException if the request budget is exhausted, the URL cannot be
 *         parsed, the port is out of range, or the transfer fails
 * @see org.xml.sax.EntityResolver#resolveEntity(java.lang.String,
 * java.lang.String)
 */
@Override
public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
    if (requestsLeft > -1) {
        if (requestsLeft == 0) {
            throw new IOException("Number of permitted HTTP requests exceeded.");
        } else {
            requestsLeft--;
        }
    }
    HttpGet m = null;
    try {
        URL url = null;
        try {
            url = URL.parse(systemId);
        } catch (GalimatiasParseException e) {
            IOException ioe = (IOException) new IOException(e.getMessage()).initCause(e);
            SAXParseException spe = new SAXParseException(e.getMessage(), publicId, systemId, -1, -1, ioe);
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw ioe;
        }
        String scheme = url.scheme();
        if (!("http".equals(scheme) || "https".equals(scheme))) {
            String msg = "Unsupported URI scheme: \u201C" + scheme + "\u201D.";
            SAXParseException spe = new SAXParseException(msg, publicId, systemId, -1, -1, new IOException(msg));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw spe;
        }
        // From here on, work with the serialized form of the parsed URL.
        systemId = url.toString();
        try {
            m = new HttpGet(systemId);
        } catch (IllegalArgumentException e) {
            SAXParseException spe = new SAXParseException(e.getMessage(), publicId, systemId, -1, -1, (IOException) new IOException(e.getMessage()).initCause(e));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw spe;
        }
        m.setHeader("User-Agent", userAgent);
        m.setHeader("Accept", buildAccept());
        m.setHeader("Accept-Encoding", "gzip");
        if (request != null && request.getAttribute("http://validator.nu/properties/accept-language") != null) {
            m.setHeader("Accept-Language", (String) request.getAttribute("http://validator.nu/properties/accept-language"));
        }
        log4j.info(systemId);
        try {
            if (url.port() > 65535) {
                throw new IOException("Port number must be less than 65536.");
            }
        } catch (NumberFormatException e) {
            throw new IOException("Port number must be less than 65536.");
        }
        HttpResponse response = client.execute(m);
        boolean ignoreResponseStatus = false;
        if (request != null && request.getAttribute("http://validator.nu/properties/ignore-response-status") != null) {
            ignoreResponseStatus = (boolean) request.getAttribute("http://validator.nu/properties/ignore-response-status");
        }
        int statusCode = response.getStatusLine().getStatusCode();
        if (statusCode != 200 && !ignoreResponseStatus) {
            String msg = "HTTP resource not retrievable." + " The HTTP status from the remote server was: " + statusCode + ".";
            SAXParseException spe = new SAXParseException(msg, publicId, m.getURI().toString(), -1, -1, new SystemIdIOException(m.getURI().toString(), msg));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw new ResourceNotRetrievableException(String.format("%s: %s", m.getURI().toString(), msg));
        }
        HttpEntity entity = response.getEntity();
        if (entity == null) {
            // A response may legitimately carry no entity (e.g. when a
            // non-200 status is being ignored); fail with a diagnosable
            // error instead of a NullPointerException further down.
            String msg = "HTTP response contained no entity body.";
            SAXParseException spe = new SAXParseException(msg, publicId, m.getURI().toString(), -1, -1, new IOException(msg));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw spe;
        }
        long len = entity.getContentLength();
        if (sizeLimit > -1 && len > sizeLimit) {
            SAXParseException spe = new SAXParseException("Resource size exceeds limit.", publicId, m.getURI().toString(), -1, -1, new StreamBoundException("Resource size exceeds limit."));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw spe;
        }
        TypedInputSource is;
        org.apache.http.Header ct = response.getFirstHeader("Content-Type");
        String contentType = null;
        final String baseUri = m.getURI().toString();
        if (ct != null) {
            contentType = ct.getValue();
        }
        is = contentTypeParser.buildTypedInputSource(baseUri, publicId, contentType);
        Header cl = response.getFirstHeader("Content-Language");
        if (cl != null) {
            is.setLanguage(cl.getValue().trim());
        }
        Header xuac = response.getFirstHeader("X-UA-Compatible");
        if (xuac != null) {
            String val = xuac.getValue().trim();
            // The handler is optional everywhere else in this method, so it
            // must be optional for this non-fatal report as well.
            if (!"ie=edge".equalsIgnoreCase(val) && errorHandler != null) {
                SAXParseException spe = new SAXParseException("X-UA-Compatible HTTP header must have the value \u201CIE=edge\u201D," + " was \u201C" + val + "\u201D.", publicId, systemId, -1, -1);
                errorHandler.error(spe);
            }
        }
        Header csp = response.getFirstHeader("Content-Security-Policy");
        if (csp != null) {
            try {
                ContentSecurityPolicy.THE_INSTANCE.checkValid(csp.getValue().trim());
            } catch (DatatypeException e) {
                if (errorHandler != null) {
                    SAXParseException spe = new SAXParseException("Content-Security-Policy HTTP header: " + e.getMessage(), publicId, systemId, -1, -1);
                    // Only an Html5DatatypeException can downgrade itself to
                    // a warning; any other DatatypeException is an error.
                    if (e instanceof Html5DatatypeException && ((Html5DatatypeException) e).isWarning()) {
                        errorHandler.warning(spe);
                    } else {
                        errorHandler.error(spe);
                    }
                }
            }
        }
        final HttpGet meth = m;
        InputStream stream = entity.getContent();
        if (sizeLimit > -1) {
            // Bounds the bytes read off the wire.
            stream = new BoundedInputStream(stream, sizeLimit, baseUri);
        }
        Header ce = response.getFirstHeader("Content-Encoding");
        if (ce != null) {
            String val = ce.getValue().trim();
            if ("gzip".equalsIgnoreCase(val) || "x-gzip".equalsIgnoreCase(val)) {
                stream = new GZIPInputStream(stream);
                if (sizeLimit > -1) {
                    // Bounds the decompressed size as well.
                    stream = new BoundedInputStream(stream, sizeLimit, baseUri);
                }
            }
        }
        is.setByteStream(new ObservableInputStream(stream, new StreamObserver() {
            private final Logger log4j = Logger.getLogger("nu.validator.xml.PrudentEntityResolver.StreamObserver");
            // Ensures the connection is released (or aborted) only once.
            private boolean released = false;

            @Override
            public void closeCalled() {
                log4j.debug("closeCalled");
                if (!released) {
                    log4j.debug("closeCalled, not yet released");
                    released = true;
                    try {
                        meth.releaseConnection();
                    } catch (Exception e) {
                        log4j.debug("closeCalled, releaseConnection", e);
                    }
                }
            }

            @Override
            public void exceptionOccurred(Exception ex) throws IOException {
                if (!released) {
                    released = true;
                    try {
                        meth.abort();
                    } catch (Exception e) {
                        log4j.debug("exceptionOccurred, abort", e);
                    } finally {
                        try {
                            meth.releaseConnection();
                        } catch (Exception e) {
                            log4j.debug("exceptionOccurred, releaseConnection", e);
                        }
                    }
                }
                // Rethrow, attaching the base URI to plain I/O failures.
                if (ex instanceof SystemIdIOException) {
                    throw (SystemIdIOException) ex;
                } else if (ex instanceof IOException) {
                    IOException ioe = (IOException) ex;
                    throw new SystemIdIOException(baseUri, ioe.getMessage(), ioe);
                } else if (ex instanceof RuntimeException) {
                    throw (RuntimeException) ex;
                } else {
                    throw new RuntimeException("API contract violation. Wrong exception type.", ex);
                }
            }

            @Override
            public void finalizerCalled() {
                if (!released) {
                    released = true;
                    try {
                        meth.abort();
                    } catch (Exception e) {
                        log4j.debug("finalizerCalled, abort", e);
                    } finally {
                        try {
                            meth.releaseConnection();
                        } catch (Exception e) {
                            log4j.debug("finalizerCalled, releaseConnection", e);
                        }
                    }
                }
            }
        }));
        return is;
    } catch (IOException | RuntimeException | SAXException e) {
        // Any failure after the request object exists must not leak the
        // connection: abort the request, then release it, then rethrow.
        if (m != null) {
            try {
                m.abort();
            } catch (Exception ex) {
                log4j.debug("abort", ex);
            } finally {
                try {
                    m.releaseConnection();
                } catch (Exception ex) {
                    log4j.debug("releaseConnection", ex);
                }
            }
        }
        throw e;
    }
}
use of nu.validator.io.BoundedInputStream in project validator by validator.
the class ParseTreePrinter method service.
/**
 * Serves one parse-tree request: parses a document and writes a dump of the
 * resulting tree, followed by the collected parse errors, as plain text.
 *
 * <p>The document comes from one of three places: the URL in the {@code doc}
 * parameter (GET), the text in the {@code content} parameter (GET), or the
 * request body (POST). A GET request with neither parameter receives the
 * HTML form instead. SAX and I/O failures during processing are written to
 * the response body rather than propagated.
 *
 * @throws IOException if writing to the response fails
 */
public void service() throws IOException {
    request.setCharacterEncoding("utf-8");
    String content = null;
    String document = scrubUrl(request.getParameter("doc"));
    document = ("".equals(document)) ? null : document;
    try (Writer writer = new OutputStreamWriter(response.getOutputStream(), "UTF-8")) {
        // Note: "content" is only looked up when no document URL was given
        // and the request is a GET.
        if (document == null && methodIsGet() && (content = request.getParameter("content")) == null) {
            response.setContentType("text/html; charset=utf-8");
            writer.write(FORM_HTML);
            writer.flush();
            return;
        }
        response.setContentType("text/plain; charset=utf-8");
        try {
            // NOTE(review): the arguments look like (size limit, flag, error
            // handler) with no handler supplied - confirm against the
            // PrudentHttpEntityResolver constructor.
            PrudentHttpEntityResolver entityResolver = new PrudentHttpEntityResolver(2048 * 1024, false, null);
            entityResolver.setAllowGenericXml(false);
            entityResolver.setAcceptAllKnownXmlTypes(false);
            entityResolver.setAllowHtml(true);
            entityResolver.setAllowXhtml(true);
            TypedInputSource documentInput;
            if (methodIsGet()) {
                if (content == null) {
                    documentInput = (TypedInputSource) entityResolver.resolveEntity(null, document);
                } else {
                    documentInput = new TypedInputSource(new StringReader(content));
                    if ("xml".equals(request.getParameter("parser"))) {
                        documentInput.setType("application/xhtml+xml");
                    } else {
                        documentInput.setType("text/html");
                    }
                }
            } else {
                // POST
                String postContentType = request.getContentType();
                if (postContentType == null) {
                    response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Content-Type missing");
                    return;
                } else if (postContentType.trim().toLowerCase(java.util.Locale.ROOT).startsWith("application/x-www-form-urlencoded")) {
                    // Locale.ROOT keeps the media-type comparison independent
                    // of the server's default locale (e.g. Turkish dotless i).
                    response.sendError(HttpServletResponse.SC_UNSUPPORTED_MEDIA_TYPE, "application/x-www-form-urlencoded not supported. Please use multipart/form-data.");
                    return;
                }
                long len = request.getContentLength();
                if (len > SIZE_LIMIT) {
                    throw new StreamBoundException("Resource size exceeds limit.");
                }
                ContentTypeParser contentTypeParser = new ContentTypeParser(null, false);
                contentTypeParser.setAllowGenericXml(false);
                contentTypeParser.setAcceptAllKnownXmlTypes(false);
                contentTypeParser.setAllowHtml(true);
                contentTypeParser.setAllowXhtml(true);
                documentInput = contentTypeParser.buildTypedInputSource(document, null, postContentType);
                // With no declared length the body is bounded while reading;
                // a declared length was already checked against the limit.
                documentInput.setByteStream(len < 0 ? new BoundedInputStream(request.getInputStream(), SIZE_LIMIT, document) : request.getInputStream());
                documentInput.setSystemId(request.getHeader("Content-Location"));
            }
            String type = documentInput.getType();
            XMLReader parser;
            if ("text/html".equals(type) || "text/html-sandboxed".equals(type)) {
                writer.write("HTML parser\n\n#document\n");
                parser = new nu.validator.htmlparser.sax.HtmlParser();
                parser.setProperty("http://validator.nu/properties/heuristics", Heuristics.ALL);
                parser.setProperty("http://validator.nu/properties/xml-policy", XmlViolationPolicy.ALLOW);
            } else if ("application/xhtml+xml".equals(type)) {
                writer.write("XML parser\n\n#document\n");
                parser = new SAXDriver();
                // External entities are disabled for the XML parser.
                parser.setFeature("http://xml.org/sax/features/external-general-entities", false);
                parser.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
                parser.setEntityResolver(new NullEntityResolver());
            } else {
                writer.write("Unsupported content type.\n");
                writer.flush();
                return;
            }
            TreeDumpContentHandler treeDumpContentHandler = new TreeDumpContentHandler(writer, false);
            ListErrorHandler listErrorHandler = new ListErrorHandler();
            parser.setContentHandler(treeDumpContentHandler);
            parser.setProperty("http://xml.org/sax/properties/lexical-handler", treeDumpContentHandler);
            parser.setErrorHandler(listErrorHandler);
            parser.parse(documentInput);
            writer.write("#errors\n");
            for (String err : listErrorHandler.getErrors()) {
                writer.write(err);
                writer.write('\n');
            }
        } catch (SAXException e) {
            writer.write("SAXException:\n");
            // Writer.write(String) rejects null, and an exception may have no
            // message; fall back to its string form so the report survives.
            String message = e.getMessage();
            writer.write(message != null ? message : e.toString());
            writer.write("\n");
        } catch (IOException e) {
            writer.write("IOException:\n");
            String message = e.getMessage();
            writer.write(message != null ? message : e.toString());
            writer.write("\n");
        } finally {
            writer.flush();
        }
    }
}
use of nu.validator.io.BoundedInputStream in project validator by validator.
the class VerifierServletTransaction method loadDocumentInput.
/**
 * Populates {@code documentInput} if it has not been loaded yet; does nothing
 * when it is already set.
 *
 * <p>For GET requests the document is obtained through the entity resolver.
 * For POST requests it is read from the request body, which is rejected when
 * its declared length exceeds {@code SIZE_LIMIT} and is wrapped in a bounded
 * stream when no length is declared. When an image collector is present, a
 * base URI tracker is created from the loaded input and handed to it.
 *
 * @throws SAXException if resolving the document fails with a SAX error
 * @throws IOException if the document cannot be read, or if the declared
 *         request body length exceeds the size limit
 */
protected void loadDocumentInput() throws SAXException, IOException {
    if (documentInput == null) {
        if (!methodIsGet) {
            // POST: the document is the request body.
            final long declaredLength = request.getContentLength();
            if (declaredLength > SIZE_LIMIT) {
                throw new StreamBoundException("Resource size exceeds limit.");
            }
            documentInput = contentTypeParser.buildTypedInputSource(document, null, postContentType);
            if (declaredLength < 0) {
                // No declared length: enforce the limit while reading.
                documentInput.setByteStream(new BoundedInputStream(request.getInputStream(), SIZE_LIMIT, document));
            } else {
                documentInput.setByteStream(request.getInputStream());
            }
            documentInput.setSystemId(request.getHeader("Content-Location"));
        } else {
            // GET: fetch the document through the entity resolver.
            documentInput = (TypedInputSource) entityResolver.resolveEntity(null, document);
            errorHandler.setLoggingOk(true);
        }
        if (imageCollector != null) {
            baseUriTracker = new BaseUriTracker(documentInput.getSystemId(), documentInput.getLanguage());
            imageCollector.initializeContext(baseUriTracker);
        }
    }
}
Aggregations