use of nu.validator.io.SystemIdIOException in project validator by validator.
the class MessageEmitterAdapter method fatalError.
/**
 * Records a fatal error and emits the matching message.
 *
 * <p>The call is ignored when a fatal error has already been counted outside
 * batch mode, or when any non-document error has been counted. Otherwise the
 * fatal-error counter is incremented. If the exception wrapped by {@code e}
 * is an {@link IOException}, an IO message is emitted (carrying the system
 * id when the wrapped exception is a {@code SystemIdIOException}); in every
 * other case a FATAL message is built from {@code e} itself.
 *
 * @param e the parse exception describing the fatal error
 * @param exact passed through unchanged to {@code messageFromSAXParseException}
 * @throws SAXException if emitting the message fails
 */
private void fatalError(SAXParseException e, boolean exact) throws SAXException {
    boolean fatalAlreadyReported = !batchMode && fatalErrors > 0;
    if (fatalAlreadyReported || nonDocumentErrors > 0) {
        return;
    }
    this.fatalErrors++;

    Exception cause = e.getException();
    if (!(cause instanceof IOException)) {
        // Not an I/O problem: report the parse exception itself as fatal.
        messageFromSAXParseException(MessageType.FATAL, e, exact, null);
        return;
    }

    String causeSystemId = (cause instanceof SystemIdIOException)
            ? ((SystemIdIOException) cause).getSystemId()
            : null;
    message(MessageType.IO, cause, causeSystemId, -1, -1, false, null);
}
use of nu.validator.io.SystemIdIOException in project validator by validator.
the class PrudentHttpEntityResolver method resolveEntity.
/**
 * Resolves an entity by issuing an HTTP GET for {@code systemId} and wrapping
 * the response body in a typed input source.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>Consume one unit of the request quota when {@code requestsLeft} is
 * non-negative; fail with an {@link IOException} when it is already zero.</li>
 * <li>Parse {@code systemId} and reject schemes other than {@code http} and
 * {@code https}.</li>
 * <li>Send the request with the configured User-Agent, Accept,
 * Accept-Encoding and (optionally) Accept-Language headers.</li>
 * <li>Reject non-200 responses unless the request attribute
 * {@code http://validator.nu/properties/ignore-response-status} says
 * otherwise, and reject declared content lengths above {@code sizeLimit}.</li>
 * <li>Build the input source from the Content-Type header, copy
 * Content-Language, and report problems in the X-UA-Compatible and
 * Content-Security-Policy headers as non-fatal messages.</li>
 * <li>Wrap the body stream (size bound, gzip decoding, and an observer that
 * releases the connection on close, error or finalization).</li>
 * </ol>
 *
 * <p>All reports go through {@code errorHandler} only when it is non-null;
 * this now also holds for the non-fatal header reports, which previously
 * dereferenced the handler without a check.
 *
 * <p>On any {@link IOException}, {@link RuntimeException} or
 * {@link SAXException} the request is aborted and its connection released
 * before the exception is rethrown.
 *
 * @param publicId the public identifier, used for error locations and the
 *        returned source
 * @param systemId the URL to fetch
 * @return the input source whose byte stream is the (possibly decoded and
 *         bounded) response body
 * @throws SAXException for an unsupported scheme, a URL the HTTP client
 *         rejects, or a declared size above the limit; also whatever the
 *         error handler or content-type parser throws
 * @throws IOException when the request quota is exhausted, the URL cannot be
 *         parsed, the port exceeds 65535, or the HTTP exchange fails
 * @see org.xml.sax.EntityResolver#resolveEntity(java.lang.String,
 * java.lang.String)
 */
@Override
public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
    if (requestsLeft > -1) {
        if (requestsLeft == 0) {
            throw new IOException("Number of permitted HTTP requests exceeded.");
        } else {
            requestsLeft--;
        }
    }
    HttpGet m = null;
    try {
        URL url = null;
        try {
            url = URL.parse(systemId);
        } catch (GalimatiasParseException e) {
            IOException ioe = (IOException) new IOException(e.getMessage()).initCause(e);
            SAXParseException spe = new SAXParseException(e.getMessage(), publicId, systemId, -1, -1, ioe);
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw ioe;
        }
        String scheme = url.scheme();
        if (!("http".equals(scheme) || "https".equals(scheme))) {
            String msg = "Unsupported URI scheme: \u201C" + scheme + "\u201D.";
            SAXParseException spe = new SAXParseException(msg, publicId, systemId, -1, -1, new IOException(msg));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw spe;
        }
        // From here on work with the parsed URL's string form.
        systemId = url.toString();
        try {
            m = new HttpGet(systemId);
        } catch (IllegalArgumentException e) {
            SAXParseException spe = new SAXParseException(e.getMessage(), publicId, systemId, -1, -1, (IOException) new IOException(e.getMessage()).initCause(e));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw spe;
        }
        m.setHeader("User-Agent", userAgent);
        m.setHeader("Accept", buildAccept());
        m.setHeader("Accept-Encoding", "gzip");
        if (request != null && request.getAttribute("http://validator.nu/properties/accept-language") != null) {
            m.setHeader("Accept-Language", (String) request.getAttribute("http://validator.nu/properties/accept-language"));
        }
        log4j.info(systemId);
        try {
            if (url.port() > 65535) {
                throw new IOException("Port number must be less than 65536.");
            }
        } catch (NumberFormatException e) {
            throw new IOException("Port number must be less than 65536.");
        }
        HttpResponse response = client.execute(m);
        boolean ignoreResponseStatus = false;
        if (request != null && request.getAttribute("http://validator.nu/properties/ignore-response-status") != null) {
            ignoreResponseStatus = (boolean) request.getAttribute("http://validator.nu/properties/ignore-response-status");
        }
        int statusCode = response.getStatusLine().getStatusCode();
        if (statusCode != 200 && !ignoreResponseStatus) {
            String msg = "HTTP resource not retrievable." + " The HTTP status from the remote server was: " + statusCode + ".";
            SAXParseException spe = new SAXParseException(msg, publicId, m.getURI().toString(), -1, -1, new SystemIdIOException(m.getURI().toString(), msg));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw new ResourceNotRetrievableException(String.format("%s: %s", m.getURI().toString(), msg));
        }
        // NOTE(review): getEntity() can presumably return null for body-less
        // responses; confirm whether that is reachable when the status check
        // above is bypassed via ignore-response-status.
        HttpEntity entity = response.getEntity();
        long len = entity.getContentLength();
        if (sizeLimit > -1 && len > sizeLimit) {
            SAXParseException spe = new SAXParseException("Resource size exceeds limit.", publicId, m.getURI().toString(), -1, -1, new StreamBoundException("Resource size exceeds limit."));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw spe;
        }
        TypedInputSource is;
        org.apache.http.Header ct = response.getFirstHeader("Content-Type");
        String contentType = null;
        final String baseUri = m.getURI().toString();
        if (ct != null) {
            contentType = ct.getValue();
        }
        is = contentTypeParser.buildTypedInputSource(baseUri, publicId, contentType);
        Header cl = response.getFirstHeader("Content-Language");
        if (cl != null) {
            is.setLanguage(cl.getValue().trim());
        }
        Header xuac = response.getFirstHeader("X-UA-Compatible");
        if (xuac != null) {
            String val = xuac.getValue().trim();
            // Guard the handler like every other report in this method; a
            // null handler simply means nobody is listening.
            if (!"ie=edge".equalsIgnoreCase(val) && errorHandler != null) {
                SAXParseException spe = new SAXParseException("X-UA-Compatible HTTP header must have the value \u201CIE=edge\u201D," + " was \u201C" + val + "\u201D.", publicId, systemId, -1, -1);
                errorHandler.error(spe);
            }
        }
        Header csp = response.getFirstHeader("Content-Security-Policy");
        if (csp != null) {
            try {
                ContentSecurityPolicy.THE_INSTANCE.checkValid(csp.getValue().trim());
            } catch (DatatypeException e) {
                if (errorHandler != null) {
                    SAXParseException spe = new SAXParseException("Content-Security-Policy HTTP header: " + e.getMessage(), publicId, systemId, -1, -1);
                    // Only an Html5DatatypeException can downgrade itself to a
                    // warning; any other DatatypeException is reported as an
                    // error instead of failing on a blind cast.
                    boolean warningOnly = e instanceof Html5DatatypeException && ((Html5DatatypeException) e).isWarning();
                    if (warningOnly) {
                        errorHandler.warning(spe);
                    } else {
                        errorHandler.error(spe);
                    }
                }
            }
        }
        final HttpGet meth = m;
        InputStream stream = entity.getContent();
        if (sizeLimit > -1) {
            // Bound the raw (possibly compressed) bytes.
            stream = new BoundedInputStream(stream, sizeLimit, baseUri);
        }
        Header ce = response.getFirstHeader("Content-Encoding");
        if (ce != null) {
            String val = ce.getValue().trim();
            if ("gzip".equalsIgnoreCase(val) || "x-gzip".equalsIgnoreCase(val)) {
                stream = new GZIPInputStream(stream);
                if (sizeLimit > -1) {
                    // Bound the decompressed bytes as well.
                    stream = new BoundedInputStream(stream, sizeLimit, baseUri);
                }
            }
        }
        is.setByteStream(new ObservableInputStream(stream, new StreamObserver() {
            private final Logger log4j = Logger.getLogger("nu.validator.xml.PrudentEntityResolver.StreamObserver");
            // Ensures the connection is released at most once across callbacks.
            private boolean released = false;

            @Override
            public void closeCalled() {
                log4j.debug("closeCalled");
                if (!released) {
                    log4j.debug("closeCalled, not yet released");
                    released = true;
                    try {
                        meth.releaseConnection();
                    } catch (Exception e) {
                        log4j.debug("closeCalled, releaseConnection", e);
                    }
                }
            }

            @Override
            public void exceptionOccurred(Exception ex) throws IOException {
                if (!released) {
                    released = true;
                    try {
                        meth.abort();
                    } catch (Exception e) {
                        log4j.debug("exceptionOccurred, abort", e);
                    } finally {
                        try {
                            meth.releaseConnection();
                        } catch (Exception e) {
                            log4j.debug("exceptionOccurred, releaseConnection", e);
                        }
                    }
                }
                // Rethrow, attaching the resource URI to plain IOExceptions.
                if (ex instanceof SystemIdIOException) {
                    throw (SystemIdIOException) ex;
                } else if (ex instanceof IOException) {
                    IOException ioe = (IOException) ex;
                    throw new SystemIdIOException(baseUri, ioe.getMessage(), ioe);
                } else if (ex instanceof RuntimeException) {
                    throw (RuntimeException) ex;
                } else {
                    throw new RuntimeException("API contract violation. Wrong exception type.", ex);
                }
            }

            @Override
            public void finalizerCalled() {
                if (!released) {
                    released = true;
                    try {
                        meth.abort();
                    } catch (Exception e) {
                        log4j.debug("finalizerCalled, abort", e);
                    } finally {
                        try {
                            meth.releaseConnection();
                        } catch (Exception e) {
                            log4j.debug("finalizerCalled, releaseConnection", e);
                        }
                    }
                }
            }
        }));
        return is;
    } catch (IOException | RuntimeException | SAXException e) {
        // Failure before the stream was handed out: free the connection here,
        // since no observer callback will do it.
        if (m != null) {
            try {
                m.abort();
            } catch (Exception ex) {
                log4j.debug("abort", ex);
            } finally {
                try {
                    m.releaseConnection();
                } catch (Exception ex) {
                    log4j.debug("releaseConnection", ex);
                }
            }
        }
        throw e;
    }
}
use of nu.validator.io.SystemIdIOException in project validator by validator.
the class ContentTypeParser method buildTypedInputSource.
/**
 * Builds a {@link TypedInputSource} for a resource from its Content-Type
 * header value.
 *
 * <p>The media type (the part before the first {@code ;}) is matched against
 * the formats this parser is configured to allow (RELAX NG compact, CSS,
 * HTML, XML); a type that is not acceptable is reported through
 * {@code errorHandler} (when set) and then thrown. Each remaining parameter
 * is scanned for a {@code charset} parameter; syntax problems are reported
 * through {@code malformedContentTypeError}. When no charset is found for a
 * {@code text/*} type that is neither HTML nor CSS, either a warning is
 * issued (lax mode) or the encoding defaults to US-ASCII with a warning.
 *
 * <p>When {@code contentType} is {@code null}, only the public and system
 * ids are set on the result.
 *
 * <p>Robustness notes:
 * <ul>
 * <li>A header consisting only of semicolons yields an empty media type
 * instead of an array-index failure.</li>
 * <li>The media type is lower-cased independently of the default locale.</li>
 * <li>A {@code charset} token that ends the parameter is detected whatever
 * amount of whitespace precedes it.</li>
 * <li>Quote state and the collected encoding name are reset for every
 * parameter, so repeated parameters no longer contaminate each other.</li>
 * <li>The character directly after a closing quote is checked for being
 * whitespace.</li>
 * </ul>
 *
 * @param baseUri the system id for the returned source and for error
 *        locations
 * @param publicId the public id for the returned source and for error
 *        locations
 * @param contentType the raw Content-Type header value, or {@code null}
 * @return the input source with ids set and, when determinable, type and
 *         encoding
 * @throws SAXException if the content type is not acceptable, or if the
 *         error handler throws
 * @throws SAXParseException for a non-HTML type when only HTML is allowed
 */
public TypedInputSource buildTypedInputSource(String baseUri, String publicId, String contentType) throws SAXException, SAXParseException {
    TypedInputSource is;
    is = new TypedInputSource();
    is.setPublicId(publicId);
    is.setSystemId(baseUri);
    if (contentType != null) {
        String[] params = contentType.split(";");
        // split() drops trailing empty strings, so a value such as ";" yields
        // an empty array; treat that as an empty media type.
        String type = params.length == 0 ? "" : params[0].trim().toLowerCase(java.util.Locale.ROOT);
        boolean wasRnc = false;
        boolean wasCss = false;
        boolean wasHtml = false;
        if (isAllowRnc()) {
            if (rncContentType(type, is)) {
                wasRnc = true;
                is.setType("application/relax-ng-compact-syntax");
            }
        }
        if (isAllowCss()) {
            if ("text/css".equals(type)) {
                wasCss = true;
                is.setType("text/css");
            }
        }
        if (!wasRnc && !wasCss) {
            if (isAllowHtml()) {
                if ("text/html".equals(type) || "text/html-sandboxed".equals(type)) {
                    is.setType(type);
                    wasHtml = true;
                } else if (isOnlyHtmlAllowed()) {
                    if (laxContentType && "text/plain".equals(type)) {
                        is.setType(type);
                        wasHtml = true;
                        if (errorHandler != null) {
                            errorHandler.warning(new SAXParseException("Being lax about non-HTML Content-Type: " + type, is.getPublicId(), is.getSystemId(), -1, -1));
                        }
                    } else if ("application/octet-stream".equals(type)) {
                        is.setType(type);
                        wasHtml = true;
                    } else {
                        String msg = "Non-HTML Content-Type: \u201C" + type + "\u201D.";
                        SAXParseException spe = new SAXParseException(msg, publicId, baseUri, -1, -1, new SystemIdIOException(baseUri, msg));
                        if (errorHandler != null) {
                            errorHandler.fatalError(spe);
                        }
                        throw spe;
                    }
                }
            }
            if (!wasHtml && (isAllowGenericXml() || isAllowXhtml() || isAcceptAllKnownXmlTypes())) {
                if (!xmlContentType(type, is)) {
                    String msg = "Non-XML Content-Type: \u201C" + type + "\u201D.";
                    SAXParseException spe = new SAXParseException(msg, publicId, baseUri, -1, -1, new SystemIdIOException(baseUri, msg));
                    if (errorHandler != null) {
                        errorHandler.fatalError(spe);
                    }
                    throw new NonXmlContentTypeException(String.format("%s: %s", baseUri, msg));
                } else {
                    is.setType(type);
                }
            }
        }
        String charset = null;
        char c;
        for (int i = 1; i < params.length; i++) {
            String param = params[i];
            // Parser state belongs to a single parameter; keeping it outside
            // the loop let one parameter's quote flag and partial name leak
            // into the next.
            boolean quoted = false;
            StringBuilder sb = new StringBuilder();
            int offset;
            // Advance to the first 'c'/'C' (or the end of the parameter).
            beforeCharset: for (offset = 0; offset < param.length(); offset++) {
                c = param.charAt(offset);
                switch (c) {
                    case ' ':
                    case '\t':
                    case '\n':
                    case '\u000C':
                    case '\r':
                        continue;
                    case 'c':
                    case 'C':
                        break beforeCharset;
                    default:
                }
            }
            inCharset: if (hasCharset(param, offset)) {
                // "charset" is 7 characters; nothing after it means the "="
                // is missing. Comparing against offset (rather than a fixed
                // length) covers any amount of leading whitespace and avoids
                // reading past the end of the parameter.
                if (param.length() <= offset + 7) {
                    malformedContentTypeError(contentType, "Expected an \u201c=\u201d sign but" + " \u201ccharset\u201d parameter" + " ended.");
                    break inCharset;
                }
                offset += 7;
                c = param.charAt(offset);
                switch (c) {
                    case '=':
                        offset++;
                        break;
                    case ' ':
                    case '\t':
                    case '\n':
                    case '\u000C':
                    case '\r':
                        malformedContentTypeError(contentType, "Whitespace is not allowed before the \u201c=\u201d sign in the \u201ccharset\u201d parameter.");
                        break inCharset;
                    default:
                        malformedContentTypeError(contentType, "Expected an \u201c=\u201d sign but saw \u201c" + c + "\u201d instead.");
                        break inCharset;
                }
                if (offset == param.length()) {
                    malformedContentTypeError(contentType, "The empty string is not a valid encoding name.");
                    break inCharset;
                }
                c = param.charAt(offset);
                switch (c) {
                    case '"':
                        offset++;
                        quoted = true;
                        break;
                    case ' ':
                    case '\t':
                    case '\n':
                    case '\u000C':
                    case '\r':
                        malformedContentTypeError(contentType, "Whitespace is not allowed after the \u201c=\u201d sign in the parameter value.");
                        break inCharset;
                    default:
                        break;
                }
                // Collect the encoding name; offset tracks j so that it ends
                // up on the first character that is not part of the name.
                inEncodingName: for (int j = offset; j < param.length(); j++) {
                    c = param.charAt(j);
                    switch (c) {
                        case '"':
                            if (!quoted) {
                                malformedContentTypeError(contentType, "Unmatched \u201c\"\u201d character in \u201ccharset\u201d parameter.");
                                break inCharset;
                            }
                            break inEncodingName;
                        case ' ':
                        case '\t':
                        case '\n':
                        case '\u000C':
                        case '\r':
                            break inEncodingName;
                        default:
                    }
                    if (!((c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || c == '-' || c == '!' || c == '#' || c == '$' || c == '%' || c == '&' || c == '\'' || c == '+' || c == '_' || c == '`' || c == '{' || c == '}' || c == '~' || c == '^')) {
                        malformedContentTypeError(contentType, "The character \u201c" + c + "\u201d is not a valid character in an encoding name.");
                        break inCharset;
                    }
                    offset++;
                    sb.append(c);
                }
                if (quoted) {
                    if (param.length() > offset && '"' == param.charAt(offset)) {
                        offset++;
                    } else {
                        malformedContentTypeError(contentType, "Unmatched \u201c\"\u201d character in \u201ccharset\u201d parameter.");
                        break inCharset;
                    }
                }
                // Everything left must be whitespace. Start at offset itself
                // so the character right after a closing quote is checked.
                for (int k = offset; k < param.length(); k++) {
                    c = param.charAt(k);
                    switch (c) {
                        case ' ':
                        case '\t':
                        case '\n':
                        case '\u000C':
                        case '\r':
                            continue;
                        default:
                            malformedContentTypeError(contentType, "Only whitespace is allowed after the encoding name in the \u201ccharset\u201d parameter. " + "Found \u201c" + c + "\u201d instead.");
                            break inCharset;
                    }
                }
                if (sb.length() == 0) {
                    malformedContentTypeError(contentType, "The empty string is not a valid encoding name.");
                }
            }
            if (sb.length() > 0) {
                if ('\'' == sb.charAt(0) && '\'' == sb.charAt(sb.length() - 1)) {
                    malformedContentTypeError(contentType, "Single-quoted encoding names are not allowed in the \u201ccharset\u201d parameter.");
                } else {
                    charset = sb.toString();
                }
            }
        }
        if (charset != null) {
            is.setEncoding(charset);
        } else if (type.startsWith("text/") && !wasHtml && !wasCss) {
            if (laxContentType) {
                if (errorHandler != null) {
                    errorHandler.warning(new SAXParseException("text/* type without a charset parameter seen. Would have defaulted to US-ASCII had the lax option not been chosen.", is.getPublicId(), is.getSystemId(), -1, -1));
                }
            } else {
                is.setEncoding("US-ASCII");
                if (errorHandler != null) {
                    errorHandler.warning(new SAXParseException("text/* type without a charset parameter seen. Defaulting to US-ASCII per section 3.1 of RFC 3023.", is.getPublicId(), is.getSystemId(), -1, -1));
                }
            }
        }
    }
    return is;
}
use of nu.validator.io.SystemIdIOException in project validator by validator.
the class MessageEmitterAdapter method ioError.
/**
 * Counts a non-document error and emits an IO message for it.
 *
 * <p>When {@code e} is a {@code SystemIdIOException}, its system id is
 * attached to the message; otherwise the message carries no system id.
 *
 * @param e the I/O failure to report
 * @throws SAXException if emitting the message fails
 */
public void ioError(IOException e) throws SAXException {
    this.nonDocumentErrors++;
    String failingSystemId = (e instanceof SystemIdIOException)
            ? ((SystemIdIOException) e).getSystemId()
            : null;
    message(MessageType.IO, e, failingSystemId, -1, -1, false, null);
}
Aggregations