Use of io.mola.galimatias.URL in project validator by validator.
In the class PrudentHttpEntityResolver, method resolveEntity.
/**
 * Resolves an entity by fetching it with an HTTP GET request.
 *
 * <p>The system identifier is parsed and re-serialized as a URL and must use
 * the {@code http} or {@code https} scheme. The response must have status 200
 * unless the request attribute
 * {@code http://validator.nu/properties/ignore-response-status} is set to true,
 * and its declared content length must not exceed {@code sizeLimit} when a
 * limit is configured (a value above -1). The {@code Content-Language},
 * {@code X-UA-Compatible} and {@code Content-Security-Policy} response headers
 * are inspected; problems are reported to {@code errorHandler} when one is
 * set.
 *
 * <p>The returned byte stream is observed so that the HTTP connection is
 * released when the stream is closed and aborted when reading fails or the
 * stream is finalized. If this method itself fails after the request object
 * has been created, the request is aborted and released before the exception
 * is rethrown.
 *
 * @param publicId the public identifier; used for error locations and passed
 *        to the content type parser
 * @param systemId the URL to fetch
 * @return a typed input source whose byte stream yields the response body
 *         (gunzipped when the response is gzip-encoded)
 * @throws SAXException if the scheme is unsupported, the request cannot be
 *         built, the declared size exceeds the limit, or the error handler
 *         throws
 * @throws IOException if the request budget is exhausted, the URL cannot be
 *         parsed, the port is out of range, the resource is not retrievable,
 *         or the transfer fails
 * @see org.xml.sax.EntityResolver#resolveEntity(java.lang.String,
 * java.lang.String)
 */
@Override
public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
    // A requestsLeft of -1 or lower means no request budget is enforced.
    if (requestsLeft > -1) {
        if (requestsLeft == 0) {
            throw new IOException("Number of permitted HTTP requests exceeded.");
        } else {
            requestsLeft--;
        }
    }
    HttpGet m = null;
    try {
        URL url = null;
        try {
            url = URL.parse(systemId);
        } catch (GalimatiasParseException e) {
            IOException ioe = (IOException) new IOException(e.getMessage()).initCause(e);
            SAXParseException spe = new SAXParseException(e.getMessage(), publicId, systemId, -1, -1, ioe);
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw ioe;
        }
        String scheme = url.scheme();
        if (!("http".equals(scheme) || "https".equals(scheme))) {
            String msg = "Unsupported URI scheme: \u201C" + scheme + "\u201D.";
            SAXParseException spe = new SAXParseException(msg, publicId, systemId, -1, -1, new IOException(msg));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw spe;
        }
        // From here on, work with the serialized form of the parsed URL.
        systemId = url.toString();
        try {
            m = new HttpGet(systemId);
        } catch (IllegalArgumentException e) {
            SAXParseException spe = new SAXParseException(e.getMessage(), publicId, systemId, -1, -1, (IOException) new IOException(e.getMessage()).initCause(e));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw spe;
        }
        m.setHeader("User-Agent", userAgent);
        m.setHeader("Accept", buildAccept());
        m.setHeader("Accept-Encoding", "gzip");
        if (request != null && request.getAttribute("http://validator.nu/properties/accept-language") != null) {
            m.setHeader("Accept-Language", (String) request.getAttribute("http://validator.nu/properties/accept-language"));
        }
        log4j.info(systemId);
        // Reject out-of-range ports before the request is executed.
        try {
            if (url.port() > 65535) {
                throw new IOException("Port number must be less than 65536.");
            }
        } catch (NumberFormatException e) {
            throw new IOException("Port number must be less than 65536.");
        }
        HttpResponse response = client.execute(m);
        boolean ignoreResponseStatus = false;
        if (request != null && request.getAttribute("http://validator.nu/properties/ignore-response-status") != null) {
            ignoreResponseStatus = (boolean) request.getAttribute("http://validator.nu/properties/ignore-response-status");
        }
        int statusCode = response.getStatusLine().getStatusCode();
        if (statusCode != 200 && !ignoreResponseStatus) {
            String msg = "HTTP resource not retrievable." + " The HTTP status from the remote server was: " + statusCode + ".";
            SAXParseException spe = new SAXParseException(msg, publicId, m.getURI().toString(), -1, -1, new SystemIdIOException(m.getURI().toString(), msg));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw new ResourceNotRetrievableException(String.format("%s: %s", m.getURI().toString(), msg));
        }
        HttpEntity entity = response.getEntity();
        long len = entity.getContentLength();
        // Early rejection based on the declared length; the stream itself is
        // also bounded further down.
        if (sizeLimit > -1 && len > sizeLimit) {
            SAXParseException spe = new SAXParseException("Resource size exceeds limit.", publicId, m.getURI().toString(), -1, -1, new StreamBoundException("Resource size exceeds limit."));
            if (errorHandler != null) {
                errorHandler.fatalError(spe);
            }
            throw spe;
        }
        TypedInputSource is;
        org.apache.http.Header ct = response.getFirstHeader("Content-Type");
        String contentType = null;
        final String baseUri = m.getURI().toString();
        if (ct != null) {
            contentType = ct.getValue();
        }
        is = contentTypeParser.buildTypedInputSource(baseUri, publicId, contentType);
        Header cl = response.getFirstHeader("Content-Language");
        if (cl != null) {
            is.setLanguage(cl.getValue().trim());
        }
        Header xuac = response.getFirstHeader("X-UA-Compatible");
        if (xuac != null) {
            String val = xuac.getValue().trim();
            if (!"ie=edge".equalsIgnoreCase(val)) {
                SAXParseException spe = new SAXParseException("X-UA-Compatible HTTP header must have the value \u201CIE=edge\u201D," + " was \u201C" + val + "\u201D.", publicId, systemId, -1, -1);
                // The error handler is optional everywhere else in this
                // method, so it must be guarded here as well.
                if (errorHandler != null) {
                    errorHandler.error(spe);
                }
            }
        }
        Header csp = response.getFirstHeader("Content-Security-Policy");
        if (csp != null) {
            try {
                ContentSecurityPolicy.THE_INSTANCE.checkValid(csp.getValue().trim());
            } catch (DatatypeException e) {
                SAXParseException spe = new SAXParseException("Content-Security-Policy HTTP header: " + e.getMessage(), publicId, systemId, -1, -1);
                if (errorHandler != null) {
                    // Only an Html5DatatypeException can downgrade the
                    // problem to a warning; anything else is an error.
                    boolean warningOnly = e instanceof Html5DatatypeException && ((Html5DatatypeException) e).isWarning();
                    if (warningOnly) {
                        errorHandler.warning(spe);
                    } else {
                        errorHandler.error(spe);
                    }
                }
            }
        }
        final HttpGet meth = m;
        InputStream stream = entity.getContent();
        if (sizeLimit > -1) {
            stream = new BoundedInputStream(stream, sizeLimit, baseUri);
        }
        Header ce = response.getFirstHeader("Content-Encoding");
        if (ce != null) {
            String val = ce.getValue().trim();
            if ("gzip".equalsIgnoreCase(val) || "x-gzip".equalsIgnoreCase(val)) {
                stream = new GZIPInputStream(stream);
                // Bound the decompressed stream too, in addition to the raw
                // stream wrapped above.
                if (sizeLimit > -1) {
                    stream = new BoundedInputStream(stream, sizeLimit, baseUri);
                }
            }
        }
        is.setByteStream(new ObservableInputStream(stream, new StreamObserver() {
            private final Logger log4j = Logger.getLogger("nu.validator.xml.PrudentEntityResolver.StreamObserver");
            // Ensures the connection is released (or aborted) only once.
            private boolean released = false;

            @Override
            public void closeCalled() {
                log4j.debug("closeCalled");
                if (!released) {
                    log4j.debug("closeCalled, not yet released");
                    released = true;
                    try {
                        meth.releaseConnection();
                    } catch (Exception e) {
                        log4j.debug("closeCalled, releaseConnection", e);
                    }
                }
            }

            @Override
            public void exceptionOccurred(Exception ex) throws IOException {
                if (!released) {
                    released = true;
                    try {
                        meth.abort();
                    } catch (Exception e) {
                        log4j.debug("exceptionOccurred, abort", e);
                    } finally {
                        try {
                            meth.releaseConnection();
                        } catch (Exception e) {
                            log4j.debug("exceptionOccurred, releaseConnection", e);
                        }
                    }
                }
                // Rethrow, attaching the base URI to plain IOExceptions.
                if (ex instanceof SystemIdIOException) {
                    throw (SystemIdIOException) ex;
                } else if (ex instanceof IOException) {
                    IOException ioe = (IOException) ex;
                    throw new SystemIdIOException(baseUri, ioe.getMessage(), ioe);
                } else if (ex instanceof RuntimeException) {
                    throw (RuntimeException) ex;
                } else {
                    throw new RuntimeException("API contract violation. Wrong exception type.", ex);
                }
            }

            @Override
            public void finalizerCalled() {
                if (!released) {
                    released = true;
                    try {
                        meth.abort();
                    } catch (Exception e) {
                        log4j.debug("finalizerCalled, abort", e);
                    } finally {
                        try {
                            meth.releaseConnection();
                        } catch (Exception e) {
                            log4j.debug("finalizerCalled, releaseConnection", e);
                        }
                    }
                }
            }
        }));
        return is;
    } catch (IOException | RuntimeException | SAXException e) {
        // On any failure, give the connection back before propagating.
        if (m != null) {
            try {
                m.abort();
            } catch (Exception ex) {
                log4j.debug("abort", ex);
            } finally {
                try {
                    m.releaseConnection();
                } catch (Exception ex) {
                    log4j.debug("releaseConnection", ex);
                }
            }
        }
        throw e;
    }
}
Use of io.mola.galimatias.URL in project validator by validator.
In the class DataUriEntityResolver, method resolveEntity.
/**
 * Resolves {@code data:} URLs directly and hands every other system
 * identifier to the delegate resolver, if there is one.
 *
 * @param publicId the public identifier, forwarded to the content type parser,
 *        error reports and the delegate
 * @param systemId the system identifier to resolve
 * @return an input source for the decoded data URL, or whatever the delegate
 *         returns
 * @throws SAXException if the data URL cannot be parsed, or if the error
 *         handler or the delegate throws
 * @throws IOException if the identifier is not a data URL and no delegate is
 *         configured, or if the delegate or data decoding fails
 */
@Override
public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
    if (!DataUri.startsWithData(systemId)) {
        // Not a data URL: defer to the delegate or give up.
        if (delegate == null) {
            throw new IOException("Unsupported URI scheme.");
        }
        return delegate.resolveEntity(publicId, systemId);
    }

    final URL parsed;
    try {
        parsed = URL.parse(systemId);
    } catch (GalimatiasParseException e) {
        IOException cause = (IOException) new IOException(e.getMessage()).initCause(e);
        SAXParseException fatal = new SAXParseException(e.getMessage(), publicId, systemId, -1, -1, cause);
        if (errorHandler != null) {
            errorHandler.fatalError(fatal);
        }
        throw fatal;
    }

    // Use the serialized form of the parsed URL from here on.
    final String serialized = parsed.toString();
    final DataUri dataUri = new DataUri(serialized);
    final TypedInputSource source = contentTypeParser.buildTypedInputSource(serialized, publicId, dataUri.getContentType());
    source.setByteStream(dataUri.getInputStream());
    return source;
}
Use of io.mola.galimatias.URL in project validator by validator.
In the class IriRef, method checkValid.
/**
 * Checks that the literal is an acceptable URL for this datatype.
 *
 * <p>The literal must be non-empty after HTML space trimming. Literals
 * without a recognized scheme are parsed relative to a fixed placeholder base
 * unless {@code isAbsolute()} requires an absolute URL. Literals with a
 * scheme are parsed with a strict error handler; {@code javascript:} URLs are
 * not parsed at all, HTTP-alias schemes are parsed as {@code http:}, and
 * unknown schemes are parsed with an {@code x-} prefix. {@code data:} URLs are
 * additionally read to the end so that problems in their content are reported.
 *
 * @param literal the attribute value to check
 * @throws DatatypeException if the literal is empty, violates the absolute or
 *         http/https requirement, fails URL parsing, or is a data URL whose
 *         content cannot be read
 */
@Override
public void checkValid(CharSequence literal) throws DatatypeException {
    String messagePrologue = "";
    int length = literal.length();
    String urlString = literal.toString();
    if (reportValue()) {
        if (length < ELIDE_LIMIT) {
            messagePrologue = "\u201c" + literal + "\u201d: ";
        } else {
            // Long values are shortened to their head and tail around an
            // ellipsis character.
            StringBuilder sb = new StringBuilder(ELIDE_LIMIT + 1);
            sb.append(literal, 0, ELIDE_LIMIT / 2);
            sb.append('\u2026');
            sb.append(literal, length - ELIDE_LIMIT / 2, length);
            messagePrologue = "\u201c" + sb.toString() + "\u201d: ";
        }
    }
    if ("".equals(trimHtmlSpaces(urlString))) {
        throw newDatatypeException("Must be non-empty.");
    }
    URL url = null;
    URLParsingSettings settings = URLParsingSettings.create().withErrorHandler(StrictErrorHandler.getInstance());
    boolean data = false;
    try {
        CharSequencePair pair = splitScheme(literal);
        if (pair == null) {
            // no scheme or scheme is private
            if (isAbsolute()) {
                throw newDatatypeException("The string \u201c" + literal + "\u201d is not an absolute URL.");
            } else {
                if (mustBeHttpOrHttps()) {
                    throw newDatatypeException("Must contain only" + " \u201chttp\u201d or \u201chttps\u201d URLs.");
                }
                // in this case, doc's actual base URL isn't relevant,
                // so just use http://example.org/foo/bar as base
                url = URL.parse(settings, URL.parse("http://example.org/foo/bar"), urlString);
            }
        } else {
            CharSequence scheme = pair.getHead();
            CharSequence tail = pair.getTail();
            if (mustBeHttpOrHttps() && !isHttpOrHttps(scheme)) {
                throw newDatatypeException("Must contain only" + " \u201chttp\u201d or \u201chttps\u201d URLs.");
            }
            if (isWellKnown(scheme)) {
                url = URL.parse(settings, urlString);
            } else if ("javascript".contentEquals(scheme)) {
                // Don't bother user with generic IRI syntax
                url = null;
            } else if ("data".contentEquals(scheme)) {
                data = true;
                url = URL.parse(settings, urlString);
            } else if (isHttpAlias(scheme)) {
                // Check the remainder as if it had been written with http:.
                StringBuilder sb = new StringBuilder(5 + tail.length());
                sb.append("http:").append(tail);
                url = URL.parse(settings, sb.toString());
            } else {
                // Unknown scheme: parse the whole literal with an "x-" prefix.
                StringBuilder sb = new StringBuilder(2 + literal.length());
                sb.append("x-").append(literal);
                url = URL.parse(settings, sb.toString());
            }
        }
    } catch (GalimatiasParseException e) {
        throw newDatatypeException(messagePrologue + e.getMessage() + ".");
    }
    if (url != null && data) {
        // try-with-resources closes the stream even when reading fails.
        try (InputStream is = new DataUri(url).getInputStream()) {
            // Read to the end; failures while reading are reported below.
            while (is.read() >= 0) {
                // spin
            }
        } catch (DataUriException e) {
            throw newDatatypeException(e.getIndex(), e.getHead(), e.getLiteral(), e.getTail());
        } catch (IOException e) {
            String msg = e.getMessage();
            if (WARN && "Fragment is not allowed for data: URIs according to RFC 2397.".equals(msg)) {
                throw newDatatypeException(messagePrologue + msg, WARN);
            } else {
                throw newDatatypeException(messagePrologue + msg);
            }
        }
    }
}
Use of io.mola.galimatias.URL in project validator by validator.
In the class BaseUriTracker, method push.
/**
 * Pushes a new node onto the stack, deriving its base URL, language and
 * direction from the arguments and from the current top of the stack.
 *
 * @param relative a URL to resolve against the current base, or {@code null}
 *        to keep the current base unchanged
 * @param language a language tag, or {@code null} if none was given; an
 *        invalid tag is treated as not given and the current language is kept
 * @param dir the direction; anything other than {@code RTL} or {@code LTR}
 *        keeps the current direction
 */
private void push(String relative, String language, Direction dir) {
    String lang = "";
    boolean langSpecified = false;
    if (language != null) {
        try {
            // The empty string is accepted without validation.
            if (!"".equals(language)) {
                Language.THE_INSTANCE.checkValid(language);
            }
            lang = language;
            langSpecified = true;
        } catch (DatatypeException ignored) {
            // Invalid language tag: leave langSpecified false so the
            // language of the current node is used below.
        }
    }
    Node curr = peek();
    URL base = curr.currentAbsolute;
    if (!langSpecified) {
        lang = curr.lang;
    }
    boolean rtl;
    switch (dir) {
        case RTL:
            rtl = true;
            break;
        case LTR:
            rtl = false;
            break;
        default:
            rtl = curr.rtl;
            break;
    }
    if (relative == null) {
        stack.addLast(new Node(base, null, lang, langSpecified, rtl));
    } else {
        URL newBase;
        try {
            if (base != null) {
                try {
                    newBase = base.resolve(relative);
                } catch (GalimatiasParseException e) {
                    // Unresolvable reference: keep the current base.
                    newBase = base;
                }
            } else {
                try {
                    // No base to resolve against, so the reference must
                    // parse as a URL on its own. Previously a null string
                    // was parsed here, which always failed and silently
                    // left the base null.
                    newBase = URL.parse(relative);
                } catch (GalimatiasParseException e) {
                    newBase = null;
                }
            }
        } catch (Exception e) {
            // Best effort: any other failure keeps the current base.
            newBase = base;
        }
        // The second argument was always null in the original code as well.
        stack.addLast(new Node(newBase, null, lang, langSpecified, rtl));
    }
}
Aggregations