Search in sources :

Example 1 with PrudentHttpEntityResolver

use of nu.validator.xml.PrudentHttpEntityResolver in project validator by validator.

the class VerifierServletTransaction method validate.

/**
 * Validates the current document and reports the outcome through
 * {@code errorHandler}.
 *
 * Returns immediately when {@code willValidate()} is false. Otherwise it
 * builds the entity-resolver chain, sets up the validator and the parser,
 * stacks several {@code XMLReader} wrappers around {@code reader}, and
 * parses {@code documentInput}. Almost every failure is caught here and is
 * either logged or forwarded to {@code errorHandler};
 * {@code errorHandler.end(...)} and {@code gatherStatistics()} run in all
 * cases. For HTML/XHTML output the outline, details and statistics are
 * emitted afterwards unless an I/O or internal error occurred.
 *
 * @throws SAXException if flushing {@code out} fails before validation
 *         starts (the {@code IOException} is wrapped); presumably also when
 *         an {@code errorHandler} call made from a catch or finally block
 *         throws -- confirm against MessageEmitterAdapter
 */
@SuppressWarnings({ "deprecation", "unchecked" })
void validate() throws SAXException {
    if (!willValidate()) {
        return;
    }
    // Only the HTML and XHTML output formats get the trailing outline/details/stats section.
    boolean isHtmlOrXhtml = (outputFormat == OutputFormat.HTML || outputFormat == OutputFormat.XHTML);
    if (isHtmlOrXhtml) {
        try {
            // Push out whatever has been written so far before the (possibly slow) validation.
            out.flush();
        } catch (IOException e1) {
            throw new SAXException(e1);
        }
    }
    // Resolver chain: each resolver wraps the previous one
    // (LocalCacheEntityResolver -> DataUriEntityResolver -> PrudentHttpEntityResolver).
    httpRes = new PrudentHttpEntityResolver(SIZE_LIMIT, laxType, errorHandler, request);
    httpRes.setUserAgent(userAgent);
    dataRes = new DataUriEntityResolver(httpRes, laxType, errorHandler);
    contentTypeParser = new ContentTypeParser(errorHandler, laxType);
    entityResolver = new LocalCacheEntityResolver(dataRes);
    // RNC is allowed only until tryToSetupValidator() has run; it is switched off again below.
    // NOTE(review): presumably so that RNC is accepted for schemas but not for the document -- confirm.
    setAllowRnc(true);
    setAllowCss(true);
    try {
        this.errorHandler.start(document);
        // Property map handed to Jing (stored in jingPropertyMap).
        PropertyMapBuilder pmb = new PropertyMapBuilder();
        pmb.put(ValidateProperty.ERROR_HANDLER, errorHandler);
        pmb.put(ValidateProperty.ENTITY_RESOLVER, entityResolver);
        pmb.put(ValidateProperty.XML_READER_CREATOR, new VerifierServletXMLReaderCreator(errorHandler, entityResolver));
        pmb.put(ValidateProperty.SCHEMA_RESOLVER, this);
        RngProperty.CHECK_ID_IDREF.add(pmb);
        jingPropertyMap = pmb.toPropertyMap();
        tryToSetupValidator();
        setAllowRnc(false);
        loadDocAndSetupParser();
        setErrorProfile();
        contentType = documentInput.getType();
        if ("text/css".equals(contentType)) {
            // CSS input: sandwich the document bytes between CSS_CHECKING_PROLOG and
            // CSS_CHECKING_EPILOG, then run it through the HTML parser.
            String charset = "UTF-8";
            if (documentInput.getEncoding() != null) {
                charset = documentInput.getEncoding();
            }
            List<InputStream> streams = new ArrayList<>();
            streams.add(new ByteArrayInputStream(CSS_CHECKING_PROLOG));
            streams.add(documentInput.getByteStream());
            streams.add(new ByteArrayInputStream(CSS_CHECKING_EPILOG));
            Enumeration<InputStream> e = Collections.enumeration(streams);
            documentInput.setByteStream(new SequenceInputStream(e));
            documentInput.setEncoding(charset);
            // NOTE(review): presumably compensates for the line added by the prolog -- confirm.
            errorHandler.setLineOffset(-1);
            sourceCode.setIsCss();
            parser = ParserMode.HTML;
            // Set up the parser again now that the parser mode has been forced to HTML.
            loadDocAndSetupParser();
        }
        reader.setErrorHandler(errorHandler);
        sourceCode.initialize(documentInput);
        // Without a validator, normalization checking is always switched on.
        if (validator == null) {
            checkNormalization = true;
        }
        if (checkNormalization) {
            reader.setFeature("http://xml.org/sax/features/unicode-normalization-checking", true);
        }
        // Tap the SAX event stream so sourceCode's location recorder (and the base URI
        // tracker, when present) see the events too.
        WiretapXMLReaderWrapper wiretap = new WiretapXMLReaderWrapper(reader);
        ContentHandler recorder = sourceCode.getLocationRecorder();
        if (baseUriTracker == null) {
            wiretap.setWiretapContentHander(recorder);
        } else {
            wiretap.setWiretapContentHander(new CombineContentHandler(recorder, baseUriTracker));
        }
        wiretap.setWiretapLexicalHandler((LexicalHandler) recorder);
        reader = wiretap;
        if (htmlParser != null) {
            htmlParser.addCharacterHandler(sourceCode);
            htmlParser.setMappingLangToXmlLang(true);
            htmlParser.setErrorHandler(errorHandler.getExactErrorHandler());
            htmlParser.setTreeBuilderErrorHandlerOverride(errorHandler);
            errorHandler.setHtml(true);
        } else if (xmlParser != null) {
            // The namespace-dropping wrapper must be applied after the wiretap wrapper above.
            if (!filteredNamespaces.isEmpty()) {
                reader = new NamespaceDroppingXMLReaderWrapper(reader, filteredNamespaces);
            }
            xmlParser.setErrorHandler(errorHandler.getExactErrorHandler());
            xmlParser.lockErrorHandler();
        } else {
            // loadDocAndSetupParser() is expected to have set one of the two parsers.
            throw new RuntimeException("Bug. Unreachable.");
        }
        // NOTE(review): the comment around this line was truncated to "make ... better";
        // presumably the attribute-permuting wrapper improves validation results -- confirm.
        reader = new AttributesPermutingXMLReaderWrapper(reader);
        if (charsetOverride != null) {
            // Warn about the override, then force the requested encoding on the input.
            String charset = documentInput.getEncoding();
            if (charset == null) {
                errorHandler.warning(new SAXParseException("Overriding document character encoding from none to \u201C" + charsetOverride + "\u201D.", null));
            } else {
                errorHandler.warning(new SAXParseException("Overriding document character encoding from \u201C" + charset + "\u201D to \u201C" + charsetOverride + "\u201D.", null));
            }
            documentInput.setEncoding(charsetOverride);
        }
        if (showOutline) {
            // Two outline builders are stacked, one per value of the boolean flag; their
            // results are read back from request attributes after parsing.
            reader = new OutlineBuildingXMLReaderWrapper(reader, request, false);
            reader = new OutlineBuildingXMLReaderWrapper(reader, request, true);
        }
        reader.parse(documentInput);
        if (showOutline) {
            outline = (Deque<Section>) request.getAttribute("http://validator.nu/properties/document-outline");
            headingOutline = (Deque<Section>) request.getAttribute("http://validator.nu/properties/heading-outline");
        }
    } catch (CannotFindPresetSchemaException e) {
        // NOTE(review): silently ignored; presumably already reported elsewhere -- confirm.
    } catch (ResourceNotRetrievableException e) {
        log4j.debug(e.getMessage());
    } catch (NonXmlContentTypeException e) {
        log4j.debug(e.getMessage());
    } catch (FatalSAXException e) {
        log4j.debug(e.getMessage());
    } catch (SocketTimeoutException e) {
        // Timeouts are reported as I/O errors carrying only the message (no cause).
        errorHandler.ioError(new IOException(e.getMessage(), null));
    } catch (ConnectTimeoutException e) {
        errorHandler.ioError(new IOException(e.getMessage(), null));
    } catch (TooManyErrorsException e) {
        errorHandler.fatalError(e);
    } catch (SAXException e) {
        // The cannotRecover and changingEncoding messages are not logged.
        String msg = e.getMessage();
        if (!cannotRecover.equals(msg) && !changingEncoding.equals(msg)) {
            log4j.debug("SAXException: " + e.getMessage());
        }
    } catch (IOException e) {
        // Skip the trailing HTML/XHTML section after an I/O failure.
        isHtmlOrXhtml = false;
        if (e.getCause() instanceof org.apache.http.TruncatedChunkException) {
            log4j.debug("TruncatedChunkException", e.getCause());
        } else {
            errorHandler.ioError(e);
        }
    } catch (IncorrectSchemaException e) {
        log4j.debug("IncorrectSchemaException", e);
        errorHandler.schemaError(e);
    } catch (RuntimeException e) {
        // Internal failure: log with context, tell the user, and skip the trailing section.
        isHtmlOrXhtml = false;
        log4j.error("RuntimeException, doc: " + document + " schema: " + schemaUrls + " lax: " + laxType, e);
        errorHandler.internalError(e, "Oops. That was not supposed to happen. A bug manifested itself in the application internals. Unable to continue. Sorry. The admin was notified.");
    } catch (Error e) {
        isHtmlOrXhtml = false;
        log4j.error("Error, doc: " + document + " schema: " + schemaUrls + " lax: " + laxType, e);
        errorHandler.internalError(e, "Oops. That was not supposed to happen. A bug manifested itself in the application internals. Unable to continue. Sorry. The admin was notified.");
    } finally {
        // Always close the message stream and record statistics, whatever happened above.
        errorHandler.end(successMessage(), failureMessage(), (String) request.getAttribute("http://validator.nu/properties/document-language"));
        gatherStatistics();
    }
    if (isHtmlOrXhtml) {
        // Trailing page content for HTML/XHTML output: heading outline, outline, details, stats.
        XhtmlOutlineEmitter outlineEmitter = new XhtmlOutlineEmitter(contentHandler, outline, headingOutline);
        outlineEmitter.emitHeadings();
        outlineEmitter.emit();
        emitDetails();
        StatsEmitter.emit(contentHandler, this);
    }
}
Also used : TooManyErrorsException(nu.validator.messages.TooManyErrorsException) WiretapXMLReaderWrapper(nu.validator.xml.WiretapXMLReaderWrapper) ArrayList(java.util.ArrayList) NonXmlContentTypeException(nu.validator.xml.ContentTypeParser.NonXmlContentTypeException) PrudentHttpEntityResolver(nu.validator.xml.PrudentHttpEntityResolver) CombineContentHandler(nu.validator.xml.CombineContentHandler) ContentHandler(org.xml.sax.ContentHandler) FatalSAXException(nu.validator.gnu.xml.aelfred2.FatalSAXException) SAXException(org.xml.sax.SAXException) ContentTypeParser(nu.validator.xml.ContentTypeParser) CombineContentHandler(nu.validator.xml.CombineContentHandler) NamespaceDroppingXMLReaderWrapper(nu.validator.xml.NamespaceDroppingXMLReaderWrapper) SAXParseException(org.xml.sax.SAXParseException) FatalSAXException(nu.validator.gnu.xml.aelfred2.FatalSAXException) ResourceNotRetrievableException(nu.validator.xml.PrudentHttpEntityResolver.ResourceNotRetrievableException) PropertyMapBuilder(com.thaiopensource.util.PropertyMapBuilder) DataUriEntityResolver(nu.validator.xml.DataUriEntityResolver) BoundedInputStream(nu.validator.io.BoundedInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) SequenceInputStream(java.io.SequenceInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) AttributesPermutingXMLReaderWrapper(nu.validator.xml.AttributesPermutingXMLReaderWrapper) IncorrectSchemaException(com.thaiopensource.validate.IncorrectSchemaException) IOException(java.io.IOException) Section(nu.validator.servlet.OutlineBuildingXMLReaderWrapper.Section) LocalCacheEntityResolver(nu.validator.localentities.LocalCacheEntityResolver) SocketTimeoutException(java.net.SocketTimeoutException) SequenceInputStream(java.io.SequenceInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) ConnectTimeoutException(org.apache.http.conn.ConnectTimeoutException)

Example 2 with PrudentHttpEntityResolver

use of nu.validator.xml.PrudentHttpEntityResolver in project validator by validator.

the class Downloader method run.

/**
 * Downloads every document listed on {@code in} and stores it gzip-compressed
 * under {@code rootDir}.
 *
 * Each input line is expected to have the form {@code md5<TAB>url}. The URL is
 * fetched through a {@link PrudentHttpEntityResolver} limited to 1 MiB and
 * configured to accept HTML only. The bytes are written to
 * {@code rootDir/<md5 chars 0-1>/<md5 chars 2-3>/<md5>.gz}, and on success a
 * line {@code md5<TAB>url<TAB>charset} is printed to {@code out} (the charset
 * is the literal {@code "null"} when unknown or containing a tab).
 *
 * Processing is best-effort: a line that cannot be parsed, fetched or stored is
 * skipped and the loop carries on with the next line. The method returns when
 * {@code in} is exhausted.
 */
public void run() {
    String inLine = null;
    // NOTE(review): any exception escaping the inner loop (including one thrown by
    // in.readLine() itself) is swallowed and reading is retried; a persistently
    // failing reader would therefore spin here -- confirm this is acceptable.
    for (; ; ) {
        try {
            while ((inLine = in.readLine()) != null) {
                int index = inLine.indexOf('\t');
                if (index < 0) {
                    // No separator: skip the malformed line explicitly instead of
                    // relying on a StringIndexOutOfBoundsException being swallowed.
                    continue;
                }
                String md5 = inLine.substring(0, index);
                String url = inLine.substring(index + 1);
                InputSource is;
                PrudentHttpEntityResolver resolver;
                resolver = new PrudentHttpEntityResolver(1024 * 1024, false, null);
                resolver.setAcceptAllKnownXmlTypes(false);
                resolver.setAllowGenericXml(false);
                resolver.setAllowRnc(false);
                resolver.setAllowXhtml(false);
                resolver.setAllowHtml(true);
                try {
                    is = resolver.resolveEntity(null, url);
                } catch (Exception e) {
                    // Not retrievable or not an accepted type: move on to the next line.
                    continue;
                }
                String charset = is.getEncoding();
                if (charset == null || charset.indexOf('\t') != -1) {
                    // Keep the output tab-separated: never emit a charset containing a tab.
                    charset = "null";
                }
                File top = new File(rootDir, md5.substring(0, 2));
                synchronized (rootDir) {
                    top.mkdir();
                }
                File second = new File(top, md5.substring(2, 4));
                synchronized (rootDir) {
                    second.mkdir();
                }
                File outFile = new File(second, md5 + ".gz");
                InputStream inStream = is.getByteStream();
                // try-with-resources closes both streams even when the copy fails, so
                // the file handle is released before the partial file is deleted.
                try (OutputStream fileOut = new FileOutputStream(outFile);
                        OutputStream outStream = new GZIPOutputStream(fileOut)) {
                    IO.copy(inStream, outStream);
                    outStream.flush();
                } catch (Exception e) {
                    // Do not leave a truncated archive behind.
                    outFile.delete();
                    continue;
                } finally {
                    inStream.close();
                }
                out.println(md5 + '\t' + url + '\t' + charset);
            }
            return;
        } catch (Exception e) {
            // Best effort: drop the offending line and resume with the next one.
        }
    }
}
Also used : InputSource(org.xml.sax.InputSource) GZIPOutputStream(java.util.zip.GZIPOutputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) GZIPOutputStream(java.util.zip.GZIPOutputStream) FileOutputStream(java.io.FileOutputStream) PrudentHttpEntityResolver(nu.validator.xml.PrudentHttpEntityResolver) File(java.io.File) FileNotFoundException(java.io.FileNotFoundException) UnsupportedEncodingException(java.io.UnsupportedEncodingException)

Example 3 with PrudentHttpEntityResolver

use of nu.validator.xml.PrudentHttpEntityResolver in project validator by validator.

the class ParseTreePrinter method service.

/**
 * Handles one parse-tree request: parses the requested document and writes a
 * plain-text dump of the resulting tree, followed by the parse errors.
 *
 * Input is taken from, in order: the {@code doc} URL parameter (fetched over
 * HTTP), the {@code content} parameter of a GET request, or the body of a POST
 * request. A GET request with neither {@code doc} nor {@code content} gets the
 * HTML form ({@code FORM_HTML}) instead. Only {@code text/html},
 * {@code text/html-sandboxed} and {@code application/xhtml+xml} are parsed;
 * anything else yields "Unsupported content type.".
 *
 * {@code SAXException}s and {@code IOException}s raised while fetching or
 * parsing are written into the response body rather than propagated.
 *
 * @throws IOException if writing to the response fails
 */
public void service() throws IOException {
    request.setCharacterEncoding("utf-8");
    String content = null;
    String document = scrubUrl(request.getParameter("doc"));
    document = ("".equals(document)) ? null : document;
    try (Writer writer = new OutputStreamWriter(response.getOutputStream(), "UTF-8")) {
        // Note: "content" is only looked up when there is no document URL and the
        // request is a GET; the assignment inside the condition is intentional.
        if (document == null && methodIsGet() && (content = request.getParameter("content")) == null) {
            response.setContentType("text/html; charset=utf-8");
            writer.write(FORM_HTML);
            writer.flush();
            return;
        }
        response.setContentType("text/plain; charset=utf-8");
        try {
            PrudentHttpEntityResolver entityResolver = new PrudentHttpEntityResolver(2048 * 1024, false, null);
            entityResolver.setAllowGenericXml(false);
            entityResolver.setAcceptAllKnownXmlTypes(false);
            entityResolver.setAllowHtml(true);
            entityResolver.setAllowXhtml(true);
            TypedInputSource documentInput;
            if (methodIsGet()) {
                if (content == null) {
                    documentInput = (TypedInputSource) entityResolver.resolveEntity(null, document);
                } else {
                    documentInput = new TypedInputSource(new StringReader(content));
                    if ("xml".equals(request.getParameter("parser"))) {
                        documentInput.setType("application/xhtml+xml");
                    } else {
                        documentInput.setType("text/html");
                    }
                }
            } else {
                // POST
                String postContentType = request.getContentType();
                if (postContentType == null) {
                    response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Content-Type missing");
                    return;
                } else if (postContentType.trim().toLowerCase(java.util.Locale.ROOT).startsWith("application/x-www-form-urlencoded")) {
                    // Locale.ROOT keeps the comparison independent of the server's
                    // default locale (e.g. Turkish dotless i).
                    response.sendError(HttpServletResponse.SC_UNSUPPORTED_MEDIA_TYPE, "application/x-www-form-urlencoded not supported. Please use multipart/form-data.");
                    return;
                }
                long len = request.getContentLength();
                if (len > SIZE_LIMIT) {
                    throw new StreamBoundException("Resource size exceeds limit.");
                }
                ContentTypeParser contentTypeParser = new ContentTypeParser(null, false);
                contentTypeParser.setAllowGenericXml(false);
                contentTypeParser.setAcceptAllKnownXmlTypes(false);
                contentTypeParser.setAllowHtml(true);
                contentTypeParser.setAllowXhtml(true);
                documentInput = contentTypeParser.buildTypedInputSource(document, null, postContentType);
                // Unknown length (negative): enforce the size limit while reading instead.
                documentInput.setByteStream(len < 0 ? new BoundedInputStream(request.getInputStream(), SIZE_LIMIT, document) : request.getInputStream());
                documentInput.setSystemId(request.getHeader("Content-Location"));
            }
            String type = documentInput.getType();
            XMLReader parser;
            if ("text/html".equals(type) || "text/html-sandboxed".equals(type)) {
                writer.write("HTML parser\n\n#document\n");
                parser = new nu.validator.htmlparser.sax.HtmlParser();
                parser.setProperty("http://validator.nu/properties/heuristics", Heuristics.ALL);
                parser.setProperty("http://validator.nu/properties/xml-policy", XmlViolationPolicy.ALLOW);
            } else if ("application/xhtml+xml".equals(type)) {
                writer.write("XML parser\n\n#document\n");
                parser = new SAXDriver();
                // Never fetch external entities for user-supplied XML.
                parser.setFeature("http://xml.org/sax/features/external-general-entities", false);
                parser.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
                parser.setEntityResolver(new NullEntityResolver());
            } else {
                writer.write("Unsupported content type.\n");
                writer.flush();
                return;
            }
            TreeDumpContentHandler treeDumpContentHandler = new TreeDumpContentHandler(writer, false);
            ListErrorHandler listErrorHandler = new ListErrorHandler();
            parser.setContentHandler(treeDumpContentHandler);
            parser.setProperty("http://xml.org/sax/properties/lexical-handler", treeDumpContentHandler);
            parser.setErrorHandler(listErrorHandler);
            parser.parse(documentInput);
            writer.write("#errors\n");
            for (String err : listErrorHandler.getErrors()) {
                writer.write(err);
                writer.write('\n');
            }
        } catch (SAXException e) {
            // getMessage() may be null, and Writer.write(String) rejects null;
            // fall back to the exception's string form in that case.
            String message = e.getMessage();
            writer.write("SAXException:\n");
            writer.write(message == null ? e.toString() : message);
            writer.write("\n");
        } catch (IOException e) {
            String message = e.getMessage();
            writer.write("IOException:\n");
            writer.write(message == null ? e.toString() : message);
            writer.write("\n");
        } finally {
            writer.flush();
        }
    }
}
Also used : NullEntityResolver(nu.validator.xml.NullEntityResolver) TypedInputSource(nu.validator.xml.TypedInputSource) PrudentHttpEntityResolver(nu.validator.xml.PrudentHttpEntityResolver) IOException(java.io.IOException) StreamBoundException(nu.validator.io.StreamBoundException) ContentTypeParser(nu.validator.xml.ContentTypeParser) SAXException(org.xml.sax.SAXException) SAXDriver(nu.validator.gnu.xml.aelfred2.SAXDriver) BoundedInputStream(nu.validator.io.BoundedInputStream) StringReader(java.io.StringReader) OutputStreamWriter(java.io.OutputStreamWriter) Writer(java.io.Writer) OutputStreamWriter(java.io.OutputStreamWriter) XMLReader(org.xml.sax.XMLReader)

Example 4 with PrudentHttpEntityResolver

use of nu.validator.xml.PrudentHttpEntityResolver in project validator by validator.

the class VerifierServletTransaction method service.

/**
 * Handles one validation request: reads the request parameters, configures this
 * transaction accordingly, selects the message emitter for the requested output
 * format and then either emits the HTML/XHTML page (via {@code PageEmitter})
 * or calls {@code validate()} directly for the other formats.
 *
 * The method answers with an HTTP error and returns early when:
 * a non-GET/HEAD request has no Content-Type or is form-urlencoded; the
 * {@code out} parameter names an unknown format; the document URL contains an
 * entry of {@code DENY_LIST}; the JSON {@code callback} is not a valid,
 * non-reserved identifier; a non-HTML format is requested without an input
 * document; or the {@code filterurl} resource cannot be loaded.
 *
 * @throws ServletException declared for the servlet container; not thrown by
 *         the code visible in this method
 * @throws IOException if writing to or configuring the response fails
 */
void service() throws ServletException, IOException {
    this.methodIsGet = "GET".equals(request.getMethod()) || "HEAD".equals(request.getMethod());
    this.out = response.getOutputStream();
    try {
        request.setCharacterEncoding("utf-8");
    } catch (NoSuchMethodError e) {
        log4j.debug("Vintage Servlet API doesn't support setCharacterEncoding().", e);
    }
    if (!methodIsGet) {
        postContentType = request.getContentType();
        if (postContentType == null) {
            response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Content-Type missing");
            return;
        } else if (postContentType.trim().toLowerCase(java.util.Locale.ROOT).startsWith("application/x-www-form-urlencoded")) {
            // Locale.ROOT keeps the comparison independent of the server's default
            // locale (e.g. Turkish dotless i).
            response.sendError(HttpServletResponse.SC_UNSUPPORTED_MEDIA_TYPE, "application/x-www-form-urlencoded not supported. Please use multipart/form-data.");
            return;
        }
    }
    // Output format: defaults to HTML when the "out" parameter is absent.
    String outFormat = request.getParameter("out");
    if (outFormat == null) {
        outputFormat = OutputFormat.HTML;
    } else {
        if ("html".equals(outFormat)) {
            outputFormat = OutputFormat.HTML;
        } else if ("xhtml".equals(outFormat)) {
            outputFormat = OutputFormat.XHTML;
        } else if ("text".equals(outFormat)) {
            outputFormat = OutputFormat.TEXT;
        } else if ("gnu".equals(outFormat)) {
            outputFormat = OutputFormat.GNU;
        } else if ("xml".equals(outFormat)) {
            outputFormat = OutputFormat.XML;
        } else if ("json".equals(outFormat)) {
            outputFormat = OutputFormat.JSON;
        } else {
            response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Unsupported output format");
            return;
        }
    }
    // Document address: Content-Location header (non-GET only), then "doc", then "file".
    if (!methodIsGet) {
        document = request.getHeader("Content-Location");
    }
    if (document == null) {
        document = request.getParameter("doc");
    }
    if (document == null) {
        document = request.getParameter("file");
    }
    document = ("".equals(document)) ? null : document;
    if (document != null) {
        for (String domain : DENY_LIST) {
            if (!"".equals(domain) && document.contains(domain)) {
                response.sendError(429, "Too many requests");
                return;
            }
        }
    }
    // JSONP callback: must be a valid identifier and not a reserved word.
    String callback = null;
    if (outputFormat == OutputFormat.JSON) {
        callback = request.getParameter("callback");
        if (callback != null) {
            Matcher m = JS_IDENTIFIER.matcher(callback);
            if (m.matches()) {
                if (Arrays.binarySearch(JS_RESERVED_WORDS, callback) >= 0) {
                    response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Callback is a reserved word.");
                    return;
                }
            } else {
                response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Callback is not a valid ECMA 262 IdentifierName.");
                return;
            }
        }
    }
    if (willValidate()) {
        // Validation results must never be cached.
        response.setDateHeader("Expires", 0);
        response.setHeader("Cache-Control", "no-cache");
    } else if (outputFormat == OutputFormat.HTML || outputFormat == OutputFormat.XHTML) {
        response.setDateHeader("Last-Modified", lastModified);
    } else {
        response.sendError(HttpServletResponse.SC_BAD_REQUEST, "No input document");
        return;
    }
    setup();
    // Message filter: system-wide pattern, optionally extended with the
    // "filterpattern" parameter and with the lines of the "filterurl" resource,
    // all joined as regex alternatives.
    String filterString = systemFilterString;
    String filterPatternParam = request.getParameter("filterpattern");
    if (filterPatternParam != null && !"".equals(filterPatternParam)) {
        if ("".equals(filterString)) {
            filterString = scrub(filterPatternParam);
        } else {
            filterString += "|" + scrub(filterPatternParam);
        }
    }
    String filterUrl = request.getParameter("filterurl");
    if (filterUrl != null && !"".equals(filterUrl)) {
        try {
            InputSource filterFile = (new PrudentHttpEntityResolver(-1, true, null)).resolveEntity(null, filterUrl);
            StringBuilder sb = new StringBuilder();
            // Close the reader (and with it the fetched stream) once the file is read.
            try (BufferedReader filterReader = new BufferedReader(new InputStreamReader(filterFile.getByteStream()))) {
                String line;
                String pipe = "";
                while ((line = filterReader.readLine()) != null) {
                    // Lines starting with '#' are comments in the filter file.
                    if (line.startsWith("#")) {
                        continue;
                    }
                    sb.append(pipe);
                    sb.append(line);
                    pipe = "|";
                }
            }
            if (sb.length() != 0) {
                // Same merge rule as for "filterpattern" above: replace an empty
                // filter, otherwise append as a further alternative. (The condition
                // used to be inverted, which discarded existing filters and produced
                // a leading "|" for an empty one.)
                if ("".equals(filterString)) {
                    filterString = scrub(sb.toString());
                } else {
                    filterString += "|" + scrub(sb.toString());
                }
            }
        } catch (Exception e) {
            response.sendError(500, e.getMessage());
            // The error response has been sent; do not go on writing a result page.
            return;
        }
    }
    Pattern filterPattern = null;
    if (!"".equals(filterString)) {
        filterPattern = Pattern.compile(filterString);
    }
    if (request.getParameter("useragent") != null) {
        userAgent = scrub(request.getParameter("useragent"));
    } else {
        userAgent = USER_AGENT;
    }
    if (request.getParameter("acceptlanguage") != null) {
        request.setAttribute("http://validator.nu/properties/accept-language", scrub(request.getParameter("acceptlanguage")));
    }
    Object inputType = request.getAttribute("nu.validator.servlet.MultipartFormDataFilter.type");
    // Source is shown when asked for explicitly or when the input came from the textarea.
    showSource = (request.getParameter("showsource") != null);
    showSource = (showSource || "textarea".equals(inputType));
    showOutline = (request.getParameter("showoutline") != null);
    if (request.getParameter("checkerrorpages") != null) {
        request.setAttribute("http://validator.nu/properties/ignore-response-status", true);
    }
    if (request.getParameter("showimagereport") != null) {
        imageCollector = new ImageCollector(sourceCode);
    }
    String charset = request.getParameter("charset");
    if (charset != null) {
        charset = scrub(charset.trim());
        if (!"".equals(charset)) {
            charsetOverride = charset;
        }
    }
    String nsfilter = request.getParameter("nsfilter");
    if (nsfilter != null) {
        for (String ns : SPACE.split(nsfilter)) {
            if (ns.length() > 0) {
                filteredNamespaces.add(ns);
            }
        }
    }
    boolean errorsOnly = ("error".equals(request.getParameter("level")));
    boolean asciiQuotes = (request.getParameter("asciiquotes") != null);
    int lineOffset = 0;
    String lineOffsetStr = request.getParameter("lineoffset");
    if (lineOffsetStr != null) {
        try {
            lineOffset = Integer.parseInt(lineOffsetStr);
        } catch (NumberFormatException e) {
            // Not a number: keep the default offset of 0.
        }
    }
    try {
        if (outputFormat == OutputFormat.HTML || outputFormat == OutputFormat.XHTML) {
            if (outputFormat == OutputFormat.HTML) {
                response.setContentType("text/html; charset=utf-8");
                contentHandler = new HtmlSerializer(out);
            } else {
                response.setContentType("application/xhtml+xml");
                contentHandler = new XmlSerializer(out);
            }
            emitter = new XhtmlSaxEmitter(contentHandler);
            errorHandler = new MessageEmitterAdapter(filterPattern, sourceCode, showSource, imageCollector, lineOffset, false, new XhtmlMessageEmitter(contentHandler));
            // validate() is not called directly on this path; the page emitter receives this transaction.
            PageEmitter.emit(contentHandler, this);
        } else {
            if (outputFormat == OutputFormat.TEXT) {
                response.setContentType("text/plain; charset=utf-8");
                errorHandler = new MessageEmitterAdapter(filterPattern, sourceCode, showSource, null, lineOffset, false, new TextMessageEmitter(out, asciiQuotes));
            } else if (outputFormat == OutputFormat.GNU) {
                response.setContentType("text/plain; charset=utf-8");
                errorHandler = new MessageEmitterAdapter(filterPattern, sourceCode, showSource, null, lineOffset, false, new GnuMessageEmitter(out, asciiQuotes));
            } else if (outputFormat == OutputFormat.XML) {
                response.setContentType("application/xml");
                errorHandler = new MessageEmitterAdapter(filterPattern, sourceCode, showSource, null, lineOffset, false, new XmlMessageEmitter(new XmlSerializer(out)));
            } else if (outputFormat == OutputFormat.JSON) {
                // With a callback the payload is JSONP, i.e. JavaScript rather than JSON.
                if (callback == null) {
                    response.setContentType("application/json; charset=utf-8");
                } else {
                    response.setContentType("application/javascript; charset=utf-8");
                }
                errorHandler = new MessageEmitterAdapter(filterPattern, sourceCode, showSource, null, lineOffset, false, new JsonMessageEmitter(new nu.validator.json.Serializer(out), callback));
            } else {
                throw new RuntimeException("Unreachable.");
            }
            errorHandler.setErrorsOnly(errorsOnly);
            validate();
        }
    } catch (SAXException e) {
        log4j.debug("SAXException: " + e.getMessage());
    }
}
Also used : Matcher(java.util.regex.Matcher) MessageEmitterAdapter(nu.validator.messages.MessageEmitterAdapter) PrudentHttpEntityResolver(nu.validator.xml.PrudentHttpEntityResolver) TextMessageEmitter(nu.validator.messages.TextMessageEmitter) FatalSAXException(nu.validator.gnu.xml.aelfred2.FatalSAXException) SAXException(org.xml.sax.SAXException) GnuMessageEmitter(nu.validator.messages.GnuMessageEmitter) ImageCollector(nu.validator.servlet.imagereview.ImageCollector) XhtmlSaxEmitter(nu.validator.xml.XhtmlSaxEmitter) XmlSerializer(nu.validator.htmlparser.sax.XmlSerializer) HtmlSerializer(nu.validator.htmlparser.sax.HtmlSerializer) XhtmlMessageEmitter(nu.validator.messages.XhtmlMessageEmitter) Pattern(java.util.regex.Pattern) InputStreamReader(java.io.InputStreamReader) StreamBoundException(nu.validator.io.StreamBoundException) TooManyErrorsException(nu.validator.messages.TooManyErrorsException) ConnectTimeoutException(org.apache.http.conn.ConnectTimeoutException) SocketTimeoutException(java.net.SocketTimeoutException) NonXmlContentTypeException(nu.validator.xml.ContentTypeParser.NonXmlContentTypeException) IOException(java.io.IOException) FatalSAXException(nu.validator.gnu.xml.aelfred2.FatalSAXException) SAXNotSupportedException(org.xml.sax.SAXNotSupportedException) ServletException(javax.servlet.ServletException) SAXException(org.xml.sax.SAXException) SAXNotRecognizedException(org.xml.sax.SAXNotRecognizedException) IncorrectSchemaException(com.thaiopensource.validate.IncorrectSchemaException) ResourceNotRetrievableException(nu.validator.xml.PrudentHttpEntityResolver.ResourceNotRetrievableException) SAXParseException(org.xml.sax.SAXParseException) HtmlSerializer(nu.validator.htmlparser.sax.HtmlSerializer) BufferedReader(java.io.BufferedReader) XmlMessageEmitter(nu.validator.messages.XmlMessageEmitter) JsonMessageEmitter(nu.validator.messages.JsonMessageEmitter) XmlSerializer(nu.validator.htmlparser.sax.XmlSerializer)

Example 5 with PrudentHttpEntityResolver

use of nu.validator.xml.PrudentHttpEntityResolver in project validator by validator.

the class SimpleDocumentValidator method checkHttpURL.

/**
 * Fetches a document over HTTP and checks it as CSS, HTML or XML depending on
 * its reported content type.
 *
 * A cookie manager accepting all cookies is installed as the default cookie
 * handler and {@code validator} is reset before the fetch. If the content type
 * carries a {@code charset=} parameter, its value is set as the input's
 * encoding. A {@code ResourceNotRetrievableException} from the resolver ends
 * the check quietly.
 *
 * @param document the address of the document to check
 * @param userAgent the User-Agent string passed to the HTTP resolver
 * @param errorHandler the handler passed to the HTTP resolver
 * @throws IOException if loading of the URL fails for some reason
 * @throws SAXException declared for SAX errors raised while resolving or
 *         checking the document
 */
public void checkHttpURL(String document, String userAgent, ErrorHandler errorHandler) throws IOException, SAXException {
    final String charsetKey = "charset=";
    CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
    validator.reset();
    httpRes = new PrudentHttpEntityResolver(-1, true, errorHandler);
    if (this.allowCss) {
        httpRes.setAllowCss(true);
    }
    httpRes.setAllowHtml(true);
    httpRes.setUserAgent(userAgent);
    try {
        documentInput = (TypedInputSource) httpRes.resolveEntity(null, document);
        String reportedType = documentInput.getType();
        documentInput.setSystemId(document);
        // Strip spaces, then scan the ';'-separated parts for the first charset parameter.
        String[] segments = reportedType.replace(" ", "").split(";");
        for (String segment : segments) {
            if (!segment.startsWith(charsetKey)) {
                continue;
            }
            documentInput.setEncoding(segment.substring(charsetKey.length()));
            break;
        }
        // Dispatch on the media type; anything that is neither CSS nor HTML is treated as XML.
        if (documentInput.getType().startsWith("text/css")) {
            checkAsCss(documentInput);
            return;
        }
        if (documentInput.getType().startsWith("text/html")) {
            checkAsHTML(documentInput);
            return;
        }
        checkAsXML(documentInput);
    } catch (ResourceNotRetrievableException e) {
    }
}
Also used : PrudentHttpEntityResolver(nu.validator.xml.PrudentHttpEntityResolver) ResourceNotRetrievableException(nu.validator.xml.PrudentHttpEntityResolver.ResourceNotRetrievableException)

Aggregations

PrudentHttpEntityResolver (nu.validator.xml.PrudentHttpEntityResolver)5 IOException (java.io.IOException)3 ResourceNotRetrievableException (nu.validator.xml.PrudentHttpEntityResolver.ResourceNotRetrievableException)3 SAXException (org.xml.sax.SAXException)3 IncorrectSchemaException (com.thaiopensource.validate.IncorrectSchemaException)2 FileInputStream (java.io.FileInputStream)2 InputStream (java.io.InputStream)2 SocketTimeoutException (java.net.SocketTimeoutException)2 FatalSAXException (nu.validator.gnu.xml.aelfred2.FatalSAXException)2 BoundedInputStream (nu.validator.io.BoundedInputStream)2 StreamBoundException (nu.validator.io.StreamBoundException)2 TooManyErrorsException (nu.validator.messages.TooManyErrorsException)2 ContentTypeParser (nu.validator.xml.ContentTypeParser)2 NonXmlContentTypeException (nu.validator.xml.ContentTypeParser.NonXmlContentTypeException)2 ConnectTimeoutException (org.apache.http.conn.ConnectTimeoutException)2 SAXParseException (org.xml.sax.SAXParseException)2 PropertyMapBuilder (com.thaiopensource.util.PropertyMapBuilder)1 BufferedReader (java.io.BufferedReader)1 ByteArrayInputStream (java.io.ByteArrayInputStream)1 File (java.io.File)1