Search in sources :

Example 6 with HtmlParser

use of nu.validator.htmlparser.sax.HtmlParser in project validator by validator.

the class ParserPerfHarnessNew method main.

/**
 * Runs a timed parsing benchmark over a single input file: one warm-up pass
 * followed by one measured pass, each printing the value returned by
 * {@code runLoop()}.
 *
 * <p>Expected arguments:
 * <ol>
 * <li>{@code args[0]} - parser selector: {@code "h"} selects
 * {@code HtmlParser}, {@code "t"} selects a {@code Driver} wrapped in a
 * {@code DriverWrapper}, {@code "a"} selects {@code SAXDriver}, {@code "n"}
 * selects {@code SAXParser} with the namespaces feature off, and any other
 * value selects {@code SAXParser} with the namespaces feature on.</li>
 * <li>{@code args[1]} - length of each pass in minutes.</li>
 * <li>{@code args[2]} - path of the file whose contents are parsed.</li>
 * </ol>
 *
 * @param args the command-line arguments described above
 * @throws IllegalArgumentException if fewer than three arguments are supplied
 * @throws NumberFormatException if {@code args[1]} is not a valid long
 * @throws IOException declared by the signature; presumably raised while
 *         loading the input file
 * @throws SAXException declared by the signature; the {@code setFeature}
 *         calls on {@code XMLReader} can raise it
 * @throws ParserConfigurationException declared by the signature
 */
public static void main(String[] args) throws SAXException, IOException, ParserConfigurationException {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 3) {
        throw new IllegalArgumentException("Usage: ParserPerfHarnessNew <parser: h|t|a|n|other> <minutes> <file>");
    }
    String mode = args[0];
    long duration = Long.parseLong(args[1]) * 60000L;
    String path = args[2];
    char[] testData = loadFileIntoArray(new File(path));
    XmlSerializer ch = new XmlSerializer(new NullWriter());
    XMLReader reader;
    if ("h".equals(mode)) {
        HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALLOW);
        parser.setContentHandler(ch);
        parser.setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL);
        reader = parser;
    } else if ("t".equals(mode)) {
        Driver driver = new Driver(new TokensToSax(ch));
        driver.setContentNonXmlCharPolicy(XmlViolationPolicy.ALLOW);
        driver.setContentSpacePolicy(XmlViolationPolicy.ALLOW);
        driver.setNamePolicy(XmlViolationPolicy.ALLOW);
        driver.setXmlnsPolicy(XmlViolationPolicy.ALLOW);
        reader = new DriverWrapper(driver);
    } else {
        // The remaining modes are plain XML readers that share one
        // configuration; only the implementation and the namespaces flag vary.
        boolean namespaces;
        if ("a".equals(mode)) {
            reader = new SAXDriver();
            namespaces = true;
        } else {
            System.out.println(Version.getVersion());
            reader = new SAXParser();
            namespaces = !"n".equals(mode);
        }
        reader.setFeature("http://xml.org/sax/features/namespaces", namespaces);
        reader.setFeature("http://xml.org/sax/features/validation", false);
        reader.setFeature("http://xml.org/sax/features/string-interning", true);
        reader.setFeature("http://xml.org/sax/features/external-general-entities", false);
        reader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        reader.setContentHandler(ch);
        reader.setEntityResolver(new NullEntityResolver());
    }
    System.out.println("Warmup:");
    System.out.println((new ParserPerfHarnessNew(System.currentTimeMillis() + duration, reader, testData)).runLoop());
    System.gc();
    System.out.println("Real:");
    System.out.println((new ParserPerfHarnessNew(System.currentTimeMillis() + duration, reader, testData)).runLoop());
}
Also used : NullEntityResolver(nu.validator.xml.NullEntityResolver) SAXDriver(nu.validator.gnu.xml.aelfred2.SAXDriver) Driver(nu.validator.htmlparser.io.Driver) SAXDriver(nu.validator.gnu.xml.aelfred2.SAXDriver) HtmlParser(nu.validator.htmlparser.sax.HtmlParser) SAXParser(org.apache.xerces.parsers.SAXParser) File(java.io.File) XMLReader(org.xml.sax.XMLReader) XmlSerializer(nu.validator.htmlparser.sax.XmlSerializer)

Example 7 with HtmlParser

use of nu.validator.htmlparser.sax.HtmlParser in project validator by validator.

the class VerifierServletTransaction method newHtmlParser.

/**
 * Replaces the {@code htmlParser} field with a freshly constructed
 * {@code HtmlParser} and applies this class's fixed set of policies to it:
 * comments, non-XML characters and names are allowed; content-space and xmlns
 * violations alter the infoset; streamability violations are fatal;
 * {@code lang} is mapped to {@code xml:lang}; and all heuristics are enabled.
 */
protected void newHtmlParser() {
    // Publish the new instance first, then configure it through a local alias.
    final HtmlParser fresh = new HtmlParser();
    htmlParser = fresh;

    // XML-violation policies.
    fresh.setCommentPolicy(XmlViolationPolicy.ALLOW);
    fresh.setContentNonXmlCharPolicy(XmlViolationPolicy.ALLOW);
    fresh.setContentSpacePolicy(XmlViolationPolicy.ALTER_INFOSET);
    fresh.setNamePolicy(XmlViolationPolicy.ALLOW);
    fresh.setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL);
    fresh.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET);

    // Remaining parser switches.
    fresh.setMappingLangToXmlLang(true);
    fresh.setHeuristics(Heuristics.ALL);
}
Also used : HtmlParser(nu.validator.htmlparser.sax.HtmlParser)

Example 8 with HtmlParser

use of nu.validator.htmlparser.sax.HtmlParser in project validator by validator.

the class Html5AttributeDatatypeBuilder method parseSyntaxDescriptions.

/**
 * Parses the bundled {@code syntax-descriptions} resource with an
 * {@code HtmlParser}, feeding the events to a new
 * {@code Html5AttributeDatatypeBuilder}, and returns what that builder
 * reports from {@code getAdvice()}.
 *
 * @return the advice collected by the builder during the parse
 * @throws IOException declared by the signature; raised by the parse call
 * @throws SAXException declared by the signature; raised by the parse call
 */
public static Map<Class, DocumentFragment> parseSyntaxDescriptions() throws IOException, SAXException {
    final HtmlParser syntaxParser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET);
    final Html5AttributeDatatypeBuilder builder = new Html5AttributeDatatypeBuilder();
    syntaxParser.setContentHandler(builder);

    // The descriptions are looked up through this class's own class loader.
    final ClassLoader loader = Html5AttributeDatatypeBuilder.class.getClassLoader();
    syntaxParser.parse(new InputSource(loader.getResourceAsStream("nu/validator/localentities/files/syntax-descriptions")));

    return builder.getAdvice();
}
Also used : HtmlParser(nu.validator.htmlparser.sax.HtmlParser) InputSource(org.xml.sax.InputSource)

Example 9 with HtmlParser

use of nu.validator.htmlparser.sax.HtmlParser in project validator by validator.

the class ImageReportAdviceBuilder method parseAltAdvice.

/**
 * Parses the bundled {@code vnu-alt-advice} resource with an
 * {@code HtmlParser}, feeding the events to a new
 * {@code ImageReportAdviceBuilder}, and returns what that builder reports
 * from {@code getFragments()}.
 *
 * @return the fragments collected by the builder during the parse
 * @throws IOException declared by the signature; raised by the parse call
 * @throws SAXException declared by the signature; raised by the parse call
 */
public static List<DocumentFragment> parseAltAdvice() throws IOException, SAXException {
    final HtmlParser adviceParser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET);
    final ImageReportAdviceBuilder builder = new ImageReportAdviceBuilder();
    adviceParser.setContentHandler(builder);

    // The advice file is looked up through this class's own class loader.
    final ClassLoader loader = ImageReportAdviceBuilder.class.getClassLoader();
    adviceParser.parse(new InputSource(loader.getResourceAsStream("nu/validator/localentities/files/vnu-alt-advice")));

    return builder.getFragments();
}
Also used : HtmlParser(nu.validator.htmlparser.sax.HtmlParser) InputSource(org.xml.sax.InputSource)

Example 10 with HtmlParser

use of nu.validator.htmlparser.sax.HtmlParser in project validator by validator.

the class SimpleDocumentValidator method setUpValidatorAndParsers.

/**
 * Prepares a Validator instance along with HTML and XML parsers, and then
 * attaches the Validator instance and supplied ErrorHandler instance to the
 * parsers so that the ErrorHandler is used for processing of all
 * document-validation problems reported.
 *
 * <p>The validator is created from {@code mainSchema}; when
 * {@code hasHtml5Schema} is set it is additionally combined with a validator
 * from {@code assertionSchema} and with a series of checker-based validators.
 * The method assigns the {@code validator}, {@code htmlReader},
 * {@code xmlParser} and {@code xmlReader} fields.
 *
 * @param docValidationErrHandler error handler for doc-validation reporting
 *
 * @param noStream whether HTML parser should buffer instead of streaming;
 *        when {@code false}, streamability violations are made fatal
 *
 * @param loadExternalEnts whether XML parser should load remote DTDs, etc.;
 *        when {@code false}, the external-entity features are switched off
 *        and a {@code NullEntityResolver} is installed
 *
 * @throws SAXException declared by the signature; presumably raised by the
 *         feature/property setters used below
 */
public void setUpValidatorAndParsers(ErrorHandler docValidationErrHandler, boolean noStream, boolean loadExternalEnts) throws SAXException {
    // Property map shared by the schema validators and every checker below.
    PropertyMapBuilder pmb = new PropertyMapBuilder();
    pmb.put(ValidateProperty.ERROR_HANDLER, docValidationErrHandler);
    pmb.put(ValidateProperty.XML_READER_CREATOR, new Jaxp11XMLReaderCreator());
    RngProperty.CHECK_ID_IDREF.add(pmb);
    PropertyMap jingPropertyMap = pmb.toPropertyMap();
    validator = this.mainSchema.createValidator(jingPropertyMap);
    if (this.hasHtml5Schema) {
        // Chain the assertion-schema validator and each checker onto the
        // main validator, one CombineValidator at a time.
        Validator assertionValidator = assertionSchema.createValidator(jingPropertyMap);
        validator = new CombineValidator(validator, assertionValidator);
        validator = new CombineValidator(validator, new CheckerValidator(new TableChecker(), jingPropertyMap));
        validator = new CombineValidator(validator, new CheckerValidator(new ConformingButObsoleteWarner(), jingPropertyMap));
        validator = new CombineValidator(validator, new CheckerValidator(new MicrodataChecker(), jingPropertyMap));
        validator = new CombineValidator(validator, new CheckerValidator(new NormalizationChecker(), jingPropertyMap));
        validator = new CombineValidator(validator, new CheckerValidator(new TextContentChecker(), jingPropertyMap));
        validator = new CombineValidator(validator, new CheckerValidator(new UncheckedSubtreeWarner(), jingPropertyMap));
        validator = new CombineValidator(validator, new CheckerValidator(new UnsupportedFeatureChecker(), jingPropertyMap));
        validator = new CombineValidator(validator, new CheckerValidator(new UsemapChecker(), jingPropertyMap));
        validator = new CombineValidator(validator, new CheckerValidator(new XmlPiChecker(), jingPropertyMap));
    }
    // HTML side: configure the parser, then expose it through getWiretap.
    HtmlParser htmlParser = new HtmlParser();
    htmlParser.addCharacterHandler(sourceCode);
    htmlParser.setCommentPolicy(XmlViolationPolicy.ALLOW);
    htmlParser.setContentNonXmlCharPolicy(XmlViolationPolicy.ALLOW);
    htmlParser.setContentSpacePolicy(XmlViolationPolicy.ALTER_INFOSET);
    htmlParser.setNamePolicy(XmlViolationPolicy.ALLOW);
    htmlParser.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET);
    htmlParser.setMappingLangToXmlLang(true);
    htmlParser.setHeuristics(Heuristics.ALL);
    htmlParser.setContentHandler(validator.getContentHandler());
    htmlParser.setErrorHandler(docValidationErrHandler);
    // NOTE(review): the next two calls repeat, with the same arguments, calls
    // already made a few lines above - they look redundant; confirm before removing.
    htmlParser.setNamePolicy(XmlViolationPolicy.ALLOW);
    htmlParser.setMappingLangToXmlLang(true);
    htmlParser.setFeature("http://xml.org/sax/features/unicode-normalization-checking", true);
    if (!noStream) {
        htmlParser.setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL);
    }
    htmlReader = getWiretap(htmlParser);
    // XML side: a SAXDriver wrapped in an IdFilter, configured via the filter.
    xmlParser = new SAXDriver();
    xmlParser.setContentHandler(validator.getContentHandler());
    if (lexicalHandler != null) {
        xmlParser.setProperty("http://xml.org/sax/properties/lexical-handler", lexicalHandler);
    }
    xmlReader = new IdFilter(xmlParser);
    xmlReader.setFeature("http://xml.org/sax/features/string-interning", true);
    xmlReader.setContentHandler(validator.getContentHandler());
    xmlReader.setFeature("http://xml.org/sax/features/unicode-normalization-checking", true);
    if (loadExternalEnts) {
        xmlReader.setEntityResolver(entityResolver);
    } else {
        xmlReader.setFeature("http://xml.org/sax/features/external-general-entities", false);
        xmlReader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        xmlReader.setEntityResolver(new NullEntityResolver());
    }
    // NOTE(review): this reassignment replaces the IdFilter stored in xmlReader
    // above with a wiretap built from xmlParser, so xmlReader no longer refers
    // to the filter after this line - confirm that this is intended.
    xmlReader = getWiretap(xmlParser);
    xmlParser.setErrorHandler(docValidationErrHandler);
    xmlParser.lockErrorHandler();
}
Also used : IdFilter(nu.validator.xml.IdFilter) NullEntityResolver(nu.validator.xml.NullEntityResolver) Jaxp11XMLReaderCreator(com.thaiopensource.xml.sax.Jaxp11XMLReaderCreator) ConformingButObsoleteWarner(nu.validator.checker.ConformingButObsoleteWarner) XmlPiChecker(nu.validator.checker.XmlPiChecker) TextContentChecker(nu.validator.checker.TextContentChecker) CheckerValidator(nu.validator.checker.jing.CheckerValidator) SAXDriver(nu.validator.gnu.xml.aelfred2.SAXDriver) UnsupportedFeatureChecker(nu.validator.checker.UnsupportedFeatureChecker) HtmlParser(nu.validator.htmlparser.sax.HtmlParser) PropertyMap(com.thaiopensource.util.PropertyMap) NormalizationChecker(nu.validator.checker.NormalizationChecker) UsemapChecker(nu.validator.checker.UsemapChecker) PropertyMapBuilder(com.thaiopensource.util.PropertyMapBuilder) TableChecker(nu.validator.checker.table.TableChecker) UncheckedSubtreeWarner(nu.validator.checker.UncheckedSubtreeWarner) CombineValidator(com.thaiopensource.relaxng.impl.CombineValidator) CheckerValidator(nu.validator.checker.jing.CheckerValidator) Validator(com.thaiopensource.validate.Validator) CombineValidator(com.thaiopensource.relaxng.impl.CombineValidator) MicrodataChecker(nu.validator.checker.MicrodataChecker)

Aggregations

HtmlParser (nu.validator.htmlparser.sax.HtmlParser)10 InputSource (org.xml.sax.InputSource)4 NullEntityResolver (nu.validator.xml.NullEntityResolver)3 XMLReader (org.xml.sax.XMLReader)3 File (java.io.File)2 SAXDriver (nu.validator.gnu.xml.aelfred2.SAXDriver)2 XmlSerializer (nu.validator.htmlparser.sax.XmlSerializer)2 CombineValidator (com.thaiopensource.relaxng.impl.CombineValidator)1 PropertyMap (com.thaiopensource.util.PropertyMap)1 PropertyMapBuilder (com.thaiopensource.util.PropertyMapBuilder)1 Validator (com.thaiopensource.validate.Validator)1 Jaxp11XMLReaderCreator (com.thaiopensource.xml.sax.Jaxp11XMLReaderCreator)1 SAXParserFactory (javax.xml.parsers.SAXParserFactory)1 ConformingButObsoleteWarner (nu.validator.checker.ConformingButObsoleteWarner)1 MicrodataChecker (nu.validator.checker.MicrodataChecker)1 NormalizationChecker (nu.validator.checker.NormalizationChecker)1 TextContentChecker (nu.validator.checker.TextContentChecker)1 UncheckedSubtreeWarner (nu.validator.checker.UncheckedSubtreeWarner)1 UnsupportedFeatureChecker (nu.validator.checker.UnsupportedFeatureChecker)1 UsemapChecker (nu.validator.checker.UsemapChecker)1