Use of nu.validator.htmlparser.sax.HtmlParser in the project validator by validator.
From the class Html5SpecBuilder, method parseSpec.
/**
 * Parses the given input with an {@code HtmlParser} and builds a {@code Spec} from it.
 *
 * <p>The parser is created with {@code XmlViolationPolicy.ALTER_INFOSET} and feeds its
 * content events to a fresh {@code Html5SpecBuilder}; once parsing has finished, the
 * builder's {@code buildSpec()} result is returned.
 *
 * @param in the input source handed to the parser
 * @return whatever {@code buildSpec()} yields on the handler that received the events
 * @throws IOException propagated from {@code parse}
 * @throws SAXException propagated from {@code parse}
 */
private static Spec parseSpec(InputSource in) throws IOException, SAXException {
    final HtmlParser htmlParser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET);
    final Html5SpecBuilder specBuilder = new Html5SpecBuilder();

    // The builder acts as the SAX content handler for the whole parse.
    htmlParser.setContentHandler(specBuilder);
    htmlParser.parse(in);

    return specBuilder.buildSpec();
}
Use of nu.validator.htmlparser.sax.HtmlParser in the project whole by wholeplatform.
From the class HtmlPersistenceKit, method doReadModel.
/**
 * Reads a model by parsing the provider's input stream as HTML.
 *
 * <p>SAX content and lexical events from the parser are routed to a single
 * {@code SaxConsumerHandler} wrapping a {@code ModelBuilderOperation}. The operation's
 * result is normalized and then passed through the
 * {@code HTML5Semantics#toHtml} behavior, whose result is returned.
 *
 * @param pp the persistence provider supplying the input stream
 * @return the entity returned by applying the {@code toHtml} behavior
 * @throws Exception propagated from parsing or from the subsequent operations
 */
protected IEntity doReadModel(IPersistenceProvider pp) throws Exception {
    final ModelBuilderOperation builderOperation = new ModelBuilderOperation();
    final SaxConsumerHandler eventSink = new SaxConsumerHandler(builderOperation, false);

    final HtmlParser htmlParser = new HtmlParser(XmlViolationPolicy.ALLOW);
    // The same handler receives both content events and lexical events.
    htmlParser.setContentHandler(eventSink);
    htmlParser.setProperty("http://xml.org/sax/properties/lexical-handler", eventSink);

    final InputSource source = new InputSource(pp.getInputStream());
    htmlParser.parse(source);

    final IEntity normalized = NormalizerOperation.normalize(builderOperation.wGetResult());
    return BehaviorUtils.apply("whole:org.whole.lang.html:HTML5Semantics#toHtml", normalized);
}
Use of nu.validator.htmlparser.sax.HtmlParser in the project validator by validator.
From the class RegisteredRelValuesBuilder, method parseRegistry.
/**
 * Parses the bundled {@code existing-rel-values} resource as HTML, feeding the SAX
 * content events to a fresh {@code RegisteredRelValuesBuilder}.
 *
 * <p>The resource is looked up through the class loader of
 * {@code RegisteredRelValuesBuilder}. The handler instance is not returned from this
 * method; nothing it records during the parse is exposed here.
 *
 * @throws IOException if the resource cannot be found on the classpath, or propagated
 *         from {@code parse}
 * @throws SAXException propagated from {@code parse}
 */
public static void parseRegistry() throws IOException, SAXException {
    final String resourcePath = "nu/validator/localentities/files/existing-rel-values";

    HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET);
    RegisteredRelValuesBuilder handler = new RegisteredRelValuesBuilder();
    parser.setContentHandler(handler);

    // try-with-resources guarantees the stream is closed even if parsing fails.
    try (java.io.InputStream stream =
            RegisteredRelValuesBuilder.class.getClassLoader().getResourceAsStream(resourcePath)) {
        // getResourceAsStream signals a missing resource with null; fail here with a
        // clear message instead of handing an empty InputSource to the parser.
        if (stream == null) {
            throw new IOException("Classpath resource not found: " + resourcePath);
        }
        parser.parse(new InputSource(stream));
    }
}
Use of nu.validator.htmlparser.sax.HtmlParser in the project validator by validator.
From the class ValidationWorker, method setupParser.
/**
 * Creates and configures the HTML parser used by this worker.
 *
 * <p>A default-constructed {@code HtmlParser} is configured with the policies below,
 * registered with this object as its document mode handler, and wrapped in an
 * {@code AttributesPermutingXMLReaderWrapper}. This object is also installed as the
 * error handler on the wrapper.
 *
 * @return the wrapping {@code XMLReader}
 */
private XMLReader setupParser() {
    final HtmlParser parser = new HtmlParser();

    // XML-violation policies: most are allowed; content space and xmlns alter the infoset.
    parser.setCommentPolicy(XmlViolationPolicy.ALLOW);
    parser.setContentNonXmlCharPolicy(XmlViolationPolicy.ALLOW);
    parser.setContentSpacePolicy(XmlViolationPolicy.ALTER_INFOSET);
    parser.setNamePolicy(XmlViolationPolicy.ALLOW);
    parser.setStreamabilityViolationPolicy(XmlViolationPolicy.ALLOW);
    parser.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET);

    // Remaining parser behaviour switches.
    parser.setMappingLangToXmlLang(true);
    parser.setHeuristics(Heuristics.ALL);
    parser.setDoctypeExpectation(DoctypeExpectation.NO_DOCTYPE_ERRORS);
    parser.setCheckingNormalization(true);
    parser.setDocumentModeHandler(this);

    // Errors are reported through the wrapper, not through the raw parser.
    final XMLReader wrapped = new AttributesPermutingXMLReaderWrapper(parser);
    wrapped.setErrorHandler(this);
    return wrapped;
}
Use of nu.validator.htmlparser.sax.HtmlParser in the project validator by validator.
From the class ParserPerfHarness, method main.
/**
 * Command-line entry point of the parser performance harness.
 *
 * <p>Loads the file named by {@code args[2]} into memory, builds either an
 * {@code HtmlParser} or a JAXP SAX {@code XMLReader} that serializes into a
 * {@code NullWriter}, and then runs the harness loop twice with the same reader:
 * once under the heading "Warmup:" and once under "Real:". The value returned by each
 * {@code runLoop()} call is printed to standard output.
 *
 * @param args exactly three values are read:
 *        {@code args[0]} — {@code "h"} selects the HTML parser, any other value the
 *        namespace-aware, non-validating SAX parser;
 *        {@code args[1]} — run duration in minutes (converted to milliseconds);
 *        {@code args[2]} — path of the test data file
 * @throws IllegalArgumentException if fewer than three arguments are supplied, or if
 *         {@code args[1]} is not a valid {@code long}
 * @throws IOException propagated from loading the test file or from the harness
 * @throws SAXException propagated from parser setup or from the harness
 * @throws ParserConfigurationException if the SAX parser cannot be created
 */
public static void main(String[] args) throws SAXException, IOException, ParserConfigurationException {
    // Fail with a usage message instead of a bare ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 3) {
        throw new IllegalArgumentException(
                "Usage: ParserPerfHarness <h|x> <minutes> <path>"
                + " ('h' selects the HTML parser, any other value the XML parser)");
    }

    boolean html = "h".equals(args[0]);
    long duration = Long.parseLong(args[1]) * 60000L; // minutes -> milliseconds
    String path = args[2];

    char[] testData = loadFileIntoArray(new File(path));
    XmlSerializer ch = new XmlSerializer(new NullWriter());

    XMLReader reader;
    if (html) {
        HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALLOW);
        parser.setContentHandler(ch);
        parser.setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL);
        reader = parser;
    } else {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setNamespaceAware(true);
        factory.setValidating(false);
        reader = factory.newSAXParser().getXMLReader();
        reader.setContentHandler(ch);
        reader.setEntityResolver(new NullEntityResolver());
    }

    // First pass; the first constructor argument is "now + duration"
    // (presumably the time at which the loop stops — confirm against the constructor).
    System.out.println("Warmup:");
    System.out.println((new ParserPerfHarness(System.currentTimeMillis() + duration, reader, testData)).runLoop());

    // Request a GC between the two passes.
    System.gc();

    System.out.println("Real:");
    System.out.println((new ParserPerfHarness(System.currentTimeMillis() + duration, reader, testData)).runLoop());
}
Aggregations