use of org.xml.sax.helpers.DefaultHandler in project robovm by robovm.
the class SaxTest method testYesPrefixesYesNamespaces.
/**
 * Checks the element and attribute names handed to {@code startElement} when
 * {@code parse} is invoked with both boolean flags set to {@code true}
 * (prefix reporting and namespace processing, going by the test name).
 *
 * <p>Android's Expat-based SAX parser fails this test because Expat doesn't
 * supply us with our much desired {@code xmlns="http://..."} attributes.
 */
public void testYesPrefixesYesNamespaces() throws Exception {
    // Case 1: an unprefixed element carrying a single unprefixed attribute.
    final String plainXml = "<foo bar=\"baz\"/>";
    DefaultHandler plainHandler = new DefaultHandler() {
        @Override
        public void startElement(String namespaceUri, String local, String qualified, Attributes attrs) {
            // The element itself: no namespace, identical local and qualified names.
            assertEquals("", namespaceUri);
            assertEquals("foo", local);
            assertEquals("foo", qualified);

            // Exactly one attribute, also without a namespace.
            assertEquals(1, attrs.getLength());
            assertEquals("", attrs.getURI(0));
            assertEquals("bar", attrs.getLocalName(0));
            assertEquals("bar", attrs.getQName(0));
        }
    };
    parse(true, true, plainXml, plainHandler);

    // Case 2: a prefixed element whose prefix is declared on the element itself.
    final String prefixedXml = "<a:foo a:bar=\"baz\" xmlns:a=\"http://quux\"/>";
    DefaultHandler prefixedHandler = new DefaultHandler() {
        @Override
        public void startElement(String namespaceUri, String local, String qualified, Attributes attrs) {
            // The element resolves to the declared namespace and keeps its prefix in the qName.
            assertEquals("http://quux", namespaceUri);
            assertEquals("foo", local);
            assertEquals("a:foo", qualified);

            // Two attributes are expected: the regular one plus the xmlns declaration.
            assertEquals(2, attrs.getLength());

            // Index 0: the namespaced attribute a:bar.
            assertEquals("http://quux", attrs.getURI(0));
            assertEquals("bar", attrs.getLocalName(0));
            assertEquals("a:bar", attrs.getQName(0));

            // Index 1: the xmlns:a declaration, reported with empty URI and local name.
            assertEquals("", attrs.getURI(1));
            assertEquals("", attrs.getLocalName(1));
            assertEquals("xmlns:a", attrs.getQName(1));
        }
    };
    parse(true, true, prefixedXml, prefixedHandler);
}
use of org.xml.sax.helpers.DefaultHandler in project tika by apache.
the class TikaTest method getRecursiveMetadata.
/**
 * Parses a test document through a {@code RecursiveParserWrapper} and returns
 * the metadata the wrapper collected.
 *
 * @param filePath path of the document, relative to {@code /test-documents/}
 * @param context  parse context passed straight through to the wrapper
 * @return the list returned by the wrapper's {@code getMetadata()} after parsing
 * @throws Exception if the resource cannot be read or parsing fails
 */
protected List<Metadata> getRecursiveMetadata(String filePath, ParseContext context) throws Exception {
    Parser autoDetect = new AutoDetectParser();
    // XML content handlers; -1 is passed as the factory's second argument
    // (presumably "no write limit" -- confirm against BasicContentHandlerFactory).
    BasicContentHandlerFactory xmlHandlers =
            new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.XML, -1);
    RecursiveParserWrapper recursive = new RecursiveParserWrapper(autoDetect, xmlHandlers);

    String resourcePath = "/test-documents/" + filePath;
    try (InputStream in = getResourceAsStream(resourcePath)) {
        // The no-op DefaultHandler and fresh Metadata are throwaway arguments;
        // the result is read back from the wrapper below.
        recursive.parse(in, new DefaultHandler(), new Metadata(), context);
    }
    return recursive.getMetadata();
}
use of org.xml.sax.helpers.DefaultHandler in project tika by apache.
the class TestParsers method testEXCELExtraction.
/**
 * Extracts {@code testEXCEL.xls} twice: once to a string via
 * {@code tika.parseToString}, checking for a known phrase, and once through
 * the parser obtained from {@code tika.getParser()}, checking the title metadata.
 */
@Test
public void testEXCELExtraction() throws Exception {
    final String expectedPhrase = "Numbers and their Squares";
    File spreadsheet = getResourceAsFile("/test-documents/testEXCEL.xls");

    // Plain-text extraction must contain the known phrase.
    String extractedText = tika.parseToString(spreadsheet);
    String failureMessage = "Text does not contain '" + expectedPhrase + "'";
    assertTrue(failureMessage, extractedText.contains(expectedPhrase));

    // Second pass: only the metadata is of interest, so content events go to a no-op handler.
    Parser excelParser = tika.getParser();
    Metadata extracted = new Metadata();
    try (InputStream in = new FileInputStream(spreadsheet)) {
        excelParser.parse(in, new DefaultHandler(), extracted, new ParseContext());
    }
    assertEquals("Simple Excel document", extracted.get(TikaCoreProperties.TITLE));
}
use of org.xml.sax.helpers.DefaultHandler in project tika by apache.
the class TestParsers method testWORDxtraction.
/**
 * Parses {@code testWORD.doc} with the parser obtained from
 * {@code tika.getParser()} and checks the title recorded in the metadata.
 */
@Test
public void testWORDxtraction() throws Exception {
    File wordDocument = getResourceAsFile("/test-documents/testWORD.doc");
    Parser wordParser = tika.getParser();
    Metadata extracted = new Metadata();

    // Only the metadata is checked, so content events go to a no-op handler.
    try (InputStream in = new FileInputStream(wordDocument)) {
        wordParser.parse(in, new DefaultHandler(), extracted, new ParseContext());
    }

    String title = extracted.get(TikaCoreProperties.TITLE);
    assertEquals("Sample Word Document", title);
}
use of org.xml.sax.helpers.DefaultHandler in project tika by apache.
the class HtmlParserTest method assertRelativeLink.
/**
 * Parses a minimal HTML page containing a {@code <base href>} and a single
 * anchor, and asserts that exactly one {@code href} is seen on an {@code a}
 * element and that it equals the expected URL.
 *
 * @param url      the href value the handler is expected to receive
 * @param base     value placed in the document's {@code <base href>}
 * @param relative value placed in the anchor's {@code href}
 * @throws Exception if parsing fails
 */
private void assertRelativeLink(String url, String base, String relative) throws Exception {
    String html = "<html><head><base href=\"" + base + "\"></head><body><a href=\""
            + relative + "\">test</a></body></html>";
    final List<String> collectedHrefs = new ArrayList<>();

    // Records the href of every <a> element the parser reports.
    DefaultHandler anchorCollector = new DefaultHandler() {
        @Override
        public void startElement(String uri, String localName, String name, Attributes atts) {
            if (!name.equals("a")) {
                return;
            }
            String href = atts.getValue("", "href");
            if (href != null) {
                collectedHrefs.add(href);
            }
        }
    };

    byte[] htmlBytes = html.getBytes(UTF_8);
    new HtmlParser().parse(new ByteArrayInputStream(htmlBytes), anchorCollector, new Metadata(), new ParseContext());

    assertEquals(1, collectedHrefs.size());
    assertEquals(url, collectedHrefs.get(0));
}
Aggregations