use of org.xml.sax.Attributes in project tika by apache.
the class HtmlParserTest method testParseAscii.
/**
 * Parses the sample page {@code /test-documents/testHTML.html} and verifies the
 * extracted metadata, the anchor attributes observed while parsing, and the
 * presence of two expected phrases in the body text.
 */
@Test
public void testParseAscii() throws Exception {
    final StringWriter hrefValues = new StringWriter();
    final StringWriter nameValues = new StringWriter();

    // Records the href of every <a> element; when an anchor has no href,
    // its name attribute (if any) is recorded instead.
    ContentHandler anchorCollector = new DefaultHandler() {
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException {
            if (!"a".equals(localName)) {
                return;
            }
            String hrefAttr = attrs.getValue("href");
            if (hrefAttr != null) {
                hrefValues.append(hrefAttr);
                return;
            }
            String nameAttr = attrs.getValue("name");
            if (nameAttr != null) {
                nameValues.append(nameAttr);
            }
        }
    };

    ContentHandler textHandler = new BodyContentHandler();
    Metadata parsedMetadata = new Metadata();
    String resource = "/test-documents/testHTML.html";
    try (InputStream in = HtmlParserTest.class.getResourceAsStream(resource)) {
        // Feed the same SAX events to the text collector and the anchor collector.
        ContentHandler combined = new TeeContentHandler(textHandler, anchorCollector);
        new HtmlParser().parse(in, combined, parsedMetadata, new ParseContext());
    }

    // Metadata taken from the document head.
    assertEquals("Title : Test Indexation Html", parsedMetadata.get(TikaCoreProperties.TITLE));
    assertEquals("Tika Developers", parsedMetadata.get("Author"));
    assertEquals("5", parsedMetadata.get("refresh"));
    assertEquals("51.2312", parsedMetadata.get(Geographic.LATITUDE));
    assertEquals("-5.1987", parsedMetadata.get(Geographic.LONGITUDE));

    // Anchor attributes gathered by the custom handler.
    assertEquals("http://www.apache.org/", hrefValues.toString());
    assertEquals("test-anchor", nameValues.toString());

    // Body text must contain both phrases.
    String extractedText = textHandler.toString();
    String[] expectedPhrases = {"Test Indexation Html", "Indexation du fichier"};
    for (String phrase : expectedPhrases) {
        assertTrue("Did not contain expected text:" + phrase, extractedText.contains(phrase));
    }
}
use of org.xml.sax.Attributes in project tika by apache.
the class HtmlParserTest method assertScriptLink.
/**
 * Parses {@code html} as {@code text/html} and asserts that exactly one
 * {@code script} element carrying a {@code src} attribute was reported, and
 * that its {@code src} equals {@code url}.
 *
 * @param html markup to parse (encoded as UTF-8 before parsing)
 * @param url  the expected value of the single script {@code src}
 */
private void assertScriptLink(String html, String url) throws Exception {
    final List<String> scriptSources = new ArrayList<>();

    // Collects the src attribute of every <script> start tag that has one.
    DefaultHandler scriptCollector = new DefaultHandler() {
        @Override
        public void startElement(String uri, String localName, String name, Attributes atts) {
            if (!name.equals("script")) {
                return;
            }
            String src = atts.getValue("", "src");
            if (src != null) {
                scriptSources.add(src);
            }
        }
    };

    Metadata htmlMetadata = new Metadata();
    htmlMetadata.set(Metadata.CONTENT_TYPE, "text/html");

    // IdentityHtmlMapper is needed to extract <script> tags
    ParseContext parseContext = new ParseContext();
    parseContext.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);

    ByteArrayInputStream input = new ByteArrayInputStream(html.getBytes(UTF_8));
    new HtmlParser().parse(input, scriptCollector, htmlMetadata, parseContext);

    assertEquals(1, scriptSources.size());
    assertEquals(url, scriptSources.get(0));
}
use of org.xml.sax.Attributes in project Lucee by lucee.
the class aprint method _eo.
/**
 * Prints one object to {@code ps}, choosing the output routine by the
 * object's runtime type.
 *
 * Most recognised types are cast and handed to the matching {@code _eo}
 * overload. Cookies and SAX input sources are rendered directly here, and
 * anything unrecognised is written with {@code println}. The checks run in
 * a fixed order: {@code Set} and {@code List} are tested ahead of
 * {@code Collection}, so they reach their more specific overloads.
 *
 * @param ps stream that receives the output
 * @param o  object to print
 */
private static void _eo(PrintStream ps, Object o) {
    if (o instanceof Enumeration) {
        _eo(ps, (Enumeration) o);
    } else if (o instanceof Object[]) {
        _eo(ps, (Object[]) o);
    } else if (o instanceof boolean[]) {
        _eo(ps, (boolean[]) o);
    } else if (o instanceof byte[]) {
        _eo(ps, (byte[]) o);
    } else if (o instanceof int[]) {
        _eo(ps, (int[]) o);
    } else if (o instanceof float[]) {
        _eo(ps, (float[]) o);
    } else if (o instanceof long[]) {
        _eo(ps, (long[]) o);
    } else if (o instanceof double[]) {
        _eo(ps, (double[]) o);
    } else if (o instanceof char[]) {
        _eo(ps, (char[]) o);
    } else if (o instanceof short[]) {
        _eo(ps, (short[]) o);
    } else if (o instanceof Set) {
        _eo(ps, (Set) o);
    } else if (o instanceof List) {
        _eo(ps, (List) o);
    } else if (o instanceof Map) {
        _eo(ps, (Map) o);
    } else if (o instanceof Collection) {
        _eo(ps, (Collection) o);
    } else if (o instanceof Iterator) {
        _eo(ps, (Iterator) o);
    } else if (o instanceof NamedNodeMap) {
        _eo(ps, (NamedNodeMap) o);
    } else if (o instanceof ResultSet) {
        _eo(ps, (ResultSet) o);
    } else if (o instanceof Node) {
        _eo(ps, (Node) o);
    } else if (o instanceof Throwable) {
        _eo(ps, (Throwable) o);
    } else if (o instanceof Attributes) {
        _eo(ps, (Attributes) o);
    } else if (o instanceof Cookie) {
        // Cookies are rendered as a single line listing their properties.
        Cookie cookie = (Cookie) o;
        ps.println("Cookie(name:" + cookie.getName()
                + ";domain:" + cookie.getDomain()
                + ";maxage:" + cookie.getMaxAge()
                + ";path:" + cookie.getPath()
                + ";value:" + cookie.getValue()
                + ";version:" + cookie.getVersion()
                + ";secure:" + cookie.getSecure()
                + ")");
    } else if (o instanceof InputSource) {
        // An input source is printed as the text of its character stream.
        InputSource source = (InputSource) o;
        Reader reader = source.getCharacterStream();
        try {
            ps.println(IOUtil.toString(source.getCharacterStream()));
        } catch (IOException ignored) {
            // Best effort: a read failure is dropped and nothing is printed.
        } finally {
            IOUtil.closeEL(reader);
        }
    } else {
        ps.println(o);
    }
}
use of org.xml.sax.Attributes in project spring-security-oauth by spring-projects.
the class SparklrServiceImpl method getSparklrPhotoIds.
/**
 * Downloads the photo list XML from {@code sparklrPhotoListURL} and returns
 * the {@code id} attribute of every {@code photo} element, in the order the
 * elements are encountered. A {@code photo} element without an {@code id}
 * attribute contributes a {@code null} entry.
 *
 * The XML comes from a remote service, so the SAX parser is hardened against
 * XML external entity (XXE) attacks before the response is parsed.
 *
 * @return the photo ids found in the response
 * @throws IllegalStateException if the parser cannot be configured, or the
 *         response cannot be read or is not well-formed XML
 */
public List<String> getSparklrPhotoIds() throws SparklrException {
    try {
        InputStream photosXML = new ByteArrayInputStream(sparklrRestTemplate.getForObject(URI.create(sparklrPhotoListURL), byte[].class));
        final List<String> photoIds = new ArrayList<String>();
        SAXParserFactory parserFactory = SAXParserFactory.newInstance();
        parserFactory.setValidating(false);
        parserFactory.setXIncludeAware(false);
        parserFactory.setNamespaceAware(false);
        // XXE hardening: the document is remote input, so do not let it pull in
        // external entities.
        parserFactory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
        parserFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        parserFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        SAXParser parser = parserFactory.newSAXParser();
        parser.parse(photosXML, new DefaultHandler() {
            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
                // The factory is not namespace-aware, so match on the qualified name.
                if ("photo".equals(qName)) {
                    photoIds.add(attributes.getValue("id"));
                }
            }
        });
        return photoIds;
    } catch (IOException | SAXException | ParserConfigurationException e) {
        // All parsing failures are reported the same way, with the cause preserved.
        throw new IllegalStateException(e);
    }
}
use of org.xml.sax.Attributes in project spring-security-oauth by spring-projects.
the class GoogleServiceImpl method getLastTenPicasaPictureURLs.
/**
 * Requests the user's Picasa photo feed (at most ten photo entries) and
 * returns the URLs of the media thumbnails whose width is strictly between
 * 100 and 200 pixels.
 *
 * The feed is remote input, so the SAX parser is hardened against XML
 * external entity (XXE) attacks before the response is parsed.
 *
 * @return thumbnail URLs in the order they appear in the feed
 * @throws IllegalStateException if the parser cannot be configured, or the
 *         feed cannot be read or is not well-formed XML
 */
public List<String> getLastTenPicasaPictureURLs() {
    byte[] bytes = getGoogleRestTemplate().getForObject(URI.create("https://picasaweb.google.com/data/feed/api/user/default?kind=photo&max-results=10"), byte[].class);
    InputStream photosXML = new ByteArrayInputStream(bytes);
    final List<String> photoUrls = new ArrayList<String>();
    SAXParserFactory parserFactory = SAXParserFactory.newInstance();
    parserFactory.setValidating(false);
    parserFactory.setXIncludeAware(false);
    // Namespace awareness is required: thumbnails are matched by namespace URI.
    parserFactory.setNamespaceAware(true);
    try {
        // XXE hardening: do not let the remote document pull in external entities.
        // setFeature throws checked exceptions, hence it lives inside the try.
        parserFactory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
        parserFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        parserFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        SAXParser parser = parserFactory.newSAXParser();
        parser.parse(photosXML, new DefaultHandler() {
            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
                if ("http://search.yahoo.com/mrss/".equals(uri) && "thumbnail".equalsIgnoreCase(localName)) {
                    try {
                        int width = Integer.parseInt(attributes.getValue("width"));
                        if (width > 100 && width < 200) {
                            // just do the thumbnails that are between 100 and 200 px...
                            photoUrls.add(attributes.getValue("url"));
                        }
                    } catch (NumberFormatException ignored) {
                        // A missing or non-numeric width just means this thumbnail is skipped.
                    }
                }
            }
        });
        return photoUrls;
    } catch (ParserConfigurationException | SAXException | IOException e) {
        // All parsing failures are reported the same way, with the cause preserved.
        throw new IllegalStateException(e);
    }
}
Aggregations