Search in sources :

Example 51 with Attributes

use of org.xml.sax.Attributes in project tika by apache.

In the class HtmlParserTest, the method testParseAscii:

/**
 * Parses the {@code testHTML.html} test document and checks the extracted
 * metadata, the collected anchor attributes and the body text.
 */
@Test
public void testParseAscii() throws Exception {
    final StringWriter hrefValues = new StringWriter();
    final StringWriter nameValues = new StringWriter();
    // For every <a> element, records its href attribute; when href is absent,
    // records its name attribute instead.
    ContentHandler anchorCollector = new DefaultHandler() {

        @Override
        public void startElement(String uri, String local, String qName, Attributes attrs) throws SAXException {
            if (!"a".equals(local)) {
                return;
            }
            String hrefAttr = attrs.getValue("href");
            if (hrefAttr != null) {
                hrefValues.append(hrefAttr);
                return;
            }
            String nameAttr = attrs.getValue("name");
            if (nameAttr != null) {
                nameValues.append(nameAttr);
            }
        }
    };
    ContentHandler bodyText = new BodyContentHandler();
    Metadata meta = new Metadata();
    String resource = "/test-documents/testHTML.html";
    try (InputStream in = HtmlParserTest.class.getResourceAsStream(resource)) {
        // Feed the same SAX events to both the body collector and the anchor collector.
        ContentHandler combined = new TeeContentHandler(bodyText, anchorCollector);
        new HtmlParser().parse(in, combined, meta, new ParseContext());
    }

    // Metadata extracted from the document head.
    assertEquals("Title : Test Indexation Html", meta.get(TikaCoreProperties.TITLE));
    assertEquals("Tika Developers", meta.get("Author"));
    assertEquals("5", meta.get("refresh"));
    assertEquals("51.2312", meta.get(Geographic.LATITUDE));
    assertEquals("-5.1987", meta.get(Geographic.LONGITUDE));

    // Anchor attributes seen by the custom handler.
    assertEquals("http://www.apache.org/", hrefValues.toString());
    assertEquals("test-anchor", nameValues.toString());

    // Body text must contain each expected fragment.
    String text = bodyText.toString();
    String[] expectedFragments = { "Test Indexation Html", "Indexation du fichier" };
    for (String fragment : expectedFragments) {
        assertTrue("Did not contain expected text:" + fragment, text.contains(fragment));
    }
}
Also used : BodyContentHandler(org.apache.tika.sax.BodyContentHandler) StringWriter(java.io.StringWriter) ByteArrayInputStream(java.io.ByteArrayInputStream) TikaInputStream(org.apache.tika.io.TikaInputStream) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) Attributes(org.xml.sax.Attributes) ParseContext(org.apache.tika.parser.ParseContext) TeeContentHandler(org.apache.tika.sax.TeeContentHandler) LinkContentHandler(org.apache.tika.sax.LinkContentHandler) TeeContentHandler(org.apache.tika.sax.TeeContentHandler) BodyContentHandler(org.apache.tika.sax.BodyContentHandler) ContentHandler(org.xml.sax.ContentHandler) DefaultHandler(org.xml.sax.helpers.DefaultHandler) Test(org.junit.Test) TikaTest(org.apache.tika.TikaTest)

Example 52 with Attributes

use of org.xml.sax.Attributes in project tika by apache.

In the class HtmlParserTest, the method assertScriptLink:

/**
 * Parses {@code html} and asserts that exactly one {@code script} element
 * carrying a {@code src} attribute was reported, and that this attribute
 * equals {@code url}.
 *
 * @param html the HTML markup to parse (encoded as UTF-8 before parsing)
 * @param url  the expected value of the single collected {@code src} attribute
 */
private void assertScriptLink(String html, String url) throws Exception {
    final List<String> scriptSources = new ArrayList<String>();
    // Records the src attribute of every script element that has one.
    DefaultHandler scriptCollector = new DefaultHandler() {

        @Override
        public void startElement(String uri, String local, String qName, Attributes attributes) {
            if (!qName.equals("script")) {
                return;
            }
            String src = attributes.getValue("", "src");
            if (src != null) {
                scriptSources.add(src);
            }
        }
    };

    Metadata meta = new Metadata();
    meta.set(Metadata.CONTENT_TYPE, "text/html");

    // IdentityHtmlMapper is needed to extract <script> tags
    ParseContext parseContext = new ParseContext();
    parseContext.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);

    ByteArrayInputStream input = new ByteArrayInputStream(html.getBytes(UTF_8));
    new HtmlParser().parse(input, scriptCollector, meta, parseContext);

    assertEquals(1, scriptSources.size());
    assertEquals(url, scriptSources.get(0));
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) ParseContext(org.apache.tika.parser.ParseContext) Metadata(org.apache.tika.metadata.Metadata) ArrayList(java.util.ArrayList) Attributes(org.xml.sax.Attributes) DefaultHandler(org.xml.sax.helpers.DefaultHandler)

Example 53 with Attributes

use of org.xml.sax.Attributes in project Lucee by lucee.

In the class aprint, the method _eo:

/**
 * Prints {@code o} to {@code ps}, dispatching on its runtime type to the
 * matching typed {@code _eo} overload. The checks run in a fixed order, so
 * the first matching type wins (for example {@code Set} and {@code List}
 * are tested before {@code Collection}). Cookies and input sources are
 * rendered inline; anything else falls back to {@code ps.println(o)}.
 *
 * @param ps the stream to print to
 * @param o  the value to print
 */
private static void _eo(PrintStream ps, Object o) {
    // Arrays and enumerations.
    if (o instanceof Enumeration) {
        _eo(ps, (Enumeration) o);
        return;
    }
    if (o instanceof Object[]) {
        _eo(ps, (Object[]) o);
        return;
    }
    if (o instanceof boolean[]) {
        _eo(ps, (boolean[]) o);
        return;
    }
    if (o instanceof byte[]) {
        _eo(ps, (byte[]) o);
        return;
    }
    if (o instanceof int[]) {
        _eo(ps, (int[]) o);
        return;
    }
    if (o instanceof float[]) {
        _eo(ps, (float[]) o);
        return;
    }
    if (o instanceof long[]) {
        _eo(ps, (long[]) o);
        return;
    }
    if (o instanceof double[]) {
        _eo(ps, (double[]) o);
        return;
    }
    if (o instanceof char[]) {
        _eo(ps, (char[]) o);
        return;
    }
    if (o instanceof short[]) {
        _eo(ps, (short[]) o);
        return;
    }

    // Collections: the more specific interfaces are tested before Collection.
    if (o instanceof Set) {
        _eo(ps, (Set) o);
        return;
    }
    if (o instanceof List) {
        _eo(ps, (List) o);
        return;
    }
    if (o instanceof Map) {
        _eo(ps, (Map) o);
        return;
    }
    if (o instanceof Collection) {
        _eo(ps, (Collection) o);
        return;
    }
    if (o instanceof Iterator) {
        _eo(ps, (Iterator) o);
        return;
    }

    // DOM, JDBC, exception and SAX types.
    if (o instanceof NamedNodeMap) {
        _eo(ps, (NamedNodeMap) o);
        return;
    }
    if (o instanceof ResultSet) {
        _eo(ps, (ResultSet) o);
        return;
    }
    if (o instanceof Node) {
        _eo(ps, (Node) o);
        return;
    }
    if (o instanceof Throwable) {
        _eo(ps, (Throwable) o);
        return;
    }
    if (o instanceof Attributes) {
        _eo(ps, (Attributes) o);
        return;
    }

    // Types rendered inline.
    if (o instanceof Cookie) {
        Cookie cookie = (Cookie) o;
        String rendered = "Cookie(name:" + cookie.getName()
                + ";domain:" + cookie.getDomain()
                + ";maxage:" + cookie.getMaxAge()
                + ";path:" + cookie.getPath()
                + ";value:" + cookie.getValue()
                + ";version:" + cookie.getVersion()
                + ";secure:" + cookie.getSecure() + ")";
        ps.println(rendered);
        return;
    }
    if (o instanceof InputSource) {
        InputSource source = (InputSource) o;
        Reader reader = source.getCharacterStream();
        try {
            ps.println(IOUtil.toString(source.getCharacterStream()));
        } catch (IOException ignored) {
            // A read failure is swallowed here: nothing is printed for this source.
        } finally {
            IOUtil.closeEL(reader);
        }
        return;
    }

    // Fallback for every other type (including null).
    ps.println(o);
}
Also used : Cookie(javax.servlet.http.Cookie) InputSource(org.xml.sax.InputSource) Enumeration(java.util.Enumeration) ResultSet(java.sql.ResultSet) Set(java.util.Set) NamedNodeMap(org.w3c.dom.NamedNodeMap) Node(org.w3c.dom.Node) Attributes(org.xml.sax.Attributes) Reader(java.io.Reader) IOException(java.io.IOException) ListIterator(java.util.ListIterator) Iterator(java.util.Iterator) ResultSet(java.sql.ResultSet) Collection(java.util.Collection) List(java.util.List) Map(java.util.Map) NamedNodeMap(org.w3c.dom.NamedNodeMap)

Example 54 with Attributes

use of org.xml.sax.Attributes in project spring-security-oauth by spring-projects.

In the class SparklrServiceImpl, the method getSparklrPhotoIds:

/**
 * Fetches the Sparklr photo list XML and returns the {@code id} attribute of
 * every {@code photo} element, in document order.
 *
 * <p>The XML comes from a remote service, so the SAX parser is hardened
 * against XML External Entity (XXE) attacks before parsing: secure
 * processing is enabled and external general/parameter entities are
 * disabled.
 *
 * @return the collected photo ids; an element without an {@code id}
 *         attribute contributes a {@code null} entry
 * @throws SparklrException declared by the signature; not thrown by the code in this method
 * @throws IllegalStateException if the parser cannot be configured, or the
 *         response cannot be read or parsed
 */
public List<String> getSparklrPhotoIds() throws SparklrException {
    try {
        InputStream photosXML = new ByteArrayInputStream(sparklrRestTemplate.getForObject(URI.create(sparklrPhotoListURL), byte[].class));
        final List<String> photoIds = new ArrayList<String>();
        SAXParserFactory parserFactory = SAXParserFactory.newInstance();
        parserFactory.setValidating(false);
        parserFactory.setXIncludeAware(false);
        parserFactory.setNamespaceAware(false);
        // XXE hardening: the document is fetched over the network, so it must not be
        // able to make the parser resolve external entities.
        parserFactory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
        parserFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        parserFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        SAXParser parser = parserFactory.newSAXParser();
        parser.parse(photosXML, new DefaultHandler() {

            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
                // Namespace awareness is off, so the element name is matched via qName.
                if ("photo".equals(qName)) {
                    photoIds.add(attributes.getValue("id"));
                }
            }
        });
        return photoIds;
    } catch (IOException e) {
        throw new IllegalStateException(e);
    } catch (SAXException e) {
        // Also covers SAXNotRecognizedException / SAXNotSupportedException from setFeature.
        throw new IllegalStateException(e);
    } catch (ParserConfigurationException e) {
        throw new IllegalStateException(e);
    }
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) Attributes(org.xml.sax.Attributes) IOException(java.io.IOException) DefaultHandler(org.xml.sax.helpers.DefaultHandler) SAXException(org.xml.sax.SAXException) ByteArrayInputStream(java.io.ByteArrayInputStream) SAXParser(javax.xml.parsers.SAXParser) ParserConfigurationException(javax.xml.parsers.ParserConfigurationException) SAXParserFactory(javax.xml.parsers.SAXParserFactory)

Example 55 with Attributes

use of org.xml.sax.Attributes in project spring-security-oauth by spring-projects.

In the class GoogleServiceImpl, the method getLastTenPicasaPictureURLs:

/**
 * Fetches the user's Picasa photo feed (limited to 10 results by the request
 * URL) and returns the {@code url} attribute of every Media RSS
 * {@code thumbnail} element whose {@code width} is strictly between 100 and
 * 200.
 *
 * <p>The feed is remote content, so the SAX parser is hardened against XML
 * External Entity (XXE) attacks before parsing: secure processing is enabled
 * and external general/parameter entities are disabled.
 *
 * @return the matching thumbnail URLs, in document order
 * @throws IllegalStateException if the parser cannot be configured, or the
 *         feed cannot be read or parsed
 */
public List<String> getLastTenPicasaPictureURLs() {
    byte[] bytes = getGoogleRestTemplate().getForObject(URI.create("https://picasaweb.google.com/data/feed/api/user/default?kind=photo&max-results=10"), byte[].class);
    InputStream photosXML = new ByteArrayInputStream(bytes);
    final List<String> photoUrls = new ArrayList<String>();
    SAXParserFactory parserFactory = SAXParserFactory.newInstance();
    parserFactory.setValidating(false);
    parserFactory.setXIncludeAware(false);
    // Namespace awareness is required: thumbnails are matched by namespace URI + local name.
    parserFactory.setNamespaceAware(true);
    try {
        // XXE hardening: the document is fetched over the network, so it must not be
        // able to make the parser resolve external entities.
        parserFactory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
        parserFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        parserFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        SAXParser parser = parserFactory.newSAXParser();
        parser.parse(photosXML, new DefaultHandler() {

            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
                if ("http://search.yahoo.com/mrss/".equals(uri) && "thumbnail".equalsIgnoreCase(localName)) {
                    try {
                        int width = Integer.parseInt(attributes.getValue("width"));
                        if (width > 100 && width < 200) {
                            // just do the thumbnails that are between 100 and 200 px...
                            photoUrls.add(attributes.getValue("url"));
                        }
                    } catch (NumberFormatException e) {
                        // A missing or non-numeric width means the thumbnail is skipped.
                    }
                }
            }
        });
        return photoUrls;
    } catch (ParserConfigurationException e) {
        throw new IllegalStateException(e);
    } catch (SAXException e) {
        // Also covers SAXNotRecognizedException / SAXNotSupportedException from setFeature.
        throw new IllegalStateException(e);
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) Attributes(org.xml.sax.Attributes) IOException(java.io.IOException) DefaultHandler(org.xml.sax.helpers.DefaultHandler) SAXException(org.xml.sax.SAXException) ByteArrayInputStream(java.io.ByteArrayInputStream) SAXParser(javax.xml.parsers.SAXParser) ParserConfigurationException(javax.xml.parsers.ParserConfigurationException) SAXParserFactory(javax.xml.parsers.SAXParserFactory)

Aggregations

Attributes (org.xml.sax.Attributes): 279; DefaultHandler (org.xml.sax.helpers.DefaultHandler): 74; SAXException (org.xml.sax.SAXException): 66; AttributesImpl (org.xml.sax.helpers.AttributesImpl): 50; SAXParser (javax.xml.parsers.SAXParser): 48; Test (org.junit.Test): 46; POSaveFailedException (org.adempiere.pipo.exception.POSaveFailedException): 37; InputSource (org.xml.sax.InputSource): 33; SAXParserFactory (javax.xml.parsers.SAXParserFactory): 30; IOException (java.io.IOException): 29; File (java.io.File): 22; ByteArrayInputStream (java.io.ByteArrayInputStream): 19; InputStream (java.io.InputStream): 17; ArrayList (java.util.ArrayList): 17; ParserConfigurationException (javax.xml.parsers.ParserConfigurationException): 17; Test (org.junit.jupiter.api.Test): 17; XMLReader (org.xml.sax.XMLReader): 17; ContentHandler (org.xml.sax.ContentHandler): 15; StringReader (java.io.StringReader): 12; Transformer (org.apache.sling.rewriter.Transformer): 10