Search in sources :

Example 11 with Element

use of org.jsoup.nodes.Element in project jsoup by jhy.

the class ParseTest method testNewsHomepage.

/**
 * Parses the saved news.com.au home page fixture and checks the document title, the text of one
 * headline, and how {@code href} / {@code abs:href} behave for a relative link and for a link
 * whose {@code href} is already absolute.
 */
@Test
public void testNewsHomepage() throws IOException {
    final File fixture = getFile("/htmltests/news-com-au-home.html");
    final Document page = Jsoup.parse(fixture, "UTF-8", "http://www.news.com.au/");

    final String title = page.title();
    assertEquals("News.com.au | News from Australia and around the world online | NewsComAu", title);

    final String headlineText = page.select(".id1225817868581 h4").text().trim();
    assertEquals("Brace yourself for Metro meltdown", headlineText);

    // Relative link: the raw attribute stays relative, abs:href is resolved against the base URI.
    final Element horoscopes = page.select("a[href=/entertainment/horoscopes]").first();
    assertEquals("/entertainment/horoscopes", horoscopes.attr("href"));
    assertEquals("http://www.news.com.au/entertainment/horoscopes", horoscopes.attr("abs:href"));

    // Link that is already absolute: abs:href must equal the raw attribute.
    final Element heraldSun = page.select("a[href*=naughty-corners-are-a-bad-idea]").first();
    final String rawHref = heraldSun.attr("href");
    assertEquals("http://www.heraldsun.com.au/news/naughty-corners-are-a-bad-idea-for-kids/story-e6frf7jo-1225817899003", rawHref);
    assertEquals(heraldSun.attr("href"), heraldSun.attr("abs:href"));
}
Also used : Element(org.jsoup.nodes.Element) Document(org.jsoup.nodes.Document) Test(org.junit.Test)

Example 12 with Element

use of org.jsoup.nodes.Element in project jsoup by jhy.

the class ParseTest method testNytArticle.

/**
 * Parses the saved NYT article fixture, which has tags like {@code <nyt_text>}, and checks that
 * the {@code nyt_headline} element can be selected by tag name and attribute and yields the
 * expected headline text.
 */
@Test
public void testNytArticle() throws IOException {
    final File fixture = getFile("/htmltests/nyt-article-1.html");
    // NOTE(review): the charset argument is null — presumably this leaves encoding detection to
    // the parser; confirm against the Jsoup.parse documentation.
    final Document article = Jsoup.parse(fixture, null, "http://www.nytimes.com/2010/07/26/business/global/26bp.html?hp");

    final Elements headlineMatches = article.select("nyt_headline[version=1.0]");
    final Element firstHeadline = headlineMatches.first();
    assertEquals("As BP Lays Out Future, It Will Not Include Hayward", firstHeadline.text());
}
Also used : Element(org.jsoup.nodes.Element) Document(org.jsoup.nodes.Document) Test(org.junit.Test)

Example 13 with Element

use of org.jsoup.nodes.Element in project jsoup by jhy.

the class Cleaner method createSafeElement.

/**
 * Builds a new element with the same tag name and base URI as {@code sourceEl}. Only the source
 * attributes that the whitelist reports as safe are copied; the whitelist's enforced attributes
 * for the tag are added afterwards.
 *
 * @param sourceEl element to copy from
 * @return the new element paired with the number of source attributes that were not copied
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes keptAttrs = new Attributes();
    final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), keptAttrs);

    int dropped = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            dropped++;
            continue;
        }
        keptAttrs.put(candidate);
    }

    // Enforced attributes are added after the copied ones.
    keptAttrs.addAll(whitelist.getEnforcedAttributes(tagName));
    return new ElementMeta(safeEl, dropped);
}
Also used : Attribute(org.jsoup.nodes.Attribute) Element(org.jsoup.nodes.Element) Attributes(org.jsoup.nodes.Attributes)

Example 14 with Element

use of org.jsoup.nodes.Element in project jsoup by jhy.

the class Elements method siblings.

/**
 * Collects sibling elements of every element in this list.
 *
 * @param query selector used to filter the siblings; when {@code null} every visited sibling is kept
 * @param next {@code true} to walk towards following siblings, {@code false} towards preceding ones
 * @param all {@code true} to keep walking until no sibling is left, {@code false} to look at the
 *        immediate sibling only
 * @return the collected siblings, in the order they were visited
 */
private Elements siblings(String query, boolean next, boolean all) {
    final Elements collected = new Elements();

    Evaluator filter = null;
    if (query != null) {
        filter = QueryParser.parse(query);
    }

    for (Element start : this) {
        Element cursor = next ? start.nextElementSibling() : start.previousElementSibling();
        while (cursor != null) {
            if (filter == null || cursor.is(filter)) {
                collected.add(cursor);
            }
            if (!all) {
                // Only the immediate sibling was requested.
                break;
            }
            cursor = next ? cursor.nextElementSibling() : cursor.previousElementSibling();
        }
    }
    return collected;
}
Also used : Element(org.jsoup.nodes.Element) FormElement(org.jsoup.nodes.FormElement)

Example 15 with Element

use of org.jsoup.nodes.Element in project jsoup by jhy.

the class Selector method select.

/**
 * Runs a CSS query against each of the given roots and merges the results.
 * An element found under more than one root is returned only once, at the
 * position of its first occurrence.
 *
 * @param query CSS selector; must not be empty
 * @param roots root elements to descend into; must not be null
 * @return matching elements, empty if none
 */
public static Elements select(String query, Iterable<Element> roots) {
    Validate.notEmpty(query);
    Validate.notNull(roots);

    final Evaluator evaluator = QueryParser.parse(query);
    final ArrayList<Element> uniqueMatches = new ArrayList<Element>();
    // Identity-keyed map used as a "seen" set: the same element object is only kept once.
    final IdentityHashMap<Element, Boolean> alreadySeen = new IdentityHashMap<Element, Boolean>();

    for (Element root : roots) {
        for (Element match : select(evaluator, root)) {
            // put() returns null only when the key was not present before.
            final boolean firstSighting = alreadySeen.put(match, Boolean.TRUE) == null;
            if (firstSighting) {
                uniqueMatches.add(match);
            }
        }
    }
    return new Elements(uniqueMatches);
}
Also used : Element(org.jsoup.nodes.Element) IdentityHashMap(java.util.IdentityHashMap) ArrayList(java.util.ArrayList)

Aggregations

Element (org.jsoup.nodes.Element): 343; Document (org.jsoup.nodes.Document): 152; Elements (org.jsoup.select.Elements): 95; ElementHandlerImpl (org.asqatasun.ruleimplementation.ElementHandlerImpl): 87; IOException (java.io.IOException): 63; File (java.io.File): 62; ArrayList (java.util.ArrayList): 45; Test (org.junit.Test): 34; TestSolutionHandler (org.asqatasun.ruleimplementation.TestSolutionHandler): 21; URL (java.net.URL): 15; TestSolutionHandlerImpl (org.asqatasun.ruleimplementation.TestSolutionHandlerImpl): 15; SimpleElementSelector (org.asqatasun.rules.elementselector.SimpleElementSelector): 13; TestSolution (org.asqatasun.entity.audit.TestSolution): 11; HashMap (java.util.HashMap): 9; ElementSelector (org.asqatasun.rules.elementselector.ElementSelector): 9; Node (org.jsoup.nodes.Node): 9; InputStream (java.io.InputStream): 8; EvidenceElement (org.asqatasun.entity.audit.EvidenceElement): 8; SSPHandler (org.asqatasun.processor.SSPHandler): 7; ProcessRemarkService (org.asqatasun.service.ProcessRemarkService): 7