Search in sources :

Example 6 with Document

use of com.smartandroid.sa.tag.nodes.Document in project SmartAndroidSource by jaychou2012.

the class HtmlToPlainText method main.

/**
 * Command-line entry point: fetches the URL passed as the single argument,
 * converts the parsed document with {@code HtmlToPlainText#getPlainText} and
 * writes the result to standard output.
 *
 * @param args
 *            expected to hold exactly one element, the URL to fetch (checked
 *            via {@code Validate.isTrue})
 * @throws IOException
 *             propagated from the fetch of the URL
 */
public static void main(String... args) throws IOException {
    Validate.isTrue(args.length == 1, "usage: supply url to fetch");
    // Download the page at the supplied URL and parse it into an HTML DOM.
    final Document page = SmartTag.connect(args[0]).get();
    final String text = new HtmlToPlainText().getPlainText(page);
    System.out.println(text);
}
Also used : Document(com.smartandroid.sa.tag.nodes.Document)

Example 7 with Document

use of com.smartandroid.sa.tag.nodes.Document in project SmartAndroidSource by jaychou2012.

the class ListLinks method main.

/**
 * Command-line entry point: fetches the URL passed as the single argument and
 * prints three listings for the page, in this order: elements carrying a
 * {@code src} attribute, {@code link[href]} elements, and {@code a[href]}
 * anchors.
 *
 * @param args
 *            expected to hold exactly one element, the URL to fetch (checked
 *            via {@code Validate.isTrue})
 * @throws IOException
 *             propagated from the fetch of the URL
 */
public static void main(String[] args) throws IOException {
    Validate.isTrue(args.length == 1, "usage: supply url to fetch");
    final String target = args[0];
    print("Fetching %s...", target);

    // Run all three selections up front, before any listing is printed.
    final Document page = SmartTag.connect(target).get();
    final Elements anchors = page.select("a[href]");
    final Elements mediaNodes = page.select("[src]");
    final Elements linkTags = page.select("link[href]");

    print("\nMedia: (%d)", mediaNodes.size());
    for (Element node : mediaNodes) {
        if (!node.tagName().equals("img")) {
            // Anything that is not an image only reports its tag and source.
            print(" * %s: <%s>", node.tagName(), node.attr("abs:src"));
            continue;
        }
        // Images additionally report their dimensions and a shortened alt text.
        print(" * %s: <%s> %sx%s (%s)", node.tagName(), node.attr("abs:src"), node.attr("width"), node.attr("height"), trim(node.attr("alt"), 20));
    }

    print("\nImports: (%d)", linkTags.size());
    for (Element imported : linkTags) {
        print(" * %s <%s> (%s)", imported.tagName(), imported.attr("abs:href"), imported.attr("rel"));
    }

    print("\nLinks: (%d)", anchors.size());
    for (Element anchor : anchors) {
        print(" * a: <%s>  (%s)", anchor.attr("abs:href"), trim(anchor.text(), 35));
    }
}
Also used : Element(com.smartandroid.sa.tag.nodes.Element) Document(com.smartandroid.sa.tag.nodes.Document) Elements(com.smartandroid.sa.tag.select.Elements)

Example 8 with Document

use of com.smartandroid.sa.tag.nodes.Document in project SmartAndroidSource by jaychou2012.

the class Parser method parseInput.

/**
 * Parses the given HTML with this parser's tree builder.
 * <p>
 * The {@code errors} field is replaced on every call: with a tracking list
 * capped at {@code maxErrors} when {@code isTrackErrors()} is true, otherwise
 * with a non-tracking list.
 *
 * @param html
 *            HTML text to parse
 * @param baseUri
 *            base URI handed through to the tree builder
 * @return the document produced by the tree builder
 */
public Document parseInput(String html, String baseUri) {
    if (isTrackErrors()) {
        errors = ParseErrorList.tracking(maxErrors);
    } else {
        errors = ParseErrorList.noTracking();
    }
    return treeBuilder.parse(html, baseUri, errors);
}
Also used : Document(com.smartandroid.sa.tag.nodes.Document)

Example 9 with Document

use of com.smartandroid.sa.tag.nodes.Document in project SmartAndroidSource by jaychou2012.

the class TreeBuilder method initialiseParse.

/**
 * Resets this builder's per-parse state: a new document, a character reader
 * over the input, a tokeniser reading from it, and an empty element stack.
 *
 * @param input
 *            text to parse; must not be null
 * @param baseUri
 *            base URI for the new document; must not be null
 * @param errors
 *            error list stored on this builder and passed to the tokeniser
 */
protected void initialiseParse(String input, String baseUri, ParseErrorList errors) {
    // Check both arguments before any field is touched.
    Validate.notNull(input, "String input must not be null");
    Validate.notNull(baseUri, "BaseURI must not be null");

    this.doc = new Document(baseUri);
    this.reader = new CharacterReader(input);
    this.errors = errors;
    // The tokeniser reads from the reader created just above.
    this.tokeniser = new Tokeniser(this.reader, errors);
    this.stack = new DescendableLinkedList<Element>();
    this.baseUri = baseUri;
}
Also used : Element(com.smartandroid.sa.tag.nodes.Element) Document(com.smartandroid.sa.tag.nodes.Document)

Example 10 with Document

use of com.smartandroid.sa.tag.nodes.Document in project SmartAndroidSource by jaychou2012.

the class Cleaner method isValid.

/**
 * Checks whether the input document is valid against the whitelist, i.e.
 * whether every tag and attribute in the input HTML is allowed by it.
 * <p>
 * This can serve as a validator for user input forms. A document that fails
 * this check can still be cleaned successfully with the
 * {@link #clean(Document)} method. Even when using this method as a
 * validator, it is recommended to clean the document as well, so that
 * enforced attributes are set correctly and the output is tidied.
 *
 * @param dirtyDocument
 *            document to test
 * @return true if no tags or attributes need to be removed; false if they
 *         do
 */
public boolean isValid(Document dirtyDocument) {
    Validate.notNull(dirtyDocument);
    final Document shell = Document.createShell(dirtyDocument.baseUri());
    // Only the discard count is used; the populated shell itself is dropped.
    return copySafeNodes(dirtyDocument.body(), shell.body()) == 0;
}
Also used : Document(com.smartandroid.sa.tag.nodes.Document)

Aggregations

Document (com.smartandroid.sa.tag.nodes.Document): 11, Element (com.smartandroid.sa.tag.nodes.Element): 4, Cleaner (com.smartandroid.sa.tag.safety.Cleaner): 3, Node (com.smartandroid.sa.tag.nodes.Node): 1, Elements (com.smartandroid.sa.tag.select.Elements): 1, IllegalCharsetNameException (java.nio.charset.IllegalCharsetNameException): 1