Search in sources :

Example 6 with Node

Use of org.htmlparser.Node in the project dhis2-core by dhis2.

The class GridUtils, method fromHtml.

/**
 * Creates a list of Grids based on the given HTML string. This works only
 * for table-based HTML documents: one Grid is created for each
 * {@code table} element matched in the document.
 * <p>
 * Within each table, the first row that has at least one column becomes the
 * grid headers. Every following row becomes a grid row, unless its column
 * count differs from that of the header row, in which case it is skipped
 * with a warning. Cells with a {@code colspan} greater than 1 are padded
 * with empty headers / empty values so that the columns stay aligned. Row
 * spans are not handled.
 *
 * @param html the HTML string.
 * @param title the title to use for the grids.
 * @return a list of Grids, or {@code null} if the HTML string is null or
 *         contains only whitespace.
 * @throws Exception if an exception is raised while parsing the HTML.
 */
public static List<Grid> fromHtml(String html, String title) throws Exception {
    if (html == null || html.trim().isEmpty()) {
        // Kept as null (not an empty list) for backward compatibility with callers
        return null;
    }
    List<Grid> grids = new ArrayList<>();
    Parser parser = Parser.createParser(html, "UTF-8");
    Node[] tables = parser.extractAllNodesThatMatch(new TagNameFilter("table")).toNodeArray();
    for (Node t : tables) {
        Grid grid = new ListGrid();
        grid.setTitle(title);
        TableTag table = (TableTag) t;
        TableRow[] rows = table.getRows();
        // Column count of the header row; null until a header row has been seen
        Integer firstColumnCount = null;
        for (TableRow row : rows) {
            // Compute once per row and reuse for the checks and the log message,
            // so that the logged count is the one the decision was based on
            int columnCount = getColumnCount(row);
            // Ignore if no cells
            if (columnCount == 0) {
                log.warn("Ignoring row with no columns");
                continue;
            }
            Node[] cells = row.getChildren().extractAllNodesThatMatch(HTML_ROW_FILTER).toNodeArray();
            if (firstColumnCount == null) {
                // First row becomes header
                firstColumnCount = columnCount;
                for (Node c : cells) {
                    TagNode cell = (TagNode) c;
                    grid.addHeader(new GridHeader(getValue(cell), false, false));
                    Integer colSpan = MathUtils.parseInt(cell.getAttribute("colspan"));
                    if (colSpan != null && colSpan > 1) {
                        grid.addEmptyHeaders((colSpan - 1));
                    }
                }
            } else {
                // Rest becomes rows
                // Explicit unboxing: compare numeric values, never boxed references
                if (firstColumnCount.intValue() != columnCount) {
                    // Ignore rows which do not match the header width
                    log.warn("Ignoring row which has " + columnCount + " columns since table has " + firstColumnCount + " columns");
                    continue;
                }
                grid.addRow();
                for (Node c : cells) {
                    // TODO row span
                    TagNode cell = (TagNode) c;
                    grid.addValue(getValue(cell));
                    Integer colSpan = MathUtils.parseInt(cell.getAttribute("colspan"));
                    if (colSpan != null && colSpan > 1) {
                        grid.addEmptyValues((colSpan - 1));
                    }
                }
            }
        }
        grids.add(grid);
    }
    return grids;
}
Also used : Grid(org.hisp.dhis.common.Grid) TagNode(org.htmlparser.nodes.TagNode) Node(org.htmlparser.Node) ArrayList(java.util.ArrayList) Parser(org.htmlparser.Parser) GridHeader(org.hisp.dhis.common.GridHeader) TableTag(org.htmlparser.tags.TableTag) TableRow(org.htmlparser.tags.TableRow) TagNameFilter(org.htmlparser.filters.TagNameFilter)

Aggregations

Node (org.htmlparser.Node)6 Tag (org.htmlparser.Tag)3 TextNode (org.htmlparser.nodes.TextNode)3 ArrayList (java.util.ArrayList)2 ForumException (net.jforum.exceptions.ForumException)2 Parser (org.htmlparser.Parser)2 Lexer (org.htmlparser.lexer.Lexer)2 TagNode (org.htmlparser.nodes.TagNode)2 URL (java.net.URL)1 URLConnection (java.net.URLConnection)1 JasperPrint (net.sf.jasperreports.engine.JasperPrint)1 Grid (org.hisp.dhis.common.Grid)1 GridHeader (org.hisp.dhis.common.GridHeader)1 NodeFilter (org.htmlparser.NodeFilter)1 NodeClassFilter (org.htmlparser.filters.NodeClassFilter)1 OrFilter (org.htmlparser.filters.OrFilter)1 TagNameFilter (org.htmlparser.filters.TagNameFilter)1 LinkTag (org.htmlparser.tags.LinkTag)1 TableRow (org.htmlparser.tags.TableRow)1 TableTag (org.htmlparser.tags.TableTag)1