Search in sources :

Example 1 with Table

use of org.apache.poi.hwpf.usermodel.Table in project poi by apache.

the class AbstractWordConverter method processParagraphes.

/**
 * Walks every paragraph of {@code range} and dispatches it to the matching
 * handler: nested tables go to {@code processTable}, page-break paragraphs
 * additionally trigger {@code processPageBreak}, and every non-table
 * paragraph is rendered through {@code processParagraph} (with a bullet
 * label when the paragraph belongs to a list and the label can be built).
 *
 * @param wordDocument      document being converted; forwarded unchanged to the handlers
 * @param flow              output element; forwarded unchanged to the handlers
 * @param range             range whose paragraphs are iterated
 * @param currentTableLevel table nesting level of the caller; a table paragraph
 *                          whose level differs from this starts a nested table
 * @throws IllegalStateException if a table paragraph reports a table level
 *                               lower than {@code currentTableLevel}
 */
protected void processParagraphes(HWPFDocumentCore wordDocument, Element flow, Range range, int currentTableLevel) {
    final int paragraphs = range.numParagraphs();
    for (int p = 0; p < paragraphs; p++) {
        Paragraph paragraph = range.getParagraph(p);
        if (paragraph.isInTable() && paragraph.getTableLevel() != currentTableLevel) {
            if (paragraph.getTableLevel() < currentTableLevel) {
                throw new IllegalStateException("Trying to process table cell with higher level (" + paragraph.getTableLevel() + ") than current table level (" + currentTableLevel + ") as inner table part");
            }
            Table table = range.getTable(paragraph);
            processTable(wordDocument, flow, table);
            // Skip the paragraphs consumed by the table; the loop's own p++
            // then lands on the first paragraph after it.
            p += table.numParagraphs() - 1;
            continue;
        }
        // A paragraph consisting solely of the form-feed character (U+000C)
        // marks a page break. The escape is spelled out because a raw control
        // character in the literal is invisible and easily lost.
        if (paragraph.text().equals("\u000c")) {
            processPageBreak(wordDocument, flow);
        }
        boolean processed = false;
        if (paragraph.isInList()) {
            try {
                HWPFList hwpfList = paragraph.getList();
                String label = AbstractWordUtils.getBulletText(numberingState, hwpfList, (char) paragraph.getIlvl());
                processParagraph(wordDocument, flow, currentTableLevel, paragraph, label);
                processed = true;
            } catch (Exception exc) {
                // Deliberate best effort: fall back to rendering without list information.
                log.log(POILogger.WARN, "Can't process paragraph as list entry, will be processed without list information", exc);
            }
        }
        if (!processed) {
            processParagraph(wordDocument, flow, currentTableLevel, paragraph, AbstractWordUtils.EMPTY);
        }
    }
}
Also used : Table(org.apache.poi.hwpf.usermodel.Table) HWPFList(org.apache.poi.hwpf.usermodel.HWPFList) Paragraph(org.apache.poi.hwpf.usermodel.Paragraph)

Example 2 with Table

use of org.apache.poi.hwpf.usermodel.Table in project poi by apache.

the class AbstractWordUtilsTest method testBuildTableCellEdgesArray.

/**
 * Checks that {@link AbstractWordUtils#buildTableCellEdgesArray(Table)}
 * returns the six expected edge values for the table that contains the
 * first paragraph of the "table-merges.doc" sample.
 */
public void testBuildTableCellEdgesArray() {
    final HWPFDocument sample = HWPFTestDataSamples.openSampleFile("table-merges.doc");
    final Range body = sample.getRange();
    final Table mergedTable = body.getTable(body.getParagraph(0));

    final int[] expectedEdges = { 0, 1062, 5738, 6872, 8148, 9302 };
    final int[] actualEdges = AbstractWordUtils.buildTableCellEdgesArray(mergedTable);

    // Length first, then each edge in order.
    assertEquals(expectedEdges.length, actualEdges.length);
    for (int i = 0; i < expectedEdges.length; i++) {
        assertEquals(expectedEdges[i], actualEdges[i]);
    }
}
Also used : HWPFDocument(org.apache.poi.hwpf.HWPFDocument) Table(org.apache.poi.hwpf.usermodel.Table) Range(org.apache.poi.hwpf.usermodel.Range)

Example 3 with Table

use of org.apache.poi.hwpf.usermodel.Table in project tika by apache.

the class WordExtractor method handleParagraph.

/**
 * Emits XHTML for a single paragraph. A paragraph that opens a top-level
 * table causes the whole table to be written (recursing into each cell's
 * paragraphs); any other non-blank paragraph is written as one element
 * containing its character runs, embedded-object markers and pictures.
 *
 * @param p                paragraph to output
 * @param parentTableLevel table level of the caller; tables are only expanded when this is 0
 * @param r                range used to look up the table and field separator runs
 * @param document         source document (style sheet and fields are read from it)
 * @param docPart          document part used for the field lookup
 * @param pictures         source of inline and unclaimed (floating) pictures
 * @param pictureTable     used to test whether a character run carries a picture
 * @param listManager      supplies the formatted number for list paragraphs
 * @param xhtml            handler receiving the generated elements and text
 * @return {@code t.numParagraphs() - 1} when a table was written, otherwise 0
 *         (presumably the number of additional paragraphs the caller should
 *         skip; confirm against the caller)
 */
private int handleParagraph(Paragraph p, int parentTableLevel, Range r, HWPFDocument document, FieldsDocumentPart docPart, PicturesSource pictures, PicturesTable pictureTable, ListManager listManager, XHTMLContentHandler xhtml) throws SAXException, IOException, TikaException {
    // Note - a poi bug means we can't currently properly recurse
    //  into nested tables, so currently we don't
    if (p.isInTable() && p.getTableLevel() > parentTableLevel && parentTableLevel == 0) {
        Table t = r.getTable(p);
        xhtml.startElement("table");
        xhtml.startElement("tbody");
        for (int rn = 0; rn < t.numRows(); rn++) {
            TableRow row = t.getRow(rn);
            xhtml.startElement("tr");
            for (int cn = 0; cn < row.numCells(); cn++) {
                TableCell cell = row.getCell(cn);
                xhtml.startElement("td");
                for (int pn = 0; pn < cell.numParagraphs(); pn++) {
                    Paragraph cellP = cell.getParagraph(pn);
                    handleParagraph(cellP, p.getTableLevel(), cell, document, docPart, pictures, pictureTable, listManager, xhtml);
                }
                xhtml.endElement("td");
            }
            xhtml.endElement("tr");
        }
        xhtml.endElement("tbody");
        xhtml.endElement("table");
        return (t.numParagraphs() - 1);
    }
    String text = p.text();
    if (text.replaceAll("[\\r\\n\\s]+", "").isEmpty()) {
        // Skip empty paragraphs
        return 0;
    }
    TagAndStyle tas;
    String numbering = null;
    if (document.getStyleSheet().numStyles() > p.getStyleIndex()) {
        StyleDescription style = document.getStyleSheet().getStyleDescription(p.getStyleIndex());
        if (style != null && style.getName() != null && style.getName().length() > 0) {
            if (p.isInList()) {
                numbering = listManager.getFormattedNumber(p);
            }
            tas = buildParagraphTagAndStyle(style.getName(), (parentTableLevel > 0));
        } else {
            tas = new TagAndStyle("p", null);
        }
    } else {
        tas = new TagAndStyle("p", null);
    }
    if (tas.getStyleClass() != null) {
        xhtml.startElement(tas.getTag(), "class", tas.getStyleClass());
    } else {
        xhtml.startElement(tas.getTag());
    }
    if (numbering != null) {
        xhtml.characters(numbering);
    }
    for (int j = 0; j < p.numCharacterRuns(); j++) {
        CharacterRun cr = p.getCharacterRun(j);
        String runText = cr.text();
        // FIELD_BEGIN_MARK: the run starts with character 0x13. Checking the
        // first char directly avoids an ArrayIndexOutOfBoundsException on an
        // empty run, which indexing the encoded byte array would raise.
        if (!runText.isEmpty() && runText.charAt(0) == 0x13) {
            Field field = document.getFields().getFieldByStartOffset(docPart, cr.getStartOffset());
            // 58 is an embedded document
            // 56 is a document link
            if (field != null && (field.getType() == 58 || field.getType() == 56)) {
                // Embedded Object: add a <div
                // class="embedded" id="_X"/> so consumer can see where
                // in the main text each embedded document
                // occurred:
                String id = "_unknown_id";
                //this can return null (TIKA-1956)
                CharacterRun mscr = field.getMarkSeparatorCharacterRun(r);
                if (mscr != null) {
                    id = "_" + mscr.getPicOffset();
                }
                AttributesImpl attributes = new AttributesImpl();
                attributes.addAttribute("", "class", "class", "CDATA", "embedded");
                attributes.addAttribute("", "id", "id", "CDATA", id);
                xhtml.startElement("div", attributes);
                xhtml.endElement("div");
            }
        }
        // The control characters are written as explicit escapes: with an
        // empty literal, startsWith would match every run and make the
        // inline-picture and plain-text branches below unreachable.
        if (runText.equals("\u0013")) {
            // Run consisting solely of the field-begin mark (U+0013)
            j += handleSpecialCharacterRuns(p, j, tas.isHeading(), pictures, xhtml);
        } else if (runText.startsWith("\u0008")) {
            // Floating Picture(s): one U+0008 placeholder character per picture
            for (int pn = 0; pn < runText.length(); pn++) {
                // Assume they're in the order from the unclaimed list...
                Picture picture = pictures.nextUnclaimed();
                // Output
                handlePictureCharacterRun(cr, picture, pictures, xhtml);
            }
        } else if (pictureTable.hasPicture(cr)) {
            // Inline Picture
            Picture picture = pictures.getFor(cr);
            handlePictureCharacterRun(cr, picture, pictures, xhtml);
        } else {
            handleCharacterRun(cr, tas.isHeading(), xhtml);
        }
    }
    // Close any still open style tags
    if (curStrikeThrough) {
        xhtml.endElement("s");
        curStrikeThrough = false;
    }
    if (curItalic) {
        xhtml.endElement("i");
        curItalic = false;
    }
    if (curBold) {
        xhtml.endElement("b");
        curBold = false;
    }
    xhtml.endElement(tas.getTag());
    return 0;
}
Also used : Field(org.apache.poi.hwpf.usermodel.Field) PicturesTable(org.apache.poi.hwpf.model.PicturesTable) SavedByTable(org.apache.poi.hwpf.model.SavedByTable) Table(org.apache.poi.hwpf.usermodel.Table) TableCell(org.apache.poi.hwpf.usermodel.TableCell) AttributesImpl(org.xml.sax.helpers.AttributesImpl) Picture(org.apache.poi.hwpf.usermodel.Picture) TableRow(org.apache.poi.hwpf.usermodel.TableRow) CharacterRun(org.apache.poi.hwpf.usermodel.CharacterRun) StyleDescription(org.apache.poi.hwpf.model.StyleDescription) Paragraph(org.apache.poi.hwpf.usermodel.Paragraph)

Aggregations

Table (org.apache.poi.hwpf.usermodel.Table)3 Paragraph (org.apache.poi.hwpf.usermodel.Paragraph)2 HWPFDocument (org.apache.poi.hwpf.HWPFDocument)1 PicturesTable (org.apache.poi.hwpf.model.PicturesTable)1 SavedByTable (org.apache.poi.hwpf.model.SavedByTable)1 StyleDescription (org.apache.poi.hwpf.model.StyleDescription)1 CharacterRun (org.apache.poi.hwpf.usermodel.CharacterRun)1 Field (org.apache.poi.hwpf.usermodel.Field)1 HWPFList (org.apache.poi.hwpf.usermodel.HWPFList)1 Picture (org.apache.poi.hwpf.usermodel.Picture)1 Range (org.apache.poi.hwpf.usermodel.Range)1 TableCell (org.apache.poi.hwpf.usermodel.TableCell)1 TableRow (org.apache.poi.hwpf.usermodel.TableRow)1 AttributesImpl (org.xml.sax.helpers.AttributesImpl)1