Search in sources :

Example 31 with HWPFDocument

use of org.apache.poi.hwpf.HWPFDocument in project poi by apache.

the class HWPFLister method dumpPapx.

/**
 * Prints the paragraph property exceptions (PAPX) of the current document
 * to standard output.
 * <p>
 * When the document is an {@link HWPFDocument}, the bin table is read first:
 * every formatted disk page it references is printed together with the PAPX
 * entries it holds, and afterwards all collected entries are printed again
 * in their sorted order. In every case the entries of the document's
 * paragraph table are printed last.
 *
 * @param withProperties if {@code true}, also print the properties of the
 *            paragraph built from each paragraph-table entry
 * @param withSprms if {@code true}, also dump the SPRMs of each non-null
 *            entry read from the formatted disk pages (the SPRMs of
 *            paragraph-table entries are always dumped)
 * @throws Exception if reading or dumping any of the structures fails
 */
public void dumpPapx(boolean withProperties, boolean withSprms) throws Exception {
    if (_doc instanceof HWPFDocument) {
        System.out.println("binary PAP pages ");
        HWPFDocument binaryDoc = (HWPFDocument) _doc;
        byte[] mainStreamBytes = _doc.getMainStream();

        PlexOfCps pageTable = new PlexOfCps(
                binaryDoc.getTableStream(),
                binaryDoc.getFileInformationBlock().getFcPlcfbtePapx(),
                binaryDoc.getFileInformationBlock().getLcbPlcfbtePapx(),
                4);
        List<PAPX> collected = new ArrayList<PAPX>();

        int pageCount = pageTable.length();
        for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
            GenericPropertyNode entry = pageTable.getProperty(pageIndex);
            // The entry stores a page number; scale it by the block size
            // to obtain the offset passed to the formatted disk page.
            int diskPageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE
                    * LittleEndian.getInt(entry.getBytes());
            PAPFormattedDiskPage diskPage = new PAPFormattedDiskPage(
                    mainStreamBytes,
                    binaryDoc.getDataStream(),
                    diskPageOffset,
                    binaryDoc.getTextTable());
            System.out.println("* PFKP: " + diskPage);

            for (PAPX pagePapx : diskPage.getPAPXs()) {
                System.out.println("** " + pagePapx);
                collected.add(pagePapx);
                if (pagePapx == null || !withSprms) {
                    continue;
                }
                // NOTE(review): the literal 2 is presumably the offset at which
                // SPRMs start inside the grpprl - confirm against SprmIterator.
                dumpSprms(new SprmIterator(pagePapx.getGrpprl(), 2), "*** ");
            }
        }

        Collections.sort(collected);
        System.out.println("* Sorted by END");
        for (PAPX sortedPapx : collected) {
            System.out.println("** " + sortedPapx);
            if (sortedPapx == null || !withSprms) {
                continue;
            }
            dumpSprms(new SprmIterator(sortedPapx.getGrpprl(), 2), "*** ");
        }
    }

    // Entries of the paragraph table are dumped for every document type.
    for (PAPX tablePapx : _doc.getParagraphTable().getParagraphs()) {
        System.out.println(tablePapx);
        if (withProperties) {
            System.out.println(Paragraph.newParagraph(_doc.getOverallRange(), tablePapx).getProps());
        }
        dumpSprms(new SprmIterator(tablePapx.getGrpprl(), 2), "\t");
    }
}
Also used : HWPFDocument(org.apache.poi.hwpf.HWPFDocument) SprmIterator(org.apache.poi.hwpf.sprm.SprmIterator) PlexOfCps(org.apache.poi.hwpf.model.PlexOfCps) ArrayList(java.util.ArrayList) PAPFormattedDiskPage(org.apache.poi.hwpf.model.PAPFormattedDiskPage) PAPX(org.apache.poi.hwpf.model.PAPX) GenericPropertyNode(org.apache.poi.hwpf.model.GenericPropertyNode) Paragraph(org.apache.poi.hwpf.usermodel.Paragraph)

Example 32 with HWPFDocument

use of org.apache.poi.hwpf.HWPFDocument in project poi by apache.

the class TestTextPieceTable method testUnicodeParts.

/**
 * Checks that text piece positions are computed correctly for the
 * "HeaderFooterUnicode.doc" sample, both directly after opening it and
 * again after a save / re-load round trip.
 */
@Test
public void testUnicodeParts() throws Exception {
    HWPFDocument opened = HWPFTestDataSamples.openSampleFile("HeaderFooterUnicode.doc");

    // Pass 0 inspects the freshly opened document; pass 1 inspects the copy
    // obtained by saving and re-loading it. Both must show the same layout.
    for (int pass = 0; pass < 2; pass++) {
        HWPFDocument current = (pass == 0) ? opened : saveAndReload(opened);
        TextPieceTable table = current.getTextTable();

        // In three bits, split every 512 bytes
        assertEquals(3, table.getTextPieces().size());
        TextPiece first = table.getTextPieces().get(0);
        TextPiece second = table.getTextPieces().get(1);
        TextPiece third = table.getTextPieces().get(2);

        // Every piece is stored as Unicode
        assertTrue(first.isUnicode());
        assertTrue(second.isUnicode());
        assertTrue(third.isUnicode());

        // Lengths in characters ...
        assertEquals(256, first.characterLength());
        assertEquals(256, second.characterLength());
        assertEquals(19, third.characterLength());

        // ... and in bytes (twice the character count)
        assertEquals(512, first.bytesLength());
        assertEquals(512, second.bytesLength());
        assertEquals(38, third.bytesLength());

        // Each piece starts where the previous one ends
        assertEquals(0, first.getStart());
        assertEquals(256, first.getEnd());
        assertEquals(256, second.getStart());
        assertEquals(512, second.getEnd());
        assertEquals(512, third.getStart());
        assertEquals(531, third.getEnd());
    }
}
Also used : HWPFDocument(org.apache.poi.hwpf.HWPFDocument) Test(org.junit.Test)

Example 33 with HWPFDocument

use of org.apache.poi.hwpf.HWPFDocument in project poi by apache.

the class TestTextPieceTable method saveAndReload.

/**
 * Round-trips a document through memory: writes it into a byte buffer and
 * constructs a new document from the written bytes.
 *
 * @param doc the document to write out
 * @return a new document read back from the bytes that were written
 * @throws Exception if writing or re-reading the document fails
 */
protected HWPFDocument saveAndReload(HWPFDocument doc) throws Exception {
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    doc.write(sink);
    byte[] written = sink.toByteArray();
    ByteArrayInputStream source = new ByteArrayInputStream(written);
    return new HWPFDocument(source);
}
Also used : HWPFDocument(org.apache.poi.hwpf.HWPFDocument) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream)

Example 34 with HWPFDocument

use of org.apache.poi.hwpf.HWPFDocument in project poi by apache.

the class TestTextPieceTable method test56549_CharIndexRange.

/**
 * Exercises {@code TextPieceTable.getCharIndexRanges} on the
 * "ThreeColHeadFoot.doc" sample with queries that lie outside, overlap and
 * lie inside the single range the sample contains.
 */
@Test
public void test56549_CharIndexRange() {
    HWPFDocument doc = HWPFTestDataSamples.openSampleFile("ThreeColHeadFoot.doc");
    // there is one range from 2048 - 2387
    TextPieceTable table = doc.getTextTable();

    // Queries { start, end } that must not produce any range.
    int[][] misses = {
        { 0, 0 },
        { 0, 1 },
        { 0, 338 },
        { 0, 339 },
        { 0, 340 },
        { 2030, 2048 },
    };
    for (int[] query : misses) {
        int[][] found = table.getCharIndexRanges(query[0], query[1]);
        assertEquals(0, found.length);
    }

    // Queries { start, end, expectedFirst, expectedSecond } that must produce
    // exactly one range whose two values are the expected pair.
    int[][] hits = {
        { 2030, 2049, 0, 1 },
        { 2048, 2049, 0, 1 },
        { 2048, 2300, 0, 252 },
        { 2049, 2300, 1, 252 },
        // the same query is issued a second time in succession
        { 2049, 2300, 1, 252 },
        { 2049, 2387, 1, 339 },
        { 2049, 2388, 1, 339 },
        { 2387, 2388, 339, 339 },
    };
    for (int[] query : hits) {
        int[][] found = table.getCharIndexRanges(query[0], query[1]);
        assertEquals(1, found.length);
        assertArrayEquals(new int[] { query[2], query[3] }, found[0]);
    }
}
Also used : HWPFDocument(org.apache.poi.hwpf.HWPFDocument) Test(org.junit.Test)

Example 35 with HWPFDocument

use of org.apache.poi.hwpf.HWPFDocument in project poi by apache.

the class TestSprms method testInnerTable.

/**
 * Test correct processing of "sprmPItap" (0x6649) and "sprmPFInTable"
 * (0x2416).
 * <p>
 * Loads the "innertable.doc" sample, runs the inner-table checks on it,
 * then reloads the document and runs the same checks again.
 *
 * @throws Exception if the sample cannot be opened, parsed or reloaded
 */
public void testInnerTable() throws Exception {
    InputStream resourceAsStream = POIDataSamples.getDocumentInstance().openResourceAsStream("innertable.doc");
    HWPFDocument hwpfDocument;
    try {
        hwpfDocument = new HWPFDocument(resourceAsStream);
    } finally {
        // Close the sample stream even when parsing the document fails,
        // so a broken sample does not leak the underlying resource.
        resourceAsStream.close();
    }
    testInnerTable(hwpfDocument);
    hwpfDocument = reload(hwpfDocument);
    testInnerTable(hwpfDocument);
}
Also used : HWPFDocument(org.apache.poi.hwpf.HWPFDocument) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream)

Aggregations

HWPFDocument (org.apache.poi.hwpf.HWPFDocument) 126, Test (org.junit.Test) 66, InputStream (java.io.InputStream) 15, FileInputStream (java.io.FileInputStream) 10, Range (org.apache.poi.hwpf.usermodel.Range) 9, ByteArrayInputStream (java.io.ByteArrayInputStream) 8, HSLFSlideShow (org.apache.poi.hslf.usermodel.HSLFSlideShow) 7, HSSFWorkbook (org.apache.poi.hssf.usermodel.HSSFWorkbook) 7, WordExtractor (org.apache.poi.hwpf.extractor.WordExtractor) 7, ByteArrayOutputStream (java.io.ByteArrayOutputStream) 6, PicturesTable (org.apache.poi.hwpf.model.PicturesTable) 6, Bookmark (org.apache.poi.hwpf.usermodel.Bookmark) 6, NPOIFSFileSystem (org.apache.poi.poifs.filesystem.NPOIFSFileSystem) 6, File (java.io.File) 4, FileOutputStream (java.io.FileOutputStream) 4, Transformer (javax.xml.transform.Transformer) 4, DOMSource (javax.xml.transform.dom.DOMSource) 4, Picture (org.apache.poi.hwpf.usermodel.Picture) 4, DirectoryNode (org.apache.poi.poifs.filesystem.DirectoryNode) 4, POIFSFileSystem (org.apache.poi.poifs.filesystem.POIFSFileSystem) 4