Search in sources :

Example 6 with PDDocumentOutline

use of org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline in project pdfbox by apache.

In class TestTextStripper, method testStripByOutlineItems:

/**
 * Test whether stripping controlled by outline items works properly. The test file has 4
 * outline items at the top level, that point to 0-based pages 0, 2, 3 and 4. We are testing
 * text stripping by outlines pointing to 0-based pages 2 and 3, and also text stripping of the
 * 0-based page 2. The test makes sure that the output is different to a complete strip, not
 * empty, different to each other when different bookmark intervals are used, but identical from
 * bookmark intervals to strips with page intervals. When fed with orphan bookmarks, stripping
 * must be empty.
 *
 * <p>The document is closed in a {@code finally} block so that it is released even when one of
 * the assertions fails.
 *
 * @throws IOException if loading the test document, extracting text or closing the document fails
 * @throws URISyntaxException if the URL of the test resource cannot be converted to a URI
 */
public void testStripByOutlineItems() throws IOException, URISyntaxException {
    PDDocument doc = PDDocument.load(new File(TestPDPageTree.class.getResource("with_outline.pdf").toURI()));
    try {
        PDDocumentOutline outline = doc.getDocumentCatalog().getDocumentOutline();
        Iterable<PDOutlineItem> children = outline.children();
        Iterator<PDOutlineItem> it = children.iterator();
        // the variable suffix is the 0-based page each top-level bookmark is expected to point to
        PDOutlineItem oi0 = it.next();
        PDOutlineItem oi2 = it.next();
        PDOutlineItem oi3 = it.next();
        PDOutlineItem oi4 = it.next();
        assertEquals(0, findOutlineItemDestPageNum(doc, oi0));
        assertEquals(2, findOutlineItemDestPageNum(doc, oi2));
        assertEquals(3, findOutlineItemDestPageNum(doc, oi3));
        assertEquals(4, findOutlineItemDestPageNum(doc, oi4));

        // baseline: strip of the complete document
        String textFull = stripper.getText(doc);
        assertFalse(textFull.isEmpty());
        String expectedTextFull = "First level 1\n" + "First level 2\n" + "Fist level 3\n" + "Some content\n" + "Some other content\n" + "Second at level 1\n" + "Second level 2\n" + "Content\n" + "Third level 1\n" + "Third level 2\n" + "Third level 3\n" + "Content\n" + "Fourth level 1\n" + "Content\n" + "Content\n";
        // carriage returns are removed so the comparison does not depend on the line separator
        assertEquals(expectedTextFull, textFull.replaceAll("\r", ""));

        // this should grab 0-based pages 2 and 3, i.e. 1-based pages 3 and 4
        // by their bookmarks
        stripper.setStartBookmark(oi2);
        stripper.setEndBookmark(oi3);
        String textoi23 = stripper.getText(doc);
        assertFalse(textoi23.isEmpty());
        assertFalse(textoi23.equals(textFull));
        String expectedTextoi23 = "Second at level 1\n" + "Second level 2\n" + "Content\n" + "Third level 1\n" + "Third level 2\n" + "Third level 3\n" + "Content\n";
        assertEquals(expectedTextoi23, textoi23.replaceAll("\r", ""));

        // this should grab 0-based pages 2 and 3, i.e. 1-based pages 3 and 4
        // by their page numbers
        stripper.setStartBookmark(null);
        stripper.setEndBookmark(null);
        stripper.setStartPage(3);
        stripper.setEndPage(4);
        String textp34 = stripper.getText(doc);
        assertFalse(textp34.isEmpty());
        assertFalse(textp34.equals(textFull));
        assertEquals(textoi23, textp34);

        // this should grab 0-based page 2, i.e. 1-based page 3
        // by the bookmark
        stripper.setStartBookmark(oi2);
        stripper.setEndBookmark(oi2);
        String textoi2 = stripper.getText(doc);
        assertFalse(textoi2.isEmpty());
        assertFalse(textoi2.equals(textoi23));
        assertFalse(textoi2.equals(textFull));
        String expectedTextoi2 = "Second at level 1\n" + "Second level 2\n" + "Content\n";
        assertEquals(expectedTextoi2, textoi2.replaceAll("\r", ""));

        // this should grab 0-based page 2, i.e. 1-based page 3
        // by the page number
        stripper.setStartBookmark(null);
        stripper.setEndBookmark(null);
        stripper.setStartPage(3);
        stripper.setEndPage(3);
        String textp3 = stripper.getText(doc);
        assertFalse(textp3.isEmpty());
        assertFalse(textp3.equals(textp34));
        assertFalse(textp3.equals(textFull));
        assertEquals(textoi2, textp3);

        // Test with orphan bookmark
        PDOutlineItem oiOrphan = new PDOutlineItem();
        stripper.setStartBookmark(oiOrphan);
        stripper.setEndBookmark(oiOrphan);
        String textOiOrphan = stripper.getText(doc);
        assertTrue(textOiOrphan.isEmpty());
    } finally {
        // release the document even when an assertion above fails
        doc.close();
    }
}
Also used : PDDocumentOutline(org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline) PDDocument(org.apache.pdfbox.pdmodel.PDDocument) TestPDPageTree(org.apache.pdfbox.pdmodel.TestPDPageTree) File(java.io.File) PDOutlineItem(org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem)

Example 7 with PDDocumentOutline

use of org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline in project pdfbox by apache.

In class TestPDPageTree, method indexOfPageFromOutlineDestination:

/**
 * Loads "with_outline.pdf" and, for every top-level outline item whose title contains
 * "Second", checks that the item's destination page sits at 0-based index 2 of the
 * document's page tree.
 *
 * @throws IOException if the document cannot be loaded or a destination page cannot be resolved
 */
@Test
public void indexOfPageFromOutlineDestination() throws IOException {
    doc = PDDocument.load(TestPDPageTree.class.getResourceAsStream("with_outline.pdf"));
    PDDocumentOutline documentOutline = doc.getDocumentCatalog().getDocumentOutline();
    for (PDOutlineItem item : documentOutline.children()) {
        // only the bookmark(s) titled "...Second..." are of interest here
        if (!item.getTitle().contains("Second")) {
            continue;
        }
        int destinationIndex = doc.getPages().indexOf(item.findDestinationPage(doc));
        assertEquals(2, destinationIndex);
    }
}
Also used : PDDocumentOutline(org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline) PDOutlineItem(org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem) Test(org.junit.Test)

Aggregations

PDDocumentOutline (org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline)7 PDOutlineItem (org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem)5 File (java.io.File)4 PDDocument (org.apache.pdfbox.pdmodel.PDDocument)4 IOException (java.io.IOException)2 COSDictionary (org.apache.pdfbox.cos.COSDictionary)2 PDDocumentCatalog (org.apache.pdfbox.pdmodel.PDDocumentCatalog)2 PDPage (org.apache.pdfbox.pdmodel.PDPage)2 PDActionGoTo (org.apache.pdfbox.pdmodel.interactive.action.PDActionGoTo)2 PDDestination (org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDDestination)2 PDPageDestination (org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageDestination)2 HashMap (java.util.HashMap)1 COSArray (org.apache.pdfbox.cos.COSArray)1 COSNumber (org.apache.pdfbox.cos.COSNumber)1 COSObject (org.apache.pdfbox.cos.COSObject)1 COSStream (org.apache.pdfbox.cos.COSStream)1 PDDocumentInformation (org.apache.pdfbox.pdmodel.PDDocumentInformation)1 PDDocumentNameDestinationDictionary (org.apache.pdfbox.pdmodel.PDDocumentNameDestinationDictionary)1 PDDocumentNameDictionary (org.apache.pdfbox.pdmodel.PDDocumentNameDictionary)1 PDResources (org.apache.pdfbox.pdmodel.PDResources)1