Use of org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline in project pdfbox by Apache.
From the class TestTextStripper, method testStripByOutlineItems.
/**
 * Test whether stripping controlled by outline items works properly. The test file has 4
 * outline items at the top level, that point to 0-based pages 0, 2, 3 and 4. We are testing
 * text stripping by outlines pointing to 0-based pages 2 and 3, and also text stripping of the
 * 0-based page 2. The test makes sure that each partial strip is different from a complete
 * strip, not empty, different from each other when different bookmark intervals are used, but
 * identical between bookmark intervals and the corresponding page intervals. When fed with
 * orphan bookmarks, stripping must be empty.
 *
 * The document is always closed at the end of the test, even if an assertion fails.
 *
 * @throws IOException if loading the document, extracting text or closing the document fails
 * @throws URISyntaxException if the URL of the test resource cannot be converted to a URI
 */
public void testStripByOutlineItems() throws IOException, URISyntaxException {
    PDDocument doc = PDDocument.load(new File(TestPDPageTree.class.getResource("with_outline.pdf").toURI()));
    try {
        PDDocumentOutline outline = doc.getDocumentCatalog().getDocumentOutline();
        Iterable<PDOutlineItem> children = outline.children();
        Iterator<PDOutlineItem> it = children.iterator();
        // the variable suffix is the 0-based page the outline item is expected to point to
        PDOutlineItem oi0 = it.next();
        PDOutlineItem oi2 = it.next();
        PDOutlineItem oi3 = it.next();
        PDOutlineItem oi4 = it.next();
        assertEquals(0, findOutlineItemDestPageNum(doc, oi0));
        assertEquals(2, findOutlineItemDestPageNum(doc, oi2));
        assertEquals(3, findOutlineItemDestPageNum(doc, oi3));
        assertEquals(4, findOutlineItemDestPageNum(doc, oi4));

        // complete strip, used as the reference every partial strip must differ from
        String textFull = stripper.getText(doc);
        assertFalse(textFull.isEmpty());
        String expectedTextFull = "First level 1\n" + "First level 2\n" + "Fist level 3\n" + "Some content\n" + "Some other content\n" + "Second at level 1\n" + "Second level 2\n" + "Content\n" + "Third level 1\n" + "Third level 2\n" + "Third level 3\n" + "Content\n" + "Fourth level 1\n" + "Content\n" + "Content\n";
        // carriage returns are removed so the comparison does not depend on the line separator
        assertEquals(expectedTextFull, textFull.replaceAll("\r", ""));

        // this should grab 0-based pages 2 and 3, i.e. 1-based pages 3 and 4
        // by their bookmarks
        stripper.setStartBookmark(oi2);
        stripper.setEndBookmark(oi3);
        String textoi23 = stripper.getText(doc);
        assertFalse(textoi23.isEmpty());
        assertFalse(textoi23.equals(textFull));
        String expectedTextoi23 = "Second at level 1\n" + "Second level 2\n" + "Content\n" + "Third level 1\n" + "Third level 2\n" + "Third level 3\n" + "Content\n";
        assertEquals(expectedTextoi23, textoi23.replaceAll("\r", ""));

        // this should grab 0-based pages 2 and 3, i.e. 1-based pages 3 and 4
        // by their page numbers
        stripper.setStartBookmark(null);
        stripper.setEndBookmark(null);
        stripper.setStartPage(3);
        stripper.setEndPage(4);
        String textp34 = stripper.getText(doc);
        assertFalse(textp34.isEmpty());
        assertFalse(textp34.equals(textFull));
        assertEquals(textoi23, textp34);

        // this should grab 0-based page 2, i.e. 1-based page 3
        // by the bookmark
        stripper.setStartBookmark(oi2);
        stripper.setEndBookmark(oi2);
        String textoi2 = stripper.getText(doc);
        assertFalse(textoi2.isEmpty());
        assertFalse(textoi2.equals(textoi23));
        assertFalse(textoi2.equals(textFull));
        String expectedTextoi2 = "Second at level 1\n" + "Second level 2\n" + "Content\n";
        assertEquals(expectedTextoi2, textoi2.replaceAll("\r", ""));

        // this should grab 0-based page 2, i.e. 1-based page 3
        // by the page number
        stripper.setStartBookmark(null);
        stripper.setEndBookmark(null);
        stripper.setStartPage(3);
        stripper.setEndPage(3);
        String textp3 = stripper.getText(doc);
        assertFalse(textp3.isEmpty());
        assertFalse(textp3.equals(textp34));
        assertFalse(textp3.equals(textFull));
        assertEquals(textoi2, textp3);

        // Test with orphan bookmark
        PDOutlineItem oiOrphan = new PDOutlineItem();
        stripper.setStartBookmark(oiOrphan);
        stripper.setEndBookmark(oiOrphan);
        String textOiOrphan = stripper.getText(doc);
        assertTrue(textOiOrphan.isEmpty());
    } finally {
        // release the document's resources regardless of the test outcome
        doc.close();
    }
}
Use of org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline in project pdfbox by Apache.
From the class TestPDPageTree, method indexOfPageFromOutlineDestination.
/**
 * Loads "with_outline.pdf" and, for every top-level outline item whose title contains
 * "Second", checks that the page the item resolves to has index 2 in the document's pages.
 *
 * @throws IOException declared because loading the document and resolving the destination
 *         page may fail with it
 */
@Test
public void indexOfPageFromOutlineDestination() throws IOException {
    doc = PDDocument.load(TestPDPageTree.class.getResourceAsStream("with_outline.pdf"));
    PDDocumentOutline documentOutline = doc.getDocumentCatalog().getDocumentOutline();
    for (PDOutlineItem item : documentOutline.children()) {
        String title = item.getTitle();
        // only items whose title mentions "Second" are checked
        if (!title.contains("Second")) {
            continue;
        }
        int pageIndex = doc.getPages().indexOf(item.findDestinationPage(doc));
        assertEquals(2, pageIndex);
    }
}
Aggregations