use of org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem in project pdfbox by apache.
the class BookmarkValidationProcess method exploreOutlineLevel.
/**
 * Explores one outline level by walking its sibling chain. Every outline item on the level is
 * passed to {@link #validateItem(PreflightContext, PDOutlineItem)}, and the /Prev, /Next and /Last
 * links of the level are checked for consistency. Each problem found is reported to the context
 * through addValidationError.
 *
 * @param ctx the preflight context.
 * @param inputItem The first outline item of the level.
 * @param firstObj The first PDF object of the level.
 * @param lastObj The last PDF object of the level.
 * @return true if all items are valid in this level; false if any item or link is invalid. The
 * method returns false immediately when a loop is detected or when the object preceding the
 * current position is null.
 * @throws ValidationException if validating one of the items throws it.
 */
protected boolean exploreOutlineLevel(PreflightContext ctx, PDOutlineItem inputItem, COSObject firstObj, COSObject lastObj) throws ValidationException {
    PDOutlineItem currentItem = inputItem;
    COSObject currentObj = firstObj;
    // every object already seen on this level; used to detect a cyclic /Next chain
    Set<COSObject> levelObjects = new HashSet<>();
    levelObjects.add(firstObj);
    boolean result = true;
    // the first item of a level must not have a /Prev entry
    if (currentItem != null && inputItem.getPreviousSibling() != null) {
        addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "The value of /Prev of first object " + firstObj + " on a level is " + inputItem.getCOSObject().getItem(COSName.PREV) + ", but shouldn't exist"));
        result = false;
    }
    while (currentItem != null) {
        // the object of the item being validated now; it is the "previous" object
        // for whatever /Next points to
        COSObject realPrevObject = currentObj;
        if (!validateItem(ctx, currentItem)) {
            result = false;
        }
        currentObj = toCOSObject(currentItem.getCOSObject().getItem(COSName.NEXT));
        if (levelObjects.contains(currentObj)) {
            addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "Loop detected: /Next " + currentObj + " is already in the list"));
            return false;
        }
        if (realPrevObject == null) {
            // unclear if this can ever happen
            addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "Outline object before " + currentObj + " is null"));
            return false;
        }
        levelObjects.add(currentObj);
        currentItem = currentItem.getNextSibling();
        if (currentItem == null) {
            // end of the chain: the item just validated (realPrevObject) is the real last
            // object of the level and must be the one the parent's /Last refers to
            if (!realPrevObject.equals(lastObj)) {
                // report realPrevObject, not currentObj: currentObj is the /Next value of the
                // last item, not the last item itself
                addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "Last object on a level isn't the expected /Last: " + lastObj + ", but is " + realPrevObject));
                result = false;
            }
        } else {
            // the /Prev of the next item must point back to the item just validated
            COSObject prevObject = toCOSObject(currentItem.getCOSObject().getItem(COSName.PREV));
            if (!realPrevObject.equals(prevObject)) {
                addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "The value of /Prev at " + currentObj + " doesn't point to previous object " + realPrevObject + ", but to " + prevObject));
                result = false;
            }
        }
    }
    return result;
}
use of org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem in project pdfbox by apache.
the class BookmarkValidationProcess method validateItem.
/**
 * Validates a single outline item dictionary and, when the item has children, descends into the
 * child level through exploreOutlineLevel.
 *
 * @param ctx the preflight context.
 * @param inputItem outline item to validate
 * @return true if no problem was reported by this method for the item and its child level,
 * false otherwise.
 * @throws ValidationException if one of the called validation steps throws it.
 */
protected boolean validateItem(PreflightContext ctx, PDOutlineItem inputItem) throws ValidationException {
    COSDictionary itemDict = inputItem.getCOSObject();
    COSBase destEntry = itemDict.getItem(COSName.DEST);
    COSBase actionEntry = itemDict.getItem(COSName.A);

    if (!checkIndirectObjects(ctx, itemDict)) {
        return false;
    }

    // /A and /Dest are mutually exclusive; whichever one is present gets validated
    if (actionEntry != null) {
        if (destEntry != null) {
            addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "Dest entry isn't permitted if the A entry is present"));
            return false;
        }
        ContextHelper.validateElement(ctx, itemDict, ACTIONS_PROCESS);
    } else if (destEntry != null) {
        ContextHelper.validateElement(ctx, destEntry, DESTINATION_PROCESS);
    }
    // neither entry present: nothing specific to validate

    // a leaf item needs no further checks
    PDOutlineItem firstChild = inputItem.getFirstChild();
    if (firstChild == null) {
        return true;
    }

    // an item with descendants must carry a Count entry
    if (!isCountEntryPresent(inputItem.getCOSObject())) {
        addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "Outline item doesn't have Count entry but has at least one descendant"));
        return false;
    }

    COSObject firstObj = toCOSObject(itemDict.getItem(COSName.FIRST));
    COSObject lastObj = toCOSObject(itemDict.getItem(COSName.LAST));
    // exactly one of /First and /Last being absent is an error; the child level is not explored then
    if ((firstObj == null) != (lastObj == null)) {
        addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "/First and /Last are both required if there are outline entries"));
        return false;
    }

    return exploreOutlineLevel(ctx, firstChild, firstObj, lastObj);
}
use of org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem in project pdfbox by apache.
the class TestTextStripper method testStripByOutlineItems.
/**
 * Test whether stripping controlled by outline items works properly. The test file has 4
 * outline items at the top level, that point to 0-based pages 0, 2, 3 and 4. We are testing
 * text stripping by outlines pointing to 0-based pages 2 and 3, and also text stripping of the
 * 0-based page 2. The test makes sure that the output is different to a complete strip, not
 * empty, different to each other when different bookmark intervals are used, but identical from
 * bookmark intervals to strips with page intervals. When fed with orphan bookmarks, stripping
 * must be empty.
 *
 * @throws IOException if the test document cannot be loaded, stripped or closed.
 * @throws URISyntaxException if the URL of the test resource cannot be converted to a URI.
 */
public void testStripByOutlineItems() throws IOException, URISyntaxException {
    PDDocument doc = PDDocument.load(new File(TestPDPageTree.class.getResource("with_outline.pdf").toURI()));
    try {
        PDDocumentOutline outline = doc.getDocumentCatalog().getDocumentOutline();
        Iterable<PDOutlineItem> children = outline.children();
        Iterator<PDOutlineItem> it = children.iterator();
        PDOutlineItem oi0 = it.next();
        PDOutlineItem oi2 = it.next();
        PDOutlineItem oi3 = it.next();
        PDOutlineItem oi4 = it.next();
        assertEquals(0, findOutlineItemDestPageNum(doc, oi0));
        assertEquals(2, findOutlineItemDestPageNum(doc, oi2));
        assertEquals(3, findOutlineItemDestPageNum(doc, oi3));
        assertEquals(4, findOutlineItemDestPageNum(doc, oi4));

        String textFull = stripper.getText(doc);
        assertFalse(textFull.isEmpty());
        String expectedTextFull = "First level 1\n" + "First level 2\n" + "Fist level 3\n" + "Some content\n" + "Some other content\n" + "Second at level 1\n" + "Second level 2\n" + "Content\n" + "Third level 1\n" + "Third level 2\n" + "Third level 3\n" + "Content\n" + "Fourth level 1\n" + "Content\n" + "Content\n";
        assertEquals(expectedTextFull, textFull.replaceAll("\r", ""));

        // this should grab 0-based pages 2 and 3, i.e. 1-based pages 3 and 4
        // by their bookmarks
        stripper.setStartBookmark(oi2);
        stripper.setEndBookmark(oi3);
        String textoi23 = stripper.getText(doc);
        assertFalse(textoi23.isEmpty());
        assertFalse(textoi23.equals(textFull));
        String expectedTextoi23 = "Second at level 1\n" + "Second level 2\n" + "Content\n" + "Third level 1\n" + "Third level 2\n" + "Third level 3\n" + "Content\n";
        assertEquals(expectedTextoi23, textoi23.replaceAll("\r", ""));

        // this should grab 0-based pages 2 and 3, i.e. 1-based pages 3 and 4
        // by their page numbers
        stripper.setStartBookmark(null);
        stripper.setEndBookmark(null);
        stripper.setStartPage(3);
        stripper.setEndPage(4);
        String textp34 = stripper.getText(doc);
        assertFalse(textp34.isEmpty());
        // compare the text just extracted (not textoi23 again) against the full strip
        assertFalse(textp34.equals(textFull));
        assertTrue(textoi23.equals(textp34));

        // this should grab 0-based page 2, i.e. 1-based page 3
        // by the bookmark
        stripper.setStartBookmark(oi2);
        stripper.setEndBookmark(oi2);
        String textoi2 = stripper.getText(doc);
        assertFalse(textoi2.isEmpty());
        assertFalse(textoi2.equals(textoi23));
        assertFalse(textoi2.equals(textFull));
        String expectedTextoi2 = "Second at level 1\n" + "Second level 2\n" + "Content\n";
        assertEquals(expectedTextoi2, textoi2.replaceAll("\r", ""));

        // this should grab 0-based page 2, i.e. 1-based page 3
        // by the page number
        stripper.setStartBookmark(null);
        stripper.setEndBookmark(null);
        stripper.setStartPage(3);
        stripper.setEndPage(3);
        String textp3 = stripper.getText(doc);
        assertFalse(textp3.isEmpty());
        assertFalse(textp3.equals(textp34));
        assertFalse(textp3.equals(textFull));
        assertTrue(textoi2.equals(textp3));

        // Test with orphan bookmark
        PDOutlineItem oiOrphan = new PDOutlineItem();
        stripper.setStartBookmark(oiOrphan);
        stripper.setEndBookmark(oiOrphan);
        String textOiOrphan = stripper.getText(doc);
        assertTrue(textOiOrphan.isEmpty());
    } finally {
        // release the document even when an assertion above fails
        doc.close();
    }
}
use of org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem in project pdfbox by apache.
the class TestPDPageTree method indexOfPageFromOutlineDestination.
/**
 * Loads "with_outline.pdf" and, for every top-level outline item whose title contains
 * "Second", asserts that the page found as its destination sits at 0-based index 2 of the
 * document's page tree.
 *
 * @throws IOException if the document cannot be loaded or the destination page lookup fails.
 */
@Test
public void indexOfPageFromOutlineDestination() throws IOException {
    doc = PDDocument.load(TestPDPageTree.class.getResourceAsStream("with_outline.pdf"));
    PDDocumentOutline documentOutline = doc.getDocumentCatalog().getDocumentOutline();
    for (PDOutlineItem item : documentOutline.children()) {
        // only items whose title mentions "Second" are checked
        if (!item.getTitle().contains("Second")) {
            continue;
        }
        int pageIndex = doc.getPages().indexOf(item.findDestinationPage(doc));
        assertEquals(2, pageIndex);
    }
}
Aggregations