Search in sources :

Example 6 with PDOutlineItem

use of org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem in project pdfbox by apache.

the class BookmarkValidationProcess method exploreOutlineLevel.

/**
 * Walks one level of the outline tree, following the sibling chain that starts at
 * {@code inputItem}, and validates every item on the way via {@link #validateItem}.
 * <p>
 * Besides the per-item validation the following structural checks are made, each of which adds
 * an {@code ERROR_SYNTAX_TRAILER_OUTLINES_INVALID} error to the context when violated:
 * <ul>
 * <li>the first item of the level must not have a previous sibling;</li>
 * <li>a /Next reference must not point to an object already visited on this level (loop);</li>
 * <li>the /Prev entry of each following item must reference the item that precedes it;</li>
 * <li>the last item reached must be the object given as {@code lastObj}.</li>
 * </ul>
 * A detected loop, or a current item for which no object reference is available, aborts the walk
 * immediately; the other violations are recorded and the walk continues.
 *
 * @param ctx the preflight context.
 * @param inputItem The first outline item of the level.
 * @param firstObj The first PDF object of the level.
 * @param lastObj The last PDF object of the level.
 * @return true if all items are valid in this level.
 * @throws ValidationException
 */
protected boolean exploreOutlineLevel(PreflightContext ctx, PDOutlineItem inputItem, COSObject firstObj, COSObject lastObj) throws ValidationException {
    PDOutlineItem currentItem = inputItem;
    COSObject currentObj = firstObj;
    // objects already seen on this level, used for loop detection
    Set<COSObject> levelObjects = new HashSet<>();
    levelObjects.add(firstObj);
    boolean result = true;
    if (currentItem != null && inputItem.getPreviousSibling() != null) {
        addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "The value of /Prev of first object " + firstObj + " on a level is " + inputItem.getCOSObject().getItem(COSName.PREV) + ", but shouldn't exist"));
        result = false;
    }
    while (currentItem != null) {
        // reference of the item being processed in this iteration
        COSObject realPrevObject = currentObj;
        if (!validateItem(ctx, currentItem)) {
            result = false;
        }
        currentObj = toCOSObject(currentItem.getCOSObject().getItem(COSName.NEXT));
        // Only a non-null reference can form a loop. The set may legitimately hold null
        // (a null firstObj, or an earlier /Next for which toCOSObject returned null), and
        // matching against it would report a loop that does not exist.
        if (currentObj != null && levelObjects.contains(currentObj)) {
            addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "Loop detected: /Next " + currentObj + " is already in the list"));
            return false;
        }
        if (realPrevObject == null) {
            // unclear if this can ever happen
            addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "Outline object before " + currentObj + " is null"));
            return false;
        }
        levelObjects.add(currentObj);
        currentItem = currentItem.getNextSibling();
        if (currentItem == null) {
            // end of the chain: the item just processed must be the declared /Last
            if (!realPrevObject.equals(lastObj)) {
                // report the object actually reached (realPrevObject); currentObj is the
                // /Next of the final item at this point and is not the last object
                addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "Last object on a level isn't the expected /Last: " + lastObj + ", but is " + realPrevObject));
                result = false;
            }
        } else {
            // the /Prev of the next item must point back to the item just processed
            COSObject prevObject = toCOSObject(currentItem.getCOSObject().getItem(COSName.PREV));
            if (!realPrevObject.equals(prevObject)) {
                addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "The value of /Prev at " + currentObj + " doesn't point to previous object " + realPrevObject + ", but to " + prevObject));
                result = false;
            }
        }
    }
    return result;
}
Also used : COSObject(org.apache.pdfbox.cos.COSObject) ValidationError(org.apache.pdfbox.preflight.ValidationResult.ValidationError) PDOutlineItem(org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem) HashSet(java.util.HashSet)

Example 7 with PDOutlineItem

use of org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem in project pdfbox by apache.

the class BookmarkValidationProcess method validateItem.

/**
 * Validates a single outline item dictionary and, when the item has children, descends into
 * the child level through {@code exploreOutlineLevel}.
 * <p>
 * Rules applied, in order:
 * <ol>
 * <li>the indirect-object check on the dictionary must pass;</li>
 * <li>/A and /Dest are mutually exclusive; whichever one is present is handed to the
 * matching validation process;</li>
 * <li>an item with a first child must have a Count entry, and /First and /Last must be either
 * both present or both absent before the child level is explored.</li>
 * </ol>
 *
 * @param ctx the preflight context.
 * @param inputItem outline item to validate
 * @return the validation result.
 * @throws ValidationException
 */
protected boolean validateItem(PreflightContext ctx, PDOutlineItem inputItem) throws ValidationException {
    final COSDictionary itemDict = inputItem.getCOSObject();
    final COSBase destEntry = itemDict.getItem(COSName.DEST);
    final COSBase actionEntry = itemDict.getItem(COSName.A);

    if (!checkIndirectObjects(ctx, itemDict)) {
        return false;
    }

    final boolean hasAction = actionEntry != null;
    final boolean hasDest = destEntry != null;
    if (hasAction && hasDest) {
        // the two entries exclude each other
        addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "Dest entry isn't permitted if the A entry is present"));
        return false;
    }
    if (hasAction) {
        // the action is validated through the dictionary that holds it
        ContextHelper.validateElement(ctx, itemDict, ACTIONS_PROCESS);
    } else if (hasDest) {
        ContextHelper.validateElement(ctx, destEntry, DESTINATION_PROCESS);
    }
    // neither entry present: nothing specific to validate

    final PDOutlineItem firstChild = inputItem.getFirstChild();
    if (firstChild == null) {
        // leaf item, nothing more to check
        return true;
    }

    // descendants exist, so a Count entry is mandatory
    if (!isCountEntryPresent(inputItem.getCOSObject())) {
        addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "Outline item doesn't have Count entry but has at least one descendant"));
        return false;
    }

    final COSObject firstObj = toCOSObject(itemDict.getItem(COSName.FIRST));
    final COSObject lastObj = toCOSObject(itemDict.getItem(COSName.LAST));
    // exactly one of the two being null is an error; the child level is not explored then
    if ((firstObj == null) != (lastObj == null)) {
        addValidationError(ctx, new ValidationError(ERROR_SYNTAX_TRAILER_OUTLINES_INVALID, "/First and /Last are both required if there are outline entries"));
        return false;
    }
    return exploreOutlineLevel(ctx, firstChild, firstObj, lastObj);
}
Also used : COSDictionary(org.apache.pdfbox.cos.COSDictionary) COSObject(org.apache.pdfbox.cos.COSObject) COSBase(org.apache.pdfbox.cos.COSBase) ValidationError(org.apache.pdfbox.preflight.ValidationResult.ValidationError) PDOutlineItem(org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem)

Example 8 with PDOutlineItem

use of org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem in project pdfbox by apache.

the class TestTextStripper method testStripByOutlineItems.

/**
 * Test whether stripping controlled by outline items works properly. The test file has 4
 * outline items at the top level, that point to 0-based pages 0, 2, 3 and 4. We are testing
 * text stripping by outlines pointing to 0-based pages 2 and 3, and also text stripping of the
 * 0-based page 2. The test makes sure that the output is different to a complete strip, not
 * empty, different to each other when different bookmark intervals are used, but identical from
 * bookmark intervals to strips with page intervals. When fed with orphan bookmarks, stripping
 * must be empty.
 * <p>
 * The document is opened in a try-with-resources block so that it is closed even when an
 * assertion fails.
 *
 * @throws IOException
 * @throws URISyntaxException
 */
public void testStripByOutlineItems() throws IOException, URISyntaxException {
    try (PDDocument doc = PDDocument.load(new File(TestPDPageTree.class.getResource("with_outline.pdf").toURI()))) {
        PDDocumentOutline outline = doc.getDocumentCatalog().getDocumentOutline();
        Iterable<PDOutlineItem> children = outline.children();
        Iterator<PDOutlineItem> it = children.iterator();
        PDOutlineItem oi0 = it.next();
        PDOutlineItem oi2 = it.next();
        PDOutlineItem oi3 = it.next();
        PDOutlineItem oi4 = it.next();
        assertEquals(0, findOutlineItemDestPageNum(doc, oi0));
        assertEquals(2, findOutlineItemDestPageNum(doc, oi2));
        assertEquals(3, findOutlineItemDestPageNum(doc, oi3));
        assertEquals(4, findOutlineItemDestPageNum(doc, oi4));
        String textFull = stripper.getText(doc);
        assertFalse(textFull.isEmpty());
        String expectedTextFull = "First level 1\n" + "First level 2\n" + "Fist level 3\n" + "Some content\n" + "Some other content\n" + "Second at level 1\n" + "Second level 2\n" + "Content\n" + "Third level 1\n" + "Third level 2\n" + "Third level 3\n" + "Content\n" + "Fourth level 1\n" + "Content\n" + "Content\n";
        assertEquals(expectedTextFull, textFull.replaceAll("\r", ""));
        // this should grab 0-based pages 2 and 3, i.e. 1-based pages 3 and 4
        // by their bookmarks
        stripper.setStartBookmark(oi2);
        stripper.setEndBookmark(oi3);
        String textoi23 = stripper.getText(doc);
        assertFalse(textoi23.isEmpty());
        assertFalse(textoi23.equals(textFull));
        String expectedTextoi23 = "Second at level 1\n" + "Second level 2\n" + "Content\n" + "Third level 1\n" + "Third level 2\n" + "Third level 3\n" + "Content\n";
        assertEquals(expectedTextoi23, textoi23.replaceAll("\r", ""));
        // this should grab 0-based pages 2 and 3, i.e. 1-based pages 3 and 4
        // by their page numbers
        stripper.setStartBookmark(null);
        stripper.setEndBookmark(null);
        stripper.setStartPage(3);
        stripper.setEndPage(4);
        String textp34 = stripper.getText(doc);
        assertFalse(textp34.isEmpty());
        // the page-interval strip itself must differ from the complete strip
        assertFalse(textp34.equals(textFull));
        assertTrue(textoi23.equals(textp34));
        // this should grab 0-based page 2, i.e. 1-based page 3
        // by the bookmark
        stripper.setStartBookmark(oi2);
        stripper.setEndBookmark(oi2);
        String textoi2 = stripper.getText(doc);
        assertFalse(textoi2.isEmpty());
        assertFalse(textoi2.equals(textoi23));
        // the single-bookmark strip itself must differ from the complete strip
        assertFalse(textoi2.equals(textFull));
        String expectedTextoi2 = "Second at level 1\n" + "Second level 2\n" + "Content\n";
        assertEquals(expectedTextoi2, textoi2.replaceAll("\r", ""));
        // this should grab 0-based page 2, i.e. 1-based page 3
        // by the page number
        stripper.setStartBookmark(null);
        stripper.setEndBookmark(null);
        stripper.setStartPage(3);
        stripper.setEndPage(3);
        String textp3 = stripper.getText(doc);
        assertFalse(textp3.isEmpty());
        assertFalse(textp3.equals(textp34));
        // the single-page strip itself must differ from the complete strip
        assertFalse(textp3.equals(textFull));
        assertTrue(textoi2.equals(textp3));
        // Test with orphan bookmark
        PDOutlineItem oiOrphan = new PDOutlineItem();
        stripper.setStartBookmark(oiOrphan);
        stripper.setEndBookmark(oiOrphan);
        String textOiOrphan = stripper.getText(doc);
        assertTrue(textOiOrphan.isEmpty());
    }
}
Also used : PDDocumentOutline(org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline) PDDocument(org.apache.pdfbox.pdmodel.PDDocument) TestPDPageTree(org.apache.pdfbox.pdmodel.TestPDPageTree) File(java.io.File) PDOutlineItem(org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem)

Example 9 with PDOutlineItem

use of org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem in project pdfbox by apache.

the class TestPDPageTree method indexOfPageFromOutlineDestination.

/**
 * Loads "with_outline.pdf" and, for every top-level outline item whose title contains
 * "Second", checks that the page its destination resolves to is at 0-based index 2 of the
 * document's page tree.
 *
 * @throws IOException
 */
@Test
public void indexOfPageFromOutlineDestination() throws IOException {
    doc = PDDocument.load(TestPDPageTree.class.getResourceAsStream("with_outline.pdf"));
    final PDDocumentOutline bookmarks = doc.getDocumentCatalog().getDocumentOutline();
    for (PDOutlineItem item : bookmarks.children()) {
        // only the item(s) titled "...Second..." are of interest
        if (!item.getTitle().contains("Second")) {
            continue;
        }
        final int pageIndex = doc.getPages().indexOf(item.findDestinationPage(doc));
        assertEquals(2, pageIndex);
    }
}
Also used : PDDocumentOutline(org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline) PDOutlineItem(org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem) Test(org.junit.Test)

Aggregations

PDOutlineItem (org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem): 9, PDDocumentOutline (org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline): 5, File (java.io.File): 3, PDDocument (org.apache.pdfbox.pdmodel.PDDocument): 3, PDActionGoTo (org.apache.pdfbox.pdmodel.interactive.action.PDActionGoTo): 3, PDPageDestination (org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageDestination): 3, IOException (java.io.IOException): 2, COSDictionary (org.apache.pdfbox.cos.COSDictionary): 2, COSObject (org.apache.pdfbox.cos.COSObject): 2, PDPage (org.apache.pdfbox.pdmodel.PDPage): 2, PDDestination (org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDDestination): 2, ValidationError (org.apache.pdfbox.preflight.ValidationResult.ValidationError): 2, HashMap (java.util.HashMap): 1, HashSet (java.util.HashSet): 1, COSArray (org.apache.pdfbox.cos.COSArray): 1, COSBase (org.apache.pdfbox.cos.COSBase): 1, COSNumber (org.apache.pdfbox.cos.COSNumber): 1, COSStream (org.apache.pdfbox.cos.COSStream): 1, PDDocumentCatalog (org.apache.pdfbox.pdmodel.PDDocumentCatalog): 1, PDDocumentInformation (org.apache.pdfbox.pdmodel.PDDocumentInformation): 1