Use of org.apache.pdfbox.multipdf.Splitter in project pdfbox by apache.
The class PDFSplit, method split.
/**
 * Parses the command-line arguments and splits the named PDF into separate files.
 *
 * <p>The options {@code PASSWORD}, {@code SPLIT}, {@code START_PAGE}, {@code END_PAGE} and
 * {@code OUTPUT_PREFIX} each consume the following argument as their value. The first
 * argument that is not an option is taken as the input file; later non-option arguments
 * are ignored. Each resulting document is written to {@code <prefix>-<n>.pdf}, with
 * {@code n} starting at 1. When no prefix is given, the input path without its file
 * extension is used.
 *
 * @param args the command-line arguments
 * @throws IOException propagated from loading, splitting, writing or closing a document
 * @throws NumberFormatException if a page or split value is not a valid integer
 */
private void split(String[] args) throws IOException {
    String password = "";
    String split = null;
    String startPage = null;
    String endPage = null;
    String pdfFile = null;
    String outputPrefix = null;
    for (int i = 0; i < args.length; i++) {
        switch (args[i]) {
            case PASSWORD:
                i++;
                if (i >= args.length) {
                    // usage() presumably terminates the program; the return guards the
                    // out-of-bounds read below in case it does not.
                    usage();
                    return;
                }
                password = args[i];
                break;
            case SPLIT:
                i++;
                if (i >= args.length) {
                    usage();
                    return;
                }
                split = args[i];
                break;
            case START_PAGE:
                i++;
                if (i >= args.length) {
                    usage();
                    return;
                }
                startPage = args[i];
                break;
            case END_PAGE:
                i++;
                if (i >= args.length) {
                    usage();
                    return;
                }
                endPage = args[i];
                break;
            case OUTPUT_PREFIX:
                i++;
                // Same missing-value check as the other options; without it a trailing
                // flag caused an ArrayIndexOutOfBoundsException.
                if (i >= args.length) {
                    usage();
                    return;
                }
                outputPrefix = args[i];
                break;
            default:
                if (pdfFile == null) {
                    pdfFile = args[i];
                }
                break;
        }
    }
    if (pdfFile == null) {
        usage();
        return;
    }
    if (outputPrefix == null) {
        // Strip the extension only when the last dot belongs to the file name itself,
        // not to a directory component (e.g. "./dir/file"), and tolerate names
        // without any dot at all.
        int dot = pdfFile.lastIndexOf('.');
        int separator = Math.max(pdfFile.lastIndexOf('/'), pdfFile.lastIndexOf(File.separatorChar));
        outputPrefix = dot > separator ? pdfFile.substring(0, dot) : pdfFile;
    }
    try (PDDocument document = PDDocument.load(new File(pdfFile), password)) {
        Splitter splitter = new Splitter();
        int numberOfPages = document.getNumberOfPages();
        boolean startEndPageSet = false;
        if (startPage != null) {
            splitter.setStartPage(Integer.parseInt(startPage));
            startEndPageSet = true;
            if (split == null) {
                // No explicit split size: keep everything from the start page together.
                splitter.setSplitAtPage(numberOfPages);
            }
        }
        if (endPage != null) {
            splitter.setEndPage(Integer.parseInt(endPage));
            startEndPageSet = true;
            if (split == null) {
                splitter.setSplitAtPage(Integer.parseInt(endPage));
            }
        }
        if (split != null) {
            splitter.setSplitAtPage(Integer.parseInt(split));
        } else if (!startEndPageSet) {
            // Nothing specified at all: one output file per page.
            splitter.setSplitAtPage(1);
        }
        List<PDDocument> documents = splitter.split(document);
        try {
            for (int i = 0; i < documents.size(); i++) {
                // Close each part as soon as it has been written to limit memory use.
                try (PDDocument doc = documents.get(i)) {
                    String fileName = outputPrefix + "-" + (i + 1) + ".pdf";
                    writeDocument(doc, fileName);
                }
            }
        } finally {
            // Covers the parts that were never reached because an earlier write failed.
            // Parts already closed by the loop above are closed a second time here,
            // exactly as the original cleanup did.
            for (PDDocument doc : documents) {
                doc.close();
            }
        }
    }
}
Use of org.apache.pdfbox.multipdf.Splitter in project tess4j by nguyenq.
The class PdfBoxUtilities, method splitPdf.
/**
 * Splits PDF, writing the page range {@code firstPage..lastPage} of the input to a
 * single output file.
 *
 * <p>I/O failures are logged rather than thrown. If the splitter does not return
 * exactly one document, an error is logged and nothing is written.
 *
 * @param inputPdfFile input file
 * @param outputPdfFile output file
 * @param firstPage begin page
 * @param lastPage end page
 */
public static void splitPdf(File inputPdfFile, File outputPdfFile, int firstPage, int lastPage) {
    PDDocument document = null;
    List<PDDocument> documents = null;
    try {
        document = PDDocument.load(inputPdfFile);
        Splitter splitter = new Splitter();
        splitter.setStartPage(firstPage);
        splitter.setEndPage(lastPage);
        // Split size equals the length of the range so the whole range lands in one document.
        splitter.setSplitAtPage(lastPage - firstPage + 1);
        documents = splitter.split(document);
        if (documents.size() == 1) {
            documents.get(0).save(outputPdfFile);
        } else {
            logger.error("Splitter returned " + documents.size() + " documents rather than expected of 1");
        }
    } catch (IOException ioe) {
        logger.error("Exception splitting PDF => " + ioe);
    } finally {
        // Close every split result, including when the count was unexpected or when
        // save() failed; previously those documents were leaked.
        if (documents != null) {
            for (PDDocument part : documents) {
                try {
                    part.close();
                } catch (Exception ignored) {
                    // Best-effort cleanup: a close failure must not hide the outcome above.
                }
            }
        }
        if (document != null) {
            try {
                document.close();
            } catch (Exception ignored) {
                // Best-effort cleanup, as in the original implementation.
            }
        }
    }
}
Use of org.apache.pdfbox.multipdf.Splitter in project estatio by estatio.
The class PdfManipulator, method extractAndStamp.
/**
 * Extracts the pages selected by {@code extractSpec} from a PDF, optionally stamps
 * each one, and merges the results back into a single PDF.
 *
 * @param docBytes the source PDF as bytes
 * @param extractSpec selects which pages to extract; the page numbers it yields are
 *     used as zero-based indexes into the list of split documents
 * @param stamp the stamp to apply to each extracted page, or {@code null} for none
 * @return the bytes of the merged PDF
 * @throws IOException propagated from loading, splitting, stamping or closing a document
 */
@Programmatic
public byte[] extractAndStamp(final byte[] docBytes, final ExtractSpec extractSpec, final Stamp stamp) throws IOException {
    final List<byte[]> extractedPageDocBytes = Lists.newArrayList();
    final PDDocument pdDoc = PDDocument.load(docBytes);
    try {
        // Splitter is used with its default settings; the result size is treated as the
        // page count below, so this presumably yields one document per page.
        final List<PDDocument> splitDocs = new Splitter().split(pdDoc);
        try {
            final int sizeOfDoc = splitDocs.size();
            final Integer[] pageNums = extractSpec.pageNumbersFor(sizeOfDoc);
            for (Integer pageNum : pageNums) {
                final PDDocument docOfExtractedPage = splitDocs.get(pageNum);
                if (stamp != null) {
                    final List<Line> leftLines = stamp.getLeftLines();
                    final List<Line> rightLines = stamp.getRightLines();
                    // NOTE(review): if getLeftLines() returns the stamp's own list rather
                    // than a copy, these additions accumulate across pages - confirm.
                    leftLines.add(new Line(String.format("Page: %d/%d", (pageNum + 1), sizeOfDoc), TEXT_COLOR, null));
                    stamp.appendHyperlinkIfAnyTo(leftLines);
                    extractedPageDocBytes.add(stamp(docOfExtractedPage, leftLines, rightLines));
                } else {
                    extractedPageDocBytes.add(asBytes(docOfExtractedPage));
                }
            }
        } finally {
            // Closing here rather than at the end of the try body releases the split
            // documents even when stamping or serialising a page throws.
            for (PDDocument splitDoc : splitDocs) {
                splitDoc.close();
            }
        }
    } finally {
        pdDoc.close();
    }
    return pdfBoxService.merge(extractedPageDocBytes.toArray(new byte[][] {}));
}
Aggregations