Use of org.apache.pdfbox.cos.COSObject in project lucene-solr by apache.
The class ReducePDFSize, method main.
/**
 * Re-encodes every stream object of a PDF with the Flate filter and writes the result
 * to a new file, with all security removed from the saved copy.
 *
 * @param args exactly two entries: {@code args[0]} is the input PDF path,
 *             {@code args[1]} is the output PDF path
 * @throws IOException if the input cannot be loaded, a stream cannot be re-written,
 *                     or the output cannot be saved
 * @throws IllegalArgumentException if {@code args} does not contain exactly two entries
 * @throws RuntimeException if the decoded bytes of a stream object cannot be obtained
 */
public static void main(String[] args) throws IOException {
    if (2 != args.length) {
        throw new IllegalArgumentException("arg0 must be input file, arg1 must be output file");
    }
    String in = args[0];
    String out = args[1];
    // try-with-resources closes the document on every path, including failures below.
    try (PDDocument doc = PDDocument.load(new File(in))) {
        doc.setAllSecurityToBeRemoved(true);
        for (COSObject cosObject : doc.getDocument().getObjects()) {
            COSBase base = cosObject.getObject();
            // if it's a stream: decode it, then re-write it using FLATE_DECODE
            if (base instanceof COSStream) {
                COSStream stream = (COSStream) base;
                byte[] bytes;
                try {
                    bytes = new PDStream(stream).toByteArray();
                } catch (IOException ex) {
                    // A stream that cannot be decoded aborts the whole run: the exception
                    // propagates out of main and the output file is never saved.
                    throw new RuntimeException("can't serialize byte[] from: " + cosObject.getObjectNumber() + " " + cosObject.getGenerationNumber() + " obj: " + ex.getMessage(), ex);
                }
                // Drop the existing filter entry before re-encoding the bytes with Flate.
                stream.removeItem(COSName.FILTER);
                // Close the output stream even if write() fails so it is never leaked.
                try (OutputStream streamOut = stream.createOutputStream(COSName.FLATE_DECODE)) {
                    streamOut.write(bytes);
                }
            }
        }
        // Return value intentionally unused; call kept from the original.
        // NOTE(review): presumably forces the catalog to exist before saving - confirm.
        doc.getDocumentCatalog();
        doc.save(out);
    }
}
Aggregations