use of com.tom_roush.pdfbox.text.PDFTextStripper in project PdfBox-Android by TomRoush.
the class MainActivity method stripText.
/**
 * Strips the text from the bundled "Hello.pdf" asset and displays it on screen.
 *
 * <p>Failures while loading, stripping or closing are logged rather than rethrown.
 * When no text could be extracted, the text view receives {@code null}.
 *
 * @param v the view that triggered this handler (not used by the method body)
 */
public void stripText(View v) {
    String parsedText = null;
    PDDocument document = null;
    try {
        document = PDDocument.load(assetManager.open("Hello.pdf"));
    } catch (IOException e) {
        Log.e("PdfBox-Android-Sample", "Exception thrown while loading document to strip", e);
    }
    // Only strip when loading succeeded: handing a null document to the stripper
    // would fail with an unchecked exception that the IOException handlers below
    // do not catch, crashing the handler instead of just logging the load error.
    if (document != null) {
        try {
            PDFTextStripper pdfStripper = new PDFTextStripper();
            // NOTE(review): PDFTextStripper documents page numbers as 1-based;
            // a start page of 0 appears to behave like 1 — confirm before changing.
            pdfStripper.setStartPage(0);
            pdfStripper.setEndPage(1);
            parsedText = "Parsed text: " + pdfStripper.getText(document);
        } catch (IOException e) {
            Log.e("PdfBox-Android-Sample", "Exception thrown while stripping text", e);
        } finally {
            try {
                document.close();
            } catch (IOException e) {
                Log.e("PdfBox-Android-Sample", "Exception thrown while closing document", e);
            }
        }
    }
    tv.setText(parsedText);
}
use of com.tom_roush.pdfbox.text.PDFTextStripper in project PdfBox-Android by TomRoush.
the class TestFontEmbedding method getUnicodeText.
/**
 * Loads the given PDF file and returns the text extracted from it.
 *
 * @param file the PDF file to read
 * @return the text produced by a default {@link PDFTextStripper}
 * @throws IOException if the file cannot be loaded, stripped or closed
 */
private String getUnicodeText(File file) throws IOException {
    PDDocument document = PDDocument.load(file);
    try {
        PDFTextStripper stripper = new PDFTextStripper();
        return stripper.getText(document);
    } finally {
        // The document holds resources that must be released even when stripping fails.
        document.close();
    }
}
use of com.tom_roush.pdfbox.text.PDFTextStripper in project PdfBox-Android by TomRoush.
the class PDFontTest method testPDFBox3826checkFonts.
/**
 * Verifies the three fonts (F1, F2, F3) on the first page of the given PDF, renders
 * that page, and checks the extracted text.
 *
 * @param byteArray the serialized PDF document to inspect
 * @param fontFile the font file whose length is compared with the embedded font data
 * @throws IOException if the document cannot be loaded, rendered, stripped or closed
 */
private void testPDFBox3826checkFonts(byte[] byteArray, File fontFile) throws IOException {
    PDDocument doc = PDDocument.load(byteArray);
    try {
        PDPage page2 = doc.getPage(0);
        // F1 = type0 subset
        PDType0Font fontF1 = (PDType0Font) page2.getResources().getFont(COSName.getPDFName("F1"));
        Assert.assertTrue(fontF1.getName().contains("+"));
        Assert.assertTrue(fontFile.length() > fontF1.getFontDescriptor().getFontFile2().toByteArray().length);
        // F2 = type0 full embed
        PDType0Font fontF2 = (PDType0Font) page2.getResources().getFont(COSName.getPDFName("F2"));
        Assert.assertFalse(fontF2.getName().contains("+"));
        Assert.assertEquals(fontFile.length(), fontF2.getFontDescriptor().getFontFile2().toByteArray().length);
        // F3 = tt full embed
        PDTrueTypeFont fontF3 = (PDTrueTypeFont) page2.getResources().getFont(COSName.getPDFName("F3"));
        // Check F3's own name here; the previous version re-checked F2 by mistake.
        Assert.assertFalse(fontF3.getName().contains("+"));
        Assert.assertEquals(fontFile.length(), fontF3.getFontDescriptor().getFontFile2().toByteArray().length);
        new PDFRenderer(doc).renderImage(0);
        PDFTextStripper stripper = new PDFTextStripper();
        stripper.setLineSeparator("\n");
        String text = stripper.getText(doc);
        Assert.assertEquals("testMultipleFontFileReuse1\ntestMultipleFontFileReuse2\ntestMultipleFontFileReuse3", text.trim());
    } finally {
        // Close the document even when an assertion above fails.
        doc.close();
    }
}
use of com.tom_roush.pdfbox.text.PDFTextStripper in project PdfBox-Android by TomRoush.
the class PDFontTest method testPDFBOX4115.
/**
 * PDFBOX-4115: Test ability to create PDF with German umlaut glyphs with a type 1 font.
 * Test for everything that went wrong before this was fixed.
 *
 * @throws IOException if the font cannot be read or the PDF cannot be written, reloaded or stripped
 */
@Test
public void testPDFBOX4115() throws IOException {
    File fontFile = TestResourceGenerator.downloadTestResource(IN_DIR, "n019003l.pfb", "https://issues.apache.org/jira/secure/attachment/12911053/n019003l.pfb");
    assumeTrue(fontFile.exists());
    File outputFile = new File(OUT_DIR, "FontType1.pdf");
    String text = "äöüÄÖÜ";

    // Step 1: create a one-page document showing the umlauts and save it.
    PDDocument doc = new PDDocument();
    try {
        PDPage page = new PDPage();
        PDPageContentStream contentStream = new PDPageContentStream(doc, page);
        try {
            PDType1Font font;
            FileInputStream fontStream = new FileInputStream(fontFile);
            try {
                font = new PDType1Font(doc, fontStream, WinAnsiEncoding.INSTANCE);
            } finally {
                // The stream was previously never closed, leaking a file handle.
                fontStream.close();
            }
            contentStream.beginText();
            contentStream.setFont(font, 10);
            contentStream.newLineAtOffset(10, 700);
            contentStream.showText(text);
            contentStream.endText();
        } finally {
            contentStream.close();
        }
        doc.addPage(page);
        doc.save(outputFile);
    } finally {
        doc.close();
    }

    // Step 2: reload the saved file and verify encoding, glyph names, glyph paths and text.
    PDDocument reloaded = PDDocument.load(outputFile);
    try {
        PDType1Font reloadedFont = (PDType1Font) reloaded.getPage(0).getResources().getFont(COSName.getPDFName("F1"));
        // Expected value goes first so a failure message reads correctly.
        Assert.assertEquals(WinAnsiEncoding.INSTANCE, reloadedFont.getEncoding());
        for (char c : text.toCharArray()) {
            String name = reloadedFont.getEncoding().getName(c);
            // Every glyph name is a base letter followed by "dieresis" (e.g. "adieresis").
            Assert.assertEquals("dieresis", name.substring(1));
            Assert.assertFalse(reloadedFont.getPath(name).isEmpty());
        }
        PDFTextStripper stripper = new PDFTextStripper();
        Assert.assertEquals(text, stripper.getText(reloaded).trim());
    } finally {
        // Close the reloaded document even when an assertion above fails.
        reloaded.close();
    }
}
use of com.tom_roush.pdfbox.text.PDFTextStripper in project PdfBox-Android by TomRoush.
the class TestPublicKeyEncryption method reload.
/**
 * Reloads the given document from a file and checks its extracted text and producer
 * against the expected values held by this test class.
 *
 * <p>On success the caller owns the returned document and must close it. If one of
 * the checks fails, the document is closed here before the failure propagates.
 *
 * @param file input file
 * @param decryptionPassword password to be used to decrypt the doc
 * @param keyStore key store stream passed to {@code PDDocument.load} for decryption
 * @return reloaded document
 * @throws IOException if the document cannot be loaded or its text cannot be extracted
 * @throws NoSuchAlgorithmException declared for callers; kept so the signature is unchanged
 */
private PDDocument reload(File file, String decryptionPassword, InputStream keyStore) throws IOException, NoSuchAlgorithmException {
    PDDocument doc2 = PDDocument.load(file, decryptionPassword, keyStore, null, MemoryUsageSetting.setupMainMemoryOnly());
    boolean verified = false;
    try {
        Assert.assertEquals("Extracted text is different", text, new PDFTextStripper().getText(doc2));
        Assert.assertEquals("Producer is different", producer, doc2.getDocumentInformation().getProducer());
        verified = true;
        return doc2;
    } finally {
        if (!verified) {
            try {
                doc2.close();
            } catch (IOException ignored) {
                // Deliberately ignored so the original verification failure is the one reported.
            }
        }
    }
}
Aggregations