Search in sources :

Example 1 with PDFTextStripper

use of com.tom_roush.pdfbox.text.PDFTextStripper in project PdfBox-Android by TomRoush.

the class MainActivity method stripText.

/**
 * Strips the text from a PDF and displays the text on screen.
 * <p>
 * Opens "Hello.pdf" through {@code assetManager}, extracts its text with a
 * {@link PDFTextStripper} and puts the result into {@code tv}. Failures while loading,
 * stripping or closing are logged; in that case {@code tv} receives {@code null}.
 *
 * @param v the view passed in by the caller; not used by this method
 */
public void stripText(View v) {
    String parsedText = null;
    PDDocument document = null;
    try {
        document = PDDocument.load(assetManager.open("Hello.pdf"));
    } catch (IOException e) {
        Log.e("PdfBox-Android-Sample", "Exception thrown while loading document to strip", e);
    }
    // Only attempt extraction when loading succeeded: handing a null document to the
    // stripper would fail with a NullPointerException instead of the logged load error.
    if (document != null) {
        try {
            PDFTextStripper pdfStripper = new PDFTextStripper();
            // Page numbers are 1-based, so the first page is 1 (not 0).
            pdfStripper.setStartPage(1);
            pdfStripper.setEndPage(1);
            parsedText = "Parsed text: " + pdfStripper.getText(document);
        } catch (IOException e) {
            Log.e("PdfBox-Android-Sample", "Exception thrown while stripping text", e);
        } finally {
            try {
                document.close();
            } catch (IOException e) {
                Log.e("PdfBox-Android-Sample", "Exception thrown while closing document", e);
            }
        }
    }
    tv.setText(parsedText);
}
Also used : PDDocument(com.tom_roush.pdfbox.pdmodel.PDDocument) IOException(java.io.IOException) PDFTextStripper(com.tom_roush.pdfbox.text.PDFTextStripper)

Example 2 with PDFTextStripper

use of com.tom_roush.pdfbox.text.PDFTextStripper in project PdfBox-Android by TomRoush.

the class TestFontEmbedding method getUnicodeText.

/**
 * Loads the given PDF file and returns the text extracted from it.
 *
 * @param file the PDF file to read
 * @return the text returned by {@link PDFTextStripper#getText} for the loaded document
 * @throws IOException if loading, text extraction or closing the document fails
 */
private String getUnicodeText(File file) throws IOException {
    PDDocument document = PDDocument.load(file);
    try {
        PDFTextStripper stripper = new PDFTextStripper();
        return stripper.getText(document);
    } finally {
        // Release the document even when extraction throws; it was previously never closed.
        document.close();
    }
}
Also used : PDDocument(com.tom_roush.pdfbox.pdmodel.PDDocument) PDFTextStripper(com.tom_roush.pdfbox.text.PDFTextStripper)

Example 3 with PDFTextStripper

use of com.tom_roush.pdfbox.text.PDFTextStripper in project PdfBox-Android by TomRoush.

the class PDFontTest method testPDFBox3826checkFonts.

/**
 * Checks the three fonts (F1, F2, F3) on the first page of the given PDF against the
 * original font file, renders the first page and verifies the extracted text.
 *
 * @param byteArray the PDF document as bytes
 * @param fontFile the font file whose length the embedded font programs are compared to
 * @throws IOException if loading, rendering, text extraction or closing fails
 */
private void testPDFBox3826checkFonts(byte[] byteArray, File fontFile) throws IOException {
    PDDocument doc = PDDocument.load(byteArray);
    try {
        PDPage page = doc.getPage(0);
        // F1 = type0 subset
        PDType0Font fontF1 = (PDType0Font) page.getResources().getFont(COSName.getPDFName("F1"));
        Assert.assertTrue(fontF1.getName().contains("+"));
        Assert.assertTrue(fontFile.length() > fontF1.getFontDescriptor().getFontFile2().toByteArray().length);
        // F2 = type0 full embed
        PDType0Font fontF2 = (PDType0Font) page.getResources().getFont(COSName.getPDFName("F2"));
        Assert.assertFalse(fontF2.getName().contains("+"));
        Assert.assertEquals(fontFile.length(), fontF2.getFontDescriptor().getFontFile2().toByteArray().length);
        // F3 = tt full embed
        PDTrueTypeFont fontF3 = (PDTrueTypeFont) page.getResources().getFont(COSName.getPDFName("F3"));
        // Check F3's own name here; this assertion previously re-checked fontF2.
        Assert.assertFalse(fontF3.getName().contains("+"));
        Assert.assertEquals(fontFile.length(), fontF3.getFontDescriptor().getFontFile2().toByteArray().length);
        new PDFRenderer(doc).renderImage(0);
        PDFTextStripper stripper = new PDFTextStripper();
        stripper.setLineSeparator("\n");
        String text = stripper.getText(doc);
        Assert.assertEquals("testMultipleFontFileReuse1\ntestMultipleFontFileReuse2\ntestMultipleFontFileReuse3", text.trim());
    } finally {
        // Close the document even when one of the assertions above fails.
        doc.close();
    }
}
Also used : PDPage(com.tom_roush.pdfbox.pdmodel.PDPage) PDDocument(com.tom_roush.pdfbox.pdmodel.PDDocument) PDFRenderer(com.tom_roush.pdfbox.rendering.PDFRenderer) PDFTextStripper(com.tom_roush.pdfbox.text.PDFTextStripper)

Example 4 with PDFTextStripper

use of com.tom_roush.pdfbox.text.PDFTextStripper in project PdfBox-Android by TomRoush.

the class PDFontTest method testPDFBOX4115.

/**
 * PDFBOX-4115: Test ability to create PDF with german umlaut glyphs with a type 1 font.
 * Test for everything that went wrong before this was fixed.
 * <p>
 * Writes a one-page PDF showing the umlaut text with the downloaded Type 1 font, reloads
 * it, and checks the font encoding, the glyph names and paths, and the extracted text.
 * The test is skipped (via {@code assumeTrue}) when the font file does not exist.
 *
 * @throws IOException if the font cannot be read or the PDF cannot be written, loaded or parsed
 */
@Test
public void testPDFBOX4115() throws IOException {
    File fontFile = TestResourceGenerator.downloadTestResource(IN_DIR, "n019003l.pfb", "https://issues.apache.org/jira/secure/attachment/12911053/n019003l.pfb");
    assumeTrue(fontFile.exists());
    File outputFile = new File(OUT_DIR, "FontType1.pdf");
    String text = "äöüÄÖÜ";

    // Step 1: create and save the document.
    PDDocument doc = new PDDocument();
    try {
        PDPage page = new PDPage();
        PDPageContentStream contentStream = new PDPageContentStream(doc, page);
        try {
            PDType1Font embeddedFont;
            FileInputStream fontStream = new FileInputStream(fontFile);
            try {
                embeddedFont = new PDType1Font(doc, fontStream, WinAnsiEncoding.INSTANCE);
            } finally {
                // The stream was previously left open after the font was constructed.
                fontStream.close();
            }
            contentStream.beginText();
            contentStream.setFont(embeddedFont, 10);
            contentStream.newLineAtOffset(10, 700);
            contentStream.showText(text);
            contentStream.endText();
        } finally {
            contentStream.close();
        }
        doc.addPage(page);
        doc.save(outputFile);
    } finally {
        doc.close();
    }

    // Step 2: reload the saved file and verify font and text.
    doc = PDDocument.load(outputFile);
    try {
        PDType1Font reloadedFont = (PDType1Font) doc.getPage(0).getResources().getFont(COSName.getPDFName("F1"));
        // JUnit convention: expected value first, actual value second.
        Assert.assertEquals(WinAnsiEncoding.INSTANCE, reloadedFont.getEncoding());
        for (char c : text.toCharArray()) {
            String name = reloadedFont.getEncoding().getName(c);
            // Every glyph name must be one letter followed by "dieresis".
            Assert.assertEquals("dieresis", name.substring(1));
            Assert.assertFalse(reloadedFont.getPath(name).isEmpty());
        }
        PDFTextStripper stripper = new PDFTextStripper();
        Assert.assertEquals(text, stripper.getText(doc).trim());
    } finally {
        // Close the reloaded document even when an assertion fails.
        doc.close();
    }
}
Also used : PDPage(com.tom_roush.pdfbox.pdmodel.PDPage) PDDocument(com.tom_roush.pdfbox.pdmodel.PDDocument) PDPageContentStream(com.tom_roush.pdfbox.pdmodel.PDPageContentStream) File(java.io.File) FileInputStream(java.io.FileInputStream) PDFTextStripper(com.tom_roush.pdfbox.text.PDFTextStripper) Test(org.junit.Test)

Example 5 with PDFTextStripper

use of com.tom_roush.pdfbox.text.PDFTextStripper in project PdfBox-Android by TomRoush.

the class TestPublicKeyEncryption method reload.

/**
 * Reloads the given document from a file and checks some contents.
 * <p>
 * The extracted text is compared with {@code text} and the producer entry of the document
 * information with {@code producer}. When a check or the extraction fails, the reloaded
 * document is closed before the failure propagates; on success the caller owns the
 * returned document and is responsible for closing it.
 *
 * @param file input file
 * @param decryptionPassword password to be used to decrypt the doc
 * @param keyStore key store stream handed to {@code PDDocument.load} to decrypt the doc
 * @return reloaded document
 * @throws IOException if loading the document or extracting its text fails
 * @throws NoSuchAlgorithmException declared for callers; not thrown directly by the code in this method
 */
private PDDocument reload(File file, String decryptionPassword, InputStream keyStore) throws IOException, NoSuchAlgorithmException {
    PDDocument doc2 = PDDocument.load(file, decryptionPassword, keyStore, null, MemoryUsageSetting.setupMainMemoryOnly());
    boolean verified = false;
    try {
        Assert.assertEquals("Extracted text is different", text, new PDFTextStripper().getText(doc2));
        Assert.assertEquals("Producer is different", producer, doc2.getDocumentInformation().getProducer());
        verified = true;
        return doc2;
    } finally {
        if (!verified) {
            try {
                doc2.close();
            } catch (IOException ignored) {
                // The check failure already propagating is the one worth reporting.
            }
        }
    }
}
Also used : PDDocument(com.tom_roush.pdfbox.pdmodel.PDDocument) PDFTextStripper(com.tom_roush.pdfbox.text.PDFTextStripper)

Aggregations

PDFTextStripper (com.tom_roush.pdfbox.text.PDFTextStripper)7 PDDocument (com.tom_roush.pdfbox.pdmodel.PDDocument)6 PDPage (com.tom_roush.pdfbox.pdmodel.PDPage)3 PDPageContentStream (com.tom_roush.pdfbox.pdmodel.PDPageContentStream)2 File (java.io.File)2 AccessPermission (com.tom_roush.pdfbox.pdmodel.encryption.AccessPermission)1 PDFRenderer (com.tom_roush.pdfbox.rendering.PDFRenderer)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 FileInputStream (java.io.FileInputStream)1 IOException (java.io.IOException)1 Before (org.junit.Before)1 Test (org.junit.Test)1