Search in sources :

Example 16 with EncryptedDocumentException

use of org.apache.tika.exception.EncryptedDocumentException in project tika by apache.

the class Seven7ParserTest method testPasswordProtected.

/**
 * Parses the password protected 7z test archive three times: with no
 * password, with a wrong password, and with the correct password ("Tika").
 *
 * <ul>
 *   <li>No password: an {@link EncryptedDocumentException} is expected.</li>
 *   <li>Wrong password: a {@link TikaException} is expected and no text
 *       must have been written to the handler.</li>
 *   <li>Correct password: if {@code isStrongCryptoAvailable()} reports true,
 *       the archive must be parsed and its text extracted; otherwise a
 *       {@link TikaException} is expected.</li>
 * </ul>
 *
 * @throws Exception if parsing fails in a way the test does not expect
 */
@Test
public void testPasswordProtected() throws Exception {
    Parser parser = new AutoDetectParser();
    ContentHandler handler = new BodyContentHandler();
    Metadata metadata = new Metadata();

    // No password: parsing must fail with EncryptedDocumentException.
    // fail() throws an AssertionError, which the catch below does not swallow.
    try (InputStream stream = Seven7ParserTest.class.getResourceAsStream("/test-documents/test7Z_protected_passTika.7z")) {
        parser.parse(stream, handler, metadata, recursingContext);
        fail("Shouldn't be able to read a password protected 7z without the password");
    } catch (EncryptedDocumentException expected) {
        // Good
    }

    // Wrong password: parsing must fail with a TikaException
    recursingContext.set(PasswordProvider.class, new PasswordProvider() {

        @Override
        public String getPassword(Metadata metadata) {
            return "wrong";
        }
    });
    handler = new BodyContentHandler();
    try (InputStream stream = Seven7ParserTest.class.getResourceAsStream("/test-documents/test7Z_protected_passTika.7z")) {
        parser.parse(stream, handler, metadata, recursingContext);
        fail("Shouldn't be able to read a password protected 7z with wrong password");
    } catch (TikaException expected) {
        // If JCE is installed, the cause will be:
        //   Caused by: org.tukaani.xz.CorruptedInputException: Compressed data is corrupt
        // If JCE is not installed, the message will include
        //   "(do you have the JCE Unlimited Strength Jurisdiction Policy Files installed?)"
    }
    // Nothing must have been extracted with the wrong password
    assertEquals("", handler.toString());

    // Right password: works fine only if JCE Unlimited Strength has been installed!
    recursingContext.set(PasswordProvider.class, new PasswordProvider() {

        @Override
        public String getPassword(Metadata metadata) {
            return "Tika";
        }
    });
    handler = new BodyContentHandler();
    // Fresh metadata, so the content type assertion below cannot be satisfied
    // by a value left behind by one of the earlier (failed) parses.
    metadata = new Metadata();
    if (isStrongCryptoAvailable()) {
        try (InputStream stream = Seven7ParserTest.class.getResourceAsStream("/test-documents/test7Z_protected_passTika.7z")) {
            parser.parse(stream, handler, metadata, recursingContext);
        }
        assertEquals(TYPE_7ZIP.toString(), metadata.get(Metadata.CONTENT_TYPE));
        String content = handler.toString();
        // Should get filename
        assertContains("text.txt", content);
        // Should get contents from the text file in the 7z file
        assertContains("TEST DATA FOR TIKA.", content);
        assertContains("This is text inside an encrypted 7zip (7z) file.", content);
        assertContains("It should be processed by Tika just fine!", content);
        assertContains("TIKA-1521", content);
    } else {
        // If JCE is not installed, expect a TikaException even with the right password
        try (InputStream stream = Seven7ParserTest.class.getResourceAsStream("/test-documents/test7Z_protected_passTika.7z")) {
            parser.parse(stream, handler, metadata, recursingContext);
            fail("IOException because JCE was not installed");
        } catch (TikaException expected) {
            // Good
        }
    }
}
Also used : BodyContentHandler(org.apache.tika.sax.BodyContentHandler) EncryptedDocumentException(org.apache.tika.exception.EncryptedDocumentException) TikaException(org.apache.tika.exception.TikaException) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) AutoDetectParser(org.apache.tika.parser.AutoDetectParser) PasswordProvider(org.apache.tika.parser.PasswordProvider) BodyContentHandler(org.apache.tika.sax.BodyContentHandler) ContentHandler(org.xml.sax.ContentHandler) Parser(org.apache.tika.parser.Parser) AutoDetectParser(org.apache.tika.parser.AutoDetectParser) Test(org.junit.Test)

Aggregations

EncryptedDocumentException (org.apache.tika.exception.EncryptedDocumentException)16 PasswordProvider (org.apache.tika.parser.PasswordProvider)10 Metadata (org.apache.tika.metadata.Metadata)9 TikaInputStream (org.apache.tika.io.TikaInputStream)8 InputStream (java.io.InputStream)7 BodyContentHandler (org.apache.tika.sax.BodyContentHandler)7 Test (org.junit.Test)7 TikaTest (org.apache.tika.TikaTest)6 TikaException (org.apache.tika.exception.TikaException)6 AutoDetectParser (org.apache.tika.parser.AutoDetectParser)6 ParseContext (org.apache.tika.parser.ParseContext)6 Parser (org.apache.tika.parser.Parser)6 ContentHandler (org.xml.sax.ContentHandler)5 TemporaryResources (org.apache.tika.io.TemporaryResources)4 HashMap (java.util.HashMap)3 Map (java.util.Map)3 GeneralSecurityException (java.security.GeneralSecurityException)2 ZipArchiveEntry (org.apache.commons.compress.archivers.zip.ZipArchiveEntry)2 CloseShieldInputStream (org.apache.commons.io.input.CloseShieldInputStream)2 XHTMLContentHandler (org.apache.tika.sax.XHTMLContentHandler)2