Search in sources :

Example 61 with POIFSFileSystem

use of org.apache.poi.poifs.filesystem.POIFSFileSystem in project poi by apache.

the class TestDecryptor method bug60320.

/**
 * Opens the sample file "60320-protected.xlsx", verifies that the password
 * "Test001!!" is accepted by the decryptor, and hands the root directory and
 * decryptor to {@code zipOk} for the content check.
 *
 * The test is skipped (via {@code Assume}) unless the JVM reports an unlimited
 * maximum AES key length.
 *
 * @throws IOException if the sample cannot be read
 * @throws GeneralSecurityException if the cipher lookup or decryption fails
 */
@Test
public void bug60320() throws IOException, GeneralSecurityException {
    int maxKeyLen = Cipher.getMaxAllowedKeyLength("AES");
    Assume.assumeTrue("Please install JCE Unlimited Strength Jurisdiction Policy files for AES 256", maxKeyLen == Integer.MAX_VALUE);
    InputStream is = POIDataSamples.getPOIFSInstance().openResourceAsStream("60320-protected.xlsx");
    POIFSFileSystem fs;
    try {
        fs = new POIFSFileSystem(is);
    } finally {
        // Release the sample stream even when the filesystem cannot be parsed.
        is.close();
    }
    try {
        EncryptionInfo info = new EncryptionInfo(fs);
        Decryptor d = Decryptor.getInstance(info);
        boolean b = d.verifyPassword("Test001!!");
        assertTrue(b);
        zipOk(fs.getRoot(), d);
    } finally {
        // Close the filesystem even when an assertion above fails.
        fs.close();
    }
}
Also used : ZipInputStream(java.util.zip.ZipInputStream) FileInputStream(java.io.FileInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) POIFSFileSystem(org.apache.poi.poifs.filesystem.POIFSFileSystem) NPOIFSFileSystem(org.apache.poi.poifs.filesystem.NPOIFSFileSystem) Test(org.junit.Test)

Example 62 with POIFSFileSystem

use of org.apache.poi.poifs.filesystem.POIFSFileSystem in project poi by apache.

the class TestEncryptionInfo method testEncryptionInfo.

/**
 * Loads the sample file "protect.xlsx" and checks the values exposed by its
 * {@code EncryptionInfo}: version 3.2, AES-128 cipher, SHA-1 hash, 128-bit key,
 * a 32-byte encrypted verifier hash, the AES cipher provider and the CSP name.
 *
 * @throws IOException if the sample cannot be read
 */
@Test
public void testEncryptionInfo() throws IOException {
    POIFSFileSystem fs = new POIFSFileSystem(POIDataSamples.getPOIFSInstance().openResourceAsStream("protect.xlsx"));
    try {
        EncryptionInfo info = new EncryptionInfo(fs);
        assertEquals(3, info.getVersionMajor());
        assertEquals(2, info.getVersionMinor());
        assertEquals(CipherAlgorithm.aes128, info.getHeader().getCipherAlgorithm());
        assertEquals(HashAlgorithm.sha1, info.getHeader().getHashAlgorithm());
        assertEquals(128, info.getHeader().getKeySize());
        assertEquals(32, info.getVerifier().getEncryptedVerifierHash().length);
        assertEquals(CipherProvider.aes, info.getHeader().getCipherProvider());
        assertEquals("Microsoft Enhanced RSA and AES Cryptographic Provider", info.getHeader().getCspName());
    } finally {
        // A failing assertion must not leave the filesystem open.
        fs.close();
    }
}
Also used : POIFSFileSystem(org.apache.poi.poifs.filesystem.POIFSFileSystem) Test(org.junit.Test)

Example 63 with POIFSFileSystem

use of org.apache.poi.poifs.filesystem.POIFSFileSystem in project poi by apache.

the class TestEncryptor method binaryRC4Encryption.

/**
 * Round-trips a payload through binary RC4 encryption: the bytes of
 * "SimpleMultiCell.xlsx" are encrypted into a fresh filesystem, the filesystem
 * is serialized and re-read, and the decrypted bytes must equal the input.
 *
 * @throws Exception if reading, encrypting or decrypting fails
 */
@Test
public void binaryRC4Encryption() throws Exception {
    // please contribute a real sample file, which is binary rc4 encrypted
    // ... at least the output can be opened in Excel Viewer
    String password = "pass";
    ByteArrayOutputStream payloadExpected = new ByteArrayOutputStream();
    InputStream is = POIDataSamples.getSpreadSheetInstance().openResourceAsStream("SimpleMultiCell.xlsx");
    try {
        IOUtils.copy(is, payloadExpected);
    } finally {
        is.close();
    }

    // Encrypt the payload into a new filesystem and serialize it to memory.
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    POIFSFileSystem encryptedFs = new POIFSFileSystem();
    try {
        EncryptionInfo ei = new EncryptionInfo(EncryptionMode.binaryRC4);
        Encryptor enc = ei.getEncryptor();
        enc.confirmPassword(password);
        OutputStream os = enc.getDataStream(encryptedFs.getRoot());
        payloadExpected.writeTo(os);
        os.close();
        encryptedFs.writeFilesystem(bos);
    } finally {
        // The write-side filesystem was previously dropped without being closed.
        encryptedFs.close();
    }

    // Re-open the serialized bytes and decrypt them again.
    ByteArrayOutputStream payloadActual = new ByteArrayOutputStream();
    POIFSFileSystem reopenedFs = new POIFSFileSystem(new ByteArrayInputStream(bos.toByteArray()));
    try {
        EncryptionInfo ei = new EncryptionInfo(reopenedFs);
        Decryptor dec = ei.getDecryptor();
        boolean b = dec.verifyPassword(password);
        assertTrue(b);
        InputStream decrypted = dec.getDataStream(reopenedFs.getRoot());
        try {
            IOUtils.copy(decrypted, payloadActual);
        } finally {
            decrypted.close();
        }
    } finally {
        reopenedFs.close();
    }
    assertArrayEquals(payloadExpected.toByteArray(), payloadActual.toByteArray());
}
Also used : AgileDecryptor(org.apache.poi.poifs.crypt.agile.AgileDecryptor) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) BoundedInputStream(org.apache.poi.util.BoundedInputStream) InputStream(java.io.InputStream) POIFSFileSystem(org.apache.poi.poifs.filesystem.POIFSFileSystem) NPOIFSFileSystem(org.apache.poi.poifs.filesystem.NPOIFSFileSystem) ByteArrayOutputStream(java.io.ByteArrayOutputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Test(org.junit.Test)

Example 64 with POIFSFileSystem

use of org.apache.poi.poifs.filesystem.POIFSFileSystem in project tika by apache.

the class QPWTextExtractor method extract.

/**
 * Reads the OLE document entry named by {@code OLE_DOCUMENT_NAME} from the
 * given stream and feeds it, record by record, to the matching extractor.
 * All output is emitted between a single pair of "p" start/end elements.
 *
 * @param input    stream containing the OLE2 container
 * @param xhtml    handler receiving the extracted content
 * @param metadata metadata object passed on to the extractors
 * @throws UnsupportedFormatException if the container has no root or lacks the expected entry
 */
@SuppressWarnings("resource")
public void extract(InputStream input, XHTMLContentHandler xhtml, Metadata metadata) throws IOException, SAXException, TikaException {
    POIFSFileSystem fileSystem = new POIFSFileSystem(input);
    DirectoryNode root = fileSystem.getRoot();
    boolean hasDocument = root != null && root.hasEntry(OLE_DOCUMENT_NAME);
    if (!hasDocument) {
        Object found = (root == null) ? "null" : root.getEntryNames();
        throw new UnsupportedFormatException("Unsupported QuattroPro file format. " + "Looking for OLE entry \"" + OLE_DOCUMENT_NAME + "\". Found: " + found);
    }
    //TODO shall we validate and throw warning/error if the file does not
    //start with a BOF and ends with a EOF?
    xhtml.startElement("p");
    try (WPInputStream records = new WPInputStream(fileSystem.createDocumentInputStream(OLE_DOCUMENT_NAME))) {
        Context ctx = new Context(records, xhtml, metadata);
        while (hasNext(records)) {
            // Each record starts with its type followed by its body length.
            ctx.type = records.readWPShort();
            ctx.bodyLength = records.readWPShort();
            Extractor registered = EXTRACTORS.get(ctx.type);
            // Record types without a registered extractor go to IGNORE
            // (swap in Extractor.DEBUG to find out what is being ignored).
            Extractor chosen = (registered != null) ? registered : Extractor.IGNORE;
            chosen.extract(ctx);
        }
    }
    xhtml.endElement("p");
}
Also used : UnsupportedFormatException(org.apache.tika.exception.UnsupportedFormatException) POIFSFileSystem(org.apache.poi.poifs.filesystem.POIFSFileSystem) DirectoryNode(org.apache.poi.poifs.filesystem.DirectoryNode)

Example 65 with POIFSFileSystem

use of org.apache.poi.poifs.filesystem.POIFSFileSystem in project tika by apache.

the class AbstractOOXMLExtractor method handleEmbeddedOLE.

/**
 * Handles an embedded OLE object in the document.
 *
 * The part is opened as a POIFS (OLE2) container and dispatched on its content:
 * a container with "CONTENTS", "Ole" and "CompObj" entries has its "CONTENTS"
 * stream parsed as an embedded document; an OLE 1.0 native object has its data
 * buffer parsed; anything else is passed to {@code handleEmbeddedFile}.
 * Parts smaller than three 512-byte blocks are skipped, and failures to open
 * the container are recorded on {@code parentMetadata} rather than thrown.
 *
 * @param part           the package part holding the embedded object
 * @param handler        handler receiving the embedded content
 * @param rel            relationship id stored on the embedded metadata
 * @param parentMetadata metadata of the containing document, used to record exceptions
 * @throws IOException  if closing the container or the embedded stream fails
 * @throws SAXException if the content handler fails
 */
private void handleEmbeddedOLE(PackagePart part, ContentHandler handler, String rel, Metadata parentMetadata) throws IOException, SAXException {
    // A POIFSFileSystem needs to be at least 3 blocks big to be valid
    if (part.getSize() >= 0 && part.getSize() < 512 * 3) {
        // Too small, skip
        return;
    }
    // Open the POIFS (OLE2) structure and process.
    // Only one stream is requested from the part: an additional, unused
    // stream opened here previously was never closed.
    POIFSFileSystem fs;
    try {
        fs = new POIFSFileSystem(part.getInputStream());
    } catch (Exception e) {
        EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
        return;
    }
    TikaInputStream stream = null;
    try {
        Metadata metadata = new Metadata();
        metadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, rel);
        DirectoryNode root = fs.getRoot();
        POIFSDocumentType type = POIFSDocumentType.detectType(root);
        if (root.hasEntry("CONTENTS") && root.hasEntry("Ole") && root.hasEntry("CompObj")) {
            // TIKA-704: OLE 2.0 embedded non-Office document?
            //TODO: figure out if the equivalent of OLE 1.0's
            //getCommand() and getFileName() exist for OLE 2.0 to populate
            //TikaCoreProperties.ORIGINAL_RESOURCE_NAME
            stream = TikaInputStream.get(fs.createDocumentInputStream("CONTENTS"));
            if (embeddedExtractor.shouldParseEmbedded(metadata)) {
                embeddedExtractor.parseEmbedded(stream, new EmbeddedContentHandler(handler), metadata, false);
            }
        } else if (POIFSDocumentType.OLE10_NATIVE == type) {
            // TIKA-704: OLE 1.0 embedded document
            Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(fs);
            if (ole.getLabel() != null) {
                metadata.set(Metadata.RESOURCE_NAME_KEY, ole.getLabel());
            }
            if (ole.getCommand() != null) {
                metadata.add(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, ole.getCommand());
            }
            if (ole.getFileName() != null) {
                metadata.add(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, ole.getFileName());
            }
            byte[] data = ole.getDataBuffer();
            if (data != null) {
                stream = TikaInputStream.get(data);
            }
            if (stream != null && embeddedExtractor.shouldParseEmbedded(metadata)) {
                embeddedExtractor.parseEmbedded(stream, new EmbeddedContentHandler(handler), metadata, false);
            }
        } else {
            handleEmbeddedFile(part, handler, rel);
        }
    } catch (FileNotFoundException e) {
        // There was no CONTENTS entry, so skip this part
    } catch (Ole10NativeException e) {
        // Could not process an OLE 1.0 entry, so skip this part
    } catch (IOException e) {
        EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
    } finally {
        // Close the derived stream first, and close the container even if
        // closing the stream throws.
        try {
            if (stream != null) {
                stream.close();
            }
        } finally {
            fs.close();
        }
    }
}
Also used : Ole10NativeException(org.apache.poi.poifs.filesystem.Ole10NativeException) Ole10Native(org.apache.poi.poifs.filesystem.Ole10Native) TikaInputStream(org.apache.tika.io.TikaInputStream) InputStream(java.io.InputStream) POIFSFileSystem(org.apache.poi.poifs.filesystem.POIFSFileSystem) NPOIFSFileSystem(org.apache.poi.poifs.filesystem.NPOIFSFileSystem) Metadata(org.apache.tika.metadata.Metadata) FileNotFoundException(java.io.FileNotFoundException) TikaInputStream(org.apache.tika.io.TikaInputStream) DirectoryNode(org.apache.poi.poifs.filesystem.DirectoryNode) POIFSDocumentType(org.apache.tika.parser.microsoft.OfficeParser.POIFSDocumentType) EmbeddedContentHandler(org.apache.tika.sax.EmbeddedContentHandler) IOException(java.io.IOException) Ole10NativeException(org.apache.poi.poifs.filesystem.Ole10NativeException) TikaException(org.apache.tika.exception.TikaException) InvalidFormatException(org.apache.poi.openxml4j.exceptions.InvalidFormatException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) XmlException(org.apache.xmlbeans.XmlException) SAXException(org.xml.sax.SAXException)

Aggregations

POIFSFileSystem (org.apache.poi.poifs.filesystem.POIFSFileSystem): 121; Test (org.junit.Test): 58; NPOIFSFileSystem (org.apache.poi.poifs.filesystem.NPOIFSFileSystem): 38; InputStream (java.io.InputStream): 36; ByteArrayInputStream (java.io.ByteArrayInputStream): 33; ByteArrayOutputStream (java.io.ByteArrayOutputStream): 33; FileInputStream (java.io.FileInputStream): 31; File (java.io.File): 25; OPOIFSFileSystem (org.apache.poi.poifs.filesystem.OPOIFSFileSystem): 15; FileOutputStream (java.io.FileOutputStream): 14; OutputStream (java.io.OutputStream): 14; HSSFWorkbook (org.apache.poi.hssf.usermodel.HSSFWorkbook): 13; DirectoryNode (org.apache.poi.poifs.filesystem.DirectoryNode): 13; TempFile (org.apache.poi.util.TempFile): 13; IOException (java.io.IOException): 12; MutablePropertySet (org.apache.poi.hpsf.MutablePropertySet): 7; MutableSection (org.apache.poi.hpsf.MutableSection): 7; HashMap (java.util.HashMap): 6; DocumentEntry (org.apache.poi.poifs.filesystem.DocumentEntry): 6; NDocumentOutputStream (org.apache.poi.poifs.filesystem.NDocumentOutputStream): 6