Search in sources :

Example 16 with DocumentInputStream

use of org.apache.poi.poifs.filesystem.DocumentInputStream in project poi by apache.

the class CryptoAPIEncryptor method getSummaryEntries.

/**
 * Encrypt the Document-/SummaryInformation and other optional streams.
 * Opposed to other crypto modes, cryptoapi is record based and can't be used
 * to stream-encrypt a whole file.
 *
 * <p>The method proceeds as follows:</p>
 * <ol>
 * <li>reserves 8 bytes at the start of the output for the header,</li>
 * <li>for each of the SummaryInformation / DocumentSummaryInformation entries
 *     present in {@code dir}, copies the entry into the output under its own
 *     block number, records a stream descriptor for it and deletes the entry
 *     from {@code dir},</li>
 * <li>appends the stream descriptor array (written under block 0),</li>
 * <li>rewinds and fills the header with the offset and size of that array,</li>
 * <li>stores the result in {@code dir} as the {@code "EncryptedSummary"} document
 *     and writes a fresh DocumentSummaryInformation entry.</li>
 * </ol>
 *
 * @param dir the directory whose summary entries are moved into the encrypted stream
 * @return the output stream that holds the encrypted summary data
 * @throws IOException if an entry can't be read or written, or if writing the
 *         replacement DocumentSummaryInformation is not supported
 * @throws GeneralSecurityException declared for the crypto helpers used here
 *
 * @see <a href="http://msdn.microsoft.com/en-us/library/dd943321(v=office.12).aspx">2.3.5.4 RC4 CryptoAPI Encrypted Summary Stream</a>
 */
public OutputStream getSummaryEntries(DirectoryNode dir) throws IOException, GeneralSecurityException {
    // NOSONAR
    CryptoAPIDocumentOutputStream bos = new CryptoAPIDocumentOutputStream(this);
    byte[] buf = new byte[8];
    // skip header - the real values are filled in once the descriptor array is known
    bos.write(buf, 0, 8);
    String[] entryNames = { SummaryInformation.DEFAULT_STREAM_NAME, DocumentSummaryInformation.DEFAULT_STREAM_NAME };
    List<StreamDescriptorEntry> descList = new ArrayList<StreamDescriptorEntry>();
    int block = 0;
    for (String entryName : entryNames) {
        if (!dir.hasEntry(entryName)) {
            continue;
        }
        StreamDescriptorEntry descEntry = new StreamDescriptorEntry();
        descEntry.block = block;
        descEntry.streamOffset = bos.size();
        descEntry.streamName = entryName;
        descEntry.flags = StreamDescriptorEntry.flagStream.setValue(0, 1);
        descEntry.reserved2 = 0;
        bos.setBlock(block);
        DocumentInputStream dis = dir.createDocumentInputStream(entryName);
        try {
            IOUtils.copy(dis, bos);
        } finally {
            // release the source stream even when the copy fails
            dis.close();
        }
        descEntry.streamSize = bos.size() - descEntry.streamOffset;
        descList.add(descEntry);
        // the plain entry is replaced by its copy inside the encrypted stream
        dir.getEntry(entryName).delete();
        block++;
    }
    int streamDescriptorArrayOffset = bos.size();
    // the descriptor array itself is written under block 0
    bos.setBlock(0);
    LittleEndian.putUInt(buf, 0, descList.size());
    bos.write(buf, 0, 4);
    for (StreamDescriptorEntry sde : descList) {
        LittleEndian.putUInt(buf, 0, sde.streamOffset);
        bos.write(buf, 0, 4);
        LittleEndian.putUInt(buf, 0, sde.streamSize);
        bos.write(buf, 0, 4);
        LittleEndian.putUShort(buf, 0, sde.block);
        bos.write(buf, 0, 2);
        LittleEndian.putUByte(buf, 0, (short) sde.streamName.length());
        bos.write(buf, 0, 1);
        LittleEndian.putUByte(buf, 0, (short) sde.flags);
        bos.write(buf, 0, 1);
        LittleEndian.putUInt(buf, 0, sde.reserved2);
        bos.write(buf, 0, 4);
        byte[] nameBytes = StringUtil.getToUnicodeLE(sde.streamName);
        bos.write(nameBytes, 0, nameBytes.length);
        // null-termination
        LittleEndian.putShort(buf, 0, (short) 0);
        bos.write(buf, 0, 2);
    }
    int savedSize = bos.size();
    int streamDescriptorArraySize = savedSize - streamDescriptorArrayOffset;
    LittleEndian.putUInt(buf, 0, streamDescriptorArrayOffset);
    LittleEndian.putUInt(buf, 4, streamDescriptorArraySize);
    // rewind, overwrite the 8 reserved header bytes, then restore the full size
    bos.reset();
    bos.setBlock(0);
    bos.write(buf, 0, 8);
    bos.setSize(savedSize);
    dir.createDocument("EncryptedSummary", new ByteArrayInputStream(bos.getBuf(), 0, savedSize));
    DocumentSummaryInformation dsi = PropertySetFactory.newDocumentSummaryInformation();
    try {
        dsi.write(dir, DocumentSummaryInformation.DEFAULT_STREAM_NAME);
    } catch (WritingNotSupportedException e) {
        throw new IOException(e);
    }
    return bos;
}
Also used : ArrayList(java.util.ArrayList) DocumentSummaryInformation(org.apache.poi.hpsf.DocumentSummaryInformation) WritingNotSupportedException(org.apache.poi.hpsf.WritingNotSupportedException) IOException(java.io.IOException) DocumentInputStream(org.apache.poi.poifs.filesystem.DocumentInputStream) StreamDescriptorEntry(org.apache.poi.poifs.crypt.cryptoapi.CryptoAPIDecryptor.StreamDescriptorEntry) ByteArrayInputStream(java.io.ByteArrayInputStream)

Example 17 with DocumentInputStream

use of org.apache.poi.poifs.filesystem.DocumentInputStream in project poi by apache.

the class StandardDecryptor method getDataStream.

/**
 * Opens the encrypted package entry of {@code dir} and returns a stream that
 * yields the decrypted content.
 *
 * <p>The first 8 bytes of the entry are read as the plain-text length and stored
 * in {@code _length}; the returned stream is limited to that many bytes.
 * If no secret key is available yet, {@code verifyPassword(null)} is called first.</p>
 *
 * <p>The entry stream is deliberately left open on success, because the returned
 * stream reads from it. If setting up the decryption fails, it is closed
 * before the exception propagates.</p>
 *
 * @param dir the directory containing the encrypted package entry
 * @return a stream over the decrypted data, bounded to the stored length
 * @throws IOException if the entry can't be opened or its length can't be read
 */
@Override
@SuppressWarnings("resource")
public InputStream getDataStream(DirectoryNode dir) throws IOException {
    DocumentInputStream dis = dir.createDocumentInputStream(DEFAULT_POIFS_ENTRY);
    boolean handedOver = false;
    try {
        _length = dis.readLong();
        if (getSecretKey() == null) {
            verifyPassword(null);
        }
        // limit wrong calculated ole entries - (bug #57080)
        // standard encryption always uses aes encoding, so blockSize is always 16 
        // http://stackoverflow.com/questions/3283787/size-of-data-after-aes-encryption
        int blockSize = getEncryptionInfo().getHeader().getCipherAlgorithm().blockSize;
        long cipherLen = (_length / blockSize + 1) * blockSize;
        Cipher cipher = getCipher(getSecretKey());
        InputStream boundedDis = new BoundedInputStream(dis, cipherLen);
        InputStream result = new BoundedInputStream(new CipherInputStream(boundedDis, cipher), _length);
        handedOver = true;
        return result;
    } finally {
        // only close on failure - on success the caller owns the wrapped stream
        if (!handedOver) {
            dis.close();
        }
    }
}
Also used : CipherInputStream(javax.crypto.CipherInputStream) CipherInputStream(javax.crypto.CipherInputStream) BoundedInputStream(org.apache.poi.util.BoundedInputStream) DocumentInputStream(org.apache.poi.poifs.filesystem.DocumentInputStream) InputStream(java.io.InputStream) BoundedInputStream(org.apache.poi.util.BoundedInputStream) Cipher(javax.crypto.Cipher) DocumentInputStream(org.apache.poi.poifs.filesystem.DocumentInputStream)

Example 18 with DocumentInputStream

use of org.apache.poi.poifs.filesystem.DocumentInputStream in project tika by apache.

the class AbstractPOIFSExtractor method handleEmbeddedOfficeDoc.

/**
 * Handle an office document that's embedded at the POIFS level.
 *
 * <p>If the directory has a "Package" entry, its content is treated as OOXML:
 * the media type is detected and the stream is passed on to
 * {@code handleEmbeddedResource}. Otherwise the directory is handled as
 * regular OLE2, with special treatment for OLE10Native and CompObj containers,
 * and is passed to the embedded document parser if that parser accepts the
 * collected metadata.</p>
 *
 * @param dir          the POIFS directory holding the embedded document
 * @param resourceName name to report for the embedded resource; if {@code null},
 *                     the name of {@code dir} is used instead
 * @param xhtml        the content handler receiving the output
 * @throws IOException   if closing or handling the embedded stream fails
 * @throws SAXException  declared for the content handler calls made here
 * @throws TikaException declared for the embedded parser calls made here
 */
protected void handleEmbeddedOfficeDoc(DirectoryEntry dir, String resourceName, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
    if (dir.hasEntry("Package")) {
        // It's OOXML (has a ZipFile):
        Entry ooxml = dir.getEntry("Package");
        try (TikaInputStream stream = TikaInputStream.get(new DocumentInputStream((DocumentEntry) ooxml))) {
            ZipContainerDetector detector = new ZipContainerDetector();
            MediaType type = null;
            try {
                //if there's a stream error while detecting...
                type = detector.detect(stream, new Metadata());
            } catch (Exception e) {
                EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
                return;
            }
            handleEmbeddedResource(stream, null, dir.getName(), dir.getStorageClsid(), type.toString(), xhtml, true);
            return;
        }
    }
    // It's regular OLE2:
    // What kind of document is it?
    Metadata metadata = new Metadata();
    metadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, dir.getName());
    if (dir.getStorageClsid() != null) {
        metadata.set(Metadata.EMBEDDED_STORAGE_CLASS_ID, dir.getStorageClsid().toString());
    }
    POIFSDocumentType type = POIFSDocumentType.detectType(dir);
    TikaInputStream embedded = null;
    String rName = (resourceName == null) ? dir.getName() : resourceName;
    try {
        if (type == POIFSDocumentType.OLE10_NATIVE) {
            try {
                // Try to un-wrap the OLE10Native record:
                Ole10Native ole = Ole10Native.createFromEmbeddedOleObject((DirectoryNode) dir);
                if (ole.getLabel() != null) {
                    metadata.set(Metadata.RESOURCE_NAME_KEY, rName + '/' + ole.getLabel());
                }
                if (ole.getCommand() != null) {
                    metadata.add(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, ole.getCommand());
                }
                if (ole.getFileName() != null) {
                    metadata.add(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, ole.getFileName());
                }
                byte[] data = ole.getDataBuffer();
                embedded = TikaInputStream.get(data);
            } catch (Ole10NativeException ex) {
            // Not a valid OLE10Native record, skip it
            } catch (Exception e) {
                EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
                return;
            }
        } else if (type == POIFSDocumentType.COMP_OBJ) {
            try {
                //TODO: figure out if the equivalent of OLE 1.0's
                //getCommand() and getFileName() exist for OLE 2.0 to populate
                //TikaCoreProperties.ORIGINAL_RESOURCE_NAME
                // Grab the contents and process
                DocumentEntry contentsEntry;
                try {
                    contentsEntry = (DocumentEntry) dir.getEntry("CONTENTS");
                } catch (FileNotFoundException ioe) {
                    // the entry name is looked up under both spellings
                    contentsEntry = (DocumentEntry) dir.getEntry("Contents");
                }
                byte[] contents = new byte[contentsEntry.getSize()];
                // the entry is fully buffered, so the stream can be closed right away
                try (DocumentInputStream inp = new DocumentInputStream(contentsEntry)) {
                    inp.readFully(contents);
                }
                embedded = TikaInputStream.get(contents);
                // Try to work out what it is
                MediaType mediaType = getDetector().detect(embedded, new Metadata());
                String extension = type.getExtension();
                try {
                    MimeType mimeType = getMimeTypes().forName(mediaType.toString());
                    extension = mimeType.getExtension();
                } catch (MimeTypeException mte) {
                // No details on this type are known
                }
                // Record what we can do about it
                metadata.set(Metadata.CONTENT_TYPE, mediaType.getType().toString());
                metadata.set(Metadata.RESOURCE_NAME_KEY, rName + extension);
            } catch (Exception e) {
                EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
                return;
            }
        } else {
            metadata.set(Metadata.CONTENT_TYPE, type.getType().toString());
            metadata.set(Metadata.RESOURCE_NAME_KEY, rName + '.' + type.getExtension());
        }
        // Should we parse it?
        if (embeddedDocumentUtil.shouldParseEmbedded(metadata)) {
            if (embedded == null) {
                // Make a TikaInputStream that just
                // passes the root directory of the
                // embedded document, and is otherwise
                // empty (byte[0]):
                embedded = TikaInputStream.get(new byte[0]);
                embedded.setOpenContainer(dir);
            }
            embeddedDocumentUtil.parseEmbedded(embedded, xhtml, metadata, true);
        }
    } catch (IOException e) {
        EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata);
    } finally {
        if (embedded != null) {
            embedded.close();
        }
    }
}
Also used : ZipContainerDetector(org.apache.tika.parser.pkg.ZipContainerDetector) Ole10Native(org.apache.poi.poifs.filesystem.Ole10Native) Metadata(org.apache.tika.metadata.Metadata) FileNotFoundException(java.io.FileNotFoundException) TikaInputStream(org.apache.tika.io.TikaInputStream) POIFSDocumentType(org.apache.tika.parser.microsoft.OfficeParser.POIFSDocumentType) IOException(java.io.IOException) DocumentInputStream(org.apache.poi.poifs.filesystem.DocumentInputStream) Ole10NativeException(org.apache.poi.poifs.filesystem.Ole10NativeException) TikaException(org.apache.tika.exception.TikaException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) SAXException(org.xml.sax.SAXException) MimeTypeException(org.apache.tika.mime.MimeTypeException) MimeType(org.apache.tika.mime.MimeType) Entry(org.apache.poi.poifs.filesystem.Entry) DocumentEntry(org.apache.poi.poifs.filesystem.DocumentEntry) DirectoryEntry(org.apache.poi.poifs.filesystem.DirectoryEntry) Ole10NativeException(org.apache.poi.poifs.filesystem.Ole10NativeException) MimeTypeException(org.apache.tika.mime.MimeTypeException) DocumentEntry(org.apache.poi.poifs.filesystem.DocumentEntry) MediaType(org.apache.tika.mime.MediaType)

Example 19 with DocumentInputStream

use of org.apache.poi.poifs.filesystem.DocumentInputStream in project tika by apache.

the class SummaryExtractor method parseSummaryEntryIfExists.

/**
 * Parses the named property set entry of {@code root}, if there is one.
 *
 * <p>A missing entry or an entry without a property set stream is skipped
 * silently. Any other unexpected exception is logged and ignored.</p>
 *
 * @param root      the directory to look the entry up in
 * @param entryName the name of the summary entry
 * @throws IOException   declared by the signature; NOTE(review): the catch-all
 *                       below appears to swallow IOExceptions raised in the body - confirm
 * @throws TikaException if the property set has an unexpected type or the
 *                       stream doesn't support mark/reset
 */
private void parseSummaryEntryIfExists(DirectoryNode root, String entryName) throws IOException, TikaException {
    try {
        DocumentEntry entry = (DocumentEntry) root.getEntry(entryName);
        // close the entry stream once the property set has been handled
        try (DocumentInputStream stream = new DocumentInputStream(entry)) {
            PropertySet properties = new PropertySet(stream);
            if (properties.isSummaryInformation()) {
                parse(new SummaryInformation(properties));
            }
            if (properties.isDocumentSummaryInformation()) {
                parse(new DocumentSummaryInformation(properties));
            }
        }
    } catch (FileNotFoundException e) {
    // entry does not exist, just skip it
    } catch (NoPropertySetStreamException e) {
    // no property stream, just skip it
    } catch (UnexpectedPropertySetTypeException e) {
        throw new TikaException("Unexpected HPSF document", e);
    } catch (MarkUnsupportedException e) {
        throw new TikaException("Invalid DocumentInputStream", e);
    } catch (Exception e) {
        LOG.warn("Ignoring unexpected exception while parsing summary entry {}", entryName, e);
    }
}
Also used : TikaException(org.apache.tika.exception.TikaException) SummaryInformation(org.apache.poi.hpsf.SummaryInformation) DocumentSummaryInformation(org.apache.poi.hpsf.DocumentSummaryInformation) DocumentEntry(org.apache.poi.poifs.filesystem.DocumentEntry) FileNotFoundException(java.io.FileNotFoundException) PropertySet(org.apache.poi.hpsf.PropertySet) DocumentSummaryInformation(org.apache.poi.hpsf.DocumentSummaryInformation) NoPropertySetStreamException(org.apache.poi.hpsf.NoPropertySetStreamException) DocumentInputStream(org.apache.poi.poifs.filesystem.DocumentInputStream) UnexpectedPropertySetTypeException(org.apache.poi.hpsf.UnexpectedPropertySetTypeException) MarkUnsupportedException(org.apache.poi.hpsf.MarkUnsupportedException) NoPropertySetStreamException(org.apache.poi.hpsf.NoPropertySetStreamException) UnexpectedPropertySetTypeException(org.apache.poi.hpsf.UnexpectedPropertySetTypeException) TikaException(org.apache.tika.exception.TikaException) MarkUnsupportedException(org.apache.poi.hpsf.MarkUnsupportedException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException)

Example 20 with DocumentInputStream

use of org.apache.poi.poifs.filesystem.DocumentInputStream in project poi by apache.

the class TestWriteWellKnown method getSummaryInformation.

/**
 * Reads the SummaryInformation property set from the given filesystem.
 *
 * @param poifs the filesystem containing the summary information entry
 * @return the summary information built from that entry
 * @throws Exception if the entry can't be opened or isn't a valid
 *         summary information property set
 */
private static SummaryInformation getSummaryInformation(NPOIFSFileSystem poifs) throws Exception {
    DocumentInputStream dis = poifs.createDocumentInputStream(SummaryInformation.DEFAULT_STREAM_NAME);
    try {
        PropertySet ps = new PropertySet(dis);
        return new SummaryInformation(ps);
    } finally {
        // close the stream even when parsing the property set fails
        dis.close();
    }
}
Also used : SummaryInformation(org.apache.poi.hpsf.SummaryInformation) DocumentSummaryInformation(org.apache.poi.hpsf.DocumentSummaryInformation) PropertySet(org.apache.poi.hpsf.PropertySet) DocumentInputStream(org.apache.poi.poifs.filesystem.DocumentInputStream)

Aggregations

DocumentInputStream (org.apache.poi.poifs.filesystem.DocumentInputStream)22 IOException (java.io.IOException)10 DocumentEntry (org.apache.poi.poifs.filesystem.DocumentEntry)5 DocumentNode (org.apache.poi.poifs.filesystem.DocumentNode)5 InputStream (java.io.InputStream)4 DocumentSummaryInformation (org.apache.poi.hpsf.DocumentSummaryInformation)4 PropertySet (org.apache.poi.hpsf.PropertySet)4 DirectoryNode (org.apache.poi.poifs.filesystem.DirectoryNode)4 Entry (org.apache.poi.poifs.filesystem.Entry)4 FileNotFoundException (java.io.FileNotFoundException)3 ByteArrayInputStream (java.io.ByteArrayInputStream)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)2 SummaryInformation (org.apache.poi.hpsf.SummaryInformation)2 DirectoryEntry (org.apache.poi.poifs.filesystem.DirectoryEntry)2 BoundedInputStream (org.apache.poi.util.BoundedInputStream)2 RLEDecompressingInputStream (org.apache.poi.util.RLEDecompressingInputStream)2 TikaException (org.apache.tika.exception.TikaException)2 EOFException (java.io.EOFException)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1