use of org.apache.poi.poifs.filesystem.DocumentInputStream in project poi by apache.
the class CryptoAPIEncryptor method getSummaryEntries.
/**
 * Moves the Document-/SummaryInformation streams of the given directory into a
 * single "EncryptedSummary" document.
 * Unlike other crypto modes, CryptoAPI is record based and can't be used
 * to stream-encrypt a whole file.
 *
 * <p>Layout of the produced stream, as written by this method:</p>
 * <ol>
 * <li>an 8 byte header: offset and size of the stream descriptor array</li>
 * <li>the content of every summary stream found in {@code dir}</li>
 * <li>the stream descriptor array: an entry count followed by one descriptor per stream</li>
 * </ol>
 *
 * <p>Side effects on {@code dir}: every summary stream that was copied is deleted,
 * the "EncryptedSummary" document is created, and a freshly created (empty)
 * DocumentSummaryInformation is written under its default stream name.</p>
 *
 * @param dir the directory holding the summary streams; it is modified as described above
 * @return the output stream the summary data was written to
 * @throws IOException if reading or writing a stream fails, or if writing the
 *         placeholder DocumentSummaryInformation is not supported
 * @throws GeneralSecurityException presumably raised while setting up the cipher
 *         output stream - TODO confirm against CryptoAPIDocumentOutputStream
 *
 * @see <a href="http://msdn.microsoft.com/en-us/library/dd943321(v=office.12).aspx">2.3.5.4 RC4 CryptoAPI Encrypted Summary Stream</a>
 */
public OutputStream getSummaryEntries(DirectoryNode dir) throws IOException, GeneralSecurityException {
    // NOSONAR
    CryptoAPIDocumentOutputStream bos = new CryptoAPIDocumentOutputStream(this);
    byte[] buf = new byte[8];
    // reserve room for the header, it is filled in once the descriptor array is known
    bos.write(buf, 0, 8);
    String[] entryNames = { SummaryInformation.DEFAULT_STREAM_NAME, DocumentSummaryInformation.DEFAULT_STREAM_NAME };
    List<StreamDescriptorEntry> descList = new ArrayList<StreamDescriptorEntry>();
    int block = 0;
    for (String entryName : entryNames) {
        if (!dir.hasEntry(entryName)) {
            continue;
        }
        StreamDescriptorEntry descEntry = new StreamDescriptorEntry();
        descEntry.block = block;
        descEntry.streamOffset = bos.size();
        descEntry.streamName = entryName;
        descEntry.flags = StreamDescriptorEntry.flagStream.setValue(0, 1);
        descEntry.reserved2 = 0;
        bos.setBlock(block);
        DocumentInputStream dis = dir.createDocumentInputStream(entryName);
        try {
            IOUtils.copy(dis, bos);
        } finally {
            // release the source stream even if the copy fails
            dis.close();
        }
        descEntry.streamSize = bos.size() - descEntry.streamOffset;
        descList.add(descEntry);
        // the plain stream is removed from the directory once it has been copied
        dir.getEntry(entryName).delete();
        block++;
    }
    int streamDescriptorArrayOffset = bos.size();
    // the descriptor array is written with block 0
    bos.setBlock(0);
    LittleEndian.putUInt(buf, 0, descList.size());
    bos.write(buf, 0, 4);
    for (StreamDescriptorEntry sde : descList) {
        LittleEndian.putUInt(buf, 0, sde.streamOffset);
        bos.write(buf, 0, 4);
        LittleEndian.putUInt(buf, 0, sde.streamSize);
        bos.write(buf, 0, 4);
        LittleEndian.putUShort(buf, 0, sde.block);
        bos.write(buf, 0, 2);
        // name length in characters, without the terminator written below
        LittleEndian.putUByte(buf, 0, (short) sde.streamName.length());
        bos.write(buf, 0, 1);
        LittleEndian.putUByte(buf, 0, (short) sde.flags);
        bos.write(buf, 0, 1);
        LittleEndian.putUInt(buf, 0, sde.reserved2);
        bos.write(buf, 0, 4);
        byte[] nameBytes = StringUtil.getToUnicodeLE(sde.streamName);
        bos.write(nameBytes, 0, nameBytes.length);
        // null-termination
        LittleEndian.putShort(buf, 0, (short) 0);
        bos.write(buf, 0, 2);
    }
    int savedSize = bos.size();
    int streamDescriptorArraySize = savedSize - streamDescriptorArrayOffset;
    LittleEndian.putUInt(buf, 0, streamDescriptorArrayOffset);
    LittleEndian.putUInt(buf, 4, streamDescriptorArraySize);
    // rewind to the start, overwrite the header placeholder, then restore the full size
    bos.reset();
    bos.setBlock(0);
    bos.write(buf, 0, 8);
    bos.setSize(savedSize);
    dir.createDocument("EncryptedSummary", new ByteArrayInputStream(bos.getBuf(), 0, savedSize));
    // write a new, empty DocumentSummaryInformation under the default stream name
    DocumentSummaryInformation dsi = PropertySetFactory.newDocumentSummaryInformation();
    try {
        dsi.write(dir, DocumentSummaryInformation.DEFAULT_STREAM_NAME);
    } catch (WritingNotSupportedException e) {
        throw new IOException(e);
    }
    return bos;
}
use of org.apache.poi.poifs.filesystem.DocumentInputStream in project poi by apache.
the class StandardDecryptor method getDataStream.
/**
 * Opens the {@code DEFAULT_POIFS_ENTRY} document of the given directory and
 * returns a stream that decrypts its content.
 *
 * <p>The first 8 bytes of the entry are read as the length of the decrypted data;
 * the returned stream is bounded to that length. If no secret key is available yet,
 * {@code verifyPassword(null)} is called first.</p>
 *
 * <p>On success the underlying document stream is owned by the returned stream
 * (hence the suppressed resource warning). If setting up the decryption fails,
 * the document stream is closed before the exception propagates.</p>
 *
 * @param dir the directory containing the encrypted entry
 * @return a stream over the decrypted data
 * @throws IOException if the entry can't be opened or read
 */
@Override
@SuppressWarnings("resource")
public InputStream getDataStream(DirectoryNode dir) throws IOException {
    DocumentInputStream dis = dir.createDocumentInputStream(DEFAULT_POIFS_ENTRY);
    boolean success = false;
    try {
        _length = dis.readLong();
        if (getSecretKey() == null) {
            verifyPassword(null);
        }
        // limit wrong calculated ole entries - (bug #57080)
        // standard encryption always uses aes encoding, so blockSize is always 16
        // http://stackoverflow.com/questions/3283787/size-of-data-after-aes-encryption
        int blockSize = getEncryptionInfo().getHeader().getCipherAlgorithm().blockSize;
        // round the plain length up to the next full cipher block
        long cipherLen = (_length / blockSize + 1) * blockSize;
        Cipher cipher = getCipher(getSecretKey());
        InputStream boundedDis = new BoundedInputStream(dis, cipherLen);
        InputStream result = new BoundedInputStream(new CipherInputStream(boundedDis, cipher), _length);
        success = true;
        return result;
    } finally {
        if (!success) {
            // nobody else holds a reference to the stream, so release it here
            dis.close();
        }
    }
}
use of org.apache.poi.poifs.filesystem.DocumentInputStream in project tika by apache.
the class AbstractPOIFSExtractor method handleEmbeddedOfficeDoc.
/**
 * Handle an office document that's embedded at the POIFS level.
 *
 * <p>If the directory has a "Package" entry it is treated as OOXML: its type is
 * detected and the stream is passed on as an embedded resource. Otherwise the
 * directory is treated as regular OLE2: OLE10Native and CompObj payloads are
 * unwrapped into a byte buffer, every other type is handed over as an open
 * container, and the result is parsed if the embedded document util allows it.</p>
 *
 * <p>Failures while detecting or unwrapping are recorded on the parent metadata
 * and end the handling of this document without throwing.</p>
 *
 * @param dir the directory of the embedded document
 * @param resourceName name to report for the resource; if {@code null} the directory name is used
 * @param xhtml the handler receiving the parsed content
 * @throws IOException declared for the embedded stream handling
 * @throws SAXException declared for writing to the content handler
 * @throws TikaException declared for the embedded parse
 */
protected void handleEmbeddedOfficeDoc(DirectoryEntry dir, String resourceName, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
    if (dir.hasEntry("Package")) {
        // It's OOXML (has a ZipFile):
        Entry ooxml = dir.getEntry("Package");
        try (TikaInputStream stream = TikaInputStream.get(new DocumentInputStream((DocumentEntry) ooxml))) {
            ZipContainerDetector detector = new ZipContainerDetector();
            MediaType type = null;
            try {
                //if there's a stream error while detecting...
                type = detector.detect(stream, new Metadata());
            } catch (Exception e) {
                EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
                return;
            }
            handleEmbeddedResource(stream, null, dir.getName(), dir.getStorageClsid(), type.toString(), xhtml, true);
            return;
        }
    }
    // It's regular OLE2:
    // What kind of document is it?
    Metadata metadata = new Metadata();
    metadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, dir.getName());
    if (dir.getStorageClsid() != null) {
        metadata.set(Metadata.EMBEDDED_STORAGE_CLASS_ID, dir.getStorageClsid().toString());
    }
    POIFSDocumentType type = POIFSDocumentType.detectType(dir);
    TikaInputStream embedded = null;
    String rName = (resourceName == null) ? dir.getName() : resourceName;
    try {
        if (type == POIFSDocumentType.OLE10_NATIVE) {
            try {
                // Try to un-wrap the OLE10Native record:
                Ole10Native ole = Ole10Native.createFromEmbeddedOleObject((DirectoryNode) dir);
                if (ole.getLabel() != null) {
                    metadata.set(Metadata.RESOURCE_NAME_KEY, rName + '/' + ole.getLabel());
                }
                if (ole.getCommand() != null) {
                    metadata.add(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, ole.getCommand());
                }
                if (ole.getFileName() != null) {
                    metadata.add(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, ole.getFileName());
                }
                byte[] data = ole.getDataBuffer();
                embedded = TikaInputStream.get(data);
            } catch (Ole10NativeException ex) {
                // Not a valid OLE10Native record, skip it
            } catch (Exception e) {
                EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
                return;
            }
        } else if (type == POIFSDocumentType.COMP_OBJ) {
            try {
                //TODO: figure out if the equivalent of OLE 1.0's
                //getCommand() and getFileName() exist for OLE 2.0 to populate
                //TikaCoreProperties.ORIGINAL_RESOURCE_NAME
                // Grab the contents and process
                DocumentEntry contentsEntry;
                try {
                    contentsEntry = (DocumentEntry) dir.getEntry("CONTENTS");
                } catch (FileNotFoundException ioe) {
                    // fall back to the mixed-case entry name
                    contentsEntry = (DocumentEntry) dir.getEntry("Contents");
                }
                byte[] contents = new byte[contentsEntry.getSize()];
                // the entry is buffered completely, so the stream can be released right away
                try (DocumentInputStream inp = new DocumentInputStream(contentsEntry)) {
                    inp.readFully(contents);
                }
                embedded = TikaInputStream.get(contents);
                // Try to work out what it is
                MediaType mediaType = getDetector().detect(embedded, new Metadata());
                String extension = type.getExtension();
                try {
                    MimeType mimeType = getMimeTypes().forName(mediaType.toString());
                    extension = mimeType.getExtension();
                } catch (MimeTypeException mte) {
                    // No details on this type are known
                }
                // Record what we can do about it
                metadata.set(Metadata.CONTENT_TYPE, mediaType.getType().toString());
                metadata.set(Metadata.RESOURCE_NAME_KEY, rName + extension);
            } catch (Exception e) {
                EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
                return;
            }
        } else {
            metadata.set(Metadata.CONTENT_TYPE, type.getType().toString());
            metadata.set(Metadata.RESOURCE_NAME_KEY, rName + '.' + type.getExtension());
        }
        // Should we parse it?
        if (embeddedDocumentUtil.shouldParseEmbedded(metadata)) {
            if (embedded == null) {
                // Make a TikaInputStream that just
                // passes the root directory of the
                // embedded document, and is otherwise
                // empty (byte[0]):
                embedded = TikaInputStream.get(new byte[0]);
                embedded.setOpenContainer(dir);
            }
            embeddedDocumentUtil.parseEmbedded(embedded, xhtml, metadata, true);
        }
    } catch (IOException e) {
        EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata);
    } finally {
        if (embedded != null) {
            embedded.close();
        }
    }
}
use of org.apache.poi.poifs.filesystem.DocumentInputStream in project tika by apache.
the class SummaryExtractor method parseSummaryEntryIfExists.
/**
 * Parses the named summary entry of the given directory, if it exists.
 *
 * <p>The entry is read as a property set and handed to {@code parse} as
 * SummaryInformation and/or DocumentSummaryInformation, depending on what the
 * property set reports. A missing entry or a stream without a property set is
 * skipped silently; any other unexpected exception is logged and ignored.</p>
 *
 * @param root the directory to look the entry up in
 * @param entryName the name of the summary entry
 * @throws IOException declared by this method
 * @throws TikaException if the property set has an unexpected type or the
 *         stream is reported as not supporting mark
 */
private void parseSummaryEntryIfExists(DirectoryNode root, String entryName) throws IOException, TikaException {
    try {
        DocumentEntry entry = (DocumentEntry) root.getEntry(entryName);
        // the stream stays open while the property set is in use and is released afterwards
        try (DocumentInputStream stream = new DocumentInputStream(entry)) {
            PropertySet properties = new PropertySet(stream);
            if (properties.isSummaryInformation()) {
                parse(new SummaryInformation(properties));
            }
            if (properties.isDocumentSummaryInformation()) {
                parse(new DocumentSummaryInformation(properties));
            }
        }
    } catch (FileNotFoundException e) {
        // entry does not exist, just skip it
    } catch (NoPropertySetStreamException e) {
        // no property stream, just skip it
    } catch (UnexpectedPropertySetTypeException e) {
        throw new TikaException("Unexpected HPSF document", e);
    } catch (MarkUnsupportedException e) {
        throw new TikaException("Invalid DocumentInputStream", e);
    } catch (Exception e) {
        LOG.warn("Ignoring unexpected exception while parsing summary entry {}", entryName, e);
    }
}
use of org.apache.poi.poifs.filesystem.DocumentInputStream in project poi by apache.
the class TestWriteWellKnown method getSummaryInformation.
/**
 * Reads the SummaryInformation stored under its default stream name in the
 * given filesystem.
 *
 * @param poifs the filesystem to read from
 * @return the summary information built from the stream's property set
 * @throws Exception if the stream can't be opened or isn't a valid summary property set
 */
private static SummaryInformation getSummaryInformation(NPOIFSFileSystem poifs) throws Exception {
    DocumentInputStream dis = poifs.createDocumentInputStream(SummaryInformation.DEFAULT_STREAM_NAME);
    try {
        PropertySet ps = new PropertySet(dis);
        return new SummaryInformation(ps);
    } finally {
        // close the stream even if the property set can't be read
        dis.close();
    }
}
Aggregations