use of org.apache.poi.poifs.filesystem.DocumentInputStream in project poi by apache.
the class VBAMacroReader method readModule.
private static void readModule(DocumentInputStream dis, String name, ModuleMap modules) throws IOException {
Module module = modules.get(name);
// TODO Refactor this to fetch dir then do the rest
if (module == null) {
// no DIR stream with offsets yet, so store the compressed bytes for later
module = new Module();
modules.put(name, module);
module.read(dis);
} else if (module.buf == null) {
//if we haven't already read the bytes for the module keyed off this name...
if (module.offset == null) {
//This should not happen. bug 59858
throw new IOException("Module offset for '" + name + "' was never read.");
}
// we know the offset already, so decompress immediately on-the-fly
long skippedBytes = dis.skip(module.offset);
if (skippedBytes != module.offset) {
throw new IOException("tried to skip " + module.offset + " bytes, but actually skipped " + skippedBytes + " bytes");
}
InputStream stream = new RLEDecompressingInputStream(dis);
module.read(stream);
stream.close();
}
}
use of org.apache.poi.poifs.filesystem.DocumentInputStream in project poi by apache.
the class CopyCompare method equal.
/**
* <p>Compares two {@link DocumentEntry} instances of a POI file system.
* Documents that are not property set streams must be bitwise identical.
* Property set streams must be logically equal.</p>
*
* @param d1 The first document.
* @param d2 The second document.
* @param msg The method may append human-readable comparison messages to
* this string buffer.
* @return <code>true</code> if the documents are equal, else
* <code>false</code>.
* @exception MarkUnsupportedException if a POI document stream does not
* support the mark() operation.
* @exception NoPropertySetStreamException if the application tries to
* create a property set from a POI document stream that is not a property
* set stream.
* @throws UnsupportedEncodingException
* @exception IOException if any I/O exception occurs.
*/
private static boolean equal(final DocumentEntry d1, final DocumentEntry d2, final StringBuffer msg) throws NoPropertySetStreamException, MarkUnsupportedException, UnsupportedEncodingException, IOException {
final DocumentInputStream dis1 = new DocumentInputStream(d1);
final DocumentInputStream dis2 = new DocumentInputStream(d2);
try {
if (PropertySet.isPropertySetStream(dis1) && PropertySet.isPropertySetStream(dis2)) {
final PropertySet ps1 = PropertySetFactory.create(dis1);
final PropertySet ps2 = PropertySetFactory.create(dis2);
if (!ps1.equals(ps2)) {
msg.append("Property sets are not equal.\n");
return false;
}
} else {
int i1, i2;
do {
i1 = dis1.read();
i2 = dis2.read();
if (i1 != i2) {
msg.append("Documents are not equal.\n");
return false;
}
} while (i1 > -1);
}
} finally {
dis2.close();
dis1.close();
}
return true;
}
use of org.apache.poi.poifs.filesystem.DocumentInputStream in project poi by apache.
the class AgileDecryptor method getDataStream.
@Override
public InputStream getDataStream(DirectoryNode dir) throws IOException, GeneralSecurityException {
DocumentInputStream dis = dir.createDocumentInputStream(DEFAULT_POIFS_ENTRY);
_length = dis.readLong();
return new AgileCipherInputStream(dis, _length);
}
use of org.apache.poi.poifs.filesystem.DocumentInputStream in project tika by apache.
the class RTFObjDataParser method handleEmbeddedPOIFS.
//will throw IOException if not actually POIFS
//can return null byte[]
private byte[] handleEmbeddedPOIFS(InputStream is, Metadata metadata, AtomicInteger unknownFilenameCount) throws IOException {
byte[] ret = null;
try (NPOIFSFileSystem fs = new NPOIFSFileSystem(is)) {
DirectoryNode root = fs.getRoot();
if (root == null) {
return ret;
}
if (root.hasEntry("Package")) {
Entry ooxml = root.getEntry("Package");
TikaInputStream stream = TikaInputStream.get(new DocumentInputStream((DocumentEntry) ooxml));
ByteArrayOutputStream out = new ByteArrayOutputStream();
IOUtils.copy(stream, out);
ret = out.toByteArray();
} else {
//try poifs
POIFSDocumentType type = POIFSDocumentType.detectType(root);
if (type == POIFSDocumentType.OLE10_NATIVE) {
try {
// Try to un-wrap the OLE10Native record:
Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(root);
ret = ole.getDataBuffer();
} catch (Ole10NativeException ex) {
// Not a valid OLE10Native record, skip it
}
} else if (type == POIFSDocumentType.COMP_OBJ) {
DocumentEntry contentsEntry;
try {
contentsEntry = (DocumentEntry) root.getEntry("CONTENTS");
} catch (FileNotFoundException ioe) {
contentsEntry = (DocumentEntry) root.getEntry("Contents");
}
try (DocumentInputStream inp = new DocumentInputStream(contentsEntry)) {
ret = new byte[contentsEntry.getSize()];
inp.readFully(ret);
}
} else {
ByteArrayOutputStream out = new ByteArrayOutputStream();
is.reset();
IOUtils.copy(is, out);
ret = out.toByteArray();
metadata.set(Metadata.RESOURCE_NAME_KEY, "file_" + unknownFilenameCount.getAndIncrement() + "." + type.getExtension());
metadata.set(Metadata.CONTENT_TYPE, type.getType().toString());
}
}
}
return ret;
}
use of org.apache.poi.poifs.filesystem.DocumentInputStream in project tika by apache.
the class POIFSContainerDetector method processCompObjFormatType.
/**
* Is this one of the kinds of formats which uses CompObj to
* store all of their data, eg Star Draw, Star Impress or
* (older) Works?
* If not, it's likely an embedded resource
*/
private static MediaType processCompObjFormatType(DirectoryEntry root) {
try {
Entry e = root.getEntry("CompObj");
if (e != null && e.isDocumentEntry()) {
DocumentNode dn = (DocumentNode) e;
DocumentInputStream stream = new DocumentInputStream(dn);
byte[] bytes = IOUtils.toByteArray(stream);
/*
* This array contains a string with a normal ASCII name of the
* application used to create this file. We want to search for that
* name.
*/
if (arrayContains(bytes, MS_GRAPH_CHART_BYTES)) {
return MS_GRAPH_CHART;
} else if (arrayContains(bytes, STAR_DRAW)) {
return SDA;
} else if (arrayContains(bytes, STAR_IMPRESS)) {
return SDD;
} else if (arrayContains(bytes, WORKS_QUILL96)) {
return WPS;
}
}
} catch (Exception e) {
/*
* "root.getEntry" can throw FileNotFoundException. The code inside
* "if" can throw IOExceptions. Theoretically. Practically no
* exceptions will likely ever appear.
*
* Swallow all of them. If any occur, we just assume that we can't
* distinguish between Draw and Impress and return something safe:
* x-tika-msoffice
*/
}
return OLE;
}
Aggregations