use of org.apache.poi.poifs.filesystem.Entry in project poi by apache.
the class POIFSDump method dump.
/**
 * Recursively writes the contents of a POIFS directory to the local file system.
 *
 * <p>Each {@link DocumentNode} found in <code>root</code> is read completely and
 * written to a file in <code>parent</code> named after the trimmed entry name.
 * Each {@link DirectoryEntry} is mapped to a sub-directory of <code>parent</code>
 * (created if it does not exist yet) and dumped recursively. Any other kind of
 * entry is reported on <code>System.err</code> and skipped.</p>
 *
 * @param root   the POIFS directory whose entries are dumped
 * @param parent the file system directory that receives the dumped entries
 * @throws IOException if a stream cannot be read or written, or if a
 *         sub-directory cannot be created
 */
public static void dump(DirectoryEntry root, File parent) throws IOException {
    for (Iterator<Entry> it = root.getEntries(); it.hasNext(); ) {
        Entry entry = it.next();
        if (entry instanceof DocumentNode) {
            DocumentNode node = (DocumentNode) entry;
            byte[] bytes;
            DocumentInputStream is = new DocumentInputStream(node);
            try {
                bytes = IOUtils.toByteArray(is);
            } finally {
                // Release the document stream even when reading fails part-way.
                is.close();
            }
            OutputStream out = new FileOutputStream(new File(parent, node.getName().trim()));
            try {
                out.write(bytes);
            } finally {
                out.close();
            }
        } else if (entry instanceof DirectoryEntry) {
            DirectoryEntry dir = (DirectoryEntry) entry;
            File file = new File(parent, entry.getName());
            // mkdirs() returns false both on failure and when the directory
            // already exists, hence the explicit exists() check first.
            if (!file.exists() && !file.mkdirs()) {
                throw new IOException("Could not create directory " + file);
            }
            dump(dir, file);
        } else {
            System.err.println("Skipping unsupported POIFS entry: " + entry);
        }
    }
}
use of org.apache.poi.poifs.filesystem.Entry in project poi by apache.
the class EmbeddedExtractor method copyNodes.
/**
 * Recursively copies every entry of one POIFS directory into another.
 *
 * <p>Child directories are re-created in <code>dest</code> under the same
 * name, receive the storage class id of their source directory and are
 * then copied recursively. Every other entry is opened as a document
 * stream on <code>src</code> and stored in <code>dest</code> under the
 * same name.</p>
 *
 * @param src  the directory to copy from
 * @param dest the directory to copy into
 * @throws IOException if reading from the source or writing to the
 *         destination fails
 */
protected static void copyNodes(DirectoryNode src, DirectoryNode dest) throws IOException {
    for (Entry child : src) {
        if (!(child instanceof DirectoryNode)) {
            // Not a directory: treat it as a document and copy its content.
            InputStream content = src.createDocumentInputStream(child);
            try {
                dest.createDocument(child.getName(), content);
            } finally {
                content.close();
            }
            continue;
        }

        DirectoryNode sourceDir = (DirectoryNode) child;
        DirectoryNode targetDir = (DirectoryNode) dest.createDirectory(sourceDir.getName());
        targetDir.setStorageClsid(sourceDir.getStorageClsid());
        copyNodes(sourceDir, targetDir);
    }
}
use of org.apache.poi.poifs.filesystem.Entry in project poi by apache.
the class CopyCompare method equal.
/**
 * <p>Compares two {@link DirectoryEntry} instances of a POI file system.
 * The directories must contain the same streams with the same names and
 * contents.</p>
 *
 * <p>Every entry of <code>d1</code> is compared with the entry of the same
 * name in <code>d2</code>; sub-directories are compared recursively. The
 * results of all comparisons are accumulated, so a single differing entry
 * makes the directories unequal regardless of the entries that follow it.
 * The comparison of a directory stops early at the first entry that is
 * present in only one of the two directories.</p>
 *
 * @param d1 The first directory.
 * @param d2 The second directory.
 * @param msg The method may append human-readable comparison messages to
 * this string buffer.
 * @return <code>true</code> if the directories are equal, else
 * <code>false</code>.
 * @exception MarkUnsupportedException if a POI document stream does not
 * support the mark() operation.
 * @exception NoPropertySetStreamException if the application tries to
 * create a property set from a POI document stream that is not a property
 * set stream.
 * @exception UnsupportedEncodingException if comparing two documents
 * raises it.
 * @exception IOException if any I/O exception occurs.
 */
private static boolean equal(final DirectoryEntry d1, final DirectoryEntry d2, final StringBuffer msg) throws NoPropertySetStreamException, MarkUnsupportedException, UnsupportedEncodingException, IOException {
    boolean equal = true;
    /* Iterate over d1 and compare each entry with its counterpart in d2. */
    for (final Entry e1 : d1) {
        final String n1 = e1.getName();
        if (!d2.hasEntry(n1)) {
            msg.append("Document \"" + n1 + "\" exists only in the source.\n");
            equal = false;
            break;
        }
        final Entry e2 = d2.getEntry(n1);
        final boolean entriesEqual;
        if (e1.isDirectoryEntry() && e2.isDirectoryEntry()) {
            entriesEqual = equal((DirectoryEntry) e1, (DirectoryEntry) e2, msg);
        } else if (e1.isDocumentEntry() && e2.isDocumentEntry()) {
            entriesEqual = equal((DocumentEntry) e1, (DocumentEntry) e2, msg);
        } else {
            msg.append("One of \"" + e1 + "\" and \"" + e2 + "\" is a " + "document while the other one is a directory.\n");
            entriesEqual = false;
        }
        /* Accumulate instead of overwrite: a later matching entry must not
         * mask a mismatch found for an earlier one. */
        if (!entriesEqual) {
            equal = false;
        }
    }
    /* Iterate over d2 just to make sure that there are no entries in d2
     * that are not in d1. */
    for (final Entry e2 : d2) {
        final String n2 = e2.getName();
        if (!d1.hasEntry(n2)) {
            msg.append("Document \"" + n2 + "\" exists only in the destination.\n");
            equal = false;
            break;
        }
    }
    return equal;
}
use of org.apache.poi.poifs.filesystem.Entry in project poi by apache.
the class ExtractorFactory method getEmbededDocsTextExtractors.
/**
 * Creates one open {@link POITextExtractor} for each document embedded in
 * the file the given extractor works on.
 *
 * <p>Where embedded documents are looked for depends on the extractor type:</p>
 * <ul>
 * <li>{@link ExcelExtractor}: entries directly under the root whose name
 *     starts with <code>MBD</code>;</li>
 * <li>{@link WordExtractor}: entries under the root's <code>ObjectPool</code>
 *     entry whose name starts with <code>_</code>; a
 *     {@link FileNotFoundException} raised while looking them up is logged
 *     and ignored;</li>
 * <li>{@link OutlookTextExtactor}: the attachment data of each attachment,
 *     or else its attachment directory.</li>
 * </ul>
 * <p>For any other extractor type, or if nothing embedded is found, an
 * empty array is returned. Attachment data for which
 * <code>createExtractor</code> throws an {@link IllegalArgumentException}
 * is logged and skipped.</p>
 *
 * @param ext the extractor whose embedded documents are wanted
 * @return the extractors for the embedded documents, possibly empty,
 *         never <code>null</code>
 * @throws IllegalStateException if <code>ext</code> has no root directory
 * @throws IOException if an extractor cannot be created; this also wraps
 *         {@link XmlException} and {@link OpenXML4JException} raised for
 *         attachment data
 */
public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
    // Embedded documents that live in a POIFS directory of their own
    ArrayList<Entry> embeddedDirs = new ArrayList<Entry>();
    // Embedded content that is only available as a stream of bytes
    ArrayList<InputStream> embeddedStreams = new ArrayList<InputStream>();

    DirectoryEntry root = ext.getRoot();
    if (root == null) {
        throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
    }

    if (ext instanceof ExcelExtractor) {
        // Embedded documents sit in MBD... entries below the root
        for (Iterator<Entry> children = root.getEntries(); children.hasNext(); ) {
            Entry child = children.next();
            if (child.getName().startsWith("MBD")) {
                embeddedDirs.add(child);
            }
        }
    } else if (ext instanceof WordExtractor) {
        // Embedded documents sit in ObjectPool -> _... entries below the root
        try {
            DirectoryEntry objectPool = (DirectoryEntry) root.getEntry("ObjectPool");
            for (Iterator<Entry> children = objectPool.getEntries(); children.hasNext(); ) {
                Entry child = children.next();
                if (child.getName().startsWith("_")) {
                    embeddedDirs.add(child);
                }
            }
        } catch (FileNotFoundException e) {
            // Deliberately not rethrown: logged, then treated as "nothing embedded"
            logger.log(POILogger.INFO, "Ignoring FileNotFoundException while extracting Word document", e.getLocalizedMessage());
        }
        // TODO PowerPointExtractor: tricky, not stored directly in poifs
    } else if (ext instanceof OutlookTextExtactor) {
        // Embedded documents are held in the attachment blocks of the message
        MAPIMessage message = ((OutlookTextExtactor) ext).getMAPIMessage();
        for (AttachmentChunks chunks : message.getAttachmentFiles()) {
            if (chunks.getAttachData() != null) {
                byte[] payload = chunks.getAttachData().getValue();
                embeddedStreams.add(new ByteArrayInputStream(payload));
            } else if (chunks.getAttachmentDirectory() != null) {
                embeddedDirs.add(chunks.getAttachmentDirectory().getDirectory());
            }
        }
    }

    // Nothing embedded at all
    if (embeddedDirs.isEmpty() && embeddedStreams.isEmpty()) {
        return new POITextExtractor[0];
    }

    ArrayList<POITextExtractor> extractors = new ArrayList<POITextExtractor>();
    for (Entry embeddedDir : embeddedDirs) {
        extractors.add(createExtractor((DirectoryNode) embeddedDir));
    }
    for (InputStream embeddedStream : embeddedStreams) {
        try {
            extractors.add(createExtractor(embeddedStream));
        } catch (IllegalArgumentException e) {
            // Just means the stream didn't contain a format we support as yet
            logger.log(POILogger.INFO, "Format not supported yet", e.getLocalizedMessage());
        } catch (XmlException e) {
            throw new IOException(e.getMessage(), e);
        } catch (OpenXML4JException e) {
            throw new IOException(e.getMessage(), e);
        }
    }
    return extractors.toArray(new POITextExtractor[extractors.size()]);
}
use of org.apache.poi.poifs.filesystem.Entry in project poi by apache.
the class POIFSChunkParser method parse.
/**
 * Reads the chunk groups held in the given directory.
 *
 * <p>The first element of the result is always the {@link Chunks} group
 * built from <code>node</code> itself. It is followed by one group for
 * each direct child directory whose name starts with
 * {@link AttachmentChunks#PREFIX}, {@link NameIdChunks#NAME} or
 * {@link RecipientChunks#PREFIX}, in iteration order. Child directories
 * with any other name, and entries that are not directories, are skipped
 * silently. This method itself only looks at direct children of
 * <code>node</code>.</p>
 *
 * <p>Once all groups are populated, <code>chunksComplete()</code> is
 * called on each of them.</p>
 *
 * @param node the directory to read the chunks from
 * @return all chunk groups found, the top level group first
 * @throws IOException if processing the chunks of a directory fails
 */
public static ChunkGroup[] parse(DirectoryNode node) throws IOException {
    Chunks topLevelChunks = new Chunks();
    ArrayList<ChunkGroup> allGroups = new ArrayList<ChunkGroup>();
    allGroups.add(topLevelChunks);

    for (Entry child : node) {
        if (!(child instanceof DirectoryNode)) {
            continue;
        }
        DirectoryNode childDir = (DirectoryNode) child;

        // Pick the group type by name prefix; checks are independent and
        // evaluated in order, so the last matching one decides.
        ChunkGroup matched = null;
        if (childDir.getName().startsWith(AttachmentChunks.PREFIX)) {
            matched = new AttachmentChunks(childDir.getName());
        }
        if (childDir.getName().startsWith(NameIdChunks.NAME)) {
            matched = new NameIdChunks();
        }
        if (childDir.getName().startsWith(RecipientChunks.PREFIX)) {
            matched = new RecipientChunks(childDir.getName());
        }

        if (matched == null) {
            // Unknown directory, skip silently
            continue;
        }
        processChunks(childDir, matched);
        allGroups.add(matched);
    }

    // The top level chunks are read after the child directories
    processChunks(node, topLevelChunks);

    // Let every group match up variable-length properties and their chunks
    for (ChunkGroup current : allGroups) {
        current.chunksComplete();
    }

    return allGroups.toArray(new ChunkGroup[allGroups.size()]);
}
Aggregations