Search in sources :

Example 1 with Entry

use of org.apache.poi.poifs.filesystem.Entry in project poi by apache.

the class POIFSDump method dump.

/**
 * Recursively writes the contents of a POIFS directory to the file system.
 *
 * <p>Each {@link DocumentNode} found in {@code root} is written to a file
 * in {@code parent} named after the node (with surrounding whitespace
 * trimmed). Each nested {@link DirectoryEntry} becomes a sub-directory of
 * {@code parent}, which is created if it does not exist yet, and is then
 * dumped in turn. Any other kind of entry is reported on standard error and
 * skipped.</p>
 *
 * @param root the POIFS directory whose entries are dumped
 * @param parent the file-system directory that receives the output
 * @throws IOException if a document cannot be read, a file cannot be
 * written, or a sub-directory cannot be created
 */
public static void dump(DirectoryEntry root, File parent) throws IOException {
    for (Iterator<Entry> it = root.getEntries(); it.hasNext(); ) {
        Entry entry = it.next();
        if (entry instanceof DocumentNode) {
            DocumentNode node = (DocumentNode) entry;
            byte[] bytes;
            DocumentInputStream is = new DocumentInputStream(node);
            try {
                bytes = IOUtils.toByteArray(is);
            } finally {
                // Release the stream even when reading the document fails.
                is.close();
            }
            OutputStream out = new FileOutputStream(new File(parent, node.getName().trim()));
            try {
                out.write(bytes);
            } finally {
                out.close();
            }
        } else if (entry instanceof DirectoryEntry) {
            DirectoryEntry dir = (DirectoryEntry) entry;
            File file = new File(parent, entry.getName());
            if (!file.exists() && !file.mkdirs()) {
                throw new IOException("Could not create directory " + file);
            }
            dump(dir, file);
        } else {
            System.err.println("Skipping unsupported POIFS entry: " + entry);
        }
    }
}
Also used : Entry(org.apache.poi.poifs.filesystem.Entry) DirectoryEntry(org.apache.poi.poifs.filesystem.DirectoryEntry) DocumentNode(org.apache.poi.poifs.filesystem.DocumentNode) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) DocumentInputStream(org.apache.poi.poifs.filesystem.DocumentInputStream) DirectoryEntry(org.apache.poi.poifs.filesystem.DirectoryEntry) File(java.io.File)

Example 2 with Entry

use of org.apache.poi.poifs.filesystem.Entry in project poi by apache.

the class EmbeddedExtractor method copyNodes.

/**
 * Recursively copies every entry of {@code src} into {@code dest}.
 *
 * <p>A child that is a {@link DirectoryNode} is recreated under the same
 * name in {@code dest}, receives the storage class id of its source, and is
 * then copied in turn. Every other child is copied as a document with the
 * same name.</p>
 *
 * @param src the directory to copy from
 * @param dest the directory to copy into
 * @throws IOException if reading a source entry or creating a destination
 * entry fails
 */
protected static void copyNodes(DirectoryNode src, DirectoryNode dest) throws IOException {
    for (Entry child : src) {
        if (!(child instanceof DirectoryNode)) {
            // Anything that is not a directory is copied as a document stream.
            InputStream content = src.createDocumentInputStream(child);
            try {
                dest.createDocument(child.getName(), content);
            } finally {
                content.close();
            }
            continue;
        }

        DirectoryNode sourceDir = (DirectoryNode) child;
        DirectoryNode targetDir = (DirectoryNode) dest.createDirectory(sourceDir.getName());
        targetDir.setStorageClsid(sourceDir.getStorageClsid());
        copyNodes(sourceDir, targetDir);
    }
}
Also used : Entry(org.apache.poi.poifs.filesystem.Entry) DocumentInputStream(org.apache.poi.poifs.filesystem.DocumentInputStream) InputStream(java.io.InputStream) DirectoryNode(org.apache.poi.poifs.filesystem.DirectoryNode)

Example 3 with Entry

use of org.apache.poi.poifs.filesystem.Entry in project poi by apache.

the class CopyCompare method equal.

/**
 * <p>Compares two {@link DirectoryEntry} instances of a POI file system.
 * The directories must contain the same streams with the same names and
 * contents.</p>
 *
 * <p>Sub-directories are compared recursively. A difference found in any
 * entry makes the overall result <code>false</code>, even if entries
 * visited later compare equal.</p>
 *
 * @param d1 The first directory.
 * @param d2 The second directory.
 * @param msg The method may append human-readable comparison messages to
 * this string buffer.
 * @return <code>true</code> if the directories are equal, else
 * <code>false</code>.
 * @exception MarkUnsupportedException if a POI document stream does not
 * support the mark() operation.
 * @exception NoPropertySetStreamException if the application tries to
 * create a property set from a POI document stream that is not a property
 * set stream.
 * @exception UnsupportedEncodingException declared because the
 * comparisons this method delegates to declare it.
 * @exception IOException if any I/O exception occurs.
 */
private static boolean equal(final DirectoryEntry d1, final DirectoryEntry d2, final StringBuffer msg) throws NoPropertySetStreamException, MarkUnsupportedException, UnsupportedEncodingException, IOException {
    boolean equal = true;
    /* Iterate over d1 and compare each entry with its counterpart in d2. */
    for (final Entry e1 : d1) {
        final String n1 = e1.getName();
        if (!d2.hasEntry(n1)) {
            msg.append("Document \"" + n1 + "\" exists only in the source.\n");
            equal = false;
            break;
        }
        final Entry e2 = d2.getEntry(n1);
        final boolean same;
        if (e1.isDirectoryEntry() && e2.isDirectoryEntry()) {
            same = equal((DirectoryEntry) e1, (DirectoryEntry) e2, msg);
        } else if (e1.isDocumentEntry() && e2.isDocumentEntry()) {
            same = equal((DocumentEntry) e1, (DocumentEntry) e2, msg);
        } else {
            msg.append("One of \"" + e1 + "\" and \"" + e2 + "\" is a " + "document while the other one is a directory.\n");
            same = false;
        }
        /* Only ever lower the result: a later matching entry must not hide
         * a mismatch that was found earlier. */
        if (!same) {
            equal = false;
        }
    }
    /* Iterate over d2 just to make sure that there are no entries in d2
     * that are not in d1. */
    for (final Entry e2 : d2) {
        final String n2 = e2.getName();
        if (!d1.hasEntry(n2)) {
            msg.append("Document \"" + n2 + "\" exists only in the second directory.\n");
            equal = false;
            break;
        }
    }
    return equal;
}
Also used : Entry(org.apache.poi.poifs.filesystem.Entry) DocumentEntry(org.apache.poi.poifs.filesystem.DocumentEntry) DirectoryEntry(org.apache.poi.poifs.filesystem.DirectoryEntry) FileNotFoundException(java.io.FileNotFoundException) DirectoryEntry(org.apache.poi.poifs.filesystem.DirectoryEntry)

Example 4 with Entry

use of org.apache.poi.poifs.filesystem.Entry in project poi by apache.

the class ExtractorFactory method getEmbededDocsTextExtractors.

/**
 * Creates text extractors for the documents embedded in the file that the
 * given extractor was opened on.
 *
 * <p>Where embedded documents are looked for depends on the extractor
 * type:</p>
 * <ul>
 *  <li>{@link ExcelExtractor}: root entries whose name starts with
 *   <code>MBD</code>.</li>
 *  <li>{@link WordExtractor}: entries of the <code>ObjectPool</code>
 *   directory whose name starts with <code>_</code>; a missing
 *   <code>ObjectPool</code> is logged and ignored.</li>
 *  <li>{@link OutlookTextExtactor}: the attachments of the message, taken
 *   either as raw bytes or as an attachment directory.</li>
 * </ul>
 * <p>Raw attachments in a format that is not supported are logged and
 * skipped.</p>
 *
 * @param ext the extractor whose embedded documents are wanted
 * @return one open {@link POITextExtractor} per embedded document that
 *  could be handled, or an empty array if none were found
 * @throws IllegalStateException if the extractor has no root directory
 * @throws IOException if an embedded document cannot be read; XML and
 *  OpenXML4J failures on raw attachments are wrapped in this type
 */
public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
    // Embedded documents that live in their own POIFS directory
    final ArrayList<Entry> embeddedDirs = new ArrayList<Entry>();
    // Embedded documents that are only available as a stream of bytes
    final ArrayList<InputStream> rawStreams = new ArrayList<InputStream>();

    final DirectoryEntry root = ext.getRoot();
    if (root == null) {
        throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
    }

    // Note: PowerPoint is not handled, its embedded documents are not
    //  stored directly in the POIFS (TODO)
    if (ext instanceof ExcelExtractor) {
        // Excel keeps them in MBD... directly below the root
        for (Iterator<Entry> children = root.getEntries(); children.hasNext(); ) {
            final Entry child = children.next();
            if (child.getName().startsWith("MBD")) {
                embeddedDirs.add(child);
            }
        }
    } else if (ext instanceof WordExtractor) {
        // Word keeps them in ObjectPool -> _... below the root
        try {
            final DirectoryEntry objectPool = (DirectoryEntry) root.getEntry("ObjectPool");
            for (Iterator<Entry> children = objectPool.getEntries(); children.hasNext(); ) {
                final Entry child = children.next();
                if (child.getName().startsWith("_")) {
                    embeddedDirs.add(child);
                }
            }
        } catch (FileNotFoundException e) {
            // Deliberately not propagated, only logged
            logger.log(POILogger.INFO, "Ignoring FileNotFoundException while extracting Word document", e.getLocalizedMessage());
        }
    } else if (ext instanceof OutlookTextExtactor) {
        // Outlook keeps them in the attachment blocks of the message
        final MAPIMessage message = ((OutlookTextExtactor) ext).getMAPIMessage();
        for (AttachmentChunks chunks : message.getAttachmentFiles()) {
            if (chunks.getAttachData() != null) {
                final byte[] content = chunks.getAttachData().getValue();
                rawStreams.add(new ByteArrayInputStream(content));
            } else if (chunks.getAttachmentDirectory() != null) {
                embeddedDirs.add(chunks.getAttachmentDirectory().getDirectory());
            }
        }
    }

    // Nothing embedded at all
    if (embeddedDirs.isEmpty() && rawStreams.isEmpty()) {
        return new POITextExtractor[0];
    }

    // Build one extractor per embedded document
    final ArrayList<POITextExtractor> extractors = new ArrayList<POITextExtractor>();
    for (Entry embeddedDir : embeddedDirs) {
        extractors.add(createExtractor((DirectoryNode) embeddedDir));
    }
    for (InputStream rawStream : rawStreams) {
        try {
            extractors.add(createExtractor(rawStream));
        } catch (IllegalArgumentException e) {
            // Not a format that is supported as yet, so skip this one
            logger.log(POILogger.INFO, "Format not supported yet", e.getLocalizedMessage());
        } catch (XmlException e) {
            throw new IOException(e.getMessage(), e);
        } catch (OpenXML4JException e) {
            throw new IOException(e.getMessage(), e);
        }
    }
    return extractors.toArray(new POITextExtractor[extractors.size()]);
}
Also used : PushbackInputStream(java.io.PushbackInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) DirectoryNode(org.apache.poi.poifs.filesystem.DirectoryNode) IOException(java.io.IOException) DirectoryEntry(org.apache.poi.poifs.filesystem.DirectoryEntry) WordExtractor(org.apache.poi.hwpf.extractor.WordExtractor) XWPFWordExtractor(org.apache.poi.xwpf.extractor.XWPFWordExtractor) MAPIMessage(org.apache.poi.hsmf.MAPIMessage) Entry(org.apache.poi.poifs.filesystem.Entry) DirectoryEntry(org.apache.poi.poifs.filesystem.DirectoryEntry) OutlookTextExtactor(org.apache.poi.hsmf.extractor.OutlookTextExtactor) OpenXML4JException(org.apache.poi.openxml4j.exceptions.OpenXML4JException) ByteArrayInputStream(java.io.ByteArrayInputStream) POITextExtractor(org.apache.poi.POITextExtractor) XSSFExcelExtractor(org.apache.poi.xssf.extractor.XSSFExcelExtractor) ExcelExtractor(org.apache.poi.hssf.extractor.ExcelExtractor) XSSFEventBasedExcelExtractor(org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor) XSSFBEventBasedExcelExtractor(org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor) XmlException(org.apache.xmlbeans.XmlException) Iterator(java.util.Iterator) AttachmentChunks(org.apache.poi.hsmf.datatypes.AttachmentChunks)

Example 5 with Entry

use of org.apache.poi.poifs.filesystem.Entry in project poi by apache.

the class POIFSChunkParser method parse.

/**
 * Reads the chunks of the given directory and returns them as groups.
 *
 * <p>The first group of the result holds the chunks of {@code node}
 * itself. It is followed by one group for each direct child directory
 * whose name starts with the attachment, name-id or recipient prefix.
 * Child directories with any other name are skipped silently.</p>
 *
 * @param node the directory to parse
 * @return the main chunk group followed by the groups of the recognised
 *  child directories
 * @throws IOException if processing the chunks fails
 */
public static ChunkGroup[] parse(DirectoryNode node) throws IOException {
    final Chunks mainChunks = new Chunks();
    final ArrayList<ChunkGroup> groups = new ArrayList<ChunkGroup>();
    groups.add(mainChunks);

    // Look at the direct child directories only; this loop does not
    //  descend into directories nested below them
    for (Entry child : node) {
        if (!(child instanceof DirectoryNode)) {
            continue;
        }
        final DirectoryNode dir = (DirectoryNode) child;
        final String dirName = dir.getName();

        // Independent checks: if more than one matches, the last one wins
        ChunkGroup group = null;
        if (dirName.startsWith(AttachmentChunks.PREFIX)) {
            group = new AttachmentChunks(dirName);
        }
        if (dirName.startsWith(NameIdChunks.NAME)) {
            group = new NameIdChunks();
        }
        if (dirName.startsWith(RecipientChunks.PREFIX)) {
            group = new RecipientChunks(dirName);
        }
        if (group == null) {
            // Unknown directory, skip silently
            continue;
        }

        processChunks(dir, group);
        groups.add(group);
    }

    // The chunks held directly in the given directory come last
    processChunks(node, mainChunks);

    // Tell every group that all of its chunks have been read
    for (ChunkGroup group : groups) {
        group.chunksComplete();
    }

    return groups.toArray(new ChunkGroup[groups.size()]);
}
Also used : Entry(org.apache.poi.poifs.filesystem.Entry) Chunks(org.apache.poi.hsmf.datatypes.Chunks) AttachmentChunks(org.apache.poi.hsmf.datatypes.AttachmentChunks) RecipientChunks(org.apache.poi.hsmf.datatypes.RecipientChunks) NameIdChunks(org.apache.poi.hsmf.datatypes.NameIdChunks) ChunkGroup(org.apache.poi.hsmf.datatypes.ChunkGroup) ArrayList(java.util.ArrayList) RecipientChunks(org.apache.poi.hsmf.datatypes.RecipientChunks) DirectoryNode(org.apache.poi.poifs.filesystem.DirectoryNode) NameIdChunks(org.apache.poi.hsmf.datatypes.NameIdChunks) AttachmentChunks(org.apache.poi.hsmf.datatypes.AttachmentChunks)

Aggregations

Entry (org.apache.poi.poifs.filesystem.Entry)24 DirectoryEntry (org.apache.poi.poifs.filesystem.DirectoryEntry)12 IOException (java.io.IOException)9 DirectoryNode (org.apache.poi.poifs.filesystem.DirectoryNode)9 FileNotFoundException (java.io.FileNotFoundException)6 InputStream (java.io.InputStream)6 DocumentEntry (org.apache.poi.poifs.filesystem.DocumentEntry)6 DocumentInputStream (org.apache.poi.poifs.filesystem.DocumentInputStream)6 DocumentNode (org.apache.poi.poifs.filesystem.DocumentNode)4 POIFSFileSystem (org.apache.poi.poifs.filesystem.POIFSFileSystem)4 ArrayList (java.util.ArrayList)3 AttachmentChunks (org.apache.poi.hsmf.datatypes.AttachmentChunks)3 HWPFDocument (org.apache.poi.hwpf.HWPFDocument)3 OldWordFileFormatException (org.apache.poi.hwpf.OldWordFileFormatException)3 BufferedInputStream (java.io.BufferedInputStream)2 ByteArrayInputStream (java.io.ByteArrayInputStream)2 FileInputStream (java.io.FileInputStream)2 POITextExtractor (org.apache.poi.POITextExtractor)2 HSLFSlideShow (org.apache.poi.hslf.usermodel.HSLFSlideShow)2 MAPIMessage (org.apache.poi.hsmf.MAPIMessage)2