Search in sources :

Example 31 with DirectoryNode

use of org.apache.poi.poifs.filesystem.DirectoryNode in project poi by apache.

the class OLE2ExtractorFactory method getEmbededDocsTextExtractors.

/**
 * Builds one text extractor per document embedded in the file that the
 * given extractor was opened on.
 *
 * <p>For an {@link ExcelExtractor} the embedded documents are the root
 * entries whose name starts with {@code MBD}. For every other extractor
 * the lookup is delegated, via reflection, to the static
 * {@code identifyEmbeddedResources} method of the Scratchpad class.</p>
 *
 * <p>Embedded resources that are not POIFS directories are opened from
 * their stream; any that cannot be turned into an extractor are logged
 * at WARN level and left out of the result.</p>
 *
 * @param ext the extractor whose underlying file should be searched
 * @return one open {@link POITextExtractor} per embedded document that
 *         could be handled; an empty array if none were found
 * @throws IOException declared for problems creating an extractor
 * @throws IllegalStateException if the extractor has no root directory
 * @throws IllegalArgumentException if the Scratchpad lookup fails
 */
public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException {
    // Without a root there is nowhere to look for embedded content
    DirectoryEntry root = ext.getRoot();
    if (root == null) {
        throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
    }

    // Embedded documents stored as POIFS directories
    List<Entry> embeddedDirs = new ArrayList<Entry>();
    // Embedded documents only reachable as a raw stream
    List<InputStream> embeddedStreams = new ArrayList<InputStream>();

    if (ext instanceof ExcelExtractor) {
        // Excel keeps its embedded documents in MBD... entries under the root
        for (Iterator<Entry> entries = root.getEntries(); entries.hasNext(); ) {
            Entry candidate = entries.next();
            if (candidate.getName().startsWith("MBD")) {
                embeddedDirs.add(candidate);
            }
        }
    } else {
        // Everything else is handled by Scratchpad, which fills both lists
        Class<?> scratchpad = getScratchpadClass();
        try {
            Method identify = scratchpad.getDeclaredMethod("identifyEmbeddedResources", POIOLE2TextExtractor.class, List.class, List.class);
            identify.invoke(null, ext, embeddedDirs, embeddedStreams);
        } catch (Exception e) {
            throw new IllegalArgumentException("Error checking for Scratchpad embedded resources", e);
        }
    }

    if (embeddedDirs.isEmpty() && embeddedStreams.isEmpty()) {
        return new POITextExtractor[0];
    }

    List<POITextExtractor> extractors = new ArrayList<POITextExtractor>();
    for (Entry embeddedDir : embeddedDirs) {
        extractors.add(createExtractor((DirectoryNode) embeddedDir));
    }
    for (InputStream embeddedStream : embeddedStreams) {
        try {
            extractors.add(createExtractor(embeddedStream));
        } catch (IllegalArgumentException ie) {
            // Not a format we support as yet; skip it
            LOGGER.log(POILogger.WARN, ie);
        } catch (Exception xe) {
            // Invalid format; skip it
            LOGGER.log(POILogger.WARN, xe);
        }
    }
    return extractors.toArray(new POITextExtractor[extractors.size()]);
}
Also used : InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) DirectoryNode(org.apache.poi.poifs.filesystem.DirectoryNode) Method(java.lang.reflect.Method) DirectoryEntry(org.apache.poi.poifs.filesystem.DirectoryEntry) IOException(java.io.IOException) OldExcelFormatException(org.apache.poi.hssf.OldExcelFormatException) Entry(org.apache.poi.poifs.filesystem.Entry) DirectoryEntry(org.apache.poi.poifs.filesystem.DirectoryEntry) POITextExtractor(org.apache.poi.POITextExtractor) ExcelExtractor(org.apache.poi.hssf.extractor.ExcelExtractor) EventBasedExcelExtractor(org.apache.poi.hssf.extractor.EventBasedExcelExtractor)

Example 32 with DirectoryNode

use of org.apache.poi.poifs.filesystem.DirectoryNode in project poi by apache.

the class POIFSChunkParser method process.

/**
 * Creates a chunk for the given entry, and gives it to its parent group.
 *
 * <p>An entry named {@link PropertiesChunk#NAME} becomes a properties
 * chunk. Any other entry is expected to be named
 * {@code __<name>_<id><type>}, where the id and the type are four hex
 * digits each; the kind of chunk created depends on the id and type.
 * Entries whose name is too short, has no underscore, or has a non-hex
 * id/type are silently skipped, as are types with no matching chunk
 * class.</p>
 *
 * <p>If the entry is a {@link DocumentNode} its contents are read into
 * the chunk before the chunk is recorded; a read failure is logged and
 * the chunk is not recorded.</p>
 *
 * @param entry the filesystem entry to turn into a chunk
 * @param grouping the group that the resulting chunk is recorded on
 * @throws IllegalArgumentException if fewer than eight characters follow
 *         the last underscore of a regular chunk name
 */
protected static void process(Entry entry, ChunkGroup grouping) {
    String entryName = entry.getName();
    Chunk chunk = null;
    // Is it a properties chunk? (They have special names)
    if (entryName.equals(PropertiesChunk.NAME)) {
        if (grouping instanceof Chunks) {
            // These should be the properties for the message itself
            chunk = new MessagePropertiesChunk(grouping);
        } else {
            // Will be properties on an attachment or recipient
            chunk = new StoragePropertiesChunk(grouping);
        }
    } else {
        // Check it's a regular chunk
        if (entryName.length() < 9) {
            // Name in the wrong format
            return;
        }
        if (!entryName.contains("_")) {
            // Name in the wrong format
            return;
        }
        // Split it into its parts
        int splitAt = entryName.lastIndexOf('_');
        String namePrefix = entryName.substring(0, splitAt + 1);
        String ids = entryName.substring(splitAt + 1);
        // Make sure we got what we expected, should be of
        //  the form __<name>_<id><type>
        if (namePrefix.equals("Olk10SideProps") || namePrefix.equals("Olk10SideProps_")) {
            // This is some odd Outlook 2002 thing, skip
            return;
        }
        // The id and the type take four characters each, so at least eight
        //  characters must follow the last underscore. Checking the suffix
        //  length directly keeps a seven character suffix from reaching
        //  substring(4, 8) below and failing with an index error.
        if (ids.length() < 8) {
            // Underscores not the right place, something's wrong
            throw new IllegalArgumentException("Invalid chunk name " + entryName);
        }
        // Now try to turn it into id + type
        try {
            int chunkId = Integer.parseInt(ids.substring(0, 4), 16);
            int typeId = Integer.parseInt(ids.substring(4, 8), 16);
            MAPIType type = Types.getById(typeId);
            if (type == null) {
                type = Types.createCustom(typeId);
            }
            // Special cases based on the ID
            if (chunkId == MAPIProperty.MESSAGE_SUBMISSION_ID.id) {
                chunk = new MessageSubmissionChunk(namePrefix, chunkId, type);
            } else {
                // So, do the usual thing which is by type
                if (type == Types.BINARY) {
                    chunk = new ByteChunk(namePrefix, chunkId, type);
                } else if (type == Types.DIRECTORY) {
                    // Only a real directory entry can back a directory chunk
                    if (entry instanceof DirectoryNode) {
                        chunk = new DirectoryChunk((DirectoryNode) entry, namePrefix, chunkId, type);
                    }
                } else if (type == Types.ASCII_STRING || type == Types.UNICODE_STRING) {
                    chunk = new StringChunk(namePrefix, chunkId, type);
                } else {
                    // Type of an unsupported type! Skipping...
                }
            }
        } catch (NumberFormatException e) {
            // Name in the wrong format
            return;
        }
    }
    if (chunk != null) {
        if (entry instanceof DocumentNode) {
            DocumentInputStream inp = null;
            try {
                inp = new DocumentInputStream((DocumentNode) entry);
                chunk.readValue(inp);
                grouping.record(chunk);
            } catch (IOException e) {
                logger.log(POILogger.ERROR, "Error reading from part " + entry.getName() + " - " + e);
            } finally {
                // Always release the stream, whether or not the read worked
                if (inp != null) {
                    inp.close();
                }
            }
        } else {
            // Nothing to read for a non-document entry, just record it
            grouping.record(chunk);
        }
    }
}
Also used : StoragePropertiesChunk(org.apache.poi.hsmf.datatypes.StoragePropertiesChunk) Chunks(org.apache.poi.hsmf.datatypes.Chunks) AttachmentChunks(org.apache.poi.hsmf.datatypes.AttachmentChunks) RecipientChunks(org.apache.poi.hsmf.datatypes.RecipientChunks) NameIdChunks(org.apache.poi.hsmf.datatypes.NameIdChunks) DocumentNode(org.apache.poi.poifs.filesystem.DocumentNode) ByteChunk(org.apache.poi.hsmf.datatypes.ByteChunk) DirectoryNode(org.apache.poi.poifs.filesystem.DirectoryNode) DirectoryChunk(org.apache.poi.hsmf.datatypes.DirectoryChunk) MessagePropertiesChunk(org.apache.poi.hsmf.datatypes.MessagePropertiesChunk) IOException(java.io.IOException) ByteChunk(org.apache.poi.hsmf.datatypes.ByteChunk) PropertiesChunk(org.apache.poi.hsmf.datatypes.PropertiesChunk) StringChunk(org.apache.poi.hsmf.datatypes.StringChunk) Chunk(org.apache.poi.hsmf.datatypes.Chunk) DirectoryChunk(org.apache.poi.hsmf.datatypes.DirectoryChunk) StoragePropertiesChunk(org.apache.poi.hsmf.datatypes.StoragePropertiesChunk) MessagePropertiesChunk(org.apache.poi.hsmf.datatypes.MessagePropertiesChunk) MessageSubmissionChunk(org.apache.poi.hsmf.datatypes.MessageSubmissionChunk) DocumentInputStream(org.apache.poi.poifs.filesystem.DocumentInputStream) MessageSubmissionChunk(org.apache.poi.hsmf.datatypes.MessageSubmissionChunk) StringChunk(org.apache.poi.hsmf.datatypes.StringChunk) MAPIType(org.apache.poi.hsmf.datatypes.Types.MAPIType)

Example 33 with DirectoryNode

use of org.apache.poi.poifs.filesystem.DirectoryNode in project poi by apache.

the class POIDocument method replaceDirectory.

/**
 * Swaps the directory this document is attached to, e.g. when the
 * document is being written to a new POIFSFileSystem.
 *
 * @param newDirectory the directory to attach from now on
 * @return the directory that was attached before the call
 */
@Internal
protected DirectoryNode replaceDirectory(DirectoryNode newDirectory) {
    final DirectoryNode previous = directory;
    directory = newDirectory;
    return previous;
}
Also used : DirectoryNode(org.apache.poi.poifs.filesystem.DirectoryNode) Internal(org.apache.poi.util.Internal)

Example 34 with DirectoryNode

use of org.apache.poi.poifs.filesystem.DirectoryNode in project poi by apache.

the class POIDocument method getPropertySet.

/**
 * Looks up a named property set, returning {@code null} when it is not
 * available.
 *
 * <p>When an encryption descriptor is supplied, the property streams are
 * first decrypted into a temporary filesystem, which is closed again
 * before this method returns. Any failure along the way is logged at
 * WARN level and reported to the caller as {@code null}.</p>
 *
 * @param setName The property to read
 * @param encryptionInfo the encryption descriptor in case of cryptoAPI
 *        encryption, or {@code null} to read the property set directly
 * @return The value of the given property or null if it wasn't found.
 */
protected PropertySet getPropertySet(String setName, EncryptionInfo encryptionInfo) {
    DirectoryNode source = directory;
    NPOIFSFileSystem decryptedFs = null;
    // Names the phase in progress, for the log message on failure
    String step = "getting";
    try {
        if (encryptionInfo != null) {
            step = "getting encrypted";
            // The last matching name wins if several are present
            String streamName = null;
            for (String candidate : encryptedStreamNames) {
                if (source.hasEntry(candidate)) {
                    streamName = candidate;
                }
            }
            if (streamName == null) {
                throw new EncryptedDocumentException("can't find matching encrypted property stream");
            }
            CryptoAPIDecryptor decryptor = (CryptoAPIDecryptor) encryptionInfo.getDecryptor();
            decryptedFs = decryptor.getSummaryEntries(source, streamName);
            source = decryptedFs.getRoot();
        }

        // The directory can be null when creating new documents
        if (source != null && source.hasEntry(setName)) {
            // Find the entry, and get an input stream for it
            step = "getting";
            DocumentInputStream in = source.createDocumentInputStream(source.getEntry(setName));
            try {
                // Create the Property Set
                step = "creating";
                return PropertySetFactory.create(in);
            } finally {
                in.close();
            }
        }
        return null;
    } catch (Exception e) {
        logger.log(POILogger.WARN, "Error " + step + " property set with name " + setName, e);
        return null;
    } finally {
        if (decryptedFs != null) {
            try {
                decryptedFs.close();
            } catch (IOException e) {
                logger.log(POILogger.WARN, "Error closing encrypted property poifs", e);
            }
        }
    }
}
Also used : NPOIFSFileSystem(org.apache.poi.poifs.filesystem.NPOIFSFileSystem) DirectoryNode(org.apache.poi.poifs.filesystem.DirectoryNode) IOException(java.io.IOException) DocumentInputStream(org.apache.poi.poifs.filesystem.DocumentInputStream) CryptoAPIDecryptor(org.apache.poi.poifs.crypt.cryptoapi.CryptoAPIDecryptor) IOException(java.io.IOException)

Example 35 with DirectoryNode

use of org.apache.poi.poifs.filesystem.DirectoryNode in project poi by apache.

the class POIFSLister method displayDirectory.

/**
 * Prints the given directory and, recursively, everything below it to
 * standard output, indenting each level by two further spaces.
 *
 * <p>A document whose name starts with a character below 10 is printed
 * without that character, followed by an alternative form that shows
 * the character's numeric value.</p>
 *
 * @param dir the directory to list
 * @param indent the prefix printed in front of the directory's own name
 * @param withSizes whether to append each document's size, in decimal
 *        and in hex
 */
public static void displayDirectory(DirectoryNode dir, String indent, boolean withSizes) {
    System.out.println(indent + dir.getName() + " -");
    final String childIndent = indent + "  ";

    Iterator<Entry> children = dir.getEntries();
    if (!children.hasNext()) {
        System.out.println(childIndent + "(no children)");
        return;
    }

    while (children.hasNext()) {
        Entry child = children.next();
        if (child instanceof DirectoryNode) {
            displayDirectory((DirectoryNode) child, childIndent, withSizes);
            continue;
        }

        DocumentNode doc = (DocumentNode) child;
        String label = doc.getName();
        char first = label.charAt(0);
        if (first < 10) {
            // Leading control character: show the rest, plus a readable form
            String rest = label.substring(1);
            label = rest + " <(0x0" + (int) first + ")" + rest + ">";
        }
        String sizeSuffix = withSizes
                ? " [" + doc.getSize() + " / 0x" + Integer.toHexString(doc.getSize()) + "]"
                : "";
        System.out.println(childIndent + label + sizeSuffix);
    }
}
Also used : Entry(org.apache.poi.poifs.filesystem.Entry) DocumentNode(org.apache.poi.poifs.filesystem.DocumentNode) DirectoryNode(org.apache.poi.poifs.filesystem.DirectoryNode)

Aggregations

DirectoryNode (org.apache.poi.poifs.filesystem.DirectoryNode)47 Test (org.junit.Test)16 InputStream (java.io.InputStream)15 POIFSFileSystem (org.apache.poi.poifs.filesystem.POIFSFileSystem)13 NPOIFSFileSystem (org.apache.poi.poifs.filesystem.NPOIFSFileSystem)12 Entry (org.apache.poi.poifs.filesystem.Entry)9 ByteArrayInputStream (java.io.ByteArrayInputStream)8 ByteArrayOutputStream (java.io.ByteArrayOutputStream)8 IOException (java.io.IOException)8 OPOIFSFileSystem (org.apache.poi.poifs.filesystem.OPOIFSFileSystem)6 FileInputStream (java.io.FileInputStream)5 FileNotFoundException (java.io.FileNotFoundException)5 DocumentInputStream (org.apache.poi.poifs.filesystem.DocumentInputStream)5 HSSFWorkbook (org.apache.poi.hssf.usermodel.HSSFWorkbook)4 HWPFDocument (org.apache.poi.hwpf.HWPFDocument)4 File (java.io.File)3 ArrayList (java.util.ArrayList)3 AttachmentChunks (org.apache.poi.hsmf.datatypes.AttachmentChunks)3 DirectoryEntry (org.apache.poi.poifs.filesystem.DirectoryEntry)3 DocumentEntry (org.apache.poi.poifs.filesystem.DocumentEntry)3