use of org.apache.poi.poifs.filesystem.DirectoryNode in project poi by apache.
the class POIFSChunkParser method parse.
/**
 * Builds the chunk groups found in the given directory.
 * <p>
 * A {@code Chunks} group is created for the node itself and is always the first
 * element of the result. Every entry directly inside {@code node} that is a
 * {@code DirectoryNode} and whose name starts with {@code AttachmentChunks.PREFIX},
 * {@code NameIdChunks.NAME} or {@code RecipientChunks.PREFIX} contributes one further
 * group, in iteration order. All other entries are ignored by this loop.
 *
 * @param node the directory whose entries are examined
 * @return the main group followed by one group per recognised sub-directory
 * @throws IOException declared by the signature; presumably raised while the
 *         chunks are processed - confirm against processChunks
 */
public static ChunkGroup[] parse(DirectoryNode node) throws IOException {
    Chunks topLevel = new Chunks();
    ArrayList<ChunkGroup> result = new ArrayList<ChunkGroup>();
    result.add(topLevel);

    // Only the direct entries of the node are walked here.
    // NOTE(review): the original remark at this point read "there doesn't seem to be
    // any use of that in Outlook"; presumably "that" means deeper nesting - confirm.
    for (Entry child : node) {
        if (!(child instanceof DirectoryNode)) {
            continue;
        }
        DirectoryNode childDir = (DirectoryNode) child;
        String dirName = childDir.getName();

        // Work out which kind of group, if any, this directory maps to.
        // The checks are independent, so a later match replaces an earlier one.
        ChunkGroup matched = null;
        if (dirName.startsWith(AttachmentChunks.PREFIX)) {
            matched = new AttachmentChunks(dirName);
        }
        if (dirName.startsWith(NameIdChunks.NAME)) {
            matched = new NameIdChunks();
        }
        if (dirName.startsWith(RecipientChunks.PREFIX)) {
            matched = new RecipientChunks(dirName);
        }

        if (matched == null) {
            // Unknown directory, skip silently
            continue;
        }
        processChunks(childDir, matched);
        result.add(matched);
    }

    // The sub-directories are done, now handle the chunks of the node itself
    processChunks(node, topLevel);

    // Tell every group that all of its chunks have been supplied
    for (ChunkGroup each : result) {
        each.chunksComplete();
    }

    ChunkGroup[] asArray = new ChunkGroup[result.size()];
    return result.toArray(asArray);
}
use of org.apache.poi.poifs.filesystem.DirectoryNode in project poi by apache.
the class TestExtractor method testExtractFromEmbeded.
/**
 * Opens the sample workbook "excel_with_embeded.xls" and checks, via
 * {@code assertExtractFromEmbedded}, the text of the two embedded PowerPoint
 * entries "MBD0000A3B6" and "MBD0000A3B3".
 * <p>
 * Every resource opened here is released in a {@code finally} block so that a
 * failing assertion does not leave the file system, the first extractor or the
 * input stream open. The close order of the success path (second extractor,
 * first extractor, file system) is unchanged.
 *
 * @throws IOException if the sample cannot be opened or a resource fails to close
 */
@Test
public void testExtractFromEmbeded() throws IOException {
    InputStream is = POIDataSamples.getSpreadSheetInstance().openResourceAsStream("excel_with_embeded.xls");
    try {
        POIFSFileSystem fs = new POIFSFileSystem(is);
        try {
            DirectoryNode root = fs.getRoot();
            PowerPointExtractor ppe1 = assertExtractFromEmbedded(root, "MBD0000A3B6", "Sample PowerPoint file\nThis is the 1st file\nNot much too it\n");
            try {
                PowerPointExtractor ppe2 = assertExtractFromEmbedded(root, "MBD0000A3B3", "Sample PowerPoint file\nThis is the 2nd file\nNot much too it either\n");
                ppe2.close();
            } finally {
                ppe1.close();
            }
        } finally {
            fs.close();
        }
    } finally {
        // NOTE(review): the file system may already have closed this stream;
        // closing again is assumed to be harmless - confirm.
        is.close();
    }
}
use of org.apache.poi.poifs.filesystem.DirectoryNode in project poi by apache.
the class TestTextRunReWrite method testWritesOutTheSameNonRich.
/**
 * Checks that setting the first text block of the first slide to its own raw
 * text changes neither the text lengths nor the number of characters covered
 * by the character and paragraph styles, and that the slideshow written out
 * afterwards has a "PowerPoint Document" entry of the same size and content
 * as the one it was loaded from.
 * <p>
 * The re-read file system is closed in a {@code finally} block so that a
 * failing assertion does not leak it.
 * <p>
 * Uses the {@code ss} slideshow declared outside this method (presumably set up
 * by the test fixture - confirm).
 *
 * @throws IOException if writing or re-reading the slideshow fails
 */
@Test
public void testWritesOutTheSameNonRich() throws IOException {
    // Ensure the text lengths are as we'd expect to start with
    assertEquals(1, ss.getSlides().size());
    assertEquals(2, ss.getSlides().get(0).getTextParagraphs().size());
    // Grab the first two text blocks on the first slide
    List<HSLFTextParagraph> tr1 = ss.getSlides().get(0).getTextParagraphs().get(0);
    List<HSLFTextParagraph> tr2 = ss.getSlides().get(0).getTextParagraphs().get(1);
    assertEquals(30, HSLFTextParagraph.getRawText(tr1).length());
    assertEquals(179, HSLFTextParagraph.getRawText(tr2).length());
    assertEquals(1, tr1.size());
    assertEquals(30, HSLFTextParagraph.getText(tr1).length());
    assertEquals(30, HSLFTextParagraph.getRawText(tr1).length());
    // The styles cover one character more than the raw text length
    assertEquals(31, tr1.get(0).getTextRuns().get(0).getCharacterStyle().getCharactersCovered());
    assertEquals(31, tr1.get(0).getParagraphStyle().getCharactersCovered());
    // Set the text to be as it is now
    HSLFTextParagraph.setText(tr1, HSLFTextParagraph.getRawText(tr1));
    // Fetch the paragraphs again after the update
    tr1 = ss.getSlides().get(0).getTextParagraphs().get(0);
    // Check the text lengths are still right
    assertEquals(30, HSLFTextParagraph.getRawText(tr1).length());
    assertEquals(179, HSLFTextParagraph.getRawText(tr2).length());
    assertEquals(1, tr1.size());
    assertEquals(30, HSLFTextParagraph.getText(tr1).length());
    assertEquals(30, HSLFTextParagraph.getRawText(tr1).length());
    assertEquals(31, tr1.get(0).getTextRuns().get(0).getCharacterStyle().getCharactersCovered());
    assertEquals(31, tr1.get(0).getParagraphStyle().getCharactersCovered());
    // Write the slideshow out to a byte array
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ss.write(baos);
    // Build an input stream of it
    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
    // Use POIFS to query that lot
    POIFSFileSystem npfs = new POIFSFileSystem(bais);
    try {
        // Check that the "PowerPoint Document" sections have the same size
        DirectoryNode oDir = ss.getSlideShowImpl().getDirectory();
        DocumentEntry oProps = (DocumentEntry) oDir.getEntry(HSLFSlideShow.POWERPOINT_DOCUMENT);
        DocumentEntry nProps = (DocumentEntry) npfs.getRoot().getEntry(HSLFSlideShow.POWERPOINT_DOCUMENT);
        assertEquals(oProps.getSize(), nProps.getSize());
        // Check that they contain the same data
        byte[] _oData = new byte[oProps.getSize()];
        byte[] _nData = new byte[nProps.getSize()];
        // NOTE(review): the value returned by read() is not checked; this assumes a
        // single call fills the whole buffer - confirm for these streams.
        oDir.createDocumentInputStream(HSLFSlideShow.POWERPOINT_DOCUMENT).read(_oData);
        npfs.createDocumentInputStream(HSLFSlideShow.POWERPOINT_DOCUMENT).read(_nData);
        assertArrayEquals(_oData, _nData);
    } finally {
        // Release the re-read file system even when an assertion above fails
        npfs.close();
    }
}
use of org.apache.poi.poifs.filesystem.DirectoryNode in project poi by apache.
the class TestTextRunReWrite method testWritesOutTheSameRich.
/**
 * Checks that setting the first text run of the first slide to its own raw
 * text changes neither the run count, the text lengths nor the number of
 * characters covered by the character and paragraph styles, and that the
 * slideshow written out afterwards has a "PowerPoint Document" entry of the
 * same size and content as the one it was loaded from.
 * <p>
 * The re-read file system is closed in a {@code finally} block so that a
 * failing assertion does not leak it.
 * <p>
 * Uses the {@code ss} slideshow declared outside this method (presumably set up
 * by the test fixture - confirm).
 *
 * @throws IOException if writing or re-reading the slideshow fails
 */
@Test
public void testWritesOutTheSameRich() throws IOException {
    // Grab the first text block on the first slide
    List<HSLFTextParagraph> tr1 = ss.getSlides().get(0).getTextParagraphs().get(0);
    // Get the first rich text run
    HSLFTextRun rtr1 = tr1.get(0).getTextRuns().get(0);
    // Check that the text sizes are as expected
    assertEquals(1, tr1.get(0).getTextRuns().size());
    assertEquals(30, HSLFTextParagraph.getRawText(tr1).length());
    assertEquals(30, rtr1.getLength());
    assertEquals(30, rtr1.getRawText().length());
    // The styles cover one character more than the raw text length
    assertEquals(31, rtr1.getCharacterStyle().getCharactersCovered());
    assertEquals(31, tr1.get(0).getParagraphStyle().getCharactersCovered());
    // Set the text to be as it is now
    rtr1.setText(rtr1.getRawText());
    // Fetch the run again after the update
    rtr1 = tr1.get(0).getTextRuns().get(0);
    // Check that the text sizes are still as expected
    assertEquals(1, tr1.get(0).getTextRuns().size());
    assertEquals(30, HSLFTextParagraph.getRawText(tr1).length());
    assertEquals(30, tr1.get(0).getTextRuns().get(0).getRawText().length());
    assertEquals(30, rtr1.getLength());
    assertEquals(30, rtr1.getRawText().length());
    assertEquals(31, rtr1.getCharacterStyle().getCharactersCovered());
    assertEquals(31, tr1.get(0).getParagraphStyle().getCharactersCovered());
    // Write the slideshow out to a byte array
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ss.write(baos);
    // Build an input stream of it
    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
    // Use POIFS to query that lot
    POIFSFileSystem npfs = new POIFSFileSystem(bais);
    try {
        // Check that the "PowerPoint Document" sections have the same size
        DirectoryNode oDir = ss.getSlideShowImpl().getDirectory();
        DocumentEntry oProps = (DocumentEntry) oDir.getEntry(HSLFSlideShow.POWERPOINT_DOCUMENT);
        DocumentEntry nProps = (DocumentEntry) npfs.getRoot().getEntry(HSLFSlideShow.POWERPOINT_DOCUMENT);
        assertEquals(oProps.getSize(), nProps.getSize());
        // Check that they contain the same data
        byte[] _oData = new byte[oProps.getSize()];
        byte[] _nData = new byte[nProps.getSize()];
        // NOTE(review): the value returned by read() is not checked; this assumes a
        // single call fills the whole buffer - confirm for these streams.
        oDir.createDocumentInputStream(HSLFSlideShow.POWERPOINT_DOCUMENT).read(_oData);
        npfs.createDocumentInputStream(HSLFSlideShow.POWERPOINT_DOCUMENT).read(_nData);
        assertArrayEquals(_oData, _nData);
    } finally {
        // Release the re-read file system even when an assertion above fails
        npfs.close();
    }
}
use of org.apache.poi.poifs.filesystem.DirectoryNode in project poi by apache.
the class EmbeddedObjects method main.
/**
 * Demonstrates how to reach the objects embedded in an Excel workbook.
 * <p>
 * The file named by {@code args[0]} is opened as a workbook and each of its
 * embedded objects is inspected. When the object has a directory entry, its
 * OLE2 class name selects the document type to open: "Worksheet", "Document"
 * or "Presentation". Any other class name just has the entries of its
 * directory listed. When there is no directory entry, the raw object data is
 * fetched instead.
 * <p>
 * The locals {@code name} and {@code objectData} are deliberately unused; they
 * only show which calls are available, hence the suppressed warning.
 *
 * @param args command line arguments; {@code args[0]} is the path of the workbook
 * @throws Exception if the file cannot be read or an embedded document cannot be opened
 */
@SuppressWarnings("unused")
public static void main(String[] args) throws Exception {
    POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream(args[0]));
    HSSFWorkbook workbook = new HSSFWorkbook(fs);
    try {
        for (HSSFObjectData obj : workbook.getAllEmbeddedObjects()) {
            //the OLE2 Class Name of the object
            String oleName = obj.getOLE2ClassName();
            DirectoryNode dn = (obj.hasDirectoryEntry()) ? (DirectoryNode) obj.getDirectory() : null;
            Closeable document = null;
            if (dn == null) {
                // There is no DirectoryEntry, so none of the document types below can
                // be opened from it, whatever the class name says.
                // Recover the object's data from the HSSFObjectData instance.
                byte[] objectData = obj.getObjectData();
            } else if ("Worksheet".equals(oleName)) {
                // Constant-first equals: a missing class name falls through to the
                // generic branch instead of throwing
                document = new HSSFWorkbook(dn, fs, false);
            } else if ("Document".equals(oleName)) {
                document = new HWPFDocument(dn);
            } else if ("Presentation".equals(oleName)) {
                document = new HSLFSlideShow(dn);
            } else {
                // Unrecognised class name. Examine the directory's entries to find out what it is
                for (Entry entry : dn) {
                    String name = entry.getName();
                }
            }
            if (document != null) {
                document.close();
            }
        }
    } finally {
        // Close the workbook even when one of the embedded objects fails to open
        workbook.close();
    }
}
Aggregations