Search in sources :

Example 6 with DocumentEntry

Use of org.apache.poi.poifs.filesystem.DocumentEntry in the Apache POI project.

Class TestTextRunReWrite, method testWritesOutTheSameNonRich.

/**
 * Re-applies the first text block's current raw text through
 * {@code HSLFTextParagraph.setText} and checks that the text lengths and
 * style coverage counts are unchanged, then writes the slideshow out and
 * checks that the "PowerPoint Document" stream is byte-for-byte identical
 * to the one in the slideshow's original directory.
 *
 * <p>Relies on the {@code ss} fixture, which is initialised outside this method.
 *
 * @throws IOException if writing or re-reading the slideshow fails
 */
@Test
public void testWritesOutTheSameNonRich() throws IOException {
    // Ensure the text lengths are as we'd expect to start with
    assertEquals(1, ss.getSlides().size());
    assertEquals(2, ss.getSlides().get(0).getTextParagraphs().size());
    // Grab the first two text blocks on the first slide
    List<HSLFTextParagraph> tr1 = ss.getSlides().get(0).getTextParagraphs().get(0);
    List<HSLFTextParagraph> tr2 = ss.getSlides().get(0).getTextParagraphs().get(1);
    assertEquals(30, HSLFTextParagraph.getRawText(tr1).length());
    assertEquals(179, HSLFTextParagraph.getRawText(tr2).length());
    assertEquals(1, tr1.size());
    assertEquals(30, HSLFTextParagraph.getText(tr1).length());
    assertEquals(30, HSLFTextParagraph.getRawText(tr1).length());
    assertEquals(31, tr1.get(0).getTextRuns().get(0).getCharacterStyle().getCharactersCovered());
    assertEquals(31, tr1.get(0).getParagraphStyle().getCharactersCovered());
    // Set the text to be as it is now
    HSLFTextParagraph.setText(tr1, HSLFTextParagraph.getRawText(tr1));
    tr1 = ss.getSlides().get(0).getTextParagraphs().get(0);
    // Check the text lengths are still right
    assertEquals(30, HSLFTextParagraph.getRawText(tr1).length());
    assertEquals(179, HSLFTextParagraph.getRawText(tr2).length());
    assertEquals(1, tr1.size());
    assertEquals(30, HSLFTextParagraph.getText(tr1).length());
    assertEquals(30, HSLFTextParagraph.getRawText(tr1).length());
    assertEquals(31, tr1.get(0).getTextRuns().get(0).getCharacterStyle().getCharactersCovered());
    assertEquals(31, tr1.get(0).getParagraphStyle().getCharactersCovered());
    // Write the slideshow out to a byte array
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ss.write(baos);
    // Re-open what was written. try-with-resources guarantees the new file
    // system is closed even when one of the assertions below fails.
    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
    try (POIFSFileSystem npfs = new POIFSFileSystem(bais)) {
        // Check that the "PowerPoint Document" sections have the same size
        DirectoryNode oDir = ss.getSlideShowImpl().getDirectory();
        DocumentEntry oProps = (DocumentEntry) oDir.getEntry(HSLFSlideShow.POWERPOINT_DOCUMENT);
        DocumentEntry nProps = (DocumentEntry) npfs.getRoot().getEntry(HSLFSlideShow.POWERPOINT_DOCUMENT);
        assertEquals(oProps.getSize(), nProps.getSize());
        // Check that they contain the same data. readFully is used instead of
        // read(byte[]) because a plain read may legally stop short of filling
        // the buffer, which would make the comparison meaningless.
        byte[] oData = new byte[oProps.getSize()];
        byte[] nData = new byte[nProps.getSize()];
        oDir.createDocumentInputStream(HSLFSlideShow.POWERPOINT_DOCUMENT).readFully(oData);
        npfs.createDocumentInputStream(HSLFSlideShow.POWERPOINT_DOCUMENT).readFully(nData);
        assertArrayEquals(oData, nData);
    }
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) POIFSFileSystem(org.apache.poi.poifs.filesystem.POIFSFileSystem) HSLFTextParagraph(org.apache.poi.hslf.usermodel.HSLFTextParagraph) DocumentEntry(org.apache.poi.poifs.filesystem.DocumentEntry) DirectoryNode(org.apache.poi.poifs.filesystem.DirectoryNode) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Test(org.junit.Test)

Example 7 with DocumentEntry

Use of org.apache.poi.poifs.filesystem.DocumentEntry in the Apache POI project.

Class TestTextRunReWrite, method testWritesOutTheSameRich.

/**
 * Re-applies the first rich text run's current raw text through
 * {@code HSLFTextRun.setText} and checks that run/paragraph lengths and
 * style coverage counts are unchanged, then writes the slideshow out and
 * checks that the "PowerPoint Document" stream is byte-for-byte identical
 * to the one in the slideshow's original directory.
 *
 * <p>Relies on the {@code ss} fixture, which is initialised outside this method.
 *
 * @throws IOException if writing or re-reading the slideshow fails
 */
@Test
public void testWritesOutTheSameRich() throws IOException {
    // Grab the first text block on the first slide
    List<HSLFTextParagraph> tr1 = ss.getSlides().get(0).getTextParagraphs().get(0);
    // Get the first rich text run
    HSLFTextRun rtr1 = tr1.get(0).getTextRuns().get(0);
    // Check that the text sizes are as expected
    assertEquals(1, tr1.get(0).getTextRuns().size());
    assertEquals(30, HSLFTextParagraph.getRawText(tr1).length());
    assertEquals(30, rtr1.getLength());
    assertEquals(30, rtr1.getRawText().length());
    assertEquals(31, rtr1.getCharacterStyle().getCharactersCovered());
    assertEquals(31, tr1.get(0).getParagraphStyle().getCharactersCovered());
    // Set the text to be as it is now
    rtr1.setText(rtr1.getRawText());
    rtr1 = tr1.get(0).getTextRuns().get(0);
    // Check that the text sizes are still as expected
    assertEquals(1, tr1.get(0).getTextRuns().size());
    assertEquals(30, HSLFTextParagraph.getRawText(tr1).length());
    assertEquals(30, tr1.get(0).getTextRuns().get(0).getRawText().length());
    assertEquals(30, rtr1.getLength());
    assertEquals(30, rtr1.getRawText().length());
    assertEquals(31, rtr1.getCharacterStyle().getCharactersCovered());
    assertEquals(31, tr1.get(0).getParagraphStyle().getCharactersCovered());
    // Write the slideshow out to a byte array
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ss.write(baos);
    // Re-open what was written. try-with-resources guarantees the new file
    // system is closed even when one of the assertions below fails.
    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
    try (POIFSFileSystem npfs = new POIFSFileSystem(bais)) {
        // Check that the "PowerPoint Document" sections have the same size
        DirectoryNode oDir = ss.getSlideShowImpl().getDirectory();
        DocumentEntry oProps = (DocumentEntry) oDir.getEntry(HSLFSlideShow.POWERPOINT_DOCUMENT);
        DocumentEntry nProps = (DocumentEntry) npfs.getRoot().getEntry(HSLFSlideShow.POWERPOINT_DOCUMENT);
        assertEquals(oProps.getSize(), nProps.getSize());
        // Check that they contain the same data. readFully is used instead of
        // read(byte[]) because a plain read may legally stop short of filling
        // the buffer, which would make the comparison meaningless.
        byte[] oData = new byte[oProps.getSize()];
        byte[] nData = new byte[nProps.getSize()];
        oDir.createDocumentInputStream(HSLFSlideShow.POWERPOINT_DOCUMENT).readFully(oData);
        npfs.createDocumentInputStream(HSLFSlideShow.POWERPOINT_DOCUMENT).readFully(nData);
        assertArrayEquals(oData, nData);
    }
}
Also used : HSLFTextRun(org.apache.poi.hslf.usermodel.HSLFTextRun) ByteArrayInputStream(java.io.ByteArrayInputStream) POIFSFileSystem(org.apache.poi.poifs.filesystem.POIFSFileSystem) HSLFTextParagraph(org.apache.poi.hslf.usermodel.HSLFTextParagraph) DocumentEntry(org.apache.poi.poifs.filesystem.DocumentEntry) DirectoryNode(org.apache.poi.poifs.filesystem.DirectoryNode) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Test(org.junit.Test)

Example 8 with DocumentEntry

Use of org.apache.poi.poifs.filesystem.DocumentEntry in the Apache POI project.

Class TestCurrentUserAtom, method writeNormal.

/**
 * Builds a fresh {@code CurrentUserAtom}, writes it out, and checks that the
 * bytes match the "Current User" stream stored in a known sample file.
 *
 * <p>Relies on the {@code _slTests} and {@code normalFile} fixtures, which are
 * defined outside this method.
 *
 * @throws Exception if the sample file cannot be opened or read
 */
@Test
public void writeNormal() throws Exception {
    // Get raw contents from a known file. Both the file system and the stream
    // are closed by try-with-resources, even if reading fails part-way.
    byte[] contents;
    try (POIFSFileSystem fs = new POIFSFileSystem(_slTests.getFile(normalFile))) {
        DocumentEntry docProps = (DocumentEntry) fs.getRoot().getEntry("Current User");
        contents = new byte[docProps.getSize()];
        try (InputStream in = fs.getRoot().createDocumentInputStream("Current User")) {
            // A single read(byte[]) may return fewer bytes than requested, so
            // keep reading until the buffer is full and fail loudly on early EOF.
            int filled = 0;
            while (filled < contents.length) {
                int got = in.read(contents, filled, contents.length - filled);
                if (got < 0) {
                    throw new IllegalStateException("\"Current User\" stream ended after "
                            + filled + " of " + contents.length + " bytes");
                }
                filled += got;
            }
        }
    }
    // Now build up a new one
    CurrentUserAtom cu = new CurrentUserAtom();
    cu.setLastEditUsername("Hogwarts");
    cu.setCurrentEditOffset(0x2942);
    // Check it matches
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    cu.writeOut(baos);
    byte[] out = baos.toByteArray();
    assertArrayEquals(contents, out);
}
Also used : POIFSFileSystem(org.apache.poi.poifs.filesystem.POIFSFileSystem) InputStream(java.io.InputStream) DocumentEntry(org.apache.poi.poifs.filesystem.DocumentEntry) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Test(org.junit.Test)

Example 9 with DocumentEntry

Use of org.apache.poi.poifs.filesystem.DocumentEntry in the Apache POI project.

Class TestExOleObjStg, method testRead.

/**
 * Parses an {@code ExOleObjStg} record from the {@code data} fixture, checks
 * its record type and that the bytes read from {@code getData()} match the
 * reported data length, then opens that data as a POIFS file system and
 * checks that a "Contents" entry is present.
 *
 * <p>Relies on the {@code data} fixture and the {@code readAll} helper, both
 * defined outside this method.
 *
 * @throws Exception if the record data cannot be read or opened as POIFS
 */
@Test
public void testRead() throws Exception {
    ExOleObjStg record = new ExOleObjStg(data, 0, data.length);
    assertEquals(RecordTypes.ExOleObjStg.typeID, record.getRecordType());
    int len = record.getDataLength();
    byte[] oledata = readAll(record.getData());
    assertEquals(len, oledata.length);
    // try-with-resources closes the file system even when getEntry throws or
    // an assertion fails; the original only closed it on the success path.
    try (POIFSFileSystem fs = new POIFSFileSystem(record.getData())) {
        // Always-true assertion: acts only as a marker that construction succeeded
        assertTrue("Constructed POIFS from ExOleObjStg data", true);
        DocumentEntry doc = (DocumentEntry) fs.getRoot().getEntry("Contents");
        assertNotNull(doc);
        // Always-true assertion: acts only as a marker that the lookup succeeded
        assertTrue("Fetched the Contents stream containing OLE properties", true);
    }
}
Also used : POIFSFileSystem(org.apache.poi.poifs.filesystem.POIFSFileSystem) DocumentEntry(org.apache.poi.poifs.filesystem.DocumentEntry) Test(org.junit.Test)

Example 10 with DocumentEntry

Use of org.apache.poi.poifs.filesystem.DocumentEntry in the Apache Tika project.

Class RTFObjDataParser, method handleEmbeddedPOIFS.

/**
 * Extracts the payload bytes of an embedded object stored as a POIFS container.
 *
 * <p>Handling depends on what the container root holds:
 * <ul>
 *   <li>a "Package" entry: its bytes are returned as-is;</li>
 *   <li>detected type {@code OLE10_NATIVE}: the unwrapped Ole10Native data buffer,
 *       or {@code null} if the record is not valid;</li>
 *   <li>detected type {@code COMP_OBJ}: the bytes of the "CONTENTS" entry
 *       (falling back to "Contents");</li>
 *   <li>anything else: the whole input stream is copied, and a generated resource
 *       name and the detected content type are recorded in {@code metadata}.</li>
 * </ul>
 *
 * @param is stream positioned at the start of the candidate POIFS data
 * @param metadata receives the resource name and content type in the fallback case
 * @param unknownFilenameCount counter used to number generated file names;
 *        incremented in the fallback case
 * @return the extracted bytes; may be {@code null}
 * @throws IOException if the stream is not actually POIFS or cannot be read
 */
private byte[] handleEmbeddedPOIFS(InputStream is, Metadata metadata, AtomicInteger unknownFilenameCount) throws IOException {
    byte[] ret = null;
    try (NPOIFSFileSystem fs = new NPOIFSFileSystem(is)) {
        DirectoryNode root = fs.getRoot();
        if (root == null) {
            return ret;
        }
        if (root.hasEntry("Package")) {
            Entry ooxml = root.getEntry("Package");
            // Close the wrapping TikaInputStream once copied; the original left it open.
            try (TikaInputStream stream = TikaInputStream.get(new DocumentInputStream((DocumentEntry) ooxml))) {
                ByteArrayOutputStream out = new ByteArrayOutputStream();
                IOUtils.copy(stream, out);
                ret = out.toByteArray();
            }
        } else {
            //try poifs
            POIFSDocumentType type = POIFSDocumentType.detectType(root);
            if (type == POIFSDocumentType.OLE10_NATIVE) {
                try {
                    // Try to un-wrap the OLE10Native record:
                    Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(root);
                    ret = ole.getDataBuffer();
                } catch (Ole10NativeException ignored) {
                    // Not a valid OLE10Native record, skip it (ret stays null)
                }
            } else if (type == POIFSDocumentType.COMP_OBJ) {
                DocumentEntry contentsEntry;
                try {
                    contentsEntry = (DocumentEntry) root.getEntry("CONTENTS");
                } catch (FileNotFoundException ioe) {
                    // Upper-case name not found; retry with the mixed-case spelling
                    contentsEntry = (DocumentEntry) root.getEntry("Contents");
                }
                try (DocumentInputStream inp = new DocumentInputStream(contentsEntry)) {
                    ret = new byte[contentsEntry.getSize()];
                    inp.readFully(ret);
                }
            } else {
                ByteArrayOutputStream out = new ByteArrayOutputStream();
                // NOTE(review): reset() presumably relies on the caller having marked
                // the stream (or supplying one that rewinds to its start) - confirm.
                is.reset();
                IOUtils.copy(is, out);
                ret = out.toByteArray();
                metadata.set(Metadata.RESOURCE_NAME_KEY, "file_" + unknownFilenameCount.getAndIncrement() + "." + type.getExtension());
                metadata.set(Metadata.CONTENT_TYPE, type.getType().toString());
            }
        }
    }
    return ret;
}
Also used : NPOIFSFileSystem(org.apache.poi.poifs.filesystem.NPOIFSFileSystem) Entry(org.apache.poi.poifs.filesystem.Entry) DocumentEntry(org.apache.poi.poifs.filesystem.DocumentEntry) Ole10NativeException(org.apache.poi.poifs.filesystem.Ole10NativeException) Ole10Native(org.apache.poi.poifs.filesystem.Ole10Native) DocumentEntry(org.apache.poi.poifs.filesystem.DocumentEntry) FileNotFoundException(java.io.FileNotFoundException) TikaInputStream(org.apache.tika.io.TikaInputStream) DirectoryNode(org.apache.poi.poifs.filesystem.DirectoryNode) POIFSDocumentType(org.apache.tika.parser.microsoft.OfficeParser.POIFSDocumentType) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DocumentInputStream(org.apache.poi.poifs.filesystem.DocumentInputStream)

Aggregations

DocumentEntry (org.apache.poi.poifs.filesystem.DocumentEntry)14 POIFSFileSystem (org.apache.poi.poifs.filesystem.POIFSFileSystem)7 ByteArrayOutputStream (java.io.ByteArrayOutputStream)6 DocumentInputStream (org.apache.poi.poifs.filesystem.DocumentInputStream)6 ByteArrayInputStream (java.io.ByteArrayInputStream)5 FileNotFoundException (java.io.FileNotFoundException)4 Test (org.junit.Test)4 InputStream (java.io.InputStream)3 DirectoryNode (org.apache.poi.poifs.filesystem.DirectoryNode)3 Entry (org.apache.poi.poifs.filesystem.Entry)3 IOException (java.io.IOException)2 HSLFTextParagraph (org.apache.poi.hslf.usermodel.HSLFTextParagraph)2 DirectoryEntry (org.apache.poi.poifs.filesystem.DirectoryEntry)2 Ole10Native (org.apache.poi.poifs.filesystem.Ole10Native)2 Ole10NativeException (org.apache.poi.poifs.filesystem.Ole10NativeException)2 TikaException (org.apache.tika.exception.TikaException)2 TikaInputStream (org.apache.tika.io.TikaInputStream)2 POIFSDocumentType (org.apache.tika.parser.microsoft.OfficeParser.POIFSDocumentType)2 File (java.io.File)1 DocumentSummaryInformation (org.apache.poi.hpsf.DocumentSummaryInformation)1