Search in sources:

Example 1 with HSLFObjectData

use of org.apache.poi.hslf.usermodel.HSLFObjectData in project poi by apache.

In the class DataExtraction, method main.

/**
 * Extracts the sounds, embedded OLE objects and pictures of a PowerPoint
 * presentation into files created relative to the current working directory.
 *
 * <p>Every stream and document opened here is managed by try-with-resources so
 * that it is released even when reading or writing fails part-way through.
 *
 * @param args command line arguments; {@code args[0]} is the path of the
 *             presentation to read. With no arguments the usage text is shown.
 * @throws Exception if the presentation cannot be read or an output file
 *                   cannot be written
 */
public static void main(String[] args) throws Exception {
    if (args.length == 0) {
        usage();
        return;
    }
    try (FileInputStream is = new FileInputStream(args[0]);
         HSLFSlideShow ppt = new HSLFSlideShow(is)) {
        // extract all sound files embedded in this presentation
        for (HSLFSoundData sound : ppt.getSoundData()) {
            // sound type, e.g. *.wav
            String type = sound.getSoundType();
            // typically file name
            String name = sound.getSoundName();
            // raw bytes
            byte[] data = sound.getData();
            // save the sound on disk
            try (FileOutputStream out = new FileOutputStream(name + type)) {
                out.write(data);
            }
        }
        int oleIdx = -1, picIdx = -1;
        for (HSLFSlide slide : ppt.getSlides()) {
            for (HSLFShape shape : slide.getShapes()) {
                if (shape instanceof OLEShape) {
                    // extract embedded OLE documents
                    oleIdx++;
                    OLEShape ole = (OLEShape) shape;
                    HSLFObjectData data = ole.getObjectData();
                    if (data == null) {
                        // no embedded data could be resolved for this shape, nothing to extract
                        continue;
                    }
                    String name = ole.getInstanceName();
                    if ("Worksheet".equals(name)) {
                        // read xls; the workbook is only opened to demonstrate access
                        try (InputStream xls = data.getData();
                             @SuppressWarnings("unused") HSSFWorkbook wb = new HSSFWorkbook(xls)) {
                            // nothing to do with the workbook in this example
                        }
                    } else if ("Document".equals(name)) {
                        try (InputStream word = data.getData();
                             HWPFDocument doc = new HWPFDocument(word)) {
                            // read the word document
                            Range r = doc.getRange();
                            for (int k = 0; k < r.numParagraphs(); k++) {
                                Paragraph p = r.getParagraph(k);
                                System.out.println(p.text());
                            }
                            // save on disk
                            // NOTE(review): this name uses the 0-based oleIdx while the
                            // generic ".dat" branch below uses oleIdx + 1 - confirm which is intended
                            try (FileOutputStream out = new FileOutputStream(name + "-(" + (oleIdx) + ").doc")) {
                                doc.write(out);
                            }
                        }
                    } else {
                        // any other OLE object: dump the raw stream unchanged
                        try (FileOutputStream out = new FileOutputStream(ole.getProgID() + "-" + (oleIdx + 1) + ".dat");
                             InputStream dis = data.getData()) {
                            byte[] chunk = new byte[2048];
                            int count;
                            while ((count = dis.read(chunk)) >= 0) {
                                out.write(chunk, 0, count);
                            }
                        }
                    }
                } else if (shape instanceof HSLFPictureShape) {
                    // Pictures
                    picIdx++;
                    HSLFPictureShape p = (HSLFPictureShape) shape;
                    HSLFPictureData data = p.getPictureData();
                    String ext = data.getType().extension;
                    try (FileOutputStream out = new FileOutputStream("pict-" + picIdx + ext)) {
                        out.write(data.getData());
                    }
                }
            }
        }
    }
}
Also used : FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) HSLFObjectData(org.apache.poi.hslf.usermodel.HSLFObjectData) Range(org.apache.poi.hwpf.usermodel.Range) HSLFSlideShow(org.apache.poi.hslf.usermodel.HSLFSlideShow) FileInputStream(java.io.FileInputStream) OLEShape(org.apache.poi.hslf.model.OLEShape) HSSFWorkbook(org.apache.poi.hssf.usermodel.HSSFWorkbook) Paragraph(org.apache.poi.hwpf.usermodel.Paragraph) HWPFDocument(org.apache.poi.hwpf.HWPFDocument) HSLFShape(org.apache.poi.hslf.usermodel.HSLFShape) HSLFPictureShape(org.apache.poi.hslf.usermodel.HSLFPictureShape) FileOutputStream(java.io.FileOutputStream) HSLFSoundData(org.apache.poi.hslf.usermodel.HSLFSoundData) HSLFPictureData(org.apache.poi.hslf.usermodel.HSLFPictureData) HSLFSlide(org.apache.poi.hslf.usermodel.HSLFSlide)

Example 2 with HSLFObjectData

use of org.apache.poi.hslf.usermodel.HSLFObjectData in project poi by apache.

In the class TestOleEmbedding, method testOleEmbedding2003.

/**
 * Tests support for OLE objects by comparing the checksums of the placeholder
 * pictures and of the embedded objects of {@code ole2-embedding-2003.ppt}
 * against known values.
 *
 * @throws IOException if the test presentation cannot be read
 */
@Test
public void testOleEmbedding2003() throws IOException {
    // try-with-resources: the slide show is closed even when an assertion fails
    try (HSLFSlideShowImpl slideShow = new HSLFSlideShowImpl(_slTests.openResourceAsStream("ole2-embedding-2003.ppt"))) {
        // Placeholder EMFs for clients that don't support the OLE components.
        List<HSLFPictureData> pictures = slideShow.getPictureData();
        assertEquals("Should be two pictures", 2, pictures.size());
        // first two entries belong to the pictures, last two to the embedded objects
        long[] checkSums = { 0xD37A4204L, 0x26A62F68L, 0x82853169L, 0xE0E45D2BL };
        int checkId = 0;
        // check that the checksums are up to date
        for (HSLFPictureData pd : pictures) {
            long checkEMF = IOUtils.calculateChecksum(pd.getData());
            assertEquals(checkSums[checkId++], checkEMF);
        }
        // Actual embedded objects.
        HSLFObjectData[] objects = slideShow.getEmbeddedObjects();
        assertEquals("Should be two objects", 2, objects.length);
        for (HSLFObjectData od : objects) {
            long checkEMF = IOUtils.calculateChecksum(od.getData());
            assertEquals(checkSums[checkId++], checkEMF);
        }
    }
}
Also used : HSLFPictureData(org.apache.poi.hslf.usermodel.HSLFPictureData) HSLFObjectData(org.apache.poi.hslf.usermodel.HSLFObjectData) HSLFSlideShowImpl(org.apache.poi.hslf.usermodel.HSLFSlideShowImpl) Test(org.junit.Test)

Example 3 with HSLFObjectData

use of org.apache.poi.hslf.usermodel.HSLFObjectData in project poi by apache.

In the class OLEShape, method getObjectData.

/**
 * Looks up the embedded data that belongs to this OLE shape.
 *
 * <p>The slide show's embedded objects are scanned for the entry whose storage
 * persist id equals the object-storage reference of this shape's
 * {@code ExEmbed} record. If several entries match, the last one is returned.
 *
 * @return the matching embedded object data, or {@code null} (after logging a
 *         warning) when the shape has no {@code ExEmbed} record or no entry matches
 */
@SuppressWarnings("resource")
public HSLFObjectData getObjectData() {
    HSLFObjectData[] candidates = getSheet().getSlideShow().getEmbeddedObjects();
    // record holding the persist reference of the embedded storage
    ExEmbed embed = getExEmbed();
    HSLFObjectData match = null;
    if (embed != null) {
        int wantedId = embed.getExOleObjAtom().getObjStgDataRef();
        for (HSLFObjectData candidate : candidates) {
            if (candidate.getExOleObjStg().getPersistId() == wantedId) {
                match = candidate;
            }
        }
    }
    if (match == null) {
        LOG.log(POILogger.WARN, "OLE data not found");
    }
    return match;
}
Also used : ExEmbed(org.apache.poi.hslf.record.ExEmbed) HSLFObjectData(org.apache.poi.hslf.usermodel.HSLFObjectData) HSLFSlideShow(org.apache.poi.hslf.usermodel.HSLFSlideShow)

Example 4 with HSLFObjectData

use of org.apache.poi.hslf.usermodel.HSLFObjectData in project poi by apache.

In the class TestOleEmbedding, method testOLEShape.

/**
 * Verifies that the OLE shapes on the first slide of
 * {@code ole2-embedding-2003.ppt} can be opened as an Excel workbook and a
 * Word document respectively, and that exactly two OLE shapes are present.
 *
 * @throws IOException if the presentation or an embedded document cannot be read
 */
@Test
public void testOLEShape() throws IOException {
    // try-with-resources throughout: documents are closed even when an assertion fails
    try (HSLFSlideShow ppt = new HSLFSlideShow(_slTests.openResourceAsStream("ole2-embedding-2003.ppt"))) {
        HSLFSlide slide = ppt.getSlides().get(0);
        int cnt = 0;
        for (HSLFShape sh : slide.getShapes()) {
            if (sh instanceof OLEShape) {
                cnt++;
                OLEShape ole = (OLEShape) sh;
                HSLFObjectData data = ole.getObjectData();
                String instanceName = ole.getInstanceName();
                if ("Worksheet".equals(instanceName)) {
                    //Voila! we created a workbook from the embedded OLE data
                    try (HSSFWorkbook wb = new HSSFWorkbook(data.getData())) {
                        HSSFSheet sheet = wb.getSheetAt(0);
                        //verify we can access the xls data
                        assertEquals(1, sheet.getRow(0).getCell(0).getNumericCellValue(), 0);
                        assertEquals(1, sheet.getRow(1).getCell(0).getNumericCellValue(), 0);
                        assertEquals(2, sheet.getRow(2).getCell(0).getNumericCellValue(), 0);
                        assertEquals(3, sheet.getRow(3).getCell(0).getNumericCellValue(), 0);
                        assertEquals(8, sheet.getRow(5).getCell(0).getNumericCellValue(), 0);
                    }
                } else if ("Document".equals(instanceName)) {
                    //creating a HWPF document
                    try (HWPFDocument doc = new HWPFDocument(data.getData())) {
                        String txt = doc.getRange().getParagraph(0).text();
                        assertEquals("OLE embedding is thoroughly unremarkable.\r", txt);
                    }
                }
            }
        }
        assertEquals("Expected 2 OLE shapes", 2, cnt);
    }
}
Also used : HWPFDocument(org.apache.poi.hwpf.HWPFDocument) HSLFShape(org.apache.poi.hslf.usermodel.HSLFShape) HSSFSheet(org.apache.poi.hssf.usermodel.HSSFSheet) HSLFObjectData(org.apache.poi.hslf.usermodel.HSLFObjectData) HSLFSlideShow(org.apache.poi.hslf.usermodel.HSLFSlideShow) HSLFSlide(org.apache.poi.hslf.usermodel.HSLFSlide) HSSFWorkbook(org.apache.poi.hssf.usermodel.HSSFWorkbook) Test(org.junit.Test)

Example 5 with HSLFObjectData

use of org.apache.poi.hslf.usermodel.HSLFObjectData in project tika by apache.

In the class HSLFExtractor, method handleSlideEmbeddedResources.

/**
 * Processes every OLE shape on a slide: writes an empty
 * {@code <div class="embedded" id="...">} marker into the XHTML output and
 * passes the shape's data on to the embedded-document handling.
 *
 * <p>Failures while reading a shape are recorded against the parent metadata
 * and the shape is skipped; they do not abort the slide.
 *
 * @param slide the slide whose shapes are inspected
 * @param xhtml the handler receiving the placeholder elements and embedded content
 * @throws TikaException declared for the embedded-document handlers called here
 * @throws SAXException  if writing to {@code xhtml} fails
 * @throws IOException   declared for the embedded-document handlers called here
 */
private void handleSlideEmbeddedResources(HSLFSlide slide, XHTMLContentHandler xhtml) throws TikaException, SAXException, IOException {
    List<HSLFShape> slideShapes;
    try {
        slideShapes = slide.getShapes();
    } catch (NullPointerException npe) {
        // Sometimes HSLF hits problems
        // Please open POI bugs for any you come across!
        EmbeddedDocumentUtil.recordEmbeddedStreamException(npe, parentMetadata);
        return;
    }
    for (HSLFShape candidate : slideShapes) {
        if (!(candidate instanceof OLEShape)) {
            continue;
        }
        OLEShape ole = (OLEShape) candidate;
        HSLFObjectData objectData;
        try {
            objectData = ole.getObjectData();
        } catch (NullPointerException npe) {
            /* getObjectData throws NPE some times. */
            EmbeddedDocumentUtil.recordEmbeddedStreamException(npe, parentMetadata);
            continue;
        }
        if (objectData == null) {
            continue;
        }
        String embeddedId = Integer.toString(ole.getObjectID());
        // Embedded Object: add a <div class="embedded" id="X"/> so the consumer
        // can see where in the main text each embedded document occurred.
        AttributesImpl divAttributes = new AttributesImpl();
        divAttributes.addAttribute("", "class", "class", "CDATA", "embedded");
        divAttributes.addAttribute("", "id", "id", "CDATA", embeddedId);
        xhtml.startElement("div", divAttributes);
        xhtml.endElement("div");
        InputStream rawStream;
        try {
            rawStream = objectData.getData();
        } catch (Exception e) {
            EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
            continue;
        }
        try (TikaInputStream tis = TikaInputStream.get(rawStream)) {
            // Excel charts are mapped to the Excel media type directly; everything else is detected
            String mediaType = "Excel.Chart.8".equals(ole.getProgID())
                    ? "application/vnd.ms-excel"
                    : getTikaConfig().getDetector().detect(tis, new Metadata()).toString();
            if (mediaType.equals("application/x-tika-msoffice-embedded; format=comp_obj")) {
                // shield the outer stream so that closing the file system does not close it
                try (NPOIFSFileSystem fileSystem = new NPOIFSFileSystem(new CloseShieldInputStream(tis))) {
                    handleEmbeddedOfficeDoc(fileSystem.getRoot(), embeddedId, xhtml);
                }
            } else {
                handleEmbeddedResource(tis, embeddedId, embeddedId, mediaType, xhtml, false);
            }
        } catch (IOException e) {
            EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
        }
    }
}
Also used : TikaInputStream(org.apache.tika.io.TikaInputStream) CloseShieldInputStream(org.apache.tika.io.CloseShieldInputStream) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) TikaInputStream(org.apache.tika.io.TikaInputStream) IOException(java.io.IOException) HSLFObjectData(org.apache.poi.hslf.usermodel.HSLFObjectData) OLEShape(org.apache.poi.hslf.model.OLEShape) TikaException(org.apache.tika.exception.TikaException) IOException(java.io.IOException) SAXException(org.xml.sax.SAXException) NPOIFSFileSystem(org.apache.poi.poifs.filesystem.NPOIFSFileSystem) HSLFShape(org.apache.poi.hslf.usermodel.HSLFShape) AttributesImpl(org.xml.sax.helpers.AttributesImpl) MediaType(org.apache.tika.mime.MediaType) CloseShieldInputStream(org.apache.tika.io.CloseShieldInputStream)

Aggregations

HSLFObjectData (org.apache.poi.hslf.usermodel.HSLFObjectData)5 HSLFShape (org.apache.poi.hslf.usermodel.HSLFShape)3 HSLFSlideShow (org.apache.poi.hslf.usermodel.HSLFSlideShow)3 InputStream (java.io.InputStream)2 OLEShape (org.apache.poi.hslf.model.OLEShape)2 HSLFPictureData (org.apache.poi.hslf.usermodel.HSLFPictureData)2 HSLFSlide (org.apache.poi.hslf.usermodel.HSLFSlide)2 HSSFWorkbook (org.apache.poi.hssf.usermodel.HSSFWorkbook)2 HWPFDocument (org.apache.poi.hwpf.HWPFDocument)2 Test (org.junit.Test)2 FileInputStream (java.io.FileInputStream)1 FileOutputStream (java.io.FileOutputStream)1 IOException (java.io.IOException)1 ExEmbed (org.apache.poi.hslf.record.ExEmbed)1 HSLFPictureShape (org.apache.poi.hslf.usermodel.HSLFPictureShape)1 HSLFSlideShowImpl (org.apache.poi.hslf.usermodel.HSLFSlideShowImpl)1 HSLFSoundData (org.apache.poi.hslf.usermodel.HSLFSoundData)1 HSSFSheet (org.apache.poi.hssf.usermodel.HSSFSheet)1 Paragraph (org.apache.poi.hwpf.usermodel.Paragraph)1 Range (org.apache.poi.hwpf.usermodel.Range)1