Use of org.apache.poi.hslf.usermodel.HSLFObjectData in project poi by Apache.
Class DataExtraction, method main.
/**
 * Extracts the sounds, embedded OLE objects and pictures of a PowerPoint
 * presentation and writes each of them to a file in the current directory.
 *
 * <p>Embedded worksheets are only opened (to show they are readable), embedded
 * Word documents are printed paragraph by paragraph and saved as {@code .doc},
 * any other OLE object is dumped as {@code .dat}.</p>
 *
 * @param args command line arguments; {@code args[0]} is the presentation to read.
 *             Without arguments the usage text is printed.
 * @throws Exception if the presentation cannot be read or an output file cannot be written
 */
public static void main(String[] args) throws Exception {
    if (args.length == 0) {
        usage();
        return;
    }

    HSLFSlideShow ppt;
    FileInputStream is = new FileInputStream(args[0]);
    try {
        ppt = new HSLFSlideShow(is);
    } finally {
        is.close();
    }

    try {
        //extract all sound files embedded in this presentation
        HSLFSoundData[] sound = ppt.getSoundData();
        for (int i = 0; i < sound.length; i++) {
            //*.wav
            String type = sound[i].getSoundType();
            //typically file name
            String name = sound[i].getSoundName();
            //raw bytes
            byte[] data = sound[i].getData();
            //save the sound on disk
            FileOutputStream out = new FileOutputStream(name + type);
            try {
                out.write(data);
            } finally {
                out.close();
            }
        }

        int oleIdx = -1, picIdx = -1;
        for (HSLFSlide slide : ppt.getSlides()) {
            for (HSLFShape shape : slide.getShapes()) {
                if (shape instanceof OLEShape) {
                    //extract embedded OLE documents
                    oleIdx++;
                    OLEShape ole = (OLEShape) shape;
                    HSLFObjectData data = ole.getObjectData();
                    if (data == null) {
                        // no backing data for this shape: nothing to extract
                        continue;
                    }
                    String name = ole.getInstanceName();
                    if ("Worksheet".equals(name)) {
                        //read xls
                        HSSFWorkbook wb = new HSSFWorkbook(data.getData());
                        wb.close();
                    } else if ("Document".equals(name)) {
                        HWPFDocument doc = new HWPFDocument(data.getData());
                        try {
                            //read the word document
                            Range r = doc.getRange();
                            for (int k = 0; k < r.numParagraphs(); k++) {
                                Paragraph p = r.getParagraph(k);
                                System.out.println(p.text());
                            }
                            //save on disk
                            FileOutputStream out = new FileOutputStream(name + "-(" + (oleIdx) + ").doc");
                            try {
                                doc.write(out);
                            } finally {
                                out.close();
                            }
                        } finally {
                            doc.close();
                        }
                    } else {
                        FileOutputStream out = new FileOutputStream(ole.getProgID() + "-" + (oleIdx + 1) + ".dat");
                        try {
                            InputStream dis = data.getData();
                            try {
                                byte[] chunk = new byte[2048];
                                int count;
                                while ((count = dis.read(chunk)) >= 0) {
                                    out.write(chunk, 0, count);
                                }
                            } finally {
                                // close the OLE data stream itself, not the presentation input stream
                                dis.close();
                            }
                        } finally {
                            out.close();
                        }
                    }
                } else if (shape instanceof HSLFPictureShape) {
                    //Pictures
                    picIdx++;
                    HSLFPictureShape p = (HSLFPictureShape) shape;
                    HSLFPictureData data = p.getPictureData();
                    String ext = data.getType().extension;
                    FileOutputStream out = new FileOutputStream("pict-" + picIdx + ext);
                    try {
                        out.write(data.getData());
                    } finally {
                        out.close();
                    }
                }
            }
        }
    } finally {
        ppt.close();
    }
}
Use of org.apache.poi.hslf.usermodel.HSLFObjectData in project poi by Apache.
Class TestOleEmbedding, method testOleEmbedding2003.
/**
 * Tests support for OLE objects by comparing the checksums of the placeholder
 * pictures and of the embedded objects in {@code ole2-embedding-2003.ppt}
 * against known values.
 *
 * @throws IOException if the test presentation cannot be read
 */
@Test
public void testOleEmbedding2003() throws IOException {
    HSLFSlideShowImpl slideShow = new HSLFSlideShowImpl(_slTests.openResourceAsStream("ole2-embedding-2003.ppt"));
    try {
        // Placeholder EMFs for clients that don't support the OLE components.
        List<HSLFPictureData> pictures = slideShow.getPictureData();
        assertEquals("Should be two pictures", 2, pictures.size());

        // expected checksums, consumed in order: first the pictures, then the embedded objects
        long[] checkSums = { 0xD37A4204L, 0x26A62F68L, 0x82853169L, 0xE0E45D2BL };
        int checkId = 0;

        // check for checksum to be uptodate
        for (HSLFPictureData pd : pictures) {
            long checkEMF = IOUtils.calculateChecksum(pd.getData());
            assertEquals(checkSums[checkId++], checkEMF);
        }

        // Actual embedded objects.
        HSLFObjectData[] objects = slideShow.getEmbeddedObjects();
        assertEquals("Should be two objects", 2, objects.length);
        for (HSLFObjectData od : objects) {
            long checkEMF = IOUtils.calculateChecksum(od.getData());
            assertEquals(checkSums[checkId++], checkEMF);
        }
    } finally {
        // release the slideshow even when an assertion above fails
        slideShow.close();
    }
}
Use of org.apache.poi.hslf.usermodel.HSLFObjectData in project poi by Apache.
Class OLEShape, method getObjectData.
/**
 * Looks up the embedded object data that belongs to this OLE shape.
 *
 * <p>The shape's {@code ExEmbed} record carries a storage reference; the
 * slideshow's embedded objects are scanned for the one whose persist id equals
 * that reference. If several objects match, the last one is returned.</p>
 *
 * @return the matching object data, or {@code null} if the shape has no
 *         {@code ExEmbed} record or no embedded object matches (a warning is
 *         logged in that case)
 */
@SuppressWarnings("resource")
public HSLFObjectData getObjectData() {
    final HSLFSlideShow slideShow = getSheet().getSlideShow();
    final HSLFObjectData[] candidates = slideShow.getEmbeddedObjects();
    final ExEmbed embed = getExEmbed();

    HSLFObjectData match = null;
    if (embed != null) {
        // reference to the persisted storage of the embedded object
        final int persistRef = embed.getExOleObjAtom().getObjStgDataRef();
        for (HSLFObjectData candidate : candidates) {
            if (persistRef == candidate.getExOleObjStg().getPersistId()) {
                match = candidate;
            }
        }
    }

    if (match != null) {
        return match;
    }
    LOG.log(POILogger.WARN, "OLE data not found");
    return null;
}
Use of org.apache.poi.hslf.usermodel.HSLFObjectData in project poi by Apache.
Class TestOleEmbedding, method testOLEShape.
/**
 * Verifies that the OLE shapes on the first slide of
 * {@code ole2-embedding-2003.ppt} expose their embedded data: the worksheet
 * must open as an {@code HSSFWorkbook} with the expected cell values and the
 * document must open as an {@code HWPFDocument} with the expected text.
 *
 * @throws IOException if the test presentation or an embedded object cannot be read
 */
@Test
public void testOLEShape() throws IOException {
    HSLFSlideShow ppt = new HSLFSlideShow(_slTests.openResourceAsStream("ole2-embedding-2003.ppt"));
    try {
        HSLFSlide slide = ppt.getSlides().get(0);
        int cnt = 0;
        for (HSLFShape sh : slide.getShapes()) {
            if (!(sh instanceof OLEShape)) {
                continue;
            }
            cnt++;
            OLEShape ole = (OLEShape) sh;
            String instanceName = ole.getInstanceName();
            HSLFObjectData data = ole.getObjectData();
            // fail with a clear message instead of an NPE further down
            assertNotNull("No object data for OLE shape " + instanceName, data);
            if ("Worksheet".equals(instanceName)) {
                //Voila! we created a workbook from the embedded OLE data
                HSSFWorkbook wb = new HSSFWorkbook(data.getData());
                try {
                    HSSFSheet sheet = wb.getSheetAt(0);
                    //verify we can access the xls data
                    assertEquals(1, sheet.getRow(0).getCell(0).getNumericCellValue(), 0);
                    assertEquals(1, sheet.getRow(1).getCell(0).getNumericCellValue(), 0);
                    assertEquals(2, sheet.getRow(2).getCell(0).getNumericCellValue(), 0);
                    assertEquals(3, sheet.getRow(3).getCell(0).getNumericCellValue(), 0);
                    assertEquals(8, sheet.getRow(5).getCell(0).getNumericCellValue(), 0);
                } finally {
                    wb.close();
                }
            } else if ("Document".equals(instanceName)) {
                //creating a HWPF document
                HWPFDocument doc = new HWPFDocument(data.getData());
                try {
                    String txt = doc.getRange().getParagraph(0).text();
                    assertEquals("OLE embedding is thoroughly unremarkable.\r", txt);
                } finally {
                    doc.close();
                }
            }
        }
        assertEquals("Expected 2 OLE shapes", 2, cnt);
    } finally {
        // release the slideshow even when an assertion above fails
        ppt.close();
    }
}
Use of org.apache.poi.hslf.usermodel.HSLFObjectData in project tika by Apache.
Class HSLFExtractor, method handleSlideEmbeddedResources.
/**
 * Processes the OLE shapes of a slide: for each one that has object data, an
 * empty {@code <div class="embedded" id="...">} marker is written and the
 * embedded payload is handed on for embedded-document handling.
 *
 * <p>Failures to list the shapes, to obtain the object data, or to read the
 * payload are recorded on {@code parentMetadata} rather than propagated.</p>
 *
 * @param slide the slide whose shapes are inspected
 * @param xhtml the handler receiving the marker elements and embedded content
 * @throws TikaException declared for the embedded-document handlers invoked here
 * @throws SAXException if writing to {@code xhtml} fails
 * @throws IOException declared for the embedded-document handlers invoked here;
 *         an {@code IOException} raised while processing a payload is recorded, not thrown
 */
private void handleSlideEmbeddedResources(HSLFSlide slide, XHTMLContentHandler xhtml) throws TikaException, SAXException, IOException {
    List<HSLFShape> slideShapes;
    try {
        slideShapes = slide.getShapes();
    } catch (NullPointerException e) {
        // Sometimes HSLF hits problems
        // Please open POI bugs for any you come across!
        EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
        return;
    }

    for (HSLFShape candidate : slideShapes) {
        if (!(candidate instanceof OLEShape)) {
            continue;
        }
        OLEShape oleShape = (OLEShape) candidate;

        HSLFObjectData objectData;
        try {
            objectData = oleShape.getObjectData();
        } catch (NullPointerException e) {
            /* getObjectData throws NPE some times. */
            EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
            continue;
        }
        if (objectData == null) {
            continue;
        }

        String objectId = Integer.toString(oleShape.getObjectID());

        // Embedded Object: add a <div class="embedded" id="X"/> so the
        // consumer can see where in the main text each embedded
        // document occurred:
        AttributesImpl divAttributes = new AttributesImpl();
        divAttributes.addAttribute("", "class", "class", "CDATA", "embedded");
        divAttributes.addAttribute("", "id", "id", "CDATA", objectId);
        xhtml.startElement("div", divAttributes);
        xhtml.endElement("div");

        InputStream payload;
        try {
            payload = objectData.getData();
        } catch (Exception e) {
            EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
            continue;
        }

        try (TikaInputStream stream = TikaInputStream.get(payload)) {
            // Excel charts are mapped to a fixed type; everything else is detected
            String mediaType = "Excel.Chart.8".equals(oleShape.getProgID())
                    ? "application/vnd.ms-excel"
                    : getTikaConfig().getDetector().detect(stream, new Metadata()).toString();

            if (mediaType.equals("application/x-tika-msoffice-embedded; format=comp_obj")) {
                // shield the stream so closing the file system does not close it
                try (NPOIFSFileSystem npoifs = new NPOIFSFileSystem(new CloseShieldInputStream(stream))) {
                    handleEmbeddedOfficeDoc(npoifs.getRoot(), objectId, xhtml);
                }
            } else {
                handleEmbeddedResource(stream, objectId, objectId, mediaType, xhtml, false);
            }
        } catch (IOException e) {
            EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
        }
    }
}
Aggregations