use of org.apache.poi.hslf.model.OLEShape in project poi by apache.
the class DataExtraction method main.
public static void main(String[] args) throws Exception {
if (args.length == 0) {
usage();
return;
}
FileInputStream is = new FileInputStream(args[0]);
HSLFSlideShow ppt = new HSLFSlideShow(is);
is.close();
//extract all sound files embedded in this presentation
HSLFSoundData[] sound = ppt.getSoundData();
for (int i = 0; i < sound.length; i++) {
//*.wav
String type = sound[i].getSoundType();
//typically file name
String name = sound[i].getSoundName();
//raw bytes
byte[] data = sound[i].getData();
//save the sound on disk
FileOutputStream out = new FileOutputStream(name + type);
out.write(data);
out.close();
}
int oleIdx = -1, picIdx = -1;
for (HSLFSlide slide : ppt.getSlides()) {
//extract embedded OLE documents
for (HSLFShape shape : slide.getShapes()) {
if (shape instanceof OLEShape) {
oleIdx++;
OLEShape ole = (OLEShape) shape;
HSLFObjectData data = ole.getObjectData();
String name = ole.getInstanceName();
if ("Worksheet".equals(name)) {
//read xls
@SuppressWarnings({ "unused", "resource" }) HSSFWorkbook wb = new HSSFWorkbook(data.getData());
} else if ("Document".equals(name)) {
HWPFDocument doc = new HWPFDocument(data.getData());
//read the word document
Range r = doc.getRange();
for (int k = 0; k < r.numParagraphs(); k++) {
Paragraph p = r.getParagraph(k);
System.out.println(p.text());
}
//save on disk
FileOutputStream out = new FileOutputStream(name + "-(" + (oleIdx) + ").doc");
doc.write(out);
out.close();
doc.close();
} else {
FileOutputStream out = new FileOutputStream(ole.getProgID() + "-" + (oleIdx + 1) + ".dat");
InputStream dis = data.getData();
byte[] chunk = new byte[2048];
int count;
while ((count = dis.read(chunk)) >= 0) {
out.write(chunk, 0, count);
}
is.close();
out.close();
}
} else //Pictures
if (shape instanceof HSLFPictureShape) {
picIdx++;
HSLFPictureShape p = (HSLFPictureShape) shape;
HSLFPictureData data = p.getPictureData();
String ext = data.getType().extension;
FileOutputStream out = new FileOutputStream("pict-" + picIdx + ext);
out.write(data.getData());
out.close();
}
}
}
ppt.close();
}
use of org.apache.poi.hslf.model.OLEShape in project poi by apache.
the class TestExtractor method test52991.
/**
* A powerpoint file with embeded powerpoint files
*/
@Test
public void test52991() throws IOException {
PowerPointExtractor ppe = openExtractor("badzip.ppt");
for (OLEShape shape : ppe.getOLEShapes()) {
IOUtils.copy(shape.getObjectData().getData(), new ByteArrayOutputStream());
}
ppe.close();
}
use of org.apache.poi.hslf.model.OLEShape in project poi by apache.
the class HSLFShapeFactory method createFrame.
private static HSLFShape createFrame(EscherContainerRecord spContainer, ShapeContainer<HSLFShape, HSLFTextParagraph> parent) {
InteractiveInfo info = getClientDataRecord(spContainer, RecordTypes.InteractiveInfo.typeID);
if (info != null && info.getInteractiveInfoAtom() != null) {
switch(info.getInteractiveInfoAtom().getAction()) {
case InteractiveInfoAtom.ACTION_OLE:
return new OLEShape(spContainer, parent);
case InteractiveInfoAtom.ACTION_MEDIA:
return new MovieShape(spContainer, parent);
default:
break;
}
}
ExObjRefAtom oes = getClientDataRecord(spContainer, RecordTypes.ExObjRefAtom.typeID);
return (oes != null) ? new OLEShape(spContainer, parent) : new HSLFPictureShape(spContainer, parent);
}
use of org.apache.poi.hslf.model.OLEShape in project tika by apache.
the class HSLFExtractor method handleSlideEmbeddedResources.
private void handleSlideEmbeddedResources(HSLFSlide slide, XHTMLContentHandler xhtml) throws TikaException, SAXException, IOException {
List<HSLFShape> shapes;
try {
shapes = slide.getShapes();
} catch (NullPointerException e) {
// Sometimes HSLF hits problems
// Please open POI bugs for any you come across!
EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
return;
}
for (HSLFShape shape : shapes) {
if (shape instanceof OLEShape) {
OLEShape oleShape = (OLEShape) shape;
HSLFObjectData data = null;
try {
data = oleShape.getObjectData();
} catch (NullPointerException e) {
/* getObjectData throws NPE some times. */
EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
continue;
}
if (data != null) {
String objID = Integer.toString(oleShape.getObjectID());
// Embedded Object: add a <div
// class="embedded" id="X"/> so consumer can see where
// in the main text each embedded document
// occurred:
AttributesImpl attributes = new AttributesImpl();
attributes.addAttribute("", "class", "class", "CDATA", "embedded");
attributes.addAttribute("", "id", "id", "CDATA", objID);
xhtml.startElement("div", attributes);
xhtml.endElement("div");
InputStream dataStream = null;
try {
dataStream = data.getData();
} catch (Exception e) {
EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
continue;
}
try (TikaInputStream stream = TikaInputStream.get(dataStream)) {
String mediaType = null;
if ("Excel.Chart.8".equals(oleShape.getProgID())) {
mediaType = "application/vnd.ms-excel";
} else {
MediaType mt = getTikaConfig().getDetector().detect(stream, new Metadata());
mediaType = mt.toString();
}
if (mediaType.equals("application/x-tika-msoffice-embedded; format=comp_obj")) {
try (NPOIFSFileSystem npoifs = new NPOIFSFileSystem(new CloseShieldInputStream(stream))) {
handleEmbeddedOfficeDoc(npoifs.getRoot(), objID, xhtml);
}
} else {
handleEmbeddedResource(stream, objID, objID, mediaType, xhtml, false);
}
} catch (IOException e) {
EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
}
}
}
}
}
use of org.apache.poi.hslf.model.OLEShape in project poi by apache.
the class TestExtractor method testExtractFromOwnEmbeded.
/**
* A powerpoint file with embeded powerpoint files
*/
@Test
public void testExtractFromOwnEmbeded() throws IOException {
PowerPointExtractor ppe = openExtractor("ppt_with_embeded.ppt");
List<OLEShape> shapes = ppe.getOLEShapes();
assertEquals("Expected 6 ole shapes", 6, shapes.size());
int num_ppt = 0, num_doc = 0, num_xls = 0;
for (OLEShape ole : shapes) {
String name = ole.getInstanceName();
InputStream data = ole.getObjectData().getData();
if ("Worksheet".equals(name)) {
HSSFWorkbook wb = new HSSFWorkbook(data);
num_xls++;
wb.close();
} else if ("Document".equals(name)) {
HWPFDocument doc = new HWPFDocument(data);
num_doc++;
doc.close();
} else if ("Presentation".equals(name)) {
num_ppt++;
HSLFSlideShow ppt = new HSLFSlideShow(data);
ppt.close();
}
data.close();
}
assertEquals("Expected 2 embedded Word Documents", 2, num_doc);
assertEquals("Expected 2 embedded Excel Spreadsheets", 2, num_xls);
assertEquals("Expected 2 embedded PowerPoint Presentations", 2, num_ppt);
ppe.close();
}
Aggregations