use of org.apache.poi.openxml4j.exceptions.OpenXML4JException in project poi by apache.
the class ZipPackage method saveImpl.
/**
 * Writes this package to the given stream as a ZIP archive.
 *
 * <p>The package relationships part and the content types part are written
 * first, followed by every part that is not itself a relationship part. When
 * no part is found for either core-properties relationship type, a core
 * properties part is registered before anything is written.
 *
 * @param outputStream
 *            The stream to save this package to. It is used as-is when it
 *            already is a {@link ZipOutputStream}; otherwise it is wrapped in
 *            a new one. The ZIP stream is closed after a successful save.
 * @throws OpenXML4JRuntimeException
 *             If an error occurs inside the save sequence; any other
 *             exception raised there is wrapped into this type.
 *
 * @see #save(OutputStream)
 */
@Override
public void saveImpl(OutputStream outputStream) {
    // Refuse to save a package that was not opened for writing
    throwExceptionIfReadOnly();
    try {
        // Built inside the try block so that a failure while wrapping the
        // stream is reported like every other save error.
        final ZipOutputStream zos = (outputStream instanceof ZipOutputStream)
                ? (ZipOutputStream) outputStream
                : new ZipOutputStream(outputStream);

        // If no core properties part is referenced by the package yet, create
        // and register one so that it gets saved as well. The second lookup
        // only runs when the first one found nothing.
        final boolean corePropertiesPresent =
                this.getPartsByRelationshipType(PackageRelationshipTypes.CORE_PROPERTIES).size() != 0
                || this.getPartsByRelationshipType(PackageRelationshipTypes.CORE_PROPERTIES_ECMA376).size() != 0;
        if (!corePropertiesPresent) {
            LOG.log(POILogger.DEBUG, "Save core properties part");
            // Make sure the core properties exist ...
            getPackageProperties();
            // ... put them into the part list ...
            addPackagePart(this.packageProperties);
            // ... link them from the package relationships ...
            this.relationships.addRelationship(
                    this.packageProperties.getPartName().getURI(),
                    TargetMode.INTERNAL,
                    PackageRelationshipTypes.CORE_PROPERTIES,
                    null);
            // ... and declare their content type unless already declared.
            if (!this.contentTypeManager.isContentTypeRegister(ContentTypes.CORE_PROPERTIES_PART)) {
                this.contentTypeManager.addContentType(
                        this.packageProperties.getPartName(),
                        ContentTypes.CORE_PROPERTIES_PART);
            }
        }

        // Package-level relationships
        LOG.log(POILogger.DEBUG, "Save package relationships");
        ZipPartMarshaller.marshallRelationshipPart(
                this.getRelationships(),
                PackagingURIHelper.PACKAGE_RELATIONSHIPS_ROOT_PART_NAME,
                zos);

        // Content types
        LOG.log(POILogger.DEBUG, "Save content types part");
        this.contentTypeManager.save(zos);

        // Every remaining part
        for (PackagePart part : getParts()) {
            // Relationship parts are not marshalled from this loop
            if (part.isRelationshipPart()) {
                continue;
            }

            final PackagePartName partName = part.getPartName();
            LOG.log(POILogger.DEBUG, "Save part '" + ZipHelper.getZipItemNameFromOPCName(partName.getName()) + "'");

            final PartMarshaller registered = partMarshallers.get(part._contentType);
            final String errMsg = "The part " + partName.getURI() + " failed to be saved in the stream with marshaller ";

            // Use the marshaller registered for the content type, or the default one
            final PartMarshaller effective = (registered != null) ? registered : defaultPartMarshaller;
            if (!effective.marshall(part, zos)) {
                throw new OpenXML4JException(errMsg + effective);
            }
        }
        zos.close();
    } catch (OpenXML4JRuntimeException e) {
        // Already the right type: pass it through untouched
        throw e;
    } catch (Exception e) {
        throw new OpenXML4JRuntimeException("Fail to save: an error occurs while saving the package : " + e.getMessage(), e);
    }
}
use of org.apache.poi.openxml4j.exceptions.OpenXML4JException in project poi by apache.
the class ExtractorFactory method createExtractor.
/**
 * Inspects the core document of an OOXML package and returns the text
 * extractor that matches its content type.
 *
 * <p>The core document is looked up through the regular relationship type
 * first, then the strict one, then the Visio one. If anything goes wrong the
 * package is reverted before the exception is passed on to the caller.
 *
 * @param pkg An {@link OPCPackage}.
 * @return A {@link POIXMLTextExtractor} for the given package.
 * @throws IOException If an error occurs while reading the file
 * @throws OpenXML4JException If an error parsing the OpenXML file format is found.
 * @throws XmlException If an XML parsing error occurs.
 * @throws IllegalArgumentException If the package does not have exactly one core
 *         document, or if its content type is not supported.
 */
public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
    // On every failure below the package is reverted instead of closed:
    // close() could re-write the file, which is very likely not wanted
    // for a text extractor.
    try {
        // Regular Office core document first, then OOXML-Strict, then Visio
        PackageRelationshipCollection core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
        if (core.size() == 0) {
            core = pkg.getRelationshipsByType(STRICT_DOCUMENT_REL);
            if (core.size() == 0) {
                core = pkg.getRelationshipsByType(VISIO_DOCUMENT_REL);
                if (core.size() == 1) {
                    return new XDGFVisioExtractor(pkg);
                }
            }
        }

        // Exactly one core document is expected
        final int coreCount = core.size();
        if (coreCount != 1) {
            throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + coreCount);
        }

        // Identify the document from the content type of its core part
        final PackagePart corePart = pkg.getPart(core.getRelationship(0));
        final String contentType = corePart.getContentType();

        // Spreadsheet (XSSF)
        for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
            if (!rel.getContentType().equals(contentType)) {
                continue;
            }
            if (getPreferEventExtractor()) {
                return new XSSFEventBasedExcelExtractor(pkg);
            } else {
                return new XSSFExcelExtractor(pkg);
            }
        }

        // Word processing (XWPF)
        for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
            if (!rel.getContentType().equals(contentType)) {
                continue;
            }
            return new XWPFWordExtractor(pkg);
        }

        // Presentation (XSLF)
        for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
            if (!rel.getContentType().equals(contentType)) {
                continue;
            }
            return new XSLFPowerPointExtractor(pkg);
        }

        // Slide show theme files get special treatment
        if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) {
            return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
        }

        // Binary spreadsheet (xlsb)
        for (XSSFRelation rel : XSSFBEventBasedExcelExtractor.SUPPORTED_TYPES) {
            if (!rel.getContentType().equals(contentType)) {
                continue;
            }
            return new XSSFBEventBasedExcelExtractor(pkg);
        }

        throw new IllegalArgumentException("No supported documents found in the OOXML package (found " + contentType + ")");
    } catch (IOException e) {
        // revert (not close) so the file is not re-written, then pass the error on
        pkg.revert();
        throw e;
    } catch (OpenXML4JException e) {
        // revert (not close) so the file is not re-written, then pass the error on
        pkg.revert();
        throw e;
    } catch (XmlException e) {
        // revert (not close) so the file is not re-written, then pass the error on
        pkg.revert();
        throw e;
    } catch (RuntimeException e) {
        // revert (not close) so the file is not re-written, then pass the error on
        pkg.revert();
        throw e;
    }
}
use of org.apache.poi.openxml4j.exceptions.OpenXML4JException in project poi by apache.
the class TestXSSFBugs method bug54764.
/**
 * Regression test for bug 54764: sample files whose XML triggers excessive
 * entity expansion must be rejected, and valid files must still load
 * afterwards.
 */
@Test
public void bug54764() throws IOException, OpenXML4JException, XmlException {
    final OPCPackage expandingPkg = XSSFTestDataSamples.openSamplePackage("54764.xlsx");

    // The core properties part is found, but its values come back empty
    // because the expansion is too large to be considered valid.
    final POIXMLProperties.CoreProperties coreProps = new POIXMLProperties(expandingPkg).getCoreProperties();
    assertEquals(null, coreProps.getTitle());
    assertEquals(null, coreProps.getSubject());
    assertEquals(null, coreProps.getDescription());

    // Opening the spreadsheet itself has to be refused
    try {
        new XSSFWorkbook(expandingPkg).close();
        fail("Should fail as too much expansion occurs");
    } catch (POIXMLException expected) {
        // Expected
    }
    expandingPkg.close();

    // Same problem, this time with the entities in the Content Types
    try {
        XSSFTestDataSamples.openSamplePackage("54764-2.xlsx").close();
        fail("Should fail as too much expansion occurs");
    } catch (Exception expected) {
        // Expected
    }

    // A valid file must still parse after the failures above
    final Workbook validWorkbook = XSSFTestDataSamples.openSampleWorkbook("sample.xlsx");
    assertEquals(3, validWorkbook.getNumberOfSheets());
    validWorkbook.close();
}
use of org.apache.poi.openxml4j.exceptions.OpenXML4JException in project poi by apache.
the class XSSFEventBasedExcelExtractor method getText.
/**
 * Processes the file and returns the text.
 *
 * <p>Sheets are handled one after another. For each sheet the optional sheet
 * name comes first, then the optional header text, the cell text, the
 * optional text box content and finally the optional footer text.
 *
 * @return the extracted text, or {@code null} if an {@link IOException},
 *         {@link SAXException} or {@link OpenXML4JException} occurred; the
 *         exception is logged at WARN level in that case
 */
public String getText() {
    try {
        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(container, concatenatePhoneticRuns);
        XSSFReader xssfReader = new XSSFReader(container);
        StylesTable styles = xssfReader.getStylesTable();
        XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
        // Stays a StringBuffer because it is handed to helpers declared
        // outside this method (appendHeaderText, processShapes, ...).
        StringBuffer text = new StringBuffer();
        SheetTextExtractor sheetExtractor = new SheetTextExtractor();
        while (iter.hasNext()) {
            InputStream stream = iter.next();
            // Close the sheet stream even when processing this sheet fails;
            // previously it was only closed on the success path.
            try {
                if (includeSheetNames) {
                    text.append(iter.getSheetName());
                    text.append('\n');
                }
                CommentsTable comments = includeCellComments ? iter.getSheetComments() : null;
                processSheet(sheetExtractor, styles, comments, strings, stream);
                if (includeHeadersFooters) {
                    sheetExtractor.appendHeaderText(text);
                }
                sheetExtractor.appendCellText(text);
                if (includeTextBoxes) {
                    processShapes(iter.getShapes(), text);
                }
                if (includeHeadersFooters) {
                    sheetExtractor.appendFooterText(text);
                }
                // Clear the per-sheet state before the next sheet is read
                sheetExtractor.reset();
            } finally {
                stream.close();
            }
        }
        return text.toString();
    } catch (IOException e) {
        LOGGER.log(POILogger.WARN, e);
        return null;
    } catch (SAXException se) {
        LOGGER.log(POILogger.WARN, se);
        return null;
    } catch (OpenXML4JException o4je) {
        LOGGER.log(POILogger.WARN, o4je);
        return null;
    }
}
use of org.apache.poi.openxml4j.exceptions.OpenXML4JException in project tika by apache.
the class OOXMLExtractorFactory method parse.
/**
 * Parses an OOXML document: locates or opens its {@link OPCPackage}, picks a
 * POI text extractor for it, wraps that extractor in the matching decorator
 * and lets the decorator emit metadata and XHTML.
 *
 * <p>If the detected media type is {@code null} or listed in
 * {@code OOXMLParser.UNSUPPORTED_OOXML_TYPES}, parsing is delegated to
 * {@code EmptyParser} and nothing else happens.
 *
 * @param stream      the document to parse
 * @param baseHandler receives the XHTML output
 * @param metadata    receives the content type and the document metadata
 * @param context     parse context; read for a {@link Locale} and an
 *                    {@code OfficeParserConfig}
 * @throws IOException   if reading the document fails
 * @throws SAXException  if the handler reports an error
 * @throws TikaException if no extractor can be created or used; wraps
 *                       {@link IllegalArgumentException},
 *                       {@code InvalidFormatException},
 *                       {@code OpenXML4JException} and {@code XmlException}
 */
public static void parse(InputStream stream, ContentHandler baseHandler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    final Locale locale = context.get(Locale.class, Locale.getDefault());
    ExtractorFactory.setThreadPrefersEventExtractors(true);
    try {
        // Reuse a package that is already open on the stream, open the backing
        // file if there is one, otherwise read from the stream itself.
        final OPCPackage pkg;
        final TikaInputStream tis = TikaInputStream.cast(stream);
        if (tis != null && tis.getOpenContainer() instanceof OPCPackage) {
            pkg = (OPCPackage) tis.getOpenContainer();
        } else if (tis != null && tis.hasFile()) {
            pkg = OPCPackage.open(tis.getFile().getPath(), PackageAccess.READ);
            tis.setOpenContainer(pkg);
        } else {
            pkg = OPCPackage.open(new CloseShieldInputStream(stream));
        }

        // Only continue for a type this parser handles
        final MediaType type = ZipContainerDetector.detectOfficeOpenXML(pkg);
        if (type == null || OOXMLParser.UNSUPPORTED_OOXML_TYPES.contains(type)) {
            // Unsupported: hand over to the Empty Parser
            EmptyParser.INSTANCE.parse(stream, baseHandler, metadata, context);
            return;
        }
        metadata.set(Metadata.CONTENT_TYPE, type.toString());

        // The config has already been set by OOXMLParser's call to configure(),
        // so it is relied upon to be non-null here.
        final OfficeParserConfig config = context.get(OfficeParserConfig.class);

        // Try the SAX based extractors first when enabled, then fall back to
        // whatever POI's factory picks.
        POIXMLTextExtractor poiExtractor = null;
        if (config.getUseSAXDocxExtractor()) {
            poiExtractor = trySXWPF(pkg);
        }
        if (poiExtractor == null && config.getUseSAXPptxExtractor()) {
            poiExtractor = trySXSLF(pkg);
        }
        if (poiExtractor == null) {
            poiExtractor = ExtractorFactory.createExtractor(pkg);
        }

        // Wrap the POI extractor. The order of the checks matters: the
        // extractor type is tested before the document type.
        final POIXMLDocument document = poiExtractor.getDocument();
        final OOXMLExtractor extractor;
        if (poiExtractor instanceof XSSFBEventBasedExcelExtractor) {
            extractor = new XSSFBExcelExtractorDecorator(context, poiExtractor, locale);
        } else if (poiExtractor instanceof XSSFEventBasedExcelExtractor) {
            extractor = new XSSFExcelExtractorDecorator(context, poiExtractor, locale);
        } else if (poiExtractor instanceof XWPFEventBasedWordExtractor) {
            extractor = new SXWPFWordExtractorDecorator(metadata, context, (XWPFEventBasedWordExtractor) poiExtractor);
            metadata.add("X-Parsed-By", XWPFEventBasedWordExtractor.class.getCanonicalName());
        } else if (poiExtractor instanceof XSLFEventBasedPowerPointExtractor) {
            extractor = new SXSLFPowerPointExtractorDecorator(metadata, context, (XSLFEventBasedPowerPointExtractor) poiExtractor);
            metadata.add("X-Parsed-By", XSLFEventBasedPowerPointExtractor.class.getCanonicalName());
        } else if (document == null) {
            throw new TikaException("Expecting UserModel based POI OOXML extractor with a document, but none found. " + "The extractor returned was a " + poiExtractor);
        } else if (document instanceof XMLSlideShow) {
            extractor = new XSLFPowerPointExtractorDecorator(context, (org.apache.poi.xslf.extractor.XSLFPowerPointExtractor) poiExtractor);
        } else if (document instanceof XWPFDocument) {
            extractor = new XWPFWordExtractorDecorator(context, (XWPFWordExtractor) poiExtractor);
        } else {
            extractor = new POIXMLTextExtractorDecorator(context, poiExtractor);
        }

        // Metadata goes first so that clients can already read it while the
        // content is being parsed (see TIKA-1109)
        extractor.getMetadataExtractor().extract(metadata);
        // Then the text, along with any in-document metadata
        extractor.getXHTML(baseHandler, metadata, context);
    } catch (IllegalArgumentException e) {
        final String message = e.getMessage();
        final boolean noSupportedDocument = message != null && message.startsWith("No supported documents found");
        if (noSupportedDocument) {
            throw new TikaException("TIKA-418: RuntimeException while getting content" + " for thmx and xps file types", e);
        }
        throw new TikaException("Error creating OOXML extractor", e);
    } catch (InvalidFormatException e) {
        throw new TikaException("Error creating OOXML extractor", e);
    } catch (OpenXML4JException e) {
        throw new TikaException("Error creating OOXML extractor", e);
    } catch (XmlException e) {
        throw new TikaException("Error creating OOXML extractor", e);
    }
}
Aggregations