use of org.apache.xmlbeans.XmlException in project poi by apache.
the class XWPFHeader method onDocumentRead.
/**
 * Reads the header part and fills this header's body element lists.
 * <p>
 * After delegating to the superclass, the package part is parsed as an
 * {@code HdrDocument} and its {@code hdr} element becomes {@code headerFooter}.
 * Each node selected by {@code ./*} on that element is then wrapped:
 * paragraphs are added to {@code paragraphs}, tables to {@code tables}, and
 * both of those plus block-level SDTs are appended to {@code bodyElements}
 * in the order the cursor yields them.
 *
 * @throws IOException if the part's input stream cannot be obtained, read or closed
 * @throws POIXMLException if an {@link XmlException} is raised while parsing the part
 */
@Override
protected void onDocumentRead() throws IOException {
    super.onDocumentRead();
    InputStream is = null;
    try {
        is = getPackagePart().getInputStream();
        HdrDocument hdrDocument = HdrDocument.Factory.parse(is, DEFAULT_XML_OPTIONS);
        headerFooter = hdrDocument.getHdr();
        // parse the document with cursor and add
        // the XmlObject to its lists
        XmlCursor cursor = headerFooter.newCursor();
        try {
            cursor.selectPath("./*");
            while (cursor.toNextSelection()) {
                XmlObject o = cursor.getObject();
                if (o instanceof CTP) {
                    XWPFParagraph p = new XWPFParagraph((CTP) o, this);
                    paragraphs.add(p);
                    bodyElements.add(p);
                }
                if (o instanceof CTTbl) {
                    XWPFTable t = new XWPFTable((CTTbl) o, this);
                    tables.add(t);
                    bodyElements.add(t);
                }
                if (o instanceof CTSdtBlock) {
                    XWPFSDT c = new XWPFSDT((CTSdtBlock) o, this);
                    bodyElements.add(c);
                }
            }
        } finally {
            // Release the cursor even when wrapping one of the children throws;
            // previously it was only disposed on the success path.
            cursor.dispose();
        }
    } catch (XmlException e) {
        throw new POIXMLException(e);
    } finally {
        if (is != null) {
            is.close();
        }
    }
}
use of org.apache.xmlbeans.XmlException in project tika by apache.
the class OOXMLExtractorFactory method parse.
/**
 * Extracts metadata and XHTML content from an OOXML document.
 * <p>
 * An {@code OPCPackage} is obtained from the stream (reusing an already open
 * container, opening the backing file, or reading the stream itself), its
 * media type is detected, and a POI text extractor is chosen and wrapped in
 * the matching Tika decorator. Unsupported or undetected types are handed to
 * {@code EmptyParser}.
 *
 * @param stream      the document stream; may be a {@code TikaInputStream}
 * @param baseHandler receives the XHTML output
 * @param metadata    populated with the content type and extracted metadata
 * @param context     supplies the {@code Locale} and the {@code OfficeParserConfig}
 * @throws IOException   if reading the document fails
 * @throws SAXException  if the handler rejects an event
 * @throws TikaException if no suitable extractor can be created
 */
public static void parse(InputStream stream, ContentHandler baseHandler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    Locale locale = context.get(Locale.class, Locale.getDefault());
    ExtractorFactory.setThreadPrefersEventExtractors(true);
    try {
        // Locate or open the OPCPackage for the file
        TikaInputStream tikaStream = TikaInputStream.cast(stream);
        OPCPackage pkg;
        if (tikaStream != null && tikaStream.getOpenContainer() instanceof OPCPackage) {
            pkg = (OPCPackage) tikaStream.getOpenContainer();
        } else if (tikaStream != null && tikaStream.hasFile()) {
            pkg = OPCPackage.open(tikaStream.getFile().getPath(), PackageAccess.READ);
            tikaStream.setOpenContainer(pkg);
        } else {
            // Shield the caller's stream so that opening the package cannot close it
            pkg = OPCPackage.open(new CloseShieldInputStream(stream));
        }

        // Work out the type, and bail out to the empty parser if we don't handle it
        MediaType detectedType = ZipContainerDetector.detectOfficeOpenXML(pkg);
        if (detectedType == null || OOXMLParser.UNSUPPORTED_OOXML_TYPES.contains(detectedType)) {
            EmptyParser.INSTANCE.parse(stream, baseHandler, metadata, context);
            return;
        }
        metadata.set(Metadata.CONTENT_TYPE, detectedType.toString());

        // The config has already been set by OOXMLParser's call to configure(),
        // so it is relied upon to be non-null here.
        OfficeParserConfig config = context.get(OfficeParserConfig.class);

        // Pick the POI extractor: SAX docx first, then SAX pptx, then the POI default
        POIXMLTextExtractor poiExtractor = config.getUseSAXDocxExtractor() ? trySXWPF(pkg) : null;
        if (poiExtractor == null && config.getUseSAXPptxExtractor()) {
            poiExtractor = trySXSLF(pkg);
        }
        if (poiExtractor == null) {
            poiExtractor = ExtractorFactory.createExtractor(pkg);
        }

        // Wrap the POI extractor in the matching Tika decorator
        POIXMLDocument document = poiExtractor.getDocument();
        OOXMLExtractor extractor;
        if (poiExtractor instanceof XSSFBEventBasedExcelExtractor) {
            extractor = new XSSFBExcelExtractorDecorator(context, poiExtractor, locale);
        } else if (poiExtractor instanceof XSSFEventBasedExcelExtractor) {
            extractor = new XSSFExcelExtractorDecorator(context, poiExtractor, locale);
        } else if (poiExtractor instanceof XWPFEventBasedWordExtractor) {
            XWPFEventBasedWordExtractor wordExtractor = (XWPFEventBasedWordExtractor) poiExtractor;
            extractor = new SXWPFWordExtractorDecorator(metadata, context, wordExtractor);
            metadata.add("X-Parsed-By", XWPFEventBasedWordExtractor.class.getCanonicalName());
        } else if (poiExtractor instanceof XSLFEventBasedPowerPointExtractor) {
            XSLFEventBasedPowerPointExtractor slideExtractor = (XSLFEventBasedPowerPointExtractor) poiExtractor;
            extractor = new SXSLFPowerPointExtractorDecorator(metadata, context, slideExtractor);
            metadata.add("X-Parsed-By", XSLFEventBasedPowerPointExtractor.class.getCanonicalName());
        } else if (document == null) {
            throw new TikaException("Expecting UserModel based POI OOXML extractor with a document, but none found. " + "The extractor returned was a " + poiExtractor);
        } else if (document instanceof XMLSlideShow) {
            extractor = new XSLFPowerPointExtractorDecorator(context, (org.apache.poi.xslf.extractor.XSLFPowerPointExtractor) poiExtractor);
        } else if (document instanceof XWPFDocument) {
            extractor = new XWPFWordExtractorDecorator(context, (XWPFWordExtractor) poiExtractor);
        } else {
            extractor = new POIXMLTextExtractorDecorator(context, poiExtractor);
        }

        // Metadata goes first so that it is available to the client while
        // the content is being parsed (see TIKA-1109)
        extractor.getMetadataExtractor().extract(metadata);
        // Then the text, along with any in-document metadata
        extractor.getXHTML(baseHandler, metadata, context);
    } catch (IllegalArgumentException e) {
        String message = e.getMessage();
        if (message != null && message.startsWith("No supported documents found")) {
            throw new TikaException("TIKA-418: RuntimeException while getting content" + " for thmx and xps file types", e);
        }
        throw new TikaException("Error creating OOXML extractor", e);
    } catch (OpenXML4JException | XmlException e) {
        // InvalidFormatException is an OpenXML4JException, so it is covered here as well
        throw new TikaException("Error creating OOXML extractor", e);
    }
}
use of org.apache.xmlbeans.XmlException in project tika by apache.
the class XSLFPowerPointExtractorDecorator method getMainDocumentParts.
/**
 * Collects the package parts that make up the main content of the slide show.
 * <p>
 * In PowerPoint files, slides have things embedded in them,
 * and slide drawings which have the images. For every slide referenced by
 * the presentation the slide part is passed to {@code addSlideParts}; the
 * presentation part itself is then added (so macros are included), followed
 * by the parts related to it through the slide-master and handout-master
 * relationship types.
 *
 * @return the collected parts, in the order described above
 * @throws TikaException if the slide show cannot be opened from the package
 *                       or a slide part cannot be resolved
 */
@Override
protected List<PackagePart> getMainDocumentParts() throws TikaException {
    List<PackagePart> parts = new ArrayList<>();
    XSLFSlideShow document;
    try {
        document = new XSLFSlideShow(extractor.getPackage());
    } catch (Exception e) {
        // Shouldn't happen; pass the original exception as the cause so its
        // type and stack trace are not lost.
        throw new TikaException(e.getMessage(), e);
    }
    CTSlideIdList ctSlideIdList = document.getSlideReferences();
    if (ctSlideIdList != null) {
        for (int i = 0; i < ctSlideIdList.sizeOfSldIdArray(); i++) {
            CTSlideIdListEntry ctSlide = ctSlideIdList.getSldIdArray(i);
            // Add the slide
            PackagePart slidePart;
            try {
                slidePart = document.getSlidePart(ctSlide);
            } catch (IOException | XmlException e) {
                throw new TikaException("Broken OOXML file", e);
            }
            addSlideParts(slidePart, parts);
        }
    }
    //add full document to include macros
    parts.add(document.getPackagePart());
    for (String rel : new String[] { XSLFRelation.SLIDE_MASTER.getRelation(), HANDOUT_MASTER }) {
        try {
            PackageRelationshipCollection prc = document.getPackagePart().getRelationshipsByType(rel);
            for (int i = 0; i < prc.size(); i++) {
                PackagePart pp = document.getPackagePart().getRelatedPart(prc.getRelationship(i));
                if (pp != null) {
                    parts.add(pp);
                }
            }
        } catch (InvalidFormatException ignored) {
            // Best effort: a relationship type that cannot be resolved is skipped.
            // TODO: log
        }
    }
    return parts;
}
use of org.apache.xmlbeans.XmlException in project poi by apache.
the class XSSFCellStyle method cloneStyleFrom.
/**
 * Copies every style setting of another XSSFCellStyle onto this one.
 * Afterwards this style has the same properties as the source, while
 * the two remain independently editable. Whatever styling this
 * XSSFCellStyle had before is discarded.
 *
 * The source may belong to a different XSSFWorkbook, which makes this
 * the way to carry a style from one XSSFWorkbook over to another.
 *
 * @param source the style to copy from; must be an XSSFCellStyle
 * @throws IllegalArgumentException if {@code source} is not an XSSFCellStyle
 * @throws POIXMLException if one of the copied XML fragments cannot be re-parsed
 */
@Override
public void cloneStyleFrom(CellStyle source) {
    if (!(source instanceof XSSFCellStyle)) {
        throw new IllegalArgumentException("Can only clone from one XSSFCellStyle to another, not between HSSFCellStyle and XSSFCellStyle");
    }
    XSSFCellStyle other = (XSSFCellStyle) source;

    if (other._stylesSource == _stylesSource) {
        // Same styles table: the Xf records can simply be copied across
        _cellXf.set(other.getCoreXf());
        _cellStyleXf.set(other.getStyleXf());
    } else {
        // Different styles table: rebuild each piece from its XML text
        try {
            // drop child elements first so that no orphaned nodes remain
            if (_cellXf.isSetAlignment()) {
                _cellXf.unsetAlignment();
            }
            if (_cellXf.isSetExtLst()) {
                _cellXf.unsetExtLst();
            }

            // A fresh Xf carrying the same contents as the source's
            String xfXml = other.getCoreXf().toString();
            _cellXf = CTXf.Factory.parse(xfXml, DEFAULT_XML_OPTIONS);

            // bug 56295: make sure the fill exists and is set correctly
            String fillXml = other.getCTFill().toString();
            addFill(CTFill.Factory.parse(fillXml, DEFAULT_XML_OPTIONS));

            // bug 58084: set the borders correctly
            String borderXml = other.getCTBorder().toString();
            addBorder(CTBorder.Factory.parse(borderXml, DEFAULT_XML_OPTIONS));

            // Put the new Xf in place of the old one
            _stylesSource.replaceCellXfAt(_cellXfId, _cellXf);
        } catch (XmlException e) {
            throw new POIXMLException(e);
        }

        // Data format
        String formatString = other.getDataFormatString();
        XSSFDataFormat dataFormat = new XSSFDataFormat(_stylesSource);
        setDataFormat(dataFormat.getFormat(formatString));

        // Font
        try {
            String fontXml = other.getFont().getCTFont().toString();
            XSSFFont clonedFont = new XSSFFont(CTFont.Factory.parse(fontXml, DEFAULT_XML_OPTIONS));
            clonedFont.registerTo(_stylesSource);
            setFont(clonedFont);
        } catch (XmlException e) {
            throw new POIXMLException(e);
        }
    }

    // Cached details are stale now
    _font = null;
    _cellAlignment = null;
}
use of org.apache.xmlbeans.XmlException in project poi by apache.
the class CalculationChain method readFrom.
/**
 * Parses a calculation chain document from the given stream and stores its
 * {@code calcChain} element in {@code chain}.
 *
 * @param is the stream to parse; it is not closed by this method
 * @throws IOException if the stream cannot be read, or if the content is not
 *                     valid XML (the underlying {@link XmlException} is
 *                     attached as the cause)
 */
public void readFrom(InputStream is) throws IOException {
    try {
        CalcChainDocument doc = CalcChainDocument.Factory.parse(is, DEFAULT_XML_OPTIONS);
        chain = doc.getCalcChain();
    } catch (XmlException e) {
        // Chain the parse failure so callers can still see its type and stack trace
        throw new IOException(e.getLocalizedMessage(), e);
    }
}
Aggregations