use of org.apache.poi.openxml4j.exceptions.OpenXML4JException in project tika by apache.
the class XSSFBExcelExtractorDecorator method buildXHTML.
/**
 * Walks every sheet of the package and writes it to the XHTML handler.
 * <p>
 * For each sheet the output is a {@code div} containing, in order: an
 * {@code h1} with the sheet name, a {@code table}/{@code tbody} with the
 * sheet contents, the collected headers and footers, the text of the
 * sheet's shapes and finally the sheet's hyperlinks.
 *
 * @param xhtml handler that receives the generated XHTML events
 * @throws SAXException if the handler rejects an event
 * @throws XmlException if the package parts cannot be opened; wraps the
 *         underlying {@link OpenXML4JException}
 * @throws IOException if reading or closing a sheet stream fails
 * @see org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage container = extractor.getPackage();
    XSSFBSharedStringsTable strings;
    XSSFBReader.SheetIterator iter;
    XSSFBReader xssfReader;
    XSSFBStylesTable styles;
    try {
        xssfReader = new XSSFBReader(container);
        styles = xssfReader.getXSSFBStylesTable();
        iter = (XSSFBReader.SheetIterator) xssfReader.getSheetsData();
        strings = new XSSFBSharedStringsTable(container);
    } catch (OpenXML4JException e) {
        // InvalidFormatException is a kind of OpenXML4JException and was
        // handled identically, so a single handler covers both.
        throw new XmlException(e);
    }
    while (iter.hasNext()) {
        PackagePart sheetPart;
        SheetTextAsHTML sheetExtractor;
        // The sheet stream is only needed while the rows are parsed; close it
        // as soon as that is done so it is not leaked, even on failure.
        try (InputStream stream = iter.next()) {
            sheetPart = iter.getSheetPart();
            addDrawingHyperLinks(sheetPart);
            sheetParts.add(sheetPart);
            sheetExtractor = new SheetTextAsHTML(xhtml);
            XSSFBCommentsTable comments = iter.getXSSFBSheetComments();
            // Start, and output the sheet name
            xhtml.startElement("div");
            xhtml.element("h1", iter.getSheetName());
            // Extract the main sheet contents
            xhtml.startElement("table");
            xhtml.startElement("tbody");
            processSheet(sheetExtractor, comments, styles, strings, stream);
        }
        xhtml.endElement("tbody");
        xhtml.endElement("table");
        // Headers and footers are collected while the sheet is parsed and
        // are written out after the table: headers first, then footers.
        for (String header : sheetExtractor.headers) {
            extractHeaderFooter(header, xhtml);
        }
        for (String footer : sheetExtractor.footers) {
            extractHeaderFooter(footer, xhtml);
        }
        List<XSSFShape> shapes = iter.getShapes();
        processShapes(shapes, xhtml);
        //for now dump sheet hyperlinks at bottom of page
        //consider a double-pass of the inputstream to reunite hyperlinks with cells/textboxes
        //step 1: extract hyperlink info from bottom of page
        //step 2: process as we do now, but with cached hyperlink relationship info
        extractHyperLinks(sheetPart, xhtml);
        // All done with this sheet
        xhtml.endElement("div");
    }
}
use of org.apache.poi.openxml4j.exceptions.OpenXML4JException in project tika by apache.
the class XSSFExcelExtractorDecorator method buildXHTML.
/**
 * Emits one {@code div} per distinct sheet part of the workbook: the sheet
 * name as {@code h1}, the sheet contents as a table, then headers, footers,
 * optional shape text and the sheet's hyperlinks.
 *
 * @see org.apache.poi.xssf.extractor.XSSFExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    final OPCPackage pkg = extractor.getPackage();
    final XSSFReader reader;
    final StylesTable styleTable;
    final XSSFReader.SheetIterator sheets;
    final ReadOnlySharedStringsTable sharedStrings;
    try {
        reader = new XSSFReader(pkg);
        styleTable = reader.getStylesTable();
        sheets = (XSSFReader.SheetIterator) reader.getSheetsData();
        sharedStrings = new ReadOnlySharedStringsTable(pkg);
    } catch (OpenXML4JException e) {
        // also covers InvalidFormatException, which was wrapped the same way
        throw new XmlException(e);
    }

    // temporary workaround for POI-61034: the iterator may return the same
    // part more than once, so remember which part names were already handled
    // (remove once POI 3.17-beta1 is released)
    final Set<String> handledParts = new HashSet<>();
    while (sheets.hasNext()) {
        final SheetTextAsHTML sheetHandler = new SheetTextAsHTML(xhtml);
        PackagePart part = null;
        try (InputStream sheetStream = sheets.next()) {
            part = sheets.getSheetPart();
            // Set.add reports false when the name was already present
            if (!handledParts.add(part.getPartName().toString())) {
                continue;
            }
            addDrawingHyperLinks(part);
            sheetParts.add(part);
            final CommentsTable sheetComments = sheets.getSheetComments();

            // open the sheet container and announce its name
            xhtml.startElement("div");
            xhtml.element("h1", sheets.getSheetName());

            // the cell contents go into a table body
            xhtml.startElement("table");
            xhtml.startElement("tbody");
            processSheet(sheetHandler, sheetComments, styleTable, sharedStrings, sheetStream);
        }
        xhtml.endElement("tbody");
        xhtml.endElement("table");

        // headers, then footers, follow the table
        for (String headerText : sheetHandler.headers) {
            extractHeaderFooter(headerText, xhtml);
        }
        for (String footerText : sheetHandler.footers) {
            extractHeaderFooter(footerText, xhtml);
        }

        // text held in shapes is only emitted when the configuration asks for it
        if (config.getIncludeShapeBasedContent()) {
            processShapes(sheets.getShapes(), xhtml);
        }

        // for now dump sheet hyperlinks at bottom of page
        // consider a double-pass of the inputstream to reunite hyperlinks with cells/textboxes
        // step 1: extract hyperlink info from bottom of page
        // step 2: process as we do now, but with cached hyperlink relationship info
        extractHyperLinks(part, xhtml);

        // sheet finished
        xhtml.endElement("div");
    }
}
use of org.apache.poi.openxml4j.exceptions.OpenXML4JException in project tika by apache.
the class XWPFEventBasedWordExtractor method loadNumbering.
/**
 * Looks up the first numbering relationship of the given part and wraps its
 * target in an {@link XWPFNumbering}.
 *
 * @param packagePart part whose numbering relationship is resolved
 * @return the numbering, or {@code null} when there is no numbering
 *         relationship, it cannot be resolved to a part, or loading fails
 *         (failures are logged as a warning)
 */
private XWPFNumbering loadNumbering(PackagePart packagePart) {
    try {
        PackageRelationshipCollection relations =
                packagePart.getRelationshipsByType(XWPFRelation.NUMBERING.getRelation());
        if (relations.size() <= 0) {
            return null;
        }
        PackageRelationship firstRelation = relations.getRelationship(0);
        if (firstRelation == null) {
            return null;
        }
        PackagePart target = container.getPart(firstRelation);
        return target == null ? null : new XWPFNumbering(target);
    } catch (IOException | OpenXML4JException e) {
        LOG.warn("Couldn't load numbering", e);
        return null;
    }
}
use of org.apache.poi.openxml4j.exceptions.OpenXML4JException in project translationstudio8 by heartsome.
the class Xlsx2TbxConverter method doConvert.
/**
 * Reads the rows of the configured XLSX file and writes the converted
 * result to {@code targetFile}.
 * <p>
 * Parse failures are logged and rethrown as an {@link Exception} carrying a
 * localized message; the original exception is attached as the cause so the
 * root failure is not lost. The output stream is always closed on exit.
 *
 * @param targetFile path of the file to write
 * @param monitor    progress monitor handed to the row reader
 * @throws Exception if the spreadsheet cannot be parsed or contains an
 *                   invalid, differing, empty or duplicate language code
 */
@Override
public void doConvert(String targetFile, IProgressMonitor monitor) throws Exception {
    try {
        out = new FileOutputStream(new File(targetFile));
        XlsxRowReader reader = new XlsxRowReader(this.xlsxFile, 20, handler);
        try {
            reader.readRows(monitor);
        } catch (ParserConfigurationException e) {
            LOGGER.error("", e);
            throw new Exception(Messages.getString("converter.xlsx2tmx.parseExcle.error"), e);
        } catch (SAXException e) {
            LOGGER.error("", e);
            // The SAX message is compared against fixed marker strings to
            // pick the matching localized error.
            String marker = e.getMessage();
            if ("LANG-CODE-ERORR".equals(marker)) {
                throw new Exception(Messages.getString("converter.common.vaild.langcode.error"), e);
            } else if ("DIFF--SRC-LANG-CODE".equals(marker)) {
                throw new Exception(Messages.getString("converter.common.appendtmx.diffsrcLang.error"), e);
            } else if ("EMPTY-LANG-CODE".equals(marker)) {
                throw new Exception(Messages.getString("converter.common.vaild.langcode.error"), e);
            } else if ("DUPLICATE-LANG-CODE-ERORR".equals(marker)) {
                throw new Exception(Messages.getString("converter.common.vaild.duplicatelangcode.error"), e);
            }
            // NOTE(review): any other SAXException is only logged and the
            // conversion continues to write the closing section - confirm
            // this is intentional rather than a missing error path.
        } catch (IOException e) {
            LOGGER.error("", e);
            throw new Exception(Messages.getString("converter.xlsx2tmx.parseExcle.error"), e);
        } catch (OpenXML4JException e) {
            LOGGER.error("", e);
            throw new Exception(Messages.getString("converter.xlsx2tmx.parseExcle.error"), e);
        }
        String s = generateTbxEnd();
        if (s != null && s.length() != 0) {
            writeString(s);
        }
    } catch (FileNotFoundException e) {
        // NOTE(review): a target file that cannot be opened is only logged;
        // the caller is not told the conversion produced no output.
        LOGGER.error("", e);
    } finally {
        if (out != null) {
            try {
                out.close();
            } catch (IOException e) {
                // Closing is best-effort, but a failure here can mean
                // unflushed data, so record it instead of dropping it.
                LOGGER.error("", e);
            }
        }
    }
}
use of org.apache.poi.openxml4j.exceptions.OpenXML4JException in project poi by apache.
the class ZipPackagePropertiesMarshaller method marshall.
/**
 * Writes the part as a new entry of the given ZIP stream.
 * <p>
 * The entry is named after the part's OPC name. Once the entry has been
 * opened it is always closed again, including when saving reports failure
 * or an exception is thrown, so the ZIP stream is not left with a dangling
 * open entry.
 *
 * @param part part to save
 * @param out  target stream; must be a {@link ZipOutputStream}
 * @return {@code true} if the XML document was saved, {@code false} if
 *         {@code StreamHelper.saveXmlInStream} reported failure
 * @throws IllegalArgumentException if {@code out} is not a {@link ZipOutputStream}
 * @throws OpenXML4JException if an I/O error occurs while writing the entry
 */
@Override
public boolean marshall(PackagePart part, OutputStream out) throws OpenXML4JException {
    if (!(out instanceof ZipOutputStream)) {
        throw new IllegalArgumentException("ZipOutputStream expected!");
    }
    ZipOutputStream zos = (ZipOutputStream) out;
    // Saving the part in the zip file
    ZipEntry ctEntry = new ZipEntry(ZipHelper.getZipItemNameFromOPCName(part.getPartName().getURI().toString()));
    try {
        // Add entry in ZIP
        zos.putNextEntry(ctEntry);
        try {
            // Marshall the properties inside a XML document
            super.marshall(part, out);
            // Write the document into the open entry
            return StreamHelper.saveXmlInStream(xmlDoc, out);
        } finally {
            // Close the entry on every path, not only on success
            zos.closeEntry();
        }
    } catch (IOException e) {
        throw new OpenXML4JException(e.getLocalizedMessage(), e);
    }
}
Aggregations