Search in sources :

Example 6 with RecordFormatException

use of org.apache.poi.util.RecordFormatException in project poi by apache.

The serialize method of the class EscherTextboxRecord.

/**
 * Writes this record into {@code data} starting at {@code offset}.
 * <p>
 * The output is an 8-byte header (2-byte options, 2-byte record id, 4-byte
 * payload length) followed by the raw payload bytes held in {@code thedata}.
 * The listener is notified before the first byte and after the last byte
 * is written.
 *
 * @param offset   index in {@code data} at which writing starts
 * @param data     destination buffer
 * @param listener callback notified before and after serialization
 * @return the number of bytes written
 * @throws RecordFormatException if the number of bytes written differs from
 *                               {@code getRecordSize()}
 */
@Override
public int serialize(int offset, byte[] data, EscherSerializationListener listener) {
    listener.beforeRecordSerialize(offset, getRecordId(), this);

    // Header: options and record id first, then the payload length.
    LittleEndian.putShort(data, offset, getOptions());
    LittleEndian.putShort(data, offset + 2, getRecordId());
    final int payloadLength = thedata.length;
    LittleEndian.putInt(data, offset + 4, payloadLength);

    // The payload follows immediately after the 8-byte header.
    final int payloadStart = offset + 8;
    System.arraycopy(thedata, 0, data, payloadStart, payloadLength);

    final int end = payloadStart + payloadLength;
    final int written = end - offset;
    listener.afterRecordSerialize(end, getRecordId(), written, this);

    // Sanity check: what was written must match the size this record reports.
    if (written != getRecordSize()) {
        throw new RecordFormatException(written + " bytes written but getRecordSize() reports " + getRecordSize());
    }
    return written;
}
Also used : RecordFormatException(org.apache.poi.util.RecordFormatException)

Example 7 with RecordFormatException

use of org.apache.poi.util.RecordFormatException in project poi by apache.

The createShapeTree method of the class HSSFShapeFactory.

/**
 * Builds the shape tree for an escher container and adds the result to {@code out}.
 * <p>
 * A group container ({@code SPGR_CONTAINER}) becomes an {@link HSSFShapeGroup}
 * whose nested containers are processed recursively. A shape container
 * ({@code SP_CONTAINER}) becomes a single shape whose concrete class is chosen
 * from the object type of its common object data sub-record. Containers with
 * any other record id are ignored.
 *
 * @param container root escher container from which escher records must be taken
 * @param agg       EscherAggregate that supplies the shape-to-record mapping
 * @param out       shape container to which shapes must be added
 * @param root      node to create HSSFObjectData shapes
 * @throws RecordFormatException if no ObjRecord could be resolved for a shape container
 */
public static void createShapeTree(EscherContainerRecord container, EscherAggregate agg, HSSFShapeContainer out, DirectoryNode root) {
    if (container.getRecordId() == EscherContainerRecord.SPGR_CONTAINER) {
        // The first child is the group descriptor; its client data (if any) maps to the group's ObjRecord.
        EscherContainerRecord groupDescriptor = (EscherContainerRecord) container.getChild(0);
        EscherClientDataRecord groupClientData = groupDescriptor.getChildById(EscherClientDataRecord.RECORD_ID);
        ObjRecord groupObj = null;
        if (groupClientData != null) {
            groupObj = (ObjRecord) agg.getShapeToObjMapping().get(groupClientData);
        }
        HSSFShapeGroup group = new HSSFShapeGroup(container, groupObj);

        // Start at index 1: the group descriptor itself is not a member shape.
        List<EscherContainerRecord> nested = container.getChildContainers();
        for (int idx = 1; idx < nested.size(); idx++) {
            createShapeTree(nested.get(idx), agg, group, root);
        }
        out.addShape(group);
        return;
    }

    if (container.getRecordId() != EscherContainerRecord.SP_CONTAINER) {
        return;
    }

    // Resolve the records attached to this shape's client data and textbox children.
    Map<EscherRecord, Record> recordsByShape = agg.getShapeToObjMapping();
    ObjRecord objRecord = null;
    TextObjectRecord txtRecord = null;
    for (EscherRecord child : container) {
        final int childId = child.getRecordId();
        if (childId == EscherClientDataRecord.RECORD_ID) {
            objRecord = (ObjRecord) recordsByShape.get(child);
        } else if (childId == EscherTextboxRecord.RECORD_ID) {
            txtRecord = (TextObjectRecord) recordsByShape.get(child);
        }
    }
    if (objRecord == null) {
        throw new RecordFormatException("EscherClientDataRecord can't be found.");
    }

    if (isEmbeddedObject(objRecord)) {
        out.addShape(new HSSFObjectData(container, objRecord, root));
        return;
    }

    // The first sub-record carries the object type that selects the shape class.
    CommonObjectDataSubRecord cmo = (CommonObjectDataSubRecord) objRecord.getSubRecords().get(0);
    final HSSFShape shape;
    switch (cmo.getObjectType()) {
        case CommonObjectDataSubRecord.OBJECT_TYPE_PICTURE:
            shape = new HSSFPicture(container, objRecord);
            break;
        case CommonObjectDataSubRecord.OBJECT_TYPE_LINE:
            shape = new HSSFSimpleShape(container, objRecord);
            break;
        case CommonObjectDataSubRecord.OBJECT_TYPE_COMBO_BOX:
            shape = new HSSFCombobox(container, objRecord);
            break;
        case CommonObjectDataSubRecord.OBJECT_TYPE_TEXT:
            shape = new HSSFTextbox(container, objRecord, txtRecord);
            break;
        case CommonObjectDataSubRecord.OBJECT_TYPE_COMMENT:
            shape = new HSSFComment(container, objRecord, txtRecord, agg.getNoteRecordByObj(objRecord));
            break;
        case CommonObjectDataSubRecord.OBJECT_TYPE_MICROSOFT_OFFICE_DRAWING:
            // A drawing whose options carry a vertices property is a polygon; otherwise a simple shape.
            EscherOptRecord options = container.getChildById(EscherOptRecord.RECORD_ID);
            EscherProperty vertices = null;
            if (options != null) {
                vertices = options.lookup(EscherProperties.GEOMETRY__VERTICES);
            }
            if (vertices != null) {
                shape = new HSSFPolygon(container, objRecord, txtRecord);
            } else {
                shape = new HSSFSimpleShape(container, objRecord, txtRecord);
            }
            break;
        case CommonObjectDataSubRecord.OBJECT_TYPE_RECTANGLE:
        default:
            // Rectangles and every unrecognised type share the simple-shape representation.
            shape = new HSSFSimpleShape(container, objRecord, txtRecord);
            break;
    }
    out.addShape(shape);
}
Also used : TextObjectRecord(org.apache.poi.hssf.record.TextObjectRecord) CommonObjectDataSubRecord(org.apache.poi.hssf.record.CommonObjectDataSubRecord) EscherRecord(org.apache.poi.ddf.EscherRecord) EscherOptRecord(org.apache.poi.ddf.EscherOptRecord) EscherProperty(org.apache.poi.ddf.EscherProperty) RecordFormatException(org.apache.poi.util.RecordFormatException) ObjRecord(org.apache.poi.hssf.record.ObjRecord) EscherClientDataRecord(org.apache.poi.ddf.EscherClientDataRecord) EscherContainerRecord(org.apache.poi.ddf.EscherContainerRecord) Map(java.util.Map)

Example 8 with RecordFormatException

use of org.apache.poi.util.RecordFormatException in project tika by apache.

The parse method of the class EMFParser.

/**
 * Parses an EMF stream and emits its content as XHTML.
 * <p>
 * Comment records that carry multi-format or Windows Metafile payloads are
 * passed to {@code handleMultiFormats} / {@code handleWMF} together with an
 * embedded document extractor. Text from {@code exttextoutw} records is
 * collected into one {@code <p>} element per distinct Y coordinate, with a
 * space inserted when the horizontal gap to the previous text run exceeds a
 * fixed threshold.
 *
 * @throws TikaException if the underlying parser throws any RuntimeException
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    // Looked up lazily, the first time an embedded payload is encountered.
    EmbeddedDocumentExtractor extractor = null;
    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();
    try {
        HemfExtractor emf = new HemfExtractor(stream);
        long prevY = -1;
        long prevX = -1;
        // TODO: derive this from the font or frame/bounds information
        final long wordGapThreshold = 1000;
        StringBuilder line = new StringBuilder();
        for (HemfRecord record : emf) {
            if (record.getRecordType() == HemfRecordType.comment) {
                AbstractHemfComment comment = ((HemfCommentRecord) record).getComment();
                final boolean isMultiFormats = comment instanceof HemfCommentPublic.MultiFormats;
                final boolean isWmf = !isMultiFormats && comment instanceof HemfCommentPublic.WindowsMetafile;
                if ((isMultiFormats || isWmf) && extractor == null) {
                    extractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);
                }
                if (isMultiFormats) {
                    handleMultiFormats((HemfCommentPublic.MultiFormats) comment, xhtml, extractor);
                } else if (isWmf) {
                    handleWMF((HemfCommentPublic.WindowsMetafile) comment, xhtml, extractor);
                }
                continue;
            }
            if (!record.getRecordType().equals(HemfRecordType.exttextoutw)) {
                continue;
            }

            HemfText.ExtTextOutW textRecord = (HemfText.ExtTextOutW) record;
            final long y = textRecord.getY();
            // A change in Y starts a new line: flush what has been collected so far.
            if (prevY > -1 && prevY != y) {
                xhtml.startElement("p");
                xhtml.characters(line.toString());
                xhtml.endElement("p");
                line.setLength(0);
                prevX = -1;
            }
            final long x = textRecord.getX();
            // A large horizontal jump on the same line is treated as a word break.
            if (prevX > -1 && x - prevX > wordGapThreshold) {
                line.append(" ");
            }
            line.append(textRecord.getText());
            prevY = y;
            prevX = x;
        }
        // Flush the trailing line, if any.
        if (line.length() > 0) {
            xhtml.startElement("p");
            xhtml.characters(line.toString());
            xhtml.endElement("p");
        }
    } catch (RuntimeException e) {
        // POI's hemf parser reports parse problems as RecordFormatException (a
        // RuntimeException) and may throw other runtime exceptions as well;
        // all of them are surfaced as TikaException.
        throw new TikaException(e.getMessage(), e);
    }
    xhtml.endDocument();
}
Also used : TikaException(org.apache.tika.exception.TikaException) EmbeddedDocumentExtractor(org.apache.tika.extractor.EmbeddedDocumentExtractor) HemfRecord(org.apache.poi.hemf.record.HemfRecord) HemfCommentRecord(org.apache.poi.hemf.record.HemfCommentRecord) XHTMLContentHandler(org.apache.tika.sax.XHTMLContentHandler) HemfText(org.apache.poi.hemf.record.HemfText) RecordFormatException(org.apache.poi.util.RecordFormatException) AbstractHemfComment(org.apache.poi.hemf.record.AbstractHemfComment) HemfCommentPublic(org.apache.poi.hemf.record.HemfCommentPublic) HemfExtractor(org.apache.poi.hemf.extractor.HemfExtractor)

Example 9 with RecordFormatException

use of org.apache.poi.util.RecordFormatException in project tika by apache.

The parse method of the class WMFParser.

/**
 * Parses a WMF stream and emits each text-output record as an XHTML
 * {@code <p>} element.
 * <p>
 * Text is decoded with the charset of the most recently seen
 * {@code createFontIndirect} record, falling back to CP-1252 when no font has
 * been seen yet or the font does not specify a usable charset.
 *
 * @throws TikaException if the underlying parser throws a RuntimeException
 *                       or an AssertionError
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();
    try {
        HwmfPicture picture = new HwmfPicture(stream);
        // Declared outside the loop on purpose: the font record and the text
        // records that use it are separate records, so the charset has to
        // survive from one iteration to the next. Declaring it inside the loop
        // reset it to CP-1252 for every record and the font charset was never
        // applied.
        Charset charset = LocaleUtil.CHARSET_1252;
        for (HwmfRecord record : picture.getRecords()) {
            // This fix should be done within POI
            if (record.getRecordType().equals(HwmfRecordType.createFontIndirect)) {
                HwmfFont font = ((HwmfText.WmfCreateFontIndirect) record).getFont();
                charset = (font.getCharSet() == null || font.getCharSet().getCharset() == null) ? LocaleUtil.CHARSET_1252 : font.getCharSet().getCharset();
            }
            if (record.getRecordType().equals(HwmfRecordType.extTextOut)) {
                HwmfText.WmfExtTextOut textOut = (HwmfText.WmfExtTextOut) record;
                xhtml.startElement("p");
                xhtml.characters(textOut.getText(charset));
                xhtml.endElement("p");
            } else if (record.getRecordType().equals(HwmfRecordType.textOut)) {
                HwmfText.WmfTextOut textOut = (HwmfText.WmfTextOut) record;
                xhtml.startElement("p");
                xhtml.characters(textOut.getText(charset));
                xhtml.endElement("p");
            }
        }
    } catch (RuntimeException e) {
        // POI's hwmf parser reports parse problems as RecordFormatException (a
        // RuntimeException) and may throw other runtime exceptions as well;
        // all of them are surfaced as TikaException.
        throw new TikaException(e.getMessage(), e);
    } catch (AssertionError e) {
        // POI's hwmf parser can throw these for parse exceptions
        throw new TikaException(e.getMessage(), e);
    }
    xhtml.endDocument();
}
Also used : TikaException(org.apache.tika.exception.TikaException) HwmfRecord(org.apache.poi.hwmf.record.HwmfRecord) Charset(java.nio.charset.Charset) HwmfText(org.apache.poi.hwmf.record.HwmfText) XHTMLContentHandler(org.apache.tika.sax.XHTMLContentHandler) HwmfFont(org.apache.poi.hwmf.record.HwmfFont) HwmfPicture(org.apache.poi.hwmf.usermodel.HwmfPicture) RecordFormatException(org.apache.poi.util.RecordFormatException)

Aggregations

RecordFormatException (org.apache.poi.util.RecordFormatException)9 EscherContainerRecord (org.apache.poi.ddf.EscherContainerRecord)3 EscherOptRecord (org.apache.poi.ddf.EscherOptRecord)3 EscherRecord (org.apache.poi.ddf.EscherRecord)3 EscherBSERecord (org.apache.poi.ddf.EscherBSERecord)2 EscherDgRecord (org.apache.poi.ddf.EscherDgRecord)2 EscherDggRecord (org.apache.poi.ddf.EscherDggRecord)2 EscherSpRecord (org.apache.poi.ddf.EscherSpRecord)2 TikaException (org.apache.tika.exception.TikaException)2 XHTMLContentHandler (org.apache.tika.sax.XHTMLContentHandler)2 Charset (java.nio.charset.Charset)1 ArrayList (java.util.ArrayList)1 Map (java.util.Map)1 EscherClientDataRecord (org.apache.poi.ddf.EscherClientDataRecord)1 EscherProperty (org.apache.poi.ddf.EscherProperty)1 EscherSimpleProperty (org.apache.poi.ddf.EscherSimpleProperty)1 EscherSplitMenuColorsRecord (org.apache.poi.ddf.EscherSplitMenuColorsRecord)1 HemfExtractor (org.apache.poi.hemf.extractor.HemfExtractor)1 AbstractHemfComment (org.apache.poi.hemf.record.AbstractHemfComment)1 HemfCommentPublic (org.apache.poi.hemf.record.HemfCommentPublic)1