Search in sources :

Example 6 with ChmParsingException

use of org.apache.tika.parser.chm.exception.ChmParsingException in project tika by apache.

the class ChmLzxcResetTable method parse.

// @Override
/**
 * Fills {@code chmLzxcResetTable} from the raw bytes in {@code data}.
 *
 * <p>The remaining-data counter is first set to {@code data.length}. When
 * {@code validateParamaters} accepts the arguments, the fields are unmarshalled
 * in this order: version, block count, unknown, table offset, uncompressed
 * length, compressed length, block length; the block addresses are then
 * enumerated from {@code data}. Whether or not unmarshalling took place, the
 * version held by the table is checked afterwards.
 *
 * @param data raw bytes to read from
 * @param chmLzxcResetTable table object that receives the unmarshalled fields
 * @throws TikaException a {@code ChmParsingException} when the table's version
 *         is not {@code ChmConstants.CHM_VER_2}; anything thrown by the
 *         unmarshalling helpers propagates as well
 */
public void parse(byte[] data, ChmLzxcResetTable chmLzxcResetTable) throws TikaException {
    setDataRemained(data.length);

    if (validateParamaters(data, chmLzxcResetTable)) {
        // Four 32-bit fields.
        chmLzxcResetTable.setVersion(
                unmarshalUInt32(data, chmLzxcResetTable.getVersion()));
        chmLzxcResetTable.setBlockCount(
                unmarshalUInt32(data, chmLzxcResetTable.getBlockCount()));
        chmLzxcResetTable.setUnknown(
                unmarshalUInt32(data, chmLzxcResetTable.getUnknown()));
        chmLzxcResetTable.setTableOffset(
                unmarshalUInt32(data, chmLzxcResetTable.getTableOffset()));

        // Three 64-bit fields.
        chmLzxcResetTable.setUncompressedLen(
                unmarshalUint64(data, chmLzxcResetTable.getUncompressedLen()));
        chmLzxcResetTable.setCompressedLen(
                unmarshalUint64(data, chmLzxcResetTable.getCompressedLen()));
        chmLzxcResetTable.setBlockLlen(
                unmarshalUint64(data, chmLzxcResetTable.getBlockLen()));

        chmLzxcResetTable.setBlockAddress(enumerateBlockAddresses(data));
    }

    // The version check runs even when validation declined to unmarshal anything.
    final boolean versionSupported =
            chmLzxcResetTable.getVersion() == ChmConstants.CHM_VER_2;
    if (!versionSupported) {
        throw new ChmParsingException("does not seem currect version of chmLzxcResetTable");
    }
}
Also used : ChmParsingException(org.apache.tika.parser.chm.exception.ChmParsingException)

Example 7 with ChmParsingException

use of org.apache.tika.parser.chm.exception.ChmParsingException in project tika by apache.

the class ChmItsfHeader method parse.

// @Override
/**
 * Unmarshals an ITSF header from {@code data} into {@code chmItsfHeader} and
 * validates the result.
 *
 * <p>Fields are read in this order: signature, version, header length,
 * unknown_000c, last modified, language id, directory UUID, stream UUID,
 * unknown offset, unknown length, directory offset, directory length. The
 * data offset is not read from {@code data}; it is always computed as
 * directory offset plus directory length.
 *
 * @param data raw header bytes; the length must lie between
 *        {@code ChmConstants.CHM_ITSF_V2_LEN} and
 *        {@code ChmConstants.CHM_ITSF_V3_LEN} inclusive
 * @param chmItsfHeader header object that receives the unmarshalled fields
 * @throws TikaException if the length is out of range, the signature is not
 *         {@code ChmConstants.ITSF}, the header length is smaller than the
 *         minimum for its version, the version is neither 2 nor 3 (reported as
 *         {@code ChmParsingException}), or a version 3 header ends up with a
 *         negative remaining-data count
 */
public void parse(byte[] data, ChmItsfHeader chmItsfHeader) throws TikaException {
    final boolean tooShort = data.length < ChmConstants.CHM_ITSF_V2_LEN;
    final boolean tooLong = data.length > ChmConstants.CHM_ITSF_V3_LEN;
    if (tooShort || tooLong) {
        throw new TikaException("we only know how to deal with the 0x58 and 0x60 byte structures");
    }

    chmItsfHeader.setDataRemained(data.length);

    // Signature followed by the 32-bit fields.
    chmItsfHeader.unmarshalCharArray(data, chmItsfHeader, ChmConstants.CHM_SIGNATURE_LEN);
    chmItsfHeader.setVersion(
            chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getVersion()));
    chmItsfHeader.setHeaderLen(
            chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getHeaderLen()));
    chmItsfHeader.setUnknown_000c(
            chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getUnknown_000c()));
    chmItsfHeader.setLastModified(
            chmItsfHeader.unmarshalUInt32(data, chmItsfHeader.getLastModified()));
    chmItsfHeader.setLangId(
            chmItsfHeader.unmarshalUInt32(data, chmItsfHeader.getLangId()));

    // Two 16-byte UUIDs.
    chmItsfHeader.setDir_uuid(
            chmItsfHeader.unmarshalUuid(data, chmItsfHeader.getDir_uuid(), 16));
    chmItsfHeader.setStream_uuid(
            chmItsfHeader.unmarshalUuid(data, chmItsfHeader.getStream_uuid(), 16));

    // Four 64-bit fields.
    chmItsfHeader.setUnknownOffset(
            chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getUnknownOffset()));
    chmItsfHeader.setUnknownLen(
            chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getUnknownLen()));
    chmItsfHeader.setDirOffset(
            chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirOffset()));
    chmItsfHeader.setDirLen(
            chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirLen()));

    final String signature = new String(chmItsfHeader.getSignature(), UTF_8);
    if (!signature.equals(ChmConstants.ITSF)) {
        throw new TikaException("seems not valid file");
    }

    final boolean isV2 = chmItsfHeader.getVersion() == ChmConstants.CHM_VER_2;
    final boolean isV3 = chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3;
    if (isV2) {
        if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V2_LEN) {
            throw new TikaException("something wrong with header");
        }
    } else if (isV3) {
        if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V3_LEN) {
            throw new TikaException("unknown v3 header lenght");
        }
    } else {
        throw new ChmParsingException("unsupported chm format");
    }

    // Both versions compute the data offset the same way; a version 3 header
    // additionally requires that the remaining-data count is not negative.
    if (isV3 && chmItsfHeader.getDataRemained() < 0) {
        throw new TikaException("cannot set data offset, no data remained");
    }
    chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset() + chmItsfHeader.getDirLen());
}
Also used : ChmParsingException(org.apache.tika.parser.chm.exception.ChmParsingException) TikaException(org.apache.tika.exception.TikaException)

Example 8 with ChmParsingException

use of org.apache.tika.parser.chm.exception.ChmParsingException in project tika by apache.

the class ChmItspHeader method parse.

// @Override
/**
 * Unmarshals an ITSP header from {@code data} into {@code chmItspHeader} and
 * validates the result.
 *
 * <p>Fields are read in this order: signature, version, header length,
 * unknown_000c, block length, block index interval, index depth, index root,
 * index head, unknown_0024, number of blocks, unknown_002c, language id,
 * system UUID, unknown_0044. Every read is passed the header's current
 * remaining-data count.
 *
 * @param data raw header bytes; must be exactly
 *        {@code ChmConstants.CHM_ITSP_V1_LEN} long
 * @param chmItspHeader header object that receives the unmarshalled fields
 * @throws TikaException a {@code ChmParsingException} when {@code data} has the
 *         wrong length, the signature is not {@code ChmConstants.ITSP}, the
 *         version is not {@code ChmConstants.CHM_VER_1}, or the header length
 *         field is not {@code ChmConstants.CHM_ITSP_V1_LEN}
 */
public void parse(byte[] data, ChmItspHeader chmItspHeader) throws TikaException {
    if (data.length != ChmConstants.CHM_ITSP_V1_LEN) {
        throw new ChmParsingException("we only know how to deal with the 0x58 and 0x60 byte structures");
    }

    // Signature first, then the fixed sequence of 32-bit fields.
    chmItspHeader.unmarshalCharArray(data, chmItspHeader, ChmConstants.CHM_SIGNATURE_LEN);
    chmItspHeader.setVersion(chmItspHeader.unmarshalInt32(
            data, chmItspHeader.getDataRemained(), chmItspHeader.getVersion()));
    chmItspHeader.setHeader_len(chmItspHeader.unmarshalInt32(
            data, chmItspHeader.getDataRemained(), chmItspHeader.getHeader_len()));
    chmItspHeader.setUnknown_000c(chmItspHeader.unmarshalInt32(
            data, chmItspHeader.getDataRemained(), chmItspHeader.getUnknown_000c()));
    chmItspHeader.setBlock_len(chmItspHeader.unmarshalUInt32(
            data, chmItspHeader.getDataRemained(), chmItspHeader.getBlock_len()));
    chmItspHeader.setBlockidx_intvl(chmItspHeader.unmarshalInt32(
            data, chmItspHeader.getDataRemained(), chmItspHeader.getBlockidx_intvl()));
    chmItspHeader.setIndex_depth(chmItspHeader.unmarshalInt32(
            data, chmItspHeader.getDataRemained(), chmItspHeader.getIndex_depth()));
    chmItspHeader.setIndex_root(chmItspHeader.unmarshalInt32(
            data, chmItspHeader.getDataRemained(), chmItspHeader.getIndex_root()));
    chmItspHeader.setIndex_head(chmItspHeader.unmarshalInt32(
            data, chmItspHeader.getDataRemained(), chmItspHeader.getIndex_head()));
    chmItspHeader.setUnknown_0024(chmItspHeader.unmarshalInt32(
            data, chmItspHeader.getDataRemained(), chmItspHeader.getUnknown_0024()));
    chmItspHeader.setNum_blocks(chmItspHeader.unmarshalUInt32(
            data, chmItspHeader.getDataRemained(), chmItspHeader.getNum_blocks()));
    chmItspHeader.setUnknown_002c(chmItspHeader.unmarshalInt32(
            data, chmItspHeader.getDataRemained(), chmItspHeader.getUnknown_002c()));
    chmItspHeader.setLang_id(chmItspHeader.unmarshalUInt32(
            data, chmItspHeader.getDataRemained(), chmItspHeader.getLang_id()));

    // Two trailing byte-array fields of ChmConstants.BYTE_ARRAY_LENGHT bytes each.
    chmItspHeader.setSystem_uuid(chmItspHeader.unmarshalUuid(
            data, chmItspHeader.getDataRemained(), chmItspHeader.getSystem_uuid(),
            ChmConstants.BYTE_ARRAY_LENGHT));
    chmItspHeader.setUnknown_0044(chmItspHeader.unmarshalUuid(
            data, chmItspHeader.getDataRemained(), chmItspHeader.getUnknown_0044(),
            ChmConstants.BYTE_ARRAY_LENGHT));

    // Validity checks on the unmarshalled header.
    final String signature = new String(chmItspHeader.getSignature(), UTF_8);
    if (!signature.equals(ChmConstants.ITSP)) {
        throw new ChmParsingException("seems not valid signature");
    }
    if (chmItspHeader.getVersion() != ChmConstants.CHM_VER_1) {
        throw new ChmParsingException("!=ChmConstants.CHM_VER_1");
    }
    if (chmItspHeader.getHeader_len() != ChmConstants.CHM_ITSP_V1_LEN) {
        throw new ChmParsingException("!= ChmConstants.CHM_ITSP_V1_LEN");
    }
}
Also used : ChmParsingException(org.apache.tika.parser.chm.exception.ChmParsingException)

Example 9 with ChmParsingException

use of org.apache.tika.parser.chm.exception.ChmParsingException in project tika by apache.

the class ChmLzxBlock method decompressAlignedBlock.

/**
 * Decodes symbols from the CHM section's bit stream into {@code content},
 * starting at the current content length and continuing until index
 * {@code len} is reached.
 *
 * <p>Each main-tree symbol below {@code ChmConstants.LZX_NUM_CHARS} is stored
 * as a literal byte. Any other symbol describes a match: a length (optionally
 * extended through the length tree) and an offset (either derived from
 * {@code EXTRA_BITS}/{@code POSITION_BASE}, with the aligned tree supplying the
 * low part when three or more extra bits are available, or taken from the
 * state's R0/R1/R2 values). The match bytes are then copied from earlier
 * positions of {@code content}, or from the tail of {@code prevcontent} when
 * the source position is negative.
 *
 * @param len index at which decoding stops; also stored as the new content length
 * @param prevcontent bytes used as the copy source when a match reaches back
 *        before index 0 of {@code content}
 * @throws TikaException a {@code ChmParsingException} when the section, the
 *         state or the main tree table is null, or when the peeked main-tree
 *         index lies outside the main tree table
 */
private void decompressAlignedBlock(int len, byte[] prevcontent) throws TikaException {
    if ((getChmSection() == null) || (getState() == null) || (getState().getMainTreeTable() == null))
        throw new ChmParsingException("chm section is null");
    // s: current symbol / table entry; x: bit position handed to checkBit;
    // i: write position in content; border: peeked main-tree index used for the range check.
    short s;
    int x, i, border;
    int matchlen = 0, matchfooter = 0, extra, rundest, runsrc;
    int matchoffset = 0;
    for (i = getContentLength(); i < len; i++) {
        /* new code */
        //read huffman tree from main tree
        border = getChmSection().peekBits(ChmConstants.LZX_MAINTREE_TABLEBITS);
        if (border >= getState().mainTreeTable.length)
            throw new ChmParsingException("error decompressing aligned block.");
        //break;
        /* end new code */
        // NOTE(review): peekBits is called a second time instead of reusing border;
        // presumably peekBits does not consume bits, so both calls agree - confirm.
        s = getState().mainTreeTable[getChmSection().peekBits(ChmConstants.LZX_MAINTREE_TABLEBITS)];
        if (s >= getState().getMainTreeElements()) {
            // The direct lookup did not yield a final symbol: append one more bit
            // (via checkBit) per iteration and look up again until it does.
            x = ChmConstants.LZX_MAINTREE_TABLEBITS;
            do {
                x++;
                s <<= 1;
                s += getChmSection().checkBit(x);
            } while ((s = getState().mainTreeTable[s]) >= getState().getMainTreeElements());
        }
        //System.out.printf("%d,", s);
        //?getChmSection().getSyncBits(getState().mainTreeTable[s]);
        // Presumably advances the stream by the code length of symbol s - confirm getSyncBits semantics.
        getChmSection().getSyncBits(getState().getMainTreeLengtsTable()[s]);
        if (s < ChmConstants.LZX_NUM_CHARS) {
            // Literal byte.
            content[i] = (byte) s;
        } else {
            // Match: the bits selected by LZX_NUM_PRIMARY_LENGTHS give the primary
            // length, the bits above the low three (s >>> 3) select the offset.
            s -= ChmConstants.LZX_NUM_CHARS;
            matchlen = s & ChmConstants.LZX_NUM_PRIMARY_LENGTHS;
            if (matchlen == ChmConstants.LZX_NUM_PRIMARY_LENGTHS) {
                // Primary length is saturated: read an additional length from the length tree.
                matchfooter = getState().lengthTreeTable[getChmSection().peekBits(//.LZX_MAINTREE_TABLEBITS)];
                ChmConstants.LZX_LENGTH_TABLEBITS)];
                if (matchfooter >= ChmConstants.LZX_LENGTH_MAXSYMBOLS) /*?LZX_LENGTH_TABLEBITS*/
                {
                    // NOTE(review): the entry test above uses LZX_LENGTH_MAXSYMBOLS while the
                    // loop condition below uses LZX_NUM_SECONDARY_LENGTHS - confirm both are intended.
                    x = ChmConstants.LZX_LENGTH_TABLEBITS;
                    do {
                        x++;
                        matchfooter <<= 1;
                        matchfooter += getChmSection().checkBit(x);
                    } while ((matchfooter = getState().lengthTreeTable[matchfooter]) >= ChmConstants.LZX_NUM_SECONDARY_LENGTHS);
                }
                getChmSection().getSyncBits(getState().lengthTreeLengtsTable[matchfooter]);
                matchlen += matchfooter;
            }
            matchlen += ChmConstants.LZX_MIN_MATCH;
            matchoffset = s >>> 3;
            if (matchoffset > 2) {
                // New offset: base value from POSITION_BASE plus extra bits from the stream.
                extra = ChmConstants.EXTRA_BITS[matchoffset];
                matchoffset = (ChmConstants.POSITION_BASE[matchoffset] - 2);
                if (extra > 3) {
                    // All but three of the extra bits are read directly and shifted up by 3;
                    // the value added below them comes from the aligned tree.
                    extra -= 3;
                    long verbatim_bits = getChmSection().getSyncBits(extra);
                    matchoffset += (verbatim_bits << 3);
                    //READ HUFF SYM in Aligned Tree
                    // NOTE(review): the peek width is LZX_NUM_PRIMARY_LENGTHS while the extension
                    // loop starts from LZX_ALIGNED_TABLEBITS, and the loop bound is
                    // getMainTreeElements() although the table is the aligned tree - confirm.
                    int aligned_bits = getChmSection().peekBits(ChmConstants.LZX_NUM_PRIMARY_LENGTHS);
                    int t = getState().getAlignedTreeTable()[aligned_bits];
                    if (t >= getState().getMainTreeElements()) {
                        //?LZX_MAINTREE_TABLEBITS; //?LZX_ALIGNED_TABLEBITS
                        x = ChmConstants.LZX_ALIGNED_TABLEBITS;
                        do {
                            x++;
                            t <<= 1;
                            t += getChmSection().checkBit(x);
                        } while ((t = getState().getAlignedTreeTable()[t]) >= getState().getMainTreeElements());
                    }
                    getChmSection().getSyncBits(getState().getAlignedLenTable()[t]);
                    matchoffset += t;
                } else if (extra == 3) {
                    // Exactly three extra bits: the whole addition comes from the aligned tree.
                    int g = getChmSection().peekBits(ChmConstants.LZX_NUM_PRIMARY_LENGTHS);
                    int t = getState().getAlignedTreeTable()[g];
                    if (t >= getState().getMainTreeElements()) {
                        //?LZX_MAINTREE_TABLEBITS;
                        x = ChmConstants.LZX_ALIGNED_TABLEBITS;
                        do {
                            x++;
                            t <<= 1;
                            t += getChmSection().checkBit(x);
                        } while ((t = getState().getAlignedTreeTable()[t]) >= getState().getMainTreeElements());
                    }
                    getChmSection().getSyncBits(getState().getAlignedLenTable()[t]);
                    matchoffset += t;
                } else if (extra > 0) {
                    // One or two extra bits: read them directly from the stream.
                    long l = getChmSection().getSyncBits(extra);
                    matchoffset += l;
                } else
                    matchoffset = 1;
                // Push the new offset onto the R0/R1/R2 history (R1 -> R2, R0 -> R1, new -> R0).
                getState().setR2(getState().getR1());
                getState().setR1(getState().getR0());
                getState().setR0(matchoffset);
            } else if (matchoffset == 0) {
                // Reuse R0; the history is left unchanged.
                matchoffset = (int) getState().getR0();
            } else if (matchoffset == 1) {
                // Reuse R1 and swap it with R0.
                matchoffset = (int) getState().getR1();
                getState().setR1(getState().getR0());
                getState().setR0(matchoffset);
            } else /** match_offset == 2 */
            {
                // Reuse R2 and swap it with R0.
                matchoffset = (int) getState().getR2();
                getState().setR2(getState().getR0());
                getState().setR0(matchoffset);
            }
            rundest = i;
            runsrc = rundest - matchoffset;
            // Advance i to the last byte of the match; the loop's i++ moves past it.
            i += (matchlen - 1);
            // A match running past len is not copied; the loop ends and the content
            // length is still set to len below.
            // NOTE(review): i == len passes this test, in which case the copy below writes
            // content[len] - confirm content is sized to allow that.
            if (i > len)
                break;
            if (runsrc < 0) {
                // The match starts before index 0 of content, i.e. inside prevcontent.
                if (matchlen + runsrc <= 0) {
                    // The whole match lies in prevcontent.
                    runsrc = prevcontent.length + runsrc;
                    while (matchlen-- > 0) content[rundest++] = prevcontent[runsrc++];
                } else {
                    // The match straddles the boundary: copy the tail of prevcontent,
                    // then continue from index 0 of content.
                    runsrc = prevcontent.length + runsrc;
                    while (runsrc < prevcontent.length) content[rundest++] = prevcontent[runsrc++];
                    matchlen = matchlen + runsrc - prevcontent.length;
                    runsrc = 0;
                    while (matchlen-- > 0) content[rundest++] = content[runsrc++];
                }
            } else {
                /* copies any wrappes around source data */
                // NOTE(review): runsrc >= 0 in this branch, so this loop never executes.
                while ((runsrc < 0) && (matchlen-- > 0)) {
                    content[rundest++] = content[(int) (runsrc + getBlockLength())];
                    runsrc++;
                }
                /* copies match data - no worries about destination wraps */
                while (matchlen-- > 0) content[rundest++] = content[runsrc++];
            }
        }
    }
    setContentLength(len);
}
Also used : ChmParsingException(org.apache.tika.parser.chm.exception.ChmParsingException)

Example 10 with ChmParsingException

use of org.apache.tika.parser.chm.exception.ChmParsingException in project tika by apache.

the class ChmPmgiHeader method unmarshalUInt32.

/**
 * Reads four bytes from {@code data} at the current place, combines them as a
 * little-endian unsigned 32-bit integer, and advances the current place by
 * four while reducing the remaining-data count by four.
 *
 * <p>The bytes are combined in {@code long} arithmetic. Combining them as
 * {@code int} would yield a negative value whenever the most significant byte
 * is 0x80 or above, and that sign would be carried into the returned
 * {@code long}.
 *
 * @param data source bytes; checked for null through {@code ChmAssert}
 * @param dest ignored on input; kept so the signature matches existing callers
 * @return the decoded value, always in the range 0 .. 0xFFFFFFFF
 * @throws ChmParsingException if fewer than four bytes remain
 */
private long unmarshalUInt32(byte[] data, long dest) throws ChmParsingException {
    ChmAssert.assertByteArrayNotNull(data);
    if (4 > getDataRemained())
        throw new ChmParsingException("4 > dataLenght");
    final int place = this.getCurrentPlace();
    // 0xffL promotes each byte to long before shifting, so bit 31 is treated as data, not as a sign bit.
    dest = (data[place] & 0xffL)
            | (data[place + 1] & 0xffL) << 8
            | (data[place + 2] & 0xffL) << 16
            | (data[place + 3] & 0xffL) << 24;
    setDataRemained(this.getDataRemained() - 4);
    this.setCurrentPlace(place + 4);
    return dest;
}
Also used : ChmParsingException(org.apache.tika.parser.chm.exception.ChmParsingException)

Aggregations

ChmParsingException (org.apache.tika.parser.chm.exception.ChmParsingException): 14, TikaException (org.apache.tika.exception.TikaException): 2