use of org.apache.tika.parser.chm.exception.ChmParsingException in project tika by apache.
the class ChmLzxcResetTable method parse.
// @Override
/**
 * Populates {@code chmLzxcResetTable} from the raw bytes in {@code data}.
 * <p>
 * The remaining-data counter is first set to {@code data.length}. When
 * {@code validateParamaters} accepts the arguments, the fields are unmarshalled in
 * sequence: version, block count, unknown, table offset, uncompressed length,
 * compressed length and block length, followed by the block addresses. The version
 * is then checked regardless of whether the fields were unmarshalled.
 *
 * @param data              raw bytes of the reset table
 * @param chmLzxcResetTable table object that receives the unmarshalled values
 * @throws ChmParsingException if the resulting version is not {@code ChmConstants.CHM_VER_2}
 * @throws TikaException       declared on the signature; may come from the helpers called here
 */
public void parse(byte[] data, ChmLzxcResetTable chmLzxcResetTable) throws TikaException {
    setDataRemained(data.length);

    final boolean parametersAccepted = validateParamaters(data, chmLzxcResetTable);
    if (parametersAccepted) {
        // Each unmarshal call is handed the field's current value and its result is stored back.
        chmLzxcResetTable.setVersion(
                unmarshalUInt32(data, chmLzxcResetTable.getVersion()));
        chmLzxcResetTable.setBlockCount(
                unmarshalUInt32(data, chmLzxcResetTable.getBlockCount()));
        chmLzxcResetTable.setUnknown(
                unmarshalUInt32(data, chmLzxcResetTable.getUnknown()));
        chmLzxcResetTable.setTableOffset(
                unmarshalUInt32(data, chmLzxcResetTable.getTableOffset()));
        chmLzxcResetTable.setUncompressedLen(
                unmarshalUint64(data, chmLzxcResetTable.getUncompressedLen()));
        chmLzxcResetTable.setCompressedLen(
                unmarshalUint64(data, chmLzxcResetTable.getCompressedLen()));
        chmLzxcResetTable.setBlockLlen(
                unmarshalUint64(data, chmLzxcResetTable.getBlockLen()));
        chmLzxcResetTable.setBlockAddress(enumerateBlockAddresses(data));
    }

    // Only version 2 reset tables are accepted.
    final boolean versionAccepted = chmLzxcResetTable.getVersion() == ChmConstants.CHM_VER_2;
    if (!versionAccepted) {
        throw new ChmParsingException("does not seem currect version of chmLzxcResetTable");
    }
}
use of org.apache.tika.parser.chm.exception.ChmParsingException in project tika by apache.
the class ChmItsfHeader method parse.
// @Override
/**
 * Fills {@code chmItsfHeader} from the raw bytes in {@code data} and validates the result.
 * <p>
 * The buffer length must lie between {@code ChmConstants.CHM_ITSF_V2_LEN} and
 * {@code ChmConstants.CHM_ITSF_V3_LEN} (inclusive). After the fields are unmarshalled,
 * the signature, the version and the header length are checked, and finally the data
 * offset is set to {@code dirOffset + dirLen}.
 *
 * @param data          raw bytes of the ITSF header
 * @param chmItsfHeader header object that receives the unmarshalled values
 * @throws ChmParsingException if the version is neither {@code CHM_VER_2} nor {@code CHM_VER_3}
 * @throws TikaException       if the buffer length, signature or header length is rejected,
 *                             or if a version 3 header reports a negative remaining-data count
 */
public void parse(byte[] data, ChmItsfHeader chmItsfHeader) throws TikaException {
    final boolean lengthInRange = data.length >= ChmConstants.CHM_ITSF_V2_LEN
            && data.length <= ChmConstants.CHM_ITSF_V3_LEN;
    if (!lengthInRange) {
        throw new TikaException("we only know how to deal with the 0x58 and 0x60 byte structures");
    }

    // Unmarshal the fields in sequence; each call is handed the field's current value.
    chmItsfHeader.setDataRemained(data.length);
    chmItsfHeader.unmarshalCharArray(data, chmItsfHeader, ChmConstants.CHM_SIGNATURE_LEN);
    chmItsfHeader.setVersion(
            chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getVersion()));
    chmItsfHeader.setHeaderLen(
            chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getHeaderLen()));
    chmItsfHeader.setUnknown_000c(
            chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getUnknown_000c()));
    chmItsfHeader.setLastModified(
            chmItsfHeader.unmarshalUInt32(data, chmItsfHeader.getLastModified()));
    chmItsfHeader.setLangId(
            chmItsfHeader.unmarshalUInt32(data, chmItsfHeader.getLangId()));
    chmItsfHeader.setDir_uuid(
            chmItsfHeader.unmarshalUuid(data, chmItsfHeader.getDir_uuid(), 16));
    chmItsfHeader.setStream_uuid(
            chmItsfHeader.unmarshalUuid(data, chmItsfHeader.getStream_uuid(), 16));
    chmItsfHeader.setUnknownOffset(
            chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getUnknownOffset()));
    chmItsfHeader.setUnknownLen(
            chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getUnknownLen()));
    chmItsfHeader.setDirOffset(
            chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirOffset()));
    chmItsfHeader.setDirLen(
            chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirLen()));

    final String signature = new String(chmItsfHeader.getSignature(), UTF_8);
    if (!signature.equals(ChmConstants.ITSF)) {
        throw new TikaException("seems not valid file");
    }

    // Only versions 2 and 3 are supported; each has its own minimum header length.
    final boolean isVersion2 = chmItsfHeader.getVersion() == ChmConstants.CHM_VER_2;
    final boolean isVersion3 = chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3;
    if (!isVersion2 && !isVersion3) {
        throw new ChmParsingException("unsupported chm format");
    }
    if (isVersion2) {
        if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V2_LEN) {
            throw new TikaException("something wrong with header");
        }
    } else {
        if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V3_LEN) {
            throw new TikaException("unknown v3 header lenght");
        }
    }

    // The data offset is computed the same way for every version; a version 3 header
    // additionally requires a non-negative remaining-data count.
    // NOTE(review): nothing further is unmarshalled for version 3 here - confirm that
    // deriving the offset from dirOffset + dirLen is intended for that version too.
    if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3
            && chmItsfHeader.getDataRemained() < 0) {
        throw new TikaException("cannot set data offset, no data remained");
    }
    chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset() + chmItsfHeader.getDirLen());
}
use of org.apache.tika.parser.chm.exception.ChmParsingException in project tika by apache.
the class ChmItspHeader method parse.
// @Override
/**
 * Fills {@code chmItspHeader} from the raw bytes in {@code data} and validates the result.
 * <p>
 * The buffer must be exactly {@code ChmConstants.CHM_ITSP_V1_LEN} bytes long. After the
 * fields are unmarshalled, the signature must equal {@code ChmConstants.ITSP}, the version
 * must be {@code ChmConstants.CHM_VER_1} and the header length must be
 * {@code ChmConstants.CHM_ITSP_V1_LEN}.
 *
 * @param data          raw bytes of the ITSP header
 * @param chmItspHeader header object that receives the unmarshalled values
 * @throws ChmParsingException if the buffer length, signature, version or header length
 *                             is rejected
 * @throws TikaException       declared on the signature; may come from the helpers called here
 */
public void parse(byte[] data, ChmItspHeader chmItspHeader) throws TikaException {
    // Reject any buffer whose size differs from the V1 ITSP length.
    if (data.length != ChmConstants.CHM_ITSP_V1_LEN) {
        throw new ChmParsingException("we only know how to deal with the 0x58 and 0x60 byte structures");
    }

    // Signature first, then the fields in sequence. Each unmarshal call receives the
    // header's current remaining-data count and the field's current value.
    chmItspHeader.unmarshalCharArray(data, chmItspHeader, ChmConstants.CHM_SIGNATURE_LEN);
    chmItspHeader.setVersion(
            chmItspHeader.unmarshalInt32(data, chmItspHeader.getDataRemained(),
                    chmItspHeader.getVersion()));
    chmItspHeader.setHeader_len(
            chmItspHeader.unmarshalInt32(data, chmItspHeader.getDataRemained(),
                    chmItspHeader.getHeader_len()));
    chmItspHeader.setUnknown_000c(
            chmItspHeader.unmarshalInt32(data, chmItspHeader.getDataRemained(),
                    chmItspHeader.getUnknown_000c()));
    chmItspHeader.setBlock_len(
            chmItspHeader.unmarshalUInt32(data, chmItspHeader.getDataRemained(),
                    chmItspHeader.getBlock_len()));
    chmItspHeader.setBlockidx_intvl(
            chmItspHeader.unmarshalInt32(data, chmItspHeader.getDataRemained(),
                    chmItspHeader.getBlockidx_intvl()));
    chmItspHeader.setIndex_depth(
            chmItspHeader.unmarshalInt32(data, chmItspHeader.getDataRemained(),
                    chmItspHeader.getIndex_depth()));
    chmItspHeader.setIndex_root(
            chmItspHeader.unmarshalInt32(data, chmItspHeader.getDataRemained(),
                    chmItspHeader.getIndex_root()));
    chmItspHeader.setIndex_head(
            chmItspHeader.unmarshalInt32(data, chmItspHeader.getDataRemained(),
                    chmItspHeader.getIndex_head()));
    chmItspHeader.setUnknown_0024(
            chmItspHeader.unmarshalInt32(data, chmItspHeader.getDataRemained(),
                    chmItspHeader.getUnknown_0024()));
    chmItspHeader.setNum_blocks(
            chmItspHeader.unmarshalUInt32(data, chmItspHeader.getDataRemained(),
                    chmItspHeader.getNum_blocks()));
    chmItspHeader.setUnknown_002c(
            chmItspHeader.unmarshalInt32(data, chmItspHeader.getDataRemained(),
                    chmItspHeader.getUnknown_002c()));
    chmItspHeader.setLang_id(
            chmItspHeader.unmarshalUInt32(data, chmItspHeader.getDataRemained(),
                    chmItspHeader.getLang_id()));
    chmItspHeader.setSystem_uuid(
            chmItspHeader.unmarshalUuid(data, chmItspHeader.getDataRemained(),
                    chmItspHeader.getSystem_uuid(), ChmConstants.BYTE_ARRAY_LENGHT));
    chmItspHeader.setUnknown_0044(
            chmItspHeader.unmarshalUuid(data, chmItspHeader.getDataRemained(),
                    chmItspHeader.getUnknown_0044(), ChmConstants.BYTE_ARRAY_LENGHT));

    // Validity checks on the unmarshalled header.
    final String signature = new String(chmItspHeader.getSignature(), UTF_8);
    if (!signature.equals(ChmConstants.ITSP)) {
        throw new ChmParsingException("seems not valid signature");
    }
    if (chmItspHeader.getVersion() != ChmConstants.CHM_VER_1) {
        throw new ChmParsingException("!=ChmConstants.CHM_VER_1");
    }
    if (chmItspHeader.getHeader_len() != ChmConstants.CHM_ITSP_V1_LEN) {
        throw new ChmParsingException("!= ChmConstants.CHM_ITSP_V1_LEN");
    }
}
use of org.apache.tika.parser.chm.exception.ChmParsingException in project tika by apache.
the class ChmLzxBlock method decompressAlignedBlock.
/**
 * Decompresses an aligned block into {@code content}.
 * <p>
 * Starting at {@code getContentLength()} and running up to {@code len}, each iteration
 * looks up one symbol through the main tree table. A symbol below
 * {@code ChmConstants.LZX_NUM_CHARS} is stored as a literal byte. Any other symbol
 * describes a match (length and offset) that is copied from earlier output; bytes are
 * taken from {@code prevcontent} when the source position lies before the start of
 * {@code content}. When the loop ends, the content length is set to {@code len}.
 *
 * @param len         position in {@code content} at which decoding stops
 * @param prevcontent bytes addressed relative to their end when a match source index is
 *                    negative - presumably the previously decoded block; TODO confirm
 *                    against the callers
 * @throws ChmParsingException if the chm section, the state or the main tree table is
 *                             null, or if the peeked main-tree index lies outside the table
 * @throws TikaException       declared on the signature; the only exceptions created in
 *                             this method are {@code ChmParsingException}s
 */
private void decompressAlignedBlock(int len, byte[] prevcontent) throws TikaException {
// Nothing can be decoded without a section, a state and a main tree table.
if ((getChmSection() == null) || (getState() == null) || (getState().getMainTreeTable() == null))
throw new ChmParsingException("chm section is null");
short s;
int x, i, border;
int matchlen = 0, matchfooter = 0, extra, rundest, runsrc;
int matchoffset = 0;
// i is the write position in content; a match advances it by more than one per iteration.
for (i = getContentLength(); i < len; i++) {
/* new code */
//read huffman tree from main tree
// Reject an index that would fall outside the main tree table before using it.
border = getChmSection().peekBits(ChmConstants.LZX_MAINTREE_TABLEBITS);
if (border >= getState().mainTreeTable.length)
throw new ChmParsingException("error decompressing aligned block.");
//break;
/* end new code */
// The same number of bits is peeked a second time to index the table
// (peekBits presumably does not consume input - confirm against ChmSection).
s = getState().mainTreeTable[getChmSection().peekBits(ChmConstants.LZX_MAINTREE_TABLEBITS)];
// An entry at or above the main tree element count is not a final symbol: keep
// following the table, one extra bit (checkBit) at a time, until one is reached.
if (s >= getState().getMainTreeElements()) {
x = ChmConstants.LZX_MAINTREE_TABLEBITS;
do {
x++;
s <<= 1;
s += getChmSection().checkBit(x);
} while ((s = getState().mainTreeTable[s]) >= getState().getMainTreeElements());
}
//System.out.printf("%d,", s);
//?getChmSection().getSyncBits(getState().mainTreeTable[s]);
// Pass the length-table entry of the decoded symbol to getSyncBits; the return value
// is ignored (presumably this consumes the bits of the code - confirm).
getChmSection().getSyncBits(getState().getMainTreeLengtsTable()[s]);
if (s < ChmConstants.LZX_NUM_CHARS) {
// Literal: the symbol itself is the output byte.
content[i] = (byte) s;
} else {
// Match: the symbol, rebased past the literals, carries a length part and an offset part.
s -= ChmConstants.LZX_NUM_CHARS;
matchlen = s & ChmConstants.LZX_NUM_PRIMARY_LENGTHS;
// A length part equal to the mask value means an additional footer is read
// through the length tree and added to the match length.
if (matchlen == ChmConstants.LZX_NUM_PRIMARY_LENGTHS) {
matchfooter = getState().lengthTreeTable[getChmSection().peekBits(//.LZX_MAINTREE_TABLEBITS)];
ChmConstants.LZX_LENGTH_TABLEBITS)];
// NOTE(review): the entry test below compares against LZX_LENGTH_MAXSYMBOLS while the
// loop condition compares against LZX_NUM_SECONDARY_LENGTHS - confirm both are intended.
if (matchfooter >= ChmConstants.LZX_LENGTH_MAXSYMBOLS) /*?LZX_LENGTH_TABLEBITS*/
{
x = ChmConstants.LZX_LENGTH_TABLEBITS;
do {
x++;
matchfooter <<= 1;
matchfooter += getChmSection().checkBit(x);
} while ((matchfooter = getState().lengthTreeTable[matchfooter]) >= ChmConstants.LZX_NUM_SECONDARY_LENGTHS);
}
getChmSection().getSyncBits(getState().lengthTreeLengtsTable[matchfooter]);
matchlen += matchfooter;
}
matchlen += ChmConstants.LZX_MIN_MATCH;
// The bits of the symbol above the low three select how the offset is obtained.
matchoffset = s >>> 3;
if (matchoffset > 2) {
// Values above 2 index EXTRA_BITS / POSITION_BASE to build a fresh offset.
extra = ChmConstants.EXTRA_BITS[matchoffset];
matchoffset = (ChmConstants.POSITION_BASE[matchoffset] - 2);
if (extra > 3) {
// More than three extra bits: (extra - 3) bits are read directly and shifted
// left by three, then a symbol from the aligned tree is added.
extra -= 3;
long verbatim_bits = getChmSection().getSyncBits(extra);
// The compound assignment narrows the long result back to int.
matchoffset += (verbatim_bits << 3);
//READ HUFF SYM in Aligned Tree
// NOTE(review): the peek width here is LZX_NUM_PRIMARY_LENGTHS and the table entry is
// compared against getMainTreeElements(), although the table is the aligned tree - confirm.
int aligned_bits = getChmSection().peekBits(ChmConstants.LZX_NUM_PRIMARY_LENGTHS);
int t = getState().getAlignedTreeTable()[aligned_bits];
if (t >= getState().getMainTreeElements()) {
//?LZX_MAINTREE_TABLEBITS; //?LZX_ALIGNED_TABLEBITS
x = ChmConstants.LZX_ALIGNED_TABLEBITS;
do {
x++;
t <<= 1;
t += getChmSection().checkBit(x);
} while ((t = getState().getAlignedTreeTable()[t]) >= getState().getMainTreeElements());
}
getChmSection().getSyncBits(getState().getAlignedLenTable()[t]);
matchoffset += t;
} else if (extra == 3) {
// Exactly three extra bits: only the aligned tree symbol is added
// (same lookup as in the branch above).
int g = getChmSection().peekBits(ChmConstants.LZX_NUM_PRIMARY_LENGTHS);
int t = getState().getAlignedTreeTable()[g];
if (t >= getState().getMainTreeElements()) {
//?LZX_MAINTREE_TABLEBITS;
x = ChmConstants.LZX_ALIGNED_TABLEBITS;
do {
x++;
t <<= 1;
t += getChmSection().checkBit(x);
} while ((t = getState().getAlignedTreeTable()[t]) >= getState().getMainTreeElements());
}
getChmSection().getSyncBits(getState().getAlignedLenTable()[t]);
matchoffset += t;
} else if (extra > 0) {
// One or two extra bits: the bits are read directly and added.
long l = getChmSection().getSyncBits(extra);
matchoffset += l;
} else
// No extra bits: the offset is fixed at 1, replacing the POSITION_BASE-derived value.
matchoffset = 1;
// Record the fresh offset: R1 moves to R2, R0 moves to R1, the new offset becomes R0.
getState().setR2(getState().getR1());
getState().setR1(getState().getR0());
getState().setR0(matchoffset);
} else if (matchoffset == 0) {
// Reuse R0 unchanged.
matchoffset = (int) getState().getR0();
} else if (matchoffset == 1) {
// Reuse R1 and swap it with R0.
matchoffset = (int) getState().getR1();
getState().setR1(getState().getR0());
getState().setR0(matchoffset);
} else /** match_offset == 2 */
{
// Reuse R2 and swap it with R0.
matchoffset = (int) getState().getR2();
getState().setR2(getState().getR0());
getState().setR0(matchoffset);
}
// rundest is where the match is written, runsrc where it is read from; a negative
// runsrc means the source starts before the beginning of content.
rundest = i;
runsrc = rundest - matchoffset;
// Advance i past the match now; the loop's own i++ accounts for the final byte.
i += (matchlen - 1);
// NOTE(review): when the match would run past len the copy is skipped entirely and the
// loop ends; setContentLength(len) below still runs - confirm this is intended.
if (i > len)
break;
if (runsrc < 0) {
if (matchlen + runsrc <= 0) {
// The whole match lies inside prevcontent, addressed from its end.
runsrc = prevcontent.length + runsrc;
while (matchlen-- > 0) content[rundest++] = prevcontent[runsrc++];
} else {
// The match starts in prevcontent and continues at the beginning of content.
runsrc = prevcontent.length + runsrc;
while (runsrc < prevcontent.length) content[rundest++] = prevcontent[runsrc++];
// NOTE(review): after the loop above runsrc equals prevcontent.length, so this
// assignment leaves matchlen unchanged rather than reducing it by the bytes
// already copied - verify the intended remaining length.
matchlen = matchlen + runsrc - prevcontent.length;
runsrc = 0;
while (matchlen-- > 0) content[rundest++] = content[runsrc++];
}
} else {
/* copies any wrappes around source data */
// NOTE(review): runsrc is >= 0 in this branch, so the loop below never executes.
while ((runsrc < 0) && (matchlen-- > 0)) {
content[rundest++] = content[(int) (runsrc + getBlockLength())];
runsrc++;
}
/* copies match data - no worries about destination wraps */
while (matchlen-- > 0) content[rundest++] = content[runsrc++];
}
}
}
setContentLength(len);
}
use of org.apache.tika.parser.chm.exception.ChmParsingException in project tika by apache.
the class ChmPmgiHeader method unmarshalUInt32.
/**
 * Reads four bytes of {@code data}, starting at the current place, as a little-endian
 * unsigned 32-bit value, then advances the current place by four and reduces the
 * remaining-data count by four.
 * <p>
 * The bytes are combined in {@code int} arithmetic, so a value with its top bit set is
 * negative at that point. It is masked with {@code 0xFFFFFFFFL} so that widening to
 * {@code long} cannot sign-extend it: the result is always in the range
 * {@code 0 .. 4294967295}.
 *
 * @param data source bytes; checked with {@code ChmAssert.assertByteArrayNotNull}
 * @param dest ignored; kept so the signature matches the other unmarshal helpers
 * @return the unsigned 32-bit value read, as a non-negative {@code long}
 * @throws ChmParsingException if fewer than four bytes are reported as remaining
 */
private long unmarshalUInt32(byte[] data, long dest) throws ChmParsingException {
    ChmAssert.assertByteArrayNotNull(data);
    if (4 > getDataRemained()) {
        throw new ChmParsingException("4 > dataLenght");
    }

    final int start = this.getCurrentPlace();
    final int raw = (data[start] & 0xff)
            | (data[start + 1] & 0xff) << 8
            | (data[start + 2] & 0xff) << 16
            | (data[start + 3] & 0xff) << 24;
    // Mask after widening so bit 31 is treated as data, not as a sign bit.
    final long value = raw & 0xFFFFFFFFL;

    setDataRemained(this.getDataRemained() - 4);
    this.setCurrentPlace(start + 4);
    return value;
}
Aggregations