use of org.apache.tika.parser.chm.exception.ChmParsingException in project tika by apache.
the class ChmPmglHeader method parse.
/**
 * Fills {@code chmPmglHeader} from the raw bytes of a PMGL header.
 *
 * <p>The fields are unmarshalled in a fixed order (signature characters,
 * free space, the unknown field at 0x0008, previous block, next block) and
 * the signature is validated afterwards.
 *
 * @param data
 *            raw header bytes; must contain at least
 *            {@code ChmConstants.CHM_PMGL_LEN} bytes
 * @param chmPmglHeader
 *            header instance that receives the decoded values
 * @throws TikaException
 *             if {@code data} is too short, or (as a
 *             {@code ChmParsingException}) if the decoded signature does
 *             not equal {@code ChmConstants.PMGL}
 */
// @Override
public void parse(byte[] data, ChmPmglHeader chmPmglHeader) throws TikaException {
    final String owner = ChmPmglHeader.class.getName();

    if (data.length < ChmConstants.CHM_PMGL_LEN) {
        throw new TikaException(owner + " we only know how to deal with a 0x14 byte structures");
    }

    /* unmarshal fields -- the call order is significant and must not change */
    chmPmglHeader.unmarshalCharArray(data, chmPmglHeader, ChmConstants.CHM_SIGNATURE_LEN);
    final long freeSpace = chmPmglHeader.unmarshalUInt32(data);
    chmPmglHeader.setFreeSpace(freeSpace);
    final long unknown0008 = chmPmglHeader.unmarshalUInt32(data);
    chmPmglHeader.setUnknown0008(unknown0008);
    chmPmglHeader.setBlockPrev(chmPmglHeader.unmarshalInt32(data));
    chmPmglHeader.setBlockNext(chmPmglHeader.unmarshalInt32(data));

    /* check structure */
    final String signature = new String(chmPmglHeader.getSignature(), UTF_8);
    final boolean signatureMatches = signature.equals(ChmConstants.PMGL);
    if (!signatureMatches) {
        throw new ChmParsingException(owner + " pmgl != pmgl.signature");
    }
}
use of org.apache.tika.parser.chm.exception.ChmParsingException in project tika by apache.
the class ChmPmglHeader method unmarshalUInt32.
/**
 * Reads four bytes at the current place as a little-endian unsigned 32-bit
 * integer, then advances the current place by four and reduces the
 * remaining-data counter by four.
 *
 * @param data
 *            buffer to read from; must not be {@code null}
 * @return the decoded value, always in the range {@code 0 .. 0xFFFFFFFFL}
 * @throws ChmParsingException
 *             if fewer than four bytes remain according to
 *             {@code getDataRemained()}
 */
private long unmarshalUInt32(byte[] data) throws ChmParsingException {
    ChmAssert.assertByteArrayNotNull(data);
    if (4 > getDataRemained()) {
        throw new ChmParsingException("4 > dataLenght");
    }
    final int start = this.getCurrentPlace();
    // Mask with 0xffL so every operand is a long before it is shifted. With
    // int operands a top byte >= 0x80 shifted left by 24 sets the int sign
    // bit, and the widened result would be negative instead of unsigned.
    final long dest = (data[start] & 0xffL)
            | ((data[start + 1] & 0xffL) << 8)
            | ((data[start + 2] & 0xffL) << 16)
            | ((data[start + 3] & 0xffL) << 24);
    setDataRemained(this.getDataRemained() - 4);
    this.setCurrentPlace(start + 4);
    return dest;
}
use of org.apache.tika.parser.chm.exception.ChmParsingException in project tika by apache.
the class ChmCommons method indexOf.
/**
 * Searches for the first occurrence of {@code pattern} inside {@code text}
 * using a Knuth-Morris-Pratt scan with a precomputed failure table.
 *
 * @param text
 *            bytes to search in
 * @param pattern
 *            bytes to search for
 * @return the index of the first match; {@code 0} when {@code pattern} is
 *         empty; {@code -1} when there is no match
 * @throws ChmParsingException
 *             if {@code text} or {@code pattern} is {@code null}
 */
public static int indexOf(byte[] text, byte[] pattern) throws ChmParsingException {
    if (pattern == null || text == null) {
        throw new ChmParsingException("pattern and/or text should not be null");
    }
    // An empty pattern has no failure table to build (next[0] would be out
    // of bounds); it trivially matches at offset 0, as String.indexOf does.
    if (pattern.length == 0) {
        return 0;
    }

    /* Preprocessing */
    final int[] next = new int[pattern.length];
    next[0] = -1;
    int i = 0;
    int j = -1;

    /* Computes a failure function */
    while (i < pattern.length - 1) {
        if (j == -1 || pattern[i] == pattern[j]) {
            i++;
            j++;
            // When the next characters agree, a mismatch at i would also
            // mismatch at j, so reuse j's fallback directly.
            if (pattern[i] != pattern[j]) {
                next[i] = j;
            } else {
                next[i] = next[j];
            }
        } else {
            j = next[j];
        }
    }

    /* Reinitializes local variables */
    i = 0;
    j = 0;

    /* Matching */
    while (i < text.length && j < pattern.length) {
        if (j == -1 || pattern[j] == text[i]) {
            i++;
            j++;
        } else {
            j = next[j];
        }
    }

    // j reached the pattern length: the match starts pattern.length bytes
    // before i. Otherwise the text was exhausted without a full match.
    return (j == pattern.length) ? (i - j) : -1;
}
use of org.apache.tika.parser.chm.exception.ChmParsingException in project tika by apache.
the class ChmDirectoryListingSet method enumerateOneSegment.
/**
 * Enumerates chm directory listing entries in single chm segment.
 *
 * <p>A chunk that starts with {@code ChmConstants.CHM_PMGI_MARKER} is
 * skipped. A chunk that starts with {@code ChmConstants.PMGL} is walked
 * from just past its header until the free-space area reported by
 * {@code PMGLheader.getFreeSpace()} is reached. For every entry the name
 * length (a variable-length integer, 7 payload bits per byte, high bit set
 * on every byte except the last), the name, the entry type byte, the offset
 * and the length are read, and the entry is appended to the directory
 * listing entry list. A {@code null} chunk is ignored.
 *
 * @param dir_chunk
 *            raw bytes of one directory chunk, may be {@code null}
 * @throws ChmParsingException
 *             if the chunk starts with neither marker, or a name length is
 *             malformed or larger than the chunk
 * @throws TikaException
 *             declared for the helper calls made while reading an entry
 */
private void enumerateOneSegment(byte[] dir_chunk) throws ChmParsingException, TikaException {
    if (dir_chunk == null) {
        return;
    }
    if (startsWith(dir_chunk, ChmConstants.CHM_PMGI_MARKER)) {
        // skip PMGI
        return;
    }
    if (!startsWith(dir_chunk, ChmConstants.PMGL)) {
        throw new ChmParsingException("Bad dir entry block.");
    }

    placeHolder = ChmConstants.CHM_PMGL_LEN;
    while (placeHolder > 0 && placeHolder < dir_chunk.length - PMGLheader.getFreeSpace()) {
        // get entry name length
        int strlen = 0;
        byte temp;
        // A Java byte is signed, so "high bit set" must be tested as
        // temp < 0; comparing a byte against 0x80 can never succeed.
        while ((temp = dir_chunk[placeHolder++]) < 0) {
            strlen = (strlen << 7) + (temp & 0x7f);
            // Stop before the value can overflow or the next read can run
            // past the end of the chunk.
            if (strlen > dir_chunk.length || placeHolder >= dir_chunk.length) {
                throw new ChmParsingException("Bad data of a string length.");
            }
        }
        // Parenthesize the mask: '+' binds tighter than '&', so without the
        // parentheses the whole sum would be truncated to 7 bits.
        strlen = (strlen << 7) + (temp & 0x7f);
        if (strlen > dir_chunk.length) {
            throw new ChmParsingException("Bad data of a string length.");
        }

        DirectoryListingEntry dle = new DirectoryListingEntry();
        dle.setNameLength(strlen);
        dle.setName(new String(ChmCommons.copyOfRange(dir_chunk, placeHolder, (placeHolder + dle.getNameLength())), UTF_8));
        checkControlData(dle);
        checkResetTable(dle);
        setPlaceHolder(placeHolder + dle.getNameLength());

        /* Sets entry type */
        if (placeHolder < dir_chunk.length && dir_chunk[placeHolder] == 0) {
            dle.setEntryType(ChmCommons.EntryType.UNCOMPRESSED);
        } else {
            dle.setEntryType(ChmCommons.EntryType.COMPRESSED);
        }
        setPlaceHolder(placeHolder + 1);

        dle.setOffset(getEncint(dir_chunk));
        dle.setLength(getEncint(dir_chunk));
        getDirectoryListingEntryList().add(dle);
    }
}
Aggregations