Search in sources :

Example 11 with ChmParsingException

use of org.apache.tika.parser.chm.exception.ChmParsingException in project tika by apache.

the class ChmPmglHeader method parse.

// @Override
/**
 * Populates {@code chmPmglHeader} from the raw header bytes in {@code data}.
 * The signature is read first, followed by the free-space value, an unknown
 * 32-bit field, and the previous/next block numbers, in that order.
 *
 * @param data raw bytes holding the header; must contain at least
 *            {@code ChmConstants.CHM_PMGL_LEN} bytes
 * @param chmPmglHeader header instance that receives the unmarshalled fields
 * @throws TikaException if {@code data} is too short, or (as a
 *             {@link ChmParsingException}) if the unmarshalled signature
 *             does not equal {@code ChmConstants.PMGL}
 */
public void parse(byte[] data, ChmPmglHeader chmPmglHeader) throws TikaException {
    final boolean tooShort = data.length < ChmConstants.CHM_PMGL_LEN;
    if (tooShort) {
        throw new TikaException(ChmPmglHeader.class.getName() + " we only know how to deal with a 0x14 byte structures");
    }

    // Field order matters: each unmarshal call consumes the next bytes of data.
    chmPmglHeader.unmarshalCharArray(data, chmPmglHeader, ChmConstants.CHM_SIGNATURE_LEN);

    final long freeSpace = chmPmglHeader.unmarshalUInt32(data);
    chmPmglHeader.setFreeSpace(freeSpace);

    final long unknown0008 = chmPmglHeader.unmarshalUInt32(data);
    chmPmglHeader.setUnknown0008(unknown0008);

    chmPmglHeader.setBlockPrev(chmPmglHeader.unmarshalInt32(data));
    chmPmglHeader.setBlockNext(chmPmglHeader.unmarshalInt32(data));

    // Validate the structure only after all fields have been consumed.
    final String signature = new String(chmPmglHeader.getSignature(), UTF_8);
    final boolean signatureMatches = signature.equals(ChmConstants.PMGL);
    if (!signatureMatches) {
        throw new ChmParsingException(ChmPmglHeader.class.getName() + " pmgl != pmgl.signature");
    }
}
Also used : ChmParsingException(org.apache.tika.parser.chm.exception.ChmParsingException) TikaException(org.apache.tika.exception.TikaException)

Example 12 with ChmParsingException

use of org.apache.tika.parser.chm.exception.ChmParsingException in project tika by apache.

the class ChmPmglHeader method unmarshalUInt32.

/**
 * Reads four bytes from {@code data} at the current place as a little-endian
 * unsigned 32-bit integer, then advances the current place by four and
 * reduces the remaining-data counter by four.
 * <p>
 * The bytes are combined using {@code long} arithmetic. Combining them as
 * {@code int} would make the most significant byte's shift by 24 overflow
 * into the sign bit, and the subsequent widening to {@code long} would
 * sign-extend it, yielding a negative result for values of 0x80000000 and
 * above.
 *
 * @param data source bytes; checked for null via {@code ChmAssert}
 * @return the decoded value, always in the range 0 .. 0xFFFFFFFF
 * @throws ChmParsingException if fewer than four bytes remain
 */
private long unmarshalUInt32(byte[] data) throws ChmParsingException {
    ChmAssert.assertByteArrayNotNull(data);
    if (4 > getDataRemained()) {
        throw new ChmParsingException("4 > dataLenght");
    }
    final int offset = this.getCurrentPlace();
    // 0xffL forces each operand to long before shifting, so bit 31 stays a value bit.
    final long dest = (data[offset] & 0xffL)
            | (data[offset + 1] & 0xffL) << 8
            | (data[offset + 2] & 0xffL) << 16
            | (data[offset + 3] & 0xffL) << 24;
    setDataRemained(this.getDataRemained() - 4);
    this.setCurrentPlace(offset + 4);
    return dest;
}
Also used : ChmParsingException(org.apache.tika.parser.chm.exception.ChmParsingException)

Example 13 with ChmParsingException

use of org.apache.tika.parser.chm.exception.ChmParsingException in project tika by apache.

the class ChmCommons method indexOf.

/**
 * Searches for the first occurrence of {@code pattern} inside {@code text}
 * using a Knuth-Morris-Pratt style failure table, so the text is scanned
 * once without backing up.
 *
 * @param text
 *            bytes to search in
 * @param pattern
 *            bytes to search for
 * @return the index of the first match, or -1 if there is none; an empty
 *         pattern matches at index 0 (same convention as
 *         {@link String#indexOf(String)})
 * @throws ChmParsingException
 *             if {@code text} or {@code pattern} is null
 */
public static int indexOf(byte[] text, byte[] pattern) throws ChmParsingException {
    if (pattern == null || text == null) {
        throw new ChmParsingException("pattern and/or text should not be null");
    }
    // An empty pattern has no failure table to build (next[0] would be out of
    // bounds); it trivially matches at the start.
    if (pattern.length == 0) {
        return 0;
    }

    /* Preprocessing */
    int[] next = new int[pattern.length];
    next[0] = -1;
    int i = 0;
    int j = -1;

    /* Computes a failure function */
    while (i < pattern.length - 1) {
        if (j == -1 || pattern[i] == pattern[j]) {
            i++;
            j++;
            // When the bytes are equal, a mismatch at i would also mismatch at j,
            // so reuse j's fallback instead of j itself.
            if (pattern[i] != pattern[j]) {
                next[i] = j;
            } else {
                next[i] = next[j];
            }
        } else {
            j = next[j];
        }
    }

    /* Reinitializes local variables */
    i = 0;
    j = 0;

    /* Matching */
    while (i < text.length && j < pattern.length) {
        if (j == -1 || pattern[j] == text[i]) {
            i++;
            j++;
        } else {
            j = next[j];
        }
    }

    // j reached the pattern's end: the match starts pattern.length bytes before i.
    if (j == pattern.length) {
        return i - j;
    }
    return -1;
}
Also used : ChmParsingException(org.apache.tika.parser.chm.exception.ChmParsingException)

Example 14 with ChmParsingException

use of org.apache.tika.parser.chm.exception.ChmParsingException in project tika by apache.

the class ChmDirectoryListingSet method enumerateOneSegment.

/**
 * Enumerates chm directory listing entries in single chm segment.
 * <p>
 * A null chunk and a chunk starting with {@code ChmConstants.CHM_PMGI_MARKER}
 * are skipped without adding entries. A chunk starting with
 * {@code ChmConstants.PMGL} is walked from {@code ChmConstants.CHM_PMGL_LEN}
 * up to the chunk length minus {@code PMGLheader.getFreeSpace()}; every
 * entry found is appended to the directory listing entry list.
 * <p>
 * Each entry is read as: a variable-length name length (7 value bits per
 * byte, most significant group first, high bit set on every byte except the
 * last), the name bytes decoded as UTF-8, one byte that selects the entry
 * type, and then the offset and length obtained from {@code getEncint}.
 *
 * @param dir_chunk raw bytes of one directory chunk; may be null
 * @throws ChmParsingException if the chunk starts with neither marker, or if
 *             a name length is malformed or larger than the chunk
 * @throws TikaException declared for the helpers invoked while building an entry
 */
private void enumerateOneSegment(byte[] dir_chunk) throws ChmParsingException, TikaException {
    if (dir_chunk == null) {
        return;
    }
    if (startsWith(dir_chunk, ChmConstants.CHM_PMGI_MARKER)) {
        //skip PMGI
        return;
    }
    if (!startsWith(dir_chunk, ChmConstants.PMGL)) {
        throw new ChmParsingException("Bad dir entry block.");
    }

    placeHolder = ChmConstants.CHM_PMGL_LEN;
    while (placeHolder > 0 && placeHolder < dir_chunk.length - PMGLheader.getFreeSpace()) {
        // Decode the entry name length. The continuation flag must be tested
        // with a mask: a Java byte is signed, so comparing it with 0x80 is
        // never true. Accumulating in a long keeps the shift from overflowing
        // before the range check runs.
        long encodedLength = 0;
        byte current;
        while (((current = dir_chunk[placeHolder++]) & 0x80) != 0) {
            encodedLength = (encodedLength << 7) + (current & 0x7f);
            if (encodedLength > dir_chunk.length || placeHolder >= dir_chunk.length) {
                throw new ChmParsingException("Bad data of a string length.");
            }
        }
        // Parenthesized on purpose: '+' binds tighter than '&'.
        encodedLength = (encodedLength << 7) + (current & 0x7f);
        if (encodedLength > dir_chunk.length) {
            throw new ChmParsingException("Bad data of a string length.");
        }
        int strlen = (int) encodedLength;

        DirectoryListingEntry dle = new DirectoryListingEntry();
        dle.setNameLength(strlen);
        dle.setName(new String(ChmCommons.copyOfRange(dir_chunk, placeHolder, (placeHolder + dle.getNameLength())), UTF_8));
        checkControlData(dle);
        checkResetTable(dle);
        setPlaceHolder(placeHolder + dle.getNameLength());

        /* Sets entry type: a zero byte right after the name marks an uncompressed entry */
        if (placeHolder < dir_chunk.length && dir_chunk[placeHolder] == 0) {
            dle.setEntryType(ChmCommons.EntryType.UNCOMPRESSED);
        } else {
            dle.setEntryType(ChmCommons.EntryType.COMPRESSED);
        }
        setPlaceHolder(placeHolder + 1);

        // NOTE(review): getEncint presumably reads at placeHolder and advances it,
        // which is what moves this loop forward - confirm against its definition.
        dle.setOffset(getEncint(dir_chunk));
        dle.setLength(getEncint(dir_chunk));
        getDirectoryListingEntryList().add(dle);
    }
}
Also used : ChmParsingException(org.apache.tika.parser.chm.exception.ChmParsingException)

Aggregations

ChmParsingException (org.apache.tika.parser.chm.exception.ChmParsingException)14 TikaException (org.apache.tika.exception.TikaException)2