Search in sources :

Example 26 with TikaException

use of org.apache.tika.exception.TikaException in project tika by apache.

the class JSONMessageBodyWriter method writeTo.

/**
 * Writes the given metadata to the response entity stream as JSON.
 *
 * The JSON is produced by {@code JsonMetadata.toJson} through a writer that
 * wraps {@code entityStream} with the {@code UTF_8} charset. Both the writer
 * and the underlying stream are flushed but not closed here.
 *
 * @param metadata     metadata to serialize
 * @param entityStream stream receiving the serialized output
 * @throws IOException if writing fails, or wrapping a {@code TikaException}
 *                     raised during JSON conversion
 */
@Override
public void writeTo(Metadata metadata, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException, WebApplicationException {
    final Writer jsonOut = new OutputStreamWriter(entityStream, UTF_8);
    try {
        JsonMetadata.toJson(metadata, jsonOut);
    } catch (TikaException conversionFailure) {
        // Callers of this signature only expect I/O failures, so translate.
        throw new IOException(conversionFailure);
    }
    jsonOut.flush();
    entityStream.flush();
}
Also used : TikaException(org.apache.tika.exception.TikaException) OutputStreamWriter(java.io.OutputStreamWriter) IOException(java.io.IOException) MessageBodyWriter(javax.ws.rs.ext.MessageBodyWriter) Writer(java.io.Writer)

Example 27 with TikaException

use of org.apache.tika.exception.TikaException in project tika by apache.

the class XHTMLClassVisitor method parse.

/**
 * Reads a Java class from the stream and visits it with this visitor,
 * skipping frames and method code.
 *
 * @param stream stream handed to the {@code ClassReader}
 * @throws SAXException  if a runtime failure during the visit was caused by a
 *                       {@code SAXException}; that cause is rethrown as-is
 * @throws TikaException for any other runtime failure during reading/visiting
 * @throws IOException   declared by the signature
 */
public void parse(InputStream stream) throws TikaException, SAXException, IOException {
    try {
        new ClassReader(stream).accept(this, ClassReader.SKIP_FRAMES | ClassReader.SKIP_CODE);
    } catch (RuntimeException failure) {
        final Throwable rootCause = failure.getCause();
        if (!(rootCause instanceof SAXException)) {
            throw new TikaException("Failed to parse a Java class", failure);
        }
        // A SAXException was carried inside a RuntimeException; unwrap it.
        throw (SAXException) rootCause;
    }
}
Also used : TikaException(org.apache.tika.exception.TikaException) ClassReader(org.objectweb.asm.ClassReader) SAXException(org.xml.sax.SAXException)

Example 28 with TikaException

use of org.apache.tika.exception.TikaException in project tika by apache.

the class ChmExtractor method extractChmEntry.

/**
 * Decompresses a chm entry.
 * <p>
 * Uncompressed, non-empty entries are copied straight out of the file data at
 * the ITSF data offset plus the entry offset. Compressed entries are rebuilt
 * from LZX blocks, reusing the most advanced cached block that lies between
 * the entry's initial block and its start block when one is available.
 * Entries for which {@code ChmCommons.hasSkip} is true, and entries matching
 * neither branch, yield an empty array.
 *
 * @param directoryListingEntry the directory listing entry to extract
 *
 * @return decompressed data
 * @throws TikaException if the extracted length of a compressed entry does not
 *                       match the entry's declared length, or if any other
 *                       failure occurs during extraction (the original
 *                       exception is attached as the cause)
 */
public byte[] extractChmEntry(DirectoryListingEntry directoryListingEntry) throws TikaException {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    ChmLzxBlock lzxBlock = null;
    try {
        /* UNCOMPRESSED type is easiest one */
        if (directoryListingEntry.getEntryType() == EntryType.UNCOMPRESSED && directoryListingEntry.getLength() > 0 && !ChmCommons.hasSkip(directoryListingEntry)) {
            int dataOffset = (int) (getChmItsfHeader().getDataOffset() + directoryListingEntry.getOffset());
            // dataSegment = Arrays.copyOfRange(getData(), dataOffset,
            // dataOffset + directoryListingEntry.getLength());
            buffer.write(ChmCommons.copyOfRange(getData(), dataOffset, dataOffset + directoryListingEntry.getLength()));
        } else if (directoryListingEntry.getEntryType() == EntryType.COMPRESSED && !ChmCommons.hasSkip(directoryListingEntry)) {
            /* Gets a chm hit_cache info */
            ChmBlockInfo bb = ChmBlockInfo.getChmBlockInfoInstance(directoryListingEntry, (int) getChmLzxcResetTable().getBlockLen(), getChmLzxcControlData());
            int i = 0, start = 0, hit_cache = 0;
            if ((getLzxBlockLength() < Integer.MAX_VALUE) && (getLzxBlockOffset() < Integer.MAX_VALUE)) {
                // TODO: Improve the caching
                // caching ... = O(n^2) - depends on startBlock and endBlock
                // start == -1 means "no usable cached block found yet"
                start = -1;
                if (!getLzxBlocksCache().isEmpty()) {
                    for (i = 0; i < getLzxBlocksCache().size(); i++) {
                        //lzxBlock = getLzxBlocksCache().get(i);
                        int bn = getLzxBlocksCache().get(i).getBlockNumber();
                        // Keep the highest cached block number within [iniBlock, startBlock]
                        for (int j = bb.getIniBlock(); j <= bb.getStartBlock(); j++) {
                            if (bn == j) {
                                if (j > start) {
                                    start = j;
                                    hit_cache = i;
                                }
                            }
                        }
                        // Cannot do better than the start block itself
                        if (start == bb.getStartBlock())
                            break;
                    }
                }
                //                    if (i == getLzxBlocksCache().size() && i == 0) {
                if (start < 0) {
                    // Cache miss: decode from the entry's initial block
                    start = bb.getIniBlock();
                    byte[] dataSegment = ChmCommons.getChmBlockSegment(getData(), getChmLzxcResetTable(), start, (int) getLzxBlockOffset(), (int) getLzxBlockLength());
                    lzxBlock = new ChmLzxBlock(start, dataSegment, getChmLzxcResetTable().getBlockLen(), null);
                    getLzxBlocksCache().add(lzxBlock);
                } else {
                    lzxBlock = getLzxBlocksCache().get(hit_cache);
                }
                // Walk blocks from 'start' to the end block, emitting the slice
                // of each block that belongs to this entry
                for (i = start; i <= bb.getEndBlock(); ) {
                    if (i == bb.getStartBlock() && i == bb.getEndBlock()) {
                        buffer.write(lzxBlock.getContent(bb.getStartOffset(), bb.getEndOffset()));
                        break;
                    }
                    if (i == bb.getStartBlock()) {
                        buffer.write(lzxBlock.getContent(bb.getStartOffset()));
                    }
                    if (i > bb.getStartBlock() && i < bb.getEndBlock()) {
                        buffer.write(lzxBlock.getContent());
                    }
                    if (i == bb.getEndBlock()) {
                        buffer.write(lzxBlock.getContent(0, bb.getEndOffset()));
                        break;
                    }
                    i++;
                    // On a reset-interval boundary the next block is built without
                    // a previous block; otherwise it is chained to the current one
                    if (i % getChmLzxcControlData().getResetInterval() == 0) {
                        lzxBlock = new ChmLzxBlock(i, ChmCommons.getChmBlockSegment(getData(), getChmLzxcResetTable(), i, (int) getLzxBlockOffset(), (int) getLzxBlockLength()), getChmLzxcResetTable().getBlockLen(), null);
                    } else {
                        lzxBlock = new ChmLzxBlock(i, ChmCommons.getChmBlockSegment(getData(), getChmLzxcResetTable(), i, (int) getLzxBlockOffset(), (int) getLzxBlockLength()), getChmLzxcResetTable().getBlockLen(), lzxBlock);
                    }
                    getLzxBlocksCache().add(lzxBlock);
                }
                // Drop the whole cache once it holds more blocks than the reset table counts
                if (getLzxBlocksCache().size() > getChmLzxcResetTable().getBlockCount()) {
                    getLzxBlocksCache().clear();
                }
            }
            if (buffer.size() != directoryListingEntry.getLength()) {
                throw new TikaException("CHM file extract error: extracted Length is wrong.");
            }
        }
    //end of if compressed
    } catch (TikaException e) {
        // Already the declared exception type; rethrow without re-wrapping
        throw e;
    } catch (Exception e) {
        // Keep the original exception as the cause so its stack trace is not lost
        throw new TikaException(e.getMessage(), e);
    }
    return buffer.toByteArray();
}
Also used : TikaException(org.apache.tika.exception.TikaException) ChmLzxBlock(org.apache.tika.parser.chm.lzx.ChmLzxBlock) ChmBlockInfo(org.apache.tika.parser.chm.lzx.ChmBlockInfo) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException)

Example 29 with TikaException

use of org.apache.tika.exception.TikaException in project tika by apache.

the class ChmItspHeader method unmarshalUInt32.

/**
 * Reads four bytes at the current place as a little-endian unsigned 32-bit
 * value, then advances the current place by 4 and reduces the remaining-data
 * counter by 4.
 *
 * @param data       source bytes; must not be null
 * @param dataLenght number of bytes the caller reports as available; must be
 *                   at least 4
 * @param dest       ignored on input; overwritten with the decoded value
 * @return the decoded value, in the range 0 .. 0xFFFFFFFF
 * @throws TikaException if {@code dataLenght} is less than 4, or if the
 *                       null-check on {@code data} fails
 */
private long unmarshalUInt32(byte[] data, int dataLenght, long dest) throws TikaException {
    ChmAssert.assertByteArrayNotNull(data);
    if (4 > dataLenght)
        throw new TikaException("4 > dataLenght");
    int place = this.getCurrentPlace();
    int bits = (data[place] & 0xff) | (data[place + 1] & 0xff) << 8 | (data[place + 2] & 0xff) << 16 | (data[place + 3] & 0xff) << 24;
    // The bytes are combined in int arithmetic, so a set top bit would
    // sign-extend on widening to long; mask to keep the value unsigned.
    dest = bits & 0xffffffffL;
    setDataRemained(this.getDataRemained() - 4);
    this.setCurrentPlace(place + 4);
    return dest;
}
Also used : TikaException(org.apache.tika.exception.TikaException)

Example 30 with TikaException

use of org.apache.tika.exception.TikaException in project tika by apache.

the class ChmItsfHeader method unmarshalUint64.

/**
 * Reads 8 bytes starting at the current place, reverses their order and
 * interprets the result as a {@code long}. The current place moves forward
 * one byte at a time as bytes are consumed, and the remaining-data counter is
 * reduced by 8 afterwards.
 *
 * @param data source bytes
 * @param dest ignored on input; overwritten with the decoded value
 * @return the decoded value
 * @throws TikaException if fewer than 8 bytes are reported as remaining
 */
private long unmarshalUint64(byte[] data, long dest) throws TikaException {
    if (this.getDataRemained() < 8)
        throw new TikaException("8 > this.getDataRemained()");
    final byte[] reversed = new byte[8];
    // Fill from the last slot backwards so the first byte read ends up last.
    for (int slot = reversed.length - 1; slot >= 0; slot--) {
        reversed[slot] = data[this.getCurrentPlace()];
        this.setCurrentPlace(this.getCurrentPlace() + 1);
    }
    dest = new BigInteger(reversed).longValue();
    this.setDataRemained(this.getDataRemained() - 8);
    return dest;
}
Also used : TikaException(org.apache.tika.exception.TikaException) BigInteger(java.math.BigInteger)

Aggregations

TikaException (org.apache.tika.exception.TikaException)142 IOException (java.io.IOException)54 SAXException (org.xml.sax.SAXException)42 InputStream (java.io.InputStream)37 TikaInputStream (org.apache.tika.io.TikaInputStream)33 Metadata (org.apache.tika.metadata.Metadata)33 XHTMLContentHandler (org.apache.tika.sax.XHTMLContentHandler)29 Test (org.junit.Test)19 ParseContext (org.apache.tika.parser.ParseContext)18 ContentHandler (org.xml.sax.ContentHandler)17 BodyContentHandler (org.apache.tika.sax.BodyContentHandler)16 CloseShieldInputStream (org.apache.commons.io.input.CloseShieldInputStream)15 TemporaryResources (org.apache.tika.io.TemporaryResources)15 MediaType (org.apache.tika.mime.MediaType)13 Parser (org.apache.tika.parser.Parser)13 ByteArrayInputStream (java.io.ByteArrayInputStream)12 ArrayList (java.util.ArrayList)11 AutoDetectParser (org.apache.tika.parser.AutoDetectParser)11 File (java.io.File)8 EmbeddedContentHandler (org.apache.tika.sax.EmbeddedContentHandler)8