use of org.apache.tika.exception.TikaException in project tika by apache.
the class ChmItspHeader method unmarshalUInt32.
/**
 * Reads four bytes starting at the current place as a little-endian, unsigned
 * 32-bit integer, then advances the current place by 4 and reduces the
 * remaining-data counter by 4.
 *
 * @param data       buffer to read from; checked by {@code ChmAssert.assertByteArrayNotNull}
 * @param dataLenght number of bytes the caller considers available; must be at least 4
 * @param dest       ignored on input; overwritten with the decoded value
 * @return the decoded value, always in the range {@code 0 .. 0xFFFFFFFF}
 * @throws TikaException if {@code dataLenght} is smaller than 4
 */
private long unmarshalUInt32(byte[] data, int dataLenght, long dest) throws TikaException {
    ChmAssert.assertByteArrayNotNull(data);
    if (4 > dataLenght) {
        throw new TikaException("4 > dataLenght");
    }
    final int offset = this.getCurrentPlace();
    final int bits = (data[offset] & 0xff)
            | (data[offset + 1] & 0xff) << 8
            | (data[offset + 2] & 0xff) << 16
            | (data[offset + 3] & 0xff) << 24;
    // The bytes are combined in int arithmetic; widening that int directly to
    // long would sign-extend values whose top bit is set. Mask to keep the
    // result unsigned.
    dest = bits & 0xFFFFFFFFL;
    setDataRemained(this.getDataRemained() - 4);
    this.setCurrentPlace(offset + 4);
    return dest;
}
use of org.apache.tika.exception.TikaException in project tika by apache.
the class ChmItsfHeader method unmarshalUint64.
/**
 * Reads eight bytes starting at the current place and assembles them into a
 * {@code long}, treating the first byte read as the least significant one
 * (little-endian). The current place advances by one for every byte consumed,
 * and the remaining-data counter is reduced by 8 once all bytes are read.
 *
 * @param data buffer to read from
 * @param dest ignored on input; overwritten with the decoded value
 * @return the decoded 64-bit pattern as a {@code long} (negative when the
 *         most significant bit is set)
 * @throws TikaException if fewer than 8 bytes remain according to
 *                       {@code getDataRemained()}
 */
private long unmarshalUint64(byte[] data, long dest) throws TikaException {
    if (this.getDataRemained() < 8) {
        throw new TikaException("8 > this.getDataRemained()");
    }
    long value = 0L;
    for (int shift = 0; shift < Long.SIZE; shift += Byte.SIZE) {
        // Each successive byte is more significant than the previous one.
        value |= (data[this.getCurrentPlace()] & 0xffL) << shift;
        this.setCurrentPlace(this.getCurrentPlace() + 1);
    }
    dest = value;
    this.setDataRemained(this.getDataRemained() - 8);
    return dest;
}
use of org.apache.tika.exception.TikaException in project tika by apache.
the class ChmItsfHeader method parse.
/**
 * Fills {@code chmItsfHeader} from the raw ITSF header bytes in {@code data}
 * and validates the result.
 *
 * <p>The fields are unmarshalled strictly in sequence (signature, version,
 * header length, unknown_000c, last modified, language id, two UUIDs, then
 * four 64-bit values: unknown offset, unknown length, directory offset and
 * directory length). After that the signature, the version and the header
 * length are checked, and the data offset is set to directory offset plus
 * directory length.
 *
 * @param data          raw header bytes; the length must lie between
 *                      {@code ChmConstants.CHM_ITSF_V2_LEN} and
 *                      {@code ChmConstants.CHM_ITSF_V3_LEN} inclusive
 * @param chmItsfHeader header object that receives the parsed values
 * @throws TikaException if the length is unsupported, the signature does not
 *                       match, the header length is too small for the
 *                       declared version, the version is unsupported, or a
 *                       version 3 header reports a negative remaining count
 */
// @Override
public void parse(byte[] data, ChmItsfHeader chmItsfHeader) throws TikaException {
    final boolean supportedLength = data.length >= ChmConstants.CHM_ITSF_V2_LEN
            && data.length <= ChmConstants.CHM_ITSF_V3_LEN;
    if (!supportedLength) {
        throw new TikaException("we only know how to deal with the 0x58 and 0x60 byte structures");
    }

    // Sequential reads: every unmarshal call consumes bytes, so the order is significant.
    chmItsfHeader.setDataRemained(data.length);
    chmItsfHeader.unmarshalCharArray(data, chmItsfHeader, ChmConstants.CHM_SIGNATURE_LEN);
    chmItsfHeader.setVersion(chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getVersion()));
    chmItsfHeader.setHeaderLen(chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getHeaderLen()));
    chmItsfHeader.setUnknown_000c(chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getUnknown_000c()));
    chmItsfHeader.setLastModified(chmItsfHeader.unmarshalUInt32(data, chmItsfHeader.getLastModified()));
    chmItsfHeader.setLangId(chmItsfHeader.unmarshalUInt32(data, chmItsfHeader.getLangId()));
    chmItsfHeader.setDir_uuid(chmItsfHeader.unmarshalUuid(data, chmItsfHeader.getDir_uuid(), 16));
    chmItsfHeader.setStream_uuid(chmItsfHeader.unmarshalUuid(data, chmItsfHeader.getStream_uuid(), 16));
    chmItsfHeader.setUnknownOffset(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getUnknownOffset()));
    chmItsfHeader.setUnknownLen(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getUnknownLen()));
    chmItsfHeader.setDirOffset(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirOffset()));
    chmItsfHeader.setDirLen(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirLen()));

    final String signature = new String(chmItsfHeader.getSignature(), UTF_8);
    if (!signature.equals(ChmConstants.ITSF)) {
        throw new TikaException("seems not valid file");
    }

    // The declared header length must be at least the minimum for its version.
    if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_2) {
        if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V2_LEN) {
            throw new TikaException("something wrong with header");
        }
    } else if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3) {
        if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V3_LEN) {
            throw new TikaException("unknown v3 header lenght");
        }
    } else {
        throw new ChmParsingException("unsupported chm format");
    }

    // Both versions compute the data offset the same way; version 3 additionally
    // refuses to proceed when the remaining-data counter has gone negative.
    final boolean isVersion3 = chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3;
    if (isVersion3 && chmItsfHeader.getDataRemained() < 0) {
        throw new TikaException("cannot set data offset, no data remained");
    }
    chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset() + chmItsfHeader.getDirLen());
}
use of org.apache.tika.exception.TikaException in project tika by apache.
the class JsonMetadataTest method testDeserializationException.
/**
 * Verifies that {@code JsonMetadata.fromJson} reports malformed input by
 * throwing a {@code TikaException}.
 */
@Test
public void testDeserializationException() {
    // Invalid JSON: the value 500,000 is not quoted, so the comma breaks the object.
    final String malformedJson = "{\"k1\":[\"v1\",\"v2\"],\"k3\":\"v3\",\"k4\":500,000}";
    boolean parseFailed;
    try {
        JsonMetadata.fromJson(new StringReader(malformedJson));
        parseFailed = false;
    } catch (TikaException expected) {
        parseFailed = true;
    }
    assertTrue(parseFailed);
}
use of org.apache.tika.exception.TikaException in project tika by apache.
the class ExtractReader method loadExtract.
/**
 * Loads the list of {@code Metadata} objects stored in an extract file.
 *
 * <p>The file name is parsed into suffixes that determine the format
 * ({@code json} versus anything else, which is handed to
 * {@code generateListFromTextFile}) and an optional compression wrapper
 * ({@code bz2}, {@code gz}/{@code gzip} or {@code zip}). For JSON extracts the
 * list is post-processed according to {@code alterMetadataList}: either only
 * the first entry is kept, or the content of every entry is concatenated
 * (separated by single spaces) into the first entry and the rest are dropped.
 *
 * <p>NOTE(review): the {@code zip} suffix is decoded with
 * {@code ZCompressorInputStream}; confirm that this is the intended codec.
 *
 * @param extractFile path of the extract file to read
 * @return the loaded metadata list, or {@code null} when the file carries a
 *         compression suffix this reader does not handle
 * @throws ExtractReaderException if the file is missing or not a regular file,
 *         has an unrecognised suffix, is empty, falls outside the configured
 *         length limits, cannot be read, or cannot be parsed
 */
public List<Metadata> loadExtract(Path extractFile) throws ExtractReaderException {
    List<Metadata> metadataList = null;
    if (extractFile == null || !Files.isRegularFile(extractFile)) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.NO_EXTRACT_FILE);
    }
    FileSuffixes fileSuffixes = parseSuffixes(extractFile.getFileName().toString());
    if (fileSuffixes.txtOrJson == null) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.INCORRECT_EXTRACT_FILE_SUFFIX);
    }

    long length = -1L;
    try {
        length = Files.size(extractFile);
    } catch (IOException e) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.IO_EXCEPTION);
    }
    if (length == 0L) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.ZERO_BYTE_EXTRACT_FILE);
    }
    // The length limits only apply when they are configured above IGNORE_LENGTH.
    if (minExtractLength > IGNORE_LENGTH && length < minExtractLength) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_FILE_TOO_SHORT);
    }
    if (maxExtractLength > IGNORE_LENGTH && length > maxExtractLength) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_FILE_TOO_LONG);
    }

    Reader reader = null;
    InputStream is = null;
    try {
        is = Files.newInputStream(extractFile);
        if (fileSuffixes.compression != null) {
            if (fileSuffixes.compression.equals("bz2")) {
                is = new BZip2CompressorInputStream(is);
            } else if (fileSuffixes.compression.equals("gz") || fileSuffixes.compression.equals("gzip")) {
                is = new GzipCompressorInputStream(is);
            } else if (fileSuffixes.compression.equals("zip")) {
                is = new ZCompressorInputStream(is);
            } else {
                LOG.warn("Can't yet process compression of type: {}", fileSuffixes.compression);
                // The stream is already open; release it before bailing out.
                IOUtils.closeQuietly(is);
                return metadataList;
            }
        }
        reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
    } catch (IOException e) {
        // If a decompressor constructor failed, "is" still refers to the raw
        // file stream, so closing it here prevents a leaked file handle.
        IOUtils.closeQuietly(is);
        throw new ExtractReaderException(ExtractReaderException.TYPE.IO_EXCEPTION);
    }

    try {
        if (fileSuffixes.txtOrJson.equals("json")) {
            metadataList = JsonMetadataList.fromJson(reader);
            if (alterMetadataList.equals(ALTER_METADATA_LIST.FIRST_ONLY) && metadataList.size() > 1) {
                while (metadataList.size() > 1) {
                    metadataList.remove(metadataList.size() - 1);
                }
            } else if (alterMetadataList.equals(ALTER_METADATA_LIST.CONCATENATE_CONTENT_INTO_FIRST)
                    && metadataList.size() > 1) {
                StringBuilder sb = new StringBuilder();
                Metadata containerMetadata = metadataList.get(0);
                for (Metadata m : metadataList) {
                    String c = m.get(RecursiveParserWrapper.TIKA_CONTENT);
                    if (c != null) {
                        sb.append(c);
                        sb.append(" ");
                    }
                }
                containerMetadata.set(RecursiveParserWrapper.TIKA_CONTENT, sb.toString());
                while (metadataList.size() > 1) {
                    metadataList.remove(metadataList.size() - 1);
                }
            }
        } else {
            metadataList = generateListFromTextFile(reader, fileSuffixes);
        }
    } catch (IOException e) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.IO_EXCEPTION);
    } catch (TikaException e) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_PARSE_EXCEPTION);
    } finally {
        IOUtils.closeQuietly(reader);
        IOUtils.closeQuietly(is);
    }
    return metadataList;
}
Aggregations