Use of com.accenture.trac.common.util.ByteSeekableChannel in project tracdap by finos.
The class ArrowFileDecoder, method decodeChunk.
@Override
protected void decodeChunk(ByteBuf chunk) {

    try (var stream = new ByteSeekableChannel(chunk);
         var reader = new ArrowFileReader(stream, arrowAllocator);
         var root = reader.getVectorSchemaRoot()) {

        var schema = root.getSchema();
        emitBlock(DataBlock.forSchema(schema));

        var unloader = new VectorUnloader(root);

        while (reader.loadNextBatch()) {
            var batch = unloader.getRecordBatch();
            emitBlock(DataBlock.forRecords(batch));
        }
    }
    catch (InvalidArrowFileException e) {

        // A nice clean validation failure from the Arrow framework
        // E.g. missing / incorrect magic number at the start (or end) of the file

        var errorMessage = "Arrow file decoding failed, file is invalid: " + e.getMessage();
        log.error(errorMessage, e);
        throw new EDataCorruption(errorMessage, e);
    }
    catch (IllegalArgumentException | IndexOutOfBoundsException | IOException e) {

        // These errors occur if the data stream contains bad values for vector sizes, offsets etc.
        // This may be the result of a corrupt data stream, or a maliciously crafted message
        // Decoders work on a stream of buffers, so "real" IO exceptions should not occur

        var errorMessage = "Arrow file decoding failed, content is garbled";
        log.error(errorMessage, e);
        throw new EDataCorruption(errorMessage, e);
    }
    catch (Throwable e) {

        // Ensure unexpected errors are still reported to the Flow API

        log.error("Unexpected error in Arrow file decoding", e);
        throw new EUnexpected(e);
    }
    finally {
        chunk.release();
    }
}
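Both decoders rely on ByteSeekableChannel to expose an in-memory ByteBuf as a java.nio SeekableByteChannel, which ArrowFileReader needs for random access to the file footer. The class itself is not shown on this page; the following is a minimal sketch of one plausible implementation over a single Netty ByteBuf, as a read-only channel that does not release the buffer on close (the decoder releases the chunk in its finally block). All names and details here are illustrative assumptions, not the tracdap source.

import io.netty.buffer.ByteBuf;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.ClosedChannelException;
import java.nio.channels.NonWritableChannelException;
import java.nio.channels.SeekableByteChannel;

// Illustrative sketch only - a read-only seekable channel over one ByteBuf
public class ByteSeekableChannel implements SeekableByteChannel {

    private final ByteBuf buffer;
    private long position;
    private boolean open = true;

    public ByteSeekableChannel(ByteBuf buffer) {
        this.buffer = buffer;
    }

    @Override
    public int read(ByteBuffer dst) throws IOException {

        if (!open)
            throw new ClosedChannelException();

        var remaining = buffer.writerIndex() - position;

        if (remaining <= 0)
            return -1;  // end of channel

        var nBytes = (int) Math.min(dst.remaining(), remaining);

        // Copy through a duplicate so dst's limit is not disturbed
        var target = dst.duplicate();
        target.limit(target.position() + nBytes);
        buffer.getBytes((int) position, target);

        dst.position(dst.position() + nBytes);
        position += nBytes;

        return nBytes;
    }

    @Override
    public long position() { return position; }

    @Override
    public SeekableByteChannel position(long newPosition) {
        this.position = newPosition;  // positions past the end just read EOF
        return this;
    }

    @Override
    public long size() { return buffer.writerIndex(); }

    @Override
    public boolean isOpen() { return open; }

    @Override
    public void close() {
        // Do not release the buffer here - ownership stays with the decoder
        open = false;
    }

    // The decoders only read, so the write-side operations are unsupported

    @Override
    public int write(ByteBuffer src) { throw new NonWritableChannelException(); }

    @Override
    public SeekableByteChannel truncate(long size) { throw new NonWritableChannelException(); }
}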
Use of com.accenture.trac.common.util.ByteSeekableChannel in project tracdap by finos.
The class ArrowStreamDecoder, method decodeChunk.
@Override
protected void decodeChunk(ByteBuf chunk) {

    try (var stream = new ByteSeekableChannel(chunk)) {

        // Arrow does not attempt to validate the stream before reading
        // This quick validation peeks at the start of the stream for a basic sanity check
        // It should be enough to flag e.g. data that has been sent in a totally different format
        // Make sure to do this check before setting up the reader + root,
        // since that will trigger reading the initial schema message

        validateStartOfStream(stream);

        try (var reader = new ArrowStreamReader(stream, arrowAllocator);
             var root = reader.getVectorSchemaRoot()) {

            var schema = root.getSchema();
            emitBlock(DataBlock.forSchema(schema));

            var unloader = new VectorUnloader(root);

            while (reader.loadNextBatch()) {

                var batch = unloader.getRecordBatch();
                emitBlock(DataBlock.forRecords(batch));

                // Release memory retained in the VSR (the batch still holds a reference)
                root.clear();
            }
        }
    }
    catch (NotAnArrowStream e) {

        // A nice clean validation exception

        var errorMessage = "Arrow stream decoding failed, content does not look like an Arrow stream";
        log.error(errorMessage, e);
        throw new EDataCorruption(errorMessage, e);
    }
    catch (IllegalArgumentException | IndexOutOfBoundsException | IOException e) {

        // These errors occur if the data stream contains bad values for vector sizes, offsets etc.
        // This may be the result of a corrupt data stream, or a maliciously crafted message
        // Decoders work on a stream of buffers, so "real" IO exceptions should not occur

        var errorMessage = "Arrow stream decoding failed, content is garbled";
        log.error(errorMessage, e);
        throw new EDataCorruption(errorMessage, e);
    }
    catch (Throwable e) {

        // Ensure unexpected errors are still reported to the Flow API

        log.error("Unexpected error in Arrow stream decoding", e);
        throw new EUnexpected(e);
    }
    finally {
        chunk.release();
    }
}
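validateStartOfStream is called above but not shown. A modern Arrow IPC stream begins with the 4-byte continuation marker 0xFFFFFFFF followed by a little-endian metadata length, so peeking at the first word is one cheap way to reject content that is clearly not an Arrow stream. The sketch below illustrates that idea under those assumptions; the method body and the no-arg NotAnArrowStream constructor are hypothetical, not the tracdap implementation (assumes java.nio.ByteBuffer, ByteOrder and SeekableByteChannel imports).

// Illustrative sketch only - peek at the first 4 bytes, then rewind
// Assumes the modern (post-0.15) Arrow IPC format, where every message
// is prefixed with the continuation marker 0xFFFFFFFF
private void validateStartOfStream(SeekableByteChannel stream) throws IOException {

    final int CONTINUATION_MARKER = 0xFFFFFFFF;

    var firstWord = ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN);
    var nBytes = stream.read(firstWord);

    // Rewind, so the real stream reader sees the message from the beginning
    stream.position(0);

    if (nBytes < 4 || firstWord.getInt(0) != CONTINUATION_MARKER)
        throw new NotAnArrowStream();
}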