Use of com.accenture.trac.common.exception.EDataCorruption in project tracdap by finos:
the decodeChunk method of the JsonDecoder class.
/**
 * Decode one chunk of a JSON data stream.
 *
 * <p>Bytes are copied out of the incoming Netty buffer and fed to the non-blocking
 * Jackson parser; all tokens currently available are then drained and handed to
 * {@code acceptToken}. The chunk is always released, on success or failure.
 *
 * @param chunk the next buffer of JSON content (released before this method returns)
 * @throws EDataCorruption if the content cannot be parsed as JSON
 * @throws EUnexpected for any error that is not a recognized parse failure
 */
@Override
protected void decodeChunk(ByteBuf chunk) {

    try {

        // Copy the readable region out of the Netty buffer for the feeder API
        var feedBuffer = new byte[chunk.readableBytes()];
        chunk.readBytes(feedBuffer);
        parser.feedInput(feedBuffer, 0, feedBuffer.length);

        // Drain every token available so far; NOT_AVAILABLE means more input is needed
        for (var token = parser.nextToken(); token != JsonToken.NOT_AVAILABLE; token = parser.nextToken())
            parser.acceptToken(token);
    }
    catch (JacksonException e) {

        // This exception is a "well-behaved" parse failure, parse location and message should be meaningful
        var errorMessage = String.format("JSON decoding failed on line %d: %s", e.getLocation().getLineNr(), e.getOriginalMessage());
        log.error(errorMessage, e);
        throw new EDataCorruption(errorMessage, e);
    }
    catch (IOException e) {

        // Decoders work on a stream of buffers, "real" IO exceptions should not occur
        // IO exceptions here indicate parse failures, not file/socket communication errors
        // This is likely to be a more "badly-behaved" failure, or at least one that was not anticipated
        var errorMessage = "JSON decoding failed, content is garbled: " + e.getMessage();
        log.error(errorMessage, e);
        throw new EDataCorruption(errorMessage, e);
    }
    catch (Throwable e) {

        // Ensure unexpected errors are still reported to the Flow API
        log.error("Unexpected error during decoding", e);
        throw new EUnexpected(e);
    }
    finally {

        // The chunk is owned by this decoder once delivered; always release it
        chunk.release();
    }
}
Use of com.accenture.trac.common.exception.EDataCorruption in project tracdap by finos:
the decodeChunk method of the ArrowFileDecoder class.
/**
 * Decode one chunk containing a complete Arrow file.
 *
 * <p>The chunk is wrapped in a seekable channel and read with {@link ArrowFileReader}.
 * The schema is emitted first, then each record batch is unloaded and emitted.
 * The chunk is always released, on success or failure.
 *
 * @param chunk a buffer holding a complete Arrow file (released before this method returns)
 * @throws EDataCorruption if the content is not a valid Arrow file
 * @throws EUnexpected for any error that is not a recognized decode failure
 */
@Override
protected void decodeChunk(ByteBuf chunk) {

    try (var stream = new ByteSeekableChannel(chunk);
         var reader = new ArrowFileReader(stream, arrowAllocator);
         var root = reader.getVectorSchemaRoot()) {

        var schema = root.getSchema();
        emitBlock(DataBlock.forSchema(schema));

        var unloader = new VectorUnloader(root);

        while (reader.loadNextBatch()) {
            var batch = unloader.getRecordBatch();
            emitBlock(DataBlock.forRecords(batch));
            // Release memory retained in VSR (batch still has a reference)
            // Matches the equivalent loop in ArrowStreamDecoder, which was already clearing the root
            root.clear();
        }
    }
    catch (InvalidArrowFileException e) {

        // A nice clean validation failure from the Arrow framework
        // E.g. missing / incorrect magic number at the start (or end) of the file
        var errorMessage = "Arrow file decoding failed, file is invalid: " + e.getMessage();
        log.error(errorMessage, e);
        throw new EDataCorruption(errorMessage, e);
    }
    catch (IllegalArgumentException | IndexOutOfBoundsException | IOException e) {

        // These errors occur if the data stream contains bad values for vector sizes, offsets etc.
        // This may be as a result of a corrupt data stream, or a maliciously crafted message
        // Decoders work on a stream of buffers, "real" IO exceptions should not occur
        var errorMessage = "Arrow file decoding failed, content is garbled";
        log.error(errorMessage, e);
        throw new EDataCorruption(errorMessage, e);
    }
    catch (Throwable e) {

        // Ensure unexpected errors are still reported to the Flow API
        log.error("Unexpected error in Arrow file decoding", e);
        throw new EUnexpected(e);
    }
    finally {

        // The chunk is owned by this decoder once delivered; always release it
        chunk.release();
    }
}
Use of com.accenture.trac.common.exception.EDataCorruption in project tracdap by finos:
the decodeChunk method of the ArrowStreamDecoder class.
/**
 * Decode one chunk containing a complete Arrow IPC stream.
 *
 * <p>The start of the stream is sanity-checked before any reader is constructed,
 * then the schema is emitted followed by each record batch. The root is cleared
 * after each batch so memory is not retained in the VSR. The chunk is always
 * released, on success or failure.
 *
 * @param chunk a buffer holding a complete Arrow stream (released before this method returns)
 * @throws EDataCorruption if the content is not a valid Arrow stream
 * @throws EUnexpected for any error that is not a recognized decode failure
 */
@Override
protected void decodeChunk(ByteBuf chunk) {

    try (var channel = new ByteSeekableChannel(chunk)) {

        // Arrow does not attempt to validate the stream before reading
        // This quick validation peeks at the start of the stream for a basic sanity check
        // It should be enough to flag e.g. if data has been sent in a totally different format
        // Make sure to do this check before setting up reader + root,
        // since that will trigger reading the initial schema message
        validateStartOfStream(channel);

        try (var streamReader = new ArrowStreamReader(channel, arrowAllocator);
             var vsr = streamReader.getVectorSchemaRoot()) {

            emitBlock(DataBlock.forSchema(vsr.getSchema()));

            var batchUnloader = new VectorUnloader(vsr);

            while (streamReader.loadNextBatch()) {
                emitBlock(DataBlock.forRecords(batchUnloader.getRecordBatch()));
                // Release memory retained in VSR (batch still has a reference)
                vsr.clear();
            }
        }
    }
    catch (NotAnArrowStream e) {

        // A nice clean validation exception
        var errorMessage = "Arrow stream decoding failed, content does not look like an Arrow stream";
        log.error(errorMessage, e);
        throw new EDataCorruption(errorMessage, e);
    }
    catch (IllegalArgumentException | IndexOutOfBoundsException | IOException e) {

        // These errors occur if the data stream contains bad values for vector sizes, offsets etc.
        // This may be as a result of a corrupt data stream, or a maliciously crafted message
        // Decoders work on a stream of buffers, "real" IO exceptions should not occur
        var errorMessage = "Arrow stream decoding failed, content is garbled";
        log.error(errorMessage, e);
        throw new EDataCorruption(errorMessage, e);
    }
    catch (Throwable e) {

        // Ensure unexpected errors are still reported to the Flow API
        log.error("Unexpected error in Arrow stream decoding", e);
        throw new EUnexpected(e);
    }
    finally {

        // The chunk is owned by this decoder once delivered; always release it
        chunk.release();
    }
}
Use of com.accenture.trac.common.exception.EDataCorruption in project tracdap by finos:
the decodeChunk method of the CsvDecoder class.
/**
 * Decode one chunk containing a complete CSV document.
 *
 * <p>The chunk is parsed with Jackson's CSV parser using a schema mapped from the
 * decoder's Arrow schema. Values are written into the Arrow root vector by vector;
 * rows are dispatched in batches of BATCH_SIZE, with any final partial batch
 * dispatched after the token loop. The chunk is always released, on success or failure.
 *
 * @param chunk a buffer holding a complete CSV document (released before this method returns)
 * @throws EDataCorruption if the content cannot be parsed as CSV
 * @throws EUnexpected for any error that is not a recognized parse failure
 */
@Override
protected void decodeChunk(ByteBuf chunk) {
// Factory is configured per call; TRIM_SPACES normalizes whitespace, FAIL_ON_MISSING_COLUMNS enforces the schema width
var csvFactory = new CsvFactory().enable(CsvParser.Feature.TRIM_SPACES).enable(CsvParser.Feature.FAIL_ON_MISSING_COLUMNS);
try (var stream = new ByteBufInputStream(chunk);
var parser = (CsvParser) csvFactory.createParser((InputStream) stream)) {
// Build the CSV schema from the Arrow schema, honoring the decoder's header-row setting
var csvSchema = CsvSchemaMapping.arrowToCsv(this.arrowSchema).build();
csvSchema = DEFAULT_HEADER_FLAG ? csvSchema.withHeader() : csvSchema.withoutHeader();
parser.setSchema(csvSchema);
// row = index within the current batch, col = index within the current row
var row = 0;
var col = 0;
JsonToken token;
while ((token = parser.nextToken()) != null) {
switch(token) {
// For CSV files, a null field name is produced for every field
case FIELD_NAME:
continue;
// All scalar value tokens fall through to the same handler:
// write the value into the vector for the current column, then advance the column
case VALUE_NULL:
case VALUE_TRUE:
case VALUE_FALSE:
case VALUE_STRING:
case VALUE_NUMBER_INT:
case VALUE_NUMBER_FLOAT:
var vector = root.getVector(col);
JacksonValues.parseAndSet(vector, row, parser, token);
col++;
break;
// START_OBJECT marks the start of a row; on the first row of a batch, (re)allocate all vectors
case START_OBJECT:
if (row == 0)
for (var vector_ : root.getFieldVectors()) vector_.allocateNew();
break;
// END_OBJECT marks the end of a row; dispatch the batch when it reaches BATCH_SIZE
case END_OBJECT:
row++;
col = 0;
if (row == BATCH_SIZE) {
root.setRowCount(row);
dispatchBatch(root);
row = 0;
}
break;
// Any other token means the content does not match the expected CSV structure
default:
var msg = String.format("Unexpected token %s", token.name());
throw new CsvReadException(parser, msg, csvSchema);
}
}
// Dispatch any remaining partial batch after the token stream ends
// NOTE(review): if col > 0 but row was not incremented (no closing END_OBJECT for the last
// line), setRowCount(row) will not include that partial row — presumably the parser always
// emits END_OBJECT for a complete line, so this only affects truncated input; verify
if (row > 0 || col > 0) {
root.setRowCount(row);
dispatchBatch(root);
}
} catch (JacksonException e) {
// This exception is a "well-behaved" parse failure, parse location and message should be meaningful
var errorMessage = String.format("CSV decoding failed on line %d: %s", e.getLocation().getLineNr(), e.getOriginalMessage());
log.error(errorMessage, e);
throw new EDataCorruption(errorMessage, e);
} catch (IOException e) {
// Decoders work on a stream of buffers, "real" IO exceptions should not occur
// IO exceptions here indicate parse failures, not file/socket communication errors
// This is likely to be a more "badly-behaved" failure, or at least one that was not anticipated
var errorMessage = "CSV decoding failed, content is garbled: " + e.getMessage();
log.error(errorMessage, e);
throw new EDataCorruption(errorMessage, e);
} catch (Throwable e) {
// Ensure unexpected errors are still reported to the Flow API
log.error("Unexpected error in CSV decoding", e);
throw new EUnexpected(e);
} finally {
// The chunk is owned by this decoder once delivered; always release it
chunk.release();
}
}
Aggregations