Use of com.facebook.presto.common.block.RowBlock in project presto by prestodb.
From the class TestStructBatchStreamReader, method testExtraFieldsInReader.
/**
 * Verifies that a struct field requested by the reader but missing from the ORC file
 * is returned as {@code null}, while the fields that were written keep their values.
 *
 * @throws IOException if writing or reading the temporary file fails
 */
@Test
public void testExtraFieldsInReader() throws IOException {
    List<String> readerFields = new ArrayList<>(Arrays.asList("field_a", "field_b", "field_c"));
    // The writer omits field_b, so the file holds only two of the three fields the reader asks for.
    List<String> writerFields = new ArrayList<>(Arrays.asList("field_a", "field_c"));
    List<String> writerData = new ArrayList<>(Arrays.asList("field_a_value", "field_c_value"));

    Type readerType = getType(readerFields);
    Type writerType = getType(writerFields);

    write(tempFile, writerType, writerData);
    RowBlock readBlock = read(tempFile, readerType);

    // Wildcard instead of a raw List: the elements are only ever compared as Objects.
    List<?> actual = (List<?>) readerType.getObjectValue(SESSION.getSqlFunctionProperties(), readBlock, 0);

    assertEquals(actual.size(), readerFields.size());
    assertEquals(actual.get(0), "field_a_value");
    // field_b was never written, so the reader must report it as null.
    assertNull(actual.get(1));
    assertEquals(actual.get(2), "field_c_value");
}
Use of com.facebook.presto.common.block.RowBlock in project presto by prestodb.
From the class ParquetReader, method readStruct.
/**
 * Reads a struct column by reading every child field that is present and assembling
 * the resulting blocks into a single row block.
 *
 * <p>Children that are absent are filled with a run-length-encoded null block whose position
 * count matches the block of the last child that was read. The definition and repetition
 * levels of that same child are used to compute which struct positions are null, and are
 * passed through on the returned chunk.
 *
 * @param field the struct field to read
 * @return a chunk holding the assembled row block and the levels of the last child read
 * @throws IOException if reading a child column chunk fails
 * @throws NullPointerException if none of the struct's children is present
 */
private ColumnChunk readStruct(GroupField field) throws IOException {
    List<TypeSignatureParameter> fields = field.getType().getTypeSignature().getParameters();
    List<Optional<Field>> parameters = field.getChildren();
    Block[] blocks = new Block[fields.size()];

    // Remembers the most recently read child; its levels and position count drive everything below.
    ColumnChunk columnChunk = null;
    for (int i = 0; i < fields.size(); i++) {
        Optional<Field> parameter = parameters.get(i);
        if (parameter.isPresent()) {
            columnChunk = readColumnChunk(parameter.get());
            blocks[i] = columnChunk.getBlock();
        }
    }

    // Without at least one present child there are no levels to derive the struct from.
    // Fail here with a descriptive message instead of an anonymous NPE further down.
    java.util.Objects.requireNonNull(
            columnChunk,
            () -> "No child field was present while reading struct field: " + field);

    int positionCount = columnChunk.getBlock().getPositionCount();
    for (int i = 0; i < blocks.length; i++) {
        if (blocks[i] == null) {
            // NOTE(review): the filler is created with the struct's own type rather than the
            // missing child's type; confirm this is intended.
            blocks[i] = RunLengthEncodedBlock.create(field.getType(), null, positionCount);
        }
    }

    BooleanList structIsNull = StructColumnReader.calculateStructOffsets(
            field,
            columnChunk.getDefinitionLevels(),
            columnChunk.getRepetitionLevels());
    boolean[] structIsNullVector = structIsNull.toBooleanArray();
    Block rowBlock = RowBlock.fromFieldBlocks(structIsNullVector.length, Optional.of(structIsNullVector), blocks);
    return new ColumnChunk(rowBlock, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
}
Use of com.facebook.presto.common.block.RowBlock in project presto by prestodb.
From the class OptimizedPartitionedOutputOperator, method decodeBlock.
/**
 * Flattens the given block and, when it is a nested type, converts it into its columnar
 * form (ColumnarArray / ColumnarMap / ColumnarRow), recursing into every child block.
 * The block is decoded once here rather than once per batch, for performance.
 *
 * @param flattener used to flatten {@code block} and each child block
 * @param blockLeaseCloser closer with which every lease obtained here is registered
 * @param block the block to decode
 * @return the root of a tree structure that contains the decoded block and its decoded children
 */
@VisibleForTesting
static DecodedBlockNode decodeBlock(BlockFlattener flattener, Closer blockLeaseCloser, Block block) {
    BlockLease flattened = flattener.flatten(block);
    // Register the lease before doing anything else so it is released with the closer.
    blockLeaseCloser.register(flattened::close);

    Block current = flattened.get();
    long logicalSize = current.getLogicalSizeInBytes();

    if (current instanceof ArrayBlock) {
        ColumnarArray array = ColumnarArray.toColumnarArray(current);
        Block elements = array.getElementsBlock();
        DecodedBlockNode elementsNode = decodeBlock(flattener, blockLeaseCloser, elements);
        return new DecodedBlockNode(array, ImmutableList.of(elementsNode), array.getRetainedSizeInBytes(), logicalSize);
    }

    if (current instanceof MapBlock) {
        ColumnarMap map = ColumnarMap.toColumnarMap(current);
        Block keys = map.getKeysBlock();
        Block values = map.getValuesBlock();
        // Keys are decoded before values, matching the order of the children in the node.
        DecodedBlockNode keysNode = decodeBlock(flattener, blockLeaseCloser, keys);
        DecodedBlockNode valuesNode = decodeBlock(flattener, blockLeaseCloser, values);
        return new DecodedBlockNode(map, ImmutableList.of(keysNode, valuesNode), map.getRetainedSizeInBytes(), logicalSize);
    }

    if (current instanceof RowBlock) {
        ColumnarRow row = ColumnarRow.toColumnarRow(current);
        ImmutableList.Builder<DecodedBlockNode> fieldNodes = ImmutableList.builder();
        for (int fieldIndex = 0; fieldIndex < row.getFieldCount(); fieldIndex++) {
            fieldNodes.add(decodeBlock(flattener, blockLeaseCloser, row.getField(fieldIndex)));
        }
        return new DecodedBlockNode(row, fieldNodes.build(), row.getRetainedSizeInBytes(), logicalSize);
    }

    if (current instanceof DictionaryBlock) {
        Block dictionary = ((DictionaryBlock) current).getDictionary();
        DecodedBlockNode dictionaryNode = decodeBlock(flattener, blockLeaseCloser, dictionary);
        return new DecodedBlockNode(current, ImmutableList.of(dictionaryNode), current.getRetainedSizeInBytes(), logicalSize);
    }

    if (current instanceof RunLengthEncodedBlock) {
        Block repeatedValue = ((RunLengthEncodedBlock) current).getValue();
        DecodedBlockNode valueNode = decodeBlock(flattener, blockLeaseCloser, repeatedValue);
        return new DecodedBlockNode(current, ImmutableList.of(valueNode), current.getRetainedSizeInBytes(), logicalSize);
    }

    // Leaf block: no children. Note that the retained size is taken from the incoming block,
    // not from the flattened one.
    return new DecodedBlockNode(current, ImmutableList.of(), block.getRetainedSizeInBytes(), logicalSize);
}
Use of com.facebook.presto.common.block.RowBlock in project presto by prestodb.
From the class TestStructBatchStreamReader, method write.
/**
 * Writes one page containing a single struct column to {@code tempFile} using an OrcWriter.
 *
 * <p>Each string in {@code data} becomes one field of the struct. The field's value is
 * repeated for each of the 10 rows, and no row is marked null.
 *
 * @param tempFile destination file
 * @param writerType type of the struct column handed to the writer
 * @param data one value per struct field, in field order
 * @throws IOException if opening, writing or closing the file fails
 */
private void write(TempFile tempFile, Type writerType, List<String> data) throws IOException {
    OrcWriter writer = new OrcWriter(
            new OutputStreamDataSink(new FileOutputStream(tempFile.getFile())),
            ImmutableList.of(STRUCT_COL_NAME),
            ImmutableList.of(writerType),
            ORC,
            NONE,
            Optional.empty(),
            NO_ENCRYPTION,
            OrcWriterOptions.builder()
                    .withFlushPolicy(DefaultOrcWriterFlushPolicy.builder()
                            .withStripeMinSize(new DataSize(0, MEGABYTE))
                            .withStripeMaxSize(new DataSize(32, MEGABYTE))
                            .withStripeMaxRowCount(ORC_STRIPE_SIZE)
                            .build())
                    .withRowGroupMaxRowCount(ORC_ROW_GROUP_SIZE)
                    .withDictionaryMaxMemory(new DataSize(32, MEGABYTE))
                    .build(),
            ImmutableMap.of(),
            HIVE_STORAGE_TIME_ZONE,
            true,
            BOTH,
            new OrcWriterStats());

    // Build one field block per entry in data, each holding the same value for every row.
    Block[] fieldBlocks = new Block[data.size()];
    int entries = 10;
    // A freshly allocated boolean[] is all false, i.e. no struct row is null.
    boolean[] rowIsNull = new boolean[entries];
    BlockBuilder blockBuilder = TEST_DATA_TYPE.createBlockBuilder(null, entries);
    for (int i = 0; i < data.size(); i++) {
        // Encode explicitly as UTF-8 so the written bytes do not depend on the platform default charset.
        byte[] bytes = data.get(i).getBytes(java.nio.charset.StandardCharsets.UTF_8);
        for (int j = 0; j < entries; j++) {
            blockBuilder.writeBytes(Slices.wrappedBuffer(bytes), 0, bytes.length);
            blockBuilder.closeEntry();
        }
        fieldBlocks[i] = blockBuilder.build();
        // Start a fresh builder for the next field.
        blockBuilder = blockBuilder.newBlockBuilderLike(null);
    }

    Block rowBlock = RowBlock.fromFieldBlocks(rowIsNull.length, Optional.of(rowIsNull), fieldBlocks);
    writer.write(new Page(rowBlock));
    writer.close();
}
Use of com.facebook.presto.common.block.RowBlock in project presto by prestodb.
From the class TestStructBatchStreamReader, method testValuesAreReadInCorrectly.
/**
 * Verifies that when reader and writer declare the same struct fields, every field value
 * is read back as it was written.
 *
 * @throws IOException if writing or reading the temporary file fails
 */
@Test
public void testValuesAreReadInCorrectly() throws IOException {
    List<String> readerFields = new ArrayList<>(Arrays.asList("field_a", "field_b", "field_c"));
    List<String> writerFields = new ArrayList<>(Arrays.asList("field_a", "field_b", "field_c"));
    List<String> writerData = new ArrayList<>(Arrays.asList("field_a_value", "field_b_value", "field_c_value"));

    Type readerType = getType(readerFields);
    Type writerType = getType(writerFields);

    write(tempFile, writerType, writerData);
    RowBlock readBlock = read(tempFile, readerType);

    // Wildcard instead of a raw List: the elements are only ever compared as Objects.
    List<?> actual = (List<?>) readerType.getObjectValue(SESSION.getSqlFunctionProperties(), readBlock, 0);

    assertEquals(actual.size(), readerFields.size());
    assertEquals(actual.get(0), "field_a_value");
    assertEquals(actual.get(1), "field_b_value");
    assertEquals(actual.get(2), "field_c_value");
}
Aggregations