Search in sources :

Example 6 with RowBlock

use of com.facebook.presto.common.block.RowBlock in project presto by prestodb.

the class TestStructBatchStreamReader method testExtraFieldsInReader.

/**
 * The reader declares a struct field ({@code field_b}) that is missing from the ORC file.
 * The fields the file does contain must keep their written values, and the extra
 * reader-only field must come back as {@code null}.
 */
@Test
public void testExtraFieldsInReader() throws IOException {
    List<String> readerFields = new ArrayList<>(Arrays.asList("field_a", "field_b", "field_c"));
    // The writer schema deliberately leaves out field_b.
    List<String> writerFields = new ArrayList<>(Arrays.asList("field_a", "field_c"));
    List<String> writerData = new ArrayList<>(Arrays.asList("field_a_value", "field_c_value"));

    Type readerType = getType(readerFields);
    Type writerType = getType(writerFields);
    write(tempFile, writerType, writerData);

    RowBlock readBlock = read(tempFile, readerType);
    // Unbounded wildcard instead of a raw List: the element type is not known here,
    // and the assertions below only need Object-level access.
    List<?> actual = (List<?>) readerType.getObjectValue(SESSION.getSqlFunctionProperties(), readBlock, 0);

    assertEquals(actual.size(), readerFields.size());
    assertEquals(actual.get(0), "field_a_value");
    assertNull(actual.get(1));
    assertEquals(actual.get(2), "field_c_value");
}
Also used : Type(com.facebook.presto.common.type.Type) ArrayList(java.util.ArrayList) RowBlock(com.facebook.presto.common.block.RowBlock) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) Test(org.testng.annotations.Test)

Example 7 with RowBlock

use of com.facebook.presto.common.block.RowBlock in project presto by prestodb.

the class ParquetReader method readStruct.

/**
 * Reads the children of a struct field and assembles them into a single row block.
 *
 * <p>Each present child is read with {@code readColumnChunk}. Each absent child is replaced
 * by a run-length-encoded null block whose position count equals that of the last child
 * chunk that was read. The definition and repetition levels of that same last chunk are used
 * to compute the struct-level null vector and are carried on the returned chunk.
 *
 * @param field the struct field to read
 * @return a chunk wrapping the assembled row block
 * @throws IOException if reading a child column chunk fails
 * @throws IllegalStateException if none of the struct's children is present, because there is
 *         then no chunk to take the position count or the levels from
 */
private ColumnChunk readStruct(GroupField field) throws IOException {
    List<TypeSignatureParameter> fields = field.getType().getTypeSignature().getParameters();
    Block[] blocks = new Block[fields.size()];
    List<Optional<Field>> children = field.getChildren();

    // Remember the most recently read child chunk; its levels describe the struct below.
    ColumnChunk lastChunk = null;
    for (int i = 0; i < fields.size(); i++) {
        Optional<Field> child = children.get(i);
        if (child.isPresent()) {
            lastChunk = readColumnChunk(child.get());
            blocks[i] = lastChunk.getBlock();
        }
    }

    // Without at least one present child the code below would dereference null;
    // fail with an explicit message instead of an anonymous NullPointerException.
    if (lastChunk == null) {
        throw new IllegalStateException(
                "Cannot read struct field: none of its " + fields.size() + " children is present");
    }

    // Fill the slots of absent children with all-null blocks of matching length.
    int positionCount = lastChunk.getBlock().getPositionCount();
    for (int i = 0; i < fields.size(); i++) {
        if (blocks[i] == null) {
            blocks[i] = RunLengthEncodedBlock.create(field.getType(), null, positionCount);
        }
    }

    BooleanList structIsNull = StructColumnReader.calculateStructOffsets(field, lastChunk.getDefinitionLevels(), lastChunk.getRepetitionLevels());
    boolean[] structIsNullVector = structIsNull.toBooleanArray();
    Block rowBlock = RowBlock.fromFieldBlocks(structIsNullVector.length, Optional.of(structIsNullVector), blocks);
    return new ColumnChunk(rowBlock, lastChunk.getDefinitionLevels(), lastChunk.getRepetitionLevels());
}
Also used : BooleanList(it.unimi.dsi.fastutil.booleans.BooleanList) GroupField(com.facebook.presto.parquet.GroupField) PrimitiveField(com.facebook.presto.parquet.PrimitiveField) Field(com.facebook.presto.parquet.Field) Optional(java.util.Optional) TypeSignatureParameter(com.facebook.presto.common.type.TypeSignatureParameter) RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock) IntArrayBlock(com.facebook.presto.common.block.IntArrayBlock) RowBlock(com.facebook.presto.common.block.RowBlock) ArrayBlock(com.facebook.presto.common.block.ArrayBlock) LongArrayBlock(com.facebook.presto.common.block.LongArrayBlock) Block(com.facebook.presto.common.block.Block)

Example 8 with RowBlock

use of com.facebook.presto.common.block.RowBlock in project presto by prestodb.

the class OptimizedPartitionedOutputOperator method decodeBlock.

/**
 * Flattens {@code block} and recursively turns it into a tree of {@link DecodedBlockNode}s.
 * Array, map and row blocks are converted to ColumnarArray/ColumnarMap/ColumnarRow with their
 * nested blocks decoded as children; dictionary and run-length-encoded blocks are kept as they
 * are, with their dictionary / value block decoded as the single child. Any other block
 * becomes a leaf.
 * For performance considerations the decoding happens once per block rather than once per batch.
 *
 * @param flattener used to flatten every block visited
 * @param blockLeaseCloser closer on which the close of each obtained lease is registered
 * @param block the block to decode
 * @return A tree structure that contains the decoded block
 */
@VisibleForTesting
static DecodedBlockNode decodeBlock(BlockFlattener flattener, Closer blockLeaseCloser, Block block) {
    BlockLease blockLease = flattener.flatten(block);
    blockLeaseCloser.register(blockLease::close);
    Block flattened = blockLease.get();
    long logicalSizeInBytes = flattened.getLogicalSizeInBytes();

    if (flattened instanceof ArrayBlock) {
        ColumnarArray array = ColumnarArray.toColumnarArray(flattened);
        DecodedBlockNode elementsNode = decodeBlock(flattener, blockLeaseCloser, array.getElementsBlock());
        return new DecodedBlockNode(array, ImmutableList.of(elementsNode), array.getRetainedSizeInBytes(), logicalSizeInBytes);
    }

    if (flattened instanceof MapBlock) {
        ColumnarMap map = ColumnarMap.toColumnarMap(flattened);
        Block keys = map.getKeysBlock();
        Block values = map.getValuesBlock();
        // Keys are decoded before values, so the keys node is always the first child.
        DecodedBlockNode keysNode = decodeBlock(flattener, blockLeaseCloser, keys);
        DecodedBlockNode valuesNode = decodeBlock(flattener, blockLeaseCloser, values);
        return new DecodedBlockNode(map, ImmutableList.of(keysNode, valuesNode), map.getRetainedSizeInBytes(), logicalSizeInBytes);
    }

    if (flattened instanceof RowBlock) {
        ColumnarRow row = ColumnarRow.toColumnarRow(flattened);
        ImmutableList.Builder<DecodedBlockNode> fieldNodes = ImmutableList.builder();
        for (int fieldIndex = 0; fieldIndex < row.getFieldCount(); fieldIndex++) {
            fieldNodes.add(decodeBlock(flattener, blockLeaseCloser, row.getField(fieldIndex)));
        }
        return new DecodedBlockNode(row, fieldNodes.build(), row.getRetainedSizeInBytes(), logicalSizeInBytes);
    }

    if (flattened instanceof DictionaryBlock) {
        DictionaryBlock dictionaryBlock = (DictionaryBlock) flattened;
        DecodedBlockNode dictionaryNode = decodeBlock(flattener, blockLeaseCloser, dictionaryBlock.getDictionary());
        return new DecodedBlockNode(flattened, ImmutableList.of(dictionaryNode), flattened.getRetainedSizeInBytes(), logicalSizeInBytes);
    }

    if (flattened instanceof RunLengthEncodedBlock) {
        RunLengthEncodedBlock rleBlock = (RunLengthEncodedBlock) flattened;
        DecodedBlockNode valueNode = decodeBlock(flattener, blockLeaseCloser, rleBlock.getValue());
        return new DecodedBlockNode(flattened, ImmutableList.of(valueNode), flattened.getRetainedSizeInBytes(), logicalSizeInBytes);
    }

    // Leaf: no nested blocks to decode.
    // NOTE(review): the retained size here is read from the input block, while every branch
    // above reads it from the flattened/columnar view — confirm this difference is intended.
    return new DecodedBlockNode(flattened, ImmutableList.of(), block.getRetainedSizeInBytes(), logicalSizeInBytes);
}
Also used : ColumnarArray(com.facebook.presto.common.block.ColumnarArray) BlockLease(com.facebook.presto.common.block.BlockLease) ArrayBlock(com.facebook.presto.common.block.ArrayBlock) ImmutableList(com.google.common.collect.ImmutableList) DictionaryBlock(com.facebook.presto.common.block.DictionaryBlock) RowBlock(com.facebook.presto.common.block.RowBlock) ColumnarRow(com.facebook.presto.common.block.ColumnarRow) ColumnarMap(com.facebook.presto.common.block.ColumnarMap) RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock) MapBlock(com.facebook.presto.common.block.MapBlock) RowBlock(com.facebook.presto.common.block.RowBlock) ArrayBlock(com.facebook.presto.common.block.ArrayBlock) DictionaryBlock(com.facebook.presto.common.block.DictionaryBlock) Block(com.facebook.presto.common.block.Block) MapBlock(com.facebook.presto.common.block.MapBlock) RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 9 with RowBlock

use of com.facebook.presto.common.block.RowBlock in project presto by prestodb.

the class TestStructBatchStreamReader method write.

/**
 * Writes one page holding a single struct column to {@code tempFile} through an {@code OrcWriter}.
 * Every entry of {@code data} becomes one field of the struct; that field carries the same
 * value on each of the 10 rows written, and no row is marked null.
 *
 * @param tempFile destination file
 * @param writerType type of the struct column handed to the writer
 * @param data one value per struct field, in field order
 * @throws IOException if opening, writing or closing the file fails
 */
private void write(TempFile tempFile, Type writerType, List<String> data) throws IOException {
    OrcWriter writer = new OrcWriter(
            new OutputStreamDataSink(new FileOutputStream(tempFile.getFile())),
            ImmutableList.of(STRUCT_COL_NAME),
            ImmutableList.of(writerType),
            ORC,
            NONE,
            Optional.empty(),
            NO_ENCRYPTION,
            OrcWriterOptions.builder()
                    .withFlushPolicy(DefaultOrcWriterFlushPolicy.builder()
                            .withStripeMinSize(new DataSize(0, MEGABYTE))
                            .withStripeMaxSize(new DataSize(32, MEGABYTE))
                            .withStripeMaxRowCount(ORC_STRIPE_SIZE)
                            .build())
                    .withRowGroupMaxRowCount(ORC_ROW_GROUP_SIZE)
                    .withDictionaryMaxMemory(new DataSize(32, MEGABYTE))
                    .build(),
            ImmutableMap.of(),
            HIVE_STORAGE_TIME_ZONE,
            true,
            BOTH,
            new OrcWriterStats());
    // write down some data with unsorted streams
    Block[] fieldBlocks = new Block[data.size()];
    int entries = 10;
    // A freshly allocated boolean[] is already all false, i.e. no row is null.
    boolean[] rowIsNull = new boolean[entries];
    BlockBuilder blockBuilder = TEST_DATA_TYPE.createBlockBuilder(null, entries);
    for (int i = 0; i < data.size(); i++) {
        // Encode explicitly as UTF-8 so the bytes written do not depend on the platform default charset.
        byte[] bytes = data.get(i).getBytes(java.nio.charset.StandardCharsets.UTF_8);
        for (int j = 0; j < entries; j++) {
            blockBuilder.writeBytes(Slices.wrappedBuffer(bytes), 0, bytes.length);
            blockBuilder.closeEntry();
        }
        fieldBlocks[i] = blockBuilder.build();
        // Start the next field from an empty builder of the same kind.
        blockBuilder = blockBuilder.newBlockBuilderLike(null);
    }
    Block rowBlock = RowBlock.fromFieldBlocks(rowIsNull.length, Optional.of(rowIsNull), fieldBlocks);
    writer.write(new Page(rowBlock));
    writer.close();
}
Also used : FileOutputStream(java.io.FileOutputStream) DataSize(io.airlift.units.DataSize) RowBlock(com.facebook.presto.common.block.RowBlock) Block(com.facebook.presto.common.block.Block) Page(com.facebook.presto.common.Page) OutputStreamDataSink(com.facebook.presto.common.io.OutputStreamDataSink) BlockBuilder(com.facebook.presto.common.block.BlockBuilder)

Example 10 with RowBlock

use of com.facebook.presto.common.block.RowBlock in project presto by prestodb.

the class TestStructBatchStreamReader method testValuesAreReadInCorrectly.

/**
 * Reader and writer declare the same three struct fields. Checks that the value written for
 * each field is the value read back for it.
 */
@Test
public void testValuesAreReadInCorrectly() throws IOException {
    List<String> readerFields = new ArrayList<>(Arrays.asList("field_a", "field_b", "field_c"));
    List<String> writerFields = new ArrayList<>(Arrays.asList("field_a", "field_b", "field_c"));
    List<String> writerData = new ArrayList<>(Arrays.asList("field_a_value", "field_b_value", "field_c_value"));

    Type readerType = getType(readerFields);
    Type writerType = getType(writerFields);
    write(tempFile, writerType, writerData);

    RowBlock readBlock = read(tempFile, readerType);
    // Unbounded wildcard instead of a raw List: the element type is not known here,
    // and the assertions below only need Object-level access.
    List<?> actual = (List<?>) readerType.getObjectValue(SESSION.getSqlFunctionProperties(), readBlock, 0);

    assertEquals(actual.size(), readerFields.size());
    assertEquals(actual.get(0), "field_a_value");
    assertEquals(actual.get(1), "field_b_value");
    assertEquals(actual.get(2), "field_c_value");
}
Also used : Type(com.facebook.presto.common.type.Type) ArrayList(java.util.ArrayList) RowBlock(com.facebook.presto.common.block.RowBlock) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) Test(org.testng.annotations.Test)

Aggregations

RowBlock (com.facebook.presto.common.block.RowBlock)11 Type (com.facebook.presto.common.type.Type)8 ImmutableList (com.google.common.collect.ImmutableList)6 Test (org.testng.annotations.Test)6 Block (com.facebook.presto.common.block.Block)5 ArrayList (java.util.ArrayList)5 List (java.util.List)5 RunLengthEncodedBlock (com.facebook.presto.common.block.RunLengthEncodedBlock)3 ArrayBlock (com.facebook.presto.common.block.ArrayBlock)2 BlockBuilder (com.facebook.presto.common.block.BlockBuilder)2 RowType (com.facebook.presto.common.type.RowType)2 DataSize (io.airlift.units.DataSize)2 Page (com.facebook.presto.common.Page)1 RuntimeStats (com.facebook.presto.common.RuntimeStats)1 BlockLease (com.facebook.presto.common.block.BlockLease)1 ColumnarArray (com.facebook.presto.common.block.ColumnarArray)1 ColumnarMap (com.facebook.presto.common.block.ColumnarMap)1 ColumnarRow (com.facebook.presto.common.block.ColumnarRow)1 DictionaryBlock (com.facebook.presto.common.block.DictionaryBlock)1 IntArrayBlock (com.facebook.presto.common.block.IntArrayBlock)1