Search in sources :

Example 6 with RowBlock

use of io.trino.spi.block.RowBlock in project trino by trinodb.

the class TestStructColumnReader method testValuesAreReadInCorrectly.

/**
 * Round-trips a struct whose reader schema and writer schema list the same three fields,
 * and verifies that the first row read back carries every value in its original position.
 */
@Test
public void testValuesAreReadInCorrectly() throws IOException {
    List<String> readerFields = new ArrayList<>(Arrays.asList("field_a", "field_b", "field_c"));
    List<String> writerFields = new ArrayList<>(Arrays.asList("field_a", "field_b", "field_c"));
    List<String> writerData = new ArrayList<>(Arrays.asList("field_a_value", "field_b_value", "field_c_value"));

    // Values expected at each reader position, in reader field order.
    List<String> expectedValues = Arrays.asList("field_a_value", "field_b_value", "field_c_value");

    Type typeForReading = getType(readerFields);
    Type typeForWriting = getType(writerFields);

    write(tempFile, typeForWriting, writerData);
    RowBlock blockReadBack = read(tempFile, typeForReading);

    // Only the first row (position 0) is inspected.
    Object firstRow = typeForReading.getObjectValue(TestingConnectorSession.SESSION, blockReadBack, 0);
    List<?> actual = (List<?>) firstRow;

    assertEquals(actual.size(), readerFields.size());
    for (int position = 0; position < expectedValues.size(); position++) {
        assertEquals(actual.get(position), expectedValues.get(position));
    }
}
Also used : Type(io.trino.spi.type.Type) OrcType(io.trino.orc.metadata.OrcType) ArrayList(java.util.ArrayList) RowBlock(io.trino.spi.block.RowBlock) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) Test(org.testng.annotations.Test)

Example 7 with RowBlock

use of io.trino.spi.block.RowBlock in project trino by trinodb.

the class ParquetReader method readStruct.

/**
 * Reads the child columns of a struct field and combines them into a single row block.
 *
 * <p>Each child that is present is read via {@code readColumnChunk}; each child that is
 * absent is replaced by a run-length-encoded block of nulls with the same position count
 * as the last child chunk that was read. The definition and repetition levels of that
 * last-read child chunk are used both to compute the per-row null flags and to populate
 * the returned chunk.
 *
 * @param field the struct field to read
 * @return a chunk wrapping the assembled row block together with the definition and
 *         repetition levels of the last child chunk read
 * @throws IOException if reading a child fails, or if none of the struct's children is
 *         present (no levels are available to build the struct from)
 */
private ColumnChunk readStruct(GroupField field) throws IOException {
    List<TypeSignatureParameter> fields = field.getType().getTypeSignature().getParameters();
    Block[] blocks = new Block[fields.size()];
    ColumnChunk columnChunk = null;
    List<Optional<Field>> parameters = field.getChildren();
    for (int i = 0; i < fields.size(); i++) {
        Optional<Field> parameter = parameters.get(i);
        if (parameter.isPresent()) {
            // Keep the most recently read chunk; its levels and position count are reused below.
            columnChunk = readColumnChunk(parameter.get());
            blocks[i] = columnChunk.getBlock();
        }
    }
    if (columnChunk == null) {
        // Without at least one child there is nothing to derive positions or levels from;
        // fail with a descriptive error instead of a NullPointerException further down.
        throw new IOException("Struct field does not have any children: " + field);
    }
    for (int i = 0; i < fields.size(); i++) {
        if (blocks[i] == null) {
            // Absent child: fill with an all-null block sized like the child that was read.
            blocks[i] = RunLengthEncodedBlock.create(field.getType().getTypeParameters().get(i), null, columnChunk.getBlock().getPositionCount());
        }
    }
    BooleanList structIsNull = StructColumnReader.calculateStructOffsets(field, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
    boolean[] structIsNullVector = structIsNull.toBooleanArray();
    Block rowBlock = RowBlock.fromFieldBlocks(structIsNullVector.length, Optional.of(structIsNullVector), blocks);
    return new ColumnChunk(rowBlock, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
}
Also used : BooleanList(it.unimi.dsi.fastutil.booleans.BooleanList) GroupField(io.trino.parquet.GroupField) PrimitiveField(io.trino.parquet.PrimitiveField) Field(io.trino.parquet.Field) Optional(java.util.Optional) TypeSignatureParameter(io.trino.spi.type.TypeSignatureParameter) Block(io.trino.spi.block.Block) RunLengthEncodedBlock(io.trino.spi.block.RunLengthEncodedBlock) ArrayBlock(io.trino.spi.block.ArrayBlock) RowBlock(io.trino.spi.block.RowBlock)

Example 8 with RowBlock

use of io.trino.spi.block.RowBlock in project trino by trinodb.

the class TestStructColumnReader method testExtraFieldsInWriter.

/**
 * The writer schema contains a field (field_b) that the reader schema omits; the reader
 * must return only the fields it asked for, each with the matching written value.
 */
@Test
public void testExtraFieldsInWriter() throws IOException {
    // The reader deliberately leaves out field_b.
    List<String> readerFields = new ArrayList<>(Arrays.asList("field_a", "field_c"));
    List<String> writerFields = new ArrayList<>(Arrays.asList("field_a", "field_b", "field_c"));
    List<String> writerData = new ArrayList<>(Arrays.asList("field_a_value", "field_b_value", "field_c_value"));

    // Values expected at each reader position, in reader field order.
    List<String> expectedValues = Arrays.asList("field_a_value", "field_c_value");

    Type typeForReading = getType(readerFields);
    Type typeForWriting = getType(writerFields);

    write(tempFile, typeForWriting, writerData);
    RowBlock blockReadBack = read(tempFile, typeForReading);

    // Only the first row (position 0) is inspected.
    Object firstRow = typeForReading.getObjectValue(TestingConnectorSession.SESSION, blockReadBack, 0);
    List<?> actual = (List<?>) firstRow;

    assertEquals(actual.size(), readerFields.size());
    for (int position = 0; position < expectedValues.size(); position++) {
        assertEquals(actual.get(position), expectedValues.get(position));
    }
}
Also used : Type(io.trino.spi.type.Type) OrcType(io.trino.orc.metadata.OrcType) ArrayList(java.util.ArrayList) RowBlock(io.trino.spi.block.RowBlock) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) Test(org.testng.annotations.Test)

Example 9 with RowBlock

use of io.trino.spi.block.RowBlock in project trino by trinodb.

the class TestStructColumnReader method testExtraFieldsInReader.

/**
 * The reader schema contains a field (field_b) that was never written; the reader must
 * return null for that field and the written values for the others.
 */
@Test
public void testExtraFieldsInReader() throws IOException {
    List<String> readerFields = new ArrayList<>(Arrays.asList("field_a", "field_b", "field_c"));
    // The writer deliberately leaves out field_b.
    List<String> writerFields = new ArrayList<>(Arrays.asList("field_a", "field_c"));
    List<String> writerData = new ArrayList<>(Arrays.asList("field_a_value", "field_c_value"));

    Type typeForReading = getType(readerFields);
    Type typeForWriting = getType(writerFields);

    write(tempFile, typeForWriting, writerData);
    RowBlock blockReadBack = read(tempFile, typeForReading);

    // Only the first row (position 0) is inspected.
    Object firstRow = typeForReading.getObjectValue(TestingConnectorSession.SESSION, blockReadBack, 0);
    List<?> actual = (List<?>) firstRow;

    assertEquals(actual.size(), readerFields.size());

    Object valueOfFieldA = actual.get(0);
    assertEquals(valueOfFieldA, "field_a_value");

    // field_b exists only on the reader side, so it must come back as null.
    Object valueOfFieldB = actual.get(1);
    assertNull(valueOfFieldB);

    Object valueOfFieldC = actual.get(2);
    assertEquals(valueOfFieldC, "field_c_value");
}
Also used : Type(io.trino.spi.type.Type) OrcType(io.trino.orc.metadata.OrcType) ArrayList(java.util.ArrayList) RowBlock(io.trino.spi.block.RowBlock) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) Test(org.testng.annotations.Test)

Example 10 with RowBlock

use of io.trino.spi.block.RowBlock in project trino by trinodb.

the class TestStructColumnReader method write.

/**
 * Writes a single-column ORC file whose only column is a struct of type {@code writerType}.
 *
 * <p>Ten rows are written, none of them null. For every entry of {@code data} one field
 * block is built in which that entry's UTF-8 bytes are repeated for each row, so all rows
 * hold identical values.
 *
 * @param tempFile the file to write to
 * @param writerType the type of the struct column being written
 * @param data one value per struct field, in field order
 * @throws IOException if creating, writing, or closing the ORC file fails
 */
private void write(TempFile tempFile, Type writerType, List<String> data) throws IOException {
    List<String> columnNames = ImmutableList.of(STRUCT_COL_NAME);
    List<Type> types = ImmutableList.of(writerType);
    // try-with-resources guarantees the writer (and the file stream it wraps) is closed
    // even when building the blocks or writing the page throws.
    try (OrcWriter writer = new OrcWriter(new OutputStreamOrcDataSink(new FileOutputStream(tempFile.getFile())), columnNames, types, OrcType.createRootOrcType(columnNames, types), NONE, new OrcWriterOptions().withStripeMinSize(DataSize.of(0, MEGABYTE)).withStripeMaxSize(DataSize.of(32, MEGABYTE)).withStripeMaxRowCount(ORC_STRIPE_SIZE).withRowGroupMaxRowCount(ORC_ROW_GROUP_SIZE).withDictionaryMaxMemory(DataSize.of(32, MEGABYTE)), ImmutableMap.of(), true, BOTH, new OrcWriterStats())) {
        Block[] fieldBlocks = new Block[data.size()];
        int entries = 10;
        // A freshly allocated boolean[] is all false, i.e. no row is marked null.
        boolean[] rowIsNull = new boolean[entries];
        BlockBuilder blockBuilder = TEST_DATA_TYPE.createBlockBuilder(null, entries);
        for (int i = 0; i < data.size(); i++) {
            byte[] bytes = data.get(i).getBytes(UTF_8);
            // Repeat the same value for every row of this field.
            for (int j = 0; j < entries; j++) {
                blockBuilder.writeBytes(Slices.wrappedBuffer(bytes), 0, bytes.length);
                blockBuilder.closeEntry();
            }
            fieldBlocks[i] = blockBuilder.build();
            // Start a fresh builder for the next field.
            blockBuilder = blockBuilder.newBlockBuilderLike(null);
        }
        Block rowBlock = RowBlock.fromFieldBlocks(rowIsNull.length, Optional.of(rowIsNull), fieldBlocks);
        writer.write(new Page(rowBlock));
    }
}
Also used : Page(io.trino.spi.Page) Type(io.trino.spi.type.Type) OrcType(io.trino.orc.metadata.OrcType) FileOutputStream(java.io.FileOutputStream) Block(io.trino.spi.block.Block) RowBlock(io.trino.spi.block.RowBlock) BlockBuilder(io.trino.spi.block.BlockBuilder)

Aggregations

RowBlock (io.trino.spi.block.RowBlock)14 Block (io.trino.spi.block.Block)8 Type (io.trino.spi.type.Type)8 ImmutableList (com.google.common.collect.ImmutableList)6 OrcType (io.trino.orc.metadata.OrcType)6 ArrayList (java.util.ArrayList)6 Test (org.testng.annotations.Test)6 List (java.util.List)5 RunLengthEncodedBlock (io.trino.spi.block.RunLengthEncodedBlock)4 Page (io.trino.spi.Page)3 ReaderUtils.verifyStreamType (io.trino.orc.reader.ReaderUtils.verifyStreamType)2 ColumnarRow (io.trino.spi.block.ColumnarRow)2 ColumnarRow.toColumnarRow (io.trino.spi.block.ColumnarRow.toColumnarRow)2 LazyBlock (io.trino.spi.block.LazyBlock)2 RowType (io.trino.spi.type.RowType)2 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 OrcCorruptionException (io.trino.orc.OrcCorruptionException)1 ColumnReaders.createColumnReader (io.trino.orc.reader.ColumnReaders.createColumnReader)1 Field (io.trino.parquet.Field)1