Example use of io.trino.spi.block.RowBlock in the trino project by trinodb.
From the class TestStructColumnReader, method testValuesAreReadInCorrectly.
/**
 * Writes a struct using one field list and reads it back using an identical field list,
 * then verifies that the first row exposes every written value in declaration order.
 */
@Test
public void testValuesAreReadInCorrectly()
        throws IOException
{
    List<String> readSchema = new ArrayList<>(Arrays.asList("field_a", "field_b", "field_c"));
    List<String> writeSchema = new ArrayList<>(Arrays.asList("field_a", "field_b", "field_c"));
    List<String> values = new ArrayList<>(Arrays.asList("field_a_value", "field_b_value", "field_c_value"));

    Type structForRead = getType(readSchema);
    Type structForWrite = getType(writeSchema);
    write(tempFile, structForWrite, values);

    RowBlock block = read(tempFile, structForRead);
    List<?> firstRow = (List<?>) structForRead.getObjectValue(TestingConnectorSession.SESSION, block, 0);

    // Expected field values of the first row, in reader field order.
    String[] expected = {"field_a_value", "field_b_value", "field_c_value"};
    assertEquals(firstRow.size(), readSchema.size());
    for (int index = 0; index < expected.length; index++) {
        assertEquals(firstRow.get(index), expected[index]);
    }
}
Example use of io.trino.spi.block.RowBlock in the trino project by trinodb.
From the class ParquetReader, method readStruct.
/**
 * Reads a struct column by reading every child field that is present and combining the
 * resulting blocks into a single row block.
 *
 * <p>Children that are not present are replaced by a run-length-encoded block created with a
 * {@code null} value and the position count of the last child that was read. The definition
 * and repetition levels used for the struct null vector, and returned with the chunk, are
 * those of the last child that was read.
 *
 * @param field the struct field; its children are matched positionally against the
 *         parameters of its type signature
 * @return a column chunk holding the assembled row block together with the definition and
 *         repetition levels of the last child read
 * @throws IOException if none of the struct's children is present, or when propagated from
 *         {@code readColumnChunk}
 */
private ColumnChunk readStruct(GroupField field)
        throws IOException
{
    List<TypeSignatureParameter> fields = field.getType().getTypeSignature().getParameters();
    Block[] blocks = new Block[fields.size()];
    ColumnChunk columnChunk = null;
    List<Optional<Field>> parameters = field.getChildren();
    for (int i = 0; i < fields.size(); i++) {
        Optional<Field> parameter = parameters.get(i);
        if (parameter.isPresent()) {
            columnChunk = readColumnChunk(parameter.get());
            blocks[i] = columnChunk.getBlock();
        }
    }

    // Without at least one child there is no position count and there are no levels to
    // build the struct from; fail with a clear message instead of a NullPointerException.
    if (columnChunk == null) {
        throw new IOException("Struct field does not have any children: " + field);
    }

    // Second pass: absent children can only be filled once the position count is known.
    int positionCount = columnChunk.getBlock().getPositionCount();
    for (int i = 0; i < fields.size(); i++) {
        if (blocks[i] == null) {
            blocks[i] = RunLengthEncodedBlock.create(field.getType().getTypeParameters().get(i), null, positionCount);
        }
    }

    BooleanList structIsNull = StructColumnReader.calculateStructOffsets(field, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
    boolean[] structIsNullVector = structIsNull.toBooleanArray();
    Block rowBlock = RowBlock.fromFieldBlocks(structIsNullVector.length, Optional.of(structIsNullVector), blocks);
    return new ColumnChunk(rowBlock, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
}
Example use of io.trino.spi.block.RowBlock in the trino project by trinodb.
From the class TestStructColumnReader, method testExtraFieldsInWriter.
/**
 * The written ORC struct carries a field ({@code field_b}) that the reader does not ask for.
 * Verifies that the first row contains only the values of the fields the reader requested.
 */
@Test
public void testExtraFieldsInWriter()
        throws IOException
{
    // The reader omits field_b
    List<String> readSchema = new ArrayList<>(Arrays.asList("field_a", "field_c"));
    List<String> writeSchema = new ArrayList<>(Arrays.asList("field_a", "field_b", "field_c"));
    List<String> values = new ArrayList<>(Arrays.asList("field_a_value", "field_b_value", "field_c_value"));

    Type structForRead = getType(readSchema);
    Type structForWrite = getType(writeSchema);
    write(tempFile, structForWrite, values);

    RowBlock block = read(tempFile, structForRead);
    List<?> firstRow = (List<?>) structForRead.getObjectValue(TestingConnectorSession.SESSION, block, 0);

    // Expected field values of the first row, in reader field order.
    String[] expected = {"field_a_value", "field_c_value"};
    assertEquals(firstRow.size(), readSchema.size());
    for (int index = 0; index < expected.length; index++) {
        assertEquals(firstRow.get(index), expected[index]);
    }
}
Example use of io.trino.spi.block.RowBlock in the trino project by trinodb.
From the class TestStructColumnReader, method testExtraFieldsInReader.
/**
 * The reader asks for a field ({@code field_b}) that the written ORC struct does not contain.
 * Verifies that the missing field is read as null while the other fields keep their values.
 */
@Test
public void testExtraFieldsInReader()
        throws IOException
{
    List<String> readSchema = new ArrayList<>(Arrays.asList("field_a", "field_b", "field_c"));
    // The writer omits field_b
    List<String> writeSchema = new ArrayList<>(Arrays.asList("field_a", "field_c"));
    List<String> values = new ArrayList<>(Arrays.asList("field_a_value", "field_c_value"));

    Type structForRead = getType(readSchema);
    Type structForWrite = getType(writeSchema);
    write(tempFile, structForWrite, values);

    RowBlock block = read(tempFile, structForRead);
    List<?> firstRow = (List<?>) structForRead.getObjectValue(TestingConnectorSession.SESSION, block, 0);

    int expectedFieldCount = readSchema.size();
    assertEquals(firstRow.size(), expectedFieldCount);
    assertEquals(firstRow.get(0), "field_a_value");
    // field_b was never written, so the reader must surface it as null
    assertNull(firstRow.get(1));
    assertEquals(firstRow.get(2), "field_c_value");
}
Example use of io.trino.spi.block.RowBlock in the trino project by trinodb.
From the class TestStructColumnReader, method write.
/**
 * Writes one page with a single struct column to {@code tempFile} through an {@code OrcWriter}.
 *
 * <p>The struct has one field per entry of {@code data}; each field holds the UTF-8 bytes of
 * its entry repeated for 10 rows, and no row is marked null. The writer is closed even when
 * building the blocks or writing the page fails.
 *
 * @param tempFile destination file
 * @param writerType struct type used as the type of the single written column
 * @param data one string value per struct field, in field order
 * @throws IOException declared for the file output stream and the writer's write/close calls
 */
private void write(TempFile tempFile, Type writerType, List<String> data)
        throws IOException
{
    List<String> columnNames = ImmutableList.of(STRUCT_COL_NAME);
    List<Type> types = ImmutableList.of(writerType);
    OrcWriter writer = new OrcWriter(
            new OutputStreamOrcDataSink(new FileOutputStream(tempFile.getFile())),
            columnNames,
            types,
            OrcType.createRootOrcType(columnNames, types),
            NONE,
            new OrcWriterOptions()
                    .withStripeMinSize(DataSize.of(0, MEGABYTE))
                    .withStripeMaxSize(DataSize.of(32, MEGABYTE))
                    .withStripeMaxRowCount(ORC_STRIPE_SIZE)
                    .withRowGroupMaxRowCount(ORC_ROW_GROUP_SIZE)
                    .withDictionaryMaxMemory(DataSize.of(32, MEGABYTE)),
            ImmutableMap.of(),
            true,
            BOTH,
            new OrcWriterStats());
    try {
        // write down some data with unsorted streams
        Block[] fieldBlocks = new Block[data.size()];
        int entries = 10;
        // A freshly allocated boolean[] is already all false, i.e. no row is null.
        boolean[] rowIsNull = new boolean[entries];
        BlockBuilder blockBuilder = TEST_DATA_TYPE.createBlockBuilder(null, entries);
        for (int i = 0; i < data.size(); i++) {
            byte[] bytes = data.get(i).getBytes(UTF_8);
            Slice value = Slices.wrappedBuffer(bytes);
            for (int j = 0; j < entries; j++) {
                blockBuilder.writeBytes(value, 0, bytes.length);
                blockBuilder.closeEntry();
            }
            fieldBlocks[i] = blockBuilder.build();
            // Start an empty builder for the next field.
            blockBuilder = blockBuilder.newBlockBuilderLike(null);
        }
        Block rowBlock = RowBlock.fromFieldBlocks(rowIsNull.length, Optional.of(rowIsNull), fieldBlocks);
        writer.write(new Page(rowBlock));
    }
    finally {
        // Release the writer (and the file stream it wraps) on failure as well as on success.
        writer.close();
    }
}
Aggregations