use of com.facebook.presto.common.type.ArrayType in project presto by prestodb.
the class TestOrcFileRewriter method testRewrite.
@Test
public void testRewrite() throws Exception {
FunctionAndTypeManager functionAndTypeManager = createTestFunctionAndTypeManager();
ArrayType arrayType = new ArrayType(BIGINT);
ArrayType arrayOfArrayType = new ArrayType(arrayType);
Type mapType = functionAndTypeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of(TypeSignatureParameter.of(createVarcharType(5).getTypeSignature()), TypeSignatureParameter.of(BOOLEAN.getTypeSignature())));
List<Long> columnIds = ImmutableList.of(3L, 7L, 9L, 10L, 11L, 12L);
DecimalType decimalType = DecimalType.createDecimalType(4, 4);
List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(20), arrayType, mapType, arrayOfArrayType, decimalType);
File file = new File(temporary, randomUUID().toString());
try (FileWriter writer = OrcTestingUtil.createFileWriter(columnIds, columnTypes, file)) {
List<Page> pages = rowPagesBuilder(columnTypes).row(123L, "hello", arrayBlockOf(BIGINT, 1, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5)), new BigDecimal("2.3")).row(777L, "sky", arrayBlockOf(BIGINT, 3, 4), mapBlockOf(createVarcharType(5), BOOLEAN, "k2", false), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 6)), new BigDecimal("2.3")).row(456L, "bye", arrayBlockOf(BIGINT, 5, 6), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 7)), new BigDecimal("2.3")).row(888L, "world", arrayBlockOf(BIGINT, 7, 8), mapBlockOf(createVarcharType(5), BOOLEAN, "k4", true), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 8), null), new BigDecimal("2.3")).row(999L, "done", arrayBlockOf(BIGINT, 9, 10), mapBlockOf(createVarcharType(5), BOOLEAN, "k5", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 9, 10)), new BigDecimal("2.3")).build();
writer.appendPages(pages);
}
try (OrcDataSource dataSource = fileOrcDataSource(file)) {
OrcBatchRecordReader reader = createReader(dataSource, columnIds, columnTypes);
assertEquals(reader.getReaderRowCount(), 5);
assertEquals(reader.getFileRowCount(), 5);
assertEquals(reader.getSplitLength(), file.length());
assertEquals(reader.nextBatch(), 5);
Block column0 = reader.readBlock(0);
assertEquals(column0.getPositionCount(), 5);
for (int i = 0; i < 5; i++) {
assertEquals(column0.isNull(i), false);
}
assertEquals(BIGINT.getLong(column0, 0), 123L);
assertEquals(BIGINT.getLong(column0, 1), 777L);
assertEquals(BIGINT.getLong(column0, 2), 456L);
assertEquals(BIGINT.getLong(column0, 3), 888L);
assertEquals(BIGINT.getLong(column0, 4), 999L);
Block column1 = reader.readBlock(1);
assertEquals(column1.getPositionCount(), 5);
for (int i = 0; i < 5; i++) {
assertEquals(column1.isNull(i), false);
}
assertEquals(createVarcharType(20).getSlice(column1, 0), utf8Slice("hello"));
assertEquals(createVarcharType(20).getSlice(column1, 1), utf8Slice("sky"));
assertEquals(createVarcharType(20).getSlice(column1, 2), utf8Slice("bye"));
assertEquals(createVarcharType(20).getSlice(column1, 3), utf8Slice("world"));
assertEquals(createVarcharType(20).getSlice(column1, 4), utf8Slice("done"));
Block column2 = reader.readBlock(2);
assertEquals(column2.getPositionCount(), 5);
for (int i = 0; i < 5; i++) {
assertEquals(column2.isNull(i), false);
}
assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 0), arrayBlockOf(BIGINT, 1, 2)));
assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 1), arrayBlockOf(BIGINT, 3, 4)));
assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 2), arrayBlockOf(BIGINT, 5, 6)));
assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 3), arrayBlockOf(BIGINT, 7, 8)));
assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 4), arrayBlockOf(BIGINT, 9, 10)));
Block column3 = reader.readBlock(3);
assertEquals(column3.getPositionCount(), 5);
for (int i = 0; i < 5; i++) {
assertEquals(column3.isNull(i), false);
}
assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 0), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true)));
assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 1), mapBlockOf(createVarcharType(5), BOOLEAN, "k2", false)));
assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", true)));
assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 3), mapBlockOf(createVarcharType(5), BOOLEAN, "k4", true)));
assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 4), mapBlockOf(createVarcharType(5), BOOLEAN, "k5", true)));
Block column4 = reader.readBlock(4);
assertEquals(column4.getPositionCount(), 5);
for (int i = 0; i < 5; i++) {
assertEquals(column4.isNull(i), false);
}
assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 0), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))));
assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 1), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 6))));
assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 2), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 7))));
assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 3), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 8), null)));
assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 4), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 9, 10))));
assertEquals(reader.nextBatch(), -1);
OrcFileMetadata orcFileMetadata = METADATA_CODEC.fromJson(reader.getUserMetadata().get(OrcFileMetadata.KEY).getBytes());
assertEquals(orcFileMetadata, new OrcFileMetadata(ImmutableMap.<Long, TypeSignature>builder().put(3L, BIGINT.getTypeSignature()).put(7L, createVarcharType(20).getTypeSignature()).put(9L, arrayType.getTypeSignature()).put(10L, mapType.getTypeSignature()).put(11L, arrayOfArrayType.getTypeSignature()).put(12L, decimalType.getTypeSignature()).build()));
}
BitSet rowsToDelete = new BitSet(5);
rowsToDelete.set(1);
rowsToDelete.set(3);
rowsToDelete.set(4);
File newFile = new File(temporary, randomUUID().toString());
FileSystem fileSystem = new LocalOrcDataEnvironment().getFileSystem(DEFAULT_RAPTOR_CONTEXT);
OrcFileInfo info = createFileRewriter().rewrite(fileSystem, getColumnTypes(columnIds, columnTypes), path(file), path(newFile), rowsToDelete);
assertEquals(info.getRowCount(), 2);
assertBetweenInclusive(info.getUncompressedSize(), 94L, 118L * 2);
try (OrcDataSource dataSource = fileOrcDataSource(newFile)) {
OrcBatchRecordReader reader = createReader(dataSource, columnIds, columnTypes);
assertEquals(reader.getReaderRowCount(), 2);
assertEquals(reader.getFileRowCount(), 2);
assertEquals(reader.getSplitLength(), newFile.length());
assertEquals(reader.nextBatch(), 2);
Block column0 = reader.readBlock(0);
assertEquals(column0.getPositionCount(), 2);
for (int i = 0; i < 2; i++) {
assertEquals(column0.isNull(i), false);
}
assertEquals(BIGINT.getLong(column0, 0), 123L);
assertEquals(BIGINT.getLong(column0, 1), 456L);
Block column1 = reader.readBlock(1);
assertEquals(column1.getPositionCount(), 2);
for (int i = 0; i < 2; i++) {
assertEquals(column1.isNull(i), false);
}
assertEquals(createVarcharType(20).getSlice(column1, 0), utf8Slice("hello"));
assertEquals(createVarcharType(20).getSlice(column1, 1), utf8Slice("bye"));
Block column2 = reader.readBlock(2);
assertEquals(column2.getPositionCount(), 2);
for (int i = 0; i < 2; i++) {
assertEquals(column2.isNull(i), false);
}
assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 0), arrayBlockOf(BIGINT, 1, 2)));
assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 1), arrayBlockOf(BIGINT, 5, 6)));
Block column3 = reader.readBlock(3);
assertEquals(column3.getPositionCount(), 2);
for (int i = 0; i < 2; i++) {
assertEquals(column3.isNull(i), false);
}
assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 0), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true)));
assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 1), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", true)));
Block column4 = reader.readBlock(4);
assertEquals(column4.getPositionCount(), 2);
for (int i = 0; i < 2; i++) {
assertEquals(column4.isNull(i), false);
}
assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 0), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))));
assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 1), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 7))));
assertEquals(reader.nextBatch(), -1);
OrcFileMetadata orcFileMetadata = METADATA_CODEC.fromJson(reader.getUserMetadata().get(OrcFileMetadata.KEY).getBytes());
assertEquals(orcFileMetadata, new OrcFileMetadata(ImmutableMap.<Long, TypeSignature>builder().put(3L, BIGINT.getTypeSignature()).put(7L, createVarcharType(20).getTypeSignature()).put(9L, arrayType.getTypeSignature()).put(10L, mapType.getTypeSignature()).put(11L, arrayOfArrayType.getTypeSignature()).put(12L, decimalType.getTypeSignature()).build()));
}
}
use of com.facebook.presto.common.type.ArrayType in project presto by prestodb.
the class TestWriterBlockRawSize method testArrayType.
@Test
public void testArrayType() {
Type elementType = INTEGER;
Type arrayType = new ArrayType(elementType);
ColumnWriter columnWriter = createColumnWriter(arrayType);
BlockBuilder blockBuilder = arrayType.createBlockBuilder(null, NUM_ELEMENTS * 2);
int totalChildElements = 0;
for (int i = 0; i < NUM_ELEMENTS; i++) {
blockBuilder.appendNull();
BlockBuilder elementBlockBuilder = blockBuilder.beginBlockEntry();
for (int j = 0; j < i; j++) {
elementType.writeLong(elementBlockBuilder, j);
}
blockBuilder.closeEntry();
totalChildElements += i;
}
long rawSize = columnWriter.writeBlock(blockBuilder.build());
long expectedSize = NUM_ELEMENTS + (totalChildElements * ((FixedWidthType) elementType).getFixedSize());
assertEquals(rawSize, expectedSize);
}
use of com.facebook.presto.common.type.ArrayType in project presto by prestodb.
the class TestShardWriter method testWriter.
@Test(dataProvider = "useOptimizedOrcWriter")
public void testWriter(boolean useOptimizedOrcWriter) throws Exception {
FunctionAndTypeManager functionAndTypeManager = createTestFunctionAndTypeManager();
List<Long> columnIds = ImmutableList.of(1L, 2L, 4L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L);
ArrayType arrayType = new ArrayType(BIGINT);
ArrayType arrayOfArrayType = new ArrayType(arrayType);
Type mapType = functionAndTypeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of(TypeSignatureParameter.of(createVarcharType(10).getTypeSignature()), TypeSignatureParameter.of(BOOLEAN.getTypeSignature())));
List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(10), VARBINARY, DOUBLE, BOOLEAN, arrayType, mapType, arrayOfArrayType, TIMESTAMP, TIME, DATE);
File file = new File(directory, System.nanoTime() + ".orc");
byte[] bytes1 = octets(0x00, 0xFE, 0xFF);
byte[] bytes3 = octets(0x01, 0x02, 0x19, 0x80);
long timestampValue = sqlTimestampOf(2002, 4, 6, 7, 8, 9, 0, UTC, UTC_KEY, SESSION).getMillisUtc();
long timeValue = new SqlTime(NANOSECONDS.toMillis(new DateTime(2004, 11, 29, 0, 0, 0, 0, UTC).toLocalTime().getMillisOfDay())).getMillis();
DateTime date = new DateTime(2001, 11, 22, 0, 0, 0, 0, UTC);
int dateValue = new SqlDate(Days.daysBetween(new DateTime(0, ISOChronology.getInstanceUTC()), date).getDays()).getDays();
RowPagesBuilder rowPagesBuilder = RowPagesBuilder.rowPagesBuilder(columnTypes).row(123L, "hello", wrappedBuffer(bytes1), 123.456, true, arrayBlockOf(BIGINT, 1, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5)), timestampValue, timeValue, dateValue).row(null, "world", null, Double.POSITIVE_INFINITY, null, arrayBlockOf(BIGINT, 3, null), mapBlockOf(createVarcharType(5), BOOLEAN, "k2", null), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 6, 7)), timestampValue, timeValue, dateValue).row(456L, "bye \u2603", wrappedBuffer(bytes3), Double.NaN, false, arrayBlockOf(BIGINT), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", false), arrayBlockOf(arrayType, arrayBlockOf(BIGINT)), timestampValue, timeValue, dateValue);
try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(new EmptyClassLoader());
FileWriter writer = createFileWriter(columnIds, columnTypes, file)) {
writer.appendPages(rowPagesBuilder.build());
}
try (OrcDataSource dataSource = fileOrcDataSource(file)) {
OrcBatchRecordReader reader = createReader(dataSource, columnIds, columnTypes);
assertEquals(reader.getReaderRowCount(), 3);
assertEquals(reader.getReaderPosition(), 0);
assertEquals(reader.getFileRowCount(), reader.getReaderRowCount());
assertEquals(reader.getFilePosition(), reader.getFilePosition());
assertEquals(reader.nextBatch(), 3);
assertEquals(reader.getReaderPosition(), 0);
assertEquals(reader.getFilePosition(), reader.getFilePosition());
Block column0 = reader.readBlock(0);
assertEquals(column0.isNull(0), false);
assertEquals(column0.isNull(1), true);
assertEquals(column0.isNull(2), false);
assertEquals(BIGINT.getLong(column0, 0), 123L);
assertEquals(BIGINT.getLong(column0, 2), 456L);
Block column1 = reader.readBlock(1);
assertEquals(createVarcharType(10).getSlice(column1, 0), utf8Slice("hello"));
assertEquals(createVarcharType(10).getSlice(column1, 1), utf8Slice("world"));
assertEquals(createVarcharType(10).getSlice(column1, 2), utf8Slice("bye \u2603"));
Block column2 = reader.readBlock(2);
assertEquals(VARBINARY.getSlice(column2, 0), wrappedBuffer(bytes1));
assertEquals(column2.isNull(1), true);
assertEquals(VARBINARY.getSlice(column2, 2), wrappedBuffer(bytes3));
Block column3 = reader.readBlock(3);
assertEquals(column3.isNull(0), false);
assertEquals(column3.isNull(1), false);
assertEquals(column3.isNull(2), false);
assertEquals(DOUBLE.getDouble(column3, 0), 123.456);
assertEquals(DOUBLE.getDouble(column3, 1), Double.POSITIVE_INFINITY);
assertEquals(DOUBLE.getDouble(column3, 2), Double.NaN);
Block column4 = reader.readBlock(4);
assertEquals(column4.isNull(0), false);
assertEquals(column4.isNull(1), true);
assertEquals(column4.isNull(2), false);
assertEquals(BOOLEAN.getBoolean(column4, 0), true);
assertEquals(BOOLEAN.getBoolean(column4, 2), false);
Block column5 = reader.readBlock(5);
assertEquals(column5.getPositionCount(), 3);
assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 0), arrayBlockOf(BIGINT, 1, 2)));
assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 1), arrayBlockOf(BIGINT, 3, null)));
assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 2), arrayBlockOf(BIGINT)));
Block column6 = reader.readBlock(6);
assertEquals(column6.getPositionCount(), 3);
assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column6, 0), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true)));
Block object = arrayType.getObject(column6, 1);
Block k2 = mapBlockOf(createVarcharType(5), BOOLEAN, "k2", null);
assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, object, k2));
assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column6, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", false)));
Block column7 = reader.readBlock(7);
assertEquals(column7.getPositionCount(), 3);
assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column7, 0), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))));
assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column7, 1), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 6, 7))));
assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column7, 2), arrayBlockOf(arrayType, arrayBlockOf(BIGINT))));
Block column8 = reader.readBlock(8);
assertEquals(TIMESTAMP.getLong(column8, 0), timestampValue);
assertEquals(TIMESTAMP.getLong(column8, 1), timestampValue);
assertEquals(TIMESTAMP.getLong(column8, 2), timestampValue);
Block column9 = reader.readBlock(9);
assertEquals(TIME.getLong(column9, 0), timeValue);
assertEquals(TIME.getLong(column9, 1), timeValue);
assertEquals(TIME.getLong(column9, 2), timeValue);
Block column10 = reader.readBlock(10);
assertEquals(DATE.getLong(column10, 0), dateValue);
assertEquals(DATE.getLong(column10, 1), dateValue);
assertEquals(DATE.getLong(column10, 2), dateValue);
assertEquals(reader.nextBatch(), -1);
assertEquals(reader.getReaderPosition(), 3);
assertEquals(reader.getFilePosition(), reader.getFilePosition());
OrcFileMetadata orcFileMetadata = METADATA_CODEC.fromJson(reader.getUserMetadata().get(OrcFileMetadata.KEY).getBytes());
assertEquals(orcFileMetadata, new OrcFileMetadata(ImmutableMap.<Long, TypeSignature>builder().put(1L, BIGINT.getTypeSignature()).put(2L, createVarcharType(10).getTypeSignature()).put(4L, VARBINARY.getTypeSignature()).put(6L, DOUBLE.getTypeSignature()).put(7L, BOOLEAN.getTypeSignature()).put(8L, arrayType.getTypeSignature()).put(9L, mapType.getTypeSignature()).put(10L, arrayOfArrayType.getTypeSignature()).put(11L, TIMESTAMP.getTypeSignature()).put(12L, TIME.getTypeSignature()).put(13L, DATE.getTypeSignature()).build()));
}
File crcFile = new File(file.getParentFile(), "." + file.getName() + ".crc");
assertFalse(crcFile.exists());
// Test unsupported types
for (Type type : ImmutableList.of(TIMESTAMP_WITH_TIME_ZONE, RowType.anonymous(ImmutableList.of(BIGINT, DOUBLE)))) {
try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(new EmptyClassLoader())) {
createFileWriter(ImmutableList.of(1L), ImmutableList.of(type), file);
fail();
} catch (PrestoException e) {
assertTrue(e.getMessage().toLowerCase(ENGLISH).contains("type"));
}
}
}
use of com.facebook.presto.common.type.ArrayType in project presto by prestodb.
the class OrcTester method testListRoundTrip.
private void testListRoundTrip(Type type, List<?> readValues) throws Exception {
Type arrayType = arrayType(type);
// values in simple list
testRoundTripType(arrayType, readValues.stream().map(OrcTester::toHiveList).collect(toList()));
if (structuralNullTestsEnabled) {
// values and nulls in simple list
testRoundTripType(arrayType, insertNullEvery(5, readValues).stream().map(OrcTester::toHiveList).collect(toList()));
// all null values in simple list
testRoundTripType(arrayType, readValues.stream().map(value -> toHiveList(null)).collect(toList()));
}
}
use of com.facebook.presto.common.type.ArrayType in project presto by prestodb.
the class SubfieldExtractor method toRowExpression.
public RowExpression toRowExpression(Subfield subfield, Type columnType) {
List<Subfield.PathElement> path = subfield.getPath();
ImmutableList.Builder<Type> types = ImmutableList.builder();
types.add(columnType);
Type type = columnType;
for (int i = 0; i < path.size(); i++) {
if (type instanceof RowType) {
type = getFieldType((RowType) type, ((Subfield.NestedField) path.get(i)).getName());
types.add(type);
} else if (type instanceof ArrayType) {
type = ((ArrayType) type).getElementType();
types.add(type);
} else if (type instanceof MapType) {
type = ((MapType) type).getValueType();
types.add(type);
} else {
verify(false, "Unexpected type: " + type);
}
}
return toRowExpression(subfield, types.build());
}
Aggregations