use of com.facebook.presto.raptor.storage.OrcFileRewriter.OrcFileInfo in project presto by prestodb.
the class TestOrcFileRewriter method testRewrite.
@Test
public void testRewrite() throws Exception {
ArrayType arrayType = new ArrayType(BIGINT);
ArrayType arrayOfArrayType = new ArrayType(arrayType);
MapType mapType = new MapType(createVarcharType(5), BOOLEAN);
List<Long> columnIds = ImmutableList.of(3L, 7L, 9L, 10L, 11L);
List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(20), arrayType, mapType, arrayOfArrayType);
File file = new File(temporary, randomUUID().toString());
try (OrcFileWriter writer = new OrcFileWriter(columnIds, columnTypes, file)) {
List<Page> pages = rowPagesBuilder(columnTypes).row(123L, "hello", arrayBlockOf(BIGINT, 1, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))).row(777L, "sky", arrayBlockOf(BIGINT, 3, 4), mapBlockOf(createVarcharType(5), BOOLEAN, "k2", false), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 6))).row(456L, "bye", arrayBlockOf(BIGINT, 5, 6), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 7))).row(888L, "world", arrayBlockOf(BIGINT, 7, 8), mapBlockOf(createVarcharType(5), BOOLEAN, "k4", true), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 8), null)).row(999L, "done", arrayBlockOf(BIGINT, 9, 10), mapBlockOf(createVarcharType(5), BOOLEAN, "k5", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 9, 10))).build();
writer.appendPages(pages);
}
try (OrcDataSource dataSource = fileOrcDataSource(file)) {
OrcRecordReader reader = createReader(dataSource, columnIds, columnTypes);
assertEquals(reader.getReaderRowCount(), 5);
assertEquals(reader.getFileRowCount(), 5);
assertEquals(reader.getSplitLength(), file.length());
assertEquals(reader.nextBatch(), 5);
Block column0 = reader.readBlock(BIGINT, 0);
assertEquals(column0.getPositionCount(), 5);
for (int i = 0; i < 5; i++) {
assertEquals(column0.isNull(i), false);
}
assertEquals(BIGINT.getLong(column0, 0), 123L);
assertEquals(BIGINT.getLong(column0, 1), 777L);
assertEquals(BIGINT.getLong(column0, 2), 456L);
assertEquals(BIGINT.getLong(column0, 3), 888L);
assertEquals(BIGINT.getLong(column0, 4), 999L);
Block column1 = reader.readBlock(createVarcharType(20), 1);
assertEquals(column1.getPositionCount(), 5);
for (int i = 0; i < 5; i++) {
assertEquals(column1.isNull(i), false);
}
assertEquals(createVarcharType(20).getSlice(column1, 0), utf8Slice("hello"));
assertEquals(createVarcharType(20).getSlice(column1, 1), utf8Slice("sky"));
assertEquals(createVarcharType(20).getSlice(column1, 2), utf8Slice("bye"));
assertEquals(createVarcharType(20).getSlice(column1, 3), utf8Slice("world"));
assertEquals(createVarcharType(20).getSlice(column1, 4), utf8Slice("done"));
Block column2 = reader.readBlock(arrayType, 2);
assertEquals(column2.getPositionCount(), 5);
for (int i = 0; i < 5; i++) {
assertEquals(column2.isNull(i), false);
}
assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 0), arrayBlockOf(BIGINT, 1, 2)));
assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 1), arrayBlockOf(BIGINT, 3, 4)));
assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 2), arrayBlockOf(BIGINT, 5, 6)));
assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 3), arrayBlockOf(BIGINT, 7, 8)));
assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 4), arrayBlockOf(BIGINT, 9, 10)));
Block column3 = reader.readBlock(mapType, 3);
assertEquals(column3.getPositionCount(), 5);
for (int i = 0; i < 5; i++) {
assertEquals(column3.isNull(i), false);
}
assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 0), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true)));
assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 1), mapBlockOf(createVarcharType(5), BOOLEAN, "k2", false)));
assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", true)));
assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 3), mapBlockOf(createVarcharType(5), BOOLEAN, "k4", true)));
assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 4), mapBlockOf(createVarcharType(5), BOOLEAN, "k5", true)));
Block column4 = reader.readBlock(arrayOfArrayType, 4);
assertEquals(column4.getPositionCount(), 5);
for (int i = 0; i < 5; i++) {
assertEquals(column4.isNull(i), false);
}
assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 0), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))));
assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 1), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 6))));
assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 2), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 7))));
assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 3), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 8), null)));
assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 4), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 9, 10))));
assertEquals(reader.nextBatch(), -1);
OrcFileMetadata orcFileMetadata = METADATA_CODEC.fromJson(reader.getUserMetadata().get(OrcFileMetadata.KEY).getBytes());
assertEquals(orcFileMetadata, new OrcFileMetadata(ImmutableMap.<Long, TypeSignature>builder().put(3L, BIGINT.getTypeSignature()).put(7L, createVarcharType(20).getTypeSignature()).put(9L, arrayType.getTypeSignature()).put(10L, mapType.getTypeSignature()).put(11L, arrayOfArrayType.getTypeSignature()).build()));
}
BitSet rowsToDelete = new BitSet(5);
rowsToDelete.set(1);
rowsToDelete.set(3);
rowsToDelete.set(4);
File newFile = new File(temporary, randomUUID().toString());
OrcFileInfo info = OrcFileRewriter.rewrite(file, newFile, rowsToDelete);
assertEquals(info.getRowCount(), 2);
assertEquals(info.getUncompressedSize(), 78);
try (OrcDataSource dataSource = fileOrcDataSource(newFile)) {
OrcRecordReader reader = createReader(dataSource, columnIds, columnTypes);
assertEquals(reader.getReaderRowCount(), 2);
assertEquals(reader.getFileRowCount(), 2);
assertEquals(reader.getSplitLength(), newFile.length());
assertEquals(reader.nextBatch(), 2);
Block column0 = reader.readBlock(BIGINT, 0);
assertEquals(column0.getPositionCount(), 2);
for (int i = 0; i < 2; i++) {
assertEquals(column0.isNull(i), false);
}
assertEquals(BIGINT.getLong(column0, 0), 123L);
assertEquals(BIGINT.getLong(column0, 1), 456L);
Block column1 = reader.readBlock(createVarcharType(20), 1);
assertEquals(column1.getPositionCount(), 2);
for (int i = 0; i < 2; i++) {
assertEquals(column1.isNull(i), false);
}
assertEquals(createVarcharType(20).getSlice(column1, 0), utf8Slice("hello"));
assertEquals(createVarcharType(20).getSlice(column1, 1), utf8Slice("bye"));
Block column2 = reader.readBlock(arrayType, 2);
assertEquals(column2.getPositionCount(), 2);
for (int i = 0; i < 2; i++) {
assertEquals(column2.isNull(i), false);
}
assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 0), arrayBlockOf(BIGINT, 1, 2)));
assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 1), arrayBlockOf(BIGINT, 5, 6)));
Block column3 = reader.readBlock(mapType, 3);
assertEquals(column3.getPositionCount(), 2);
for (int i = 0; i < 2; i++) {
assertEquals(column3.isNull(i), false);
}
assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 0), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true)));
assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 1), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", true)));
Block column4 = reader.readBlock(arrayOfArrayType, 4);
assertEquals(column4.getPositionCount(), 2);
for (int i = 0; i < 2; i++) {
assertEquals(column4.isNull(i), false);
}
assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 0), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))));
assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 1), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 7))));
assertEquals(reader.nextBatch(), -1);
OrcFileMetadata orcFileMetadata = METADATA_CODEC.fromJson(reader.getUserMetadata().get(OrcFileMetadata.KEY).getBytes());
assertEquals(orcFileMetadata, new OrcFileMetadata(ImmutableMap.<Long, TypeSignature>builder().put(3L, BIGINT.getTypeSignature()).put(7L, createVarcharType(20).getTypeSignature()).put(9L, arrayType.getTypeSignature()).put(10L, mapType.getTypeSignature()).put(11L, arrayOfArrayType.getTypeSignature()).build()));
}
}
Aggregations