Search in sources :

Example 6 with OrcFileInfo

use of com.facebook.presto.raptor.storage.OrcFileRewriter.OrcFileInfo in project presto by prestodb.

the class TestOrcFileRewriter method testRewrite.

@Test
public void testRewrite() throws Exception {
    ArrayType arrayType = new ArrayType(BIGINT);
    ArrayType arrayOfArrayType = new ArrayType(arrayType);
    MapType mapType = new MapType(createVarcharType(5), BOOLEAN);
    List<Long> columnIds = ImmutableList.of(3L, 7L, 9L, 10L, 11L);
    List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(20), arrayType, mapType, arrayOfArrayType);
    File file = new File(temporary, randomUUID().toString());
    try (OrcFileWriter writer = new OrcFileWriter(columnIds, columnTypes, file)) {
        List<Page> pages = rowPagesBuilder(columnTypes).row(123L, "hello", arrayBlockOf(BIGINT, 1, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))).row(777L, "sky", arrayBlockOf(BIGINT, 3, 4), mapBlockOf(createVarcharType(5), BOOLEAN, "k2", false), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 6))).row(456L, "bye", arrayBlockOf(BIGINT, 5, 6), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 7))).row(888L, "world", arrayBlockOf(BIGINT, 7, 8), mapBlockOf(createVarcharType(5), BOOLEAN, "k4", true), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 8), null)).row(999L, "done", arrayBlockOf(BIGINT, 9, 10), mapBlockOf(createVarcharType(5), BOOLEAN, "k5", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 9, 10))).build();
        writer.appendPages(pages);
    }
    try (OrcDataSource dataSource = fileOrcDataSource(file)) {
        OrcRecordReader reader = createReader(dataSource, columnIds, columnTypes);
        assertEquals(reader.getReaderRowCount(), 5);
        assertEquals(reader.getFileRowCount(), 5);
        assertEquals(reader.getSplitLength(), file.length());
        assertEquals(reader.nextBatch(), 5);
        Block column0 = reader.readBlock(BIGINT, 0);
        assertEquals(column0.getPositionCount(), 5);
        for (int i = 0; i < 5; i++) {
            assertEquals(column0.isNull(i), false);
        }
        assertEquals(BIGINT.getLong(column0, 0), 123L);
        assertEquals(BIGINT.getLong(column0, 1), 777L);
        assertEquals(BIGINT.getLong(column0, 2), 456L);
        assertEquals(BIGINT.getLong(column0, 3), 888L);
        assertEquals(BIGINT.getLong(column0, 4), 999L);
        Block column1 = reader.readBlock(createVarcharType(20), 1);
        assertEquals(column1.getPositionCount(), 5);
        for (int i = 0; i < 5; i++) {
            assertEquals(column1.isNull(i), false);
        }
        assertEquals(createVarcharType(20).getSlice(column1, 0), utf8Slice("hello"));
        assertEquals(createVarcharType(20).getSlice(column1, 1), utf8Slice("sky"));
        assertEquals(createVarcharType(20).getSlice(column1, 2), utf8Slice("bye"));
        assertEquals(createVarcharType(20).getSlice(column1, 3), utf8Slice("world"));
        assertEquals(createVarcharType(20).getSlice(column1, 4), utf8Slice("done"));
        Block column2 = reader.readBlock(arrayType, 2);
        assertEquals(column2.getPositionCount(), 5);
        for (int i = 0; i < 5; i++) {
            assertEquals(column2.isNull(i), false);
        }
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 0), arrayBlockOf(BIGINT, 1, 2)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 1), arrayBlockOf(BIGINT, 3, 4)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 2), arrayBlockOf(BIGINT, 5, 6)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 3), arrayBlockOf(BIGINT, 7, 8)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 4), arrayBlockOf(BIGINT, 9, 10)));
        Block column3 = reader.readBlock(mapType, 3);
        assertEquals(column3.getPositionCount(), 5);
        for (int i = 0; i < 5; i++) {
            assertEquals(column3.isNull(i), false);
        }
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 0), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true)));
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 1), mapBlockOf(createVarcharType(5), BOOLEAN, "k2", false)));
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", true)));
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 3), mapBlockOf(createVarcharType(5), BOOLEAN, "k4", true)));
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 4), mapBlockOf(createVarcharType(5), BOOLEAN, "k5", true)));
        Block column4 = reader.readBlock(arrayOfArrayType, 4);
        assertEquals(column4.getPositionCount(), 5);
        for (int i = 0; i < 5; i++) {
            assertEquals(column4.isNull(i), false);
        }
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 0), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 1), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 6))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 2), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 7))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 3), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 8), null)));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 4), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 9, 10))));
        assertEquals(reader.nextBatch(), -1);
        OrcFileMetadata orcFileMetadata = METADATA_CODEC.fromJson(reader.getUserMetadata().get(OrcFileMetadata.KEY).getBytes());
        assertEquals(orcFileMetadata, new OrcFileMetadata(ImmutableMap.<Long, TypeSignature>builder().put(3L, BIGINT.getTypeSignature()).put(7L, createVarcharType(20).getTypeSignature()).put(9L, arrayType.getTypeSignature()).put(10L, mapType.getTypeSignature()).put(11L, arrayOfArrayType.getTypeSignature()).build()));
    }
    BitSet rowsToDelete = new BitSet(5);
    rowsToDelete.set(1);
    rowsToDelete.set(3);
    rowsToDelete.set(4);
    File newFile = new File(temporary, randomUUID().toString());
    OrcFileInfo info = OrcFileRewriter.rewrite(file, newFile, rowsToDelete);
    assertEquals(info.getRowCount(), 2);
    assertEquals(info.getUncompressedSize(), 78);
    try (OrcDataSource dataSource = fileOrcDataSource(newFile)) {
        OrcRecordReader reader = createReader(dataSource, columnIds, columnTypes);
        assertEquals(reader.getReaderRowCount(), 2);
        assertEquals(reader.getFileRowCount(), 2);
        assertEquals(reader.getSplitLength(), newFile.length());
        assertEquals(reader.nextBatch(), 2);
        Block column0 = reader.readBlock(BIGINT, 0);
        assertEquals(column0.getPositionCount(), 2);
        for (int i = 0; i < 2; i++) {
            assertEquals(column0.isNull(i), false);
        }
        assertEquals(BIGINT.getLong(column0, 0), 123L);
        assertEquals(BIGINT.getLong(column0, 1), 456L);
        Block column1 = reader.readBlock(createVarcharType(20), 1);
        assertEquals(column1.getPositionCount(), 2);
        for (int i = 0; i < 2; i++) {
            assertEquals(column1.isNull(i), false);
        }
        assertEquals(createVarcharType(20).getSlice(column1, 0), utf8Slice("hello"));
        assertEquals(createVarcharType(20).getSlice(column1, 1), utf8Slice("bye"));
        Block column2 = reader.readBlock(arrayType, 2);
        assertEquals(column2.getPositionCount(), 2);
        for (int i = 0; i < 2; i++) {
            assertEquals(column2.isNull(i), false);
        }
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 0), arrayBlockOf(BIGINT, 1, 2)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 1), arrayBlockOf(BIGINT, 5, 6)));
        Block column3 = reader.readBlock(mapType, 3);
        assertEquals(column3.getPositionCount(), 2);
        for (int i = 0; i < 2; i++) {
            assertEquals(column3.isNull(i), false);
        }
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 0), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true)));
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 1), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", true)));
        Block column4 = reader.readBlock(arrayOfArrayType, 4);
        assertEquals(column4.getPositionCount(), 2);
        for (int i = 0; i < 2; i++) {
            assertEquals(column4.isNull(i), false);
        }
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 0), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 1), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 7))));
        assertEquals(reader.nextBatch(), -1);
        OrcFileMetadata orcFileMetadata = METADATA_CODEC.fromJson(reader.getUserMetadata().get(OrcFileMetadata.KEY).getBytes());
        assertEquals(orcFileMetadata, new OrcFileMetadata(ImmutableMap.<Long, TypeSignature>builder().put(3L, BIGINT.getTypeSignature()).put(7L, createVarcharType(20).getTypeSignature()).put(9L, arrayType.getTypeSignature()).put(10L, mapType.getTypeSignature()).put(11L, arrayOfArrayType.getTypeSignature()).build()));
    }
}
Also used : OrcDataSource(com.facebook.presto.orc.OrcDataSource) OrcTestingUtil.fileOrcDataSource(com.facebook.presto.raptor.storage.OrcTestingUtil.fileOrcDataSource) BitSet(java.util.BitSet) Page(com.facebook.presto.spi.Page) OrcRecordReader(com.facebook.presto.orc.OrcRecordReader) MapType(com.facebook.presto.type.MapType) ArrayType(com.facebook.presto.type.ArrayType) ArrayType(com.facebook.presto.type.ArrayType) MapType(com.facebook.presto.type.MapType) Type(com.facebook.presto.spi.type.Type) VarcharType.createVarcharType(com.facebook.presto.spi.type.VarcharType.createVarcharType) TypeSignature(com.facebook.presto.spi.type.TypeSignature) OrcFileInfo(com.facebook.presto.raptor.storage.OrcFileRewriter.OrcFileInfo) Block(com.facebook.presto.spi.block.Block) File(java.io.File) Test(org.testng.annotations.Test)

Aggregations

OrcFileInfo (com.facebook.presto.raptor.storage.OrcFileRewriter.OrcFileInfo)6 File (java.io.File)6 Type (com.facebook.presto.spi.type.Type)5 VarcharType.createVarcharType (com.facebook.presto.spi.type.VarcharType.createVarcharType)5 ArrayType (com.facebook.presto.type.ArrayType)5 MapType (com.facebook.presto.type.MapType)5 BitSet (java.util.BitSet)5 Test (org.testng.annotations.Test)5 Page (com.facebook.presto.spi.Page)3 OrcDataSource (com.facebook.presto.orc.OrcDataSource)2 OrcRecordReader (com.facebook.presto.orc.OrcRecordReader)2 OrcTestingUtil.fileOrcDataSource (com.facebook.presto.raptor.storage.OrcTestingUtil.fileOrcDataSource)2 Block (com.facebook.presto.spi.block.Block)2 ShardInfo (com.facebook.presto.raptor.metadata.ShardInfo)1 TypeSignature (com.facebook.presto.spi.type.TypeSignature)1 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 UUID (java.util.UUID)1