Search in sources:

Example 1 with OrcFileInfo

use of io.trino.plugin.raptor.legacy.storage.OrcFileRewriter.OrcFileInfo in project trino by trinodb.

the class TestOrcFileRewriter method testRewriteNoRowsDeleted.

/**
 * Rewrites a two-row BIGINT file with an empty deletion set and checks that the
 * reported row count and uncompressed size are as expected and that the output
 * file has exactly the same bytes as the input file.
 */
@Test
public void testRewriteNoRowsDeleted() throws Exception {
    List<Type> types = ImmutableList.of(BIGINT);
    List<Long> ids = ImmutableList.of(3L);

    // Write the source file containing the two rows 123 and 456.
    File source = temporary.resolve(randomUUID().toString()).toFile();
    try (OrcFileWriter orcWriter = new OrcFileWriter(ids, types, source)) {
        orcWriter.appendPages(rowPagesBuilder(types).row(123L).row(456L).build());
    }

    // An empty BitSet marks no row for deletion.
    File target = temporary.resolve(randomUUID().toString()).toFile();
    OrcFileInfo result = OrcFileRewriter.rewrite(source, target, new BitSet());

    assertEquals(result.getRowCount(), 2);
    assertEquals(result.getUncompressedSize(), 16);

    // The rewritten file must match the source byte for byte.
    byte[] rewrittenBytes = readAllBytes(target.toPath());
    byte[] sourceBytes = readAllBytes(source.toPath());
    assertEquals(rewrittenBytes, sourceBytes);
}
Also used : Type(io.trino.spi.type.Type) ArrayType(io.trino.spi.type.ArrayType) VarcharType.createVarcharType(io.trino.spi.type.VarcharType.createVarcharType) DecimalType(io.trino.spi.type.DecimalType) OrcFileInfo(io.trino.plugin.raptor.legacy.storage.OrcFileRewriter.OrcFileInfo) BitSet(java.util.BitSet) File(java.io.File) Test(org.testng.annotations.Test)

Example 2 with OrcFileInfo

use of io.trino.plugin.raptor.legacy.storage.OrcFileRewriter.OrcFileInfo in project trino by trinodb.

the class TestOrcFileRewriter method testRewriteWithoutMetadata.

/**
 * Writes a two-column file with the writer's fourth constructor argument set to
 * {@code false}, reads it back, rewrites it with row 1 deleted, and reads the
 * result back again.
 *
 * <p>Both before and after the rewrite the test asserts that the reader's user
 * metadata has no {@code OrcFileMetadata.KEY} entry.
 * NOTE(review): presumably the {@code false} flag is what disables writing that
 * entry; confirm against {@code OrcFileWriter}.
 *
 * @throws Exception if writing, reading or rewriting the temporary files fails
 */
@Test
public void testRewriteWithoutMetadata() throws Exception {
    // Built once and reused for both the schema and the slice reads below.
    Type varcharType = createVarcharType(20);
    List<Long> columnIds = ImmutableList.of(3L, 7L);
    List<Type> columnTypes = ImmutableList.of(BIGINT, varcharType);
    File file = temporary.resolve(randomUUID().toString()).toFile();
    try (OrcFileWriter writer = new OrcFileWriter(columnIds, columnTypes, file, false)) {
        List<Page> pages = rowPagesBuilder(columnTypes)
                .row(123L, "hello")
                .row(777L, "sky")
                .build();
        writer.appendPages(pages);
    }

    // Verify the freshly written file: two rows, no nulls, no metadata key.
    try (OrcDataSource dataSource = fileOrcDataSource(file)) {
        OrcRecordReader reader = createReader(dataSource, columnIds, columnTypes);
        assertEquals(reader.getReaderRowCount(), 2);
        assertEquals(reader.getFileRowCount(), 2);
        assertEquals(reader.getSplitLength(), file.length());
        Page page = reader.nextPage();
        assertEquals(page.getPositionCount(), 2);

        Block column0 = page.getBlock(0);
        assertEquals(column0.getPositionCount(), 2);
        for (int i = 0; i < 2; i++) {
            assertFalse(column0.isNull(i));
        }
        assertEquals(BIGINT.getLong(column0, 0), 123L);
        assertEquals(BIGINT.getLong(column0, 1), 777L);

        Block column1 = page.getBlock(1);
        assertEquals(column1.getPositionCount(), 2);
        for (int i = 0; i < 2; i++) {
            assertFalse(column1.isNull(i));
        }
        assertEquals(varcharType.getSlice(column1, 0), utf8Slice("hello"));
        assertEquals(varcharType.getSlice(column1, 1), utf8Slice("sky"));

        assertFalse(reader.getUserMetadata().containsKey(OrcFileMetadata.KEY));
    }

    // Delete the second row (index 1) and rewrite into a new file.
    BitSet rowsToDelete = new BitSet(5);
    rowsToDelete.set(1);
    File newFile = temporary.resolve(randomUUID().toString()).toFile();
    OrcFileInfo info = OrcFileRewriter.rewrite(file, newFile, rowsToDelete);
    assertEquals(info.getRowCount(), 1);
    assertEquals(info.getUncompressedSize(), 13);

    // Verify the rewritten file: only row 0 survives, still no metadata key.
    try (OrcDataSource dataSource = fileOrcDataSource(newFile)) {
        OrcRecordReader reader = createReader(dataSource, columnIds, columnTypes);
        assertEquals(reader.getReaderRowCount(), 1);
        assertEquals(reader.getFileRowCount(), 1);
        assertEquals(reader.getSplitLength(), newFile.length());
        Page page = reader.nextPage();
        assertEquals(page.getPositionCount(), 1);

        Block column0 = page.getBlock(0);
        assertEquals(column0.getPositionCount(), 1);
        assertFalse(column0.isNull(0));
        assertEquals(BIGINT.getLong(column0, 0), 123L);

        Block column1 = page.getBlock(1);
        assertEquals(column1.getPositionCount(), 1);
        assertFalse(column1.isNull(0));
        assertEquals(varcharType.getSlice(column1, 0), utf8Slice("hello"));

        assertFalse(reader.getUserMetadata().containsKey(OrcFileMetadata.KEY));
    }
}
Also used : OrcTestingUtil.fileOrcDataSource(io.trino.plugin.raptor.legacy.storage.OrcTestingUtil.fileOrcDataSource) OrcDataSource(io.trino.orc.OrcDataSource) BitSet(java.util.BitSet) Page(io.trino.spi.Page) OrcRecordReader(io.trino.orc.OrcRecordReader) Type(io.trino.spi.type.Type) ArrayType(io.trino.spi.type.ArrayType) VarcharType.createVarcharType(io.trino.spi.type.VarcharType.createVarcharType) DecimalType(io.trino.spi.type.DecimalType) OrcFileInfo(io.trino.plugin.raptor.legacy.storage.OrcFileRewriter.OrcFileInfo) Block(io.trino.spi.block.Block) File(java.io.File) Test(org.testng.annotations.Test)

Example 3 with OrcFileInfo

use of io.trino.plugin.raptor.legacy.storage.OrcFileRewriter.OrcFileInfo in project trino by trinodb.

the class TestOrcFileRewriter method testUncompressedSize.

/**
 * Writes three rows across five column types (BOOLEAN, BIGINT, DOUBLE,
 * VARCHAR(10), VARBINARY), the last row being all nulls, rewrites the file
 * without deleting anything, and checks the row count and uncompressed size
 * reported by the rewrite.
 */
@Test
public void testUncompressedSize() throws Exception {
    List<Type> types = ImmutableList.of(BOOLEAN, BIGINT, DOUBLE, createVarcharType(10), VARBINARY);
    List<Long> ids = ImmutableList.of(1L, 2L, 3L, 4L, 5L);

    File source = temporary.resolve(randomUUID().toString()).toFile();
    try (OrcFileWriter orcWriter = new OrcFileWriter(ids, types, source)) {
        List<Page> input = rowPagesBuilder(types)
                .row(true, 123L, 98.7, "hello", utf8Slice("abc"))
                .row(false, 456L, 65.4, "world", utf8Slice("xyz"))
                .row(null, null, null, null, null)
                .build();
        orcWriter.appendPages(input);
    }

    // Rewrite with an empty deletion set so all three rows are kept.
    File target = temporary.resolve(randomUUID().toString()).toFile();
    BitSet nothingDeleted = new BitSet();
    OrcFileInfo result = OrcFileRewriter.rewrite(source, target, nothingDeleted);

    assertEquals(result.getRowCount(), 3);
    assertEquals(result.getUncompressedSize(), 55);
}
Also used : Type(io.trino.spi.type.Type) ArrayType(io.trino.spi.type.ArrayType) VarcharType.createVarcharType(io.trino.spi.type.VarcharType.createVarcharType) DecimalType(io.trino.spi.type.DecimalType) OrcFileInfo(io.trino.plugin.raptor.legacy.storage.OrcFileRewriter.OrcFileInfo) BitSet(java.util.BitSet) Page(io.trino.spi.Page) File(java.io.File) Test(org.testng.annotations.Test)

Example 4 with OrcFileInfo

use of io.trino.plugin.raptor.legacy.storage.OrcFileRewriter.OrcFileInfo in project trino by trinodb.

the class TestOrcFileRewriter method testRewriteAllRowsDeleted.

/**
 * Rewrites a two-row BIGINT file with both rows marked for deletion and checks
 * that the rewrite reports zero rows and zero uncompressed size, and that the
 * destination file does not exist afterwards.
 */
@Test
public void testRewriteAllRowsDeleted() throws Exception {
    List<Type> types = ImmutableList.of(BIGINT);
    List<Long> ids = ImmutableList.of(3L);

    File source = temporary.resolve(randomUUID().toString()).toFile();
    try (OrcFileWriter orcWriter = new OrcFileWriter(ids, types, source)) {
        orcWriter.appendPages(rowPagesBuilder(types).row(123L).row(456L).build());
    }

    // Mark rows 0 and 1 (the upper bound of the range is exclusive).
    BitSet allRows = new BitSet();
    allRows.set(0, 2);

    File target = temporary.resolve(randomUUID().toString()).toFile();
    OrcFileInfo result = OrcFileRewriter.rewrite(source, target, allRows);

    assertEquals(result.getRowCount(), 0);
    assertEquals(result.getUncompressedSize(), 0);
    assertFalse(target.exists());
}
Also used : Type(io.trino.spi.type.Type) ArrayType(io.trino.spi.type.ArrayType) VarcharType.createVarcharType(io.trino.spi.type.VarcharType.createVarcharType) DecimalType(io.trino.spi.type.DecimalType) OrcFileInfo(io.trino.plugin.raptor.legacy.storage.OrcFileRewriter.OrcFileInfo) BitSet(java.util.BitSet) File(java.io.File) Test(org.testng.annotations.Test)

Example 5 with OrcFileInfo

use of io.trino.plugin.raptor.legacy.storage.OrcFileRewriter.OrcFileInfo in project trino by trinodb.

the class TestOrcFileRewriter method testRewrite.

/**
 * End-to-end check of {@code OrcFileRewriter.rewrite} on a six-column file
 * (BIGINT, VARCHAR(20), array, map, array-of-array, decimal).
 *
 * <p>Writes five rows, reads them back to confirm the written contents and the
 * stored {@code OrcFileMetadata}, then rewrites the file with rows 1, 3 and 4
 * marked for deletion and confirms that rows 0 and 2 remain and that the
 * rewritten file carries the same metadata.
 *
 * @throws Exception if writing, reading or rewriting the temporary files fails
 */
@Test
public void testRewrite() throws Exception {
    ArrayType arrayType = new ArrayType(BIGINT);
    ArrayType arrayOfArrayType = new ArrayType(arrayType);
    Type mapType = TESTING_TYPE_MANAGER.getParameterizedType(StandardTypes.MAP, ImmutableList.of(TypeSignatureParameter.typeParameter(createVarcharType(5).getTypeSignature()), TypeSignatureParameter.typeParameter(BOOLEAN.getTypeSignature())));
    List<Long> columnIds = ImmutableList.of(3L, 7L, 9L, 10L, 11L, 12L);
    DecimalType decimalType = DecimalType.createDecimalType(4, 4);
    List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(20), arrayType, mapType, arrayOfArrayType, decimalType);
    File file = temporary.resolve(randomUUID().toString()).toFile();
    try (OrcFileWriter writer = new OrcFileWriter(columnIds, columnTypes, file)) {
        // One row per line; row indices 0..4 are referenced by the deletion set below.
        List<Page> pages = rowPagesBuilder(columnTypes)
                .row(123L, "hello", arrayBlockOf(BIGINT, 1, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5)), new BigDecimal("2.3"))
                .row(777L, "sky", arrayBlockOf(BIGINT, 3, 4), mapBlockOf(createVarcharType(5), BOOLEAN, "k2", false), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 6)), new BigDecimal("2.3"))
                .row(456L, "bye", arrayBlockOf(BIGINT, 5, 6), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 7)), new BigDecimal("2.3"))
                .row(888L, "world", arrayBlockOf(BIGINT, 7, 8), mapBlockOf(createVarcharType(5), BOOLEAN, "k4", true), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 8), null), new BigDecimal("2.3"))
                .row(999L, "done", arrayBlockOf(BIGINT, 9, 10), mapBlockOf(createVarcharType(5), BOOLEAN, "k5", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 9, 10)), new BigDecimal("2.3"))
                .build();
        writer.appendPages(pages);
    }

    // Phase 1: read the original file back and verify columns 0-4 and the metadata.
    // NOTE(review): the decimal column (index 5) is never read back in this test.
    try (OrcDataSource dataSource = fileOrcDataSource(file)) {
        OrcRecordReader reader = createReader(dataSource, columnIds, columnTypes);
        assertEquals(reader.getReaderRowCount(), 5);
        assertEquals(reader.getFileRowCount(), 5);
        assertEquals(reader.getSplitLength(), file.length());
        Page page = reader.nextPage();
        assertEquals(page.getPositionCount(), 5);

        Block column0 = page.getBlock(0);
        assertEquals(column0.getPositionCount(), 5);
        for (int i = 0; i < 5; i++) {
            assertFalse(column0.isNull(i));
        }
        assertEquals(BIGINT.getLong(column0, 0), 123L);
        assertEquals(BIGINT.getLong(column0, 1), 777L);
        assertEquals(BIGINT.getLong(column0, 2), 456L);
        assertEquals(BIGINT.getLong(column0, 3), 888L);
        assertEquals(BIGINT.getLong(column0, 4), 999L);

        Block column1 = page.getBlock(1);
        assertEquals(column1.getPositionCount(), 5);
        for (int i = 0; i < 5; i++) {
            assertFalse(column1.isNull(i));
        }
        assertEquals(createVarcharType(20).getSlice(column1, 0), utf8Slice("hello"));
        assertEquals(createVarcharType(20).getSlice(column1, 1), utf8Slice("sky"));
        assertEquals(createVarcharType(20).getSlice(column1, 2), utf8Slice("bye"));
        assertEquals(createVarcharType(20).getSlice(column1, 3), utf8Slice("world"));
        assertEquals(createVarcharType(20).getSlice(column1, 4), utf8Slice("done"));

        Block column2 = page.getBlock(2);
        assertEquals(column2.getPositionCount(), 5);
        for (int i = 0; i < 5; i++) {
            assertFalse(column2.isNull(i));
        }
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 0), arrayBlockOf(BIGINT, 1, 2)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 1), arrayBlockOf(BIGINT, 3, 4)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 2), arrayBlockOf(BIGINT, 5, 6)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 3), arrayBlockOf(BIGINT, 7, 8)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 4), arrayBlockOf(BIGINT, 9, 10)));

        Block column3 = page.getBlock(3);
        assertEquals(column3.getPositionCount(), 5);
        for (int i = 0; i < 5; i++) {
            assertFalse(column3.isNull(i));
        }
        // NOTE(review): the map column is read through arrayType.getObject rather than
        // mapType; kept as in the original, but confirm this is intended.
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 0), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true)));
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 1), mapBlockOf(createVarcharType(5), BOOLEAN, "k2", false)));
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", true)));
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 3), mapBlockOf(createVarcharType(5), BOOLEAN, "k4", true)));
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 4), mapBlockOf(createVarcharType(5), BOOLEAN, "k5", true)));

        Block column4 = page.getBlock(4);
        assertEquals(column4.getPositionCount(), 5);
        for (int i = 0; i < 5; i++) {
            assertFalse(column4.isNull(i));
        }
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 0), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 1), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 6))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 2), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 7))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 3), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 8), null)));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 4), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 9, 10))));

        // All rows came back in a single page.
        assertNull(reader.nextPage());

        OrcFileMetadata orcFileMetadata = METADATA_CODEC.fromJson(reader.getUserMetadata().get(OrcFileMetadata.KEY).getBytes());
        assertEquals(orcFileMetadata, new OrcFileMetadata(ImmutableMap.<Long, TypeId>builder().put(3L, BIGINT.getTypeId()).put(7L, createVarcharType(20).getTypeId()).put(9L, arrayType.getTypeId()).put(10L, mapType.getTypeId()).put(11L, arrayOfArrayType.getTypeId()).put(12L, decimalType.getTypeId()).buildOrThrow()));
    }

    // Phase 2: delete rows 1, 3 and 4, leaving rows 0 and 2.
    BitSet rowsToDelete = new BitSet(5);
    rowsToDelete.set(1);
    rowsToDelete.set(3);
    rowsToDelete.set(4);
    File newFile = temporary.resolve(randomUUID().toString()).toFile();
    OrcFileInfo info = OrcFileRewriter.rewrite(file, newFile, rowsToDelete);
    assertEquals(info.getRowCount(), 2);
    assertEquals(info.getUncompressedSize(), 94);

    // Phase 3: read the rewritten file and verify the two surviving rows.
    try (OrcDataSource dataSource = fileOrcDataSource(newFile)) {
        OrcRecordReader reader = createReader(dataSource, columnIds, columnTypes);
        assertEquals(reader.getReaderRowCount(), 2);
        assertEquals(reader.getFileRowCount(), 2);
        assertEquals(reader.getSplitLength(), newFile.length());
        Page page = reader.nextPage();
        assertEquals(page.getPositionCount(), 2);

        Block column0 = page.getBlock(0);
        assertEquals(column0.getPositionCount(), 2);
        for (int i = 0; i < 2; i++) {
            assertFalse(column0.isNull(i));
        }
        assertEquals(BIGINT.getLong(column0, 0), 123L);
        assertEquals(BIGINT.getLong(column0, 1), 456L);

        Block column1 = page.getBlock(1);
        assertEquals(column1.getPositionCount(), 2);
        for (int i = 0; i < 2; i++) {
            assertFalse(column1.isNull(i));
        }
        assertEquals(createVarcharType(20).getSlice(column1, 0), utf8Slice("hello"));
        assertEquals(createVarcharType(20).getSlice(column1, 1), utf8Slice("bye"));

        Block column2 = page.getBlock(2);
        assertEquals(column2.getPositionCount(), 2);
        for (int i = 0; i < 2; i++) {
            assertFalse(column2.isNull(i));
        }
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 0), arrayBlockOf(BIGINT, 1, 2)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 1), arrayBlockOf(BIGINT, 5, 6)));

        Block column3 = page.getBlock(3);
        assertEquals(column3.getPositionCount(), 2);
        for (int i = 0; i < 2; i++) {
            assertFalse(column3.isNull(i));
        }
        // NOTE(review): same arrayType.getObject usage on the map column as in phase 1.
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 0), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true)));
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 1), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", true)));

        Block column4 = page.getBlock(4);
        assertEquals(column4.getPositionCount(), 2);
        for (int i = 0; i < 2; i++) {
            assertFalse(column4.isNull(i));
        }
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 0), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 1), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 7))));

        // Same end-of-data check as in phase 1.
        assertNull(reader.nextPage());

        OrcFileMetadata orcFileMetadata = METADATA_CODEC.fromJson(reader.getUserMetadata().get(OrcFileMetadata.KEY).getBytes());
        assertEquals(orcFileMetadata, new OrcFileMetadata(ImmutableMap.<Long, TypeId>builder().put(3L, BIGINT.getTypeId()).put(7L, createVarcharType(20).getTypeId()).put(9L, arrayType.getTypeId()).put(10L, mapType.getTypeId()).put(11L, arrayOfArrayType.getTypeId()).put(12L, decimalType.getTypeId()).buildOrThrow()));
    }
}
Also used : OrcTestingUtil.fileOrcDataSource(io.trino.plugin.raptor.legacy.storage.OrcTestingUtil.fileOrcDataSource) OrcDataSource(io.trino.orc.OrcDataSource) BitSet(java.util.BitSet) Page(io.trino.spi.Page) OrcRecordReader(io.trino.orc.OrcRecordReader) BigDecimal(java.math.BigDecimal) ArrayType(io.trino.spi.type.ArrayType) Type(io.trino.spi.type.Type) ArrayType(io.trino.spi.type.ArrayType) VarcharType.createVarcharType(io.trino.spi.type.VarcharType.createVarcharType) DecimalType(io.trino.spi.type.DecimalType) OrcFileInfo(io.trino.plugin.raptor.legacy.storage.OrcFileRewriter.OrcFileInfo) DecimalType(io.trino.spi.type.DecimalType) Block(io.trino.spi.block.Block) File(java.io.File) Test(org.testng.annotations.Test)

Aggregations

OrcFileInfo (io.trino.plugin.raptor.legacy.storage.OrcFileRewriter.OrcFileInfo)6 File (java.io.File)6 ArrayType (io.trino.spi.type.ArrayType)5 DecimalType (io.trino.spi.type.DecimalType)5 Type (io.trino.spi.type.Type)5 VarcharType.createVarcharType (io.trino.spi.type.VarcharType.createVarcharType)5 BitSet (java.util.BitSet)5 Test (org.testng.annotations.Test)5 Page (io.trino.spi.Page)3 OrcDataSource (io.trino.orc.OrcDataSource)2 OrcRecordReader (io.trino.orc.OrcRecordReader)2 OrcTestingUtil.fileOrcDataSource (io.trino.plugin.raptor.legacy.storage.OrcTestingUtil.fileOrcDataSource)2 Block (io.trino.spi.block.Block)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 ShardInfo (io.trino.plugin.raptor.legacy.metadata.ShardInfo)1 BigDecimal (java.math.BigDecimal)1 UUID (java.util.UUID)1