Search in sources :

Example 1 with OrcBatchRecordReader

use of com.facebook.presto.orc.OrcBatchRecordReader in project presto by prestodb.

the class TestOrcFileRewriter method testRewrite.

/**
 * Writes five rows covering bigint, varchar, array, map, nested array and decimal columns,
 * reads them back through an {@code OrcBatchRecordReader}, then rewrites the file with
 * bits 1, 3 and 4 set in the delete bit set and checks that the rows at positions 0 and 2 remain.
 */
@Test
public void testRewrite() throws Exception {
    FunctionAndTypeManager functionAndTypeManager = createTestFunctionAndTypeManager();
    Type varchar5 = createVarcharType(5);
    Type varchar20 = createVarcharType(20);
    ArrayType arrayType = new ArrayType(BIGINT);
    ArrayType arrayOfArrayType = new ArrayType(arrayType);
    Type mapType = functionAndTypeManager.getParameterizedType(
            StandardTypes.MAP,
            ImmutableList.of(
                    TypeSignatureParameter.of(varchar5.getTypeSignature()),
                    TypeSignatureParameter.of(BOOLEAN.getTypeSignature())));
    List<Long> columnIds = ImmutableList.of(3L, 7L, 9L, 10L, 11L, 12L);
    DecimalType decimalType = DecimalType.createDecimalType(4, 4);
    List<Type> columnTypes = ImmutableList.of(BIGINT, varchar20, arrayType, mapType, arrayOfArrayType, decimalType);
    File file = new File(temporary, randomUUID().toString());

    // Populate the source file with the five input rows.
    try (FileWriter writer = OrcTestingUtil.createFileWriter(columnIds, columnTypes, file)) {
        List<Page> pages = rowPagesBuilder(columnTypes)
                .row(123L, "hello", arrayBlockOf(BIGINT, 1, 2), mapBlockOf(varchar5, BOOLEAN, "k1", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5)), new BigDecimal("2.3"))
                .row(777L, "sky", arrayBlockOf(BIGINT, 3, 4), mapBlockOf(varchar5, BOOLEAN, "k2", false), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 6)), new BigDecimal("2.3"))
                .row(456L, "bye", arrayBlockOf(BIGINT, 5, 6), mapBlockOf(varchar5, BOOLEAN, "k3", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 7)), new BigDecimal("2.3"))
                .row(888L, "world", arrayBlockOf(BIGINT, 7, 8), mapBlockOf(varchar5, BOOLEAN, "k4", true), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 8), null), new BigDecimal("2.3"))
                .row(999L, "done", arrayBlockOf(BIGINT, 9, 10), mapBlockOf(varchar5, BOOLEAN, "k5", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 9, 10)), new BigDecimal("2.3"))
                .build();
        writer.appendPages(pages);
    }

    // Verify the source file before rewriting.
    // NOTE(review): the decimal column (index 5) is written but never read back in this test.
    try (OrcDataSource dataSource = fileOrcDataSource(file)) {
        OrcBatchRecordReader reader = createReader(dataSource, columnIds, columnTypes);
        assertEquals(reader.getReaderRowCount(), 5);
        assertEquals(reader.getFileRowCount(), 5);
        assertEquals(reader.getSplitLength(), file.length());
        assertEquals(reader.nextBatch(), 5);

        Block column0 = reader.readBlock(0);
        assertEquals(column0.getPositionCount(), 5);
        for (int position = 0; position < 5; position++) {
            assertFalse(column0.isNull(position));
        }
        assertEquals(BIGINT.getLong(column0, 0), 123L);
        assertEquals(BIGINT.getLong(column0, 1), 777L);
        assertEquals(BIGINT.getLong(column0, 2), 456L);
        assertEquals(BIGINT.getLong(column0, 3), 888L);
        assertEquals(BIGINT.getLong(column0, 4), 999L);

        Block column1 = reader.readBlock(1);
        assertEquals(column1.getPositionCount(), 5);
        for (int position = 0; position < 5; position++) {
            assertFalse(column1.isNull(position));
        }
        assertEquals(varchar20.getSlice(column1, 0), utf8Slice("hello"));
        assertEquals(varchar20.getSlice(column1, 1), utf8Slice("sky"));
        assertEquals(varchar20.getSlice(column1, 2), utf8Slice("bye"));
        assertEquals(varchar20.getSlice(column1, 3), utf8Slice("world"));
        assertEquals(varchar20.getSlice(column1, 4), utf8Slice("done"));

        Block column2 = reader.readBlock(2);
        assertEquals(column2.getPositionCount(), 5);
        for (int position = 0; position < 5; position++) {
            assertFalse(column2.isNull(position));
        }
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 0), arrayBlockOf(BIGINT, 1, 2)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 1), arrayBlockOf(BIGINT, 3, 4)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 2), arrayBlockOf(BIGINT, 5, 6)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 3), arrayBlockOf(BIGINT, 7, 8)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 4), arrayBlockOf(BIGINT, 9, 10)));

        // NOTE(review): the per-row map block is fetched through arrayType.getObject, not mapType; kept as written.
        Block column3 = reader.readBlock(3);
        assertEquals(column3.getPositionCount(), 5);
        for (int position = 0; position < 5; position++) {
            assertFalse(column3.isNull(position));
        }
        assertTrue(mapBlocksEqual(varchar5, BOOLEAN, arrayType.getObject(column3, 0), mapBlockOf(varchar5, BOOLEAN, "k1", true)));
        assertTrue(mapBlocksEqual(varchar5, BOOLEAN, arrayType.getObject(column3, 1), mapBlockOf(varchar5, BOOLEAN, "k2", false)));
        assertTrue(mapBlocksEqual(varchar5, BOOLEAN, arrayType.getObject(column3, 2), mapBlockOf(varchar5, BOOLEAN, "k3", true)));
        assertTrue(mapBlocksEqual(varchar5, BOOLEAN, arrayType.getObject(column3, 3), mapBlockOf(varchar5, BOOLEAN, "k4", true)));
        assertTrue(mapBlocksEqual(varchar5, BOOLEAN, arrayType.getObject(column3, 4), mapBlockOf(varchar5, BOOLEAN, "k5", true)));

        Block column4 = reader.readBlock(4);
        assertEquals(column4.getPositionCount(), 5);
        for (int position = 0; position < 5; position++) {
            assertFalse(column4.isNull(position));
        }
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 0), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 1), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 6))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 2), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 7))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 3), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 8), null)));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 4), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 9, 10))));

        // A single batch holds every row, so the next call reports end of data.
        assertEquals(reader.nextBatch(), -1);

        // The user metadata maps each column id to its type signature.
        OrcFileMetadata orcFileMetadata = METADATA_CODEC.fromJson(reader.getUserMetadata().get(OrcFileMetadata.KEY).getBytes());
        assertEquals(orcFileMetadata, new OrcFileMetadata(ImmutableMap.<Long, TypeSignature>builder()
                .put(3L, BIGINT.getTypeSignature())
                .put(7L, varchar20.getTypeSignature())
                .put(9L, arrayType.getTypeSignature())
                .put(10L, mapType.getTypeSignature())
                .put(11L, arrayOfArrayType.getTypeSignature())
                .put(12L, decimalType.getTypeSignature())
                .build()));
    }

    // Mark rows 1, 3 and 4 for deletion and rewrite into a new file.
    BitSet rowsToDelete = new BitSet(5);
    rowsToDelete.set(1);
    rowsToDelete.set(3);
    rowsToDelete.set(4);
    File newFile = new File(temporary, randomUUID().toString());
    FileSystem fileSystem = new LocalOrcDataEnvironment().getFileSystem(DEFAULT_RAPTOR_CONTEXT);
    OrcFileInfo info = createFileRewriter().rewrite(fileSystem, getColumnTypes(columnIds, columnTypes), path(file), path(newFile), rowsToDelete);
    assertEquals(info.getRowCount(), 2);
    assertBetweenInclusive(info.getUncompressedSize(), 94L, 118L * 2);

    // Verify that the rewritten file holds only the two surviving rows.
    try (OrcDataSource dataSource = fileOrcDataSource(newFile)) {
        OrcBatchRecordReader reader = createReader(dataSource, columnIds, columnTypes);
        assertEquals(reader.getReaderRowCount(), 2);
        assertEquals(reader.getFileRowCount(), 2);
        assertEquals(reader.getSplitLength(), newFile.length());
        assertEquals(reader.nextBatch(), 2);

        Block column0 = reader.readBlock(0);
        assertEquals(column0.getPositionCount(), 2);
        for (int position = 0; position < 2; position++) {
            assertFalse(column0.isNull(position));
        }
        assertEquals(BIGINT.getLong(column0, 0), 123L);
        assertEquals(BIGINT.getLong(column0, 1), 456L);

        Block column1 = reader.readBlock(1);
        assertEquals(column1.getPositionCount(), 2);
        for (int position = 0; position < 2; position++) {
            assertFalse(column1.isNull(position));
        }
        assertEquals(varchar20.getSlice(column1, 0), utf8Slice("hello"));
        assertEquals(varchar20.getSlice(column1, 1), utf8Slice("bye"));

        Block column2 = reader.readBlock(2);
        assertEquals(column2.getPositionCount(), 2);
        for (int position = 0; position < 2; position++) {
            assertFalse(column2.isNull(position));
        }
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 0), arrayBlockOf(BIGINT, 1, 2)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 1), arrayBlockOf(BIGINT, 5, 6)));

        Block column3 = reader.readBlock(3);
        assertEquals(column3.getPositionCount(), 2);
        for (int position = 0; position < 2; position++) {
            assertFalse(column3.isNull(position));
        }
        assertTrue(mapBlocksEqual(varchar5, BOOLEAN, arrayType.getObject(column3, 0), mapBlockOf(varchar5, BOOLEAN, "k1", true)));
        assertTrue(mapBlocksEqual(varchar5, BOOLEAN, arrayType.getObject(column3, 1), mapBlockOf(varchar5, BOOLEAN, "k3", true)));

        Block column4 = reader.readBlock(4);
        assertEquals(column4.getPositionCount(), 2);
        for (int position = 0; position < 2; position++) {
            assertFalse(column4.isNull(position));
        }
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 0), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 1), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 7))));

        assertEquals(reader.nextBatch(), -1);

        // The rewritten file is expected to carry the same column-id-to-type metadata.
        OrcFileMetadata orcFileMetadata = METADATA_CODEC.fromJson(reader.getUserMetadata().get(OrcFileMetadata.KEY).getBytes());
        assertEquals(orcFileMetadata, new OrcFileMetadata(ImmutableMap.<Long, TypeSignature>builder()
                .put(3L, BIGINT.getTypeSignature())
                .put(7L, varchar20.getTypeSignature())
                .put(9L, arrayType.getTypeSignature())
                .put(10L, mapType.getTypeSignature())
                .put(11L, arrayOfArrayType.getTypeSignature())
                .put(12L, decimalType.getTypeSignature())
                .build()));
    }
}
Also used : OrcDataSource(com.facebook.presto.orc.OrcDataSource) OrcTestingUtil.fileOrcDataSource(com.facebook.presto.raptor.storage.OrcTestingUtil.fileOrcDataSource) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) BitSet(java.util.BitSet) Page(com.facebook.presto.common.Page) BigDecimal(java.math.BigDecimal) ArrayType(com.facebook.presto.common.type.ArrayType) DecimalType(com.facebook.presto.common.type.DecimalType) VarcharType.createVarcharType(com.facebook.presto.common.type.VarcharType.createVarcharType) ArrayType(com.facebook.presto.common.type.ArrayType) Type(com.facebook.presto.common.type.Type) FunctionAndTypeManager(com.facebook.presto.metadata.FunctionAndTypeManager) FunctionAndTypeManager.createTestFunctionAndTypeManager(com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager) FileSystem(org.apache.hadoop.fs.FileSystem) DecimalType(com.facebook.presto.common.type.DecimalType) Block(com.facebook.presto.common.block.Block) LocalOrcDataEnvironment(com.facebook.presto.raptor.filesystem.LocalOrcDataEnvironment) File(java.io.File) Test(org.testng.annotations.Test)

Example 2 with OrcBatchRecordReader

use of com.facebook.presto.orc.OrcBatchRecordReader in project presto by prestodb.

the class TestShardWriter method testWriterZeroRows.

/**
 * Opens and closes a file writer without appending any pages, then checks that the
 * resulting file reports zero rows and that the first batch request signals end of data.
 */
@SuppressWarnings("EmptyTryBlock")
@Test
public void testWriterZeroRows() throws Exception {
    File file = new File(directory, System.nanoTime() + ".orc");
    List<Type> columnTypes = ImmutableList.of(BIGINT);
    List<Long> columnIds = ImmutableList.of(1L);

    // optimized ORC writer will flush metadata on close
    try (FileWriter ignored = createFileWriter(columnIds, columnTypes, file)) {
        // intentionally empty: no rows are written
    }

    try (OrcDataSource dataSource = fileOrcDataSource(file)) {
        OrcBatchRecordReader emptyReader = createReader(dataSource, columnIds, columnTypes);
        assertEquals(emptyReader.getReaderRowCount(), 0);
        assertEquals(emptyReader.getReaderPosition(), 0);
        // -1 is the end-of-data value the other tests in this listing also expect from nextBatch()
        assertEquals(emptyReader.nextBatch(), -1);
    }
}
Also used : OrcDataSource(com.facebook.presto.orc.OrcDataSource) OrcTestingUtil.fileOrcDataSource(com.facebook.presto.raptor.storage.OrcTestingUtil.fileOrcDataSource) VarcharType.createVarcharType(com.facebook.presto.common.type.VarcharType.createVarcharType) ArrayType(com.facebook.presto.common.type.ArrayType) Type(com.facebook.presto.common.type.Type) RowType(com.facebook.presto.common.type.RowType) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) OrcTestingUtil.createFileWriter(com.facebook.presto.raptor.storage.OrcTestingUtil.createFileWriter) File(java.io.File) Test(org.testng.annotations.Test)

Example 3 with OrcBatchRecordReader

use of com.facebook.presto.orc.OrcBatchRecordReader in project presto by prestodb.

the class TestShardWriter method testWriter.

/**
 * Writes three rows covering every column type used by this test (including nulls, NaN,
 * infinity and non-ASCII text), reads them back through an {@code OrcBatchRecordReader},
 * and finally checks that creating a writer for an unsupported type fails with a
 * {@code PrestoException} whose message mentions "type".
 *
 * @param useOptimizedOrcWriter value supplied by the "useOptimizedOrcWriter" data provider
 */
@Test(dataProvider = "useOptimizedOrcWriter")
public void testWriter(boolean useOptimizedOrcWriter) throws Exception {
    // NOTE(review): useOptimizedOrcWriter is not referenced anywhere in this method body — confirm whether
    // createFileWriter should receive it.
    FunctionAndTypeManager functionAndTypeManager = createTestFunctionAndTypeManager();
    List<Long> columnIds = ImmutableList.of(1L, 2L, 4L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L);
    ArrayType arrayType = new ArrayType(BIGINT);
    ArrayType arrayOfArrayType = new ArrayType(arrayType);
    Type mapType = functionAndTypeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of(TypeSignatureParameter.of(createVarcharType(10).getTypeSignature()), TypeSignatureParameter.of(BOOLEAN.getTypeSignature())));
    List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(10), VARBINARY, DOUBLE, BOOLEAN, arrayType, mapType, arrayOfArrayType, TIMESTAMP, TIME, DATE);
    File file = new File(directory, System.nanoTime() + ".orc");
    byte[] bytes1 = octets(0x00, 0xFE, 0xFF);
    byte[] bytes3 = octets(0x01, 0x02, 0x19, 0x80);
    long timestampValue = sqlTimestampOf(2002, 4, 6, 7, 8, 9, 0, UTC, UTC_KEY, SESSION).getMillisUtc();
    long timeValue = new SqlTime(NANOSECONDS.toMillis(new DateTime(2004, 11, 29, 0, 0, 0, 0, UTC).toLocalTime().getMillisOfDay())).getMillis();
    DateTime date = new DateTime(2001, 11, 22, 0, 0, 0, 0, UTC);
    int dateValue = new SqlDate(Days.daysBetween(new DateTime(0, ISOChronology.getInstanceUTC()), date).getDays()).getDays();

    // Row 0: all values present; row 1: nulls in several columns; row 2: NaN, empty arrays and a non-ASCII string.
    RowPagesBuilder rowPagesBuilder = RowPagesBuilder.rowPagesBuilder(columnTypes)
            .row(123L, "hello", wrappedBuffer(bytes1), 123.456, true, arrayBlockOf(BIGINT, 1, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5)), timestampValue, timeValue, dateValue)
            .row(null, "world", null, Double.POSITIVE_INFINITY, null, arrayBlockOf(BIGINT, 3, null), mapBlockOf(createVarcharType(5), BOOLEAN, "k2", null), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 6, 7)), timestampValue, timeValue, dateValue)
            .row(456L, "bye \u2603", wrappedBuffer(bytes3), Double.NaN, false, arrayBlockOf(BIGINT), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", false), arrayBlockOf(arrayType, arrayBlockOf(BIGINT)), timestampValue, timeValue, dateValue);

    // The writer is exercised with an EmptyClassLoader installed as the thread context class loader.
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(new EmptyClassLoader());
        FileWriter writer = createFileWriter(columnIds, columnTypes, file)) {
        writer.appendPages(rowPagesBuilder.build());
    }

    try (OrcDataSource dataSource = fileOrcDataSource(file)) {
        OrcBatchRecordReader reader = createReader(dataSource, columnIds, columnTypes);
        assertEquals(reader.getReaderRowCount(), 3);
        assertEquals(reader.getReaderPosition(), 0);
        assertEquals(reader.getFileRowCount(), reader.getReaderRowCount());
        // The file position must track the reader position (previously compared getFilePosition() with itself).
        assertEquals(reader.getFilePosition(), reader.getReaderPosition());
        assertEquals(reader.nextBatch(), 3);
        assertEquals(reader.getReaderPosition(), 0);
        assertEquals(reader.getFilePosition(), reader.getReaderPosition());

        // bigint column: row 1 is null
        Block column0 = reader.readBlock(0);
        assertEquals(column0.isNull(0), false);
        assertEquals(column0.isNull(1), true);
        assertEquals(column0.isNull(2), false);
        assertEquals(BIGINT.getLong(column0, 0), 123L);
        assertEquals(BIGINT.getLong(column0, 2), 456L);

        // varchar column
        Block column1 = reader.readBlock(1);
        assertEquals(createVarcharType(10).getSlice(column1, 0), utf8Slice("hello"));
        assertEquals(createVarcharType(10).getSlice(column1, 1), utf8Slice("world"));
        assertEquals(createVarcharType(10).getSlice(column1, 2), utf8Slice("bye \u2603"));

        // varbinary column: row 1 is null
        Block column2 = reader.readBlock(2);
        assertEquals(VARBINARY.getSlice(column2, 0), wrappedBuffer(bytes1));
        assertEquals(column2.isNull(1), true);
        assertEquals(VARBINARY.getSlice(column2, 2), wrappedBuffer(bytes3));

        // double column: includes infinity and NaN
        Block column3 = reader.readBlock(3);
        assertEquals(column3.isNull(0), false);
        assertEquals(column3.isNull(1), false);
        assertEquals(column3.isNull(2), false);
        assertEquals(DOUBLE.getDouble(column3, 0), 123.456);
        assertEquals(DOUBLE.getDouble(column3, 1), Double.POSITIVE_INFINITY);
        assertEquals(DOUBLE.getDouble(column3, 2), Double.NaN);

        // boolean column: row 1 is null
        Block column4 = reader.readBlock(4);
        assertEquals(column4.isNull(0), false);
        assertEquals(column4.isNull(1), true);
        assertEquals(column4.isNull(2), false);
        assertEquals(BOOLEAN.getBoolean(column4, 0), true);
        assertEquals(BOOLEAN.getBoolean(column4, 2), false);

        // array(bigint) column
        Block column5 = reader.readBlock(5);
        assertEquals(column5.getPositionCount(), 3);
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 0), arrayBlockOf(BIGINT, 1, 2)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 1), arrayBlockOf(BIGINT, 3, null)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 2), arrayBlockOf(BIGINT)));

        // map column
        // NOTE(review): the per-row map block is fetched through arrayType.getObject, not mapType; kept as written.
        Block column6 = reader.readBlock(6);
        assertEquals(column6.getPositionCount(), 3);
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column6, 0), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true)));
        Block object = arrayType.getObject(column6, 1);
        Block k2 = mapBlockOf(createVarcharType(5), BOOLEAN, "k2", null);
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, object, k2));
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column6, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", false)));

        // array(array(bigint)) column
        Block column7 = reader.readBlock(7);
        assertEquals(column7.getPositionCount(), 3);
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column7, 0), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column7, 1), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 6, 7))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column7, 2), arrayBlockOf(arrayType, arrayBlockOf(BIGINT))));

        // timestamp, time and date columns hold the same value in every row
        Block column8 = reader.readBlock(8);
        assertEquals(TIMESTAMP.getLong(column8, 0), timestampValue);
        assertEquals(TIMESTAMP.getLong(column8, 1), timestampValue);
        assertEquals(TIMESTAMP.getLong(column8, 2), timestampValue);
        Block column9 = reader.readBlock(9);
        assertEquals(TIME.getLong(column9, 0), timeValue);
        assertEquals(TIME.getLong(column9, 1), timeValue);
        assertEquals(TIME.getLong(column9, 2), timeValue);
        Block column10 = reader.readBlock(10);
        assertEquals(DATE.getLong(column10, 0), dateValue);
        assertEquals(DATE.getLong(column10, 1), dateValue);
        assertEquals(DATE.getLong(column10, 2), dateValue);

        // After the only batch has been consumed the reader sits at the end of the data.
        assertEquals(reader.nextBatch(), -1);
        assertEquals(reader.getReaderPosition(), 3);
        assertEquals(reader.getFilePosition(), reader.getReaderPosition());

        // The user metadata maps each column id to its type signature.
        OrcFileMetadata orcFileMetadata = METADATA_CODEC.fromJson(reader.getUserMetadata().get(OrcFileMetadata.KEY).getBytes());
        assertEquals(orcFileMetadata, new OrcFileMetadata(ImmutableMap.<Long, TypeSignature>builder().put(1L, BIGINT.getTypeSignature()).put(2L, createVarcharType(10).getTypeSignature()).put(4L, VARBINARY.getTypeSignature()).put(6L, DOUBLE.getTypeSignature()).put(7L, BOOLEAN.getTypeSignature()).put(8L, arrayType.getTypeSignature()).put(9L, mapType.getTypeSignature()).put(10L, arrayOfArrayType.getTypeSignature()).put(11L, TIMESTAMP.getTypeSignature()).put(12L, TIME.getTypeSignature()).put(13L, DATE.getTypeSignature()).build()));
    }

    // No hidden ".<name>.crc" sibling file may exist next to the written file.
    File crcFile = new File(file.getParentFile(), "." + file.getName() + ".crc");
    assertFalse(crcFile.exists());

    // Test unsupported types
    for (Type type : ImmutableList.of(TIMESTAMP_WITH_TIME_ZONE, RowType.anonymous(ImmutableList.of(BIGINT, DOUBLE)))) {
        try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(new EmptyClassLoader())) {
            createFileWriter(ImmutableList.of(1L), ImmutableList.of(type), file);
            fail();
        } catch (PrestoException e) {
            assertTrue(e.getMessage().toLowerCase(ENGLISH).contains("type"));
        }
    }
}
Also used : OrcDataSource(com.facebook.presto.orc.OrcDataSource) OrcTestingUtil.fileOrcDataSource(com.facebook.presto.raptor.storage.OrcTestingUtil.fileOrcDataSource) RowPagesBuilder(com.facebook.presto.RowPagesBuilder) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) OrcTestingUtil.createFileWriter(com.facebook.presto.raptor.storage.OrcTestingUtil.createFileWriter) SqlTime(com.facebook.presto.common.type.SqlTime) PrestoException(com.facebook.presto.spi.PrestoException) DateTime(org.joda.time.DateTime) ArrayType(com.facebook.presto.common.type.ArrayType) VarcharType.createVarcharType(com.facebook.presto.common.type.VarcharType.createVarcharType) ArrayType(com.facebook.presto.common.type.ArrayType) Type(com.facebook.presto.common.type.Type) RowType(com.facebook.presto.common.type.RowType) TypeSignature(com.facebook.presto.common.type.TypeSignature) FunctionAndTypeManager(com.facebook.presto.metadata.FunctionAndTypeManager) FunctionAndTypeManager.createTestFunctionAndTypeManager(com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager) SqlDate(com.facebook.presto.common.type.SqlDate) Block(com.facebook.presto.common.block.Block) File(java.io.File) ThreadContextClassLoader(com.facebook.presto.spi.classloader.ThreadContextClassLoader) Test(org.testng.annotations.Test)

Example 4 with OrcBatchRecordReader

use of com.facebook.presto.orc.OrcBatchRecordReader in project presto by prestodb.

the class TestOrcFileRewriter method testRewriteWithoutMetadata.

/**
 * Writes two rows with a writer created with {@code false} as its fourth argument, checks that the
 * file carries no {@code OrcFileMetadata.KEY} user metadata entry, then rewrites the file with bit 1
 * set in the delete bit set and checks that one row remains and the metadata entry is still absent.
 */
@Test
public void testRewriteWithoutMetadata() throws Exception {
    Type varchar20 = createVarcharType(20);
    List<Long> columnIds = ImmutableList.of(3L, 7L);
    List<Type> columnTypes = ImmutableList.of(BIGINT, varchar20);
    File file = new File(temporary, randomUUID().toString());

    // NOTE(review): the trailing `false` presumably disables writing the metadata entry — confirm against createFileWriter.
    try (FileWriter writer = createFileWriter(columnIds, columnTypes, file, false)) {
        List<Page> pages = rowPagesBuilder(columnTypes)
                .row(123L, "hello")
                .row(777L, "sky")
                .build();
        writer.appendPages(pages);
    }

    // Verify the source file before rewriting.
    try (OrcDataSource dataSource = fileOrcDataSource(file)) {
        OrcBatchRecordReader reader = createReader(dataSource, columnIds, columnTypes);
        assertEquals(reader.getReaderRowCount(), 2);
        assertEquals(reader.getFileRowCount(), 2);
        assertEquals(reader.getSplitLength(), file.length());
        assertEquals(reader.nextBatch(), 2);

        Block column0 = reader.readBlock(0);
        assertEquals(column0.getPositionCount(), 2);
        for (int position = 0; position < 2; position++) {
            assertFalse(column0.isNull(position));
        }
        assertEquals(BIGINT.getLong(column0, 0), 123L);
        assertEquals(BIGINT.getLong(column0, 1), 777L);

        Block column1 = reader.readBlock(1);
        assertEquals(column1.getPositionCount(), 2);
        for (int position = 0; position < 2; position++) {
            assertFalse(column1.isNull(position));
        }
        assertEquals(varchar20.getSlice(column1, 0), utf8Slice("hello"));
        assertEquals(varchar20.getSlice(column1, 1), utf8Slice("sky"));

        assertFalse(reader.getUserMetadata().containsKey(OrcFileMetadata.KEY));
    }

    // Mark row 1 for deletion and rewrite into a new file.
    BitSet rowsToDelete = new BitSet(5);
    rowsToDelete.set(1);
    File newFile = new File(temporary, randomUUID().toString());
    FileSystem fileSystem = new LocalOrcDataEnvironment().getFileSystem(DEFAULT_RAPTOR_CONTEXT);
    OrcFileInfo info = createFileRewriter().rewrite(fileSystem, getColumnTypes(columnIds, columnTypes), path(file), path(newFile), rowsToDelete);
    assertEquals(info.getRowCount(), 1);
    assertBetweenInclusive(info.getUncompressedSize(), 13L, 13L * 2);

    // Verify that only the first row survives and that no metadata entry appeared.
    try (OrcDataSource dataSource = fileOrcDataSource(newFile)) {
        OrcBatchRecordReader reader = createReader(dataSource, columnIds, columnTypes);
        assertEquals(reader.getReaderRowCount(), 1);
        assertEquals(reader.getFileRowCount(), 1);
        assertEquals(reader.getSplitLength(), newFile.length());
        assertEquals(reader.nextBatch(), 1);

        Block column0 = reader.readBlock(0);
        assertEquals(column0.getPositionCount(), 1);
        assertFalse(column0.isNull(0));
        assertEquals(BIGINT.getLong(column0, 0), 123L);

        Block column1 = reader.readBlock(1);
        assertEquals(column1.getPositionCount(), 1);
        assertFalse(column1.isNull(0));
        assertEquals(varchar20.getSlice(column1, 0), utf8Slice("hello"));

        assertFalse(reader.getUserMetadata().containsKey(OrcFileMetadata.KEY));
    }
}
Also used : OrcDataSource(com.facebook.presto.orc.OrcDataSource) OrcTestingUtil.fileOrcDataSource(com.facebook.presto.raptor.storage.OrcTestingUtil.fileOrcDataSource) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) BitSet(java.util.BitSet) Page(com.facebook.presto.common.Page) DecimalType(com.facebook.presto.common.type.DecimalType) VarcharType.createVarcharType(com.facebook.presto.common.type.VarcharType.createVarcharType) ArrayType(com.facebook.presto.common.type.ArrayType) Type(com.facebook.presto.common.type.Type) FileSystem(org.apache.hadoop.fs.FileSystem) Block(com.facebook.presto.common.block.Block) LocalOrcDataEnvironment(com.facebook.presto.raptor.filesystem.LocalOrcDataEnvironment) File(java.io.File) Test(org.testng.annotations.Test)

Example 5 with OrcBatchRecordReader

use of com.facebook.presto.orc.OrcBatchRecordReader in project presto by prestodb.

the class OrcStorageManager method getRowsFromUuid.

/**
 * Reads column 0 of the given delta shard as BIGINT and sets, for every value read,
 * the bit at that index in a {@link BitSet}.
 *
 * @param fileSystem file system handed to {@code openShard}
 * @param deltaShardUuid identifier of the delta shard to read; may be empty
 * @return {@code Optional.empty()} when no UUID is supplied, otherwise the populated bit set
 * @throws PrestoException with {@code RAPTOR_ERROR} when an {@link IOException} or
 *         {@link RuntimeException} occurs while opening or reading the shard
 */
Optional<BitSet> getRowsFromUuid(FileSystem fileSystem, Optional<UUID> deltaShardUuid) {
    // Nothing to read when there is no delta shard.
    if (!deltaShardUuid.isPresent()) {
        return Optional.empty();
    }

    try (OrcDataSource dataSource = openShard(fileSystem, deltaShardUuid.get(), defaultReaderAttributes)) {
        OrcAggregatedMemoryContext systemMemoryUsage = new RaptorOrcAggregatedMemoryContext();
        OrcReader reader = new OrcReader(
                dataSource,
                ORC,
                orcFileTailSource,
                new StorageStripeMetadataSource(),
                new RaptorOrcAggregatedMemoryContext(),
                new OrcReaderOptions(
                        defaultReaderAttributes.getMaxMergeDistance(),
                        defaultReaderAttributes.getTinyStripeThreshold(),
                        HUGE_MAX_READ_BLOCK_SIZE,
                        defaultReaderAttributes.isZstdJniDecompressionEnabled()),
                false,
                NO_ENCRYPTION,
                DwrfKeyProvider.EMPTY,
                new RuntimeStats());

        // Values are converted with toIntExact below, so refuse files whose row count does not fit in an int.
        long fileRowCount = reader.getFooter().getNumberOfRows();
        if (fileRowCount >= Integer.MAX_VALUE) {
            throw new IOException("File has too many rows");
        }

        try (OrcBatchRecordReader recordReader = reader.createBatchRecordReader(ImmutableMap.of(0, BIGINT), OrcPredicate.TRUE, DEFAULT_STORAGE_TIMEZONE, systemMemoryUsage, INITIAL_BATCH_SIZE)) {
            BitSet rows = new BitSet();
            // Keep pulling batches until nextBatch() reports a non-positive size.
            for (int batchSize = recordReader.nextBatch(); batchSize > 0; batchSize = recordReader.nextBatch()) {
                Block block = recordReader.readBlock(0);
                for (int position = 0; position < block.getPositionCount(); position++) {
                    rows.set(toIntExact(block.getLong(position)));
                }
            }
            return Optional.of(rows);
        }
    } catch (IOException | RuntimeException e) {
        throw new PrestoException(RAPTOR_ERROR, "Failed to read file: " + deltaShardUuid, e);
    }
}
Also used : OrcDataSource(com.facebook.presto.orc.OrcDataSource) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) RuntimeStats(com.facebook.presto.common.RuntimeStats) BitSet(java.util.BitSet) PrestoException(com.facebook.presto.spi.PrestoException) RaptorOrcAggregatedMemoryContext(com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext) IOException(java.io.IOException) OrcReaderOptions(com.facebook.presto.orc.OrcReaderOptions) OrcReader(com.facebook.presto.orc.OrcReader) Block(com.facebook.presto.common.block.Block) RaptorOrcAggregatedMemoryContext(com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource)

Aggregations

OrcBatchRecordReader (com.facebook.presto.orc.OrcBatchRecordReader)11 OrcDataSource (com.facebook.presto.orc.OrcDataSource)10 Type (com.facebook.presto.common.type.Type)9 Block (com.facebook.presto.common.block.Block)6 PrestoException (com.facebook.presto.spi.PrestoException)6 ArrayType (com.facebook.presto.common.type.ArrayType)5 VarcharType.createVarcharType (com.facebook.presto.common.type.VarcharType.createVarcharType)5 OrcReader (com.facebook.presto.orc.OrcReader)5 OrcReaderOptions (com.facebook.presto.orc.OrcReaderOptions)5 IOException (java.io.IOException)5 Page (com.facebook.presto.common.Page)4 RuntimeStats (com.facebook.presto.common.RuntimeStats)4 OrcAggregatedMemoryContext (com.facebook.presto.orc.OrcAggregatedMemoryContext)4 OrcTestingUtil.fileOrcDataSource (com.facebook.presto.raptor.storage.OrcTestingUtil.fileOrcDataSource)4 ImmutableList (com.google.common.collect.ImmutableList)4 File (java.io.File)4 Path (org.apache.hadoop.fs.Path)4 Test (org.testng.annotations.Test)4 DecimalType (com.facebook.presto.common.type.DecimalType)3 RowType (com.facebook.presto.common.type.RowType)3