Search in sources :

Example 6 with OrcDataSource

use of com.facebook.presto.orc.OrcDataSource in project presto by prestodb.

the class TestOrcStorageManager method testWriter.

@Test
public void testWriter() throws Exception {
    OrcStorageManager manager = createOrcStorageManager();
    List<Long> columnIds = ImmutableList.of(3L, 7L);
    List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(10));
    StoragePageSink sink = createStoragePageSink(manager, columnIds, columnTypes);
    List<Page> pages = rowPagesBuilder(columnTypes).row(123L, "hello").row(456L, "bye").build();
    sink.appendPages(pages);
    // shard is not recorded until flush
    assertEquals(shardRecorder.getShards().size(), 0);
    sink.flush();
    // shard is recorded after flush
    List<RecordedShard> recordedShards = shardRecorder.getShards();
    assertEquals(recordedShards.size(), 1);
    List<ShardInfo> shards = getFutureValue(sink.commit());
    assertEquals(shards.size(), 1);
    ShardInfo shardInfo = Iterables.getOnlyElement(shards);
    UUID shardUuid = shardInfo.getShardUuid();
    File file = storageService.getStorageFile(shardUuid);
    File backupFile = fileBackupStore.getBackupFile(shardUuid);
    assertEquals(recordedShards.get(0).getTransactionId(), TRANSACTION_ID);
    assertEquals(recordedShards.get(0).getShardUuid(), shardUuid);
    assertEquals(shardInfo.getRowCount(), 2);
    assertEquals(shardInfo.getCompressedSize(), file.length());
    // verify primary and backup shard exist
    assertFile(file, "primary shard");
    assertFile(backupFile, "backup shard");
    assertFileEquals(file, backupFile);
    // remove primary shard to force recovery from backup
    assertTrue(file.delete());
    assertTrue(file.getParentFile().delete());
    assertFalse(file.exists());
    recoveryManager.restoreFromBackup(shardUuid, OptionalLong.empty());
    try (OrcDataSource dataSource = manager.openShard(shardUuid, READER_ATTRIBUTES)) {
        OrcRecordReader reader = createReader(dataSource, columnIds, columnTypes);
        assertEquals(reader.nextBatch(), 2);
        Block column0 = reader.readBlock(BIGINT, 0);
        assertEquals(column0.isNull(0), false);
        assertEquals(column0.isNull(1), false);
        assertEquals(BIGINT.getLong(column0, 0), 123L);
        assertEquals(BIGINT.getLong(column0, 1), 456L);
        Block column1 = reader.readBlock(createVarcharType(10), 1);
        assertEquals(createVarcharType(10).getSlice(column1, 0), utf8Slice("hello"));
        assertEquals(createVarcharType(10).getSlice(column1, 1), utf8Slice("bye"));
        assertEquals(reader.nextBatch(), -1);
    }
}
Also used : OrcDataSource(com.facebook.presto.orc.OrcDataSource) Page(com.facebook.presto.spi.Page) RecordedShard(com.facebook.presto.raptor.storage.InMemoryShardRecorder.RecordedShard) OrcRecordReader(com.facebook.presto.orc.OrcRecordReader) VarcharType.createVarcharType(com.facebook.presto.spi.type.VarcharType.createVarcharType) Type(com.facebook.presto.spi.type.Type) OptionalLong(java.util.OptionalLong) Block(com.facebook.presto.spi.block.Block) UUID(java.util.UUID) FileAssert.assertFile(org.testng.FileAssert.assertFile) File(java.io.File) ShardInfo(com.facebook.presto.raptor.metadata.ShardInfo) Test(org.testng.annotations.Test)

Example 7 with OrcDataSource

use of com.facebook.presto.orc.OrcDataSource in project presto by prestodb.

the class OrcStorageManager method computeShardStats.

private List<ColumnStats> computeShardStats(File file) {
    try (OrcDataSource dataSource = fileOrcDataSource(defaultReaderAttributes, file)) {
        OrcReader reader = new OrcReader(dataSource, new OrcMetadataReader(), defaultReaderAttributes.getMaxMergeDistance(), defaultReaderAttributes.getMaxReadSize());
        ImmutableList.Builder<ColumnStats> list = ImmutableList.builder();
        for (ColumnInfo info : getColumnInfo(reader)) {
            computeColumnStats(reader, info.getColumnId(), info.getType()).ifPresent(list::add);
        }
        return list.build();
    } catch (IOException e) {
        throw new PrestoException(RAPTOR_ERROR, "Failed to read file: " + file, e);
    }
}
Also used : FileOrcDataSource(com.facebook.presto.orc.FileOrcDataSource) OrcDataSource(com.facebook.presto.orc.OrcDataSource) OrcReader(com.facebook.presto.orc.OrcReader) ImmutableList(com.google.common.collect.ImmutableList) ColumnStats(com.facebook.presto.raptor.metadata.ColumnStats) ShardStats.computeColumnStats(com.facebook.presto.raptor.storage.ShardStats.computeColumnStats) OrcMetadataReader(com.facebook.presto.orc.metadata.OrcMetadataReader) ColumnInfo(com.facebook.presto.raptor.metadata.ColumnInfo) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException)

Example 8 with OrcDataSource

use of com.facebook.presto.orc.OrcDataSource in project presto by prestodb.

the class TestShardWriter method testWriter.

@Test
public void testWriter() throws Exception {
    List<Long> columnIds = ImmutableList.of(1L, 2L, 4L, 6L, 7L, 8L, 9L, 10L);
    ArrayType arrayType = new ArrayType(BIGINT);
    ArrayType arrayOfArrayType = new ArrayType(arrayType);
    MapType mapType = new MapType(createVarcharType(10), BOOLEAN);
    List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(10), VARBINARY, DOUBLE, BOOLEAN, arrayType, mapType, arrayOfArrayType);
    File file = new File(directory, System.nanoTime() + ".orc");
    byte[] bytes1 = octets(0x00, 0xFE, 0xFF);
    byte[] bytes3 = octets(0x01, 0x02, 0x19, 0x80);
    RowPagesBuilder rowPagesBuilder = RowPagesBuilder.rowPagesBuilder(columnTypes).row(123L, "hello", wrappedBuffer(bytes1), 123.456, true, arrayBlockOf(BIGINT, 1, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))).row(null, "world", null, Double.POSITIVE_INFINITY, null, arrayBlockOf(BIGINT, 3, null), mapBlockOf(createVarcharType(5), BOOLEAN, "k2", null), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 6, 7))).row(456L, "bye ☃", wrappedBuffer(bytes3), Double.NaN, false, arrayBlockOf(BIGINT), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", false), arrayBlockOf(arrayType, arrayBlockOf(BIGINT)));
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(new EmptyClassLoader());
        OrcFileWriter writer = new OrcFileWriter(columnIds, columnTypes, file)) {
        writer.appendPages(rowPagesBuilder.build());
    }
    try (OrcDataSource dataSource = fileOrcDataSource(file)) {
        OrcRecordReader reader = createReader(dataSource, columnIds, columnTypes);
        assertEquals(reader.getReaderRowCount(), 3);
        assertEquals(reader.getReaderPosition(), 0);
        assertEquals(reader.getFileRowCount(), reader.getReaderRowCount());
        assertEquals(reader.getFilePosition(), reader.getFilePosition());
        assertEquals(reader.nextBatch(), 3);
        assertEquals(reader.getReaderPosition(), 0);
        assertEquals(reader.getFilePosition(), reader.getFilePosition());
        Block column0 = reader.readBlock(BIGINT, 0);
        assertEquals(column0.isNull(0), false);
        assertEquals(column0.isNull(1), true);
        assertEquals(column0.isNull(2), false);
        assertEquals(BIGINT.getLong(column0, 0), 123L);
        assertEquals(BIGINT.getLong(column0, 2), 456L);
        Block column1 = reader.readBlock(createVarcharType(10), 1);
        assertEquals(createVarcharType(10).getSlice(column1, 0), utf8Slice("hello"));
        assertEquals(createVarcharType(10).getSlice(column1, 1), utf8Slice("world"));
        assertEquals(createVarcharType(10).getSlice(column1, 2), utf8Slice("bye ☃"));
        Block column2 = reader.readBlock(VARBINARY, 2);
        assertEquals(VARBINARY.getSlice(column2, 0), wrappedBuffer(bytes1));
        assertEquals(column2.isNull(1), true);
        assertEquals(VARBINARY.getSlice(column2, 2), wrappedBuffer(bytes3));
        Block column3 = reader.readBlock(DOUBLE, 3);
        assertEquals(column3.isNull(0), false);
        assertEquals(column3.isNull(1), false);
        assertEquals(column3.isNull(2), false);
        assertEquals(DOUBLE.getDouble(column3, 0), 123.456);
        assertEquals(DOUBLE.getDouble(column3, 1), Double.POSITIVE_INFINITY);
        assertEquals(DOUBLE.getDouble(column3, 2), Double.NaN);
        Block column4 = reader.readBlock(BOOLEAN, 4);
        assertEquals(column4.isNull(0), false);
        assertEquals(column4.isNull(1), true);
        assertEquals(column4.isNull(2), false);
        assertEquals(BOOLEAN.getBoolean(column4, 0), true);
        assertEquals(BOOLEAN.getBoolean(column4, 2), false);
        Block column5 = reader.readBlock(arrayType, 5);
        assertEquals(column5.getPositionCount(), 3);
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 0), arrayBlockOf(BIGINT, 1, 2)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 1), arrayBlockOf(BIGINT, 3, null)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 2), arrayBlockOf(BIGINT)));
        Block column6 = reader.readBlock(mapType, 6);
        assertEquals(column6.getPositionCount(), 3);
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column6, 0), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true)));
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column6, 1), mapBlockOf(createVarcharType(5), BOOLEAN, "k2", null)));
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column6, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", false)));
        Block column7 = reader.readBlock(arrayOfArrayType, 7);
        assertEquals(column7.getPositionCount(), 3);
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column7, 0), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column7, 1), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 6, 7))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column7, 2), arrayBlockOf(arrayType, arrayBlockOf(BIGINT))));
        assertEquals(reader.nextBatch(), -1);
        assertEquals(reader.getReaderPosition(), 3);
        assertEquals(reader.getFilePosition(), reader.getFilePosition());
        OrcFileMetadata orcFileMetadata = METADATA_CODEC.fromJson(reader.getUserMetadata().get(OrcFileMetadata.KEY).getBytes());
        assertEquals(orcFileMetadata, new OrcFileMetadata(ImmutableMap.<Long, TypeSignature>builder().put(1L, BIGINT.getTypeSignature()).put(2L, createVarcharType(10).getTypeSignature()).put(4L, VARBINARY.getTypeSignature()).put(6L, DOUBLE.getTypeSignature()).put(7L, BOOLEAN.getTypeSignature()).put(8L, arrayType.getTypeSignature()).put(9L, mapType.getTypeSignature()).put(10L, arrayOfArrayType.getTypeSignature()).build()));
    }
    File crcFile = new File(file.getParentFile(), "." + file.getName() + ".crc");
    assertFalse(crcFile.exists());
}
Also used : OrcDataSource(com.facebook.presto.orc.OrcDataSource) OrcTestingUtil.fileOrcDataSource(com.facebook.presto.raptor.storage.OrcTestingUtil.fileOrcDataSource) RowPagesBuilder(com.facebook.presto.RowPagesBuilder) OrcRecordReader(com.facebook.presto.orc.OrcRecordReader) MapType(com.facebook.presto.type.MapType) ArrayType(com.facebook.presto.type.ArrayType) ArrayType(com.facebook.presto.type.ArrayType) MapType(com.facebook.presto.type.MapType) Type(com.facebook.presto.spi.type.Type) VarcharType.createVarcharType(com.facebook.presto.spi.type.VarcharType.createVarcharType) Block(com.facebook.presto.spi.block.Block) File(java.io.File) ThreadContextClassLoader(com.facebook.presto.spi.classloader.ThreadContextClassLoader) Test(org.testng.annotations.Test)

Aggregations

OrcDataSource (com.facebook.presto.orc.OrcDataSource)8 OrcRecordReader (com.facebook.presto.orc.OrcRecordReader)7 Type (com.facebook.presto.spi.type.Type)7 VarcharType.createVarcharType (com.facebook.presto.spi.type.VarcharType.createVarcharType)5 File (java.io.File)5 Test (org.testng.annotations.Test)5 OrcTestingUtil.fileOrcDataSource (com.facebook.presto.raptor.storage.OrcTestingUtil.fileOrcDataSource)4 Block (com.facebook.presto.spi.block.Block)4 ArrayType (com.facebook.presto.type.ArrayType)4 MapType (com.facebook.presto.type.MapType)4 OrcReader (com.facebook.presto.orc.OrcReader)3 Page (com.facebook.presto.spi.Page)3 PrestoException (com.facebook.presto.spi.PrestoException)3 ImmutableList (com.google.common.collect.ImmutableList)3 IOException (java.io.IOException)3 FileOrcDataSource (com.facebook.presto.orc.FileOrcDataSource)2 OrcPredicate (com.facebook.presto.orc.OrcPredicate)2 TupleDomainOrcPredicate (com.facebook.presto.orc.TupleDomainOrcPredicate)2 AggregatedMemoryContext (com.facebook.presto.orc.memory.AggregatedMemoryContext)2 OrcMetadataReader (com.facebook.presto.orc.metadata.OrcMetadataReader)2