Search in sources :

Example 1 with RaptorOrcAggregatedMemoryContext

use of com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext in project presto by prestodb.

the class OrcTestingUtil method createReader.

/**
 * Opens {@code dataSource} as an ORC file and creates a batch record reader over its columns.
 *
 * <p>Asserts that the file has exactly as many columns as {@code columnIds}, and that the
 * column name at each position equals the decimal string of the column id at that position.
 * Each position is then mapped to the type found at the same position of {@code types}.
 *
 * @param dataSource the ORC data to read
 * @param columnIds expected column ids, in file order
 * @param types column types, positionally aligned with {@code columnIds}
 * @return a record reader over the included columns
 * @throws IOException if the reader cannot be opened
 */
public static OrcBatchRecordReader createReader(OrcDataSource dataSource, List<Long> columnIds, List<Type> types) throws IOException {
    OrcReader orcReader = new OrcReader(dataSource, ORC, new StorageOrcFileTailSource(), new StorageStripeMetadataSource(), new RaptorOrcAggregatedMemoryContext(), createDefaultTestConfig(), false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());

    List<String> namesInFile = orcReader.getColumnNames();
    assertEquals(namesInFile.size(), columnIds.size());

    Map<Integer, Type> includedColumns = new HashMap<>();
    for (int position = 0; position < columnIds.size(); position++) {
        long expectedId = columnIds.get(position);
        // Column names in the file are the column ids rendered as strings
        assertEquals(namesInFile.get(position), String.valueOf(expectedId));
        includedColumns.put(position, types.get(position));
    }
    return createRecordReader(orcReader, includedColumns);
}
Also used : HashMap(java.util.HashMap) RuntimeStats(com.facebook.presto.common.RuntimeStats) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) RaptorOrcAggregatedMemoryContext(com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext) Type(com.facebook.presto.common.type.Type) OrcReader(com.facebook.presto.orc.OrcReader) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource)

Example 2 with RaptorOrcAggregatedMemoryContext

use of com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext in project presto by prestodb.

the class TestOrcFileRewriter method testRewriterDropThenAddDifferentColumns.

/**
 * Verifies the file rewriter across a sequence of column additions and removals:
 * adding columns that have no data, dropping existing columns, deleting rows,
 * and finally dropping every column that carries data.
 */
@Test
public void testRewriterDropThenAddDifferentColumns() throws Exception {
    FunctionAndTypeManager functionAndTypeManager = createTestFunctionAndTypeManager();
    DBI dbi = new DBI("jdbc:h2:mem:test" + System.nanoTime() + "_" + ThreadLocalRandom.current().nextInt());
    dbi.registerMapper(new TableColumn.Mapper(functionAndTypeManager));
    // NOTE(review): presumably held open so the in-memory H2 database outlives individual connections - confirm
    Handle dummyHandle = dbi.open();
    File dataDir = Files.createTempDir();
    try {
        StorageManager storageManager = createOrcStorageManager(dbi, dataDir);
        List<Long> columnIds = ImmutableList.of(3L, 7L);
        List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(20));
        File file = new File(temporary, randomUUID().toString());
        try (FileWriter writer = createFileWriter(columnIds, columnTypes, file, false)) {
            List<Page> pages = rowPagesBuilder(columnTypes).row(1L, "1").row(2L, "2").row(3L, "3").row(4L, "4").build();
            writer.appendPages(pages);
        }
        // Add a column; the rewritten file is expected to be byte-identical to the input
        File newFile1 = new File(temporary, randomUUID().toString());
        FileSystem fileSystem = new LocalOrcDataEnvironment().getFileSystem(DEFAULT_RAPTOR_CONTEXT);
        OrcFileInfo info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(3L, 7L, 10L), ImmutableList.of(BIGINT, createVarcharType(20), DOUBLE)), path(file), path(newFile1), new BitSet(5));
        assertEquals(info.getRowCount(), 4);
        assertEquals(readAllBytes(file.toPath()), readAllBytes(newFile1.toPath()));
        // Drop a column
        File newFile2 = new File(temporary, randomUUID().toString());
        info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(7L, 10L), ImmutableList.of(createVarcharType(20), DOUBLE)), path(newFile1), path(newFile2), new BitSet(5));
        assertEquals(info.getRowCount(), 4);
        // Optimized writer will keep the only column
        OrcReader orcReader = new OrcReader(fileOrcDataSource(newFile2), ORC, new StorageOrcFileTailSource(), new StorageStripeMetadataSource(), new RaptorOrcAggregatedMemoryContext(), OrcTestingUtil.createDefaultTestConfig(), false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());
        // Previously the result of equals() was discarded, so nothing was actually verified here
        assertEquals(orcReader.getColumnNames(), ImmutableList.of("7"));
        // Add a column with the different ID with different type
        File newFile3 = new File(temporary, randomUUID().toString());
        info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(7L, 10L, 13L), ImmutableList.of(createVarcharType(20), DOUBLE, createVarcharType(5))), path(newFile2), path(newFile3), new BitSet(5));
        assertEquals(info.getRowCount(), 4);
        assertEquals(readAllBytes(newFile2.toPath()), readAllBytes(newFile3.toPath()));
        // Get prepared for the final file; make sure it is accessible from storage manager
        UUID uuid = randomUUID();
        File newFile4 = getFileSystemPath(new File(dataDir, "data/storage"), uuid);
        // Optimized ORC writer does not create the file itself
        newFile4.getParentFile().mkdirs();
        newFile4.createNewFile();
        // Drop a column and add a column; also delete 3 rows
        BitSet rowsToDelete = new BitSet(5);
        rowsToDelete.set(0);
        rowsToDelete.set(1);
        rowsToDelete.set(3);
        info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(7L, 13L, 18L), ImmutableList.of(createVarcharType(20), createVarcharType(5), INTEGER)), path(newFile3), path(newFile4), rowsToDelete);
        assertEquals(info.getRowCount(), 1);
        ConnectorPageSource source = storageManager.getPageSource(DEFAULT_RAPTOR_CONTEXT, DEFAULT_HIVE_FILE_CONTEXT, uuid, Optional.empty(), false, OptionalInt.empty(), ImmutableList.of(13L, 7L, 18L), ImmutableList.of(createVarcharType(5), createVarcharType(20), INTEGER), TupleDomain.all(), READER_ATTRIBUTES);
        Page page = null;
        while (page == null) {
            page = source.getNextPage();
        }
        assertEquals(page.getPositionCount(), 1);
        // Column 13L
        Block column0 = page.getBlock(0);
        assertTrue(column0.isNull(0));
        // Column 7L
        Block column1 = page.getBlock(1);
        assertEquals(createVarcharType(20).getSlice(column1, 0), utf8Slice("3"));
        // Column 18L
        Block column2 = page.getBlock(2);
        assertTrue(column2.isNull(0));
        // Remove all the columns
        File newFile5 = new File(temporary, randomUUID().toString());
        info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(13L, 18L), ImmutableList.of(createVarcharType(5), INTEGER)), path(newFile4), path(newFile5), new BitSet(5));
        // Optimized writer will drop the file
        assertEquals(info.getRowCount(), 0);
        assertFalse(newFile5.exists());
    } finally {
        // Release the handle and the temp directory even when an assertion above fails
        dummyHandle.close();
        deleteRecursively(dataDir.toPath(), ALLOW_INSECURE);
    }
}
Also used : RuntimeStats(com.facebook.presto.common.RuntimeStats) TestOrcStorageManager.createOrcStorageManager(com.facebook.presto.raptor.storage.TestOrcStorageManager.createOrcStorageManager) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) DBI(org.skife.jdbi.v2.DBI) Page(com.facebook.presto.common.Page) RaptorOrcAggregatedMemoryContext(com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) FunctionAndTypeManager(com.facebook.presto.metadata.FunctionAndTypeManager) FunctionAndTypeManager.createTestFunctionAndTypeManager(com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager) FileSystem(org.apache.hadoop.fs.FileSystem) LocalOrcDataEnvironment(com.facebook.presto.raptor.filesystem.LocalOrcDataEnvironment) UUID(java.util.UUID) UUID.randomUUID(java.util.UUID.randomUUID) BitSet(java.util.BitSet) TableColumn(com.facebook.presto.raptor.metadata.TableColumn) Handle(org.skife.jdbi.v2.Handle) DecimalType(com.facebook.presto.common.type.DecimalType) VarcharType.createVarcharType(com.facebook.presto.common.type.VarcharType.createVarcharType) ArrayType(com.facebook.presto.common.type.ArrayType) Type(com.facebook.presto.common.type.Type) OrcReader(com.facebook.presto.orc.OrcReader) Block(com.facebook.presto.common.block.Block) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) File(java.io.File) Test(org.testng.annotations.Test)

Example 3 with RaptorOrcAggregatedMemoryContext

use of com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext in project presto by prestodb.

the class OrcStorageManager method getRowsFromUuid.

/**
 * Reads the shard identified by {@code deltaShardUuid} and collects the values of its
 * column 0 (read as BIGINT) into a bit set.
 *
 * @param fileSystem file system used to open the shard
 * @param deltaShardUuid UUID of the delta shard to read, if any
 * @return {@code Optional.empty()} when no UUID is given; otherwise a bit set with one bit
 *         set for every value read from column 0
 * @throws PrestoException with {@code RAPTOR_ERROR} if the file cannot be read, reports
 *         {@code Integer.MAX_VALUE} rows or more, or holds a value that does not fit in an int
 */
Optional<BitSet> getRowsFromUuid(FileSystem fileSystem, Optional<UUID> deltaShardUuid) {
    if (!deltaShardUuid.isPresent()) {
        return Optional.empty();
    }
    // Unwrap once so the error message shows the UUID itself rather than "Optional[...]"
    UUID deltaUuid = deltaShardUuid.get();
    try (OrcDataSource dataSource = openShard(fileSystem, deltaUuid, defaultReaderAttributes)) {
        OrcAggregatedMemoryContext systemMemoryUsage = new RaptorOrcAggregatedMemoryContext();
        OrcReader reader = new OrcReader(dataSource, ORC, orcFileTailSource, new StorageStripeMetadataSource(), new RaptorOrcAggregatedMemoryContext(), new OrcReaderOptions(defaultReaderAttributes.getMaxMergeDistance(), defaultReaderAttributes.getTinyStripeThreshold(), HUGE_MAX_READ_BLOCK_SIZE, defaultReaderAttributes.isZstdJniDecompressionEnabled()), false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());
        if (reader.getFooter().getNumberOfRows() >= Integer.MAX_VALUE) {
            throw new IOException("File has too many rows");
        }
        try (OrcBatchRecordReader recordReader = reader.createBatchRecordReader(ImmutableMap.of(0, BIGINT), OrcPredicate.TRUE, DEFAULT_STORAGE_TIMEZONE, systemMemoryUsage, INITIAL_BATCH_SIZE)) {
            BitSet bitSet = new BitSet();
            while (recordReader.nextBatch() > 0) {
                Block block = recordReader.readBlock(0);
                for (int i = 0; i < block.getPositionCount(); i++) {
                    // toIntExact throws ArithmeticException on overflow; the catch below wraps it
                    bitSet.set(toIntExact(block.getLong(i)));
                }
            }
            return Optional.of(bitSet);
        }
    } catch (IOException | RuntimeException e) {
        throw new PrestoException(RAPTOR_ERROR, "Failed to read file: " + deltaUuid, e);
    }
}
Also used : OrcDataSource(com.facebook.presto.orc.OrcDataSource) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) RuntimeStats(com.facebook.presto.common.RuntimeStats) BitSet(java.util.BitSet) PrestoException(com.facebook.presto.spi.PrestoException) RaptorOrcAggregatedMemoryContext(com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext) IOException(java.io.IOException) OrcReaderOptions(com.facebook.presto.orc.OrcReaderOptions) OrcReader(com.facebook.presto.orc.OrcReader) Block(com.facebook.presto.common.block.Block) RaptorOrcAggregatedMemoryContext(com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource)

Example 4 with RaptorOrcAggregatedMemoryContext

use of com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext in project presto by prestodb.

the class OrcStorageManager method computeShardStats.

/**
 * Opens {@code file} as an ORC file and gathers statistics for its columns.
 *
 * <p>Columns for which {@code computeColumnStats} yields no result are left out of the list.
 *
 * @param fileSystem file system used to open the file
 * @param file path of the ORC file
 * @return the statistics that could be computed, in the order of {@code getColumnInfo(reader)}
 * @throws PrestoException with {@code RAPTOR_ERROR} if an {@link IOException} occurs while reading
 */
private List<ColumnStats> computeShardStats(FileSystem fileSystem, Path file) {
    try (OrcDataSource dataSource = orcDataEnvironment.createOrcDataSource(fileSystem, file, defaultReaderAttributes)) {
        OrcReaderOptions readerOptions = new OrcReaderOptions(
                defaultReaderAttributes.getMaxMergeDistance(),
                defaultReaderAttributes.getTinyStripeThreshold(),
                HUGE_MAX_READ_BLOCK_SIZE,
                defaultReaderAttributes.isZstdJniDecompressionEnabled());
        OrcReader reader = new OrcReader(dataSource, ORC, orcFileTailSource, stripeMetadataSourceFactory, new RaptorOrcAggregatedMemoryContext(), readerOptions, false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());

        ImmutableList.Builder<ColumnStats> shardStats = ImmutableList.builder();
        for (ColumnInfo column : getColumnInfo(reader)) {
            computeColumnStats(reader, column.getColumnId(), column.getType(), typeManager)
                    .ifPresent(shardStats::add);
        }
        return shardStats.build();
    } catch (IOException e) {
        throw new PrestoException(RAPTOR_ERROR, "Failed to read file: " + file, e);
    }
}
Also used : OrcDataSource(com.facebook.presto.orc.OrcDataSource) OrcReaderOptions(com.facebook.presto.orc.OrcReaderOptions) OrcReader(com.facebook.presto.orc.OrcReader) RuntimeStats(com.facebook.presto.common.RuntimeStats) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) ColumnStats(com.facebook.presto.raptor.metadata.ColumnStats) ShardStats.computeColumnStats(com.facebook.presto.raptor.storage.ShardStats.computeColumnStats) ColumnInfo(com.facebook.presto.raptor.metadata.ColumnInfo) PrestoException(com.facebook.presto.spi.PrestoException) RaptorOrcAggregatedMemoryContext(com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext) IOException(java.io.IOException)

Example 5 with RaptorOrcAggregatedMemoryContext

use of com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext in project presto by prestodb.

the class OrcStorageManager method getPageSource.

/**
 * Opens the shard {@code shardUuid} and builds a page source over the requested columns.
 *
 * <p>Each requested column id is resolved to one of three things: a special index for hidden
 * columns, the column's index in the file, or {@code NULL_COLUMN} when the file has no such
 * column. When {@code transactionId} is present, a shard rewriter is attached to the result.
 *
 * <p>The data source is closed before any failure is propagated.
 *
 * @throws PrestoException with {@code RAPTOR_ERROR} if an {@link IOException} or
 *         {@link RuntimeException} occurs while creating the page source
 */
@Override
public ConnectorPageSource getPageSource(HdfsContext hdfsContext, HiveFileContext hiveFileContext, UUID shardUuid, Optional<UUID> deltaShardUuid, boolean tableSupportsDeltaDelete, OptionalInt bucketNumber, List<Long> columnIds, List<Type> columnTypes, TupleDomain<RaptorColumnHandle> effectivePredicate, ReaderAttributes readerAttributes, OptionalLong transactionId, Optional<Map<String, Type>> allColumnTypes) {
    FileSystem fileSystem = orcDataEnvironment.getFileSystem(hdfsContext);
    OrcDataSource dataSource = openShard(fileSystem, shardUuid, readerAttributes);
    OrcAggregatedMemoryContext systemMemoryUsage = new RaptorOrcAggregatedMemoryContext();
    try {
        OrcReaderOptions readerOptions = new OrcReaderOptions(
                readerAttributes.getMaxMergeDistance(),
                readerAttributes.getTinyStripeThreshold(),
                HUGE_MAX_READ_BLOCK_SIZE,
                readerAttributes.isZstdJniDecompressionEnabled());
        OrcReader reader = new OrcReader(dataSource, ORC, orcFileTailSource, stripeMetadataSourceFactory, new RaptorOrcAggregatedMemoryContext(), readerOptions, hiveFileContext.isCacheable(), NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());
        Map<Long, Integer> indexMap = columnIdIndex(reader.getColumnNames());

        // Resolve every requested column id to a channel index
        ImmutableMap.Builder<Integer, Type> includedColumns = ImmutableMap.builder();
        ImmutableList.Builder<Integer> columnIndexes = ImmutableList.builder();
        int position = 0;
        for (long columnId : columnIds) {
            if (isHiddenColumn(columnId)) {
                columnIndexes.add(toSpecialIndex(columnId));
            } else {
                Integer fileIndex = indexMap.get(columnId);
                if (fileIndex != null) {
                    columnIndexes.add(fileIndex);
                    includedColumns.put(fileIndex, toOrcFileType(columnTypes.get(position), typeManager));
                } else {
                    // The file has no column with this id
                    columnIndexes.add(NULL_COLUMN);
                }
            }
            position++;
        }

        OrcPredicate predicate = getPredicate(effectivePredicate, indexMap);
        OrcBatchRecordReader recordReader = reader.createBatchRecordReader(
                new StorageTypeConverter(typeManager).toStorageTypes(includedColumns.build()),
                predicate,
                DEFAULT_STORAGE_TIMEZONE,
                systemMemoryUsage,
                INITIAL_BATCH_SIZE);

        Optional<ShardRewriter> shardRewriter;
        if (!transactionId.isPresent()) {
            shardRewriter = Optional.empty();
        } else {
            checkState(allColumnTypes.isPresent());
            long rowCount = reader.getFooter().getNumberOfRows();
            if (rowCount >= Integer.MAX_VALUE) {
                throw new PrestoException(RAPTOR_ERROR, "File has too many rows, failed to read file: " + shardUuid);
            }
            shardRewriter = Optional.of(createShardRewriter(hdfsContext, fileSystem, transactionId.getAsLong(), bucketNumber, shardUuid, toIntExact(rowCount), deltaShardUuid, tableSupportsDeltaDelete, allColumnTypes.get()));
        }

        OrcPageSource pageSource = new OrcPageSource(recordReader, dataSource, columnIds, columnTypes, columnIndexes.build(), shardUuid, bucketNumber, systemMemoryUsage, new DeltaShardLoader(deltaShardUuid, tableSupportsDeltaDelete, this, fileSystem));
        return new OrcUpdatablePageSource(shardRewriter, recordReader, pageSource);
    } catch (IOException | RuntimeException e) {
        closeQuietly(dataSource);
        throw new PrestoException(RAPTOR_ERROR, "Failed to create page source for shard " + shardUuid, e);
    } catch (Throwable t) {
        closeQuietly(dataSource);
        throw t;
    }
}
Also used : RuntimeStats(com.facebook.presto.common.RuntimeStats) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) PrestoException(com.facebook.presto.spi.PrestoException) RaptorOrcAggregatedMemoryContext(com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext) OrcReaderOptions(com.facebook.presto.orc.OrcReaderOptions) FileSystem(org.apache.hadoop.fs.FileSystem) RaptorOrcAggregatedMemoryContext(com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) OrcDataSource(com.facebook.presto.orc.OrcDataSource) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) IOException(java.io.IOException) ImmutableMap(com.google.common.collect.ImmutableMap) CharType.createCharType(com.facebook.presto.common.type.CharType.createCharType) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) DecimalType(com.facebook.presto.common.type.DecimalType) ArrayType(com.facebook.presto.common.type.ArrayType) RowType(com.facebook.presto.common.type.RowType) TimestampType(com.facebook.presto.common.type.TimestampType) MapType(com.facebook.presto.common.type.MapType) VarcharType.createVarcharType(com.facebook.presto.common.type.VarcharType.createVarcharType) OrcType(com.facebook.presto.orc.metadata.OrcType) Type(com.facebook.presto.common.type.Type) OrcReader(com.facebook.presto.orc.OrcReader) OptionalLong(java.util.OptionalLong) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) OrcPredicate(com.facebook.presto.orc.OrcPredicate)

Aggregations

RaptorOrcAggregatedMemoryContext (com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext)8 RuntimeStats (com.facebook.presto.common.RuntimeStats)6 OrcReader (com.facebook.presto.orc.OrcReader)6 Type (com.facebook.presto.common.type.Type)4 OrcBatchRecordReader (com.facebook.presto.orc.OrcBatchRecordReader)4 OrcDataSource (com.facebook.presto.orc.OrcDataSource)4 OrcReaderOptions (com.facebook.presto.orc.OrcReaderOptions)4 PrestoException (com.facebook.presto.spi.PrestoException)4 Block (com.facebook.presto.common.block.Block)3 StorageStripeMetadataSource (com.facebook.presto.orc.StorageStripeMetadataSource)3 ImmutableList (com.google.common.collect.ImmutableList)3 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)3 IOException (java.io.IOException)3 Page (com.facebook.presto.common.Page)2 ArrayType (com.facebook.presto.common.type.ArrayType)2 DecimalType (com.facebook.presto.common.type.DecimalType)2 VarcharType.createVarcharType (com.facebook.presto.common.type.VarcharType.createVarcharType)2 FunctionAndTypeManager (com.facebook.presto.metadata.FunctionAndTypeManager)2 FunctionAndTypeManager.createTestFunctionAndTypeManager (com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager)2 OrcAggregatedMemoryContext (com.facebook.presto.orc.OrcAggregatedMemoryContext)2