Search in sources:

Example 16 with ShardInfo

use of com.facebook.presto.raptor.metadata.ShardInfo in project presto by prestodb.

the class ShardCompactor method compact.

/**
 * Compacts the given shards into new shards written through a freshly created page sink.
 *
 * <p>The column ids and types are derived from {@code columns}; the actual compaction is
 * delegated to the sink-based {@code compact} overload. If that overload fails with an
 * {@link IOException} or a {@link RuntimeException}, the sink is rolled back and the
 * failure is rethrown unchanged. On success the input shard count, output shard count and
 * elapsed time in milliseconds are passed to {@code updateStats}.
 *
 * @param transactionId transaction the new shards are written under
 * @param bucketNumber bucket of the shards being compacted, if any
 * @param uuids identifiers of the shards to compact
 * @param columns columns to carry over into the compacted shards
 * @return the shards produced by the compaction
 * @throws IOException if the delegated compaction fails with an I/O error
 */
public List<ShardInfo> compact(long transactionId, OptionalInt bucketNumber, Set<UUID> uuids, List<ColumnInfo> columns) throws IOException {
    long startNanos = System.nanoTime();

    List<Long> columnIds = columns.stream()
            .map(ColumnInfo::getColumnId)
            .collect(toList());
    List<Type> columnTypes = columns.stream()
            .map(ColumnInfo::getType)
            .collect(toList());

    StoragePageSink sink = storageManager.createStoragePageSink(transactionId, bucketNumber, columnIds, columnTypes, false);

    List<ShardInfo> compacted;
    try {
        compacted = compact(sink, bucketNumber, uuids, columnIds, columnTypes);
    } catch (IOException | RuntimeException failure) {
        // Undo whatever the sink has written before propagating the failure.
        sink.rollback();
        throw failure;
    }

    // Stats are recorded outside the try block so a failure here never triggers a rollback.
    int inputShardCount = uuids.size();
    int outputShardCount = compacted.size();
    long elapsedMillis = nanosSince(startNanos).toMillis();
    updateStats(inputShardCount, outputShardCount, elapsedMillis);
    return compacted;
}
Also used : Type(com.facebook.presto.spi.type.Type) StoragePageSink(com.facebook.presto.raptor.storage.StoragePageSink) IOException(java.io.IOException) ShardInfo(com.facebook.presto.raptor.metadata.ShardInfo)

Example 17 with ShardInfo

use of com.facebook.presto.raptor.metadata.ShardInfo in project presto by prestodb.

the class ShardCompactor method compactSorted.

/**
 * Compacts the given shards into new shards while merging their pages through a priority queue.
 *
 * <p>One {@code SortedPageSource} is opened per entry of {@code uuidsMap} and placed in a
 * {@link PriorityQueue}. The loop repeatedly polls the head source, appends its next page to
 * the output sink (flushing whenever the sink reports it is full) and re-queues the source;
 * a source with no more pages is closed and dropped. The resulting order is therefore the
 * one defined by {@code SortedPageSource}'s natural ordering.
 *
 * <p>On {@link IOException} or {@link RuntimeException} the output sink is rolled back and
 * the failure is rethrown. In all cases every source that is still open is closed
 * best-effort, including the one that was polled off the queue when the failure happened.
 *
 * @param transactionId transaction the new shards are written under
 * @param tableSupportsDeltaDelete forwarded to the storage manager when opening each shard
 * @param bucketNumber bucket of the shards being compacted, if any
 * @param uuidsMap shard identifier mapped to its optional delta shard identifier
 * @param columns columns to carry over into the compacted shards
 * @param sortColumnIds ids of the sort columns; must be a subset of the ids in {@code columns}
 * @param sortOrders sort order per sort column; must have the same size as {@code sortColumnIds}
 * @return the shards produced by the compaction
 * @throws IOException if reading, writing or closing a source fails
 * @throws IllegalArgumentException if the sort arguments are inconsistent
 */
public List<ShardInfo> compactSorted(long transactionId, boolean tableSupportsDeltaDelete, OptionalInt bucketNumber, Map<UUID, Optional<UUID>> uuidsMap, List<ColumnInfo> columns, List<Long> sortColumnIds, List<SortOrder> sortOrders) throws IOException {
    checkArgument(sortColumnIds.size() == sortOrders.size(), "sortColumnIds and sortOrders must be of the same size");
    long start = System.nanoTime();
    List<Long> columnIds = columns.stream().map(ColumnInfo::getColumnId).collect(toList());
    List<Type> columnTypes = columns.stream().map(ColumnInfo::getType).collect(toList());
    checkArgument(columnIds.containsAll(sortColumnIds), "sortColumnIds must be a subset of columnIds");
    // Positions of the sort columns within the projected column list.
    List<Integer> sortIndexes = sortColumnIds.stream().map(columnIds::indexOf).collect(toList());
    Queue<SortedPageSource> rowSources = new PriorityQueue<>();
    StoragePageSink outputPageSink = storageManager.createStoragePageSink(DEFAULT_RAPTOR_CONTEXT, transactionId, bucketNumber, columnIds, columnTypes, false);
    // Source that has been polled off the queue but neither closed nor re-queued yet.
    // The queue-wide cleanup in the finally block cannot see it, so it is tracked separately.
    SortedPageSource inFlight = null;
    try {
        uuidsMap.forEach((uuid, deltaUuid) -> {
            ConnectorPageSource pageSource = storageManager.getPageSource(DEFAULT_RAPTOR_CONTEXT, DEFAULT_HIVE_FILE_CONTEXT, uuid, deltaUuid, tableSupportsDeltaDelete, bucketNumber, columnIds, columnTypes, TupleDomain.all(), readerAttributes);
            SortedPageSource rowSource = new SortedPageSource(pageSource, columnTypes, sortIndexes, sortOrders);
            rowSources.add(rowSource);
        });
        while (!rowSources.isEmpty()) {
            inFlight = rowSources.poll();
            if (!inFlight.hasNext()) {
                // Source is exhausted: close it. Stop tracking it first so that a failing
                // close() is not followed by a second close attempt in the finally block.
                SortedPageSource exhausted = inFlight;
                inFlight = null;
                exhausted.close();
                continue;
            }
            outputPageSink.appendPages(ImmutableList.of(inFlight.next()));
            if (outputPageSink.isFull()) {
                outputPageSink.flush();
            }
            // Re-queue so the source is ordered again based on its new state.
            rowSources.add(inFlight);
            inFlight = null;
        }
        outputPageSink.flush();
        List<ShardInfo> shardInfos = getFutureValue(outputPageSink.commit());
        // Number of distinct delta shards that took part in the compaction.
        int deltaCount = uuidsMap.values().stream().filter(Optional::isPresent).collect(toSet()).size();
        updateStats(uuidsMap.size(), deltaCount, shardInfos.size(), nanosSince(start).toMillis());
        return shardInfos;
    } catch (IOException | RuntimeException e) {
        outputPageSink.rollback();
        throw e;
    } finally {
        // Same best-effort close the queued sources get, applied to the polled one (if any).
        Optional.ofNullable(inFlight).ifPresent(SortedPageSource::closeQuietly);
        rowSources.forEach(SortedPageSource::closeQuietly);
    }
}
Also used : Optional(java.util.Optional) StoragePageSink(com.facebook.presto.raptor.storage.StoragePageSink) IOException(java.io.IOException) PriorityQueue(java.util.PriorityQueue) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) Type(com.facebook.presto.common.type.Type) ShardInfo(com.facebook.presto.raptor.metadata.ShardInfo)

Example 18 with ShardInfo

use of com.facebook.presto.raptor.metadata.ShardInfo in project presto by prestodb.

the class TestOrcStorageManager method testRewriter.

/**
 * Writes a two-row shard, deletes its first row through an in-place shard rewriter and
 * verifies the rewritten shard: its row count, that the storage file matches the backup
 * file, and that the shard recorder saw the new shard under the test transaction.
 */
@Test
public void testRewriter() throws Exception {
    OrcStorageManager orcManager = createOrcStorageManager();
    FileSystem localFileSystem = new LocalOrcDataEnvironment().getFileSystem(DEFAULT_RAPTOR_CONTEXT);
    long txId = TRANSACTION_ID;
    List<Long> ids = ImmutableList.of(3L, 7L);
    List<Type> types = ImmutableList.of(BIGINT, createVarcharType(10));

    // Write the initial shard containing two rows.
    StoragePageSink pageSink = createStoragePageSink(orcManager, ids, types);
    List<Page> inputPages = rowPagesBuilder(types)
            .row(123L, "hello")
            .row(456L, "bye")
            .build();
    pageSink.appendPages(inputPages);
    List<ShardInfo> committedShards = getFutureValue(pageSink.commit());
    assertEquals(shardRecorder.getShards().size(), 1);

    // Mark row 0 for deletion.
    BitSet deletedRows = new BitSet();
    deletedRows.set(0);

    InplaceShardRewriter rewriter = (InplaceShardRewriter) orcManager.createShardRewriter(
            DEFAULT_RAPTOR_CONTEXT,
            localFileSystem,
            txId,
            OptionalInt.empty(),
            committedShards.get(0).getShardUuid(),
            2,
            Optional.empty(),
            false,
            // Column id (as a string) mapped to the type at the same position.
            IntStream.range(0, ids.size())
                    .boxed()
                    .collect(Collectors.toMap(index -> String.valueOf(ids.get(index)), types::get)));
    Collection<Slice> rewriteFragments = rewriter.rewriteShard(deletedRows);

    // The rewrite yields exactly one fragment, which decodes to a delta with one new shard.
    Slice fragment = Iterables.getOnlyElement(rewriteFragments);
    ShardDelta delta = jsonCodec(ShardDelta.class).fromJson(fragment.getBytes());
    ShardInfo rewrittenShard = Iterables.getOnlyElement(delta.getNewShards());

    // Only one of the two rows survives.
    assertEquals(rewrittenShard.getRowCount(), 1);

    // The storage copy and the backup copy of the new shard must be identical.
    File storedFile = new File(storageService.getStorageFile(rewrittenShard.getShardUuid()).toString());
    File backedUpFile = fileBackupStore.getBackupFile(rewrittenShard.getShardUuid());
    assertFileEquals(storedFile, backedUpFile);

    // The recorder now holds the original shard followed by the rewritten one.
    List<RecordedShard> recorded = shardRecorder.getShards();
    assertEquals(recorded.size(), 2);
    assertEquals(recorded.get(1).getTransactionId(), TRANSACTION_ID);
    assertEquals(recorded.get(1).getShardUuid(), rewrittenShard.getShardUuid());
}
Also used : Page(com.facebook.presto.common.Page) ShardDeleteDelta(com.facebook.presto.raptor.metadata.ShardDeleteDelta) Arrays(java.util.Arrays) FileSystem(org.apache.hadoop.fs.FileSystem) MaterializedResult.resultBuilder(com.facebook.presto.testing.MaterializedResult.resultBuilder) Test(org.testng.annotations.Test) OrcTestingUtil.octets(com.facebook.presto.raptor.storage.OrcTestingUtil.octets) AfterMethod(org.testng.annotations.AfterMethod) MoreFutures.getFutureValue(com.facebook.airlift.concurrent.MoreFutures.getFutureValue) Slices.wrappedBuffer(io.airlift.slice.Slices.wrappedBuffer) LocalOrcDataEnvironment(com.facebook.presto.raptor.filesystem.LocalOrcDataEnvironment) Configuration(org.apache.hadoop.conf.Configuration) ShardManager(com.facebook.presto.raptor.metadata.ShardManager) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) FileAssert.assertFile(org.testng.FileAssert.assertFile) OrcDataSource(com.facebook.presto.orc.OrcDataSource) Assert.assertFalse(org.testng.Assert.assertFalse) Assert.assertNotEquals(org.testng.Assert.assertNotEquals) FileBackupStore(com.facebook.presto.raptor.backup.FileBackupStore) NullableValue(com.facebook.presto.common.predicate.NullableValue) JsonCodec.jsonCodec(com.facebook.airlift.json.JsonCodec.jsonCodec) SchemaDaoUtil.createTablesWithRetry(com.facebook.presto.raptor.metadata.SchemaDaoUtil.createTablesWithRetry) SqlTimestamp(com.facebook.presto.common.type.SqlTimestamp) ISOChronology(org.joda.time.chrono.ISOChronology) RowPagesBuilder.rowPagesBuilder(com.facebook.presto.RowPagesBuilder.rowPagesBuilder) Iterables(com.google.common.collect.Iterables) ColumnStats(com.facebook.presto.raptor.metadata.ColumnStats) Slice(io.airlift.slice.Slice) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) TIMESTAMP(com.facebook.presto.common.type.TimestampType.TIMESTAMP) FunctionAndTypeManager.createTestFunctionAndTypeManager(com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager) 
RaptorLocalFileSystem(com.facebook.presto.raptor.filesystem.RaptorLocalFileSystem) DATE(com.facebook.presto.common.type.DateType.DATE) OptionalLong(java.util.OptionalLong) SqlDate(com.facebook.presto.common.type.SqlDate) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) SqlVarbinary(com.facebook.presto.common.type.SqlVarbinary) DBI(org.skife.jdbi.v2.DBI) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) RaptorColumnHandle(com.facebook.presto.raptor.RaptorColumnHandle) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) SNAPPY(com.facebook.presto.orc.metadata.CompressionKind.SNAPPY) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) IOException(java.io.IOException) UTC_KEY(com.facebook.presto.common.type.TimeZoneKey.UTC_KEY) UTC(org.joda.time.DateTimeZone.UTC) File(java.io.File) DEFAULT_RAPTOR_CONTEXT(com.facebook.presto.raptor.filesystem.FileSystemUtil.DEFAULT_RAPTOR_CONTEXT) ShardDelta(com.facebook.presto.raptor.metadata.ShardDelta) IDBI(org.skife.jdbi.v2.IDBI) MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) Duration(io.airlift.units.Duration) MaterializedResult.materializeSourceDataStream(com.facebook.presto.testing.MaterializedResult.materializeSourceDataStream) SESSION(com.facebook.presto.testing.TestingConnectorSession.SESSION) Days(org.joda.time.Days) Path(org.apache.hadoop.fs.Path) URI(java.net.URI) TIME(com.facebook.presto.common.type.TimeType.TIME) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) Files.createTempDir(com.google.common.io.Files.createTempDir) NodeManager(com.facebook.presto.spi.NodeManager) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) Collection(java.util.Collection) BeforeMethod(org.testng.annotations.BeforeMethod) UUID(java.util.UUID) Assert.assertNotNull(org.testng.Assert.assertNotNull) 
TestingNodeManager(com.facebook.presto.testing.TestingNodeManager) Collectors(java.util.stream.Collectors) ENABLED_AND_VALIDATED(com.facebook.presto.raptor.storage.StorageManagerConfig.OrcOptimizedWriterStage.ENABLED_AND_VALIDATED) String.format(java.lang.String.format) DataSize(io.airlift.units.DataSize) List(java.util.List) Optional(java.util.Optional) SqlTime(com.facebook.presto.common.type.SqlTime) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) BackupManager(com.facebook.presto.raptor.backup.BackupManager) IntStream(java.util.stream.IntStream) FileSystemUtil.xxhash64(com.facebook.presto.raptor.filesystem.FileSystemUtil.xxhash64) Assert.assertEquals(com.facebook.presto.testing.assertions.Assert.assertEquals) NANOSECONDS(java.util.concurrent.TimeUnit.NANOSECONDS) DateTimeTestingUtils.sqlTimestampOf(com.facebook.presto.testing.DateTimeTestingUtils.sqlTimestampOf) BackupStore(com.facebook.presto.raptor.backup.BackupStore) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) VarcharType.createVarcharType(com.facebook.presto.common.type.VarcharType.createVarcharType) Hashing.md5(com.google.common.hash.Hashing.md5) Files.hash(com.google.common.io.Files.hash) RecordedShard(com.facebook.presto.raptor.storage.InMemoryShardRecorder.RecordedShard) ImmutableList(com.google.common.collect.ImmutableList) DEFAULT_HIVE_FILE_CONTEXT(com.facebook.presto.hive.HiveFileContext.DEFAULT_HIVE_FILE_CONTEXT) FileAssert.assertDirectory(org.testng.FileAssert.assertDirectory) Type(com.facebook.presto.common.type.Type) TestDatabaseShardManager.createShardManager(com.facebook.presto.raptor.metadata.TestDatabaseShardManager.createShardManager) Assert.fail(org.testng.Assert.fail) DateTime(org.joda.time.DateTime) VARBINARY(com.facebook.presto.common.type.VarbinaryType.VARBINARY) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) ShardInfo(com.facebook.presto.raptor.metadata.ShardInfo) 
ShardRecorder(com.facebook.presto.raptor.metadata.ShardRecorder) TimeUnit(java.util.concurrent.TimeUnit) OrcTestingUtil.createReader(com.facebook.presto.raptor.storage.OrcTestingUtil.createReader) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) MaterializedResult(com.facebook.presto.testing.MaterializedResult) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) Handle(org.skife.jdbi.v2.Handle) Assert.assertTrue(org.testng.Assert.assertTrue) LocalFileStorageService(com.facebook.presto.raptor.filesystem.LocalFileStorageService) BitSet(java.util.BitSet) Block(com.facebook.presto.common.block.Block) BYTE(io.airlift.units.DataSize.Unit.BYTE) BitSet(java.util.BitSet) Page(com.facebook.presto.common.Page) RecordedShard(com.facebook.presto.raptor.storage.InMemoryShardRecorder.RecordedShard) VarcharType.createVarcharType(com.facebook.presto.common.type.VarcharType.createVarcharType) Type(com.facebook.presto.common.type.Type) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) FileSystem(org.apache.hadoop.fs.FileSystem) RaptorLocalFileSystem(com.facebook.presto.raptor.filesystem.RaptorLocalFileSystem) OptionalLong(java.util.OptionalLong) LocalOrcDataEnvironment(com.facebook.presto.raptor.filesystem.LocalOrcDataEnvironment) ShardDelta(com.facebook.presto.raptor.metadata.ShardDelta) FileAssert.assertFile(org.testng.FileAssert.assertFile) File(java.io.File) ShardInfo(com.facebook.presto.raptor.metadata.ShardInfo) Test(org.testng.annotations.Test)

Example 19 with ShardInfo

use of com.facebook.presto.raptor.metadata.ShardInfo in project presto by prestodb.

the class TestOrcStorageManager method deltaDelete.

/**
 * Test helper: writes a three-row shard, optionally writes a one-row shard that serves as
 * a pre-existing delta-delete shard, and then writes a delta-delete file for the given rows.
 *
 * @param rowsToDelete rows to mark as deleted in the three-row shard
 * @param oldDeltaDeleteExist whether a pre-existing delta-delete shard is created and passed to the rewriter
 * @return the fragments returned by the delta shard rewriter
 */
private Collection<Slice> deltaDelete(BitSet rowsToDelete, boolean oldDeltaDeleteExist) {
    OrcStorageManager orcManager = createOrcStorageManager();
    FileSystem localFileSystem = new LocalOrcDataEnvironment().getFileSystem(DEFAULT_RAPTOR_CONTEXT);
    List<Long> ids = ImmutableList.of(3L, 7L);
    List<Type> types = ImmutableList.of(BIGINT, createVarcharType(10));

    // Write the base shard containing three rows.
    StoragePageSink baseSink = createStoragePageSink(orcManager, ids, types);
    List<Page> basePages = rowPagesBuilder(types)
            .row(123L, "hello")
            .row(456L, "bye")
            .row(456L, "test")
            .build();
    baseSink.appendPages(basePages);
    List<ShardInfo> baseShards = getFutureValue(baseSink.commit());
    assertEquals(shardRecorder.getShards().size(), 1);

    // Optionally write a single-row shard that acts as the existing delta-delete shard.
    Optional<UUID> previousDeltaUuid = Optional.empty();
    if (oldDeltaDeleteExist) {
        List<Long> deltaIds = ImmutableList.of(0L);
        List<Type> deltaTypes = ImmutableList.of(BIGINT);
        StoragePageSink deltaSink = createStoragePageSink(orcManager, deltaIds, deltaTypes);
        List<Page> deltaPages = rowPagesBuilder(deltaTypes).row(2L).build();
        deltaSink.appendPages(deltaPages);
        List<ShardInfo> previousDeltaShards = getFutureValue(deltaSink.commit());
        previousDeltaUuid = Optional.of(previousDeltaShards.get(0).getShardUuid());
    }

    // Perform the delta delete against the base shard.
    DeltaShardRewriter rewriter = (DeltaShardRewriter) orcManager.createShardRewriter(
            DEFAULT_RAPTOR_CONTEXT,
            localFileSystem,
            TRANSACTION_ID,
            OptionalInt.empty(),
            baseShards.get(0).getShardUuid(),
            3,
            previousDeltaUuid,
            true,
            null);
    return rewriter.writeDeltaDeleteFile(rowsToDelete);
}
Also used : Page(com.facebook.presto.common.Page) VarcharType.createVarcharType(com.facebook.presto.common.type.VarcharType.createVarcharType) Type(com.facebook.presto.common.type.Type) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) FileSystem(org.apache.hadoop.fs.FileSystem) RaptorLocalFileSystem(com.facebook.presto.raptor.filesystem.RaptorLocalFileSystem) OptionalLong(java.util.OptionalLong) LocalOrcDataEnvironment(com.facebook.presto.raptor.filesystem.LocalOrcDataEnvironment) ShardInfo(com.facebook.presto.raptor.metadata.ShardInfo)

Example 20 with ShardInfo

use of com.facebook.presto.raptor.metadata.ShardInfo in project presto by prestodb.

the class TestShardEjector method testEjector.

/**
 * Commits fourteen shards spread over several nodes, runs the ejector once and verifies
 * that the first four shards listed for "node1" were removed from that node (metadata and
 * local file) and now live on one of node2..node5, while the next four stayed on "node1".
 */
@Test(invocationCount = 20)
public void testEjector() throws Exception {
    NodeManager nodes = createNodeManager("node1", "node2", "node3", "node4", "node5");
    ShardEjector shardEjector = new ShardEjector(
            nodes.getCurrentNode().getNodeIdentifier(),
            nodes::getWorkerNodes,
            shardManager,
            storageService,
            new Duration(1, HOURS),
            Optional.of(new TestingBackupStore()),
            new LocalOrcDataEnvironment(),
            "test");

    // Eight shards on node1, the rest elsewhere; "node6" is not one of the managed nodes.
    // NOTE(review): the numeric argument of shardInfo(...) is presumably the shard size - confirm.
    List<ShardInfo> allShards = ImmutableList.<ShardInfo>builder()
            .add(shardInfo("node1", 14))
            .add(shardInfo("node1", 13))
            .add(shardInfo("node1", 12))
            .add(shardInfo("node1", 11))
            .add(shardInfo("node1", 10))
            .add(shardInfo("node1", 10))
            .add(shardInfo("node1", 10))
            .add(shardInfo("node1", 10))
            .add(shardInfo("node2", 5))
            .add(shardInfo("node2", 5))
            .add(shardInfo("node3", 10))
            .add(shardInfo("node4", 10))
            .add(shardInfo("node5", 10))
            .add(shardInfo("node6", 200))
            .build();

    long tableId = createTable("test");
    List<ColumnInfo> tableColumns = ImmutableList.of(new ColumnInfo(1, BIGINT));
    shardManager.createTable(tableId, tableColumns, false, OptionalLong.empty(), false);
    long txId = shardManager.beginTransaction();
    shardManager.commitShards(txId, tableId, tableColumns, allShards, Optional.empty(), 0);

    // Create an (empty) local storage file for each of node1's shards.
    for (ShardInfo localShard : allShards.subList(0, 8)) {
        File shardFile = new File(storageService.getStorageFile(localShard.getShardUuid()).toString());
        storageService.createParents(new Path(shardFile.toURI()));
        assertTrue(shardFile.createNewFile());
    }

    shardEjector.process();
    shardManager.getShardNodes(tableId, TupleDomain.all(), false);

    Set<UUID> expectedEjected = allShards.subList(0, 4).stream()
            .map(ShardInfo::getShardUuid)
            .collect(toSet());
    Set<UUID> expectedKept = allShards.subList(4, 8).stream()
            .map(ShardInfo::getShardUuid)
            .collect(toSet());
    Set<UUID> stillOnNode1 = uuids(shardManager.getNodeShardsAndDeltas("node1"));

    // Ejected shards are gone from node1, both in metadata and on disk.
    for (UUID ejected : expectedEjected) {
        assertFalse(stillOnNode1.contains(ejected));
        assertFalse(new File(storageService.getStorageFile(ejected).toString()).exists());
    }

    // Exactly the kept shards remain on node1, and their files are still present.
    assertEquals(stillOnNode1, expectedKept);
    for (UUID kept : expectedKept) {
        assertTrue(new File(storageService.getStorageFile(kept).toString()).exists());
    }

    // Every ejected shard has been reassigned to one of the other managed nodes.
    Set<UUID> onOtherNodes = ImmutableSet.<UUID>builder()
            .addAll(uuids(shardManager.getNodeShardsAndDeltas("node2")))
            .addAll(uuids(shardManager.getNodeShardsAndDeltas("node3")))
            .addAll(uuids(shardManager.getNodeShardsAndDeltas("node4")))
            .addAll(uuids(shardManager.getNodeShardsAndDeltas("node5")))
            .build();
    assertTrue(onOtherNodes.containsAll(expectedEjected));
}
Also used : Path(org.apache.hadoop.fs.Path) ColumnInfo(com.facebook.presto.raptor.metadata.ColumnInfo) Duration(io.airlift.units.Duration) NodeManager(com.facebook.presto.spi.NodeManager) TestingNodeManager(com.facebook.presto.testing.TestingNodeManager) LocalOrcDataEnvironment(com.facebook.presto.raptor.filesystem.LocalOrcDataEnvironment) UUID(java.util.UUID) UUID.randomUUID(java.util.UUID.randomUUID) File(java.io.File) ShardInfo(com.facebook.presto.raptor.metadata.ShardInfo) Test(org.testng.annotations.Test)

Aggregations

ShardInfo (com.facebook.presto.raptor.metadata.ShardInfo)25 UUID (java.util.UUID)12 Type (com.facebook.presto.common.type.Type)11 Test (org.testng.annotations.Test)11 VarcharType.createVarcharType (com.facebook.presto.common.type.VarcharType.createVarcharType)8 Optional (java.util.Optional)7 Page (com.facebook.presto.common.Page)6 LocalOrcDataEnvironment (com.facebook.presto.raptor.filesystem.LocalOrcDataEnvironment)6 Slice (io.airlift.slice.Slice)6 File (java.io.File)6 RaptorLocalFileSystem (com.facebook.presto.raptor.filesystem.RaptorLocalFileSystem)5 StoragePageSink (com.facebook.presto.raptor.storage.StoragePageSink)5 Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice)5 ShardDeleteDelta (com.facebook.presto.raptor.metadata.ShardDeleteDelta)4 ShardDelta (com.facebook.presto.raptor.metadata.ShardDelta)4 RecordedShard (com.facebook.presto.raptor.storage.InMemoryShardRecorder.RecordedShard)4 ConnectorPageSource (com.facebook.presto.spi.ConnectorPageSource)4 IOException (java.io.IOException)4 OptionalLong (java.util.OptionalLong)4 FileSystem (org.apache.hadoop.fs.FileSystem)4