Example usage of com.facebook.presto.raptor.metadata.ShardInfo in the prestodb/presto project — class ShardCompactor, method compact():
/**
 * Compacts the given set of shards into new output shards.
 * All rows are streamed through a single storage page sink, which is rolled
 * back on failure so that no partially written output is committed.
 *
 * @param transactionId transaction under which the new shards are created
 * @param bucketNumber optional bucket number shared by the input shards
 * @param uuids UUIDs of the shards to compact
 * @param columns column metadata (ids and types) of the table
 * @return info describing the newly written shards
 * @throws IOException if reading or writing shard data fails
 */
public List<ShardInfo> compact(long transactionId, OptionalInt bucketNumber, Set<UUID> uuids, List<ColumnInfo> columns) throws IOException {
    long startNanos = System.nanoTime();
    List<Long> ids = columns.stream().map(ColumnInfo::getColumnId).collect(toList());
    List<Type> types = columns.stream().map(ColumnInfo::getType).collect(toList());
    StoragePageSink sink = storageManager.createStoragePageSink(transactionId, bucketNumber, ids, types, false);
    List<ShardInfo> newShards;
    try {
        newShards = compact(sink, bucketNumber, uuids, ids, types);
    }
    catch (IOException | RuntimeException e) {
        // discard partially written data before propagating the failure
        sink.rollback();
        throw e;
    }
    updateStats(uuids.size(), newShards.size(), nanosSince(startNanos).toMillis());
    return newShards;
}
Example usage of com.facebook.presto.raptor.metadata.ShardInfo in the prestodb/presto project — class ShardCompactor, method compactSorted():
/**
 * Compacts a set of sorted shards into new shards while preserving sort order,
 * using an N-way merge across the input shards.
 *
 * @param transactionId transaction under which the new shards are written
 * @param tableSupportsDeltaDelete whether inputs may have delta-delete shards
 * @param bucketNumber optional bucket number shared by the input shards
 * @param uuidsMap input shard UUID mapped to the optional UUID of its delta-delete shard
 * @param columns column metadata (ids and types) of the table
 * @param sortColumnIds ids of the sort columns; must be a subset of the table columns
 * @param sortOrders sort order per sort column, parallel to {@code sortColumnIds}
 * @return info describing the newly written shards
 * @throws IOException if reading or writing shard data fails
 */
public List<ShardInfo> compactSorted(long transactionId, boolean tableSupportsDeltaDelete, OptionalInt bucketNumber, Map<UUID, Optional<UUID>> uuidsMap, List<ColumnInfo> columns, List<Long> sortColumnIds, List<SortOrder> sortOrders) throws IOException {
checkArgument(sortColumnIds.size() == sortOrders.size(), "sortColumnIds and sortOrders must be of the same size");
long start = System.nanoTime();
List<Long> columnIds = columns.stream().map(ColumnInfo::getColumnId).collect(toList());
List<Type> columnTypes = columns.stream().map(ColumnInfo::getType).collect(toList());
checkArgument(columnIds.containsAll(sortColumnIds), "sortColumnIds must be a subset of columnIds");
// Translate sort column ids into positions within the output column list.
List<Integer> sortIndexes = sortColumnIds.stream().map(columnIds::indexOf).collect(toList());
// Priority queue drives the merge; presumably SortedPageSource orders sources
// by their current head row (compareTo not shown here) — TODO confirm.
Queue<SortedPageSource> rowSources = new PriorityQueue<>();
StoragePageSink outputPageSink = storageManager.createStoragePageSink(DEFAULT_RAPTOR_CONTEXT, transactionId, bucketNumber, columnIds, columnTypes, false);
try {
// Open one sorted source per input shard (delta-delete shard applied if present).
uuidsMap.forEach((uuid, deltaUuid) -> {
ConnectorPageSource pageSource = storageManager.getPageSource(DEFAULT_RAPTOR_CONTEXT, DEFAULT_HIVE_FILE_CONTEXT, uuid, deltaUuid, tableSupportsDeltaDelete, bucketNumber, columnIds, columnTypes, TupleDomain.all(), readerAttributes);
SortedPageSource rowSource = new SortedPageSource(pageSource, columnTypes, sortIndexes, sortOrders);
rowSources.add(rowSource);
});
// N-way merge: repeatedly take the minimal source, emit one page from it,
// then re-insert it so the queue reorders on its new position.
while (!rowSources.isEmpty()) {
SortedPageSource rowSource = rowSources.poll();
if (!rowSource.hasNext()) {
// rowSource is empty, close it
rowSource.close();
continue;
}
outputPageSink.appendPages(ImmutableList.of(rowSource.next()));
if (outputPageSink.isFull()) {
outputPageSink.flush();
}
rowSources.add(rowSource);
}
outputPageSink.flush();
List<ShardInfo> shardInfos = getFutureValue(outputPageSink.commit());
// Count distinct delta-delete shards among the inputs for the stats update.
int deltaCount = uuidsMap.values().stream().filter(Optional::isPresent).collect(toSet()).size();
updateStats(uuidsMap.size(), deltaCount, shardInfos.size(), nanosSince(start).toMillis());
return shardInfos;
} catch (IOException | RuntimeException e) {
// Abandon any partially written output before propagating.
outputPageSink.rollback();
throw e;
} finally {
// Close remaining sources whether the merge succeeded or failed.
rowSources.forEach(SortedPageSource::closeQuietly);
}
}
Example usage of com.facebook.presto.raptor.metadata.ShardInfo in the prestodb/presto project — class TestOrcStorageManager, method testRewriter():
@Test
public void testRewriter() throws Exception {
    OrcStorageManager manager = createOrcStorageManager();
    FileSystem fileSystem = new LocalOrcDataEnvironment().getFileSystem(DEFAULT_RAPTOR_CONTEXT);
    long transactionId = TRANSACTION_ID;
    List<Long> columnIds = ImmutableList.of(3L, 7L);
    List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(10));

    // Write an initial shard containing two rows.
    StoragePageSink pageSink = createStoragePageSink(manager, columnIds, columnTypes);
    pageSink.appendPages(rowPagesBuilder(columnTypes).row(123L, "hello").row(456L, "bye").build());
    List<ShardInfo> committed = getFutureValue(pageSink.commit());
    assertEquals(shardRecorder.getShards().size(), 1);

    // Mark the first row for deletion and rewrite the shard in place.
    BitSet rowsToDelete = new BitSet();
    rowsToDelete.set(0);
    InplaceShardRewriter rewriter = (InplaceShardRewriter) manager.createShardRewriter(DEFAULT_RAPTOR_CONTEXT, fileSystem, transactionId, OptionalInt.empty(), committed.get(0).getShardUuid(), 2, Optional.empty(), false, IntStream.range(0, columnIds.size()).boxed().collect(Collectors.toMap(index -> String.valueOf(columnIds.get(index)), columnTypes::get)));
    Collection<Slice> fragments = rewriter.rewriteShard(rowsToDelete);

    // The rewrite reports exactly one delta describing the replacement shard.
    ShardDelta delta = jsonCodec(ShardDelta.class).fromJson(Iterables.getOnlyElement(fragments).getBytes());
    ShardInfo replacement = Iterables.getOnlyElement(delta.getNewShards());

    // The surviving row count is one.
    assertEquals(replacement.getRowCount(), 1);

    // Storage file and its backup must be identical.
    File storage = new File(storageService.getStorageFile(replacement.getShardUuid()).toString());
    File backup = fileBackupStore.getBackupFile(replacement.getShardUuid());
    assertFileEquals(storage, backup);

    // The rewrite is recorded as a second shard under the same transaction.
    List<RecordedShard> recorded = shardRecorder.getShards();
    assertEquals(recorded.size(), 2);
    assertEquals(recorded.get(1).getTransactionId(), TRANSACTION_ID);
    assertEquals(recorded.get(1).getShardUuid(), replacement.getShardUuid());
}
Example usage of com.facebook.presto.raptor.metadata.ShardInfo in the prestodb/presto project — class TestOrcStorageManager, method deltaDelete():
/**
 * Helper: writes a base shard with three rows, optionally a pre-existing
 * delta-delete shard covering one row, then issues a delta delete for the
 * given rows and returns the resulting fragments.
 */
private Collection<Slice> deltaDelete(BitSet rowsToDelete, boolean oldDeltaDeleteExist) {
    OrcStorageManager manager = createOrcStorageManager();
    FileSystem fileSystem = new LocalOrcDataEnvironment().getFileSystem(DEFAULT_RAPTOR_CONTEXT);
    List<Long> columnIds = ImmutableList.of(3L, 7L);
    List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(10));

    // Base shard with three rows.
    StoragePageSink baseSink = createStoragePageSink(manager, columnIds, columnTypes);
    baseSink.appendPages(rowPagesBuilder(columnTypes).row(123L, "hello").row(456L, "bye").row(456L, "test").build());
    List<ShardInfo> baseShards = getFutureValue(baseSink.commit());
    assertEquals(shardRecorder.getShards().size(), 1);

    // Optionally create a pre-existing delta-delete shard marking one row.
    Optional<UUID> oldDeltaUuid = Optional.empty();
    if (oldDeltaDeleteExist) {
        List<Long> deltaColumnIds = ImmutableList.of(0L);
        List<Type> deltaColumnTypes = ImmutableList.of(BIGINT);
        StoragePageSink deltaSink = createStoragePageSink(manager, deltaColumnIds, deltaColumnTypes);
        deltaSink.appendPages(rowPagesBuilder(deltaColumnTypes).row(2L).build());
        oldDeltaUuid = Optional.of(getFutureValue(deltaSink.commit()).get(0).getShardUuid());
    }

    // Issue the delta delete against the base shard.
    DeltaShardRewriter rewriter = (DeltaShardRewriter) manager.createShardRewriter(DEFAULT_RAPTOR_CONTEXT, fileSystem, TRANSACTION_ID, OptionalInt.empty(), baseShards.get(0).getShardUuid(), 3, oldDeltaUuid, true, null);
    return rewriter.writeDeltaDeleteFile(rowsToDelete);
}
Example usage of com.facebook.presto.raptor.metadata.ShardInfo in the prestodb/presto project — class TestShardEjector, method testEjector():
// Verifies that ShardEjector, running as "node1", ejects that node's largest
// shards to rebalance storage while leaving the smaller ones in place.
// Run repeatedly because the ejector's node/shard iteration may involve
// nondeterministic ordering — TODO confirm against ShardEjector internals.
@Test(invocationCount = 20)
public void testEjector() throws Exception {
NodeManager nodeManager = createNodeManager("node1", "node2", "node3", "node4", "node5");
ShardEjector ejector = new ShardEjector(nodeManager.getCurrentNode().getNodeIdentifier(), nodeManager::getWorkerNodes, shardManager, storageService, new Duration(1, HOURS), Optional.of(new TestingBackupStore()), new LocalOrcDataEnvironment(), "test");
// node1 is overloaded: four large shards (sizes 14..11) and four of size 10.
// node6 is not a live worker, so its shard should be untouched by the ejector.
List<ShardInfo> shards = ImmutableList.<ShardInfo>builder().add(shardInfo("node1", 14)).add(shardInfo("node1", 13)).add(shardInfo("node1", 12)).add(shardInfo("node1", 11)).add(shardInfo("node1", 10)).add(shardInfo("node1", 10)).add(shardInfo("node1", 10)).add(shardInfo("node1", 10)).add(shardInfo("node2", 5)).add(shardInfo("node2", 5)).add(shardInfo("node3", 10)).add(shardInfo("node4", 10)).add(shardInfo("node5", 10)).add(shardInfo("node6", 200)).build();
long tableId = createTable("test");
List<ColumnInfo> columns = ImmutableList.of(new ColumnInfo(1, BIGINT));
shardManager.createTable(tableId, columns, false, OptionalLong.empty(), false);
long transactionId = shardManager.beginTransaction();
shardManager.commitShards(transactionId, tableId, columns, shards, Optional.empty(), 0);
// Materialize backing files for node1's eight shards so the ejector can move them.
for (ShardInfo shard : shards.subList(0, 8)) {
File file = new File(storageService.getStorageFile(shard.getShardUuid()).toString());
storageService.createParents(new Path(file.toURI()));
assertTrue(file.createNewFile());
}
ejector.process();
// NOTE(review): return value is ignored; presumably called for a side effect
// or left over from debugging — verify whether this call is still needed.
shardManager.getShardNodes(tableId, TupleDomain.all(), false);
// The test asserts the four largest node1 shards (indexes 0-3) were ejected
// and the four size-10 shards (indexes 4-7) were kept.
Set<UUID> ejectedShards = shards.subList(0, 4).stream().map(ShardInfo::getShardUuid).collect(toSet());
Set<UUID> keptShards = shards.subList(4, 8).stream().map(ShardInfo::getShardUuid).collect(toSet());
Set<UUID> remaining = uuids(shardManager.getNodeShardsAndDeltas("node1"));
for (UUID uuid : ejectedShards) {
assertFalse(remaining.contains(uuid));
// Ejected shards must also be gone from local storage.
assertFalse(new File(storageService.getStorageFile(uuid).toString()).exists());
}
assertEquals(remaining, keptShards);
for (UUID uuid : keptShards) {
assertTrue(new File(storageService.getStorageFile(uuid).toString()).exists());
}
// Every ejected shard must have been reassigned to one of the other live workers.
Set<UUID> others = ImmutableSet.<UUID>builder().addAll(uuids(shardManager.getNodeShardsAndDeltas("node2"))).addAll(uuids(shardManager.getNodeShardsAndDeltas("node3"))).addAll(uuids(shardManager.getNodeShardsAndDeltas("node4"))).addAll(uuids(shardManager.getNodeShardsAndDeltas("node5"))).build();
assertTrue(others.containsAll(ejectedShards));
}
Aggregations