Use of com.facebook.presto.raptor.filesystem.LocalOrcDataEnvironment in project presto by prestodb.
From class TestOrcFileRewriter, method testRewriterDropThenAddDifferentColumns.
/**
 * Verifies that the rewriter can drop and add different columns.
 */
@Test
public void testRewriterDropThenAddDifferentColumns()
        throws Exception
{
    FunctionAndTypeManager functionAndTypeManager = createTestFunctionAndTypeManager();
    DBI dbi = new DBI("jdbc:h2:mem:test" + System.nanoTime() + "_" + ThreadLocalRandom.current().nextInt());
    dbi.registerMapper(new TableColumn.Mapper(functionAndTypeManager));
    Handle dummyHandle = dbi.open();
    File dataDir = Files.createTempDir();
    StorageManager storageManager = createOrcStorageManager(dbi, dataDir);

    List<Long> columnIds = ImmutableList.of(3L, 7L);
    List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(20));

    File file = new File(temporary, randomUUID().toString());
    try (FileWriter writer = createFileWriter(columnIds, columnTypes, file, false)) {
        List<Page> pages = rowPagesBuilder(columnTypes)
                .row(1L, "1")
                .row(2L, "2")
                .row(3L, "3")
                .row(4L, "4")
                .build();
        writer.appendPages(pages);
    }

    // Add a column
    File newFile1 = new File(temporary, randomUUID().toString());
    FileSystem fileSystem = new LocalOrcDataEnvironment().getFileSystem(DEFAULT_RAPTOR_CONTEXT);
    OrcFileInfo info = createFileRewriter().rewrite(
            fileSystem,
            getColumnTypes(ImmutableList.of(3L, 7L, 10L), ImmutableList.of(BIGINT, createVarcharType(20), DOUBLE)),
            path(file),
            path(newFile1),
            new BitSet(5));
    assertEquals(info.getRowCount(), 4);
    // The added column has no data, so the rewritten file is byte-for-byte identical
    assertEquals(readAllBytes(file.toPath()), readAllBytes(newFile1.toPath()));

    // Drop a column
    File newFile2 = new File(temporary, randomUUID().toString());
    info = createFileRewriter().rewrite(
            fileSystem,
            getColumnTypes(ImmutableList.of(7L, 10L), ImmutableList.of(createVarcharType(20), DOUBLE)),
            path(newFile1),
            path(newFile2),
            new BitSet(5));
    assertEquals(info.getRowCount(), 4);

    // The optimized writer keeps only the remaining column
    OrcReader orcReader = new OrcReader(
            fileOrcDataSource(newFile2),
            ORC,
            new StorageOrcFileTailSource(),
            new StorageStripeMetadataSource(),
            new RaptorOrcAggregatedMemoryContext(),
            OrcTestingUtil.createDefaultTestConfig(),
            false,
            NO_ENCRYPTION,
            DwrfKeyProvider.EMPTY,
            new RuntimeStats());
    assertEquals(orcReader.getColumnNames(), ImmutableList.of("7"));

    // Add a column with a different ID and a different type
    File newFile3 = new File(temporary, randomUUID().toString());
    info = createFileRewriter().rewrite(
            fileSystem,
            getColumnTypes(ImmutableList.of(7L, 10L, 13L), ImmutableList.of(createVarcharType(20), DOUBLE, createVarcharType(5))),
            path(newFile2),
            path(newFile3),
            new BitSet(5));
    assertEquals(info.getRowCount(), 4);
    assertEquals(readAllBytes(newFile2.toPath()), readAllBytes(newFile3.toPath()));

    // Prepare the final file; make sure it is accessible from the storage manager
    UUID uuid = randomUUID();
    File newFile4 = getFileSystemPath(new File(dataDir, "data/storage"), uuid);
    // The optimized ORC writer does not create the file itself
    newFile4.getParentFile().mkdirs();
    newFile4.createNewFile();

    // Drop a column and add a column; also delete 3 rows
    BitSet rowsToDelete = new BitSet(5);
    rowsToDelete.set(0);
    rowsToDelete.set(1);
    rowsToDelete.set(3);
    info = createFileRewriter().rewrite(
            fileSystem,
            getColumnTypes(ImmutableList.of(7L, 13L, 18L), ImmutableList.of(createVarcharType(20), createVarcharType(5), INTEGER)),
            path(newFile3),
            path(newFile4),
            rowsToDelete);
    assertEquals(info.getRowCount(), 1);

    ConnectorPageSource source = storageManager.getPageSource(
            DEFAULT_RAPTOR_CONTEXT,
            DEFAULT_HIVE_FILE_CONTEXT,
            uuid,
            Optional.empty(),
            false,
            OptionalInt.empty(),
            ImmutableList.of(13L, 7L, 18L),
            ImmutableList.of(createVarcharType(5), createVarcharType(20), INTEGER),
            TupleDomain.all(),
            READER_ATTRIBUTES);
    // The page source may return null before the first page is ready
    Page page = null;
    while (page == null) {
        page = source.getNextPage();
    }
    assertEquals(page.getPositionCount(), 1);

    // Column 13L (added after the data was written, so null)
    Block column0 = page.getBlock(0);
    assertTrue(column0.isNull(0));
    // Column 7L (holds the surviving row)
    Block column1 = page.getBlock(1);
    assertEquals(createVarcharType(20).getSlice(column1, 0), utf8Slice("3"));
    // Column 18L (also added later, so null)
    Block column2 = page.getBlock(2);
    assertTrue(column2.isNull(0));

    // Drop the only column with data
    File newFile5 = new File(temporary, randomUUID().toString());
    info = createFileRewriter().rewrite(
            fileSystem,
            getColumnTypes(ImmutableList.of(13L, 18L), ImmutableList.of(createVarcharType(5), INTEGER)),
            path(newFile4),
            path(newFile5),
            new BitSet(5));
    // With no data left, the optimized writer drops the file entirely
    assertEquals(info.getRowCount(), 0);
    assertFalse(newFile5.exists());

    dummyHandle.close();
    deleteRecursively(dataDir.toPath(), ALLOW_INSECURE);
}
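The rewrite calls above rely on two small helpers from TestOrcFileRewriter that this page does not show. The sketch below is a reconstruction from the call sites, not the verified Presto source: path presumably wraps a local File in a Hadoop Path, and getColumnTypes presumably zips column IDs and types into the map the rewriter expects, keyed by the ID rendered as a string (which would explain the column named "7" asserted above). Assumed imports: org.apache.hadoop.fs.Path, com.google.common.collect.ImmutableMap.

// Hypothetical reconstructions of the helpers used by the rewrite calls above.
private static Path path(File file)
{
    return new Path(file.toURI());
}

private static Map<String, Type> getColumnTypes(List<Long> columnIds, List<Type> columnTypes)
{
    // The rewriter identifies columns by ID, so the schema map is keyed by the ID as a string.
    ImmutableMap.Builder<String, Type> types = ImmutableMap.builder();
    for (int i = 0; i < columnIds.size(); i++) {
        types.put(String.valueOf(columnIds.get(i)), columnTypes.get(i));
    }
    return types.build();
}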
Use of com.facebook.presto.raptor.filesystem.LocalOrcDataEnvironment in project presto by prestodb.
From class TestOrcStorageManager, method testRewriter.
@Test
public void testRewriter()
        throws Exception
{
    OrcStorageManager manager = createOrcStorageManager();
    FileSystem fileSystem = new LocalOrcDataEnvironment().getFileSystem(DEFAULT_RAPTOR_CONTEXT);
    long transactionId = TRANSACTION_ID;
    List<Long> columnIds = ImmutableList.of(3L, 7L);
    List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(10));

    // create file with 2 rows
    StoragePageSink sink = createStoragePageSink(manager, columnIds, columnTypes);
    List<Page> pages = rowPagesBuilder(columnTypes)
            .row(123L, "hello")
            .row(456L, "bye")
            .build();
    sink.appendPages(pages);
    List<ShardInfo> shards = getFutureValue(sink.commit());
    assertEquals(shardRecorder.getShards().size(), 1);

    // delete one row
    BitSet rowsToDelete = new BitSet();
    rowsToDelete.set(0);
    InplaceShardRewriter shardRewriter = (InplaceShardRewriter) manager.createShardRewriter(
            DEFAULT_RAPTOR_CONTEXT,
            fileSystem,
            transactionId,
            OptionalInt.empty(),
            shards.get(0).getShardUuid(),
            2,
            Optional.empty(),
            false,
            IntStream.range(0, columnIds.size())
                    .boxed()
                    .collect(Collectors.toMap(index -> String.valueOf(columnIds.get(index)), columnTypes::get)));
    Collection<Slice> fragments = shardRewriter.rewriteShard(rowsToDelete);
    Slice shardDelta = Iterables.getOnlyElement(fragments);
    ShardDelta shardDeltas = jsonCodec(ShardDelta.class).fromJson(shardDelta.getBytes());
    ShardInfo shardInfo = Iterables.getOnlyElement(shardDeltas.getNewShards());

    // check that output file has one row
    assertEquals(shardInfo.getRowCount(), 1);

    // check that storage file is same as backup file
    File storageFile = new File(storageService.getStorageFile(shardInfo.getShardUuid()).toString());
    File backupFile = fileBackupStore.getBackupFile(shardInfo.getShardUuid());
    assertFileEquals(storageFile, backupFile);

    // verify recorded shards: the original shard plus the rewritten one
    List<RecordedShard> recordedShards = shardRecorder.getShards();
    assertEquals(recordedShards.size(), 2);
    assertEquals(recordedShards.get(1).getTransactionId(), TRANSACTION_ID);
    assertEquals(recordedShards.get(1).getShardUuid(), shardInfo.getShardUuid());
}
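createStoragePageSink is another helper this page omits. Below is a minimal sketch, assuming OrcStorageManager exposes a createStoragePageSink method with roughly this shape; the exact parameter list is an assumption inferred from the calls above, not the verified Presto signature.

// Hypothetical helper: open a page sink for writing a new shard in a test transaction.
private static StoragePageSink createStoragePageSink(StorageManager manager, List<Long> columnIds, List<Type> columnTypes)
{
    // TRANSACTION_ID and DEFAULT_RAPTOR_CONTEXT are the test constants used throughout this class.
    return manager.createStoragePageSink(DEFAULT_RAPTOR_CONTEXT, TRANSACTION_ID, OptionalInt.empty(), columnIds, columnTypes, false);
}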
Use of com.facebook.presto.raptor.filesystem.LocalOrcDataEnvironment in project presto by prestodb.
From class TestOrcStorageManager, method deltaDelete.
private Collection<Slice> deltaDelete(BitSet rowsToDelete, boolean oldDeltaDeleteExist)
{
    OrcStorageManager manager = createOrcStorageManager();
    FileSystem fileSystem = new LocalOrcDataEnvironment().getFileSystem(DEFAULT_RAPTOR_CONTEXT);
    List<Long> columnIds = ImmutableList.of(3L, 7L);
    List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(10));

    // create file with 3 rows
    StoragePageSink sink = createStoragePageSink(manager, columnIds, columnTypes);
    List<Page> pages = rowPagesBuilder(columnTypes)
            .row(123L, "hello")
            .row(456L, "bye")
            .row(456L, "test")
            .build();
    sink.appendPages(pages);
    List<ShardInfo> shards = getFutureValue(sink.commit());
    assertEquals(shardRecorder.getShards().size(), 1);

    List<ShardInfo> oldDeltaDeleteShards = null;
    if (oldDeltaDeleteExist) {
        // create a pre-existing delta delete shard marking 1 row (row 2) as deleted
        List<Long> deltaColumnIds = ImmutableList.of(0L);
        List<Type> deltaColumnTypes = ImmutableList.of(BIGINT);
        StoragePageSink deltaSink = createStoragePageSink(manager, deltaColumnIds, deltaColumnTypes);
        List<Page> deltaPages = rowPagesBuilder(deltaColumnTypes).row(2L).build();
        deltaSink.appendPages(deltaPages);
        oldDeltaDeleteShards = getFutureValue(deltaSink.commit());
    }

    // delta delete
    DeltaShardRewriter shardRewriter = (DeltaShardRewriter) manager.createShardRewriter(
            DEFAULT_RAPTOR_CONTEXT,
            fileSystem,
            TRANSACTION_ID,
            OptionalInt.empty(),
            shards.get(0).getShardUuid(),
            3,
            oldDeltaDeleteExist ? Optional.of(oldDeltaDeleteShards.get(0).getShardUuid()) : Optional.empty(),
            true,
            null);
    return shardRewriter.writeDeltaDeleteFile(rowsToDelete);
}
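For reference, a minimal usage sketch of this helper, decoding the returned fragments the same way testWriteDeltaDeleteMerge does below:

// Usage sketch: delete the third row ("test") with no pre-existing delta shard.
BitSet rowsToDelete = new BitSet();
rowsToDelete.set(2);
Collection<Slice> fragments = deltaDelete(rowsToDelete, false);
// Each fragment is a JSON-encoded ShardDeleteDelta describing the new delta delete shard.
ShardDeleteDelta delta = jsonCodec(ShardDeleteDelta.class).fromJson(Iterables.getOnlyElement(fragments).getBytes());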
Use of com.facebook.presto.raptor.filesystem.LocalOrcDataEnvironment in project presto by prestodb.
From class TestShardEjector, method testEjector.
@Test(invocationCount = 20)
public void testEjector()
        throws Exception
{
    NodeManager nodeManager = createNodeManager("node1", "node2", "node3", "node4", "node5");
    ShardEjector ejector = new ShardEjector(
            nodeManager.getCurrentNode().getNodeIdentifier(),
            nodeManager::getWorkerNodes,
            shardManager,
            storageService,
            new Duration(1, HOURS),
            Optional.of(new TestingBackupStore()),
            new LocalOrcDataEnvironment(),
            "test");
    // node1 holds far more shard data than its peers, so the ejector should move shards off it
    List<ShardInfo> shards = ImmutableList.<ShardInfo>builder()
            .add(shardInfo("node1", 14)).add(shardInfo("node1", 13)).add(shardInfo("node1", 12)).add(shardInfo("node1", 11))
            .add(shardInfo("node1", 10)).add(shardInfo("node1", 10)).add(shardInfo("node1", 10)).add(shardInfo("node1", 10))
            .add(shardInfo("node2", 5)).add(shardInfo("node2", 5))
            .add(shardInfo("node3", 10)).add(shardInfo("node4", 10)).add(shardInfo("node5", 10))
            .add(shardInfo("node6", 200))
            .build();
    long tableId = createTable("test");
    List<ColumnInfo> columns = ImmutableList.of(new ColumnInfo(1, BIGINT));
    shardManager.createTable(tableId, columns, false, OptionalLong.empty(), false);
    long transactionId = shardManager.beginTransaction();
    shardManager.commitShards(transactionId, tableId, columns, shards, Optional.empty(), 0);
    for (ShardInfo shard : shards.subList(0, 8)) {
        File file = new File(storageService.getStorageFile(shard.getShardUuid()).toString());
        storageService.createParents(new Path(file.toURI()));
        assertTrue(file.createNewFile());
    }
    ejector.process();
    shardManager.getShardNodes(tableId, TupleDomain.all(), false);
    Set<UUID> ejectedShards = shards.subList(0, 4).stream()
            .map(ShardInfo::getShardUuid)
            .collect(toSet());
    Set<UUID> keptShards = shards.subList(4, 8).stream()
            .map(ShardInfo::getShardUuid)
            .collect(toSet());
    Set<UUID> remaining = uuids(shardManager.getNodeShardsAndDeltas("node1"));
    for (UUID uuid : ejectedShards) {
        assertFalse(remaining.contains(uuid));
        assertFalse(new File(storageService.getStorageFile(uuid).toString()).exists());
    }
    assertEquals(remaining, keptShards);
    for (UUID uuid : keptShards) {
        assertTrue(new File(storageService.getStorageFile(uuid).toString()).exists());
    }
    Set<UUID> others = ImmutableSet.<UUID>builder()
            .addAll(uuids(shardManager.getNodeShardsAndDeltas("node2")))
            .addAll(uuids(shardManager.getNodeShardsAndDeltas("node3")))
            .addAll(uuids(shardManager.getNodeShardsAndDeltas("node4")))
            .addAll(uuids(shardManager.getNodeShardsAndDeltas("node5")))
            .build();
    assertTrue(others.containsAll(ejectedShards));
}
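The uuids helper is not shown on this page either. A plausible sketch follows, assuming getNodeShardsAndDeltas returns a set of ShardMetadata; that return type is an assumption inferred from the call sites, not the verified Presto API.

// Hypothetical sketch: collect the shard UUIDs from a node's shard metadata.
private static Set<UUID> uuids(Set<ShardMetadata> metadata)
{
    return metadata.stream()
            .map(ShardMetadata::getShardUuid)
            .collect(toSet());
}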
Use of com.facebook.presto.raptor.filesystem.LocalOrcDataEnvironment in project presto by prestodb.
From class TestOrcStorageManager, method testWriteDeltaDeleteMerge.
// rowsToDelete and rowsDeleted must be mutually exclusive
@Test
public void testWriteDeltaDeleteMerge()
        throws Exception
{
    FileSystem fileSystem = new LocalOrcDataEnvironment().getFileSystem(DEFAULT_RAPTOR_CONTEXT);
    BitSet rowsToDelete = new BitSet();
    rowsToDelete.set(0);
    Collection<Slice> fragments = deltaDelete(rowsToDelete, true);
    Slice shardDelta = Iterables.getOnlyElement(fragments);
    ShardDeleteDelta shardDeltas = jsonCodec(ShardDeleteDelta.class).fromJson(shardDelta.getBytes());
    ShardInfo shardInfo = shardDeltas.getDeltaInfoPair().getNewDeltaDeleteShard().get();

    // Check that the output file (the new delta file) has the 2 merged rows
    assertEquals(shardInfo.getRowCount(), 2);
    assertTrue(checkContent(fileSystem, shardInfo.getShardUuid(), rowsToDelete));

    // Check that the storage file is the same as the backup file
    File storageFile = new File(storageService.getStorageFile(shardInfo.getShardUuid()).toString());
    File backupFile = fileBackupStore.getBackupFile(shardInfo.getShardUuid());
    assertFileEquals(storageFile, backupFile);

    // Verify the recorded shards: original file + old delta + new delta
    List<RecordedShard> recordedShards = shardRecorder.getShards();
    assertEquals(recordedShards.size(), 3);
    assertEquals(recordedShards.get(2).getTransactionId(), TRANSACTION_ID);
    assertEquals(recordedShards.get(2).getShardUuid(), shardInfo.getShardUuid());
}
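The expected row count of 2 follows from merging the pre-existing delta (which deltaDelete writes with row 2 marked deleted) with the new delete set. A worked sketch of that arithmetic:

// Why the merged delta holds 2 rows: union of the old and new delete sets.
BitSet oldDelta = new BitSet();
oldDelta.set(2);                   // written by deltaDelete(_, true) above
BitSet newDeletes = new BitSet();
newDeletes.set(0);                 // the delete issued by this test
BitSet merged = (BitSet) oldDelta.clone();
merged.or(newDeletes);             // merged delete set = {0, 2}
assert merged.cardinality() == 2;  // matches shardInfo.getRowCount()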