Use of com.facebook.presto.raptor.filesystem.LocalOrcDataEnvironment in project presto by prestodb, from the class TestOrcFileRewriter, method testRewrite:
@Test
public void testRewrite()
        throws Exception
{
    FunctionAndTypeManager functionAndTypeManager = createTestFunctionAndTypeManager();
    ArrayType arrayType = new ArrayType(BIGINT);
    ArrayType arrayOfArrayType = new ArrayType(arrayType);
    Type mapType = functionAndTypeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of(
            TypeSignatureParameter.of(createVarcharType(5).getTypeSignature()),
            TypeSignatureParameter.of(BOOLEAN.getTypeSignature())));
    List<Long> columnIds = ImmutableList.of(3L, 7L, 9L, 10L, 11L, 12L);
    DecimalType decimalType = DecimalType.createDecimalType(4, 4);
    List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(20), arrayType, mapType, arrayOfArrayType, decimalType);

    // Write five rows to a new ORC file
    File file = new File(temporary, randomUUID().toString());
    try (FileWriter writer = OrcTestingUtil.createFileWriter(columnIds, columnTypes, file)) {
        List<Page> pages = rowPagesBuilder(columnTypes)
                .row(123L, "hello", arrayBlockOf(BIGINT, 1, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5)), new BigDecimal("2.3"))
                .row(777L, "sky", arrayBlockOf(BIGINT, 3, 4), mapBlockOf(createVarcharType(5), BOOLEAN, "k2", false), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 6)), new BigDecimal("2.3"))
                .row(456L, "bye", arrayBlockOf(BIGINT, 5, 6), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 7)), new BigDecimal("2.3"))
                .row(888L, "world", arrayBlockOf(BIGINT, 7, 8), mapBlockOf(createVarcharType(5), BOOLEAN, "k4", true), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 8), null), new BigDecimal("2.3"))
                .row(999L, "done", arrayBlockOf(BIGINT, 9, 10), mapBlockOf(createVarcharType(5), BOOLEAN, "k5", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 9, 10)), new BigDecimal("2.3"))
                .build();
        writer.appendPages(pages);
    }

    // Read the file back and verify all five rows plus the column-type metadata
    try (OrcDataSource dataSource = fileOrcDataSource(file)) {
        OrcBatchRecordReader reader = createReader(dataSource, columnIds, columnTypes);
        assertEquals(reader.getReaderRowCount(), 5);
        assertEquals(reader.getFileRowCount(), 5);
        assertEquals(reader.getSplitLength(), file.length());
        assertEquals(reader.nextBatch(), 5);

        Block column0 = reader.readBlock(0);
        assertEquals(column0.getPositionCount(), 5);
        for (int i = 0; i < 5; i++) {
            assertEquals(column0.isNull(i), false);
        }
        assertEquals(BIGINT.getLong(column0, 0), 123L);
        assertEquals(BIGINT.getLong(column0, 1), 777L);
        assertEquals(BIGINT.getLong(column0, 2), 456L);
        assertEquals(BIGINT.getLong(column0, 3), 888L);
        assertEquals(BIGINT.getLong(column0, 4), 999L);

        Block column1 = reader.readBlock(1);
        assertEquals(column1.getPositionCount(), 5);
        for (int i = 0; i < 5; i++) {
            assertEquals(column1.isNull(i), false);
        }
        assertEquals(createVarcharType(20).getSlice(column1, 0), utf8Slice("hello"));
        assertEquals(createVarcharType(20).getSlice(column1, 1), utf8Slice("sky"));
        assertEquals(createVarcharType(20).getSlice(column1, 2), utf8Slice("bye"));
        assertEquals(createVarcharType(20).getSlice(column1, 3), utf8Slice("world"));
        assertEquals(createVarcharType(20).getSlice(column1, 4), utf8Slice("done"));

        Block column2 = reader.readBlock(2);
        assertEquals(column2.getPositionCount(), 5);
        for (int i = 0; i < 5; i++) {
            assertEquals(column2.isNull(i), false);
        }
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 0), arrayBlockOf(BIGINT, 1, 2)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 1), arrayBlockOf(BIGINT, 3, 4)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 2), arrayBlockOf(BIGINT, 5, 6)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 3), arrayBlockOf(BIGINT, 7, 8)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 4), arrayBlockOf(BIGINT, 9, 10)));

        Block column3 = reader.readBlock(3);
        assertEquals(column3.getPositionCount(), 5);
        for (int i = 0; i < 5; i++) {
            assertEquals(column3.isNull(i), false);
        }
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 0), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true)));
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 1), mapBlockOf(createVarcharType(5), BOOLEAN, "k2", false)));
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", true)));
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 3), mapBlockOf(createVarcharType(5), BOOLEAN, "k4", true)));
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 4), mapBlockOf(createVarcharType(5), BOOLEAN, "k5", true)));

        Block column4 = reader.readBlock(4);
        assertEquals(column4.getPositionCount(), 5);
        for (int i = 0; i < 5; i++) {
            assertEquals(column4.isNull(i), false);
        }
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 0), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 1), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 6))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 2), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 7))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 3), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 8), null)));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 4), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 9, 10))));

        assertEquals(reader.nextBatch(), -1);

        OrcFileMetadata orcFileMetadata = METADATA_CODEC.fromJson(reader.getUserMetadata().get(OrcFileMetadata.KEY).getBytes());
        assertEquals(orcFileMetadata, new OrcFileMetadata(ImmutableMap.<Long, TypeSignature>builder()
                .put(3L, BIGINT.getTypeSignature())
                .put(7L, createVarcharType(20).getTypeSignature())
                .put(9L, arrayType.getTypeSignature())
                .put(10L, mapType.getTypeSignature())
                .put(11L, arrayOfArrayType.getTypeSignature())
                .put(12L, decimalType.getTypeSignature())
                .build()));
    }

    // Rewrite the file, deleting the rows at positions 1, 3, and 4
    BitSet rowsToDelete = new BitSet(5);
    rowsToDelete.set(1);
    rowsToDelete.set(3);
    rowsToDelete.set(4);
    File newFile = new File(temporary, randomUUID().toString());
    FileSystem fileSystem = new LocalOrcDataEnvironment().getFileSystem(DEFAULT_RAPTOR_CONTEXT);
    OrcFileInfo info = createFileRewriter().rewrite(fileSystem, getColumnTypes(columnIds, columnTypes), path(file), path(newFile), rowsToDelete);
    assertEquals(info.getRowCount(), 2);
    assertBetweenInclusive(info.getUncompressedSize(), 94L, 118L * 2);

    // Read the rewritten file and verify that only the two surviving rows remain
    try (OrcDataSource dataSource = fileOrcDataSource(newFile)) {
        OrcBatchRecordReader reader = createReader(dataSource, columnIds, columnTypes);
        assertEquals(reader.getReaderRowCount(), 2);
        assertEquals(reader.getFileRowCount(), 2);
        assertEquals(reader.getSplitLength(), newFile.length());
        assertEquals(reader.nextBatch(), 2);

        Block column0 = reader.readBlock(0);
        assertEquals(column0.getPositionCount(), 2);
        for (int i = 0; i < 2; i++) {
            assertEquals(column0.isNull(i), false);
        }
        assertEquals(BIGINT.getLong(column0, 0), 123L);
        assertEquals(BIGINT.getLong(column0, 1), 456L);

        Block column1 = reader.readBlock(1);
        assertEquals(column1.getPositionCount(), 2);
        for (int i = 0; i < 2; i++) {
            assertEquals(column1.isNull(i), false);
        }
        assertEquals(createVarcharType(20).getSlice(column1, 0), utf8Slice("hello"));
        assertEquals(createVarcharType(20).getSlice(column1, 1), utf8Slice("bye"));

        Block column2 = reader.readBlock(2);
        assertEquals(column2.getPositionCount(), 2);
        for (int i = 0; i < 2; i++) {
            assertEquals(column2.isNull(i), false);
        }
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 0), arrayBlockOf(BIGINT, 1, 2)));
        assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column2, 1), arrayBlockOf(BIGINT, 5, 6)));

        Block column3 = reader.readBlock(3);
        assertEquals(column3.getPositionCount(), 2);
        for (int i = 0; i < 2; i++) {
            assertEquals(column3.isNull(i), false);
        }
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 0), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true)));
        assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column3, 1), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", true)));

        Block column4 = reader.readBlock(4);
        assertEquals(column4.getPositionCount(), 2);
        for (int i = 0; i < 2; i++) {
            assertEquals(column4.isNull(i), false);
        }
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 0), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))));
        assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column4, 1), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 7))));

        assertEquals(reader.nextBatch(), -1);

        OrcFileMetadata orcFileMetadata = METADATA_CODEC.fromJson(reader.getUserMetadata().get(OrcFileMetadata.KEY).getBytes());
        assertEquals(orcFileMetadata, new OrcFileMetadata(ImmutableMap.<Long, TypeSignature>builder()
                .put(3L, BIGINT.getTypeSignature())
                .put(7L, createVarcharType(20).getTypeSignature())
                .put(9L, arrayType.getTypeSignature())
                .put(10L, mapType.getTypeSignature())
                .put(11L, arrayOfArrayType.getTypeSignature())
                .put(12L, decimalType.getTypeSignature())
                .build()));
    }
}
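The metadata check at the end of each read pass can be factored into a small helper. The sketch below is not part of the test above; it reuses the same METADATA_CODEC and OrcFileMetadata.KEY seen in testRewrite, and it assumes OrcFileMetadata exposes its column-type map through a getColumnTypes() accessor (an assumption, not confirmed by this excerpt).

    // Hypothetical helper: decode the Raptor column-type map stored in the ORC user metadata.
    // Assumes getColumnTypes() exists on OrcFileMetadata; the rest mirrors the test above.
    private static Map<Long, TypeSignature> readColumnTypes(OrcBatchRecordReader reader)
    {
        Slice metadataJson = reader.getUserMetadata().get(OrcFileMetadata.KEY);
        return METADATA_CODEC.fromJson(metadataJson.getBytes()).getColumnTypes();
    }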
Use of com.facebook.presto.raptor.filesystem.LocalOrcDataEnvironment in project presto by prestodb, from the class TestOrcStorageManager, method createOrcStorageManager:
public static OrcStorageManager createOrcStorageManager(IDBI dbi, File temporary, int maxShardRows)
{
    URI directory = new File(temporary, "data").toURI();
    StorageService storageService = new LocalFileStorageService(new LocalOrcDataEnvironment(), directory);
    storageService.start();

    File backupDirectory = new File(temporary, "backup");
    FileBackupStore fileBackupStore = new FileBackupStore(backupDirectory);
    fileBackupStore.start();
    Optional<BackupStore> backupStore = Optional.of(fileBackupStore);

    ShardManager shardManager = createShardManager(dbi);
    ShardRecoveryManager recoveryManager = new ShardRecoveryManager(
            storageService,
            backupStore,
            new LocalOrcDataEnvironment(),
            new TestingNodeManager(),
            shardManager,
            MISSING_SHARD_DISCOVERY,
            10);
    return createOrcStorageManager(storageService, backupStore, recoveryManager, new InMemoryShardRecorder(), maxShardRows, MAX_FILE_SIZE);
}
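A typical call site might look like the following sketch. The in-memory H2 DBI mirrors the setup methods shown further down; the temporary directory and the maxShardRows value of 100 are illustrative assumptions, not code from the test class.

    // Sketch: obtain a storage manager backed by a local directory and an in-memory metadata DB.
    IDBI dbi = new DBI("jdbc:h2:mem:test" + System.nanoTime());
    File temporary = createTempDir();
    OrcStorageManager manager = createOrcStorageManager(dbi, temporary, 100);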
Use of com.facebook.presto.raptor.filesystem.LocalOrcDataEnvironment in project presto by prestodb, from the class TestOrcStorageManager, method testWriteDeltaDelete:
@Test
public void testWriteDeltaDelete()
        throws Exception
{
    FileSystem fileSystem = new LocalOrcDataEnvironment().getFileSystem(DEFAULT_RAPTOR_CONTEXT);

    // delete one row
    BitSet rowsToDelete = new BitSet();
    rowsToDelete.set(0);
    Collection<Slice> fragments = deltaDelete(rowsToDelete, false);

    Slice shardDelta = Iterables.getOnlyElement(fragments);
    ShardDeleteDelta shardDeltas = jsonCodec(ShardDeleteDelta.class).fromJson(shardDelta.getBytes());
    ShardInfo shardInfo = shardDeltas.getDeltaInfoPair().getNewDeltaDeleteShard().get();

    // Check that output file (new delta file) has one row
    assertEquals(shardInfo.getRowCount(), 1);
    assertTrue(checkContent(fileSystem, shardInfo.getShardUuid(), rowsToDelete));

    // Check that storage file is same as backup file
    File storageFile = new File(storageService.getStorageFile(shardInfo.getShardUuid()).toString());
    File backupFile = fileBackupStore.getBackupFile(shardInfo.getShardUuid());
    assertFileEquals(storageFile, backupFile);

    // Verify recorded shards: original file + delta file
    List<RecordedShard> recordedShards = shardRecorder.getShards();
    assertEquals(recordedShards.size(), 2);
    assertEquals(recordedShards.get(1).getTransactionId(), TRANSACTION_ID);
    assertEquals(recordedShards.get(1).getShardUuid(), shardInfo.getShardUuid());
}
Use of com.facebook.presto.raptor.filesystem.LocalOrcDataEnvironment in project presto by prestodb, from the class TestShardEjector, method setup:
@BeforeMethod
public void setup()
{
    dbi = new DBI("jdbc:h2:mem:test" + System.nanoTime() + "_" + ThreadLocalRandom.current().nextInt());
    dummyHandle = dbi.open();
    createTablesWithRetry(dbi);
    shardManager = createShardManager(dbi);

    dataDir = createTempDir();
    storageService = new LocalFileStorageService(new LocalOrcDataEnvironment(), dataDir.toURI());
    storageService.start();
}
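Only the setup is shown in this excerpt; a matching teardown is assumed to close the H2 handle and remove the temporary data directory. The following is a sketch using Guava's MoreFiles.deleteRecursively, not code taken from TestShardEjector.

    @AfterMethod(alwaysRun = true)
    public void teardown()
            throws Exception
    {
        // Assumed counterpart to setup(): release the in-memory DB handle and delete the temp dir.
        if (dummyHandle != null) {
            dummyHandle.close();
        }
        if (dataDir != null) {
            deleteRecursively(dataDir.toPath(), ALLOW_INSECURE);
        }
    }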
Use of com.facebook.presto.raptor.filesystem.LocalOrcDataEnvironment in project presto by prestodb, from the class TestShardCleaner, method setup:
@BeforeMethod
public void setup()
{
    dbi = new DBI("jdbc:h2:mem:test" + System.nanoTime() + "_" + ThreadLocalRandom.current().nextInt());
    dummyHandle = dbi.open();
    createTablesWithRetry(dbi);

    temporary = createTempDir();
    File directory = new File(temporary, "data");
    storageService = new LocalFileStorageService(new LocalOrcDataEnvironment(), directory.toURI());
    storageService.start();

    File backupDirectory = new File(temporary, "backup");
    backupStore = new FileBackupStore(backupDirectory);
    ((FileBackupStore) backupStore).start();

    ticker = new TestingTicker();
    ShardCleanerConfig config = new ShardCleanerConfig();
    cleaner = new ShardCleaner(
            new DaoSupplier<>(dbi, H2ShardDao.class),
            "node1",
            true,
            ticker,
            storageService,
            Optional.of(backupStore),
            new LocalOrcDataEnvironment(),
            config.getMaxTransactionAge(),
            config.getTransactionCleanerInterval(),
            config.getLocalCleanerInterval(),
            config.getLocalCleanTime(),
            config.getBackupCleanerInterval(),
            config.getBackupCleanTime(),
            config.getBackupDeletionThreads(),
            config.getMaxCompletedTransactionAge());
}
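The injected TestingTicker lets the cleaner's interval- and age-based logic be exercised without real waiting. A sketch of how later assertions might advance it (the exact usage is not shown in this excerpt, and whether a one-hour step is enough depends on the ShardCleanerConfig defaults):

    // Sketch: advance the fake clock well past the configured clean times instead of sleeping.
    ticker.increment(1, TimeUnit.HOURS);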