use of com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext in project presto by prestodb.
the class OrcTestingUtil method createReader.
public static OrcBatchRecordReader createReader(OrcDataSource dataSource, List<Long> columnIds, List<Type> types) throws IOException {
    OrcReader orcReader = new OrcReader(dataSource, ORC, new StorageOrcFileTailSource(), new StorageStripeMetadataSource(), new RaptorOrcAggregatedMemoryContext(), createDefaultTestConfig(), false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());
    List<String> columnNames = orcReader.getColumnNames();
    assertEquals(columnNames.size(), columnIds.size());
    Map<Integer, Type> includedColumns = new HashMap<>();
    int ordinal = 0;
    for (long columnId : columnIds) {
        assertEquals(columnNames.get(ordinal), String.valueOf(columnId));
        includedColumns.put(ordinal, types.get(ordinal));
        ordinal++;
    }
    return createRecordReader(orcReader, includedColumns);
}
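For orientation, this helper could be called along the following lines. This is a hedged sketch: the file path, column IDs, and types are hypothetical, and fileOrcDataSource is assumed to wrap the file the same way the TestOrcFileRewriter example below does.

// Sketch only: open a test ORC file whose columns are named "3" and "7"
// (the method above asserts that column IDs are stored as ORC column names)
// and scan the first column.
OrcDataSource dataSource = fileOrcDataSource(new File("/tmp/example.orc"));
try (OrcBatchRecordReader reader = OrcTestingUtil.createReader(dataSource, ImmutableList.of(3L, 7L), ImmutableList.of(BIGINT, createVarcharType(20)))) {
    while (reader.nextBatch() > 0) {
        Block block = reader.readBlock(0);
        // consume block.getPositionCount() values...
    }
}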
use of com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext in project presto by prestodb.
the class TestOrcFileRewriter method testRewriterDropThenAddDifferentColumns.
/**
 * The following test adds and drops different columns.
 */
@Test
public void testRewriterDropThenAddDifferentColumns() throws Exception {
    FunctionAndTypeManager functionAndTypeManager = createTestFunctionAndTypeManager();
    DBI dbi = new DBI("jdbc:h2:mem:test" + System.nanoTime() + "_" + ThreadLocalRandom.current().nextInt());
    dbi.registerMapper(new TableColumn.Mapper(functionAndTypeManager));
    Handle dummyHandle = dbi.open();
    File dataDir = Files.createTempDir();
    StorageManager storageManager = createOrcStorageManager(dbi, dataDir);
    List<Long> columnIds = ImmutableList.of(3L, 7L);
    List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(20));
    File file = new File(temporary, randomUUID().toString());
    try (FileWriter writer = createFileWriter(columnIds, columnTypes, file, false)) {
        List<Page> pages = rowPagesBuilder(columnTypes).row(1L, "1").row(2L, "2").row(3L, "3").row(4L, "4").build();
        writer.appendPages(pages);
    }
    // Add a column
    File newFile1 = new File(temporary, randomUUID().toString());
    FileSystem fileSystem = new LocalOrcDataEnvironment().getFileSystem(DEFAULT_RAPTOR_CONTEXT);
    OrcFileInfo info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(3L, 7L, 10L), ImmutableList.of(BIGINT, createVarcharType(20), DOUBLE)), path(file), path(newFile1), new BitSet(5));
    assertEquals(info.getRowCount(), 4);
    assertEquals(readAllBytes(file.toPath()), readAllBytes(newFile1.toPath()));
    // Drop a column
    File newFile2 = new File(temporary, randomUUID().toString());
    info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(7L, 10L), ImmutableList.of(createVarcharType(20), DOUBLE)), path(newFile1), path(newFile2), new BitSet(5));
    assertEquals(info.getRowCount(), 4);
    // Optimized writer will keep the only column
    OrcReader orcReader = new OrcReader(fileOrcDataSource(newFile2), ORC, new StorageOrcFileTailSource(), new StorageStripeMetadataSource(), new RaptorOrcAggregatedMemoryContext(), OrcTestingUtil.createDefaultTestConfig(), false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());
    assertEquals(orcReader.getColumnNames(), ImmutableList.of("7"));
    // Add a column with a different ID and a different type
    File newFile3 = new File(temporary, randomUUID().toString());
    info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(7L, 10L, 13L), ImmutableList.of(createVarcharType(20), DOUBLE, createVarcharType(5))), path(newFile2), path(newFile3), new BitSet(5));
    assertEquals(info.getRowCount(), 4);
    assertEquals(readAllBytes(newFile2.toPath()), readAllBytes(newFile3.toPath()));
    // Prepare the final file; make sure it is accessible from the storage manager
    UUID uuid = randomUUID();
    File newFile4 = getFileSystemPath(new File(dataDir, "data/storage"), uuid);
    // The optimized ORC writer does not create the file itself
    newFile4.getParentFile().mkdirs();
    newFile4.createNewFile();
    // Drop a column and add a column; also delete 3 rows
    BitSet rowsToDelete = new BitSet(5);
    rowsToDelete.set(0);
    rowsToDelete.set(1);
    rowsToDelete.set(3);
    info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(7L, 13L, 18L), ImmutableList.of(createVarcharType(20), createVarcharType(5), INTEGER)), path(newFile3), path(newFile4), rowsToDelete);
    assertEquals(info.getRowCount(), 1);
    ConnectorPageSource source = storageManager.getPageSource(DEFAULT_RAPTOR_CONTEXT, DEFAULT_HIVE_FILE_CONTEXT, uuid, Optional.empty(), false, OptionalInt.empty(), ImmutableList.of(13L, 7L, 18L), ImmutableList.of(createVarcharType(5), createVarcharType(20), INTEGER), TupleDomain.all(), READER_ATTRIBUTES);
    Page page = null;
    while (page == null) {
        page = source.getNextPage();
    }
    assertEquals(page.getPositionCount(), 1);
    // Column 13L
    Block column0 = page.getBlock(0);
    assertTrue(column0.isNull(0));
    // Column 7L
    Block column1 = page.getBlock(1);
    assertEquals(createVarcharType(20).getSlice(column1, 0), utf8Slice("3"));
    // Column 18L
    Block column2 = page.getBlock(2);
    assertTrue(column2.isNull(0));
    // Remove all the columns
    File newFile5 = new File(temporary, randomUUID().toString());
    info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(13L, 18L), ImmutableList.of(createVarcharType(5), INTEGER)), path(newFile4), path(newFile5), new BitSet(5));
    // Optimized writer will drop the file
    assertEquals(info.getRowCount(), 0);
    assertFalse(newFile5.exists());
    dummyHandle.close();
    deleteRecursively(dataDir.toPath(), ALLOW_INSECURE);
}
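A side note on the deletion mask used above: rowsToDelete is a plain java.util.BitSet indexed by row position, so the surviving row count is simply the input row count minus the mask's cardinality. A minimal sketch, assuming the four input rows from this test:

// Sketch: 4 input rows; deleting positions 0, 1, and 3 leaves 1 row,
// matching the assertEquals(info.getRowCount(), 1) assertion above.
int inputRows = 4;
BitSet rowsToDelete = new BitSet(5);
rowsToDelete.set(0);
rowsToDelete.set(1);
rowsToDelete.set(3);
int survivingRows = inputRows - rowsToDelete.cardinality();
assert survivingRows == 1;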
use of com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext in project presto by prestodb.
the class OrcStorageManager method getRowsFromUuid.
Optional<BitSet> getRowsFromUuid(FileSystem fileSystem, Optional<UUID> deltaShardUuid) {
    if (!deltaShardUuid.isPresent()) {
        return Optional.empty();
    }
    try (OrcDataSource dataSource = openShard(fileSystem, deltaShardUuid.get(), defaultReaderAttributes)) {
        OrcAggregatedMemoryContext systemMemoryUsage = new RaptorOrcAggregatedMemoryContext();
        OrcReader reader = new OrcReader(dataSource, ORC, orcFileTailSource, new StorageStripeMetadataSource(), new RaptorOrcAggregatedMemoryContext(), new OrcReaderOptions(defaultReaderAttributes.getMaxMergeDistance(), defaultReaderAttributes.getTinyStripeThreshold(), HUGE_MAX_READ_BLOCK_SIZE, defaultReaderAttributes.isZstdJniDecompressionEnabled()), false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());
        if (reader.getFooter().getNumberOfRows() >= Integer.MAX_VALUE) {
            throw new IOException("File has too many rows");
        }
        try (OrcBatchRecordReader recordReader = reader.createBatchRecordReader(ImmutableMap.of(0, BIGINT), OrcPredicate.TRUE, DEFAULT_STORAGE_TIMEZONE, systemMemoryUsage, INITIAL_BATCH_SIZE)) {
            BitSet bitSet = new BitSet();
            while (recordReader.nextBatch() > 0) {
                Block block = recordReader.readBlock(0);
                for (int i = 0; i < block.getPositionCount(); i++) {
                    bitSet.set(toIntExact(block.getLong(i)));
                }
            }
            return Optional.of(bitSet);
        }
    } catch (IOException | RuntimeException e) {
        throw new PrestoException(RAPTOR_ERROR, "Failed to read file: " + deltaShardUuid, e);
    }
}
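The returned bit set marks the row positions that the delta shard records as deleted. A caller could apply it while scanning the base shard roughly as follows; this is an illustrative sketch, and rowCount and the emit step are placeholders rather than code from OrcStorageManager:

// Sketch: skip positions flagged as deleted by the delta shard.
Optional<BitSet> deletedRows = getRowsFromUuid(fileSystem, deltaShardUuid);
for (int position = 0; position < rowCount; position++) {
    if (deletedRows.isPresent() && deletedRows.get().get(position)) {
        continue; // recorded as deleted in the delta shard
    }
    // emit the row at this position...
}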
use of com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext in project presto by prestodb.
the class OrcStorageManager method computeShardStats.
private List<ColumnStats> computeShardStats(FileSystem fileSystem, Path file) {
    try (OrcDataSource dataSource = orcDataEnvironment.createOrcDataSource(fileSystem, file, defaultReaderAttributes)) {
        OrcReader reader = new OrcReader(dataSource, ORC, orcFileTailSource, stripeMetadataSourceFactory, new RaptorOrcAggregatedMemoryContext(), new OrcReaderOptions(defaultReaderAttributes.getMaxMergeDistance(), defaultReaderAttributes.getTinyStripeThreshold(), HUGE_MAX_READ_BLOCK_SIZE, defaultReaderAttributes.isZstdJniDecompressionEnabled()), false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());
        ImmutableList.Builder<ColumnStats> list = ImmutableList.builder();
        for (ColumnInfo info : getColumnInfo(reader)) {
            computeColumnStats(reader, info.getColumnId(), info.getType(), typeManager).ifPresent(list::add);
        }
        return list.build();
    } catch (IOException e) {
        throw new PrestoException(RAPTOR_ERROR, "Failed to read file: " + file, e);
    }
}
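A hedged sketch of how the result might be consumed after a shard is written; stagingFile is a placeholder, and this assumes ColumnStats exposes its column ID via getColumnId:

// Illustrative only: compute and inspect per-column stats for a freshly written shard.
List<ColumnStats> shardStats = computeShardStats(fileSystem, stagingFile);
for (ColumnStats stats : shardStats) {
    // each entry carries the column ID plus the min/max values used for shard pruning
    System.out.println("column " + stats.getColumnId() + ": " + stats);
}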
use of com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext in project presto by prestodb.
the class OrcStorageManager method getPageSource.
@Override
public ConnectorPageSource getPageSource(HdfsContext hdfsContext, HiveFileContext hiveFileContext, UUID shardUuid, Optional<UUID> deltaShardUuid, boolean tableSupportsDeltaDelete, OptionalInt bucketNumber, List<Long> columnIds, List<Type> columnTypes, TupleDomain<RaptorColumnHandle> effectivePredicate, ReaderAttributes readerAttributes, OptionalLong transactionId, Optional<Map<String, Type>> allColumnTypes) {
    FileSystem fileSystem = orcDataEnvironment.getFileSystem(hdfsContext);
    OrcDataSource dataSource = openShard(fileSystem, shardUuid, readerAttributes);
    OrcAggregatedMemoryContext systemMemoryUsage = new RaptorOrcAggregatedMemoryContext();
    try {
        OrcReader reader = new OrcReader(dataSource, ORC, orcFileTailSource, stripeMetadataSourceFactory, new RaptorOrcAggregatedMemoryContext(), new OrcReaderOptions(readerAttributes.getMaxMergeDistance(), readerAttributes.getTinyStripeThreshold(), HUGE_MAX_READ_BLOCK_SIZE, readerAttributes.isZstdJniDecompressionEnabled()), hiveFileContext.isCacheable(), NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());
        Map<Long, Integer> indexMap = columnIdIndex(reader.getColumnNames());
        ImmutableMap.Builder<Integer, Type> includedColumns = ImmutableMap.builder();
        ImmutableList.Builder<Integer> columnIndexes = ImmutableList.builder();
        for (int i = 0; i < columnIds.size(); i++) {
            long columnId = columnIds.get(i);
            if (isHiddenColumn(columnId)) {
                columnIndexes.add(toSpecialIndex(columnId));
                continue;
            }
            Integer index = indexMap.get(columnId);
            if (index == null) {
                columnIndexes.add(NULL_COLUMN);
            } else {
                columnIndexes.add(index);
                includedColumns.put(index, toOrcFileType(columnTypes.get(i), typeManager));
            }
        }
        OrcPredicate predicate = getPredicate(effectivePredicate, indexMap);
        StorageTypeConverter storageTypeConverter = new StorageTypeConverter(typeManager);
        OrcBatchRecordReader recordReader = reader.createBatchRecordReader(storageTypeConverter.toStorageTypes(includedColumns.build()), predicate, DEFAULT_STORAGE_TIMEZONE, systemMemoryUsage, INITIAL_BATCH_SIZE);
        Optional<ShardRewriter> shardRewriter = Optional.empty();
        if (transactionId.isPresent()) {
            checkState(allColumnTypes.isPresent());
            if (reader.getFooter().getNumberOfRows() >= Integer.MAX_VALUE) {
                throw new PrestoException(RAPTOR_ERROR, "File has too many rows, failed to read file: " + shardUuid);
            }
            shardRewriter = Optional.of(createShardRewriter(hdfsContext, fileSystem, transactionId.getAsLong(), bucketNumber, shardUuid, toIntExact(reader.getFooter().getNumberOfRows()), deltaShardUuid, tableSupportsDeltaDelete, allColumnTypes.get()));
        }
        return new OrcUpdatablePageSource(shardRewriter, recordReader, new OrcPageSource(recordReader, dataSource, columnIds, columnTypes, columnIndexes.build(), shardUuid, bucketNumber, systemMemoryUsage, new DeltaShardLoader(deltaShardUuid, tableSupportsDeltaDelete, this, fileSystem)));
    } catch (IOException | RuntimeException e) {
        closeQuietly(dataSource);
        throw new PrestoException(RAPTOR_ERROR, "Failed to create page source for shard " + shardUuid, e);
    } catch (Throwable t) {
        closeQuietly(dataSource);
        throw t;
    }
}
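Downstream, the result is driven with the standard ConnectorPageSource loop, much as the test above does; a minimal sketch, where pageSource stands in for the value returned by this method and error handling is elided:

// Minimal consumption loop for a ConnectorPageSource (sketch).
try (ConnectorPageSource source = pageSource) {
    while (!source.isFinished()) {
        Page page = source.getNextPage();
        if (page == null) {
            continue; // no page ready yet
        }
        // process page.getPositionCount() rows...
    }
}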