Use of io.prestosql.memory.context.AggregatedMemoryContext in project hetu-core by openlookeng.
The class TestBroadcastOutputBuffer, method testSharedBufferBlockingNoBlockOnFull:
@Test
public void testSharedBufferBlockingNoBlockOnFull()
{
    SettableFuture<?> blockedFuture = SettableFuture.create();
    MockMemoryReservationHandler reservationHandler = new MockMemoryReservationHandler(blockedFuture);
    AggregatedMemoryContext memoryContext = newRootAggregatedMemoryContext(reservationHandler, 0L);
    Page page = createPage(1);
    long pageSize = PAGES_SERDE.serialize(page).getRetainedSizeInBytes();
    // create a buffer that can only hold two pages
    BroadcastOutputBuffer buffer = createBroadcastBuffer(createInitialEmptyOutputBuffers(BROADCAST), new DataSize(pageSize * 2, BYTE), memoryContext, directExecutor());
    OutputBufferMemoryManager memoryManager = buffer.getMemoryManager();
    memoryManager.setNoBlockOnFull();
    // even though setNoBlockOnFull() is called, the buffer should block on memory when we add the first page,
    // as no memory is available (MockMemoryReservationHandler returns a future that is not done)
    enqueuePage(buffer, page);
    // more memory is available
    blockedFuture.set(null);
    memoryManager.onMemoryAvailable();
    assertTrue(memoryManager.getBufferBlockedFuture().isDone(), "buffer shouldn't be blocked");
    // we should be able to add one more page after more memory is available
    addPage(buffer, page);
    // the buffer is full now, but setNoBlockOnFull() was called, so the buffer shouldn't block
    addPage(buffer, page);
}
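The test above exercises the AggregatedMemoryContext hierarchy that the output buffer reports into. As a rough orientation to that API, here is a minimal, self-contained sketch (not from the project; the allocation tag and byte counts are made up for illustration) showing how a child LocalMemoryContext rolls its reservation up to the root context:

import static io.prestosql.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext;

import io.prestosql.memory.context.AggregatedMemoryContext;
import io.prestosql.memory.context.LocalMemoryContext;

public final class MemoryContextSketch
{
    public static void main(String[] args)
    {
        // root context with no reservation handler: children simply aggregate into it
        AggregatedMemoryContext root = newSimpleAggregatedMemoryContext();
        // "sketch-reader" is an arbitrary allocation tag chosen for this example
        LocalMemoryContext reader = root.newLocalMemoryContext("sketch-reader");
        reader.setBytes(4096);                // reserve what the reader currently retains
        System.out.println(root.getBytes());  // prints 4096: the child's reservation rolls up
        reader.setBytes(0);                   // shrink the reservation once memory is released
        reader.close();
        root.close();
    }
}

In the test, newRootAggregatedMemoryContext(reservationHandler, 0L) plays the role of the root, and the MockMemoryReservationHandler decides whether reservations complete immediately or block.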
Use of io.prestosql.memory.context.AggregatedMemoryContext in project boostkit-bigdata by kunpengcompute.
The class ParquetPageSourceFactory, method createParquetPushDownPageSource:
public HivePushDownPageSource createParquetPushDownPageSource(Path path, long start, long length, com.huawei.boostkit.omnidata.model.Predicate predicate)
{
    AggregatedMemoryContext systemMemoryUsage = newSimpleAggregatedMemoryContext();
    // point the OmniData client at the configured server
    Properties transProperties = new Properties();
    transProperties.put(OMNIDATA_CLIENT_TARGET_LIST, omniDataServerTarget);
    // describe the split (file, offset, length) and the predicate to push down
    DataSource parquetPushDownDataSource = new com.huawei.boostkit.omnidata.model.datasource.hdfs.HdfsParquetDataSource(path.toString(), start, length, false);
    TaskSource readTaskInfo = new TaskSource(parquetPushDownDataSource, predicate, TaskSource.ONE_MEGABYTES);
    DataReader<Page> dataReader = DataReaderFactory.create(transProperties, readTaskInfo, new OpenLooKengDeserializer());
    return new HivePushDownPageSource(dataReader, systemMemoryUsage);
}
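Assuming HivePushDownPageSource follows the io.prestosql.spi.connector.ConnectorPageSource contract (an assumption; the class itself is not shown here), a caller would drain the pages roughly as in the sketch below, where getSystemMemoryUsage() surfaces the bytes tracked by the systemMemoryUsage context created above:

import io.prestosql.spi.Page;
import io.prestosql.spi.connector.ConnectorPageSource;

public final class PageSourceDrainSketch
{
    // pageSource is assumed to come from createParquetPushDownPageSource(...)
    static long drain(ConnectorPageSource pageSource)
            throws Exception
    {
        long rows = 0;
        long peakRetainedBytes = 0;
        try (ConnectorPageSource source = pageSource) {
            while (!source.isFinished()) {
                Page page = source.getNextPage();
                if (page == null) {
                    continue; // the source may not have a page ready yet
                }
                rows += page.getPositionCount();
                // memory currently retained by the reader, as reported through the memory context
                peakRetainedBytes = Math.max(peakRetainedBytes, source.getSystemMemoryUsage());
            }
        }
        System.out.println("peak retained bytes: " + peakRetainedBytes);
        return rows;
    }
}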
Use of io.prestosql.memory.context.AggregatedMemoryContext in project boostkit-bigdata by kunpengcompute.
The class ParquetPageSourceFactory, method createParquetPageSource:
public static ParquetPageSource createParquetPageSource(HdfsEnvironment hdfsEnvironment, String user, Configuration configuration, Path path, long start, long length, long fileSize, Properties schema, List<HiveColumnHandle> columns, boolean useParquetColumnNames, boolean failOnCorruptedParquetStatistics, DataSize maxReadBlockSize, TypeManager typeManager, TupleDomain<HiveColumnHandle> effectivePredicate, FileFormatDataSourceStats stats, DateTimeZone timeZone)
{
    AggregatedMemoryContext systemMemoryContext = newSimpleAggregatedMemoryContext();
    ParquetDataSource dataSource = null;
    DateTimeZone readerTimeZone = timeZone;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(user, path, configuration);
        FSDataInputStream inputStream = hdfsEnvironment.doAs(user, () -> fileSystem.open(path));
        ParquetMetadata parquetMetadata = MetadataReader.readFooter(inputStream, path, fileSize);
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();
        dataSource = buildHdfsParquetDataSource(inputStream, path, fileSize, stats);
        // honor the time zone recorded by the writer if it differs from the reader's time zone
        String writerTimeZoneId = fileMetaData.getKeyValueMetaData().get(WRITER_TIME_ZONE_KEY);
        if (writerTimeZoneId != null && !writerTimeZoneId.equalsIgnoreCase(readerTimeZone.getID())) {
            readerTimeZone = DateTimeZone.forID(writerTimeZoneId);
        }
        List<org.apache.parquet.schema.Type> fields = columns.stream()
                .filter(column -> column.getColumnType() == REGULAR)
                .map(column -> getParquetType(column, fileSchema, useParquetColumnNames))
                .filter(Objects::nonNull)
                .collect(toList());
        MessageType requestedSchema = new MessageType(fileSchema.getName(), fields);
        // keep only the row groups whose first data page offset falls inside this split
        ImmutableList.Builder<BlockMetaData> footerBlocks = ImmutableList.builder();
        for (BlockMetaData block : parquetMetadata.getBlocks()) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (firstDataPage >= start && firstDataPage < start + length) {
                footerBlocks.add(block);
            }
        }
        Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, requestedSchema);
        TupleDomain<ColumnDescriptor> parquetTupleDomain = getParquetTupleDomain(descriptorsByPath, effectivePredicate);
        Predicate parquetPredicate = buildPredicate(requestedSchema, parquetTupleDomain, descriptorsByPath);
        final ParquetDataSource finalDataSource = dataSource;
        // prune the remaining row groups whose statistics cannot match the pushed-down predicate
        ImmutableList.Builder<BlockMetaData> blocks = ImmutableList.builder();
        for (BlockMetaData block : footerBlocks.build()) {
            if (predicateMatches(parquetPredicate, block, finalDataSource, descriptorsByPath, parquetTupleDomain, failOnCorruptedParquetStatistics)) {
                blocks.add(block);
            }
        }
        MessageColumnIO messageColumnIO = getColumnIO(fileSchema, requestedSchema);
        ParquetReader parquetReader = new ParquetReader(Optional.ofNullable(fileMetaData.getCreatedBy()), messageColumnIO, blocks.build(), dataSource, readerTimeZone, systemMemoryContext, maxReadBlockSize);
        return new ParquetPageSource(parquetReader, fileSchema, messageColumnIO, typeManager, schema, columns, effectivePredicate, useParquetColumnNames);
    }
    catch (Exception e) {
        try {
            if (dataSource != null) {
                dataSource.close();
            }
        }
        catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        if (e instanceof ParquetCorruptionException) {
            throw new PrestoException(HIVE_BAD_DATA, e);
        }
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e instanceof BlockMissingException) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
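The first pruning loop above assigns row groups to splits by a simple ownership rule: a split [start, start + length) reads exactly those row groups whose first data page offset falls inside that range, so concurrent splits over the same file never read the same rows twice. A standalone sketch of that rule, using made-up offsets and no Parquet classes:

import java.util.ArrayList;
import java.util.List;

public final class SplitOwnershipSketch
{
    // returns the row-group start offsets that the split [start, start + length) is responsible for
    static List<Long> ownedRowGroups(long[] firstDataPageOffsets, long start, long length)
    {
        List<Long> owned = new ArrayList<>();
        for (long offset : firstDataPageOffsets) {
            if (offset >= start && offset < start + length) {
                owned.add(offset);
            }
        }
        return owned;
    }

    public static void main(String[] args)
    {
        // hypothetical row groups starting at offsets 4, 1000 and 2500 in a 3000-byte file
        long[] offsets = {4, 1000, 2500};
        System.out.println(ownedRowGroups(offsets, 0, 1500));    // [4, 1000]
        System.out.println(ownedRowGroups(offsets, 1500, 1500)); // [2500]
    }
}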
Use of io.prestosql.memory.context.AggregatedMemoryContext in project hetu-core by openlookeng.
The class OrcSelectiveRecordReader, method createColumnReaders:
public SelectiveColumnReader[] createColumnReaders(List<OrcColumn> fileColumns, AggregatedMemoryContext systemMemoryContext, OrcBlockFactory blockFactory, OrcCacheStore orcCacheStore, OrcCacheProperties orcCacheProperties, OrcPredicate predicate, Map<Integer, TupleDomainFilter> filters, DateTimeZone hiveStorageTimeZone, List<Integer> outputColumns, Map<Integer, Type> includedColumns, ColumnMetadata<OrcType> orcTypes, boolean useDataCache)
        throws OrcCorruptionException
{
    int fieldCount = orcTypes.get(OrcColumnId.ROOT_COLUMN).getFieldCount();
    SelectiveColumnReader[] columnReaders = new SelectiveColumnReader[fieldCount];
    colReaderWithFilter = new IntArraySet();
    colReaderWithORFilter = new IntArraySet();
    colReaderWithoutFilter = new IntArraySet();
    IntArraySet remainingColumns = new IntArraySet();
    remainingColumns.addAll(includedColumns.keySet());
    for (int i = 0; i < fieldCount; i++) {
        // create a column reader only for columns which are part of the projection and filter
        if (includedColumns.containsKey(i)) {
            int columnIndex = i;
            OrcColumn column = fileColumns.get(columnIndex);
            boolean outputRequired = outputColumns.contains(i);
            SelectiveColumnReader columnReader = null;
            if (useDataCache && orcCacheProperties.isRowDataCacheEnabled()) {
                ColumnReader cr = ColumnReaders.createColumnReader(includedColumns.get(i), column, systemMemoryContext, blockFactory.createNestedBlockFactory(block -> blockLoaded(columnIndex, block)));
                columnReader = SelectiveColumnReaders.wrapWithDataCachingStreamReader(cr, column, orcCacheStore.getRowDataCache());
            }
            else {
                columnReader = createColumnReader(orcTypes.get(column.getColumnId()), column, Optional.ofNullable(filters.get(i)), outputRequired ? Optional.of(includedColumns.get(i)) : Optional.empty(), hiveStorageTimeZone, systemMemoryContext);
                if (orcCacheProperties.isRowDataCacheEnabled()) {
                    columnReader = SelectiveColumnReaders.wrapWithResultCachingStreamReader(columnReader, column, predicate, orcCacheStore.getRowDataCache());
                }
            }
            columnReaders[columnIndex] = columnReader;
            // classify the column by the kind of filter attached to it
            if (filters.get(i) != null) {
                colReaderWithFilter.add(columnIndex);
            }
            else if (disjuctFilters.get(i) != null && disjuctFilters.get(i).size() > 0) {
                colReaderWithORFilter.add(columnIndex);
            }
            else {
                colReaderWithoutFilter.add(columnIndex);
            }
            remainingColumns.remove(columnIndex);
        }
    }
    /* any columns still remaining at this point should have colIdx < 0 */
    remainingColumns.removeAll(missingColumns);
    for (Integer col : remainingColumns) {
        if (col < 0) {
            /* should always be true */
            if (filters.get(col) != null) {
                colReaderWithFilter.add(col);
            }
            else if (disjuctFilters.get(col) != null && disjuctFilters.get(col).size() > 0) {
                colReaderWithORFilter.add(col);
            }
        }
    }
    // special handling for the alter table add column case
    for (int missingColumn : missingColumns) {
        if (filters.get(missingColumn) != null) {
            colReaderWithFilter.add(missingColumn);
        }
        else if (disjuctFilters.get(missingColumn) != null && disjuctFilters.get(missingColumn).size() > 0) {
            colReaderWithORFilter.add(missingColumn);
        }
    }
    return columnReaders;
}
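The three IntArraySets populated above bucket the columns by the kind of filter attached to them: columns with a regular (conjunct) TupleDomainFilter, columns that only appear in OR'ed disjunct filters, and columns read purely for output. The sketch below reproduces just that bucketing with plain Java collections; the column indexes and filter maps are hypothetical stand-ins for the filters and disjuctFilters fields used above:

import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public final class FilterBucketingSketch
{
    public static void main(String[] args)
    {
        // hypothetical setup: columns 0..3 are included; column 0 has an AND filter, column 2 only OR filters
        Set<Integer> includedColumns = Set.of(0, 1, 2, 3);
        Map<Integer, String> filters = Map.of(0, "c0 > 10");
        Map<Integer, List<String>> disjunctFilters = Map.of(2, List.of("c2 = 1", "c2 = 5"));

        Set<Integer> withFilter = new HashSet<>();
        Set<Integer> withOrFilter = new HashSet<>();
        Set<Integer> withoutFilter = new HashSet<>();
        for (int column : includedColumns) {
            if (filters.get(column) != null) {
                withFilter.add(column);
            }
            else if (disjunctFilters.get(column) != null && !disjunctFilters.get(column).isEmpty()) {
                withOrFilter.add(column);
            }
            else {
                withoutFilter.add(column);
            }
        }
        System.out.println(withFilter);    // [0]
        System.out.println(withOrFilter);  // [2]
        System.out.println(withoutFilter); // [1, 3] (iteration order depends on HashSet)
    }
}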
Use of io.prestosql.memory.context.AggregatedMemoryContext in project hetu-core by openlookeng.
The class ParquetPageSourceFactory, method createParquetPageSource:
public static ParquetPageSource createParquetPageSource(HdfsEnvironment hdfsEnvironment, String user, Configuration configuration, Path path, long start, long length, long fileSize, Properties schema, List<HiveColumnHandle> columns, boolean useParquetColumnNames, boolean failOnCorruptedParquetStatistics, DataSize maxReadBlockSize, TypeManager typeManager, TupleDomain<HiveColumnHandle> effectivePredicate, FileFormatDataSourceStats stats, DateTimeZone timeZone)
{
    AggregatedMemoryContext systemMemoryContext = newSimpleAggregatedMemoryContext();
    ParquetDataSource dataSource = null;
    DateTimeZone readerTimeZone = timeZone;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(user, path, configuration);
        FSDataInputStream inputStream = hdfsEnvironment.doAs(user, () -> fileSystem.open(path));
        ParquetMetadata parquetMetadata = MetadataReader.readFooter(inputStream, path, fileSize);
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();
        dataSource = buildHdfsParquetDataSource(inputStream, path, fileSize, stats);
        // honor the time zone recorded by the writer if it differs from the reader's time zone
        String writerTimeZoneId = fileMetaData.getKeyValueMetaData().get(WRITER_TIME_ZONE_KEY);
        if (writerTimeZoneId != null && !writerTimeZoneId.equalsIgnoreCase(readerTimeZone.getID())) {
            readerTimeZone = DateTimeZone.forID(writerTimeZoneId);
        }
        List<org.apache.parquet.schema.Type> fields = columns.stream()
                .filter(column -> column.getColumnType() == REGULAR)
                .map(column -> getParquetType(column, fileSchema, useParquetColumnNames))
                .filter(Objects::nonNull)
                .collect(toList());
        MessageType requestedSchema = new MessageType(fileSchema.getName(), fields);
        // keep only the row groups whose first data page offset falls inside this split
        ImmutableList.Builder<BlockMetaData> footerBlocks = ImmutableList.builder();
        for (BlockMetaData block : parquetMetadata.getBlocks()) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (firstDataPage >= start && firstDataPage < start + length) {
                footerBlocks.add(block);
            }
        }
        Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, requestedSchema);
        TupleDomain<ColumnDescriptor> parquetTupleDomain = getParquetTupleDomain(descriptorsByPath, effectivePredicate);
        Predicate parquetPredicate = buildPredicate(requestedSchema, parquetTupleDomain, descriptorsByPath);
        final ParquetDataSource finalDataSource = dataSource;
        // prune the remaining row groups whose statistics cannot match the pushed-down predicate
        ImmutableList.Builder<BlockMetaData> blocks = ImmutableList.builder();
        for (BlockMetaData block : footerBlocks.build()) {
            if (predicateMatches(parquetPredicate, block, finalDataSource, descriptorsByPath, parquetTupleDomain, failOnCorruptedParquetStatistics)) {
                blocks.add(block);
            }
        }
        MessageColumnIO messageColumnIO = getColumnIO(fileSchema, requestedSchema);
        ParquetReader parquetReader = new ParquetReader(Optional.ofNullable(fileMetaData.getCreatedBy()), messageColumnIO, blocks.build(), dataSource, readerTimeZone, systemMemoryContext, maxReadBlockSize);
        return new ParquetPageSource(parquetReader, fileSchema, messageColumnIO, typeManager, schema, columns, effectivePredicate, useParquetColumnNames);
    }
    catch (Exception e) {
        try {
            if (dataSource != null) {
                dataSource.close();
            }
        }
        catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        if (e instanceof ParquetCorruptionException) {
            throw new PrestoException(HIVE_BAD_DATA, e);
        }
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e instanceof BlockMissingException) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}