use of io.prestosql.spi.dynamicfilter.DynamicFilterSupplier in project hetu-core by openlookeng.
the class TestHiveDistributedJoinQueriesWithDynamicFiltering method testIsPartitionFiltered.
@Test
public void testIsPartitionFiltered() throws IOException {
    Properties schema = new Properties();
    ImmutableList<HivePartitionKey> partitionKeys = ImmutableList.of(new HivePartitionKey("p1", "100"), new HivePartitionKey("p2", "101"), new HivePartitionKey("p3", "__HIVE_DEFAULT_PARTITION__"));
    HiveSplitWrapper split = HiveSplitWrapper.wrap(new HiveSplit("db", "table", "partitionId", "path", 0, 50, 50, 0, schema, partitionKeys, ImmutableList.of(), OptionalInt.empty(), false, ImmutableMap.of(), Optional.empty(), false, Optional.empty(), Optional.empty(), false, ImmutableMap.of()));
    List<Long> filterValues = ImmutableList.of(1L, 50L, 100L);
    HiveColumnHandle testColumnHandle = new HiveColumnHandle("p1", HIVE_INT, parseTypeSignature(StandardTypes.INTEGER), 0, PARTITION_KEY, Optional.empty());
    Supplier<List<Map<ColumnHandle, DynamicFilter>>> dynamicFilter = createDynamicFilterSupplier(filterValues, testColumnHandle, "filter1");
    Optional<DynamicFilterSupplier> dynamicFilterSupplier = Optional.of(new DynamicFilterSupplier(dynamicFilter, System.currentTimeMillis(), 10000));
    HiveColumnHandle testColumnHandle2 = new HiveColumnHandle("p2", HIVE_INT, parseTypeSignature(StandardTypes.INTEGER), 0, PARTITION_KEY, Optional.empty());
    Supplier<List<Map<ColumnHandle, DynamicFilter>>> dynamicFilter2 = createDynamicFilterSupplier(filterValues, testColumnHandle2, "filter2");
    Optional<DynamicFilterSupplier> dynamicFilterSupplier2 = Optional.of(new DynamicFilterSupplier(dynamicFilter2, System.currentTimeMillis(), 10000));
    HiveColumnHandle testColumnHandle3 = new HiveColumnHandle("p3", HIVE_INT, parseTypeSignature(StandardTypes.INTEGER), 0, PARTITION_KEY, Optional.empty());
    Supplier<List<Map<ColumnHandle, DynamicFilter>>> dynamicFilter3 = createDynamicFilterSupplier(filterValues, testColumnHandle3, "filter3");
    Optional<DynamicFilterSupplier> dynamicFilterSupplier3 = Optional.of(new DynamicFilterSupplier(dynamicFilter3, System.currentTimeMillis(), 10000));
    HiveColumnHandle testColumnHandle4 = new HiveColumnHandle("p4", HIVE_INT, parseTypeSignature(StandardTypes.INTEGER), 0, PARTITION_KEY, Optional.empty());
    Supplier<List<Map<ColumnHandle, DynamicFilter>>> dynamicFilter4 = createDynamicFilterSupplier(filterValues, testColumnHandle4, "filter3");
    Optional<DynamicFilterSupplier> dynamicFilterSupplier4 = Optional.of(new DynamicFilterSupplier(dynamicFilter4, System.currentTimeMillis(), 0));
    HiveConfig config = new HiveConfig();
    HivePageSourceProvider provider = new HivePageSourceProvider(config, createTestHdfsEnvironment(config), getDefaultHiveRecordCursorProvider(config), getDefaultHiveDataStreamFactories(config), TYPE_MANAGER, getNoOpIndexCache(), getDefaultHiveSelectiveFactories(config));
    TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    ConnectorTableHandle table = new HiveTableHandle("db", "table", ImmutableMap.of(), ImmutableList.of(), Optional.empty());
    HiveTransactionHandle transaction = new HiveTransactionHandle();
    try {
        ConnectorPageSource result = provider.createPageSource(transaction, session, split, table, ImmutableList.of(testColumnHandle), dynamicFilterSupplier);
        assertFalse(result instanceof FixedPageSource);
    } catch (Exception e) {
        assertTrue(e instanceof PrestoException);
    }
    try {
        ConnectorPageSource result = provider.createPageSource(transaction, session, split, table, ImmutableList.of(testColumnHandle2), dynamicFilterSupplier2);
        assertTrue(result instanceof FixedPageSource);
    } catch (Exception e) {
        fail("A FixedPageSource object should have been created");
    }
    try {
        ConnectorPageSource result = provider.createPageSource(transaction, session, split, table, ImmutableList.of(testColumnHandle3), dynamicFilterSupplier3);
        assertFalse(result instanceof FixedPageSource);
    } catch (Exception e) {
        assertTrue(e instanceof PrestoException);
    }
    try {
        ConnectorPageSource result = provider.createPageSource(transaction, session, split, table, ImmutableList.of(testColumnHandle4), dynamicFilterSupplier4);
        assertFalse(result instanceof FixedPageSource);
    } catch (Exception e) {
        assertTrue(e instanceof PrestoException);
    }
}
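The createDynamicFilterSupplier helper used above is not included in this snippet. Judging by the assertions, the four cases appear to cover a partition value (100) that is contained in the filter values, one (101) that is not and therefore yields an empty FixedPageSource, the __HIVE_DEFAULT_PARTITION__ value, and a supplier whose wait time is 0. As a hedged sketch of the structure the helper has to return, inferred only from the DynamicFilterSupplier constructor calls above, each element of the supplied list is a map from column handle to the DynamicFilter collected for that column; an empty supplier is shown here because constructing a concrete DynamicFilter is outside what this snippet shows.

// Hedged sketch of the supplier shape wrapped by DynamicFilterSupplier.
// Each list element is one map from probe column handle to the DynamicFilter collected for it;
// an empty supplier is used because building a concrete DynamicFilter is not shown above.
Supplier<List<Map<ColumnHandle, DynamicFilter>>> emptyFilters = ImmutableList::of;
Optional<DynamicFilterSupplier> noOpSupplier = Optional.of(new DynamicFilterSupplier(emptyFilters, System.currentTimeMillis(), 10000));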
use of io.prestosql.spi.dynamicfilter.DynamicFilterSupplier in project hetu-core by openlookeng.
the class HivePageSourceProvider method createPageSourceInternal.
private ConnectorPageSource createPageSourceInternal(ConnectorSession session, Optional<DynamicFilterSupplier> dynamicFilterSupplier, List<Map<ColumnHandle, DynamicFilter>> dynamicFilters, HiveTableHandle hiveTable, List<HiveColumnHandle> hiveColumns, HiveSplit hiveSplit) {
    Path path = new Path(hiveSplit.getPath());
    List<Set<DynamicFilter>> dynamicFilterList = new ArrayList<>();
    if (dynamicFilters != null) {
        for (Map<ColumnHandle, DynamicFilter> df : dynamicFilters) {
            Set<DynamicFilter> values = df.values().stream().collect(Collectors.toSet());
            dynamicFilterList.add(values);
        }
    }
    // Filter out splits using partition values and dynamic filters
    if (dynamicFilters != null && !dynamicFilters.isEmpty() && isPartitionFiltered(hiveSplit.getPartitionKeys(), dynamicFilterList, typeManager)) {
        return new FixedPageSource(ImmutableList.of());
    }
    Configuration configuration = hdfsEnvironment.getConfiguration(new HdfsEnvironment.HdfsContext(session, hiveSplit.getDatabase(), hiveSplit.getTable()), path);
    Properties schema = hiveSplit.getSchema();
    String columnNameDelimiter = schema.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? schema.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
    List<String> partitionColumnNames;
    if (schema.containsKey(META_PARTITION_COLUMNS)) {
        partitionColumnNames = Arrays.asList(schema.getProperty(META_PARTITION_COLUMNS).split(columnNameDelimiter));
    } else if (schema.containsKey(META_TABLE_COLUMNS)) {
        partitionColumnNames = Arrays.asList(schema.getProperty(META_TABLE_COLUMNS).split(columnNameDelimiter));
    } else {
        partitionColumnNames = new ArrayList<>();
    }
    List<String> tableColumns = hiveColumns.stream().map(cols -> cols.getName()).collect(toList());
    List<String> missingColumns = tableColumns.stream().skip(partitionColumnNames.size()).collect(toList());
    List<IndexMetadata> indexes = new ArrayList<>();
    if (indexCache != null && session.isHeuristicIndexFilterEnabled()) {
        indexes.addAll(this.indexCache.getIndices(session.getCatalog().orElse(null), hiveTable.getSchemaTableName().toString(), hiveSplit, hiveTable.getCompactEffectivePredicate(), hiveTable.getPartitionColumns()));
        /* Bloom/Bitmap indices are checked for the given table and added to the possible matchers for pushdown. */
        if (hiveTable.getDisjunctCompactEffectivePredicate().isPresent() && hiveTable.getDisjunctCompactEffectivePredicate().get().size() > 0) {
            hiveTable.getDisjunctCompactEffectivePredicate().get().forEach(orPredicate -> indexes.addAll(this.indexCache.getIndices(session.getCatalog().orElse(null), hiveTable.getSchemaTableName().toString(), hiveSplit, orPredicate, hiveTable.getPartitionColumns())));
        }
    }
    Optional<List<IndexMetadata>> indexOptional = indexes == null || indexes.isEmpty() ? Optional.empty() : Optional.of(indexes);
    URI splitUri = URI.create(URIUtil.encodePath(hiveSplit.getPath()));
    SplitMetadata splitMetadata = new SplitMetadata(splitUri.getRawPath(), hiveSplit.getLastModifiedTime());
    TupleDomain<HiveColumnHandle> predicate = TupleDomain.all();
    if (dynamicFilterSupplier.isPresent() && dynamicFilters != null && !dynamicFilters.isEmpty()) {
        if (dynamicFilters.size() == 1) {
            List<HiveColumnHandle> filteredHiveColumnHandles = hiveColumns.stream().filter(column -> dynamicFilters.get(0).containsKey(column)).collect(toList());
            HiveColumnHandle hiveColumnHandle = filteredHiveColumnHandles.get(0);
            Type type = hiveColumnHandle.getColumnMetadata(typeManager).getType();
            predicate = getPredicate(dynamicFilters.get(0).get(hiveColumnHandle), type, hiveColumnHandle);
            if (predicate.isNone()) {
                predicate = TupleDomain.all();
            }
        }
    }
    /**
     * This is the main division point between the normal read flow and the filter pushdown
     * (selective read) flow. If the orc_predicate_pushdown_enabled configuration is true and
     * every clause of the query can be handled by the Hive selective read flow, then
     * hiveTable.isSuitableToPush() returns true (see HiveMetadata.checkIfSuitableToPush).
     */
    if (hiveTable.isSuitableToPush()) {
        return createSelectivePageSource(selectivePageSourceFactories, configuration, session, hiveSplit, assignUniqueIndicesToPartitionColumns(hiveColumns), typeManager, dynamicFilterSupplier, hiveSplit.getDeleteDeltaLocations(), hiveSplit.getStartRowOffsetOfFile(), indexOptional, hiveSplit.isCacheable(), hiveTable.getCompactEffectivePredicate(), hiveTable.getPredicateColumns(), hiveTable.getDisjunctCompactEffectivePredicate(), hiveSplit.getBucketConversion(), hiveSplit.getBucketNumber(), hiveSplit.getLastModifiedTime(), missingColumns);
    }
    Optional<ConnectorPageSource> pageSource = createHivePageSource(cursorProviders, pageSourceFactories, configuration, session, path, hiveSplit.getBucketNumber(), hiveSplit.getStart(), hiveSplit.getLength(), hiveSplit.getFileSize(), hiveSplit.getSchema(), hiveTable.getCompactEffectivePredicate().intersect(predicate), hiveColumns, hiveSplit.getPartitionKeys(), typeManager, hiveSplit.getColumnCoercions(), hiveSplit.getBucketConversion(), hiveSplit.isS3SelectPushdownEnabled(), dynamicFilterSupplier, hiveSplit.getDeleteDeltaLocations(), hiveSplit.getStartRowOffsetOfFile(), indexOptional, splitMetadata, hiveSplit.isCacheable(), hiveSplit.getLastModifiedTime(), hiveSplit.getCustomSplitInfo(), missingColumns);
    if (pageSource.isPresent()) {
        return pageSource.get();
    }
    throw new RuntimeException("Could not find a file reader for split " + hiveSplit);
}
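The pruning decision above is delegated to isPartitionFiltered, whose implementation is not shown here. The sketch below is only a conceptual illustration of that kind of check, not the hetu-core implementation: it assumes DynamicFilter exposes getColumnHandle() and a contains(Object) membership test and that HivePartitionKey has the usual getName()/getValue() accessors, and it skips the conversion of the partition string to the column's type that the real code performs via the TypeManager.

// Conceptual sketch only; not the actual isPartitionFiltered implementation.
// Assumptions: DynamicFilter.getColumnHandle() and DynamicFilter.contains(Object) exist;
// the real code also converts the partition value to the column's type before comparing.
private static boolean isPartitionPrunedSketch(List<HivePartitionKey> partitionKeys, List<Set<DynamicFilter>> dynamicFilters)
{
    for (HivePartitionKey partitionKey : partitionKeys) {
        for (Set<DynamicFilter> filters : dynamicFilters) {
            for (DynamicFilter filter : filters) {
                ColumnHandle column = filter.getColumnHandle(); // assumed accessor
                if (column instanceof HiveColumnHandle
                        && ((HiveColumnHandle) column).getName().equals(partitionKey.getName())
                        && !filter.contains(partitionKey.getValue())) { // assumed membership check
                    // No row from this partition can satisfy the dynamic filter, so the split can be skipped.
                    return true;
                }
            }
        }
    }
    return false;
}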
use of io.prestosql.spi.dynamicfilter.DynamicFilterSupplier in project hetu-core by openlookeng.
the class HivePageSourceProvider method createSelectivePageSource.
/**
 * Create a selective page source, which is used for the selective (filter pushdown) reader flow.
 * Unlike the normal page source, the selective page source must pass the following additional details to the reader:
 * a. Pre-filled values of all constant (prefilled) columns.
 * b. Coercion information for all columns.
 * c. The columns that need to be projected.
 * d. The full list of columns to be read (projection + filter).
 * All of this information is used by the reader.
 *
 * @param columns list of all columns that are part of the scan
 * @param effectivePredicate predicates related to the AND clause
 * @param predicateColumns map of all column handles that are part of the predicate
 * @param additionPredicates predicates related to the OR clause;
 * the remaining parameters are the same as for createHivePageSource
 * @param missingColumns
 * @return
 */
private static ConnectorPageSource createSelectivePageSource(Set<HiveSelectivePageSourceFactory> selectivePageSourceFactories, Configuration configuration, ConnectorSession session, HiveSplit split, List<HiveColumnHandle> columns, TypeManager typeManager, Optional<DynamicFilterSupplier> dynamicFilterSupplier, Optional<DeleteDeltaLocations> deleteDeltaLocations, Optional<Long> startRowOffsetOfFile, Optional<List<IndexMetadata>> indexes, boolean splitCacheable, TupleDomain<HiveColumnHandle> effectivePredicate, Map<String, HiveColumnHandle> predicateColumns, Optional<List<TupleDomain<HiveColumnHandle>>> additionPredicates, Optional<HiveSplit.BucketConversion> bucketConversion, OptionalInt bucketNumber, long dataSourceLastModifiedTime, List<String> missingColumns) {
    Set<HiveColumnHandle> interimColumns = ImmutableSet.<HiveColumnHandle>builder().addAll(predicateColumns.values()).addAll(bucketConversion.map(HiveSplit.BucketConversion::getBucketColumnHandles).orElse(ImmutableList.of())).build();
    Path path = new Path(split.getPath());
    List<ColumnMapping> columnMappings = ColumnMapping.buildColumnMappings(split.getPartitionKeys(), columns, ImmutableList.copyOf(interimColumns), split.getColumnCoercions(), path, bucketNumber, true, missingColumns);
    List<ColumnMapping> regularAndInterimColumnMappings = ColumnMapping.extractRegularAndInterimColumnMappings(columnMappings);
    Optional<BucketAdaptation> bucketAdaptation = toBucketAdaptation(bucketConversion, regularAndInterimColumnMappings, bucketNumber);
    checkArgument(!bucketAdaptation.isPresent(), "Bucket conversion is not yet supported");
    // Build a map of all PREFILLED columns to pass to the reader. Unlike the normal flow, the selective read
    // flow needs this at the reader level because the reader has to build blocks for all column values.
    Map<Integer, String> prefilledValues = columnMappings.stream().filter(mapping -> mapping.getKind() == ColumnMappingKind.PREFILLED).collect(toImmutableMap(mapping -> mapping.getHiveColumnHandle().getHiveColumnIndex(), ColumnMapping::getPrefilledValue));
    // Build a map of the columns that need to be coerced. This is also passed to the reader, since coercion
    // must be applied before values are added to a block.
    Map<Integer, HiveCoercer> coercers = columnMappings.stream().filter(mapping -> mapping.getCoercionFrom().isPresent()).collect(toImmutableMap(mapping -> mapping.getHiveColumnHandle().getHiveColumnIndex(), mapping -> createCoercer(typeManager, mapping.getCoercionFrom().get(), mapping.getHiveColumnHandle().getHiveType())));
    List<Integer> outputColumns = columns.stream().map(HiveColumnHandle::getHiveColumnIndex).collect(toImmutableList());
    for (HiveSelectivePageSourceFactory pageSourceFactory : selectivePageSourceFactories) {
        Optional<? extends ConnectorPageSource> pageSource = pageSourceFactory.createPageSource(configuration, session, path, split.getStart(), split.getLength(), split.getFileSize(), split.getSchema(), toColumnHandles(columnMappings, true), prefilledValues, outputColumns, effectivePredicate, additionPredicates, deleteDeltaLocations, startRowOffsetOfFile, indexes, splitCacheable, columnMappings, coercers, dataSourceLastModifiedTime);
        if (pageSource.isPresent()) {
            return new HivePageSource(columnMappings, Optional.empty(), typeManager, pageSource.get(), dynamicFilterSupplier, session, split.getPartitionKeys());
        }
    }
    throw new IllegalStateException("Could not find a file reader for split " + split);
}
use of io.prestosql.spi.dynamicfilter.DynamicFilterSupplier in project hetu-core by openlookeng.
the class TestOrcPageSourceMemoryTracking method getOrcConcatPageSource.
private OrcConcatPageSource getOrcConcatPageSource(long waitTime) {
    HiveConfig config = new HiveConfig();
    FileFormatDataSourceStats stats = new FileFormatDataSourceStats();
    ConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    List<ConnectorPageSource> pageSources = new ArrayList<>();
    Supplier<List<Map<ColumnHandle, DynamicFilter>>> supplier = null;
    DynamicFilterSupplier theSupplier = new DynamicFilterSupplier(supplier, System.currentTimeMillis(), waitTime);
    Optional<DynamicFilterSupplier> dynamicFilterSupplier = Optional.of(theSupplier);
    pageSources.add(testPreparer.newPageSource(stats, session, dynamicFilterSupplier));
    OrcConcatPageSource orcConcatPageSource = new OrcConcatPageSource(pageSources);
    return orcConcatPageSource;
}
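For context, a hedged usage sketch: the concatenated source is consumed through the standard ConnectorPageSource contract (getNextPage, isFinished, close), for example by draining it and counting positions. drainPages below is an illustrative helper, not part of the test class.

// Illustrative helper (not part of the test class): drain a page source and count positions.
private static long drainPages(ConnectorPageSource pageSource) throws IOException
{
    long positions = 0;
    try {
        while (!pageSource.isFinished()) {
            Page page = pageSource.getNextPage();
            if (page != null) {
                positions += page.getPositionCount();
            }
        }
    }
    finally {
        pageSource.close();
    }
    return positions;
}

// Example call:
long positions = drainPages(getOrcConcatPageSource(0));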
use of io.prestosql.spi.dynamicfilter.DynamicFilterSupplier in project hetu-core by openlookeng.
the class TableScanOperator method getOutput.
@Override
public Page getOutput() {
    if (strategy.equals(REUSE_STRATEGY_CONSUMER)) {
        return getPage();
    }
    if (split == null) {
        return null;
    }
    if (source == null) {
        if (isDcTable) {
            source = pageSourceProvider.createPageSource(operatorContext.getSession(), split, table, columns, Optional.of(new DynamicFilterSupplier(BloomFilterUtils.getCrossRegionDynamicFilterSupplier(dynamicFilterCacheManagerOptional.get(), queryIdOptional.get().getId(), tableScanNodeOptional.get()), System.currentTimeMillis(), 0L)));
        } else {
            source = pageSourceProvider.createPageSource(operatorContext.getSession(), split, table, columns, Optional.empty());
        }
    }
    Page page = source.getNextPage();
    if (page != null) {
        // ensure the page is in memory before handing it to another operator
        page = page.getLoadedPage();
        // update operator stats
        long endCompletedBytes = source.getCompletedBytes();
        long endReadTimeNanos = source.getReadTimeNanos();
        operatorContext.recordPhysicalInputWithTiming(endCompletedBytes - completedBytes, page.getPositionCount(), endReadTimeNanos - readTimeNanos);
        operatorContext.recordProcessedInput(page.getSizeInBytes(), page.getPositionCount());
        completedBytes = endCompletedBytes;
        readTimeNanos = endReadTimeNanos;
        // pull the bloom filter from the state store and filter the page
        if (existsCrossFilter) {
            try {
                page = filter(page);
            } catch (Throwable e) {
                // ignore
            }
        }
    }
    // updating system memory usage should happen after the page is loaded
    systemMemoryContext.setBytes(source.getSystemMemoryUsage());
    if (strategy.equals(REUSE_STRATEGY_PRODUCER) && page != null) {
        setPage(page);
    }
    return page;
}