
Example 1 with DynamicFilterSupplier

Use of io.prestosql.spi.dynamicfilter.DynamicFilterSupplier in project hetu-core by openlookeng.

From the class TestHiveDistributedJoinQueriesWithDynamicFiltering, the method testIsPartitionFiltered:

@Test
public void testIsPartitionFiltered() throws IOException {
    Properties schema = new Properties();
    // Three partition keys; p3 carries the Hive default-partition marker
    ImmutableList<HivePartitionKey> partitionKeys = ImmutableList.of(
            new HivePartitionKey("p1", "100"),
            new HivePartitionKey("p2", "101"),
            new HivePartitionKey("p3", "__HIVE_DEFAULT_PARTITION__"));
    HiveSplitWrapper split = HiveSplitWrapper.wrap(new HiveSplit("db", "table", "partitionId", "path", 0, 50, 50, 0, schema, partitionKeys, ImmutableList.of(), OptionalInt.empty(), false, ImmutableMap.of(), Optional.empty(), false, Optional.empty(), Optional.empty(), false, ImmutableMap.of()));
    List<Long> filterValues = ImmutableList.of(1L, 50L, 100L);
    // filter1 on p1: the partition value 100 is among the filter values, so the split must not be pruned
    HiveColumnHandle testColumnHandle = new HiveColumnHandle("p1", HIVE_INT, parseTypeSignature(StandardTypes.INTEGER), 0, PARTITION_KEY, Optional.empty());
    Supplier<List<Map<ColumnHandle, DynamicFilter>>> dynamicFilter = createDynamicFilterSupplier(filterValues, testColumnHandle, "filter1");
    Optional<DynamicFilterSupplier> dynamicFilterSupplier = Optional.of(new DynamicFilterSupplier(dynamicFilter, System.currentTimeMillis(), 10000));
    // filter2 on p2: the partition value 101 is not among the filter values, so the split should be pruned
    HiveColumnHandle testColumnHandle2 = new HiveColumnHandle("p2", HIVE_INT, parseTypeSignature(StandardTypes.INTEGER), 0, PARTITION_KEY, Optional.empty());
    Supplier<List<Map<ColumnHandle, DynamicFilter>>> dynamicFilter2 = createDynamicFilterSupplier(filterValues, testColumnHandle2, "filter2");
    Optional<DynamicFilterSupplier> dynamicFilterSupplier2 = Optional.of(new DynamicFilterSupplier(dynamicFilter2, System.currentTimeMillis(), 10000));
    // filter3 on p3: the default partition can never be pruned
    HiveColumnHandle testColumnHandle3 = new HiveColumnHandle("p3", HIVE_INT, parseTypeSignature(StandardTypes.INTEGER), 0, PARTITION_KEY, Optional.empty());
    Supplier<List<Map<ColumnHandle, DynamicFilter>>> dynamicFilter3 = createDynamicFilterSupplier(filterValues, testColumnHandle3, "filter3");
    Optional<DynamicFilterSupplier> dynamicFilterSupplier3 = Optional.of(new DynamicFilterSupplier(dynamicFilter3, System.currentTimeMillis(), 10000));
    // filter4 on p4: p4 is not one of the split's partition keys, and a waitTime of 0 expires the supplier immediately
    HiveColumnHandle testColumnHandle4 = new HiveColumnHandle("p4", HIVE_INT, parseTypeSignature(StandardTypes.INTEGER), 0, PARTITION_KEY, Optional.empty());
    Supplier<List<Map<ColumnHandle, DynamicFilter>>> dynamicFilter4 = createDynamicFilterSupplier(filterValues, testColumnHandle4, "filter4");
    Optional<DynamicFilterSupplier> dynamicFilterSupplier4 = Optional.of(new DynamicFilterSupplier(dynamicFilter4, System.currentTimeMillis(), 0));
    HiveConfig config = new HiveConfig();
    HivePageSourceProvider provider = new HivePageSourceProvider(config, createTestHdfsEnvironment(config), getDefaultHiveRecordCursorProvider(config), getDefaultHiveDataStreamFactories(config), TYPE_MANAGER, getNoOpIndexCache(), getDefaultHiveSelectiveFactories(config));
    TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    ConnectorTableHandle table = new HiveTableHandle("db", "table", ImmutableMap.of(), ImmutableList.of(), Optional.empty());
    HiveTransactionHandle transaction = new HiveTransactionHandle();
    // Case 1: not pruned; the provider attempts a real read of the nonexistent file and fails
    try {
        ConnectorPageSource result = provider.createPageSource(transaction, session, split, table, ImmutableList.of(testColumnHandle), dynamicFilterSupplier);
        assertFalse(result instanceof FixedPageSource);
    } catch (Exception e) {
        assertTrue(e instanceof PrestoException);
    }
    // Case 2: pruned; an empty FixedPageSource is returned instead of a real reader
    try {
        ConnectorPageSource result = provider.createPageSource(transaction, session, split, table, ImmutableList.of(testColumnHandle2), dynamicFilterSupplier2);
        assertTrue(result instanceof FixedPageSource);
    } catch (Exception e) {
        fail("A FixedPageSource object should have been created");
    }
    // Case 3: the default partition is never pruned
    try {
        ConnectorPageSource result = provider.createPageSource(transaction, session, split, table, ImmutableList.of(testColumnHandle3), dynamicFilterSupplier3);
        assertFalse(result instanceof FixedPageSource);
    } catch (Exception e) {
        assertTrue(e instanceof PrestoException);
    }
    // Case 4: p4 is not a partition key of the split, so no pruning applies
    try {
        ConnectorPageSource result = provider.createPageSource(transaction, session, split, table, ImmutableList.of(testColumnHandle4), dynamicFilterSupplier4);
        assertFalse(result instanceof FixedPageSource);
    } catch (Exception e) {
        assertTrue(e instanceof PrestoException);
    }
}
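
The helper createDynamicFilterSupplier is defined elsewhere in the test class and is not shown here. As a standalone illustration of the pruning decision the four cases exercise, here is a minimal sketch in plain Java; PartitionPruner and its members are hypothetical stand-ins, not hetu-core API:

import java.util.Set;

// PartitionPruner is a hypothetical stand-in for the decision that
// HiveUtil.isPartitionFiltered makes per split; it is not hetu-core API.
final class PartitionPruner
{
    private PartitionPruner() {}

    // Returns true when the split can be skipped: the partition has a concrete value
    // and that value is absent from the dynamic filter's value set.
    static boolean isFiltered(String partitionValue, Set<Long> filterValues)
    {
        if ("__HIVE_DEFAULT_PARTITION__".equals(partitionValue)) {
            return false; // the default partition can never be pruned safely
        }
        return !filterValues.contains(Long.parseLong(partitionValue));
    }

    public static void main(String[] args)
    {
        Set<Long> filterValues = Set.of(1L, 50L, 100L);
        System.out.println(isFiltered("100", filterValues)); // false: kept, as in case 1
        System.out.println(isFiltered("101", filterValues)); // true: pruned, as in case 2
        System.out.println(isFiltered("__HIVE_DEFAULT_PARTITION__", filterValues)); // false: case 3
    }
}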

Example 2 with DynamicFilterSupplier

Use of io.prestosql.spi.dynamicfilter.DynamicFilterSupplier in project hetu-core by openlookeng.

From the class HivePageSourceProvider, the method createPageSourceInternal:

private ConnectorPageSource createPageSourceInternal(ConnectorSession session, Optional<DynamicFilterSupplier> dynamicFilterSupplier, List<Map<ColumnHandle, DynamicFilter>> dynamicFilters, HiveTableHandle hiveTable, List<HiveColumnHandle> hiveColumns, HiveSplit hiveSplit) {
    Path path = new Path(hiveSplit.getPath());
    List<Set<DynamicFilter>> dynamicFilterList = new ArrayList<>();
    if (dynamicFilters != null) {
        for (Map<ColumnHandle, DynamicFilter> df : dynamicFilters) {
            Set<DynamicFilter> values = df.values().stream().collect(Collectors.toSet());
            dynamicFilterList.add(values);
        }
    }
    // Filter out splits using partition values and dynamic filters
    if (dynamicFilters != null && !dynamicFilters.isEmpty() && isPartitionFiltered(hiveSplit.getPartitionKeys(), dynamicFilterList, typeManager)) {
        return new FixedPageSource(ImmutableList.of());
    }
    Configuration configuration = hdfsEnvironment.getConfiguration(new HdfsEnvironment.HdfsContext(session, hiveSplit.getDatabase(), hiveSplit.getTable()), path);
    Properties schema = hiveSplit.getSchema();
    String columnNameDelimiter = schema.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? schema.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
    List<String> partitionColumnNames;
    if (schema.containsKey(META_PARTITION_COLUMNS)) {
        partitionColumnNames = Arrays.asList(schema.getProperty(META_PARTITION_COLUMNS).split(columnNameDelimiter));
    } else if (schema.containsKey(META_TABLE_COLUMNS)) {
        partitionColumnNames = Arrays.asList(schema.getProperty(META_TABLE_COLUMNS).split(columnNameDelimiter));
    } else {
        partitionColumnNames = new ArrayList<>();
    }
    List<String> tableColumns = hiveColumns.stream().map(HiveColumnHandle::getName).collect(toList());
    List<String> missingColumns = tableColumns.stream().skip(partitionColumnNames.size()).collect(toList());
    List<IndexMetadata> indexes = new ArrayList<>();
    if (indexCache != null && session.isHeuristicIndexFilterEnabled()) {
        indexes.addAll(this.indexCache.getIndices(session.getCatalog().orElse(null), hiveTable.getSchemaTableName().toString(), hiveSplit, hiveTable.getCompactEffectivePredicate(), hiveTable.getPartitionColumns()));
        /* Bloom/Bitmap indices are checked for the given table and added to the possible matchers for pushdown. */
        if (hiveTable.getDisjunctCompactEffectivePredicate().isPresent() && hiveTable.getDisjunctCompactEffectivePredicate().get().size() > 0) {
            hiveTable.getDisjunctCompactEffectivePredicate().get().forEach(orPredicate -> indexes.addAll(this.indexCache.getIndices(session.getCatalog().orElse(null), hiveTable.getSchemaTableName().toString(), hiveSplit, orPredicate, hiveTable.getPartitionColumns())));
        }
    }
    Optional<List<IndexMetadata>> indexOptional = indexes.isEmpty() ? Optional.empty() : Optional.of(indexes);
    URI splitUri = URI.create(URIUtil.encodePath(hiveSplit.getPath()));
    SplitMetadata splitMetadata = new SplitMetadata(splitUri.getRawPath(), hiveSplit.getLastModifiedTime());
    TupleDomain<HiveColumnHandle> predicate = TupleDomain.all();
    if (dynamicFilterSupplier.isPresent() && dynamicFilters != null && !dynamicFilters.isEmpty()) {
        if (dynamicFilters.size() == 1) {
            List<HiveColumnHandle> filteredHiveColumnHandles = hiveColumns.stream().filter(column -> dynamicFilters.get(0).containsKey(column)).collect(toList());
            HiveColumnHandle hiveColumnHandle = filteredHiveColumnHandles.get(0);
            Type type = hiveColumnHandle.getColumnMetadata(typeManager).getType();
            predicate = getPredicate(dynamicFilters.get(0).get(hiveColumnHandle), type, hiveColumnHandle);
            if (predicate.isNone()) {
                predicate = TupleDomain.all();
            }
        }
    }
    /**
     * This is the main division point between the normal read flow and the filter-pushdown
     * case (also known as the selective read flow). If the configuration property
     * orc_predicate_pushdown_enabled is true and every clause of the query can be handled
     * by the Hive selective read flow, then hiveTable.isSuitableToPush() returns true.
     * (Refer to HiveMetadata.checkIfSuitableToPush.)
     */
    if (hiveTable.isSuitableToPush()) {
        return createSelectivePageSource(selectivePageSourceFactories, configuration, session, hiveSplit, assignUniqueIndicesToPartitionColumns(hiveColumns), typeManager, dynamicFilterSupplier, hiveSplit.getDeleteDeltaLocations(), hiveSplit.getStartRowOffsetOfFile(), indexOptional, hiveSplit.isCacheable(), hiveTable.getCompactEffectivePredicate(), hiveTable.getPredicateColumns(), hiveTable.getDisjunctCompactEffectivePredicate(), hiveSplit.getBucketConversion(), hiveSplit.getBucketNumber(), hiveSplit.getLastModifiedTime(), missingColumns);
    }
    Optional<ConnectorPageSource> pageSource = createHivePageSource(cursorProviders, pageSourceFactories, configuration, session, path, hiveSplit.getBucketNumber(), hiveSplit.getStart(), hiveSplit.getLength(), hiveSplit.getFileSize(), hiveSplit.getSchema(), hiveTable.getCompactEffectivePredicate().intersect(predicate), hiveColumns, hiveSplit.getPartitionKeys(), typeManager, hiveSplit.getColumnCoercions(), hiveSplit.getBucketConversion(), hiveSplit.isS3SelectPushdownEnabled(), dynamicFilterSupplier, hiveSplit.getDeleteDeltaLocations(), hiveSplit.getStartRowOffsetOfFile(), indexOptional, splitMetadata, hiveSplit.isCacheable(), hiveSplit.getLastModifiedTime(), hiveSplit.getCustomSplitInfo(), missingColumns);
    if (pageSource.isPresent()) {
        return pageSource.get();
    }
    throw new RuntimeException("Could not find a file reader for split " + hiveSplit);
}
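
One detail worth noting above is how the partition column names are recovered: the split's schema Properties are consulted for META_PARTITION_COLUMNS first, falling back to META_TABLE_COLUMNS, and the value is split on a configurable delimiter. A self-contained sketch of that lookup follows; the literal keys "partition_columns", "columns", and "column.name.delimiter" are assumed stand-ins for those constants:

import java.util.Arrays;
import java.util.List;
import java.util.Properties;

public class PartitionColumnNames
{
    // "partition_columns", "columns", and "column.name.delimiter" are assumed literal
    // stand-ins for META_PARTITION_COLUMNS, META_TABLE_COLUMNS, and COLUMN_NAME_DELIMITER.
    static List<String> fromSchema(Properties schema)
    {
        String delimiter = schema.getProperty("column.name.delimiter", ",");
        String names = schema.getProperty("partition_columns");
        if (names == null) {
            names = schema.getProperty("columns", "");
        }
        return names.isEmpty() ? List.of() : Arrays.asList(names.split(delimiter));
    }

    public static void main(String[] args)
    {
        Properties schema = new Properties();
        schema.setProperty("columns", "p1,p2,c1");
        System.out.println(fromSchema(schema)); // prints [p1, p2, c1]
    }
}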

Example 3 with DynamicFilterSupplier

Use of io.prestosql.spi.dynamicfilter.DynamicFilterSupplier in project hetu-core by openlookeng.

From the class HivePageSourceProvider, the method createSelectivePageSource:

/**
 * Create a selective page source, which is used for the selective reader flow.
 * Unlike a normal page source, a selective page source must pass the following
 * additional details to the reader:
 * a. Pre-filled values of all constant columns.
 * b. Coercion information for all columns.
 * c. The columns that need to be projected.
 * d. The total list of columns to be read (projection + filter).
 * The reader uses all of this information.
 * @param columns list of all columns that are part of the scan
 * @param effectivePredicate predicates from the AND clause
 * @param predicateColumns map of all column handles that are part of the predicate
 * @param additionPredicates predicates from the OR clause;
 * the remaining parameters are the same as for createHivePageSource
 * @param missingColumns
 * @return the created page source
 */
private static ConnectorPageSource createSelectivePageSource(Set<HiveSelectivePageSourceFactory> selectivePageSourceFactories, Configuration configuration, ConnectorSession session, HiveSplit split, List<HiveColumnHandle> columns, TypeManager typeManager, Optional<DynamicFilterSupplier> dynamicFilterSupplier, Optional<DeleteDeltaLocations> deleteDeltaLocations, Optional<Long> startRowOffsetOfFile, Optional<List<IndexMetadata>> indexes, boolean splitCacheable, TupleDomain<HiveColumnHandle> effectivePredicate, Map<String, HiveColumnHandle> predicateColumns, Optional<List<TupleDomain<HiveColumnHandle>>> additionPredicates, Optional<HiveSplit.BucketConversion> bucketConversion, OptionalInt bucketNumber, long dataSourceLastModifiedTime, List<String> missingColumns) {
    Set<HiveColumnHandle> interimColumns = ImmutableSet.<HiveColumnHandle>builder().addAll(predicateColumns.values()).addAll(bucketConversion.map(HiveSplit.BucketConversion::getBucketColumnHandles).orElse(ImmutableList.of())).build();
    Path path = new Path(split.getPath());
    List<ColumnMapping> columnMappings = ColumnMapping.buildColumnMappings(split.getPartitionKeys(), columns, ImmutableList.copyOf(interimColumns), split.getColumnCoercions(), path, bucketNumber, true, missingColumns);
    List<ColumnMapping> regularAndInterimColumnMappings = ColumnMapping.extractRegularAndInterimColumnMappings(columnMappings);
    Optional<BucketAdaptation> bucketAdaptation = toBucketAdaptation(bucketConversion, regularAndInterimColumnMappings, bucketNumber);
    checkArgument(!bucketAdaptation.isPresent(), "Bucket conversion is not yet supported");
    // Build a map of all PREFILLED column values to pass to the reader. Unlike the normal flow,
    // the selective read flow needs this at the reader level to build blocks of constant column values.
    Map<Integer, String> prefilledValues = columnMappings.stream().filter(mapping -> mapping.getKind() == ColumnMappingKind.PREFILLED).collect(toImmutableMap(mapping -> mapping.getHiveColumnHandle().getHiveColumnIndex(), ColumnMapping::getPrefilledValue));
    // Build a map of the columns that require coercion. This also has to be sent to the reader,
    // because coercion must be applied before values are added to a block.
    Map<Integer, HiveCoercer> coercers = columnMappings.stream().filter(mapping -> mapping.getCoercionFrom().isPresent()).collect(toImmutableMap(mapping -> mapping.getHiveColumnHandle().getHiveColumnIndex(), mapping -> createCoercer(typeManager, mapping.getCoercionFrom().get(), mapping.getHiveColumnHandle().getHiveType())));
    List<Integer> outputColumns = columns.stream().map(HiveColumnHandle::getHiveColumnIndex).collect(toImmutableList());
    for (HiveSelectivePageSourceFactory pageSourceFactory : selectivePageSourceFactories) {
        Optional<? extends ConnectorPageSource> pageSource = pageSourceFactory.createPageSource(configuration, session, path, split.getStart(), split.getLength(), split.getFileSize(), split.getSchema(), toColumnHandles(columnMappings, true), prefilledValues, outputColumns, effectivePredicate, additionPredicates, deleteDeltaLocations, startRowOffsetOfFile, indexes, splitCacheable, columnMappings, coercers, dataSourceLastModifiedTime);
        if (pageSource.isPresent()) {
            return new HivePageSource(columnMappings, Optional.empty(), typeManager, pageSource.get(), dynamicFilterSupplier, session, split.getPartitionKeys());
        }
    }
    throw new IllegalStateException("Could not find a file reader for split " + split);
}
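
Both prefilledValues and coercers above are built with the same stream shape: filter the column mappings, then collect into a map keyed by the Hive column index. A minimal sketch of that pattern, with a hypothetical Mapping record standing in for ColumnMapping:

import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;

public class PrefilledValuesSketch
{
    // Hypothetical stand-in for ColumnMapping: an index, a kind, and an optional prefilled value.
    record Mapping(int hiveColumnIndex, String kind, Optional<String> prefilledValue) {}

    public static void main(String[] args)
    {
        List<Mapping> mappings = List.of(
                new Mapping(0, "REGULAR", Optional.empty()),
                new Mapping(1, "PREFILLED", Optional.of("2021-01-01")),
                new Mapping(2, "PREFILLED", Optional.of("us-east")));

        // Same shape as the prefilledValues collector above: column index -> constant value.
        Map<Integer, String> prefilledValues = mappings.stream()
                .filter(m -> m.kind().equals("PREFILLED"))
                .collect(Collectors.toMap(Mapping::hiveColumnIndex, m -> m.prefilledValue().orElseThrow()));

        System.out.println(prefilledValues); // prints {1=2021-01-01, 2=us-east}
    }
}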

Example 4 with DynamicFilterSupplier

Use of io.prestosql.spi.dynamicfilter.DynamicFilterSupplier in project hetu-core by openlookeng.

From the class TestOrcPageSourceMemoryTracking, the method getOrcConcatPageSource:

private OrcConcatPageSource getOrcConcatPageSource(long waitTime) {
    HiveConfig config = new HiveConfig();
    FileFormatDataSourceStats stats = new FileFormatDataSourceStats();
    ConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    List<ConnectorPageSource> pageSources = new ArrayList<>();
    // Deliberately pass a null underlying supplier: only the wait-time behavior is exercised
    Supplier<List<Map<ColumnHandle, DynamicFilter>>> supplier = null;
    DynamicFilterSupplier theSupplier = new DynamicFilterSupplier(supplier, System.currentTimeMillis(), waitTime);
    Optional<DynamicFilterSupplier> dynamicFilterSupplier = Optional.of(theSupplier);
    pageSources.add(testPreparer.newPageSource(stats, session, dynamicFilterSupplier));
    return new OrcConcatPageSource(pageSources);
}
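
Since the underlying supplier is null, only the timing arguments matter here. Assuming the second and third constructor arguments of DynamicFilterSupplier are a start timestamp and a wait budget in milliseconds (which is how this test reads), the timeout bookkeeping would look roughly like this sketch; FilterWaitBudget is a hypothetical stand-in, not hetu-core API:

// A minimal sketch of timeout bookkeeping, assuming DynamicFilterSupplier's second and
// third constructor arguments are a start timestamp and a wait budget in milliseconds.
final class FilterWaitBudget
{
    private final long startMillis;
    private final long waitMillis;

    FilterWaitBudget(long startMillis, long waitMillis)
    {
        this.startMillis = startMillis;
        this.waitMillis = waitMillis;
    }

    // Once this returns true, the page source should proceed without the dynamic filters.
    boolean timedOut()
    {
        return System.currentTimeMillis() - startMillis >= waitMillis;
    }

    public static void main(String[] args)
    {
        FilterWaitBudget zero = new FilterWaitBudget(System.currentTimeMillis(), 0);
        System.out.println(zero.timedOut()); // true: a waitTime of 0 expires immediately
    }
}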

Example 5 with DynamicFilterSupplier

Use of io.prestosql.spi.dynamicfilter.DynamicFilterSupplier in project hetu-core by openlookeng.

From the class TableScanOperator, the method getOutput:

@Override
public Page getOutput() {
    if (strategy.equals(REUSE_STRATEGY_CONSUMER)) {
        return getPage();
    }
    if (split == null) {
        return null;
    }
    if (source == null) {
        if (isDcTable) {
            source = pageSourceProvider.createPageSource(
                    operatorContext.getSession(),
                    split,
                    table,
                    columns,
                    Optional.of(new DynamicFilterSupplier(
                            BloomFilterUtils.getCrossRegionDynamicFilterSupplier(dynamicFilterCacheManagerOptional.get(), queryIdOptional.get().getId(), tableScanNodeOptional.get()),
                            System.currentTimeMillis(),
                            0L)));
        } else {
            source = pageSourceProvider.createPageSource(operatorContext.getSession(), split, table, columns, Optional.empty());
        }
    }
    Page page = source.getNextPage();
    if (page != null) {
        // ensure the page is in memory before handing it to another operator
        page = page.getLoadedPage();
        // update operator stats
        long endCompletedBytes = source.getCompletedBytes();
        long endReadTimeNanos = source.getReadTimeNanos();
        operatorContext.recordPhysicalInputWithTiming(endCompletedBytes - completedBytes, page.getPositionCount(), endReadTimeNanos - readTimeNanos);
        operatorContext.recordProcessedInput(page.getSizeInBytes(), page.getPositionCount());
        completedBytes = endCompletedBytes;
        readTimeNanos = endReadTimeNanos;
        // pull bloomFilter from stateStore and filter page
        if (existsCrossFilter) {
            try {
                page = filter(page);
            } catch (Throwable e) {
                // cross-region bloom filtering is best effort: ignore failures and keep the unfiltered page
            }
        }
    }
    // updating system memory usage should happen after page is loaded.
    systemMemoryContext.setBytes(source.getSystemMemoryUsage());
    if (strategy.equals(REUSE_STRATEGY_PRODUCER) && page != null) {
        setPage(page);
    }
    return page;
}
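
The stats bookkeeping in getOutput() records deltas rather than running totals: the operator keeps the last observed completedBytes and readTimeNanos and reports only the increase on each call. A standalone sketch of that pattern; DeltaRecorder and its printout are illustrative, not hetu-core API:

// Illustrative delta-recording pattern, mirroring the completedBytes/readTimeNanos
// bookkeeping in getOutput(); the names here are hypothetical, not hetu-core API.
final class DeltaRecorder
{
    private long completedBytes;
    private long readTimeNanos;

    // Report only what changed since the previous call, then advance the baselines.
    void record(long endCompletedBytes, long endReadTimeNanos)
    {
        long bytesDelta = endCompletedBytes - completedBytes;
        long nanosDelta = endReadTimeNanos - readTimeNanos;
        System.out.printf("read %d bytes in %d ns%n", bytesDelta, nanosDelta);
        completedBytes = endCompletedBytes;
        readTimeNanos = endReadTimeNanos;
    }

    public static void main(String[] args)
    {
        DeltaRecorder recorder = new DeltaRecorder();
        recorder.record(1024, 5_000);  // read 1024 bytes in 5000 ns
        recorder.record(4096, 12_000); // read 3072 bytes in 7000 ns
    }
}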
