Search in sources :

Example 6 with DynamicFilter

use of io.prestosql.spi.dynamicfilter.DynamicFilter in project hetu-core by openlookeng.

the class TestHiveDistributedJoinQueriesWithDynamicFiltering method createDynamicFilterSupplier.

/**
 * Builds a supplier that yields a one-element list whose single map associates
 * {@code columnHandle} with a {@code GLOBAL} dynamic filter created from a bloom
 * filter holding {@code values}.
 *
 * @param values values added to the bloom filter; its size is also passed to the bloom filter constructor
 * @param columnHandle column used both as the map key and as the filter's column
 * @param filterId identifier forwarded to {@code DynamicFilterFactory.create}
 * @return supplier returning a list containing the single column-to-filter map
 * @throws IOException declared by this method; presumably raised while serializing the bloom filter — confirm
 */
private Supplier<List<Map<ColumnHandle, DynamicFilter>>> createDynamicFilterSupplier(List<Long> values, ColumnHandle columnHandle, String filterId) throws IOException {
    // NOTE(review): 0.01 is presumably the expected false-positive probability — confirm against BloomFilter.
    BloomFilter bloomFilter = new BloomFilter(values.size(), 0.01);
    for (Long element : values) {
        bloomFilter.add(element);
    }

    // The factory consumes the bloom filter in its serialized byte form.
    ByteArrayOutputStream serialized = new ByteArrayOutputStream();
    bloomFilter.writeTo(serialized);
    byte[] filterBytes = serialized.toByteArray();

    Map<ColumnHandle, DynamicFilter> filtersByColumn = ImmutableMap.of(
            columnHandle,
            DynamicFilterFactory.create(filterId, columnHandle, filterBytes, DynamicFilter.Type.GLOBAL));
    return () -> ImmutableList.of(filtersByColumn);
}
Also used : ColumnHandle(io.prestosql.spi.connector.ColumnHandle) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) BloomFilter(io.prestosql.spi.util.BloomFilter)

Example 7 with DynamicFilter

use of io.prestosql.spi.dynamicfilter.DynamicFilter in project hetu-core by openlookeng.

the class TestHiveDistributedJoinQueriesWithDynamicFiltering method testIsPartitionFiltered.

/**
 * Checks which kind of page source {@code HivePageSourceProvider.createPageSource} produces
 * when a split with partition keys p1=100, p2=101 and p3=__HIVE_DEFAULT_PARTITION__ is
 * combined with dynamic filters built from the values {1, 50, 100}.
 *
 * <ul>
 *   <li>p1: the result must not be a {@code FixedPageSource}.</li>
 *   <li>p2: the result must be a {@code FixedPageSource}.</li>
 *   <li>p3: the result must not be a {@code FixedPageSource}.</li>
 *   <li>p4 (not among the split's partition keys, supplier built with 0 as the last
 *       constructor argument): the result must not be a {@code FixedPageSource}.</li>
 * </ul>
 *
 * The "must not be fixed" cases also accept a {@code PrestoException}; presumably creating a
 * real page source fails because the split points at a dummy path — confirm.
 *
 * @throws IOException if building one of the dynamic filter suppliers fails
 */
@Test
public void testIsPartitionFiltered() throws IOException {
    Properties schema = new Properties();
    ImmutableList<HivePartitionKey> partitionKeys = ImmutableList.of(new HivePartitionKey("p1", "100"), new HivePartitionKey("p2", "101"), new HivePartitionKey("p3", "__HIVE_DEFAULT_PARTITION__"));
    HiveSplitWrapper split = HiveSplitWrapper.wrap(new HiveSplit("db", "table", "partitionId", "path", 0, 50, 50, 0, schema, partitionKeys, ImmutableList.of(), OptionalInt.empty(), false, ImmutableMap.of(), Optional.empty(), false, Optional.empty(), Optional.empty(), false, ImmutableMap.of()));
    List<Long> filterValues = ImmutableList.of(1L, 50L, 100L);

    HiveColumnHandle testColumnHandle = createIntegerPartitionColumnHandle("p1");
    Optional<DynamicFilterSupplier> dynamicFilterSupplier = createOptionalDynamicFilterSupplier(filterValues, testColumnHandle, "filter1", 10000);
    HiveColumnHandle testColumnHandle2 = createIntegerPartitionColumnHandle("p2");
    Optional<DynamicFilterSupplier> dynamicFilterSupplier2 = createOptionalDynamicFilterSupplier(filterValues, testColumnHandle2, "filter2", 10000);
    HiveColumnHandle testColumnHandle3 = createIntegerPartitionColumnHandle("p3");
    Optional<DynamicFilterSupplier> dynamicFilterSupplier3 = createOptionalDynamicFilterSupplier(filterValues, testColumnHandle3, "filter3", 10000);
    HiveColumnHandle testColumnHandle4 = createIntegerPartitionColumnHandle("p4");
    // NOTE(review): the id "filter3" is reused here from the previous case; looks like a copy-paste — confirm whether "filter4" was intended.
    Optional<DynamicFilterSupplier> dynamicFilterSupplier4 = createOptionalDynamicFilterSupplier(filterValues, testColumnHandle4, "filter3", 0);

    HiveConfig config = new HiveConfig();
    HivePageSourceProvider provider = new HivePageSourceProvider(config, createTestHdfsEnvironment(config), getDefaultHiveRecordCursorProvider(config), getDefaultHiveDataStreamFactories(config), TYPE_MANAGER, getNoOpIndexCache(), getDefaultHiveSelectiveFactories(config));
    TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    ConnectorTableHandle table = new HiveTableHandle("db", "table", ImmutableMap.of(), ImmutableList.of(), Optional.empty());
    HiveTransactionHandle transaction = new HiveTransactionHandle();

    // p1 = 100 is one of the filter values.
    try {
        ConnectorPageSource result = provider.createPageSource(transaction, session, split, table, ImmutableList.of(testColumnHandle), dynamicFilterSupplier);
        assertFalse(result instanceof FixedPageSource);
    } catch (Exception e) {
        assertTrue(e instanceof PrestoException, "Unexpected exception: " + e);
    }
    // p2 = 101 is not one of the filter values; no exception is tolerated here.
    try {
        ConnectorPageSource result = provider.createPageSource(transaction, session, split, table, ImmutableList.of(testColumnHandle2), dynamicFilterSupplier2);
        assertTrue(result instanceof FixedPageSource);
    } catch (Exception e) {
        // Keep the original exception as the cause so the failure report shows what went wrong.
        fail("A FixedPageSource object should have been created", e);
    }
    // p3 holds the Hive default-partition marker.
    try {
        ConnectorPageSource result = provider.createPageSource(transaction, session, split, table, ImmutableList.of(testColumnHandle3), dynamicFilterSupplier3);
        assertFalse(result instanceof FixedPageSource);
    } catch (Exception e) {
        assertTrue(e instanceof PrestoException, "Unexpected exception: " + e);
    }
    // p4 is not a partition key of the split.
    try {
        ConnectorPageSource result = provider.createPageSource(transaction, session, split, table, ImmutableList.of(testColumnHandle4), dynamicFilterSupplier4);
        assertFalse(result instanceof FixedPageSource);
    } catch (Exception e) {
        assertTrue(e instanceof PrestoException, "Unexpected exception: " + e);
    }
}

/** Creates an INTEGER-typed partition-key column handle with the given name. */
private static HiveColumnHandle createIntegerPartitionColumnHandle(String name) {
    return new HiveColumnHandle(name, HIVE_INT, parseTypeSignature(StandardTypes.INTEGER), 0, PARTITION_KEY, Optional.empty());
}

/**
 * Wraps a freshly built dynamic filter supplier for {@code column} in an {@code Optional},
 * stamped with the current time; {@code waitTime} is passed through as the last
 * {@code DynamicFilterSupplier} constructor argument.
 */
private Optional<DynamicFilterSupplier> createOptionalDynamicFilterSupplier(List<Long> values, ColumnHandle column, String filterId, int waitTime) throws IOException {
    Supplier<List<Map<ColumnHandle, DynamicFilter>>> supplier = createDynamicFilterSupplier(values, column, filterId);
    return Optional.of(new DynamicFilterSupplier(supplier, System.currentTimeMillis(), waitTime));
}
Also used : PrestoException(io.prestosql.spi.PrestoException) Properties(java.util.Properties) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) DynamicFilterSupplier(io.prestosql.spi.dynamicfilter.DynamicFilterSupplier) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) FixedPageSource(io.prestosql.spi.connector.FixedPageSource) IOException(java.io.IOException) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) Test(org.testng.annotations.Test)

Example 8 with DynamicFilter

use of io.prestosql.spi.dynamicfilter.DynamicFilter in project hetu-core by openlookeng.

the class TestDynamicFilterCacheManager method testInvalidateCache.

/**
 * Verifies that a dynamic filter stored in the cache manager can be read back and that
 * {@code invalidateCache()} makes the same key resolve to {@code null} afterwards.
 */
@Test
public void testInvalidateCache() {
    final DynamicFilterCacheManager manager = new DynamicFilterCacheManager();
    final String cacheKey = createCacheKey("filter1", "query");
    final DynamicFilter cachedFilter = mock(DynamicFilter.class);

    // Register a task for the key, then cache the filter under it.
    manager.registerTask(cacheKey, new TaskId("task1"));
    manager.cacheDynamicFilter(cacheKey, cachedFilter);
    assertEquals(manager.getDynamicFilter(cacheKey), cachedFilter, "filter1 should be cached");

    // After invalidation the lookup must come back empty.
    manager.invalidateCache();
    assertNull(manager.getDynamicFilter(cacheKey), "filter1 should be removed");
}
Also used : TaskId(io.prestosql.execution.TaskId) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) Test(org.testng.annotations.Test)

Example 9 with DynamicFilter

use of io.prestosql.spi.dynamicfilter.DynamicFilter in project hetu-core by openlookeng.

the class HivePageSource method getNextPage.

/**
 * Produces the next output page of this page source.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>If a dynamic filter supplier is present, fetch the current filters. Return {@code null}
 *       without reading when no filter is available yet and the supplier reports it is blocked.
 *       Close this source and return {@code null} when {@code isPartitionFiltered} says the
 *       partition is filtered out.</li>
 *   <li>Read a page from the delegate; {@code null} from the delegate is passed through.</li>
 *   <li>Apply row-level dynamic filtering when there are filters with eligible columns.</li>
 *   <li>Apply the bucket adapter, if present, keeping only the row ids it computes.</li>
 *   <li>For selective reads, return the page as is; otherwise assemble the output blocks
 *       according to the column mappings.</li>
 * </ol>
 *
 * @return the next page, or {@code null} in the cases described above
 * @throws PrestoException rethrown as is, or wrapping any other {@code RuntimeException}
 *         with {@code HIVE_CURSOR_ERROR}; this source is closed first in both cases
 */
@Override
public Page getNextPage() {
    try {
        final List<Map<ColumnHandle, DynamicFilter>> dynamicFilters;
        if (dynamicFilterSupplier.isPresent()) {
            dynamicFilters = dynamicFilterSupplier.get().getDynamicFilters();
            // Wait for any dynamic filter
            if (dynamicFilters.isEmpty() && dynamicFilterSupplier.get().isBlocked()) {
                return null;
            }
            // Close the current PageSource if the partition should be filtered
            List<Set<DynamicFilter>> dynamicFilterList = new ArrayList<>();
            for (Map<ColumnHandle, DynamicFilter> df : dynamicFilters) {
                Set<DynamicFilter> values = df.values().stream().collect(Collectors.toSet());
                dynamicFilterList.add(values);
            }
            if (isPartitionFiltered(partitionKeys, dynamicFilterList, typeManager)) {
                close();
                return null;
            }
        } else {
            dynamicFilters = ImmutableList.of();
        }
        Page dataPage = delegate.getNextPage();
        if (dataPage == null) {
            return null;
        }
        // Row-level filtering: only attempted when at least one filter exists and has eligible columns
        if (!dynamicFilters.isEmpty()) {
            final List<Map<Integer, ColumnHandle>> eligibleColumns = getEligibleColumnsForRowFiltering(dataPage.getChannelCount(), dynamicFilters);
            if (!eligibleColumns.isEmpty()) {
                dataPage = filter(dynamicFilters, dataPage, eligibleColumns, types);
            }
        }
        if (bucketAdapter.isPresent()) {
            IntArrayList rowsToKeep = bucketAdapter.get().computeEligibleRowIds(dataPage);
            Block[] adaptedBlocks = new Block[dataPage.getChannelCount()];
            for (int i = 0; i < adaptedBlocks.length; i++) {
                Block block = dataPage.getBlock(i);
                if (block instanceof LazyBlock && !((LazyBlock) block).isLoaded()) {
                    // Keep unloaded lazy blocks lazy: the row selection is applied when the block is loaded
                    adaptedBlocks[i] = new LazyBlock(rowsToKeep.size(), new RowFilterLazyBlockLoader(block, rowsToKeep.elements()));
                } else {
                    adaptedBlocks[i] = block.getPositions(rowsToKeep.elements(), 0, rowsToKeep.size());
                }
            }
            dataPage = new Page(rowsToKeep.size(), adaptedBlocks);
        }
        if (isSelectiveRead) {
            // FixMe(Rajeev) : Check way to optimize for prefilled fields.
            return dataPage;
        }
        int batchSize = dataPage.getPositionCount();
        List<Block> blocks = new ArrayList<>();
        for (int fieldId = 0; fieldId < columnMappings.size(); fieldId++) {
            ColumnMapping columnMapping = columnMappings.get(fieldId);
            switch(columnMapping.getKind()) {
                case PREFILLED:
                    // Constant column: repeat the prefilled value for every row of the batch
                    blocks.add(RunLengthEncodedBlock.create(types[fieldId], prefilledValues[fieldId], batchSize));
                    break;
                case REGULAR:
                case TRANSACTIONID:
                    Block block = dataPage.getBlock(columnMapping.getIndex());
                    Optional<Function<Block, Block>> coercer = coercers.get(fieldId);
                    if (coercer.isPresent()) {
                        // Defer the coercion until the block is actually loaded
                        block = new LazyBlock(batchSize, new CoercionLazyBlockLoader(block, coercer.get()));
                    }
                    blocks.add(block);
                    break;
                case INTERIM:
                    // interim columns don't show up in output
                    break;
                default:
                    throw new UnsupportedOperationException();
            }
        }
        return new Page(batchSize, dataPage.getPageMetadata(), blocks.toArray(new Block[0]));
    } catch (PrestoException e) {
        closeWithSuppression(e);
        throw e;
    } catch (RuntimeException e) {
        closeWithSuppression(e);
        throw new PrestoException(HIVE_CURSOR_ERROR, e);
    }
}
Also used : Set(java.util.Set) ArrayList(java.util.ArrayList) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) Page(io.prestosql.spi.Page) PrestoException(io.prestosql.spi.PrestoException) Function(java.util.function.Function) ColumnMapping(io.prestosql.plugin.hive.HivePageSourceProvider.ColumnMapping) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) LazyBlock(io.prestosql.spi.block.LazyBlock) RunLengthEncodedBlock(io.prestosql.spi.block.RunLengthEncodedBlock) Block(io.prestosql.spi.block.Block) HashMap(java.util.HashMap) Map(java.util.Map)

Example 10 with DynamicFilter

use of io.prestosql.spi.dynamicfilter.DynamicFilter in project hetu-core by openlookeng.

the class HivePageSource method filterRows.

/**
 * Computes, for every position of {@code page}, whether the row passes the dynamic filters.
 *
 * <p>Each entry of {@code dynamicFilters} is evaluated as a conjunction over its eligible
 * columns (a row must pass the filter of every eligible column), and the per-entry results
 * are then OR-ed together into the returned mask.
 *
 * @param dynamicFilters one column-to-filter map per alternative of the union
 * @param page page whose rows are tested
 * @param eligibleColumns for the entry at the same index in {@code dynamicFilters}, a map from
 *        channel index in {@code page} to the column handle used to look up the filter
 * @param types types indexed by channel, used to read native values for non-bloom filters
 * @return mask with one flag per page position; {@code true} means the row is kept
 */
private static boolean[] filterRows(List<Map<ColumnHandle, DynamicFilter>> dynamicFilters, Page page, List<Map<Integer, ColumnHandle>> eligibleColumns, Type[] types) {
    final int positionCount = page.getPositionCount();
    // A newly allocated boolean[] is already all false, so the union starts out empty.
    boolean[] result = new boolean[positionCount];
    // loop to handle union of filters if any
    for (int j = 0; j < dynamicFilters.size(); j++) {
        final Map<Integer, ColumnHandle> columns = eligibleColumns.get(j);
        // Start from "every row passes" and narrow it down column by column.
        boolean[] filterResult = new boolean[positionCount];
        Arrays.fill(filterResult, true);
        for (Map.Entry<Integer, ColumnHandle> column : columns.entrySet()) {
            final int columnIndex = column.getKey();
            final ColumnHandle columnHandle = column.getValue();
            final DynamicFilter dynamicFilter = dynamicFilters.get(j).get(columnHandle);
            final Block block = page.getBlock(columnIndex).getLoadedBlock();
            if (dynamicFilter instanceof BloomFilterDynamicFilter) {
                // Bloom filters are applied by the block itself; filterResult is handed over for it to update.
                block.filter(((BloomFilterDynamicFilter) dynamicFilter).getBloomFilterDeserialized(), filterResult);
            } else {
                for (int i = 0; i < block.getPositionCount(); i++) {
                    filterResult[i] = filterResult[i] && dynamicFilter.contains(TypeUtils.readNativeValue(types[columnIndex], block, i));
                }
            }
        }
        // Merge this entry into the union once; OR-ing is idempotent, so repeating it per column is unnecessary.
        // NOTE(review): an entry without eligible columns contributes nothing to the union (behavior kept
        // from the previous implementation) — confirm that it should not instead keep every row.
        if (!columns.isEmpty()) {
            for (int i = 0; i < positionCount; i++) {
                result[i] = result[i] || filterResult[i];
            }
        }
    }
    return result;
}
Also used : ColumnHandle(io.prestosql.spi.connector.ColumnHandle) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) RunLengthEncodedBlock(io.prestosql.spi.block.RunLengthEncodedBlock) Block(io.prestosql.spi.block.Block) LazyBlock(io.prestosql.spi.block.LazyBlock) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

DynamicFilter (io.prestosql.spi.dynamicfilter.DynamicFilter)34 ColumnHandle (io.prestosql.spi.connector.ColumnHandle)26 Map (java.util.Map)16 BloomFilterDynamicFilter (io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter)15 ArrayList (java.util.ArrayList)14 Test (org.testng.annotations.Test)14 Set (java.util.Set)13 ImmutableList (com.google.common.collect.ImmutableList)11 HashSet (java.util.HashSet)11 List (java.util.List)11 ImmutableMap (com.google.common.collect.ImmutableMap)10 PrestoException (io.prestosql.spi.PrestoException)9 TypeManager (io.prestosql.spi.type.TypeManager)9 BloomFilter (io.prestosql.spi.util.BloomFilter)9 HashMap (java.util.HashMap)9 ConnectorPageSource (io.prestosql.spi.connector.ConnectorPageSource)7 ConnectorSession (io.prestosql.spi.connector.ConnectorSession)7 ConnectorTableHandle (io.prestosql.spi.connector.ConnectorTableHandle)7 DynamicFilterSupplier (io.prestosql.spi.dynamicfilter.DynamicFilterSupplier)7 HashSetDynamicFilter (io.prestosql.spi.dynamicfilter.HashSetDynamicFilter)7