
Example 6 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class VariableWidthBlockTest method getBf.

private BloomFilter getBf(int size) {
    Random rnd = new Random();
    BloomFilter bf = new BloomFilter(size, 0.01);
    for (int i = 0; i < 100; i++) {
        bf.test(("value" + rnd.nextLong()).getBytes());
    }
    return bf;
}
Also used : Random(java.util.Random) BloomFilter(io.prestosql.spi.util.BloomFilter)
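
A minimal, self-contained sketch of the same API, assuming only what the snippets on this page show (the BloomFilter(int, double) constructor plus add(byte[]) and test(byte[])); the sizes and values are illustrative:

import io.prestosql.spi.util.BloomFilter;

public class BloomFilterUsageSketch {
    public static void main(String[] args) {
        // expected entry count and target false-positive probability, as in getBf above
        BloomFilter bf = new BloomFilter(1024, 0.01);
        bf.add("value-1".getBytes());
        bf.add("value-2".getBytes());
        // an added value always tests true; an absent value tests false except for rare false positives
        System.out.println(bf.test("value-1".getBytes()));
        System.out.println(bf.test("value-3".getBytes()));
    }
}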

Example 7 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class VariableWidthBlockTest method testFilter.

public void testFilter(int size) {
    int count = 1024;
    boolean[] valid = new boolean[count];
    Arrays.fill(valid, Boolean.TRUE);
    VariableWidthBlock block = getBlock();
    String[] values = new String[block.getPositionCount()];
    BloomFilter bf1 = getBf(size);
    BloomFilter bf2 = getBf(size);
    // read every value once (the values array is not used further in this extract)
    for (int i = 0; i < block.getPositionCount(); i++) {
        values[i] = block.getString(i, 0, 0);
    }
    // compare per-position test() against the block-level filter() over several timed rounds
    for (int j = 0; j < 10; j++) {
        long start = System.nanoTime();
        for (int i = 0; i < count; i++) {
            bf1.test(block.getString(i, 0, 0).getBytes());
        }
        System.out.println("original: " + (System.nanoTime() - start));
        start = System.nanoTime();
        block.filter(bf2, valid);
        System.out.println("   block: " + (System.nanoTime() - start));
    }
}
Also used : BloomFilter(io.prestosql.spi.util.BloomFilter)
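
The per-position branch probes test() once per row, while block.filter(bf2, valid) applies the same filter to the whole block in a single call. A hedged sketch of how the resulting mask would typically be consumed, assuming filter() clears the positions whose values are definitely not in the filter (this helper is illustrative and not part of the test):

// counts how many positions survive a mask like the one produced by block.filter(bf2, valid)
private static int countSurviving(boolean[] valid) {
    int surviving = 0;
    for (boolean v : valid) {
        if (v) {
            surviving++;
        }
    }
    return surviving;
}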

Example 8 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class LogicalPart method process.

void process() {
    // skip when the part is still accepting pages, is already being processed, or is already complete
    switch (processingState.get()) {
        case ACCEPTING_PAGES:
        case PROCESSING:
        case COMPLETED:
            return;
    }
    processingState.set(LogicalPartState.PROCESSING);
    // sort and create sparse index
    if (!sortChannels.isEmpty()) {
        SortBuffer sortBuffer = new SortBuffer(new DataSize(maxLogicalPartBytes, DataSize.Unit.BYTE), types, sortChannels, sortOrders, pageSorter, maxPageSizeBytes);
        pages.forEach(sortBuffer::add);
        List<Page> sortedPages = new ArrayList<>();
        sortBuffer.flushTo(sortedPages::add);
        // create index
        int newRowCount = 0;
        long newByteSize = 0;
        for (int i = 0; i < sortedPages.size(); i++) {
            Page page = sortedPages.get(i);
            newByteSize += page.getSizeInBytes();
            newRowCount += page.getPositionCount();
            Object value = getNativeValue(types.get(sortChannels.get(0)), page.getBlock(sortChannels.get(0)), 0);
            if (value != null) {
                if (!(value instanceof Comparable)) {
                    throw new RuntimeException(String.format(Locale.ENGLISH, "Unable to create sparse index for channel %d, type is not Comparable.", sortChannels.get(0)));
                }
                sparseIdx.computeIfAbsent((Comparable) value, e -> new SparseValue(new ArrayList<>())).getPageIndices().add(i);
            }
        }
        for (SparseValue sparseValue : sparseIdx.values()) {
            int lastPageIndex = sparseValue.getPageIndices().get(sparseValue.getPageIndices().size() - 1);
            Page lastPage = sortedPages.get(lastPageIndex);
            sparseValue.setLast((Comparable) getNativeValue(types.get(sortChannels.get(0)), lastPage.getBlock(sortChannels.get(0)), lastPage.getPositionCount() - 1));
        }
        if (newRowCount != rows) {
            throw new RuntimeException("Pages mismatch while processing");
        }
        // create minmax index for sort column
        Page firstPage = sortedPages.get(0);
        Page lastPage = sortedPages.get(sortedPages.size() - 1);
        Object minValue = getNativeValue(types.get(sortChannels.get(0)), firstPage.getBlock(sortChannels.get(0)), 0);
        Object maxValue = getNativeValue(types.get(sortChannels.get(0)), lastPage.getBlock(sortChannels.get(0)), lastPage.getPositionCount() - 1);
        if (minValue instanceof Comparable && maxValue instanceof Comparable) {
            minMaxIdx.put(sortChannels.get(0), new AbstractMap.SimpleEntry<>((Comparable) minValue, (Comparable) maxValue));
        }
        this.byteSize = newByteSize;
        // help triggering GC of old pages
        this.pages.clear();
        this.pages = sortedPages;
    }
    // create bloom index on index columns
    for (Integer indexChannel : indexChannels) {
        Set<Object> values = new HashSet<>();
        for (Page page : getPages()) {
            for (int i = 0; i < page.getPositionCount(); i++) {
                Object value = getNativeValue(types.get(indexChannel), page.getBlock(indexChannel), i);
                if (value != null) {
                    values.add(value);
                }
            }
        }
        BloomFilter filter = values.size() == 0 ? null : new BloomFilter(values.size(), 0.05);
        boolean unsupportedValue = false;
        // if the column is being sorted on, we already have min-max values by looking at the
        // first and last value of the pages, so we can save some computation by skipping this step
        // however, if the column is not being sorted on, the min-max values will need to be
        // determined by doing comparisons
        boolean createMinMax = !minMaxIdx.containsKey(indexChannel);
        Comparable min = null;
        Comparable max = null;
        for (Object value : values) {
            if (createMinMax && value instanceof Comparable) {
                Comparable comparableValue = (Comparable) value;
                min = min(min, comparableValue);
                max = max(max, comparableValue);
            }
            if (!addToFilter(filter, value)) {
                LOG.warn("Unsupported index column type %s", value.getClass().getSimpleName());
                unsupportedValue = true;
                min = null;
                max = null;
                break;
            }
        }
        if (min != null && max != null) {
            minMaxIdx.put(indexChannel, new AbstractMap.SimpleEntry<>(min, max));
        }
        if (unsupportedValue) {
            continue;
        }
        bloomIdx.put(indexChannel, filter);
    }
    try {
        writePages();
    } catch (Exception e) {
        LOG.error("Error spilling LogicalPart " + getPageFileName() + " to disk. Restoring will be unavailable.", e);
    }
    this.processingState.set(LogicalPartState.COMPLETED);
}
Also used : ArrayList(java.util.ArrayList) Page(io.prestosql.spi.Page) BloomFilter(io.prestosql.spi.util.BloomFilter) IOException(java.io.IOException) AbstractMap(java.util.AbstractMap) DataSize(io.airlift.units.DataSize) HashSet(java.util.HashSet)
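
process() delegates unsupported-type detection to an addToFilter helper that is not part of this extract. A hedged sketch of what such a helper could look like, restricted to the BloomFilter overloads shown elsewhere on this page (add(long) and add(byte[])); the dispatch and the method body are assumptions for illustration, not the project's implementation:

// hypothetical helper: returns false for value types the bloom index cannot handle
private static boolean addToFilter(BloomFilter filter, Object value) {
    if (value instanceof Long) {
        // add(long), as exercised by the TestHiveUtil example below
        filter.add((Long) value);
        return true;
    }
    if (value instanceof String) {
        // add(byte[]), as exercised by the other examples on this page
        filter.add(((String) value).getBytes(java.nio.charset.StandardCharsets.UTF_8));
        return true;
    }
    // caller logs "Unsupported index column type" and skips the bloom index for this column
    return false;
}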

Example 9 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class TestHiveUtil method testIsPartitionFiltered.

@Test
public void testIsPartitionFiltered() {
    TypeManager typeManager = new TestingTypeManager();
    assertFalse(isPartitionFiltered(null, null, typeManager), "Should not filter partition if either partitions or dynamicFilters is null");
    Set<DynamicFilter> dynamicFilters = new HashSet<>();
    List<HivePartitionKey> partitions = new ArrayList<>();
    assertFalse(isPartitionFiltered(partitions, null, typeManager), "Should not filter partition if either partitions or dynamicFilters is null");
    assertFalse(isPartitionFiltered(null, ImmutableList.of(dynamicFilters), typeManager), "Should not filter partition if either partitions or dynamicFilters is null");
    assertFalse(isPartitionFiltered(partitions, ImmutableList.of(dynamicFilters), typeManager), "Should not filter partition if partitions and dynamicFilters are empty");
    partitions.add(new HivePartitionKey("pt_d", "0"));
    partitions.add(new HivePartitionKey("app_id", "10000"));
    assertFalse(isPartitionFiltered(partitions, ImmutableList.of(dynamicFilters), typeManager), "Should not filter partition if dynamicFilters is empty");
    ColumnHandle dayColumn = new HiveColumnHandle("pt_d", HIVE_LONG, parseTypeSignature(BIGINT), 0, PARTITION_KEY, Optional.empty());
    BloomFilter dayFilter = new BloomFilter(1024 * 1024, 0.01);
    dynamicFilters.add(new BloomFilterDynamicFilter("1", dayColumn, dayFilter, DynamicFilter.Type.GLOBAL));
    assertTrue(isPartitionFiltered(partitions, ImmutableList.of(dynamicFilters), typeManager), "Should filter partition if any dynamicFilter has 0 element count");
    dayFilter.add(1L);
    assertTrue(isPartitionFiltered(partitions, ImmutableList.of(dynamicFilters), typeManager), "Should filter partition if partition value not in dynamicFilter");
    dayFilter.add(0L);
    assertFalse(isPartitionFiltered(partitions, ImmutableList.of(dynamicFilters), typeManager), "Should not filter partition if partition value is in dynamicFilter");
    Set<DynamicFilter> dynamicFilters1 = new HashSet<>();
    BloomFilter dayFilter1 = new BloomFilter(1024 * 1024, 0.01);
    dynamicFilters1.add(new BloomFilterDynamicFilter("1", dayColumn, dayFilter1, DynamicFilter.Type.GLOBAL));
    dayFilter1.add(0L);
    assertFalse(isPartitionFiltered(partitions, ImmutableList.of(dynamicFilters1), typeManager), "Should not filter partition if partition value is in dynamicFilter");
}
Also used : ColumnHandle(io.prestosql.spi.connector.ColumnHandle) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) HashSetDynamicFilter(io.prestosql.spi.dynamicfilter.HashSetDynamicFilter) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) ArrayList(java.util.ArrayList) TestingTypeManager(io.prestosql.spi.type.TestingTypeManager) TypeManager(io.prestosql.spi.type.TypeManager) BloomFilter(io.prestosql.spi.util.BloomFilter) HashSet(java.util.HashSet) Test(org.testng.annotations.Test)
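
Reduced to the BloomFilter calls the test exercises, the filter/keep decision it verifies looks roughly like this (a hedged sketch of the idea only, not the actual isPartitionFiltered implementation, which resolves partition values through the TypeManager):

// illustration only: a partition is filtered out when its value cannot be in the bloom filter
private static boolean wouldFilterPartition(BloomFilter filter, String partitionValue) {
    byte[] bytes = partitionValue.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    // an empty or non-matching filter fails the membership test, so the partition is skipped
    return !filter.test(bytes);
}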

Example 10 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class TestDynamicFilterServiceWithBloomFilter method mockLocalDynamicFilter.

private void mockLocalDynamicFilter(String taskId, String filterId, String queryId, List<String> values) {
    BloomFilter bloomFilter = new BloomFilter(1024 * 1024, 0.1);
    for (String val : values) {
        bloomFilter.add(val.getBytes(StandardCharsets.UTF_8));
    }
    String key = DynamicFilterUtils.createKey(DynamicFilterUtils.PARTIALPREFIX, filterId, queryId);
    try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
        bloomFilter.writeTo(out);
        byte[] finalOutput = out.toByteArray();
        ((StateSet) stateStoreProvider.getStateStore().getStateCollection(key)).add(finalOutput);
        ((StateSet) stateStoreProvider.getStateStore().getStateCollection(DynamicFilterUtils.createKey(DynamicFilterUtils.TASKSPREFIX, filterId, queryId))).add(taskId);
    } catch (IOException e) {
        Assert.fail("could not register finish filter, Exception happened:" + e.getMessage());
    }
}
Also used : ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) StateSet(io.prestosql.spi.statestore.StateSet) BloomFilter(io.prestosql.spi.util.BloomFilter)
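
The bytes registered under PARTIALPREFIX are presumably read back and merged by the dynamic filter service. A hedged round-trip sketch; the readFrom factory used here is an assumption chosen to mirror writeTo and does not appear in this example (ByteArrayInputStream is java.io.ByteArrayInputStream):

// hypothetical round trip: serialize with writeTo, then rebuild the filter from the stored bytes
private static BloomFilter roundTrip(BloomFilter original) throws IOException {
    byte[] serialized;
    try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
        original.writeTo(out);
        serialized = out.toByteArray();
    }
    // assumption: a readFrom(InputStream) counterpart to writeTo; verify against the project's BloomFilter
    return BloomFilter.readFrom(new ByteArrayInputStream(serialized));
}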

Aggregations

BloomFilter (io.prestosql.spi.util.BloomFilter) 26
ColumnHandle (io.prestosql.spi.connector.ColumnHandle) 9
DynamicFilter (io.prestosql.spi.dynamicfilter.DynamicFilter) 9
ArrayList (java.util.ArrayList) 8
HashMap (java.util.HashMap) 7
HashSet (java.util.HashSet) 7
Test (org.testng.annotations.Test) 7
BloomFilterDynamicFilter (io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) 6
ByteArrayOutputStream (java.io.ByteArrayOutputStream) 6
Map (java.util.Map) 6
Page (io.prestosql.spi.Page) 5
IOException (java.io.IOException) 4
ImmutableMap (com.google.common.collect.ImmutableMap) 3
Block (io.prestosql.spi.block.Block) 3
Symbol (io.prestosql.spi.plan.Symbol) 3
StateSet (io.prestosql.spi.statestore.StateSet) 3
List (java.util.List) 3
ImmutableList (com.google.common.collect.ImmutableList) 2
BlockBuilder (io.prestosql.spi.block.BlockBuilder) 2
LongArrayBlockBuilder (io.prestosql.spi.block.LongArrayBlockBuilder) 2