Search in sources :

Example 21 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class TestBloomIndex method testMmapUse.

/**
 * Experiment comparing a BloomIndex deserialized with mmap disabled against one
 * deserialized with mmap enabled, for three kinds of data: int, double and UUID string.
 *
 * <p>Each of the three sections follows the same steps:
 * <ol>
 *   <li>build an index from {@code dataEntryNum} random values and serialize it to a file
 *       in a temporary folder;</li>
 *   <li>deserialize that file twice, once with {@code setMmapEnabled(false)} and once with
 *       {@code setMmapEnabled(true)};</li>
 *   <li>run {@code queryNum} equality predicates against each index and print the elapsed
 *       milliseconds (the timings are only printed, never asserted);</li>
 *   <li>assert that both indexes expose equal filters, that the non-mmap index reports a
 *       larger memory usage, and that the mmap index reports a larger disk usage.</li>
 * </ol>
 *
 * @throws IOException propagated from the file operations used by the test
 */
@Test
public void testMmapUse() throws IOException {
    // experiment test to understand the performance of using mmap
    // One source index, one non-mmap index and one mmap index are created per data type;
    // all of them (and the temp folder) are closed by the try-with-resources statement.
    try (TempFolder folder = new TempFolder();
        BloomIndex objectBloomIndex = new BloomIndex();
        BloomIndex bloomIndexMemory = new BloomIndex();
        BloomIndex bloomIndexMmap = new BloomIndex();
        BloomIndex objectBloomIndexString = new BloomIndex();
        BloomIndex objectBloomIndexDouble = new BloomIndex();
        BloomIndex bloomIndexMemoryString = new BloomIndex();
        BloomIndex bloomIndexMemoryDouble = new BloomIndex();
        BloomIndex bloomIndexMmapDouble = new BloomIndex();
        BloomIndex bloomIndexMmapString = new BloomIndex()) {
        folder.create();
        // number of values inserted into each source index
        int dataEntryNum = 2000000;
        // number of lookups issued against each deserialized index
        int queryNum = 10000;
        long startTime;
        long stopTime;
        long elapsedTime;
        // compare the performance on int data with 2000000 values
        File testFile = folder.newFile("int");
        objectBloomIndex.setExpectedNumOfEntries(dataEntryNum);
        Random rd = new Random();
        List<Integer> arr = new ArrayList<>();
        for (int i = 0; i < dataEntryNum; i++) {
            arr.add(rd.nextInt());
        }
        objectBloomIndex.addValues(Collections.singletonList(new Pair<>("testColumn", ImmutableList.of(arr))));
        // write the populated index to disk so both variants load the same bytes
        try (FileOutputStream fo = new FileOutputStream(testFile)) {
            objectBloomIndex.serialize(fo);
        }
        // variant 1: mmap disabled
        bloomIndexMemory.setMmapEnabled(false);
        bloomIndexMemory.setExpectedNumOfEntries(dataEntryNum);
        try (FileInputStream fi = new FileInputStream(testFile)) {
            bloomIndexMemory.deserialize(fi);
        }
        // variant 2: mmap enabled
        bloomIndexMmap.setMmapEnabled(true);
        bloomIndexMmap.setExpectedNumOfEntries(dataEntryNum);
        try (FileInputStream fi = new FileInputStream(testFile)) {
            bloomIndexMmap.deserialize(fi);
        }
        System.out.println(testFile);
        // The same Random instance feeds both timing loops, so the mmap loop queries
        // different random values than the memory loop.
        Random rdTest = new Random();
        // get query time using memory
        startTime = System.currentTimeMillis();
        for (int i = 0; i < queryNum; i++) {
            int testNum = rdTest.nextInt();
            RowExpression expression = simplePredicate(OperatorType.EQUAL, "testColumn", INTEGER, testNum);
            bloomIndexMemory.matches(expression);
        }
        stopTime = System.currentTimeMillis();
        elapsedTime = stopTime - startTime;
        System.out.println(elapsedTime);
        // get query time using mmap
        startTime = System.currentTimeMillis();
        for (int i = 0; i < queryNum; i++) {
            int testNum = rdTest.nextInt();
            RowExpression expression = simplePredicate(OperatorType.EQUAL, "testColumn", INTEGER, testNum);
            bloomIndexMmap.matches(expression);
        }
        stopTime = System.currentTimeMillis();
        elapsedTime = stopTime - startTime;
        System.out.println(elapsedTime);
        // both variants were loaded from the same file, so their filters must be equal
        BloomFilter memoryFilter = bloomIndexMemory.getFilter();
        BloomFilter mmapFilter = bloomIndexMmap.getFilter();
        assertEquals(mmapFilter, memoryFilter);
        // usage1 / fileUsage1 belong to the non-mmap index, usage2 / fileUsage2 to the mmap index
        long usage1 = bloomIndexMemory.getMemoryUsage();
        long usage2 = bloomIndexMmap.getMemoryUsage();
        assertTrue(usage1 > usage2, "mmap should use less memory.");
        long fileUsage1 = bloomIndexMemory.getDiskUsage();
        long fileUsage2 = bloomIndexMmap.getDiskUsage();
        assertTrue(fileUsage1 < fileUsage2, "mmap should use file space.");
        // compare the performance on double data with 2000000 entries
        File testFileDouble = folder.newFile("double");
        objectBloomIndexDouble.setExpectedNumOfEntries(dataEntryNum);
        Random rdDouble = new Random();
        List<Double> arrDouble = new ArrayList<>();
        for (int i = 0; i < dataEntryNum; i++) {
            arrDouble.add(rdDouble.nextDouble());
        }
        objectBloomIndexDouble.addValues(Collections.singletonList(new Pair<>("testColumn", ImmutableList.of(arrDouble))));
        try (FileOutputStream fo = new FileOutputStream(testFileDouble)) {
            objectBloomIndexDouble.serialize(fo);
        }
        // variant 1: mmap disabled
        bloomIndexMemoryDouble.setMmapEnabled(false);
        bloomIndexMemoryDouble.setExpectedNumOfEntries(dataEntryNum);
        try (FileInputStream fi = new FileInputStream(testFileDouble)) {
            bloomIndexMemoryDouble.deserialize(fi);
        }
        // variant 2: mmap enabled
        bloomIndexMmapDouble.setMmapEnabled(true);
        bloomIndexMmapDouble.setExpectedNumOfEntries(dataEntryNum);
        try (FileInputStream fi = new FileInputStream(testFileDouble)) {
            bloomIndexMmapDouble.deserialize(fi);
        }
        System.out.println(testFileDouble);
        // shared by both double timing loops, as in the int section
        Random rdTestDouble = new Random();
        // get query time using memory
        startTime = System.currentTimeMillis();
        for (int i = 0; i < queryNum; i++) {
            double testDouble = rdTestDouble.nextDouble();
            RowExpression expression = simplePredicate(OperatorType.EQUAL, "testColumn", DOUBLE, testDouble);
            bloomIndexMemoryDouble.matches(expression);
        }
        stopTime = System.currentTimeMillis();
        elapsedTime = stopTime - startTime;
        System.out.println(elapsedTime);
        // get query time using mmap
        startTime = System.currentTimeMillis();
        for (int i = 0; i < queryNum; i++) {
            double testDouble = rdTestDouble.nextDouble();
            RowExpression expression = simplePredicate(OperatorType.EQUAL, "testColumn", DOUBLE, testDouble);
            bloomIndexMmapDouble.matches(expression);
        }
        stopTime = System.currentTimeMillis();
        elapsedTime = stopTime - startTime;
        System.out.println(elapsedTime);
        // same equality / memory / disk checks as the int section, reusing the locals
        memoryFilter = bloomIndexMemoryDouble.getFilter();
        mmapFilter = bloomIndexMmapDouble.getFilter();
        assertEquals(mmapFilter, memoryFilter);
        usage1 = bloomIndexMemoryDouble.getMemoryUsage();
        usage2 = bloomIndexMmapDouble.getMemoryUsage();
        assertTrue(usage1 > usage2, "mmap should use less memory.");
        fileUsage1 = bloomIndexMemoryDouble.getDiskUsage();
        fileUsage2 = bloomIndexMmapDouble.getDiskUsage();
        assertTrue(fileUsage1 < fileUsage2, "mmap should use file space.");
        // compare the performance on UUID string with 2000000 entries
        File testFileString = folder.newFile("string");
        objectBloomIndexString.setExpectedNumOfEntries(dataEntryNum);
        List<String> arrString = new ArrayList<>();
        for (int i = 0; i < dataEntryNum; i++) {
            arrString.add(UUID.randomUUID().toString());
        }
        objectBloomIndexString.addValues(Collections.singletonList(new Pair<>("testColumn", ImmutableList.of(arrString))));
        try (FileOutputStream fo = new FileOutputStream(testFileString)) {
            objectBloomIndexString.serialize(fo);
        }
        // variant 1: mmap disabled
        bloomIndexMemoryString.setMmapEnabled(false);
        bloomIndexMemoryString.setExpectedNumOfEntries(dataEntryNum);
        try (FileInputStream fi = new FileInputStream(testFileString)) {
            bloomIndexMemoryString.deserialize(fi);
        }
        // variant 2: mmap enabled
        bloomIndexMmapString.setMmapEnabled(true);
        bloomIndexMmapString.setExpectedNumOfEntries(dataEntryNum);
        try (FileInputStream fi = new FileInputStream(testFileString)) {
            bloomIndexMmapString.deserialize(fi);
        }
        System.out.println(testFileString);
        // get query time using memory
        // (each query generates a fresh random UUID inside the timed loop)
        startTime = System.currentTimeMillis();
        for (int i = 0; i < queryNum; i++) {
            String testString = UUID.randomUUID().toString();
            RowExpression expression = simplePredicate(OperatorType.EQUAL, "testColumn", VARCHAR, testString);
            bloomIndexMemoryString.matches(expression);
        }
        stopTime = System.currentTimeMillis();
        elapsedTime = stopTime - startTime;
        System.out.println(elapsedTime);
        // get query time using mmap
        startTime = System.currentTimeMillis();
        for (int i = 0; i < queryNum; i++) {
            String testString = UUID.randomUUID().toString();
            RowExpression expression = simplePredicate(OperatorType.EQUAL, "testColumn", VARCHAR, testString);
            bloomIndexMmapString.matches(expression);
        }
        stopTime = System.currentTimeMillis();
        elapsedTime = stopTime - startTime;
        System.out.println(elapsedTime);
        // same equality / memory / disk checks as the previous sections
        memoryFilter = bloomIndexMemoryString.getFilter();
        mmapFilter = bloomIndexMmapString.getFilter();
        assertEquals(mmapFilter, memoryFilter);
        usage1 = bloomIndexMemoryString.getMemoryUsage();
        usage2 = bloomIndexMmapString.getMemoryUsage();
        assertTrue(usage1 > usage2, "mmap should use less memory.");
        fileUsage1 = bloomIndexMemoryString.getDiskUsage();
        fileUsage2 = bloomIndexMmapString.getDiskUsage();
        assertTrue(fileUsage1 < fileUsage2, "mmap should use file space.");
    }
}
Also used : ArrayList(java.util.ArrayList) RowExpression(io.prestosql.spi.relation.RowExpression) FileInputStream(java.io.FileInputStream) BloomFilter(io.prestosql.spi.util.BloomFilter) Random(java.util.Random) TempFolder(io.hetu.core.common.filesystem.TempFolder) FileOutputStream(java.io.FileOutputStream) File(java.io.File) Pair(io.prestosql.spi.heuristicindex.Pair) Test(org.testng.annotations.Test)

Example 22 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class BloomFilterUtils method filter.

/**
 * Applies a set of per-column bloom filters to a page and keeps only the surviving rows.
 *
 * <p>Every row starts out flagged as kept. Each filter is handed to {@code Block.filter}
 * together with the flag array for the (loaded) block of its column; that call is expected
 * to update the flags — NOTE(review): confirm against {@code Block.filter}.
 *
 * @param page source data page
 * @param bloomFilterMap bloom filter map, keyed by the index of the column each filter applies to
 * @return the given page itself when no row was dropped, otherwise a new page built from
 *         the positions returned by {@code toPositions}
 */
public static Page filter(Page page, Map<Integer, BloomFilter> bloomFilterMap) {
    int positionCount = page.getPositionCount();
    boolean[] keep = new boolean[positionCount];
    Arrays.fill(keep, true);

    // Let every filter vote on the rows of its own column.
    bloomFilterMap.forEach((columnIndex, bloomFilter) ->
            page.getBlock(columnIndex).getLoadedBlock().filter(bloomFilter, keep));

    int[] keptPositions = toPositions(keep);
    int keptCount = keptPositions.length;
    if (keptCount == positionCount) {
        // Nothing was filtered out, so the input page can be reused as is.
        return page;
    }

    Block[] filteredBlocks = new Block[page.getChannelCount()];
    for (int channel = 0; channel < filteredBlocks.length; channel++) {
        Block source = page.getBlock(channel);
        // Blocks that are still lazy stay lazy: the row selection is deferred to the loader.
        boolean unloadedLazy = source instanceof LazyBlock && !((LazyBlock) source).isLoaded();
        filteredBlocks[channel] = unloadedLazy
                ? new LazyBlock(keptCount, new RowFilterLazyBlockLoader(source, keptPositions))
                : source.getPositions(keptPositions, 0, keptCount);
    }
    return new Page(keptCount, filteredBlocks);
}
Also used : LazyBlock(io.prestosql.spi.block.LazyBlock) Block(io.prestosql.spi.block.Block) LazyBlock(io.prestosql.spi.block.LazyBlock) Page(io.prestosql.spi.Page) HashMap(java.util.HashMap) Map(java.util.Map) StateMap(io.prestosql.spi.statestore.StateMap) BloomFilter(io.prestosql.spi.util.BloomFilter)

Example 23 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class TestLogicalPart method getBloomFilterEqualityResult.

/**
 * Fetches pages from a LogicalPart for an equality predicate on column 0 while the
 * bloom-filter check is stubbed to a fixed answer.
 *
 * @param logicalPart part under test; its "bloomIdx" field is overwritten with a mocked filter
 * @param domainValue value used to build the single-value INTEGER domain
 * @param bloomFilterReturn result that the stubbed {@code testFilter} call returns
 * @return whatever {@code getPages} yields on the spied part
 */
private List<Page> getBloomFilterEqualityResult(LogicalPart logicalPart, Long domainValue, boolean bloomFilterReturn) {
    // Single-value domain: column == domainValue, nulls not allowed.
    Domain equalityDomain = Domain.create(ValueSet.ofRanges(equal(IntegerType.INTEGER, domainValue)), false);

    // Inject a mocked bloom filter for column 0 straight into the part.
    Map<Integer, BloomFilter> mockedBloomIdx = new HashMap<>();
    mockedBloomIdx.put(0, mock(BloomFilter.class));
    ReflectionTestUtils.setField(logicalPart, "bloomIdx", mockedBloomIdx);

    // Spy the part so the bloom-filter probe answers with the requested value.
    LogicalPart spiedPart = spy(logicalPart);
    doReturn(bloomFilterReturn).when(spiedPart).testFilter(Mockito.any(BloomFilter.class), eq(domainValue));

    SortedRangeSet rangeSet = (SortedRangeSet) equalityDomain.getValues();
    return spiedPart.getPages(Collections.emptyMap(), ImmutableMap.of(0, rangeSet.getOrderedRanges()), Collections.emptyMap());
}
Also used : SortedRangeSet(io.prestosql.spi.predicate.SortedRangeSet) HashMap(java.util.HashMap) Domain(io.prestosql.spi.predicate.Domain) BloomFilter(io.prestosql.spi.util.BloomFilter)

Example 24 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class TestDynamicFilterSupplier method testNotNullDynamicFilter.

/**
 * Verifies that a DynamicFilterSupplier built around a non-null supplier hands back
 * exactly the dynamic filters that the wrapped supplier produces.
 *
 * @throws IOException if writing the bloom filter to the in-memory stream fails
 */
@Test(description = "get dynamic-filter when supplier is not null")
void testNotNullDynamicFilter() throws IOException {
    // Populate a bloom filter with a few sample values.
    List<Long> sampleValues = ImmutableList.of(1L, 50L, 100L);
    ColumnHandle columnHandle = new TestingColumnHandle("test");
    BloomFilter bloomFilter = new BloomFilter(sampleValues.size(), 0.01);
    for (Long sample : sampleValues) {
        bloomFilter.add(sample);
    }

    // Serialize it, since the factory takes the filter as raw bytes.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    bloomFilter.writeTo(buffer);
    byte[] serializedFilter = buffer.toByteArray();

    DynamicFilter globalFilter = DynamicFilterFactory.create("testFilter", columnHandle, serializedFilter, DynamicFilter.Type.GLOBAL);
    Supplier<List<Map<ColumnHandle, DynamicFilter>>> delegate = () -> ImmutableList.of(ImmutableMap.of(columnHandle, globalFilter));

    DynamicFilterSupplier filterSupplier = new DynamicFilterSupplier(delegate, System.currentTimeMillis(), 10000);
    assertEquals(filterSupplier.getDynamicFilters(), delegate.get());
}
Also used : TestingColumnHandle(io.prestosql.spi.connector.TestingColumnHandle) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) TestingColumnHandle(io.prestosql.spi.connector.TestingColumnHandle) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) BloomFilter(io.prestosql.spi.util.BloomFilter) Test(org.testng.annotations.Test)

Example 25 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class IntArrayBlockTest method testFilter.

/**
 * Compares testing every value of an IntArrayBlock against a bloom filter one by one
 * with the block's own {@code filter} call, over 100 rounds of 1024 positions.
 *
 * <p>Both paths must produce the same flags for every position; the accumulated
 * nanoseconds of each path are printed at the end.
 *
 * @param size argument passed to {@code getBf} to obtain the bloom filters; also printed
 *             in the output lines
 * @throws RuntimeException if the two paths disagree on any position
 */
public void testFilter(int size) {
    int positions = 1024;
    IntArrayBlock manualBlock = new IntArrayBlock(positions, Optional.empty(), getValues(positions));
    IntArrayBlock batchBlock = new IntArrayBlock(positions, Optional.empty(), getValues(positions));
    BloomFilter manualFilter = getBf(size);
    BloomFilter batchFilter = getBf(size);
    long manualNanos = 0;
    long batchNanos = 0;
    for (int round = 0; round < 100; round++) {
        boolean[] manualFlags = new boolean[positions];
        boolean[] batchFlags = new boolean[positions];
        Arrays.fill(manualFlags, Boolean.TRUE);
        Arrays.fill(batchFlags, Boolean.TRUE);

        // Path 1: read each value and probe the filter individually.
        long manualStart = System.nanoTime();
        for (int pos = 0; pos < positions; pos++) {
            int value = manualBlock.getInt(pos, 0);
            manualFlags[pos] = manualFilter.test(value);
        }
        manualNanos += System.nanoTime() - manualStart;

        // Path 2: let the block apply the filter itself.
        long batchStart = System.nanoTime();
        batchBlock.filter(batchFilter, batchFlags);
        batchNanos += System.nanoTime() - batchStart;

        // The two paths have to agree position by position.
        for (int pos = 0; pos < positions; pos++) {
            if (manualFlags[pos] != batchFlags[pos]) {
                throw new RuntimeException("error" + pos);
            }
        }
    }
    System.out.println("bfsize: " + size + "  origi: " + manualNanos);
    System.out.println("bfsize: " + size + "  block: " + batchNanos);
}
Also used : BloomFilter(io.prestosql.spi.util.BloomFilter)

Aggregations

BloomFilter (io.prestosql.spi.util.BloomFilter)26 ColumnHandle (io.prestosql.spi.connector.ColumnHandle)9 DynamicFilter (io.prestosql.spi.dynamicfilter.DynamicFilter)9 ArrayList (java.util.ArrayList)8 HashMap (java.util.HashMap)7 HashSet (java.util.HashSet)7 Test (org.testng.annotations.Test)7 BloomFilterDynamicFilter (io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter)6 ByteArrayOutputStream (java.io.ByteArrayOutputStream)6 Map (java.util.Map)6 Page (io.prestosql.spi.Page)5 IOException (java.io.IOException)4 ImmutableMap (com.google.common.collect.ImmutableMap)3 Block (io.prestosql.spi.block.Block)3 Symbol (io.prestosql.spi.plan.Symbol)3 StateSet (io.prestosql.spi.statestore.StateSet)3 List (java.util.List)3 ImmutableList (com.google.common.collect.ImmutableList)2 BlockBuilder (io.prestosql.spi.block.BlockBuilder)2 LongArrayBlockBuilder (io.prestosql.spi.block.LongArrayBlockBuilder)2