Use of io.prestosql.spi.util.BloomFilter in the hetu-core project by openLooKeng.
Example: class TestBloomIndex, method testMmapUse.
@Test
public void testMmapUse() throws IOException {
    // Experiment comparing query latency, heap usage and disk usage of the
    // in-memory bloom filter representation against the mmap-backed one,
    // for int, double and UUID-string data sets of the same size.
    try (TempFolder folder = new TempFolder();
            BloomIndex objectBloomIndex = new BloomIndex();
            BloomIndex bloomIndexMemory = new BloomIndex();
            BloomIndex bloomIndexMmap = new BloomIndex();
            BloomIndex objectBloomIndexString = new BloomIndex();
            BloomIndex objectBloomIndexDouble = new BloomIndex();
            BloomIndex bloomIndexMemoryString = new BloomIndex();
            BloomIndex bloomIndexMemoryDouble = new BloomIndex();
            BloomIndex bloomIndexMmapDouble = new BloomIndex();
            BloomIndex bloomIndexMmapString = new BloomIndex()) {
        folder.create();
        int dataEntryNum = 2000000;
        int queryNum = 10000;
        Random rd = new Random();

        // compare the performance on int data with 2000000 values
        List<Integer> intValues = new ArrayList<>(dataEntryNum);
        for (int i = 0; i < dataEntryNum; i++) {
            intValues.add(rd.nextInt());
        }
        List<RowExpression> intQueries = new ArrayList<>(queryNum);
        for (int i = 0; i < queryNum; i++) {
            intQueries.add(simplePredicate(OperatorType.EQUAL, "testColumn", INTEGER, rd.nextInt()));
        }
        compareMemoryAndMmap(folder, "int", dataEntryNum, objectBloomIndex, bloomIndexMemory, bloomIndexMmap, intValues, intQueries);

        // compare the performance on double data with 2000000 entries
        List<Double> doubleValues = new ArrayList<>(dataEntryNum);
        for (int i = 0; i < dataEntryNum; i++) {
            doubleValues.add(rd.nextDouble());
        }
        List<RowExpression> doubleQueries = new ArrayList<>(queryNum);
        for (int i = 0; i < queryNum; i++) {
            doubleQueries.add(simplePredicate(OperatorType.EQUAL, "testColumn", DOUBLE, rd.nextDouble()));
        }
        compareMemoryAndMmap(folder, "double", dataEntryNum, objectBloomIndexDouble, bloomIndexMemoryDouble, bloomIndexMmapDouble, doubleValues, doubleQueries);

        // compare the performance on UUID string with 2000000 entries
        List<String> stringValues = new ArrayList<>(dataEntryNum);
        for (int i = 0; i < dataEntryNum; i++) {
            stringValues.add(UUID.randomUUID().toString());
        }
        List<RowExpression> stringQueries = new ArrayList<>(queryNum);
        for (int i = 0; i < queryNum; i++) {
            stringQueries.add(simplePredicate(OperatorType.EQUAL, "testColumn", VARCHAR, UUID.randomUUID().toString()));
        }
        compareMemoryAndMmap(folder, "string", dataEntryNum, objectBloomIndexString, bloomIndexMemoryString, bloomIndexMmapString, stringValues, stringQueries);
    }
}

/**
 * Builds a bloom index over {@code values}, serializes it to a file in {@code folder},
 * then deserializes it twice — once with mmap disabled ({@code memoryIndex}) and once
 * with mmap enabled ({@code mmapIndex}). Prints the elapsed time for evaluating
 * {@code queryExpressions} against each copy, then asserts that both copies hold an
 * equal filter, that the mmap copy uses less heap memory, and that the mmap copy
 * uses more disk space.
 *
 * @param folder temp folder the serialized index file is created in
 * @param fileName name of the serialized index file inside {@code folder}
 * @param expectedEntries expected number of entries configured on each index
 * @param sourceIndex index the values are added to and serialized from
 * @param memoryIndex target index deserialized with mmap disabled
 * @param mmapIndex target index deserialized with mmap enabled
 * @param values column values to index under "testColumn"
 * @param queryExpressions equality predicates probed against both copies
 */
private <T> void compareMemoryAndMmap(TempFolder folder, String fileName, int expectedEntries,
        BloomIndex sourceIndex, BloomIndex memoryIndex, BloomIndex mmapIndex,
        List<T> values, List<RowExpression> queryExpressions) throws IOException {
    File testFile = folder.newFile(fileName);
    sourceIndex.setExpectedNumOfEntries(expectedEntries);
    sourceIndex.addValues(Collections.singletonList(new Pair<>("testColumn", ImmutableList.of(values))));
    try (FileOutputStream fo = new FileOutputStream(testFile)) {
        sourceIndex.serialize(fo);
    }
    memoryIndex.setMmapEnabled(false);
    memoryIndex.setExpectedNumOfEntries(expectedEntries);
    try (FileInputStream fi = new FileInputStream(testFile)) {
        memoryIndex.deserialize(fi);
    }
    mmapIndex.setMmapEnabled(true);
    mmapIndex.setExpectedNumOfEntries(expectedEntries);
    try (FileInputStream fi = new FileInputStream(testFile)) {
        mmapIndex.deserialize(fi);
    }
    System.out.println(testFile);
    // Same query set for both copies so the printed timings are directly comparable.
    System.out.println(timeQueries(memoryIndex, queryExpressions));
    System.out.println(timeQueries(mmapIndex, queryExpressions));
    assertEquals(mmapIndex.getFilter(), memoryIndex.getFilter());
    assertTrue(memoryIndex.getMemoryUsage() > mmapIndex.getMemoryUsage(), "mmap should use less memory.");
    assertTrue(memoryIndex.getDiskUsage() < mmapIndex.getDiskUsage(), "mmap should use file space.");
}

/**
 * Evaluates every expression against {@code index} and returns the elapsed wall-clock
 * time in milliseconds. Only {@code matches} is inside the timed region.
 */
private long timeQueries(BloomIndex index, List<RowExpression> queryExpressions) {
    long startTime = System.currentTimeMillis();
    for (RowExpression expression : queryExpressions) {
        index.matches(expression);
    }
    return System.currentTimeMillis() - startTime;
}
Use of io.prestosql.spi.util.BloomFilter in the hetu-core project by openLooKeng.
Example: class BloomFilterUtils, method filter.
/**
 * Filters the positions of a page using the supplied bloom filters.
 * <p>
 * Each entry in {@code bloomFilterMap} maps a channel index to the bloom filter its
 * values must pass. A position survives only if it passes the filter of every
 * mapped channel.
 *
 * @param page source data page
 * @param bloomFilterMap bloom filter map, keyed by channel index
 * @return the original page unchanged when every position passes, otherwise a new
 *         page containing only the surviving positions (unloaded lazy blocks stay lazy)
 */
public static Page filter(Page page, Map<Integer, BloomFilter> bloomFilterMap) {
    // Start with every position marked as kept; each filter can only clear bits.
    boolean[] result = new boolean[page.getPositionCount()];
    Arrays.fill(result, true);
    for (Map.Entry<Integer, BloomFilter> entry : bloomFilterMap.entrySet()) {
        int columnIndex = entry.getKey();
        Block block = page.getBlock(columnIndex).getLoadedBlock();
        block.filter(entry.getValue(), result);
    }
    int[] rowsToKeep = toPositions(result);
    // Nothing was filtered out — return the page as-is without copying.
    if (rowsToKeep.length == page.getPositionCount()) {
        return page;
    }
    Block[] adaptedBlocks = new Block[page.getChannelCount()];
    for (int i = 0; i < adaptedBlocks.length; i++) {
        Block block = page.getBlock(i);
        if (block instanceof LazyBlock && !((LazyBlock) block).isLoaded()) {
            // Defer loading: wrap the lazy block so filtering happens on first access.
            adaptedBlocks[i] = new LazyBlock(rowsToKeep.length, new RowFilterLazyBlockLoader(page.getBlock(i), rowsToKeep));
        } else {
            adaptedBlocks[i] = block.getPositions(rowsToKeep, 0, rowsToKeep.length);
        }
    }
    return new Page(rowsToKeep.length, adaptedBlocks);
}
Use of io.prestosql.spi.util.BloomFilter in the hetu-core project by openLooKeng.
Example: class TestLogicalPart, method getBloomFilterEqualityResult.
private List<Page> getBloomFilterEqualityResult(LogicalPart logicalPart, Long domainValue, boolean bloomFilterReturn) {
    // Plant a mocked bloom filter for column 0 on the logical part.
    Map<Integer, BloomFilter> bloomFilters = new HashMap<>();
    bloomFilters.put(0, mock(BloomFilter.class));
    ReflectionTestUtils.setField(logicalPart, "bloomIdx", bloomFilters);

    // Stub testFilter so the probe for domainValue reports the requested outcome.
    LogicalPart partSpy = spy(logicalPart);
    doReturn(bloomFilterReturn).when(partSpy).testFilter(Mockito.any(BloomFilter.class), eq(domainValue));

    // Query with an equality domain on column 0 and return the resulting pages.
    Domain equalityDomain = Domain.create(ValueSet.ofRanges(equal(IntegerType.INTEGER, domainValue)), false);
    return partSpy.getPages(Collections.emptyMap(), ImmutableMap.of(0, ((SortedRangeSet) equalityDomain.getValues()).getOrderedRanges()), Collections.emptyMap());
}
Use of io.prestosql.spi.util.BloomFilter in the hetu-core project by openLooKeng.
Example: class TestDynamicFilterSupplier, method testNotNullDynamicFilter.
@Test(description = "get dynamic-filter when supplier is not null")
void testNotNullDynamicFilter() throws IOException {
    // Build a bloom filter over a few known values and serialize it.
    List<Long> expectedValues = ImmutableList.of(1L, 50L, 100L);
    ColumnHandle column = new TestingColumnHandle("test");
    BloomFilter bloom = new BloomFilter(expectedValues.size(), 0.01);
    for (Long value : expectedValues) {
        bloom.add(value);
    }
    ByteArrayOutputStream serialized = new ByteArrayOutputStream();
    bloom.writeTo(serialized);

    // Wrap the serialized filter in a supplier of per-column dynamic filters.
    DynamicFilter dynamicFilter = DynamicFilterFactory.create("testFilter", column, serialized.toByteArray(), DynamicFilter.Type.GLOBAL);
    Supplier<List<Map<ColumnHandle, DynamicFilter>>> filterSupplier = () -> ImmutableList.of(ImmutableMap.of(column, dynamicFilter));

    // A non-null, non-expired supplier must be passed through unchanged.
    DynamicFilterSupplier dynamicFilterSupplier = new DynamicFilterSupplier(filterSupplier, System.currentTimeMillis(), 10000);
    assertEquals(dynamicFilterSupplier.getDynamicFilters(), filterSupplier.get());
}
Use of io.prestosql.spi.util.BloomFilter in the hetu-core project by openLooKeng.
Example: class IntArrayBlockTest, method testFilter.
/**
 * Compares per-position bloom filter probing (getInt + test) against the block's bulk
 * {@code filter} implementation: both must produce identical results, and the
 * accumulated nanosecond timings of each path are printed for the given filter size.
 *
 * @param size size parameter forwarded to {@code getBf} when building the filters
 * @throws RuntimeException if the two paths disagree at any position
 */
public void testFilter(int size) {
    int count = 1024;
    IntArrayBlock block1 = new IntArrayBlock(count, Optional.empty(), getValues(count));
    IntArrayBlock block2 = new IntArrayBlock(count, Optional.empty(), getValues(count));
    BloomFilter bf1 = getBf(size);
    BloomFilter bf2 = getBf(size);
    long total1 = 0;
    long total2 = 0;
    for (int j = 0; j < 100; j++) {
        boolean[] result1 = new boolean[count];
        boolean[] result2 = new boolean[count];
        Arrays.fill(result1, true);
        Arrays.fill(result2, true);
        // Time per-position probing through the public accessor.
        long start = System.nanoTime();
        for (int i = 0; i < count; i++) {
            int value = block1.getInt(i, 0);
            result1[i] = bf1.test(value);
        }
        total1 += System.nanoTime() - start;
        // Time the block's bulk filter path.
        start = System.nanoTime();
        block2.filter(bf2, result2);
        total2 += System.nanoTime() - start;
        // Both paths must agree at every position.
        for (int i = 0; i < count; i++) {
            if (result1[i] != result2[i]) {
                throw new RuntimeException("error" + i);
            }
        }
    }
    System.out.println("bfsize: " + size + " origi: " + total1);
    System.out.println("bfsize: " + size + " block: " + total2);
}
Aggregations