Use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.
The class VariableWidthBlockTest, method getBf.
private BloomFilter getBf(int size) {
    Random rnd = new Random();
    BloomFilter bf = new BloomFilter(size, 0.01);
    // Populate the filter with 100 random values.
    for (int i = 0; i < 100; i++) {
        bf.add(("value" + rnd.nextLong()).getBytes());
    }
    return bf;
}
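For context, a minimal sketch of the add-then-test round trip these snippets rely on. It uses only the BloomFilter calls that appear on this page; reading the constructor arguments as expected insertions and target false-positive rate is an assumption based on usage here, not on the class's documentation.

// Minimal sketch of the add/test round trip, using only calls seen on this
// page. Interpreting the constructor as (expected insertions, target
// false-positive rate) is an assumption based on usage.
private static boolean roundTrip() {
    BloomFilter bf = new BloomFilter(1024, 0.01);
    bf.add("value1".getBytes());
    // An added value always tests positive; an absent value tests negative
    // except for a roughly 1% false-positive rate.
    return bf.test("value1".getBytes()) && !bf.test("value2".getBytes());
}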
Use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.
The class VariableWidthBlockTest, method testFilter.
public void testFilter(int size) {
    int count = 1024;
    boolean[] valid = new boolean[count];
    Arrays.fill(valid, true);
    VariableWidthBlock block = getBlock();
    String[] values = new String[block.getPositionCount()];
    BloomFilter bf1 = getBf(size);
    BloomFilter bf2 = getBf(size);
    // Materialize every value once up front so the timed loops below are
    // not skewed by first-touch costs.
    for (int i = 0; i < block.getPositionCount(); i++) {
        values[i] = block.getString(i, 0, 0);
    }
    // Compare per-position testing against the block-level filter over
    // several rounds.
    for (int j = 0; j < 10; j++) {
        long start = System.nanoTime();
        for (int i = 0; i < count; i++) {
            bf1.test(block.getString(i, 0, 0).getBytes());
        }
        System.out.println("original: " + (System.nanoTime() - start));
        start = System.nanoTime();
        block.filter(bf2, valid);
        System.out.println("   block: " + (System.nanoTime() - start));
    }
}
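As a reference point for the benchmark above, here is a hypothetical per-position equivalent of block.filter(bf2, valid): test each position's bytes individually and clear its slot in the valid mask on a miss. The semantics of VariableWidthBlock.filter are assumed from how the test uses it, not taken from its implementation.

// Hypothetical baseline matching what the "original" timing loop measures,
// extended to update the valid mask the way block.filter presumably does.
private static void filterByPosition(VariableWidthBlock block, BloomFilter bf, boolean[] valid) {
    for (int i = 0; i < block.getPositionCount() && i < valid.length; i++) {
        // A position survives only if its bytes may be in the filter.
        valid[i] = valid[i] && bf.test(block.getString(i, 0, 0).getBytes());
    }
}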
Use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.
The class LogicalPart, method process.
void process() {
    switch (processingState.get()) {
        // Nothing to do if the part is still receiving pages, is already
        // being processed, or has finished; any other state falls through
        // to processing below.
        case ACCEPTING_PAGES:
        case PROCESSING:
        case COMPLETED:
            return;
    }
    processingState.set(LogicalPartState.PROCESSING);
    // Sort the pages and create a sparse index on the first sort channel.
    if (!sortChannels.isEmpty()) {
        SortBuffer sortBuffer = new SortBuffer(
                new DataSize(maxLogicalPartBytes, DataSize.Unit.BYTE),
                types,
                sortChannels,
                sortOrders,
                pageSorter,
                maxPageSizeBytes);
        pages.forEach(sortBuffer::add);
        List<Page> sortedPages = new ArrayList<>();
        sortBuffer.flushTo(sortedPages::add);
        // Map the first value of each sorted page to that page's index.
        int newRowCount = 0;
        long newByteSize = 0;
        for (int i = 0; i < sortedPages.size(); i++) {
            Page page = sortedPages.get(i);
            newByteSize += page.getSizeInBytes();
            newRowCount += page.getPositionCount();
            Object value = getNativeValue(types.get(sortChannels.get(0)), page.getBlock(sortChannels.get(0)), 0);
            if (value != null) {
                if (!(value instanceof Comparable)) {
                    throw new RuntimeException(String.format(Locale.ENGLISH,
                            "Unable to create sparse index for channel %d, type is not Comparable.",
                            sortChannels.get(0)));
                }
                sparseIdx.computeIfAbsent((Comparable) value, e -> new SparseValue(new ArrayList<>()))
                        .getPageIndices()
                        .add(i);
            }
        }
        // Record the last value covered by each sparse-index entry.
        for (SparseValue sparseValue : sparseIdx.values()) {
            int lastPageIndex = sparseValue.getPageIndices().get(sparseValue.getPageIndices().size() - 1);
            Page lastPage = sortedPages.get(lastPageIndex);
            sparseValue.setLast((Comparable) getNativeValue(
                    types.get(sortChannels.get(0)),
                    lastPage.getBlock(sortChannels.get(0)),
                    lastPage.getPositionCount() - 1));
        }
        if (newRowCount != rows) {
            throw new RuntimeException("Pages mismatch while processing");
        }
        // Create the min-max index for the sort column: since the pages are
        // sorted, the min is the very first value and the max is the very last.
        Page firstPage = sortedPages.get(0);
        Page lastPage = sortedPages.get(sortedPages.size() - 1);
        Object minValue = getNativeValue(types.get(sortChannels.get(0)), firstPage.getBlock(sortChannels.get(0)), 0);
        Object maxValue = getNativeValue(types.get(sortChannels.get(0)), lastPage.getBlock(sortChannels.get(0)), lastPage.getPositionCount() - 1);
        if (minValue instanceof Comparable && maxValue instanceof Comparable) {
            minMaxIdx.put(sortChannels.get(0), new AbstractMap.SimpleEntry<>((Comparable) minValue, (Comparable) maxValue));
        }
        this.byteSize = newByteSize;
        // Drop the unsorted pages to help trigger GC of the old copies.
        this.pages.clear();
        this.pages = sortedPages;
    }
    // Create a bloom index on each index column.
    for (Integer indexChannel : indexChannels) {
        // Collect the distinct non-null values of the column.
        Set<Object> values = new HashSet<>();
        for (Page page : getPages()) {
            for (int i = 0; i < page.getPositionCount(); i++) {
                Object value = getNativeValue(types.get(indexChannel), page.getBlock(indexChannel), i);
                if (value != null) {
                    values.add(value);
                }
            }
        }
        BloomFilter filter = values.isEmpty() ? null : new BloomFilter(values.size(), 0.05);
        boolean unsupportedValue = false;
        // If the column is being sorted on, we already have min-max values from
        // the first and last values of the sorted pages, so we can skip this
        // step; otherwise the min-max values must be determined by comparison.
        boolean createMinMax = !minMaxIdx.containsKey(indexChannel);
        Comparable min = null;
        Comparable max = null;
        for (Object value : values) {
            if (createMinMax && value instanceof Comparable) {
                Comparable comparableValue = (Comparable) value;
                min = min(min, comparableValue);
                max = max(max, comparableValue);
            }
            if (!addToFilter(filter, value)) {
                LOG.warn("Unsupported index column type %s", value.getClass().getSimpleName());
                unsupportedValue = true;
                min = null;
                max = null;
                break;
            }
        }
        if (min != null && max != null) {
            minMaxIdx.put(indexChannel, new AbstractMap.SimpleEntry<>(min, max));
        }
        // Skip the bloom index if the column type is unsupported or empty.
        if (unsupportedValue || filter == null) {
            continue;
        }
        bloomIdx.put(indexChannel, filter);
    }
    // Spill the processed pages to disk so the part can be restored later.
    try {
        writePages();
    }
    catch (Exception e) {
        LOG.error("Error spilling LogicalPart " + getPageFileName() + " to disk. Restoring will be unavailable.", e);
    }
    this.processingState.set(LogicalPartState.COMPLETED);
}
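To show how the indexes built by process() would pay off at read time, here is an illustrative pruning check. The field names (minMaxIdx, bloomIdx) mirror the method above, but the canSkip method itself and its single-equality-value predicate shape are assumptions for illustration; LogicalPart's real lookup path may differ.

// Illustrative sketch: decide whether this LogicalPart can be skipped for an
// equality predicate on one channel, using the indexes created in process().
boolean canSkip(int channel, Comparable lookupValue, byte[] lookupBytes) {
    Map.Entry<Comparable, Comparable> minMax = minMaxIdx.get(channel);
    if (minMax != null
            && (lookupValue.compareTo(minMax.getKey()) < 0
                    || lookupValue.compareTo(minMax.getValue()) > 0)) {
        // Outside [min, max]: no page in this part can match.
        return true;
    }
    BloomFilter filter = bloomIdx.get(channel);
    // Bloom filters have no false negatives, so a miss is definitive;
    // a hit only means the value may be present.
    return filter != null && !filter.test(lookupBytes);
}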
Use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.
The class TestHiveUtil, method testIsPartitionFiltered.
@Test
public void testIsPartitionFiltered() {
    TypeManager typeManager = new TestingTypeManager();
    assertFalse(isPartitionFiltered(null, null, typeManager), "Should not filter partition if either partitions or dynamicFilters is null");
    Set<DynamicFilter> dynamicFilters = new HashSet<>();
    List<HivePartitionKey> partitions = new ArrayList<>();
    assertFalse(isPartitionFiltered(partitions, null, typeManager), "Should not filter partition if either partitions or dynamicFilters is null");
    assertFalse(isPartitionFiltered(null, ImmutableList.of(dynamicFilters), typeManager), "Should not filter partition if either partitions or dynamicFilters is null");
    assertFalse(isPartitionFiltered(partitions, ImmutableList.of(dynamicFilters), typeManager), "Should not filter partition if partitions and dynamicFilters are empty");
    partitions.add(new HivePartitionKey("pt_d", "0"));
    partitions.add(new HivePartitionKey("app_id", "10000"));
    assertFalse(isPartitionFiltered(partitions, ImmutableList.of(dynamicFilters), typeManager), "Should not filter partition if dynamicFilters is empty");
    ColumnHandle dayColumn = new HiveColumnHandle("pt_d", HIVE_LONG, parseTypeSignature(BIGINT), 0, PARTITION_KEY, Optional.empty());
    BloomFilter dayFilter = new BloomFilter(1024 * 1024, 0.01);
    dynamicFilters.add(new BloomFilterDynamicFilter("1", dayColumn, dayFilter, DynamicFilter.Type.GLOBAL));
    assertTrue(isPartitionFiltered(partitions, ImmutableList.of(dynamicFilters), typeManager), "Should filter partition if any dynamicFilter has 0 element count");
    dayFilter.add(1L);
    assertTrue(isPartitionFiltered(partitions, ImmutableList.of(dynamicFilters), typeManager), "Should filter partition if partition value not in dynamicFilter");
    dayFilter.add(0L);
    assertFalse(isPartitionFiltered(partitions, ImmutableList.of(dynamicFilters), typeManager), "Should not filter partition if partition value is in dynamicFilter");
    Set<DynamicFilter> dynamicFilters1 = new HashSet<>();
    BloomFilter dayFilter1 = new BloomFilter(1024 * 1024, 0.01);
    dynamicFilters1.add(new BloomFilterDynamicFilter("1", dayColumn, dayFilter1, DynamicFilter.Type.GLOBAL));
    dayFilter1.add(0L);
    assertFalse(isPartitionFiltered(partitions, ImmutableList.of(dynamicFilters1), typeManager), "Should not filter partition if partition value is in dynamicFilter");
}
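The assertions above pin down a contract that can be summarized in a short sketch. This is not the real HiveUtil.isPartitionFiltered, only the behavior the test demands: a partition is filtered out when some bloom dynamic filter rejects its partition-key value, and a freshly created (empty) filter rejects everything.

// Behavior implied by the test, not the actual implementation: check each
// partition key's bytes against the bloom filter registered for its column.
static boolean isPartitionFilteredSketch(Map<String, byte[]> partitionValues, Map<String, BloomFilter> filtersByColumn) {
    for (Map.Entry<String, BloomFilter> entry : filtersByColumn.entrySet()) {
        byte[] value = partitionValues.get(entry.getKey());
        // An empty filter tests negative for every value, so it filters the
        // partition, matching the "0 element count" assertion above.
        if (value != null && !entry.getValue().test(value)) {
            return true;
        }
    }
    return false;
}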
Use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.
The class TestDynamicFilterServiceWithBloomFilter, method mockLocalDynamicFilter.
private void mockLocalDynamicFilter(String taskId, String filterId, String queryId, List<String> values) {
    BloomFilter bloomFilter = new BloomFilter(1024 * 1024, 0.1);
    for (String val : values) {
        bloomFilter.add(val.getBytes(StandardCharsets.UTF_8));
    }
    String key = DynamicFilterUtils.createKey(DynamicFilterUtils.PARTIALPREFIX, filterId, queryId);
    try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
        // Serialize the filter and register it, along with the task id,
        // in the state store.
        bloomFilter.writeTo(out);
        byte[] finalOutput = out.toByteArray();
        ((StateSet) stateStoreProvider.getStateStore().getStateCollection(key)).add(finalOutput);
        ((StateSet) stateStoreProvider.getStateStore().getStateCollection(DynamicFilterUtils.createKey(DynamicFilterUtils.TASKSPREFIX, filterId, queryId))).add(taskId);
    }
    catch (IOException e) {
        Assert.fail("Could not register finished filter, exception happened: " + e.getMessage());
    }
}
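A consumer of the state store would reverse the serialization step above. The writeTo(OutputStream) call appears in the method; the matching BloomFilter.readFrom(InputStream) used below is an assumed counterpart named for illustration, so treat this as a shape, not an exact API.

// Hypothetical round trip of the serialization used above.
// BloomFilter.readFrom(InputStream) is an assumed counterpart to writeTo.
BloomFilter original = new BloomFilter(1024 * 1024, 0.1);
original.add("someValue".getBytes(StandardCharsets.UTF_8));
byte[] serialized;
try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
    original.writeTo(out);
    serialized = out.toByteArray();
}
BloomFilter restored = BloomFilter.readFrom(new ByteArrayInputStream(serialized));
// A value added before serialization must still test positive afterwards.
boolean present = restored.test("someValue".getBytes(StandardCharsets.UTF_8));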