Search in sources :

Example 1 with BloomFilterDef

use of org.apache.drill.exec.work.filter.BloomFilterDef in project drill by apache.

the class RuntimeFilterVisitor method generateRuntimeFilter.

/**
 * Tries to derive a RuntimeFilterDef for the given hash join. The result is only partially
 * populated: the "local" flag of each BloomFilterDef and the sendToForeman flag of the
 * RuntimeFilterDef are created with placeholder values and are expected to be set later.
 *
 * @param hashJoinPrel the hash join to inspect
 * @return a partially filled RuntimeFilterDef, or null when the join is not an INNER/RIGHT
 *         equi-join, no exchange is found under the right input, or no join key yields a
 *         usable bloom filter
 */
private RuntimeFilterDef generateRuntimeFilter(HashJoinPrel hashJoinPrel) {
    JoinRelType joinType = hashJoinPrel.getJoinType();
    JoinInfo joinInfo = hashJoinPrel.analyzeCondition();
    // Only equi-joins qualify ...
    if (!joinInfo.isEqui()) {
        return null;
    }
    // ... and only when the join type is INNER or RIGHT.
    if (joinType != JoinRelType.INNER && joinType != JoinRelType.RIGHT) {
        return null;
    }
    // TODO check whether to enable RuntimeFilter according to the NDV percent, e.g.:
    //     double threshold = 0.5;
    //     double percent = leftNDV / rightNDV;
    //     if (percent > threshold) {
    //         return null;
    //     }
    RelNode probeSide = hashJoinPrel.getLeft();
    RelNode buildSide = hashJoinPrel.getRight();
    // An exchange must exist under the right input
    // (original note: can only be BroadcastExchangePrel or HashToRandomExchangePrel).
    if (findRightExchangePrel(buildSide) == null) {
        return null;
    }
    List<String> probeFieldNames = probeSide.getRowType().getFieldNames();
    List<String> buildFieldNames = buildSide.getRowType().getFieldNames();
    List<Integer> probeKeys = hashJoinPrel.getLeftKeys();
    List<Integer> buildKeys = hashJoinPrel.getRightKeys();
    RelMetadataQuery mq = probeSide.getCluster().getMetadataQuery();

    List<BloomFilterDef> bloomFilterDefs = new ArrayList<>();
    // Remembers the scan of the most recently accepted join key.
    ScanPrel lastProbeScan = null;
    // Left and right keys are paired by position.
    for (int keyPos = 0; keyPos < probeKeys.size(); keyPos++) {
        String probeFieldName = probeFieldNames.get(probeKeys.get(keyPos));
        String buildFieldName = buildFieldNames.get(buildKeys.get(keyPos));
        // Locate a scan under the left input for this key. Per the original author's note, this
        // lookup also filters out left join keys that are produced by a function call.
        ScanPrel scanPrel = findLeftScanPrel(probeFieldName, probeSide);
        if (scanPrel == null || containBlockNode((Prel) probeSide, scanPrel)) {
            // No scan for this key, or a block node was encountered between the join and the scan.
            continue;
        }
        // Collect NDV from the metadata for the key column of the scan.
        // NOTE(review): assumes getField(...) never returns null for a scan returned by
        // findLeftScanPrel - confirm against that helper.
        int fieldIndex = scanPrel.getRowType().getField(probeFieldName, true, true).getIndex();
        Double ndv = mq.getDistinctRowCount(scanPrel, ImmutableBitSet.of(fieldIndex), null);
        if (ndv == null) {
            // If NDV is not supplied, estimate it as 10% of the left input's row count.
            ndv = probeSide.estimateRowCount(mq) * 0.1;
        }
        // Size the bloom filter from the NDV, capped at the configured maximum.
        int sizeInBytes = BloomFilter.optimalNumOfBytes(ndv.longValue(), fpp);
        if (sizeInBytes > bloomFilterMaxSizeInBytesDef) {
            sizeInBytes = bloomFilterMaxSizeInBytesDef;
        }
        // The "local" parameter (false here) is left to be set later.
        BloomFilterDef bloomFilterDef = new BloomFilterDef(sizeInBytes, false, probeFieldName, buildFieldName);
        bloomFilterDef.setLeftNDV(ndv);
        bloomFilterDefs.add(bloomFilterDef);
        toAddRuntimeFilter.add(scanPrel);
        lastProbeScan = scanPrel;
    }
    if (bloomFilterDefs.isEmpty()) {
        return null;
    }
    // The sendToForeman parameter is left to be set later.
    RuntimeFilterDef runtimeFilterDef = new RuntimeFilterDef(true, false, bloomFilterDefs, false, -1);
    // Only the scan of the last accepted key is associated with this join.
    probeSideScan2hj.put(lastProbeScan, hashJoinPrel);
    return runtimeFilterDef;
}
Also used : RelMetadataQuery(org.apache.calcite.rel.metadata.RelMetadataQuery) ScanPrel(org.apache.drill.exec.planner.physical.ScanPrel) ArrayList(java.util.ArrayList) RelDataType(org.apache.calcite.rel.type.RelDataType) ExchangePrel(org.apache.drill.exec.planner.physical.ExchangePrel) BroadcastExchangePrel(org.apache.drill.exec.planner.physical.BroadcastExchangePrel) JoinInfo(org.apache.calcite.rel.core.JoinInfo) JoinRelType(org.apache.calcite.rel.core.JoinRelType) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RelNode(org.apache.calcite.rel.RelNode) BloomFilterDef(org.apache.drill.exec.work.filter.BloomFilterDef) RuntimeFilterDef(org.apache.drill.exec.work.filter.RuntimeFilterDef)

Example 2 with BloomFilterDef

use of org.apache.drill.exec.work.filter.BloomFilterDef in project drill by apache.

the class HashJoinBatch method setupHash64.

/**
 * Prepares the 64-bit hash helper used for runtime filtering on the build side.
 * <p>
 * Materializes the build-side key expressions, resolves the value-vector id of every key and of
 * every bloom filter build field, and finally asks a ValueVectorHashHelper for the hash64
 * implementation. If any field cannot be resolved in the build batch, the runtime filter is
 * disabled and the method returns without setting hash64.
 *
 * @param htConfig hash table configuration supplying the build-side key expressions
 */
private void setupHash64(HashTableConfig htConfig) {
    ErrorCollector collector = new ErrorCollectorImpl();
    LogicalExpression[] keyExprsBuild = new LogicalExpression[htConfig.getKeyExprsBuild().size()];
    // Materialize each build key; keys that materialize to null are skipped, so the array is
    // filled from the front and may keep trailing nulls.
    int materializedCount = 0;
    for (NamedExpression keyExpr : htConfig.getKeyExprsBuild()) {
        LogicalExpression materialized = ExpressionTreeMaterializer.materialize(keyExpr.getExpr(), buildBatch, collector, context.getFunctionRegistry());
        collector.reportErrors(logger);
        if (materialized != null) {
            keyExprsBuild[materializedCount++] = materialized;
        }
    }

    // Resolve the value-vector id of every build key.
    TypedFieldId[] buildSideTypeFieldIds = new TypedFieldId[keyExprsBuild.length];
    int resolvedCount = 0;
    for (NamedExpression keyExpr : htConfig.getKeyExprsBuild()) {
        // NOTE(review): assumes every build key expression is a plain SchemaPath - confirm.
        SchemaPath keyPath = (SchemaPath) keyExpr.getExpr();
        TypedFieldId keyFieldId = buildBatch.getValueVectorId(keyPath);
        if (keyFieldId == null) {
            logger.info("As some build side key fields not found, runtime filter was disabled");
            enableRuntimeFilter = false;
            return;
        }
        buildSideTypeFieldIds[resolvedCount++] = keyFieldId;
    }

    // Map every bloom filter definition to the id of its build field.
    // NOTE(review): assumes popConfig.getRuntimeFilterDef() is non-null here - confirm callers.
    RuntimeFilterDef runtimeFilterDef = popConfig.getRuntimeFilterDef();
    for (BloomFilterDef bloomFilterDef : runtimeFilterDef.getBloomFilterDefs()) {
        SchemaPath buildFieldPath = new SchemaPath(new PathSegment.NameSegment(bloomFilterDef.getBuildField()), ExpressionPosition.UNKNOWN);
        TypedFieldId buildFieldTypedId = buildBatch.getValueVectorId(buildFieldPath);
        if (buildFieldTypedId == null) {
            logger.info("As some build side join key fields not found, runtime filter was disabled");
            enableRuntimeFilter = false;
            return;
        }
        // Only the first field id is recorded for the definition.
        bloomFilterDef2buildId.put(bloomFilterDef, buildFieldTypedId.getFieldIds()[0]);
    }

    ValueVectorHashHelper hashHelper = new ValueVectorHashHelper(buildBatch, context);
    try {
        hash64 = hashHelper.getHash64(keyExprsBuild, buildSideTypeFieldIds);
    } catch (Exception e) {
        // Any failure while generating the hash code is surfaced as an internal error.
        throw UserException.internalError(e).message("Failed to construct a field's hash64 dynamic codes").build(logger);
    }
}
Also used : ErrorCollector(org.apache.drill.common.expression.ErrorCollector) PathSegment(org.apache.drill.common.expression.PathSegment) ValueVectorHashHelper(org.apache.drill.exec.expr.fn.impl.ValueVectorHashHelper) UserException(org.apache.drill.common.exceptions.UserException) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) IOException(java.io.IOException) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) ErrorCollectorImpl(org.apache.drill.common.expression.ErrorCollectorImpl) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) SchemaPath(org.apache.drill.common.expression.SchemaPath) NamedExpression(org.apache.drill.common.logical.data.NamedExpression) TypedFieldId(org.apache.drill.exec.record.TypedFieldId) BloomFilterDef(org.apache.drill.exec.work.filter.BloomFilterDef) RuntimeFilterDef(org.apache.drill.exec.work.filter.RuntimeFilterDef)

Example 3 with BloomFilterDef

use of org.apache.drill.exec.work.filter.BloomFilterDef in project drill by apache.

the class TestHashJoinJPPD method testBroadcastHashJoin1Cond.

/**
 * Runs an INNER hash join on the single condition lft = rgt with the runtime filter enabled.
 * The left input holds three rows keyed 0 plus 2500 rows keyed 1..2500; the right input holds
 * three rows keyed 0, so only the 3 x 3 combinations on key 0 are expected (9 rows).
 */
@SuppressWarnings("unchecked")
@Test
public void testBroadcastHashJoin1Cond() {
    // Input data: the seed batches contain duplicate join-key values.
    List<String> leftRows = Lists.newArrayList("[{\"lft\": 0, \"a\" : \"a string\"}]", "[{\"lft\": 0, \"a\" : \"a different string\"},{\"lft\": 0, \"a\" : \"yet another\"}]");
    List<String> rightRows = Lists.newArrayList("[{\"rgt\": 0, \"b\" : \"a string\"}]", "[{\"rgt\": 0, \"b\" : \"a different string\"},{\"rgt\": 0, \"b\" : \"yet another\"}]");
    final int extraLeftRows = 2500;
    for (int key = 1; key <= extraLeftRows; key++) {
        leftRows.add("[{\"lft\": " + key + ", \"a\" : \"a string\"}]");
    }

    // Runtime filter definition with one bloom filter on (lft, rgt).
    int bloomFilterBytes = BloomFilter.optimalNumOfBytes(2600, 0.01);
    List<BloomFilterDef> bloomFilterDefs = new ArrayList<>();
    bloomFilterDefs.add(new BloomFilterDef(bloomFilterBytes, true, "lft", "rgt"));
    RuntimeFilterDef runtimeFilterDef = new RuntimeFilterDef(true, false, bloomFilterDefs, false, -1);
    HashJoinPOP joinConf = new HashJoinPOP(null, null, Lists.newArrayList(joinCond("lft", "EQUALS", "rgt")), JoinRelType.INNER, runtimeFilterDef);

    // Hash join options for this run.
    operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.num_partitions", 4);
    operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.num_rows_in_batch", 64);
    operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.max_batches_in_memory", 8);
    operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.enable.runtime_filter", true);

    legacyOpTestBuilder()
        .physicalOperator(joinConf)
        .inputDataStreamsJson(Lists.newArrayList(leftRows, rightRows))
        .baselineColumns("lft", "a", "b", "rgt")
        .expectedTotalRows(9)
        .go();
}
Also used : ArrayList(java.util.ArrayList) BloomFilterDef(org.apache.drill.exec.work.filter.BloomFilterDef) HashJoinPOP(org.apache.drill.exec.physical.config.HashJoinPOP) RuntimeFilterDef(org.apache.drill.exec.work.filter.RuntimeFilterDef) OperatorTest(org.apache.drill.categories.OperatorTest) Test(org.junit.Test) SlowTest(org.apache.drill.categories.SlowTest)

Example 4 with BloomFilterDef

use of org.apache.drill.exec.work.filter.BloomFilterDef in project drill by apache.

the class TestHashJoinJPPD method testBroadcastHashJoin2Cond.

/**
 * Runs an INNER hash join on two conditions (lft = rgt and a = b) with the runtime filter
 * enabled and one bloom filter per condition. Among the rows keyed 0 only the three pairs whose
 * "a" and "b" strings are equal satisfy both conditions, so 3 rows are expected.
 */
@SuppressWarnings("unchecked")
@Test
public void testBroadcastHashJoin2Cond() {
    // Input data: the seed batches contain duplicate join-key values.
    List<String> leftRows = Lists.newArrayList("[{\"lft\": 0, \"a\" : \"a string\"}]", "[{\"lft\": 0, \"a\" : \"a different string\"},{\"lft\": 0, \"a\" : \"yet another\"}]");
    List<String> rightRows = Lists.newArrayList("[{\"rgt\": 0, \"b\" : \"a string\"}]", "[{\"rgt\": 0, \"b\" : \"a different string\"},{\"rgt\": 0, \"b\" : \"yet another\"}]");
    final int extraLeftRows = 2500;
    for (int key = 1; key <= extraLeftRows; key++) {
        leftRows.add("[{\"lft\": " + key + ", \"a\" : \"a string\"}]");
    }

    // Runtime filter definition: bloom filters on (lft, rgt) and (a, b), both of the same size.
    int bloomFilterBytes = BloomFilter.optimalNumOfBytes(2600, 0.01);
    BloomFilterDef keyFilterDef = new BloomFilterDef(bloomFilterBytes, true, "lft", "rgt");
    BloomFilterDef textFilterDef = new BloomFilterDef(bloomFilterBytes, true, "a", "b");
    List<BloomFilterDef> bloomFilterDefs = new ArrayList<>();
    bloomFilterDefs.add(keyFilterDef);
    bloomFilterDefs.add(textFilterDef);
    RuntimeFilterDef runtimeFilterDef = new RuntimeFilterDef(true, false, bloomFilterDefs, false, -1);
    HashJoinPOP joinConf = new HashJoinPOP(null, null, Lists.newArrayList(joinCond("lft", "EQUALS", "rgt"), joinCond("a", "EQUALS", "b")), JoinRelType.INNER, runtimeFilterDef);

    // Hash join options for this run.
    operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.num_partitions", 4);
    operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.num_rows_in_batch", 128);
    operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.max_batches_in_memory", 8);
    operatorFixture.getOptionManager().setLocalOption("exec.hashjoin.enable.runtime_filter", true);

    legacyOpTestBuilder()
        .physicalOperator(joinConf)
        .inputDataStreamsJson(Lists.newArrayList(leftRows, rightRows))
        .baselineColumns("lft", "a", "b", "rgt")
        .expectedTotalRows(3)
        .go();
}
Also used : ArrayList(java.util.ArrayList) BloomFilterDef(org.apache.drill.exec.work.filter.BloomFilterDef) HashJoinPOP(org.apache.drill.exec.physical.config.HashJoinPOP) RuntimeFilterDef(org.apache.drill.exec.work.filter.RuntimeFilterDef) OperatorTest(org.apache.drill.categories.OperatorTest) Test(org.junit.Test) SlowTest(org.apache.drill.categories.SlowTest)

Example 5 with BloomFilterDef

use of org.apache.drill.exec.work.filter.BloomFilterDef in project drill by apache.

the class HashJoinBatch method initializeRuntimeFilter.

/**
 * Creates the runtime filter reporter and one BloomFilter per BloomFilterDef of the operator's
 * RuntimeFilterDef. Does nothing when the runtime filter is disabled or the bloom filters have
 * already been generated.
 * <p>
 * Note: this method must not run again as part of the recursive call of executeBuildPhase()
 * that handles spilled build partitions; the bloomFiltersGenerated flag makes repeated calls
 * a no-op.
 */
private void initializeRuntimeFilter() {
    if (!enableRuntimeFilter) {
        return;
    }
    if (bloomFiltersGenerated) {
        return;
    }
    runtimeFilterReporter = new RuntimeFilterReporter((ExecutorFragmentContext) context);
    RuntimeFilterDef runtimeFilterDef = popConfig.getRuntimeFilterDef();
    // Presumably the definition is only attached when RuntimeFilterRouter decided to apply a
    // runtime filter (per the original note); without one there is nothing to allocate.
    if (runtimeFilterDef != null) {
        for (BloomFilterDef def : runtimeFilterDef.getBloomFilterDefs()) {
            // Build-field id recorded earlier for this definition in bloomFilterDef2buildId.
            int buildId = bloomFilterDef2buildId.get(def);
            int sizeInBytes = def.getNumBytes();
            String probeFieldName = def.getProbeField();
            probeFields.add(probeFieldName);
            BloomFilter filter = new BloomFilter(sizeInBytes, context.getAllocator());
            bloomFilters.add(filter);
            bloomFilter2buildId.put(filter, buildId);
        }
    }
    // Mark as done even when no definition was present, so later calls return immediately.
    bloomFiltersGenerated = true;
}
Also used : ExecutorFragmentContext(org.apache.drill.exec.ops.ExecutorFragmentContext) BloomFilterDef(org.apache.drill.exec.work.filter.BloomFilterDef) RuntimeFilterReporter(org.apache.drill.exec.work.filter.RuntimeFilterReporter) RuntimeFilterDef(org.apache.drill.exec.work.filter.RuntimeFilterDef) BloomFilter(org.apache.drill.exec.work.filter.BloomFilter)

Aggregations

BloomFilterDef (org.apache.drill.exec.work.filter.BloomFilterDef)5 RuntimeFilterDef (org.apache.drill.exec.work.filter.RuntimeFilterDef)5 ArrayList (java.util.ArrayList)3 OperatorTest (org.apache.drill.categories.OperatorTest)2 SlowTest (org.apache.drill.categories.SlowTest)2 HashJoinPOP (org.apache.drill.exec.physical.config.HashJoinPOP)2 Test (org.junit.Test)2 IOException (java.io.IOException)1 RelNode (org.apache.calcite.rel.RelNode)1 JoinInfo (org.apache.calcite.rel.core.JoinInfo)1 JoinRelType (org.apache.calcite.rel.core.JoinRelType)1 RelMetadataQuery (org.apache.calcite.rel.metadata.RelMetadataQuery)1 RelDataType (org.apache.calcite.rel.type.RelDataType)1 RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField)1 UserException (org.apache.drill.common.exceptions.UserException)1 ErrorCollector (org.apache.drill.common.expression.ErrorCollector)1 ErrorCollectorImpl (org.apache.drill.common.expression.ErrorCollectorImpl)1 LogicalExpression (org.apache.drill.common.expression.LogicalExpression)1 PathSegment (org.apache.drill.common.expression.PathSegment)1 SchemaPath (org.apache.drill.common.expression.SchemaPath)1