use of org.apache.calcite.plan.RelOptCost in project hive by apache.
the class HiveRelMdDistinctRowCount method getCumulativeCost.
/**
 * Cumulative cost of a Hive join: the join's own (non-cumulative) cost plus
 * the cumulative cost of its single most expensive input.
 *
 * <p>Favor Broad Plans over Deep Plans: only the costliest input is added,
 * the remaining inputs do not contribute to the result.
 *
 * @param rel join whose cumulative cost is requested
 * @param mq  metadata query used to obtain the join's and the inputs' costs
 * @return the join's own cost plus the largest input cumulative cost, or the
 *         join's own cost plus {@code HiveCost.ZERO} when no input compares
 *         greater than zero
 */
public RelOptCost getCumulativeCost(HiveJoin rel, RelMetadataQuery mq) {
  final RelOptCost selfCost = mq.getNonCumulativeCost(rel);
  RelOptCost costliestInput = HiveCost.ZERO;
  for (RelNode child : rel.getInputs()) {
    final RelOptCost childCost = mq.getCumulativeCost(child);
    // Keep the running maximum; ties keep the earlier input's cost.
    costliestInput = costliestInput.isLt(childCost) ? childCost : costliestInput;
  }
  return selfCost.plus(costliestInput);
}
use of org.apache.calcite.plan.RelOptCost in project hive by apache.
the class HiveRelMdCumulativeCost method getCumulativeCost.
/**
 * Computes the cumulative cost of a Hive join as its own non-cumulative cost
 * plus the maximum cumulative cost found among its inputs.
 *
 * <p>Favor Broad Plans over Deep Plans: input costs are not summed; only the
 * largest one is carried upwards.
 *
 * @param rel join being costed
 * @param mq  metadata query that supplies per-node costs
 * @return own cost of {@code rel} plus the highest input cumulative cost
 *         (starting from {@code HiveCost.ZERO})
 */
public RelOptCost getCumulativeCost(HiveJoin rel, RelMetadataQuery mq) {
  final RelOptCost ownCost = mq.getNonCumulativeCost(rel);
  final List<RelNode> children = rel.getInputs();
  RelOptCost highest = HiveCost.ZERO;
  for (int i = 0; i < children.size(); i++) {
    final RelOptCost candidate = mq.getCumulativeCost(children.get(i));
    if (!highest.isLt(candidate)) {
      // Candidate is not strictly more expensive; keep the current maximum.
      continue;
    }
    highest = candidate;
  }
  return ownCost.plus(highest);
}
use of org.apache.calcite.plan.RelOptCost in project hive by apache.
the class HiveCardinalityPreservingJoinRule method choosePlan.
/**
 * Picks between the original plan and its rewritten counterpart by comparing
 * their cumulative costs under {@code HiveTezModelRelMetadataProvider.DEFAULT}.
 *
 * <p>The thread's metadata provider is swapped for the duration of the
 * comparison and restored afterwards; the cluster's metadata query is
 * invalidated both after the swap and before the restore.
 *
 * @param node      the original plan
 * @param optimized the candidate replacement plan
 * @return {@code optimized} when its cost is strictly lower (per
 *         {@code isLt}) than the original cost multiplied by {@code factor};
 *         otherwise {@code node}
 */
private RelNode choosePlan(RelNode node, RelNode optimized) {
  final JaninoRelMetadataProvider previousProvider = RelMetadataQuery.THREAD_PROVIDERS.get();
  try {
    RelMetadataQuery.THREAD_PROVIDERS.set(HiveTezModelRelMetadataProvider.DEFAULT);
    node.getCluster().invalidateMetadataQuery();
    final RelMetadataQuery costQuery = RelMetadataQuery.instance();
    final RelOptCost costAfter = costQuery.getCumulativeCost(optimized);
    // The original plan's cost is scaled by the rule's factor before comparing.
    final RelOptCost scaledCostBefore = costQuery.getCumulativeCost(node).multiplyBy(factor);
    LOG.debug("Original plan cost {} vs Optimized plan cost {}", scaledCostBefore, costAfter);
    if (!costAfter.isLt(scaledCostBefore)) {
      return node;
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Plan after:\n" + RelOptUtil.toString(optimized));
    }
    return optimized;
  } finally {
    // Always put back whatever provider the thread had on entry.
    node.getCluster().invalidateMetadataQuery();
    RelMetadataQuery.THREAD_PROVIDERS.set(previousProvider);
  }
}
use of org.apache.calcite.plan.RelOptCost in project calcite by apache.
the class RelMdPercentageOriginalRows method getCumulativeCost.
// Ditto for getNonCumulativeCost
/**
 * Sums the non-cumulative cost of {@code rel} with the cumulative cost of
 * every one of its inputs.
 *
 * @param rel relational expression being costed
 * @param mq  metadata query used to look up each cost
 * @return own cost of {@code rel} plus the cumulative costs of all inputs
 */
public RelOptCost getCumulativeCost(RelNode rel, RelMetadataQuery mq) {
  RelOptCost total = mq.getNonCumulativeCost(rel);
  for (RelNode child : rel.getInputs()) {
    final RelOptCost childCost = mq.getCumulativeCost(child);
    total = total.plus(childCost);
  }
  return total;
}
use of org.apache.calcite.plan.RelOptCost in project calcite by apache.
the class LoptSemiJoinOptimizer method computeScore.
/**
 * Computes a score relevant to applying a set of semijoins on a fact table.
 * The higher the score, the better.
 *
 * <p>A score of 0 is returned when the semijoin selectivity exceeds one half,
 * or when the fact cost, the dimension row count, or the dimension cost is
 * unavailable (null).
 *
 * @param factRel fact table being filtered
 * @param dimRel dimension table that participates in semijoin
 * @param semiJoin semijoin between fact and dimension tables
 *
 * @return computed score of applying the dimension table filters on the
 * fact table
 */
private double computeScore(RelNode factRel, RelNode dimRel, SemiJoin semiJoin) {
  // Estimate savings as a result of applying semijoin filter on fact
  // table. As a heuristic, the selectivity of the semijoin needs to
  // be less than half. There may be instances where an even smaller
  // selectivity value is required because of the overhead of
  // index lookups on a very large fact table. Half was chosen as
  // a middle ground based on testing that was done with a large
  // data set.
  final ImmutableBitSet dimCols = ImmutableBitSet.of(semiJoin.getRightKeys());
  final double selectivity =
      RelMdUtil.computeSemiJoinSelectivity(mq, factRel, dimRel, semiJoin);
  if (selectivity > .5) {
    return 0;
  }

  final RelOptCost factCost = mq.getCumulativeCost(factRel);
  // if not enough information, return a low score
  if (factCost == null) {
    return 0;
  }
  double savings = (1.0 - Math.sqrt(selectivity)) * Math.max(1.0, factCost.getRows());

  // Additional savings if the dimension columns are unique. We can
  // ignore nulls since they will be filtered out by the semijoin.
  final boolean uniq =
      RelMdUtil.areColumnsDefinitelyUniqueWhenNullsFiltered(mq, dimRel, dimCols);
  if (uniq) {
    savings *= 2.0;
  }

  // The dimension row count only acts as an "is information available"
  // signal; its value does not enter the score. It is checked for null
  // directly: the previous `uniq ? 0 : rowCount` expression unboxed the
  // row count and threw a NullPointerException before the null guard ran.
  final Double dimRowCount = mq.getRowCount(dimRel);
  final RelOptCost dimCost = mq.getCumulativeCost(dimRel);
  if (dimRowCount == null || dimCost == null) {
    return 0;
  }

  // Clamp the divisor to at least one row so that a tiny dimension cost
  // cannot inflate the score.
  final double dimRows = Math.max(1.0, dimCost.getRows());
  return savings / dimRows;
}
Aggregations