Search in sources :

Example 1 with PlannerSettings

use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by axbaretto.

the class ConvertHiveParquetScanToDrillParquetScan method onMatch.

@Override
public void onMatch(RelOptRuleCall call) {
    try {
        final DrillScanRel hiveScanRel = (DrillScanRel) call.rel(0);
        final HiveScan hiveScan = (HiveScan) hiveScanRel.getGroupScan();
        final PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
        final String partitionColumnLabel = settings.getFsPartitionColumnLabel();
        final Table hiveTable = hiveScan.getHiveReadEntry().getTable();
        checkForUnsupportedDataTypes(hiveTable);
        final Map<String, String> partitionColMapping = getPartitionColMapping(hiveTable, partitionColumnLabel);
        final DrillScanRel nativeScanRel = createNativeScanRel(partitionColMapping, hiveScanRel);
        if (hiveScanRel.getRowType().getFieldCount() == 0) {
            call.transformTo(nativeScanRel);
        } else {
            final DrillProjectRel projectRel = createProjectRel(hiveScanRel, partitionColMapping, nativeScanRel);
            call.transformTo(projectRel);
        }
    } catch (final Exception e) {
        logger.warn("Failed to convert HiveScan to HiveDrillNativeParquetScan", e);
    }
}
Also used : DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) Table(org.apache.hadoop.hive.metastore.api.Table) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) DrillProjectRel(org.apache.drill.exec.planner.logical.DrillProjectRel) HiveScan(org.apache.drill.exec.store.hive.HiveScan)

Example 2 with PlannerSettings

use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by axbaretto.

the class DrillReduceAggregatesRule method reduceStddev.

private RexNode reduceStddev(Aggregate oldAggRel, AggregateCall oldCall, boolean biased, boolean sqrt, List<AggregateCall> newCalls, Map<AggregateCall, RexNode> aggCallMapping, List<RexNode> inputExprs) {
    // stddev_pop(x) ==>
    // power(
    // (sum(x * x) - sum(x) * sum(x) / count(x))
    // / count(x),
    // .5)
    // 
    // stddev_samp(x) ==>
    // power(
    // (sum(x * x) - sum(x) * sum(x) / count(x))
    // / nullif(count(x) - 1, 0),
    // .5)
    final PlannerSettings plannerSettings = (PlannerSettings) oldAggRel.getCluster().getPlanner().getContext();
    final boolean isInferenceEnabled = plannerSettings.isTypeInferenceEnabled();
    final int nGroups = oldAggRel.getGroupCount();
    RelDataTypeFactory typeFactory = oldAggRel.getCluster().getTypeFactory();
    final RexBuilder rexBuilder = oldAggRel.getCluster().getRexBuilder();
    assert oldCall.getArgList().size() == 1 : oldCall.getArgList();
    final int argOrdinal = oldCall.getArgList().get(0);
    final RelDataType argType = getFieldType(oldAggRel.getInput(), argOrdinal);
    // final RexNode argRef = inputExprs.get(argOrdinal);
    RexNode argRef = rexBuilder.makeCall(CastHighOp, inputExprs.get(argOrdinal));
    inputExprs.set(argOrdinal, argRef);
    final RexNode argSquared = rexBuilder.makeCall(SqlStdOperatorTable.MULTIPLY, argRef, argRef);
    final int argSquaredOrdinal = lookupOrAdd(inputExprs, argSquared);
    RelDataType sumType = TypeInferenceUtils.getDrillSqlReturnTypeInference(SqlKind.SUM.name(), ImmutableList.<DrillFuncHolder>of()).inferReturnType(oldCall.createBinding(oldAggRel));
    sumType = typeFactory.createTypeWithNullability(sumType, true);
    final AggregateCall sumArgSquaredAggCall = AggregateCall.create(new DrillCalciteSqlAggFunctionWrapper(new SqlSumAggFunction(sumType), sumType), oldCall.isDistinct(), oldCall.isApproximate(), ImmutableIntList.of(argSquaredOrdinal), -1, sumType, null);
    final RexNode sumArgSquared = rexBuilder.addAggCall(sumArgSquaredAggCall, nGroups, oldAggRel.indicator, newCalls, aggCallMapping, ImmutableList.of(argType));
    final AggregateCall sumArgAggCall = AggregateCall.create(new DrillCalciteSqlAggFunctionWrapper(new SqlSumAggFunction(sumType), sumType), oldCall.isDistinct(), oldCall.isApproximate(), ImmutableIntList.of(argOrdinal), -1, sumType, null);
    final RexNode sumArg = rexBuilder.addAggCall(sumArgAggCall, nGroups, oldAggRel.indicator, newCalls, aggCallMapping, ImmutableList.of(argType));
    final RexNode sumSquaredArg = rexBuilder.makeCall(SqlStdOperatorTable.MULTIPLY, sumArg, sumArg);
    final SqlCountAggFunction countAgg = (SqlCountAggFunction) SqlStdOperatorTable.COUNT;
    final RelDataType countType = countAgg.getReturnType(typeFactory);
    final AggregateCall countArgAggCall = AggregateCall.create(countAgg, oldCall.isDistinct(), oldCall.isApproximate(), oldCall.getArgList(), -1, countType, null);
    final RexNode countArg = rexBuilder.addAggCall(countArgAggCall, nGroups, oldAggRel.indicator, newCalls, aggCallMapping, ImmutableList.of(argType));
    final RexNode avgSumSquaredArg = rexBuilder.makeCall(SqlStdOperatorTable.DIVIDE, sumSquaredArg, countArg);
    final RexNode diff = rexBuilder.makeCall(SqlStdOperatorTable.MINUS, sumArgSquared, avgSumSquaredArg);
    final RexNode denominator;
    if (biased) {
        denominator = countArg;
    } else {
        final RexLiteral one = rexBuilder.makeExactLiteral(BigDecimal.ONE);
        final RexNode nul = rexBuilder.makeNullLiteral(countArg.getType());
        final RexNode countMinusOne = rexBuilder.makeCall(SqlStdOperatorTable.MINUS, countArg, one);
        final RexNode countEqOne = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, countArg, one);
        denominator = rexBuilder.makeCall(SqlStdOperatorTable.CASE, countEqOne, nul, countMinusOne);
    }
    final SqlOperator divide;
    if (isInferenceEnabled) {
        divide = new DrillSqlOperator("divide", 2, true, oldCall.getType(), false);
    } else {
        divide = SqlStdOperatorTable.DIVIDE;
    }
    final RexNode div = rexBuilder.makeCall(divide, diff, denominator);
    RexNode result = div;
    if (sqrt) {
        final RexNode half = rexBuilder.makeExactLiteral(new BigDecimal("0.5"));
        result = rexBuilder.makeCall(SqlStdOperatorTable.POWER, div, half);
    }
    if (isInferenceEnabled) {
        return result;
    } else {
        /*
      * Currently calcite's strategy to infer the return type of aggregate functions
      * is wrong because it uses the first known argument to determine output type. For
      * instance if we are performing stddev on an integer column then it interprets the
      * output type to be integer which is incorrect as it should be double. So based on
      * this if we add cast after rewriting the aggregate we add an additional cast which
      * would cause wrong results. So we simply add a cast to ANY.
      */
        return rexBuilder.makeCast(typeFactory.createSqlType(SqlTypeName.ANY), result);
    }
}
Also used : RexLiteral(org.apache.calcite.rex.RexLiteral) DrillFuncHolder(org.apache.drill.exec.expr.fn.DrillFuncHolder) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) DrillSqlOperator(org.apache.drill.exec.planner.sql.DrillSqlOperator) SqlOperator(org.apache.calcite.sql.SqlOperator) RelDataType(org.apache.calcite.rel.type.RelDataType) SqlCountAggFunction(org.apache.calcite.sql.fun.SqlCountAggFunction) BigDecimal(java.math.BigDecimal) DrillCalciteSqlAggFunctionWrapper(org.apache.drill.exec.planner.sql.DrillCalciteSqlAggFunctionWrapper) DrillSqlOperator(org.apache.drill.exec.planner.sql.DrillSqlOperator) AggregateCall(org.apache.calcite.rel.core.AggregateCall) RelDataTypeFactory(org.apache.calcite.rel.type.RelDataTypeFactory) SqlSumAggFunction(org.apache.calcite.sql.fun.SqlSumAggFunction) RexBuilder(org.apache.calcite.rex.RexBuilder) RexNode(org.apache.calcite.rex.RexNode)

Example 3 with PlannerSettings

use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by axbaretto.

the class DrillReduceAggregatesRule method reduceAvg.

private RexNode reduceAvg(Aggregate oldAggRel, AggregateCall oldCall, List<AggregateCall> newCalls, Map<AggregateCall, RexNode> aggCallMapping) {
    final PlannerSettings plannerSettings = (PlannerSettings) oldAggRel.getCluster().getPlanner().getContext();
    final boolean isInferenceEnabled = plannerSettings.isTypeInferenceEnabled();
    final int nGroups = oldAggRel.getGroupCount();
    RelDataTypeFactory typeFactory = oldAggRel.getCluster().getTypeFactory();
    RexBuilder rexBuilder = oldAggRel.getCluster().getRexBuilder();
    int iAvgInput = oldCall.getArgList().get(0);
    RelDataType avgInputType = getFieldType(oldAggRel.getInput(), iAvgInput);
    RelDataType sumType = TypeInferenceUtils.getDrillSqlReturnTypeInference(SqlKind.SUM.name(), ImmutableList.<DrillFuncHolder>of()).inferReturnType(oldCall.createBinding(oldAggRel));
    sumType = typeFactory.createTypeWithNullability(sumType, sumType.isNullable() || nGroups == 0);
    SqlAggFunction sumAgg = new DrillCalciteSqlAggFunctionWrapper(new SqlSumEmptyIsZeroAggFunction(), sumType);
    AggregateCall sumCall = AggregateCall.create(sumAgg, oldCall.isDistinct(), oldCall.isApproximate(), oldCall.getArgList(), -1, sumType, null);
    final SqlCountAggFunction countAgg = (SqlCountAggFunction) SqlStdOperatorTable.COUNT;
    final RelDataType countType = countAgg.getReturnType(typeFactory);
    AggregateCall countCall = AggregateCall.create(countAgg, oldCall.isDistinct(), oldCall.isApproximate(), oldCall.getArgList(), -1, countType, null);
    RexNode tmpsumRef = rexBuilder.addAggCall(sumCall, nGroups, oldAggRel.indicator, newCalls, aggCallMapping, ImmutableList.of(avgInputType));
    RexNode tmpcountRef = rexBuilder.addAggCall(countCall, nGroups, oldAggRel.indicator, newCalls, aggCallMapping, ImmutableList.of(avgInputType));
    RexNode n = rexBuilder.makeCall(SqlStdOperatorTable.CASE, rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, tmpcountRef, rexBuilder.makeExactLiteral(BigDecimal.ZERO)), rexBuilder.constantNull(), tmpsumRef);
    // NOTE:  these references are with respect to the output
    // of newAggRel
    /*
    RexNode numeratorRef =
        rexBuilder.makeCall(CastHighOp,
          rexBuilder.addAggCall(
              sumCall,
              nGroups,
              newCalls,
              aggCallMapping,
              ImmutableList.of(avgInputType))
        );
    */
    RexNode numeratorRef = rexBuilder.makeCall(CastHighOp, n);
    RexNode denominatorRef = rexBuilder.addAggCall(countCall, nGroups, oldAggRel.indicator, newCalls, aggCallMapping, ImmutableList.of(avgInputType));
    if (isInferenceEnabled) {
        return rexBuilder.makeCall(new DrillSqlOperator("divide", 2, true, oldCall.getType(), false), numeratorRef, denominatorRef);
    } else {
        final RexNode divideRef = rexBuilder.makeCall(SqlStdOperatorTable.DIVIDE, numeratorRef, denominatorRef);
        return rexBuilder.makeCast(typeFactory.createSqlType(SqlTypeName.ANY), divideRef);
    }
}
Also used : DrillFuncHolder(org.apache.drill.exec.expr.fn.DrillFuncHolder) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) RelDataType(org.apache.calcite.rel.type.RelDataType) SqlAggFunction(org.apache.calcite.sql.SqlAggFunction) SqlCountAggFunction(org.apache.calcite.sql.fun.SqlCountAggFunction) DrillCalciteSqlAggFunctionWrapper(org.apache.drill.exec.planner.sql.DrillCalciteSqlAggFunctionWrapper) DrillSqlOperator(org.apache.drill.exec.planner.sql.DrillSqlOperator) SqlSumEmptyIsZeroAggFunction(org.apache.calcite.sql.fun.SqlSumEmptyIsZeroAggFunction) AggregateCall(org.apache.calcite.rel.core.AggregateCall) RelDataTypeFactory(org.apache.calcite.rel.type.RelDataTypeFactory) RexBuilder(org.apache.calcite.rex.RexBuilder) RexNode(org.apache.calcite.rex.RexNode)

Example 4 with PlannerSettings

use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by axbaretto.

the class ParquetPruneScanRule method getFilterOnScanParquet.

public static final RelOptRule getFilterOnScanParquet(OptimizerRulesContext optimizerRulesContext) {
    return new PruneScanRule(RelOptHelper.some(DrillFilterRel.class, RelOptHelper.any(DrillScanRel.class)), "PruneScanRule:Filter_On_Scan_Parquet", optimizerRulesContext) {

        @Override
        public PartitionDescriptor getPartitionDescriptor(PlannerSettings settings, TableScan scanRel) {
            return new ParquetPartitionDescriptor(settings, (DrillScanRel) scanRel);
        }

        @Override
        public boolean matches(RelOptRuleCall call) {
            final DrillScanRel scan = call.rel(1);
            GroupScan groupScan = scan.getGroupScan();
            // this rule is applicable only for parquet based partition pruning
            if (PrelUtil.getPlannerSettings(scan.getCluster().getPlanner()).isHepPartitionPruningEnabled()) {
                return groupScan instanceof ParquetGroupScan && groupScan.supportsPartitionFilterPushdown() && !scan.partitionFilterPushdown();
            } else {
                return groupScan instanceof ParquetGroupScan && groupScan.supportsPartitionFilterPushdown();
            }
        }

        @Override
        public void onMatch(RelOptRuleCall call) {
            final DrillFilterRel filterRel = call.rel(0);
            final DrillScanRel scanRel = call.rel(1);
            doOnMatch(call, filterRel, null, scanRel);
        }
    };
}
Also used : FileGroupScan(org.apache.drill.exec.physical.base.FileGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) ParquetGroupScan(org.apache.drill.exec.store.parquet.ParquetGroupScan) TableScan(org.apache.calcite.rel.core.TableScan) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) DrillFilterRel(org.apache.drill.exec.planner.logical.DrillFilterRel) ParquetGroupScan(org.apache.drill.exec.store.parquet.ParquetGroupScan) RelOptRuleCall(org.apache.calcite.plan.RelOptRuleCall) ParquetPartitionDescriptor(org.apache.drill.exec.planner.ParquetPartitionDescriptor)

Example 5 with PlannerSettings

use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.

the class ConvertHiveParquetScanToDrillParquetScan method createColumnFormatConversion.

/**
 * Apply any data format conversion expressions.
 */
private RexNode createColumnFormatConversion(DrillScanRel hiveScanRel, DrillScanRel nativeScanRel, String colName, RexBuilder rb) {
    RelDataType outputType = hiveScanRel.getRowType().getField(colName, false, false).getType();
    RelDataTypeField inputField = nativeScanRel.getRowType().getField(colName, false, false);
    RexInputRef inputRef = rb.makeInputRef(inputField.getType(), inputField.getIndex());
    PlannerSettings settings = PrelUtil.getPlannerSettings(hiveScanRel.getCluster().getPlanner());
    boolean conversionToTimestampEnabled = settings.getOptions().getBoolean(ExecConstants.PARQUET_READER_INT96_AS_TIMESTAMP);
    if (outputType.getSqlTypeName() == SqlTypeName.TIMESTAMP && !conversionToTimestampEnabled) {
        // disabled to avoid double conversion after reading value from parquet and here.
        return rb.makeCall(INT96_TO_TIMESTAMP, inputRef);
    }
    return inputRef;
}
Also used : RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) RexInputRef(org.apache.calcite.rex.RexInputRef) RelDataType(org.apache.calcite.rel.type.RelDataType)

Aggregations

PlannerSettings (org.apache.drill.exec.planner.physical.PlannerSettings)38 DrillScanRel (org.apache.drill.exec.planner.logical.DrillScanRel)16 RexNode (org.apache.calcite.rex.RexNode)13 GroupScan (org.apache.drill.exec.physical.base.GroupScan)13 TableScan (org.apache.calcite.rel.core.TableScan)12 RexBuilder (org.apache.calcite.rex.RexBuilder)12 RelDataType (org.apache.calcite.rel.type.RelDataType)11 RelOptRuleCall (org.apache.calcite.plan.RelOptRuleCall)10 IOException (java.io.IOException)9 RelDataTypeFactory (org.apache.calcite.rel.type.RelDataTypeFactory)8 SchemaPath (org.apache.drill.common.expression.SchemaPath)8 DrillFilterRel (org.apache.drill.exec.planner.logical.DrillFilterRel)8 HiveScan (org.apache.drill.exec.store.hive.HiveScan)8 RelNode (org.apache.calcite.rel.RelNode)7 AggregateCall (org.apache.calcite.rel.core.AggregateCall)6 SqlCountAggFunction (org.apache.calcite.sql.fun.SqlCountAggFunction)6 DrillProjectRel (org.apache.drill.exec.planner.logical.DrillProjectRel)6 List (java.util.List)5 FormatSelection (org.apache.drill.exec.store.dfs.FormatSelection)5 ArrayList (java.util.ArrayList)4