use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by axbaretto.
the class ConvertHiveParquetScanToDrillParquetScan method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
  try {
    final DrillScanRel hiveScanRel = (DrillScanRel) call.rel(0);
    final HiveScan hiveScan = (HiveScan) hiveScanRel.getGroupScan();
    final PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
    final String partitionColumnLabel = settings.getFsPartitionColumnLabel();
    final Table hiveTable = hiveScan.getHiveReadEntry().getTable();
    checkForUnsupportedDataTypes(hiveTable);
    final Map<String, String> partitionColMapping = getPartitionColMapping(hiveTable, partitionColumnLabel);
    final DrillScanRel nativeScanRel = createNativeScanRel(partitionColMapping, hiveScanRel);
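    // If the scan exposes no columns there is nothing to remap, so the native scan replaces
    // the Hive scan directly; otherwise a project maps the native scan's columns back onto
    // the original Hive row type (partition column names can differ between the two scans).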
    if (hiveScanRel.getRowType().getFieldCount() == 0) {
      call.transformTo(nativeScanRel);
    } else {
      final DrillProjectRel projectRel = createProjectRel(hiveScanRel, partitionColMapping, nativeScanRel);
      call.transformTo(projectRel);
    }
  } catch (final Exception e) {
    logger.warn("Failed to convert HiveScan to HiveDrillNativeParquetScan", e);
  }
}
use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by axbaretto.
the class DrillReduceAggregatesRule method reduceStddev.
private RexNode reduceStddev(Aggregate oldAggRel, AggregateCall oldCall, boolean biased, boolean sqrt,
    List<AggregateCall> newCalls, Map<AggregateCall, RexNode> aggCallMapping, List<RexNode> inputExprs) {
  // stddev_pop(x) ==>
  //   power(
  //     (sum(x * x) - sum(x) * sum(x) / count(x))
  //     / count(x),
  //     .5)
  //
  // stddev_samp(x) ==>
  //   power(
  //     (sum(x * x) - sum(x) * sum(x) / count(x))
  //     / nullif(count(x) - 1, 0),
  //     .5)
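  // Derivation sketch (editorial note, assuming non-null inputs and count(x) > 0):
  //   var_pop(x) = E[x^2] - (E[x])^2
  //              = sum(x*x)/count(x) - (sum(x)/count(x))^2
  //              = (sum(x*x) - sum(x)*sum(x)/count(x)) / count(x)
  //   stddev is power(var, .5); the sample variant divides by nullif(count(x) - 1, 0)
  //   so a single-row group yields NULL rather than dividing by zero.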
  final PlannerSettings plannerSettings = (PlannerSettings) oldAggRel.getCluster().getPlanner().getContext();
  final boolean isInferenceEnabled = plannerSettings.isTypeInferenceEnabled();
  final int nGroups = oldAggRel.getGroupCount();
  RelDataTypeFactory typeFactory = oldAggRel.getCluster().getTypeFactory();
  final RexBuilder rexBuilder = oldAggRel.getCluster().getRexBuilder();
  assert oldCall.getArgList().size() == 1 : oldCall.getArgList();
  final int argOrdinal = oldCall.getArgList().get(0);
  final RelDataType argType = getFieldType(oldAggRel.getInput(), argOrdinal);
  // final RexNode argRef = inputExprs.get(argOrdinal);
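  // Wrap the argument in Drill's CastHigh operator, presumably so that the squaring and
  // summation below are carried out in a wider numeric type than the input column's own.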
  RexNode argRef = rexBuilder.makeCall(CastHighOp, inputExprs.get(argOrdinal));
  inputExprs.set(argOrdinal, argRef);
  final RexNode argSquared = rexBuilder.makeCall(SqlStdOperatorTable.MULTIPLY, argRef, argRef);
  final int argSquaredOrdinal = lookupOrAdd(inputExprs, argSquared);
  RelDataType sumType = TypeInferenceUtils.getDrillSqlReturnTypeInference(SqlKind.SUM.name(),
      ImmutableList.<DrillFuncHolder>of()).inferReturnType(oldCall.createBinding(oldAggRel));
  sumType = typeFactory.createTypeWithNullability(sumType, true);
  final AggregateCall sumArgSquaredAggCall = AggregateCall.create(
      new DrillCalciteSqlAggFunctionWrapper(new SqlSumAggFunction(sumType), sumType),
      oldCall.isDistinct(), oldCall.isApproximate(), ImmutableIntList.of(argSquaredOrdinal), -1, sumType, null);
  final RexNode sumArgSquared = rexBuilder.addAggCall(sumArgSquaredAggCall, nGroups, oldAggRel.indicator,
      newCalls, aggCallMapping, ImmutableList.of(argType));
  final AggregateCall sumArgAggCall = AggregateCall.create(
      new DrillCalciteSqlAggFunctionWrapper(new SqlSumAggFunction(sumType), sumType),
      oldCall.isDistinct(), oldCall.isApproximate(), ImmutableIntList.of(argOrdinal), -1, sumType, null);
  final RexNode sumArg = rexBuilder.addAggCall(sumArgAggCall, nGroups, oldAggRel.indicator,
      newCalls, aggCallMapping, ImmutableList.of(argType));
  final RexNode sumSquaredArg = rexBuilder.makeCall(SqlStdOperatorTable.MULTIPLY, sumArg, sumArg);
  final SqlCountAggFunction countAgg = (SqlCountAggFunction) SqlStdOperatorTable.COUNT;
  final RelDataType countType = countAgg.getReturnType(typeFactory);
  final AggregateCall countArgAggCall = AggregateCall.create(countAgg, oldCall.isDistinct(),
      oldCall.isApproximate(), oldCall.getArgList(), -1, countType, null);
  final RexNode countArg = rexBuilder.addAggCall(countArgAggCall, nGroups, oldAggRel.indicator,
      newCalls, aggCallMapping, ImmutableList.of(argType));
  final RexNode avgSumSquaredArg = rexBuilder.makeCall(SqlStdOperatorTable.DIVIDE, sumSquaredArg, countArg);
  final RexNode diff = rexBuilder.makeCall(SqlStdOperatorTable.MINUS, sumArgSquared, avgSumSquaredArg);
  final RexNode denominator;
  if (biased) {
    denominator = countArg;
  } else {
    final RexLiteral one = rexBuilder.makeExactLiteral(BigDecimal.ONE);
    final RexNode nul = rexBuilder.makeNullLiteral(countArg.getType());
    final RexNode countMinusOne = rexBuilder.makeCall(SqlStdOperatorTable.MINUS, countArg, one);
    final RexNode countEqOne = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, countArg, one);
    denominator = rexBuilder.makeCall(SqlStdOperatorTable.CASE, countEqOne, nul, countMinusOne);
  }
  final SqlOperator divide;
  if (isInferenceEnabled) {
    divide = new DrillSqlOperator("divide", 2, true, oldCall.getType(), false);
  } else {
    divide = SqlStdOperatorTable.DIVIDE;
  }
  final RexNode div = rexBuilder.makeCall(divide, diff, denominator);
  RexNode result = div;
  if (sqrt) {
    final RexNode half = rexBuilder.makeExactLiteral(new BigDecimal("0.5"));
    result = rexBuilder.makeCall(SqlStdOperatorTable.POWER, div, half);
  }
  if (isInferenceEnabled) {
    return result;
  } else {
    /*
     * Calcite's current strategy for inferring the return type of aggregate functions is
     * wrong: it uses the first known argument to determine the output type. For instance,
     * when stddev is computed over an integer column it infers an integer output type,
     * although the result should be double. Adding a concrete cast after rewriting the
     * aggregate would therefore introduce an extra, incorrect cast and produce wrong
     * results, so we simply add a cast to ANY.
     */
    return rexBuilder.makeCast(typeFactory.createSqlType(SqlTypeName.ANY), result);
  }
}
use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by axbaretto.
the class DrillReduceAggregatesRule method reduceAvg.
private RexNode reduceAvg(Aggregate oldAggRel, AggregateCall oldCall, List<AggregateCall> newCalls,
    Map<AggregateCall, RexNode> aggCallMapping) {
  final PlannerSettings plannerSettings = (PlannerSettings) oldAggRel.getCluster().getPlanner().getContext();
  final boolean isInferenceEnabled = plannerSettings.isTypeInferenceEnabled();
  final int nGroups = oldAggRel.getGroupCount();
  RelDataTypeFactory typeFactory = oldAggRel.getCluster().getTypeFactory();
  RexBuilder rexBuilder = oldAggRel.getCluster().getRexBuilder();
  int iAvgInput = oldCall.getArgList().get(0);
  RelDataType avgInputType = getFieldType(oldAggRel.getInput(), iAvgInput);
  RelDataType sumType = TypeInferenceUtils.getDrillSqlReturnTypeInference(SqlKind.SUM.name(),
      ImmutableList.<DrillFuncHolder>of()).inferReturnType(oldCall.createBinding(oldAggRel));
  sumType = typeFactory.createTypeWithNullability(sumType, sumType.isNullable() || nGroups == 0);
  SqlAggFunction sumAgg = new DrillCalciteSqlAggFunctionWrapper(new SqlSumEmptyIsZeroAggFunction(), sumType);
  AggregateCall sumCall = AggregateCall.create(sumAgg, oldCall.isDistinct(), oldCall.isApproximate(),
      oldCall.getArgList(), -1, sumType, null);
  final SqlCountAggFunction countAgg = (SqlCountAggFunction) SqlStdOperatorTable.COUNT;
  final RelDataType countType = countAgg.getReturnType(typeFactory);
  AggregateCall countCall = AggregateCall.create(countAgg, oldCall.isDistinct(), oldCall.isApproximate(),
      oldCall.getArgList(), -1, countType, null);
  RexNode tmpsumRef = rexBuilder.addAggCall(sumCall, nGroups, oldAggRel.indicator, newCalls, aggCallMapping,
      ImmutableList.of(avgInputType));
  RexNode tmpcountRef = rexBuilder.addAggCall(countCall, nGroups, oldAggRel.indicator, newCalls, aggCallMapping,
      ImmutableList.of(avgInputType));
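  // avg(x) is rewritten as sum0(x) / count(x). Because sum0 returns 0 (not NULL) on empty
  // input, the CASE below restores SQL semantics by producing NULL when count(x) = 0.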
  RexNode n = rexBuilder.makeCall(SqlStdOperatorTable.CASE,
      rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, tmpcountRef, rexBuilder.makeExactLiteral(BigDecimal.ZERO)),
      rexBuilder.constantNull(), tmpsumRef);
  // NOTE: these references are with respect to the output
  // of newAggRel
  /*
  RexNode numeratorRef =
      rexBuilder.makeCall(CastHighOp,
          rexBuilder.addAggCall(
              sumCall,
              nGroups,
              newCalls,
              aggCallMapping,
              ImmutableList.of(avgInputType))
      );
  */
  RexNode numeratorRef = rexBuilder.makeCall(CastHighOp, n);
  RexNode denominatorRef = rexBuilder.addAggCall(countCall, nGroups, oldAggRel.indicator, newCalls,
      aggCallMapping, ImmutableList.of(avgInputType));
  if (isInferenceEnabled) {
    return rexBuilder.makeCall(new DrillSqlOperator("divide", 2, true, oldCall.getType(), false),
        numeratorRef, denominatorRef);
  } else {
    final RexNode divideRef = rexBuilder.makeCall(SqlStdOperatorTable.DIVIDE, numeratorRef, denominatorRef);
    return rexBuilder.makeCast(typeFactory.createSqlType(SqlTypeName.ANY), divideRef);
  }
}
use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by axbaretto.
the class ParquetPruneScanRule method getFilterOnScanParquet.
public static final RelOptRule getFilterOnScanParquet(OptimizerRulesContext optimizerRulesContext) {
  return new PruneScanRule(RelOptHelper.some(DrillFilterRel.class, RelOptHelper.any(DrillScanRel.class)),
      "PruneScanRule:Filter_On_Scan_Parquet", optimizerRulesContext) {

    @Override
    public PartitionDescriptor getPartitionDescriptor(PlannerSettings settings, TableScan scanRel) {
      return new ParquetPartitionDescriptor(settings, (DrillScanRel) scanRel);
    }

    @Override
    public boolean matches(RelOptRuleCall call) {
      final DrillScanRel scan = call.rel(1);
      GroupScan groupScan = scan.getGroupScan();
      // this rule is applicable only for parquet based partition pruning
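      // With HEP-based partition pruning enabled, only match scans whose partition filter has
      // not already been pushed down, so the rule does not fire twice on the same scan.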
      if (PrelUtil.getPlannerSettings(scan.getCluster().getPlanner()).isHepPartitionPruningEnabled()) {
        return groupScan instanceof ParquetGroupScan
            && groupScan.supportsPartitionFilterPushdown()
            && !scan.partitionFilterPushdown();
      } else {
        return groupScan instanceof ParquetGroupScan && groupScan.supportsPartitionFilterPushdown();
      }
    }

    @Override
    public void onMatch(RelOptRuleCall call) {
      final DrillFilterRel filterRel = call.rel(0);
      final DrillScanRel scanRel = call.rel(1);
      doOnMatch(call, filterRel, null, scanRel);
    }
  };
}
use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.
the class ConvertHiveParquetScanToDrillParquetScan method createColumnFormatConversion.
/**
 * Apply any data format conversion expressions.
 */
private RexNode createColumnFormatConversion(DrillScanRel hiveScanRel, DrillScanRel nativeScanRel,
    String colName, RexBuilder rb) {
  RelDataType outputType = hiveScanRel.getRowType().getField(colName, false, false).getType();
  RelDataTypeField inputField = nativeScanRel.getRowType().getField(colName, false, false);
  RexInputRef inputRef = rb.makeInputRef(inputField.getType(), inputField.getIndex());
  PlannerSettings settings = PrelUtil.getPlannerSettings(hiveScanRel.getCluster().getPlanner());
  boolean conversionToTimestampEnabled = settings.getOptions().getBoolean(ExecConstants.PARQUET_READER_INT96_AS_TIMESTAMP);
  if (outputType.getSqlTypeName() == SqlTypeName.TIMESTAMP && !conversionToTimestampEnabled) {
    // The reader-level INT96-to-TIMESTAMP conversion is disabled, so convert the value here;
    // when that reader option is enabled, converting again here would double-convert it.
    return rb.makeCall(INT96_TO_TIMESTAMP, inputRef);
  }
  return inputRef;
}