use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.
the class ConvertMetadataAggregateToDirectScanRule method buildDirectScan.
private DirectGroupScan buildDirectScan(List<SchemaPath> interestingColumns, DrillScanRel scan,
    PlannerSettings settings) throws IOException {
  DrillTable drillTable = Utilities.getDrillTable(scan.getTable());
  ColumnNamesOptions columnNamesOptions = new ColumnNamesOptions(settings.getOptions());

  // populates schema to be used when adding record values
  FormatSelection selection = (FormatSelection) drillTable.getSelection();

  // adds partition columns to the schema
  Map<String, Class<?>> schema = ColumnExplorer.getPartitionColumnNames(selection.getSelection(), columnNamesOptions).stream()
      .collect(Collectors.toMap(
          Function.identity(),
          s -> String.class,
          (o, n) -> n));

  // adds internal implicit columns to the schema
  schema.put(MetastoreAnalyzeConstants.SCHEMA_FIELD, String.class);
  schema.put(MetastoreAnalyzeConstants.LOCATION_FIELD, String.class);
  schema.put(columnNamesOptions.rowGroupIndex(), String.class);
  schema.put(columnNamesOptions.rowGroupStart(), String.class);
  schema.put(columnNamesOptions.rowGroupLength(), String.class);
  schema.put(columnNamesOptions.lastModifiedTime(), String.class);

  return populateRecords(interestingColumns, schema, scan, columnNamesOptions);
}
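For orientation, the schema map built above is just the partition-column names plus a handful of implicit metastore columns, all typed as String.class. A minimal standalone sketch of that construction follows; the column names ("dir0", "dir1", "schema", "location") are illustrative placeholders, not the actual Drill constants:

import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;

public class SchemaMapSketch {
  public static void main(String[] args) {
    // "dir0"/"dir1" stand in for real partition column names
    List<String> partitionColumns = List.of("dir0", "dir1");
    // same Collectors.toMap shape as in buildDirectScan: identity keys,
    // String.class values, last-one-wins merge function for duplicates
    Map<String, Class<?>> schema = partitionColumns.stream()
        .collect(Collectors.toMap(Function.identity(), s -> String.class, (o, n) -> n));
    // implicit columns appended afterwards, mirroring the puts above
    schema.put("schema", String.class);
    schema.put("location", String.class);
    System.out.println(schema.keySet());
  }
}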
use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.
the class DrillReduceAggregatesRule method reduceSum.
private RexNode reduceSum(Aggregate oldAggRel, AggregateCall oldCall, List<AggregateCall> newCalls,
    Map<AggregateCall, RexNode> aggCallMapping) {
  final PlannerSettings plannerSettings = (PlannerSettings) oldAggRel.getCluster().getPlanner().getContext();
  final boolean isInferenceEnabled = plannerSettings.isTypeInferenceEnabled();
  final int nGroups = oldAggRel.getGroupCount();
  RelDataTypeFactory typeFactory = oldAggRel.getCluster().getTypeFactory();
  RexBuilder rexBuilder = oldAggRel.getCluster().getRexBuilder();
  int arg = oldCall.getArgList().get(0);
  RelDataType argType = getFieldType(oldAggRel.getInput(), arg);

  final RelDataType sumType;
  final SqlAggFunction sumZeroAgg;
  if (isInferenceEnabled) {
    sumType = oldCall.getType();
  } else {
    sumType = typeFactory.createTypeWithNullability(oldCall.getType(), argType.isNullable());
  }
  sumZeroAgg = new DrillCalciteSqlAggFunctionWrapper(new SqlSumEmptyIsZeroAggFunction(), sumType);
  AggregateCall sumZeroCall = AggregateCall.create(sumZeroAgg, oldCall.isDistinct(),
      oldCall.isApproximate(), oldCall.getArgList(), -1, sumType, null);

  final SqlCountAggFunction countAgg = (SqlCountAggFunction) SqlStdOperatorTable.COUNT;
  final RelDataType countType = countAgg.getReturnType(typeFactory);
  AggregateCall countCall = AggregateCall.create(countAgg, oldCall.isDistinct(),
      oldCall.isApproximate(), oldCall.getArgList(), -1, countType, null);

  // NOTE: these references are with respect to the output
  // of newAggRel
  RexNode sumZeroRef = rexBuilder.addAggCall(sumZeroCall, nGroups, newCalls, aggCallMapping,
      ImmutableList.of(argType));
  if (!oldCall.getType().isNullable()) {
    // If SUM(x) is not nullable, the validator must have determined that
    // nulls are impossible (because the group is never empty and x is never
    // null). Therefore we translate to SUM0(x).
    return sumZeroRef;
  }
  RexNode countRef = rexBuilder.addAggCall(countCall, nGroups, newCalls, aggCallMapping,
      ImmutableList.of(argType));
  return rexBuilder.makeCall(SqlStdOperatorTable.CASE,
      rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, countRef, rexBuilder.makeExactLiteral(BigDecimal.ZERO)),
      rexBuilder.constantNull(),
      sumZeroRef);
}
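The effect of this rewrite is that SUM(x) over a nullable input becomes CASE WHEN COUNT(x) = 0 THEN NULL ELSE SUM0(x) END, where SUM0 ($SUM0 in Calcite) returns 0 rather than NULL on empty input. A small standalone demonstration of that equivalence in plain Java, with an empty Optional standing in for SQL NULL and per-row NULL handling omitted for brevity:

import java.util.List;
import java.util.Optional;

public class SumRewriteDemo {
  // SQL SUM: NULL (here: empty Optional) over empty input
  static Optional<Long> sum(List<Long> xs) {
    return xs.isEmpty() ? Optional.empty()
        : Optional.of(xs.stream().mapToLong(Long::longValue).sum());
  }
  // SUM0: 0 over empty input
  static long sum0(List<Long> xs) {
    return xs.stream().mapToLong(Long::longValue).sum();
  }
  // the rewritten form: CASE WHEN COUNT(x) = 0 THEN NULL ELSE SUM0(x) END
  static Optional<Long> rewritten(List<Long> xs) {
    return xs.size() == 0 ? Optional.empty() : Optional.of(sum0(xs));
  }
  public static void main(String[] args) {
    System.out.println(sum(List.of()) + " == " + rewritten(List.of()));       // both Optional.empty
    System.out.println(sum(List.of(1L, 2L, 3L)) + " == "
        + rewritten(List.of(1L, 2L, 3L)));                                    // both Optional[6]
  }
}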
use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.
the class DrillReduceAggregatesRule method reduceStddev.
private RexNode reduceStddev(Aggregate oldAggRel, AggregateCall oldCall, boolean biased, boolean sqrt,
    List<AggregateCall> newCalls, Map<AggregateCall, RexNode> aggCallMapping, List<RexNode> inputExprs) {
  // stddev_pop(x) ==>
  //   power(
  //     (sum(x * x) - sum(x) * sum(x) / count(x))
  //     / count(x),
  //     .5)
  //
  // stddev_samp(x) ==>
  //   power(
  //     (sum(x * x) - sum(x) * sum(x) / count(x))
  //     / nullif(count(x) - 1, 0),
  //     .5)
  final PlannerSettings plannerSettings = (PlannerSettings) oldAggRel.getCluster().getPlanner().getContext();
  final boolean isInferenceEnabled = plannerSettings.isTypeInferenceEnabled();
  final int nGroups = oldAggRel.getGroupCount();
  RelDataTypeFactory typeFactory = oldAggRel.getCluster().getTypeFactory();
  final RexBuilder rexBuilder = oldAggRel.getCluster().getRexBuilder();
  assert oldCall.getArgList().size() == 1 : oldCall.getArgList();
  final int argOrdinal = oldCall.getArgList().get(0);
  final RelDataType argType = getFieldType(oldAggRel.getInput(), argOrdinal);

  // final RexNode argRef = inputExprs.get(argOrdinal);
  RexNode argRef = rexBuilder.makeCall(CastHighOp, inputExprs.get(argOrdinal));
  inputExprs.set(argOrdinal, argRef);

  final RexNode argSquared = rexBuilder.makeCall(SqlStdOperatorTable.MULTIPLY, argRef, argRef);
  final int argSquaredOrdinal = lookupOrAdd(inputExprs, argSquared);

  RelDataType sumType = TypeInferenceUtils.getDrillSqlReturnTypeInference(SqlKind.SUM.name(), ImmutableList.of())
      .inferReturnType(oldCall.createBinding(oldAggRel));
  sumType = typeFactory.createTypeWithNullability(sumType, true);
  final AggregateCall sumArgSquaredAggCall = AggregateCall.create(
      new DrillCalciteSqlAggFunctionWrapper(new SqlSumAggFunction(sumType), sumType),
      oldCall.isDistinct(), oldCall.isApproximate(), ImmutableIntList.of(argSquaredOrdinal), -1, sumType, null);
  final RexNode sumArgSquared = rexBuilder.addAggCall(sumArgSquaredAggCall, nGroups, newCalls,
      aggCallMapping, ImmutableList.of(argType));

  final AggregateCall sumArgAggCall = AggregateCall.create(
      new DrillCalciteSqlAggFunctionWrapper(new SqlSumAggFunction(sumType), sumType),
      oldCall.isDistinct(), oldCall.isApproximate(), ImmutableIntList.of(argOrdinal), -1, sumType, null);
  final RexNode sumArg = rexBuilder.addAggCall(sumArgAggCall, nGroups, newCalls, aggCallMapping,
      ImmutableList.of(argType));
  final RexNode sumSquaredArg = rexBuilder.makeCall(SqlStdOperatorTable.MULTIPLY, sumArg, sumArg);

  final SqlCountAggFunction countAgg = (SqlCountAggFunction) SqlStdOperatorTable.COUNT;
  final RelDataType countType = countAgg.getReturnType(typeFactory);
  final AggregateCall countArgAggCall = AggregateCall.create(countAgg, oldCall.isDistinct(),
      oldCall.isApproximate(), oldCall.getArgList(), -1, countType, null);
  final RexNode countArg = rexBuilder.addAggCall(countArgAggCall, nGroups, newCalls, aggCallMapping,
      ImmutableList.of(argType));

  final RexNode avgSumSquaredArg = rexBuilder.makeCall(SqlStdOperatorTable.DIVIDE, sumSquaredArg, countArg);
  final RexNode diff = rexBuilder.makeCall(SqlStdOperatorTable.MINUS, sumArgSquared, avgSumSquaredArg);

  final RexNode denominator;
  if (biased) {
    denominator = countArg;
  } else {
    final RexLiteral one = rexBuilder.makeExactLiteral(BigDecimal.ONE);
    final RexNode nul = rexBuilder.makeNullLiteral(countArg.getType());
    final RexNode countMinusOne = rexBuilder.makeCall(SqlStdOperatorTable.MINUS, countArg, one);
    final RexNode countEqOne = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, countArg, one);
    denominator = rexBuilder.makeCall(SqlStdOperatorTable.CASE, countEqOne, nul, countMinusOne);
  }

  final SqlOperator divide;
  if (isInferenceEnabled) {
    divide = new DrillSqlOperator("divide", 2, true, oldCall.getType(), false);
  } else {
    divide = SqlStdOperatorTable.DIVIDE;
  }
  final RexNode div = rexBuilder.makeCall(divide, diff, denominator);

  RexNode result = div;
  if (sqrt) {
    final RexNode half = rexBuilder.makeExactLiteral(new BigDecimal("0.5"));
    result = rexBuilder.makeCall(SqlStdOperatorTable.POWER, div, half);
  }

  if (isInferenceEnabled) {
    return result;
  } else {
    /*
     * Currently calcite's strategy to infer the return type of aggregate functions
     * is wrong because it uses the first known argument to determine output type. For
     * instance if we are performing stddev on an integer column then it interprets the
     * output type to be integer which is incorrect as it should be double. So based on
     * this if we add cast after rewriting the aggregate we add an additional cast which
     * would cause wrong results. So we simply add a cast to ANY.
     */
    return rexBuilder.makeCast(typeFactory.createSqlType(SqlTypeName.ANY), result);
  }
}
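The sum-of-squares expansion above can be sanity-checked numerically without any planner machinery. A standalone sketch of the two formulas the rule emits, in plain Java:

public class StddevExpansionDemo {
  public static void main(String[] args) {
    double[] x = {2, 4, 4, 4, 5, 5, 7, 9};
    double n = x.length, sum = 0, sumSq = 0;
    for (double v : x) {
      sum += v;
      sumSq += v * v;
    }
    // stddev_pop(x) = power((sum(x*x) - sum(x)*sum(x)/count(x)) / count(x), .5)
    double pop = Math.pow((sumSq - sum * sum / n) / n, 0.5);
    // stddev_samp(x) divides by nullif(count(x) - 1, 0) instead; for n == 1 the
    // CASE built by the rule yields NULL rather than dividing by zero
    double samp = Math.pow((sumSq - sum * sum / n) / (n - 1), 0.5);
    System.out.println("stddev_pop = " + pop);   // 2.0 for this sample
    System.out.println("stddev_samp = " + samp); // ~2.138
  }
}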
use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.
the class MapRDBStatistics method addToCache.
/*
 * Adds the statistic (row count) to the cache. Also adds the corresponding
 * QueryCondition->RexNode condition mapping.
 */
private void addToCache(RexNode condition, IndexDescriptor idx, IndexCallContext context,
    StatisticsPayload payload, JsonTableGroupScan jTabGrpScan, RelNode scanRel, RelDataType rowType) {
  if (condition != null && !condition.isAlwaysTrue()) {
    RexBuilder builder = scanRel.getCluster().getRexBuilder();
    PlannerSettings settings = PrelUtil.getSettings(scanRel.getCluster());
    String conditionAsStr = convertRexToString(condition, scanRel.getRowType());
    if (statsCache.get(conditionAsStr) == null && payload.getRowCount() != Statistics.ROWCOUNT_UNKNOWN) {
      Map<String, StatisticsPayload> payloadMap = new HashMap<>();
      payloadMap.put(buildUniqueIndexIdentifier(idx), payload);
      statsCache.put(conditionAsStr, payloadMap);
      logger.debug("Statistics: StatsCache:<{}, {}>", conditionAsStr, payload);
      // Always pre-process CAST conditions - otherwise queryCondition will not be generated correctly
      RexNode preProcIdxCondition = convertToStatsCondition(condition, idx, context, scanRel,
          Arrays.asList(SqlKind.CAST));
      QueryCondition queryCondition = jTabGrpScan.convertToQueryCondition(
          convertToLogicalExpression(preProcIdxCondition, rowType, settings, builder));
      if (queryCondition != null) {
        String queryConditionAsStr = queryCondition.toString();
        if (conditionRexNodeMap.get(queryConditionAsStr) == null) {
          conditionRexNodeMap.put(queryConditionAsStr, conditionAsStr);
          logger.debug("Statistics: QCRNCache:<{}, {}>", queryConditionAsStr, conditionAsStr);
        }
      } else {
        logger.debug("Statistics: QCRNCache: Unable to generate QueryCondition for {}", conditionAsStr);
logger.debug("Statistics: QCRNCache: Unable to generate QueryCondition for {}", conditionAsStr);
      }
    } else {
      Map<String, StatisticsPayload> payloadMap = statsCache.get(conditionAsStr);
      if (payloadMap != null) {
        if (payloadMap.get(buildUniqueIndexIdentifier(idx)) == null) {
          payloadMap.put(buildUniqueIndexIdentifier(idx), payload);
          // The rowCount for the same condition should be the same on the primary table or an index,
          // so sync them to the smallest, since currently both are over-estimated.
          // DO NOT sync the leading rowCount, since it is based on the leading condition and not the
          // condition (the key for this cache). Hence, for the same condition the leading condition,
          // and consequently the leading rowCount, will vary with the index. Syncing them may lead to
          // unintended side-effects: e.g. given a covering index, a full table scan, and a condition
          // on a non-id field which happens to be the leading key in the index, the leading rowCount
          // for the full table scan should be the full table rowCount. Syncing them would incorrectly
          // make the full table scan cheaper! If required, syncing should only be done based on the
          // leading condition and NOT the condition.
          double minimalRowCount = payload.getRowCount();
          for (StatisticsPayload existing : payloadMap.values()) {
            if (existing.getRowCount() < minimalRowCount) {
              minimalRowCount = existing.getRowCount();
            }
          }
          for (StatisticsPayload existing : payloadMap.values()) {
            if (existing instanceof MapRDBStatisticsPayload) {
              ((MapRDBStatisticsPayload) existing).rowCount = minimalRowCount;
            }
          }
        } else {
          logger.debug("Statistics: Filter row count already exists for filter: {}. Skip!", conditionAsStr);
        }
      } else {
        logger.debug("Statistics: Filter row count is UNKNOWN for filter: {}", conditionAsStr);
      }
    }
  } else if (condition == null && idx == null) {
    fullTableScanPayload = new MapRDBStatisticsPayload(payload.getRowCount(),
        payload.getLeadingRowCount(), payload.getAvgRowSize());
    logger.debug("Statistics: StatsCache:<{}, {}>", "NULL", fullTableScanPayload);
  }
}
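For orientation, statsCache is a two-level map: the outer key is the serialized filter condition, the inner key the unique index identifier. A minimal sketch of that shape and the insert-if-absent behavior (Payload and the identifier strings are hypothetical stand-ins, not the Drill/MapR-DB types):

import java.util.HashMap;
import java.util.Map;

public class StatsCacheSketch {
  record Payload(double rowCount) { }

  // outer key: condition string; inner key: unique index identifier
  static final Map<String, Map<String, Payload>> statsCache = new HashMap<>();

  static void add(String conditionAsStr, String indexId, Payload payload) {
    statsCache.computeIfAbsent(conditionAsStr, k -> new HashMap<>())
        .putIfAbsent(indexId, payload);
  }

  public static void main(String[] args) {
    add("`a` = 10", "idx_a", new Payload(100));
    add("`a` = 10", "PRIMARY", new Payload(120)); // second index, same condition
    add("`a` = 10", "idx_a", new Payload(999));   // duplicate: skipped, like the "Skip!" branch
    System.out.println(statsCache);
  }
}

The real method additionally syncs the per-index row counts for a condition down to the smallest value, as the comment in addToCache explains; this sketch only shows the cache layout.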
use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.
the class ConvertHiveMapRDBJsonScanToDrillMapRDBJsonScan method createNativeScanRel.
/**
* Helper method which creates a DrillScanRel with native Drill HiveScan.
*/
private DrillScanRel createNativeScanRel(DrillScanRel hiveScanRel, PlannerSettings settings) throws IOException {
  RelDataTypeFactory typeFactory = hiveScanRel.getCluster().getTypeFactory();
  HiveScan hiveScan = (HiveScan) hiveScanRel.getGroupScan();
  HiveReadEntry hiveReadEntry = hiveScan.getHiveReadEntry();
  Map<String, String> parameters = hiveReadEntry.getHiveTableWrapper().getParameters();
  JsonScanSpec scanSpec = new JsonScanSpec(parameters.get(MAPRDB_TABLE_NAME), null, null);
  List<SchemaPath> hiveScanCols = hiveScanRel.getColumns().stream()
      .map(colNameSchemaPath -> replaceOverriddenSchemaPath(parameters, colNameSchemaPath))
      .collect(Collectors.toList());

  // creates TupleMetadata based on Hive's schema (with optional data modes) to be used in the reader
  // for the case when column type wasn't discovered
  HiveToRelDataTypeConverter dataTypeConverter = new HiveToRelDataTypeConverter(typeFactory);
  TupleMetadata schema = new TupleSchema();
  hiveReadEntry.getTable().getColumnListsCache().getTableSchemaColumns()
      .forEach(column -> schema.addColumn(HiveUtilities.getColumnMetadata(
          replaceOverriddenColumnId(parameters, column.getName()),
          dataTypeConverter.convertToNullableRelDataType(column))));

  MapRDBFormatPluginConfig formatConfig = new MapRDBFormatPluginConfig();
  formatConfig.readTimestampWithZoneOffset =
      settings.getOptions().getBoolean(ExecConstants.HIVE_READ_MAPRDB_JSON_TIMESTAMP_WITH_TIMEZONE_OFFSET);
  formatConfig.allTextMode = settings.getOptions().getBoolean(ExecConstants.HIVE_MAPRDB_JSON_ALL_TEXT_MODE);

  JsonTableGroupScan nativeMapRDBScan = new JsonTableGroupScan(hiveScan.getUserName(), hiveScan.getStoragePlugin(),
      // TODO: We should use Hive format plugins here, once it will be implemented. DRILL-6621
      (MapRDBFormatPlugin) hiveScan.getStoragePlugin().getFormatPlugin(formatConfig),
      scanSpec, hiveScanCols, new MapRDBStatistics(),
      FileSystemMetadataProviderManager.getMetadataProviderForSchema(schema));

  List<String> nativeScanColNames = hiveScanRel.getRowType().getFieldList().stream()
      .map(field -> replaceOverriddenColumnId(parameters, field.getName()))
      .collect(Collectors.toList());
  List<RelDataType> nativeScanColTypes = hiveScanRel.getRowType().getFieldList().stream()
      .map(RelDataTypeField::getType)
      .collect(Collectors.toList());
  RelDataType nativeScanRowType = typeFactory.createStructType(nativeScanColTypes, nativeScanColNames);

  return new DrillScanRel(hiveScanRel.getCluster(), hiveScanRel.getTraitSet(), hiveScanRel.getTable(),
      nativeMapRDBScan, nativeScanRowType, hiveScanCols);
}
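The row type at the end is built directly with Calcite's type factory from parallel type/name lists. A self-contained sketch of the same createStructType call, assuming calcite-core on the classpath; the field names here are illustrative:

import java.util.List;
import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.sql.type.SqlTypeName;

public class RowTypeSketch {
  public static void main(String[] args) {
    RelDataTypeFactory typeFactory = new JavaTypeFactoryImpl();
    List<RelDataType> types = List.of(
        typeFactory.createSqlType(SqlTypeName.VARCHAR),
        typeFactory.createSqlType(SqlTypeName.BIGINT));
    List<String> names = List.of("_id", "ts");
    // same call shape as createStructType(nativeScanColTypes, nativeScanColNames) above
    RelDataType rowType = typeFactory.createStructType(types, names);
    System.out.println(rowType); // e.g. RecordType(VARCHAR _id, BIGINT ts)
  }
}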