Use of org.apache.calcite.rex.RexBuilder in the Apache Hive project.
Class HiveRelMdRowCount, method analyzeJoinForPKFK.
/*
 * For T1 join T2 on T1.x = T2.y if we identify 'y' as a key of T2 then we can
 * infer the join cardinality as: rowCount(T1) * selectivity(T2) i.e this is
 * like a SemiJoin where the T1(Fact side/FK side) is filtered by a factor
 * based on the Selectivity of the PK/Dim table side.
 *
 * 1. If both T1.x and T2.y are keys then use the larger one as the PK side.
 * 2. In case of outer Joins: a) The FK side should be the Null Preserving
 * side. It doesn't make sense to apply this heuristic in case of Dim loj Fact
 * or Fact roj Dim b) The selectivity factor applied on the Fact Table should
 * be 1.
 *
 * Returns null whenever the join shape does not fit the heuristic (no join
 * condition, semi-join, unsupported condition form, or neither side is a key).
 */
public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery mq) {
RelNode left = joinRel.getInputs().get(0);
RelNode right = joinRel.getInputs().get(1);
final List<RexNode> initJoinFilters = RelOptUtil.conjunctions(joinRel.getCondition());
/*
 * No joining condition: nothing to analyze for a PK-FK relationship.
 */
if (initJoinFilters.isEmpty()) {
return null;
}
List<RexNode> leftFilters = new ArrayList<RexNode>();
List<RexNode> rightFilters = new ArrayList<RexNode>();
List<RexNode> joinFilters = new ArrayList<RexNode>(initJoinFilters);
// NOTE(review): the classification below assumes the join output exposes both
// inputs' column counts; that is not true for semiJoins, so bail out early.
if (joinRel instanceof SemiJoin) {
return null;
}
// Split the condition into filters pushable to the left input, filters
// pushable to the right input, and the residual join-level filters.
RelOptUtil.classifyFilters(joinRel, joinFilters, joinRel.getJoinType(), false, !joinRel.getJoinType().generatesNullsOnRight(), !joinRel.getJoinType().generatesNullsOnLeft(), joinFilters, leftFilters, rightFilters);
// canHandleJoin yields the (leftColIdx, rightColIdx) pair of the join columns
// when the condition is analyzable, or null otherwise — presumably it requires
// a single equi-join column pair; TODO confirm against its definition.
Pair<Integer, Integer> joinCols = canHandleJoin(joinRel, leftFilters, rightFilters, joinFilters);
if (joinCols == null) {
return null;
}
int leftColIdx = joinCols.left;
int rightColIdx = joinCols.right;
RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder();
// Residual per-side predicates; composeConjunction(..., true) returns null
// when the filter list is empty.
RexNode leftPred = RexUtil.composeConjunction(rexBuilder, leftFilters, true);
RexNode rightPred = RexUtil.composeConjunction(rexBuilder, rightFilters, true);
ImmutableBitSet lBitSet = ImmutableBitSet.of(leftColIdx);
ImmutableBitSet rBitSet = ImmutableBitSet.of(rightColIdx);
/*
 * If the form is Dim loj F or Fact roj Dim or Dim semij Fact then return
 * null: a side may only be the PK side when it is not the null-generating
 * side of an outer join (rule 2a above).
 */
boolean leftIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel.getJoinType() == JoinRelType.RIGHT) && !(joinRel instanceof SemiJoin) && isKey(lBitSet, left, mq);
boolean rightIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel.getJoinType() == JoinRelType.LEFT) && isKey(rBitSet, right, mq);
if (!leftIsKey && !rightIsKey) {
return null;
}
double leftRowCount = mq.getRowCount(left);
double rightRowCount = mq.getRowCount(right);
// Rule 1: when both join columns are keys, treat the larger input as the PK side.
if (leftIsKey && rightIsKey) {
if (rightRowCount < leftRowCount) {
leftIsKey = false;
}
}
int pkSide = leftIsKey ? 0 : rightIsKey ? 1 : -1;
// NDVs are only computed (and the scaling below only applied) when the PK-side
// subtree is "simple" on the join key, as judged by IsSimpleTreeOnJoinKey.
boolean isPKSideSimpleTree = pkSide != -1 ? IsSimpleTreeOnJoinKey.check(pkSide == 0 ? left : right, pkSide == 0 ? leftColIdx : rightColIdx, mq) : false;
double leftNDV = isPKSideSimpleTree ? mq.getDistinctRowCount(left, lBitSet, leftPred) : -1;
double rightNDV = isPKSideSimpleTree ? mq.getDistinctRowCount(right, rBitSet, rightPred) : -1;
/*
 * If the ndv of the PK - FK side don't match, and the PK side is a filter
 * on the Key column then scale the NDV on the FK side.
 *
 * As described by Peter Boncz: http://databasearchitects.blogspot.com/
 * in such cases we can be off by a large margin in the Join cardinality
 * estimate. The e.g. he provides is on the join of StoreSales and DateDim
 * on the TPCDS dataset. Since the DateDim is populated for 20 years into
 * the future, while the StoreSales only has 5 years worth of data, there
 * are 40 times fewer distinct dates in StoreSales.
 *
 * In general it is hard to infer the range for the foreign key on an
 * arbitrary expression. For e.g. the NDV for DayofWeek is the same
 * irrespective of NDV on the number of unique days, whereas the
 * NDV of Quarters has the same ratio as the NDV on the keys.
 *
 * But for expressions that apply only on columns that have the same NDV
 * as the key (implying that they are alternate keys) we can apply the
 * ratio. So in the case of StoreSales - DateDim joins for predicate on the
 * d_date column we can apply the scaling factor.
 */
double ndvScalingFactor = 1.0;
if (isPKSideSimpleTree) {
// PK-side NDV over FK-side NDV, per the comment above.
ndvScalingFactor = pkSide == 0 ? leftNDV / rightNDV : rightNDV / leftNDV;
}
if (pkSide == 0) {
// Left input is the PK/Dim side; right input is the FK/Fact side.
FKSideInfo fkInfo = new FKSideInfo(rightRowCount, rightNDV);
double pkSelectivity = pkSelectivity(joinRel, mq, true, left, leftRowCount);
// Rule 2b: if the join generates nulls on the FK side, do not discount it.
PKSideInfo pkInfo = new PKSideInfo(leftRowCount, leftNDV, joinRel.getJoinType().generatesNullsOnRight() ? 1.0 : pkSelectivity);
return new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree);
}
if (pkSide == 1) {
// Right input is the PK/Dim side; left input is the FK/Fact side.
FKSideInfo fkInfo = new FKSideInfo(leftRowCount, leftNDV);
double pkSelectivity = pkSelectivity(joinRel, mq, false, right, rightRowCount);
// Rule 2b: if the join generates nulls on the FK side, do not discount it.
PKSideInfo pkInfo = new PKSideInfo(rightRowCount, rightNDV, joinRel.getJoinType().generatesNullsOnLeft() ? 1.0 : pkSelectivity);
return new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree);
}
return null;
}
Use of org.apache.calcite.rex.RexBuilder in the Apache Hive project.
Class HiveRelFieldTrimmer, method project.
/**
 * Projects a DruidQuery down to the used fields, appending a typed NULL
 * literal for each requested extra field. Returns the query unchanged when
 * every field is used and no extra fields are requested.
 */
private static RelNode project(DruidQuery dq, ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields, RelBuilder relBuilder) {
final int totalFields = dq.getRowType().getFieldCount();
final boolean everyFieldUsed = fieldsUsed.equals(ImmutableBitSet.range(totalFields));
if (everyFieldUsed && extraFields.isEmpty()) {
return dq;
}
final RexBuilder rex = dq.getCluster().getRexBuilder();
final List<RelDataTypeField> rowFields = dq.getRowType().getFieldList();
final List<RexNode> projections = new ArrayList<>();
final List<String> projectionNames = new ArrayList<>();
// Keep only the referenced input fields, in bit-set (ascending) order.
for (int ordinal : fieldsUsed) {
projections.add(rex.makeInputRef(dq, ordinal));
projectionNames.add(rowFields.get(ordinal).getName());
}
// Each extra field becomes a NULL literal coerced to the field's type.
for (RelDataTypeField extra : extraFields) {
projections.add(rex.ensureType(extra.getType(), rex.constantNull(), true));
projectionNames.add(extra.getName());
}
final HiveProject trimmed = (HiveProject) relBuilder.push(dq).project(projections, projectionNames).build();
trimmed.setSynthetic();
return trimmed;
}
Use of org.apache.calcite.rex.RexBuilder in the Apache Hive project.
Class HiveMaterializedViewsRegistry, method createTableScan.
/**
 * Builds a Calcite TableScan (or DruidQuery for Druid-backed tables) for the
 * given Hive table, creating a fresh planner/cluster for it. Returns null when
 * the table's deserializer or row type cannot be resolved (deliberate bail-out
 * rather than an exception).
 */
private static RelNode createTableScan(Table viewTable) {
// 0. Recreate cluster: a standalone Volcano planner and RexBuilder backed by
// the default Java type factory.
final RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(null);
final RexBuilder rexBuilder = new RexBuilder(new JavaTypeFactoryImpl());
final RelOptCluster cluster = RelOptCluster.create(planner, rexBuilder);
// 1. Create column schema
final RowResolver rr = new RowResolver();
// 1.1 Add Column info for non partion cols (Object Inspector fields)
StructObjectInspector rowObjectInspector;
try {
rowObjectInspector = (StructObjectInspector) viewTable.getDeserializer().getObjectInspector();
} catch (SerDeException e) {
// Bail out: table cannot be deserialized, so no scan can be built.
return null;
}
List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
ColumnInfo colInfo;
String colName;
ArrayList<ColumnInfo> cInfoLst = new ArrayList<ColumnInfo>();
for (int i = 0; i < fields.size(); i++) {
colName = fields.get(i).getFieldName();
colInfo = new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), null, false);
rr.put(null, colName, colInfo);
cInfoLst.add(colInfo);
}
// Snapshot of the non-partition columns before partition columns are appended.
ArrayList<ColumnInfo> nonPartitionColumns = new ArrayList<ColumnInfo>(cInfoLst);
// 1.2 Add column info corresponding to partition columns
ArrayList<ColumnInfo> partitionColumns = new ArrayList<ColumnInfo>();
for (FieldSchema part_col : viewTable.getPartCols()) {
colName = part_col.getName();
colInfo = new ColumnInfo(colName, TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), null, true);
rr.put(null, colName, colInfo);
cInfoLst.add(colInfo);
partitionColumns.add(colInfo);
}
// 1.3 Build row type from field <type, name>
RelDataType rowType;
try {
rowType = TypeConverter.getType(cluster, rr, null);
} catch (CalciteSemanticException e) {
// Bail out: a Hive type could not be converted to a Calcite type.
return null;
}
// 2. Build RelOptAbstractTable with a "db.table" qualified name (or just the
// table name when the db name is absent).
String fullyQualifiedTabName = viewTable.getDbName();
if (fullyQualifiedTabName != null && !fullyQualifiedTabName.isEmpty()) {
fullyQualifiedTabName = fullyQualifiedTabName + "." + viewTable.getTableName();
} else {
fullyQualifiedTabName = viewTable.getTableName();
}
RelOptHiveTable optTable = new RelOptHiveTable(null, fullyQualifiedTabName, rowType, viewTable, nonPartitionColumns, partitionColumns, new ArrayList<VirtualColumn>(), SessionState.get().getConf(), new HashMap<String, PrunedPartitionList>(), new AtomicInteger());
RelNode tableRel;
// 3. Build operator
if (obtainTableType(viewTable) == TableType.DRUID) {
// Build Druid query
String address = HiveConf.getVar(SessionState.get().getConf(), HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
String dataSource = viewTable.getParameters().get(Constants.DRUID_DATA_SOURCE);
Set<String> metrics = new HashSet<>();
List<RelDataType> druidColTypes = new ArrayList<>();
List<String> druidColNames = new ArrayList<>();
// Classify each field: the default timestamp column and VARCHAR columns are
// dimensions; everything else is registered as a Druid metric.
for (RelDataTypeField field : rowType.getFieldList()) {
druidColTypes.add(field.getType());
druidColNames.add(field.getName());
if (field.getName().equals(DruidTable.DEFAULT_TIMESTAMP_COLUMN)) {
// timestamp
continue;
}
if (field.getType().getSqlTypeName() == SqlTypeName.VARCHAR) {
// dimension
continue;
}
metrics.add(field.getName());
}
List<Interval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false), dataSource, RelDataTypeImpl.proto(rowType), metrics, DruidTable.DEFAULT_TIMESTAMP_COLUMN, intervals);
// Wrap the Hive scan in a DruidQuery so the query can be pushed to Druid.
final TableScan scan = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, viewTable.getTableName(), null, false, false);
tableRel = DruidQuery.create(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, druidTable, ImmutableList.<RelNode>of(scan));
} else {
// Build Hive Table Scan Rel
tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, viewTable.getTableName(), null, false, false);
}
return tableRel;
}
Use of org.apache.calcite.rex.RexBuilder in the Apache Hive project.
Class TestCBORuleFiredOnlyOnce, method testRuleFiredOnlyOnce.
@Test
public void testRuleFiredOnlyOnce() {
final HiveConf hiveConf = new HiveConf();
// Hep planner running a TOP_DOWN program that contains only the dummy rule.
HepProgramBuilder program = new HepProgramBuilder();
program.addMatchOrder(HepMatchOrder.TOP_DOWN);
program = program.addRuleCollection(ImmutableList.<RelOptRule>of(DummyRule.INSTANCE));
// The rules registry is what prevents a rule from firing more than once per node.
final HiveRulesRegistry rulesRegistry = new HiveRulesRegistry();
final HivePlannerContext plannerContext = new HivePlannerContext(null, rulesRegistry, null);
final HepPlanner hepPlanner = new HepPlanner(program.build(), plannerContext);
// Cluster backed by the plain Java type factory.
final RexBuilder rb = new RexBuilder(new JavaTypeFactoryImpl());
final RelOptCluster relCluster = RelOptCluster.create(hepPlanner, rb);
// Metadata: Hive's default provider, registered, chained, and cached.
final HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(hiveConf);
final List<RelMetadataProvider> providers = Lists.newArrayList();
providers.add(mdProvider.getMetadataProvider());
hepPlanner.registerMetadataProviders(providers);
final RelMetadataProvider chained = ChainedRelMetadataProvider.of(providers);
final RelNode root = new DummyNode(relCluster, relCluster.traitSet());
root.getCluster().setMetadataProvider(new CachingRelMetadataProvider(chained, hepPlanner));
hepPlanner.setRoot(root);
hepPlanner.findBestExp();
// Matches 3 times: 2 times the original node, 1 time the new node created by the rule
assertEquals(3, DummyRule.INSTANCE.numberMatches);
// It is fired only once: on the original node
assertEquals(1, DummyRule.INSTANCE.numberOnMatch);
}
Use of org.apache.calcite.rex.RexBuilder in the Apache Hive project.
Class TypeConverter, method getType.
/*********************** Convert Hive Types To Calcite Types ***********************/
/**
 * Builds a Calcite struct row type from a list of Hive column descriptors.
 *
 * @param cluster optimizer cluster whose RexBuilder supplies the type factory
 * @param cInfoLst Hive column descriptors to convert, in output field order
 * @return struct type whose fields carry the columns' internal names and converted types
 * @throws CalciteSemanticException if a Hive type cannot be converted
 */
public static RelDataType getType(RelOptCluster cluster, List<ColumnInfo> cInfoLst) throws CalciteSemanticException {
RexBuilder rexBuilder = cluster.getRexBuilder();
RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
// Presized ArrayList instead of LinkedList: both lists are append-only and
// then iterated by createStructType, for which ArrayList is the idiomatic
// (and cache-friendly) choice.
List<RelDataType> fieldTypes = new ArrayList<RelDataType>(cInfoLst.size());
List<String> fieldNames = new ArrayList<String>(cInfoLst.size());
for (ColumnInfo ci : cInfoLst) {
fieldTypes.add(convert(ci.getType(), dtFactory));
fieldNames.add(ci.getInternalName());
}
return dtFactory.createStructType(fieldTypes, fieldNames);
}
Aggregations