Use of org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable in project hive by apache.
From the class HiveRelFieldTrimmer, method fetchColStats.
private void fetchColStats(RelNode key, TableScan tableAccessRel, ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields) {
  final List<Integer> iRefSet = Lists.newArrayList();
  if (key instanceof Project) {
    final Project project = (Project) key;
    for (RexNode rx : project.getChildExps()) {
      iRefSet.addAll(HiveCalciteUtil.getInputRefs(rx));
    }
  } else {
    final int fieldCount = tableAccessRel.getRowType().getFieldCount();
    if (fieldsUsed.equals(ImmutableBitSet.range(fieldCount)) && extraFields.isEmpty()) {
      // get all cols
      iRefSet.addAll(ImmutableBitSet.range(fieldCount).asList());
    }
  }
  // Remove any virtual cols
  if (tableAccessRel instanceof HiveTableScan) {
    iRefSet.removeAll(((HiveTableScan) tableAccessRel).getPartOrVirtualCols());
  }
  if (!iRefSet.isEmpty()) {
    final RelOptTable table = tableAccessRel.getTable();
    if (table instanceof RelOptHiveTable) {
      ((RelOptHiveTable) table).getColStat(iRefSet, true);
      LOG.debug("Got col stats for {} in {}", iRefSet, tableAccessRel.getTable().getQualifiedName());
    }
  }
}
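The trimmer prefetches column statistics only for columns that are actually referenced and are not partition or virtual columns, so later cost estimation can hit the table's cached stats. Below is a minimal sketch of the same guard, cast, and fetch pattern, assuming the snippet's imports are available; tableScan and the column indices are hypothetical, and the reading of the boolean argument is ours, not taken from the snippet.

// Minimal sketch (hypothetical caller). Assumes the Hive/Calcite
// classes used in the snippet above are on the classpath.
final RelOptTable table = tableScan.getTable();
if (table instanceof RelOptHiveTable) {
  // Indices are placeholders for whatever columns the caller needs.
  List<Integer> neededCols = Lists.newArrayList(0, 2);
  // The second argument mirrors the call above; our reading of the Hive
  // sources is that it tolerates columns whose statistics are missing.
  ((RelOptHiveTable) table).getColStat(neededCols, true);
}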
Use of org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable in project hive by apache.
From the class HiveMaterializedViewsRegistry, method createTableScan.
private static RelNode createTableScan(Table viewTable) {
  // 0. Recreate cluster
  final RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(null);
  final RexBuilder rexBuilder = new RexBuilder(new JavaTypeFactoryImpl());
  final RelOptCluster cluster = RelOptCluster.create(planner, rexBuilder);
  // 1. Create column schema
  final RowResolver rr = new RowResolver();
  // 1.1 Add column info for non-partition cols (Object Inspector fields)
  StructObjectInspector rowObjectInspector;
  try {
    rowObjectInspector = (StructObjectInspector) viewTable.getDeserializer().getObjectInspector();
  } catch (SerDeException e) {
    // Bail out
    return null;
  }
  List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
  ColumnInfo colInfo;
  String colName;
  ArrayList<ColumnInfo> cInfoLst = new ArrayList<ColumnInfo>();
  for (int i = 0; i < fields.size(); i++) {
    colName = fields.get(i).getFieldName();
    colInfo = new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), null, false);
    rr.put(null, colName, colInfo);
    cInfoLst.add(colInfo);
  }
  ArrayList<ColumnInfo> nonPartitionColumns = new ArrayList<ColumnInfo>(cInfoLst);
  // 1.2 Add column info corresponding to partition columns
  ArrayList<ColumnInfo> partitionColumns = new ArrayList<ColumnInfo>();
  for (FieldSchema part_col : viewTable.getPartCols()) {
    colName = part_col.getName();
    colInfo = new ColumnInfo(colName, TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), null, true);
    rr.put(null, colName, colInfo);
    cInfoLst.add(colInfo);
    partitionColumns.add(colInfo);
  }
  // 1.3 Build row type from field <type, name>
  RelDataType rowType;
  try {
    rowType = TypeConverter.getType(cluster, rr, null);
  } catch (CalciteSemanticException e) {
    // Bail out
    return null;
  }
  // 2. Build RelOptAbstractTable
  String fullyQualifiedTabName = viewTable.getDbName();
  if (fullyQualifiedTabName != null && !fullyQualifiedTabName.isEmpty()) {
    fullyQualifiedTabName = fullyQualifiedTabName + "." + viewTable.getTableName();
  } else {
    fullyQualifiedTabName = viewTable.getTableName();
  }
  RelOptHiveTable optTable = new RelOptHiveTable(null, fullyQualifiedTabName, rowType, viewTable, nonPartitionColumns, partitionColumns, new ArrayList<VirtualColumn>(), SessionState.get().getConf(), new HashMap<String, PrunedPartitionList>(), new AtomicInteger());
  RelNode tableRel;
  // 3. Build operator
  if (obtainTableType(viewTable) == TableType.DRUID) {
    // Build Druid query
    String address = HiveConf.getVar(SessionState.get().getConf(), HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
    String dataSource = viewTable.getParameters().get(Constants.DRUID_DATA_SOURCE);
    Set<String> metrics = new HashSet<>();
    List<RelDataType> druidColTypes = new ArrayList<>();
    List<String> druidColNames = new ArrayList<>();
    for (RelDataTypeField field : rowType.getFieldList()) {
      druidColTypes.add(field.getType());
      druidColNames.add(field.getName());
      if (field.getName().equals(DruidTable.DEFAULT_TIMESTAMP_COLUMN)) {
        // timestamp
        continue;
      }
      if (field.getType().getSqlTypeName() == SqlTypeName.VARCHAR) {
        // dimension
        continue;
      }
      metrics.add(field.getName());
    }
    List<Interval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
    DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false), dataSource, RelDataTypeImpl.proto(rowType), metrics, DruidTable.DEFAULT_TIMESTAMP_COLUMN, intervals);
    final TableScan scan = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, viewTable.getTableName(), null, false, false);
    tableRel = DruidQuery.create(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, druidTable, ImmutableList.<RelNode>of(scan));
  } else {
    // Build Hive Table Scan Rel
    tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, viewTable.getTableName(), null, false, false);
  }
  return tableRel;
}
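In the Druid branch, the loop over rowType classifies every field: the default timestamp column is skipped, VARCHAR fields are treated as dimensions, and everything else becomes a metric passed to the DruidTable constructor. Below is a hypothetical extraction of that classification into a standalone helper; the method name extractMetricNames is ours, while every call it makes comes from the snippet.

// Hypothetical helper mirroring the classification loop above.
static Set<String> extractMetricNames(RelDataType rowType) {
  Set<String> metrics = new HashSet<>();
  for (RelDataTypeField field : rowType.getFieldList()) {
    if (field.getName().equals(DruidTable.DEFAULT_TIMESTAMP_COLUMN)) {
      continue; // the timestamp column is neither dimension nor metric
    }
    if (field.getType().getSqlTypeName() == SqlTypeName.VARCHAR) {
      continue; // VARCHAR columns are treated as Druid dimensions
    }
    metrics.add(field.getName()); // everything else is a metric
  }
  return metrics;
}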
Use of org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable in project hive by apache.
From the class HiveExpandDistinctAggregatesRule, method onMatch.
//~ Methods ----------------------------------------------------------------
@Override
public void onMatch(RelOptRuleCall call) {
  final Aggregate aggregate = call.rel(0);
  int numCountDistinct = getNumCountDistinctCall(aggregate);
  if (numCountDistinct == 0) {
    return;
  }
  // Find all of the agg expressions. We use a List (for all count(distinct))
  // as well as a Set (for all others) to ensure determinism.
  int nonDistinctCount = 0;
  List<List<Integer>> argListList = new ArrayList<List<Integer>>();
  Set<List<Integer>> argListSets = new LinkedHashSet<List<Integer>>();
  Set<Integer> positions = new HashSet<>();
  for (AggregateCall aggCall : aggregate.getAggCallList()) {
    if (!aggCall.isDistinct()) {
      ++nonDistinctCount;
      continue;
    }
    ArrayList<Integer> argList = new ArrayList<Integer>();
    for (Integer arg : aggCall.getArgList()) {
      argList.add(arg);
      positions.add(arg);
    }
    // Aggr checks for sorted argList.
    argListList.add(argList);
    argListSets.add(argList);
  }
  Util.permAssert(argListSets.size() > 0, "containsDistinctCall lied");
  if (numCountDistinct > 1 && numCountDistinct == aggregate.getAggCallList().size() && aggregate.getGroupSet().isEmpty()) {
    LOG.debug("Trigger countDistinct rewrite. numCountDistinct is " + numCountDistinct);
    // now positions contains all the distinct positions, i.e., $5, $4, $6
    // we need to first sort them as group by set
    // and then get their position later, i.e., $4->1, $5->2, $6->3
    cluster = aggregate.getCluster();
    rexBuilder = cluster.getRexBuilder();
    RelNode converted = null;
    List<Integer> sourceOfForCountDistinct = new ArrayList<>();
    sourceOfForCountDistinct.addAll(positions);
    Collections.sort(sourceOfForCountDistinct);
    try {
      converted = convert(aggregate, argListList, sourceOfForCountDistinct);
    } catch (CalciteSemanticException e) {
      LOG.debug(e.toString());
      throw new RuntimeException(e);
    }
    call.transformTo(converted);
    return;
  }
  // If all of the agg expressions are distinct and have the same
  // arguments then we can use a more efficient form.
  if ((nonDistinctCount == 0) && (argListSets.size() == 1)) {
    for (Integer arg : argListSets.iterator().next()) {
      Set<RelColumnOrigin> colOrigs = RelMetadataQuery.instance().getColumnOrigins(aggregate, arg);
      if (null != colOrigs) {
        for (RelColumnOrigin colOrig : colOrigs) {
          RelOptHiveTable hiveTbl = (RelOptHiveTable) colOrig.getOriginTable();
          if (hiveTbl.getPartColInfoMap().containsKey(colOrig.getOriginColumnOrdinal())) {
            // Encountered partitioning column, this will be better handled by MetadataOnly optimizer.
            return;
          }
        }
      }
    }
    RelNode converted = convertMonopole(aggregate, argListSets.iterator().next());
    call.transformTo(converted);
    return;
  }
}
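Before applying the single-distinct rewrite, the rule traces each aggregate argument back to its origin table via column-origin metadata and bails out if any argument is a partition column, deferring to the MetadataOnly optimizer. That check can be sketched as a standalone predicate; the method name isPartitionColumnArg is hypothetical, and the instanceof guard is our addition (the snippet casts directly).

// Hypothetical repackaging of the partition-column origin check above.
static boolean isPartitionColumnArg(Aggregate aggregate, int arg) {
  Set<RelColumnOrigin> origins =
      RelMetadataQuery.instance().getColumnOrigins(aggregate, arg);
  if (origins == null) {
    return false; // origin unknown
  }
  for (RelColumnOrigin origin : origins) {
    RelOptTable originTable = origin.getOriginTable();
    if (originTable instanceof RelOptHiveTable
        && ((RelOptHiveTable) originTable).getPartColInfoMap()
            .containsKey(origin.getOriginColumnOrdinal())) {
      return true; // argument traces back to a partitioning column
    }
  }
  return false;
}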
Use of org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable in project hive by apache.
From the class HivePartitionPruneRule, method perform.
protected void perform(RelOptRuleCall call, Filter filter, HiveTableScan tScan) {
  RelOptHiveTable hiveTable = (RelOptHiveTable) tScan.getTable();
  RexNode predicate = filter.getCondition();
  Pair<RexNode, RexNode> predicates = PartitionPrune.extractPartitionPredicates(filter.getCluster(), hiveTable, predicate);
  RexNode partColExpr = predicates.left;
  hiveTable.computePartitionList(conf, partColExpr, tScan.getPartOrVirtualCols());
}
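extractPartitionPredicates splits the filter condition into a pair: the left side references only partition columns and drives pruning, while the right side (the residual predicate) is not used here and remains in the plan's Filter. A sketch of the same two calls outside the rule follows, with cluster, hiveTable, predicate, conf, and tScan assumed in scope; whether left can be null when no partition conjunct exists is our assumption, hence the guard.

// Hypothetical standalone use of the same split; all variables are
// assumed to be in scope, mirroring the types in the snippet above.
Pair<RexNode, RexNode> split =
    PartitionPrune.extractPartitionPredicates(cluster, hiveTable, predicate);
RexNode partPredicate = split.left; // references partition columns only
RexNode residual = split.right;     // remainder; not used for pruning
if (partPredicate != null) {        // null-guard is our assumption
  hiveTable.computePartitionList(conf, partPredicate, tScan.getPartOrVirtualCols());
}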