Example 6 with RelOptHiveTable

use of org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable in project hive by apache.

the class HiveRelFieldTrimmer method fetchColStats.

private void fetchColStats(RelNode key, TableScan tableAccessRel, ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields) {
    final List<Integer> iRefSet = Lists.newArrayList();
    if (key instanceof Project) {
        final Project project = (Project) key;
        for (RexNode rx : project.getChildExps()) {
            iRefSet.addAll(HiveCalciteUtil.getInputRefs(rx));
        }
    } else {
        final int fieldCount = tableAccessRel.getRowType().getFieldCount();
        if (fieldsUsed.equals(ImmutableBitSet.range(fieldCount)) && extraFields.isEmpty()) {
            // get all cols
            iRefSet.addAll(ImmutableBitSet.range(fieldCount).asList());
        }
    }
    // Remove any partition or virtual columns
    if (tableAccessRel instanceof HiveTableScan) {
        iRefSet.removeAll(((HiveTableScan) tableAccessRel).getPartOrVirtualCols());
    }
    if (!iRefSet.isEmpty()) {
        final RelOptTable table = tableAccessRel.getTable();
        if (table instanceof RelOptHiveTable) {
            ((RelOptHiveTable) table).getColStat(iRefSet, true);
            LOG.debug("Got col stats for {} in {}", iRefSet, tableAccessRel.getTable().getQualifiedName());
        }
    }
}
Also used : Project(org.apache.calcite.rel.core.Project) HiveProject(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject) RelOptHiveTable(org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable) RelOptTable(org.apache.calcite.plan.RelOptTable) HiveTableScan(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan) RexNode(org.apache.calcite.rex.RexNode)
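
The pattern above — check that the scan's RelOptTable is a RelOptHiveTable before calling Hive-specific APIs — generalizes to any place that needs to prefetch column statistics. A minimal sketch under that assumption; the class and method names (ColStatsWarmer, warmColumnStats) are hypothetical, not part of Hive:

import java.util.List;

import org.apache.calcite.plan.RelOptTable;
import org.apache.calcite.rel.core.TableScan;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;

final class ColStatsWarmer {

    /**
     * Prefetches column statistics for the given column ordinals,
     * mirroring the cast-and-check pattern in fetchColStats above.
     */
    static void warmColumnStats(TableScan scan, List<Integer> columnOrdinals) {
        final RelOptTable table = scan.getTable();
        // Only RelOptHiveTable exposes getColStat; any other RelOptTable
        // implementation is silently skipped.
        if (table instanceof RelOptHiveTable) {
            ((RelOptHiveTable) table).getColStat(columnOrdinals, true);
        }
    }

    private ColStatsWarmer() {
    }
}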

Example 7 with RelOptHiveTable

use of org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable in project hive by apache.

the class HiveMaterializedViewsRegistry method createTableScan.

private static RelNode createTableScan(Table viewTable) {
    // 0. Recreate cluster
    final RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(null);
    final RexBuilder rexBuilder = new RexBuilder(new JavaTypeFactoryImpl());
    final RelOptCluster cluster = RelOptCluster.create(planner, rexBuilder);
    // 1. Create column schema
    final RowResolver rr = new RowResolver();
    // 1.1 Add column info for non-partition cols (ObjectInspector fields)
    StructObjectInspector rowObjectInspector;
    try {
        rowObjectInspector = (StructObjectInspector) viewTable.getDeserializer().getObjectInspector();
    } catch (SerDeException e) {
        // Bail out
        return null;
    }
    List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
    ColumnInfo colInfo;
    String colName;
    ArrayList<ColumnInfo> cInfoLst = new ArrayList<ColumnInfo>();
    for (int i = 0; i < fields.size(); i++) {
        colName = fields.get(i).getFieldName();
        colInfo = new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), null, false);
        rr.put(null, colName, colInfo);
        cInfoLst.add(colInfo);
    }
    ArrayList<ColumnInfo> nonPartitionColumns = new ArrayList<ColumnInfo>(cInfoLst);
    // 1.2 Add column info corresponding to partition columns
    ArrayList<ColumnInfo> partitionColumns = new ArrayList<ColumnInfo>();
    for (FieldSchema part_col : viewTable.getPartCols()) {
        colName = part_col.getName();
        colInfo = new ColumnInfo(colName, TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), null, true);
        rr.put(null, colName, colInfo);
        cInfoLst.add(colInfo);
        partitionColumns.add(colInfo);
    }
    // 1.3 Build row type from field <type, name>
    RelDataType rowType;
    try {
        rowType = TypeConverter.getType(cluster, rr, null);
    } catch (CalciteSemanticException e) {
        // Bail out
        return null;
    }
    // 2. Build RelOptAbstractTable
    String fullyQualifiedTabName = viewTable.getDbName();
    if (fullyQualifiedTabName != null && !fullyQualifiedTabName.isEmpty()) {
        fullyQualifiedTabName = fullyQualifiedTabName + "." + viewTable.getTableName();
    } else {
        fullyQualifiedTabName = viewTable.getTableName();
    }
    RelOptHiveTable optTable = new RelOptHiveTable(null, fullyQualifiedTabName, rowType, viewTable, nonPartitionColumns, partitionColumns, new ArrayList<VirtualColumn>(), SessionState.get().getConf(), new HashMap<String, PrunedPartitionList>(), new AtomicInteger());
    RelNode tableRel;
    // 3. Build operator
    if (obtainTableType(viewTable) == TableType.DRUID) {
        // Build Druid query
        String address = HiveConf.getVar(SessionState.get().getConf(), HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
        String dataSource = viewTable.getParameters().get(Constants.DRUID_DATA_SOURCE);
        Set<String> metrics = new HashSet<>();
        List<RelDataType> druidColTypes = new ArrayList<>();
        List<String> druidColNames = new ArrayList<>();
        for (RelDataTypeField field : rowType.getFieldList()) {
            druidColTypes.add(field.getType());
            druidColNames.add(field.getName());
            if (field.getName().equals(DruidTable.DEFAULT_TIMESTAMP_COLUMN)) {
                // timestamp
                continue;
            }
            if (field.getType().getSqlTypeName() == SqlTypeName.VARCHAR) {
                // dimension
                continue;
            }
            metrics.add(field.getName());
        }
        List<Interval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
        DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false), dataSource, RelDataTypeImpl.proto(rowType), metrics, DruidTable.DEFAULT_TIMESTAMP_COLUMN, intervals);
        final TableScan scan = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, viewTable.getTableName(), null, false, false);
        tableRel = DruidQuery.create(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, druidTable, ImmutableList.<RelNode>of(scan));
    } else {
        // Build Hive Table Scan Rel
        tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, viewTable.getTableName(), null, false, false);
    }
    return tableRel;
}
Also used : RelOptCluster(org.apache.calcite.plan.RelOptCluster) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) DruidTable(org.apache.calcite.adapter.druid.DruidTable) RelDataType(org.apache.calcite.rel.type.RelDataType) RowResolver(org.apache.hadoop.hive.ql.parse.RowResolver) RelOptPlanner(org.apache.calcite.plan.RelOptPlanner) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) JavaTypeFactoryImpl(org.apache.calcite.jdbc.JavaTypeFactoryImpl) RexBuilder(org.apache.calcite.rex.RexBuilder) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) HashSet(java.util.HashSet) HiveTableScan(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan) TableScan(org.apache.calcite.rel.core.TableScan) DruidSchema(org.apache.calcite.adapter.druid.DruidSchema) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RelOptHiveTable(org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable) HiveRelNode(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode) RelNode(org.apache.calcite.rel.RelNode) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) HiveTableScan(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Interval(org.joda.time.Interval)
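
The Druid branch above classifies every field of the row type: the Druid timestamp column is skipped, VARCHAR fields are treated as dimensions, and everything else becomes a metric. A minimal sketch of that classification in isolation, using the Calcite APIs already imported by the example; the helper name druidMetrics is hypothetical:

import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.calcite.adapter.druid.DruidTable;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.sql.type.SqlTypeName;

final class DruidFieldClassifier {

    /** Returns the metric column names: everything that is neither the
     *  Druid timestamp column nor a VARCHAR (dimension) field. */
    static Set<String> druidMetrics(List<RelDataTypeField> fields) {
        final Set<String> metrics = new HashSet<>();
        for (RelDataTypeField field : fields) {
            if (field.getName().equals(DruidTable.DEFAULT_TIMESTAMP_COLUMN)) {
                // timestamp column, handled separately by DruidTable
                continue;
            }
            if (field.getType().getSqlTypeName() == SqlTypeName.VARCHAR) {
                // dimension
                continue;
            }
            metrics.add(field.getName());
        }
        return metrics;
    }

    private DruidFieldClassifier() {
    }
}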

Example 8 with RelOptHiveTable

use of org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable in project hive by apache.

the class HiveExpandDistinctAggregatesRule method onMatch.

//~ Methods ----------------------------------------------------------------
@Override
public void onMatch(RelOptRuleCall call) {
    final Aggregate aggregate = call.rel(0);
    int numCountDistinct = getNumCountDistinctCall(aggregate);
    if (numCountDistinct == 0) {
        return;
    }
    // Find all of the agg expressions. We use a List (for all count(distinct))
    // as well as a Set (for all others) to ensure determinism.
    int nonDistinctCount = 0;
    List<List<Integer>> argListList = new ArrayList<List<Integer>>();
    Set<List<Integer>> argListSets = new LinkedHashSet<List<Integer>>();
    Set<Integer> positions = new HashSet<>();
    for (AggregateCall aggCall : aggregate.getAggCallList()) {
        if (!aggCall.isDistinct()) {
            ++nonDistinctCount;
            continue;
        }
        ArrayList<Integer> argList = new ArrayList<Integer>();
        for (Integer arg : aggCall.getArgList()) {
            argList.add(arg);
            positions.add(arg);
        }
        // Aggr checks for sorted argList.
        argListList.add(argList);
        argListSets.add(argList);
    }
    Util.permAssert(argListSets.size() > 0, "containsDistinctCall lied");
    if (numCountDistinct > 1 && numCountDistinct == aggregate.getAggCallList().size() && aggregate.getGroupSet().isEmpty()) {
        LOG.debug("Trigger countDistinct rewrite. numCountDistinct is " + numCountDistinct);
        // now positions contains all the distinct positions, i.e., $5, $4, $6
        // we need to first sort them as group by set
        // and then get their position later, i.e., $4->1, $5->2, $6->3
        cluster = aggregate.getCluster();
        rexBuilder = cluster.getRexBuilder();
        RelNode converted = null;
        List<Integer> sourceOfForCountDistinct = new ArrayList<>();
        sourceOfForCountDistinct.addAll(positions);
        Collections.sort(sourceOfForCountDistinct);
        try {
            converted = convert(aggregate, argListList, sourceOfForCountDistinct);
        } catch (CalciteSemanticException e) {
            LOG.debug(e.toString());
            throw new RuntimeException(e);
        }
        call.transformTo(converted);
        return;
    }
    // If all of the agg expressions are distinct and have the same
    // arguments then we can use a more efficient form.
    if ((nonDistinctCount == 0) && (argListSets.size() == 1)) {
        for (Integer arg : argListSets.iterator().next()) {
            Set<RelColumnOrigin> colOrigs = RelMetadataQuery.instance().getColumnOrigins(aggregate, arg);
            if (null != colOrigs) {
                for (RelColumnOrigin colOrig : colOrigs) {
                    RelOptHiveTable hiveTbl = (RelOptHiveTable) colOrig.getOriginTable();
                    if (hiveTbl.getPartColInfoMap().containsKey(colOrig.getOriginColumnOrdinal())) {
                        // Encountered partitioning column, this will be better handled by MetadataOnly optimizer.
                        return;
                    }
                }
            }
        }
        RelNode converted = convertMonopole(aggregate, argListSets.iterator().next());
        call.transformTo(converted);
        return;
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) ArrayList(java.util.ArrayList) AggregateCall(org.apache.calcite.rel.core.AggregateCall) RelOptHiveTable(org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable) HiveRelNode(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode) RelNode(org.apache.calcite.rel.RelNode) RelColumnOrigin(org.apache.calcite.rel.metadata.RelColumnOrigin) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) HiveAggregate(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate) Aggregate(org.apache.calcite.rel.core.Aggregate) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)
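
Before applying the more efficient single-argument rewrite, the rule walks the column origins of each aggregate argument and backs off if any origin is a partition column, since that case is better handled by the MetadataOnly optimizer. A minimal sketch of that guard, assuming (as the rule does) that every origin table is a RelOptHiveTable; the helper name touchesPartitionColumn is hypothetical:

import java.util.Set;

import org.apache.calcite.rel.core.Aggregate;
import org.apache.calcite.rel.metadata.RelColumnOrigin;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;

final class PartitionColumnGuard {

    /** Returns true if the aggregate argument traces back to a
     *  partition column of some Hive table. */
    static boolean touchesPartitionColumn(Aggregate aggregate, int arg) {
        final Set<RelColumnOrigin> origins =
                RelMetadataQuery.instance().getColumnOrigins(aggregate, arg);
        if (origins == null) {
            // Origins unknown; treat as not partition-backed.
            return false;
        }
        for (RelColumnOrigin origin : origins) {
            final RelOptHiveTable hiveTable = (RelOptHiveTable) origin.getOriginTable();
            if (hiveTable.getPartColInfoMap().containsKey(origin.getOriginColumnOrdinal())) {
                return true;
            }
        }
        return false;
    }

    private PartitionColumnGuard() {
    }
}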

Example 9 with RelOptHiveTable

use of org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable in project hive by apache.

the class HivePartitionPruneRule method perform.

protected void perform(RelOptRuleCall call, Filter filter, HiveTableScan tScan) {
    // A HiveTableScan's underlying table is always a RelOptHiveTable.
    RelOptHiveTable hiveTable = (RelOptHiveTable) tScan.getTable();
    RexNode predicate = filter.getCondition();
    // Split the filter into a partition-column-only predicate (left) and the remainder (right).
    Pair<RexNode, RexNode> predicates = PartitionPrune.extractPartitionPredicates(filter.getCluster(), hiveTable, predicate);
    RexNode partColExpr = predicates.left;
    // Prune: compute the list of partitions satisfying the partition predicate.
    hiveTable.computePartitionList(conf, partColExpr, tScan.getPartOrVirtualCols());
}
Also used : RelOptHiveTable(org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable) RexNode(org.apache.calcite.rex.RexNode)

Aggregations

RelOptHiveTable (org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable)9 HiveTableScan (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan)4 ArrayList (java.util.ArrayList)3 HashSet (java.util.HashSet)3 ImmutableList (com.google.common.collect.ImmutableList)2 RelNode (org.apache.calcite.rel.RelNode)2 Project (org.apache.calcite.rel.core.Project)2 RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField)2 RexNode (org.apache.calcite.rex.RexNode)2 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)2 CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)2 HiveRelNode (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode)2 LinkedHashSet (java.util.LinkedHashSet)1 List (java.util.List)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 DruidQuery (org.apache.calcite.adapter.druid.DruidQuery)1 DruidSchema (org.apache.calcite.adapter.druid.DruidSchema)1 DruidTable (org.apache.calcite.adapter.druid.DruidTable)1 JavaTypeFactoryImpl (org.apache.calcite.jdbc.JavaTypeFactoryImpl)1 RelOptCluster (org.apache.calcite.plan.RelOptCluster)1