
Example 6 with Operator

Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.

The class MapReduceCompiler, method setInputFormat. The method recursively walks the task tree, setting the input format on the MapWork of every ExecDriver task and recursing into conditional and child tasks.

// loop over all the tasks recursively
@Override
protected void setInputFormat(Task<? extends Serializable> task) {
    if (task instanceof ExecDriver) {
        MapWork work = ((MapredWork) task.getWork()).getMapWork();
        HashMap<String, Operator<? extends OperatorDesc>> opMap = work.getAliasToWork();
        if (!opMap.isEmpty()) {
            for (Operator<? extends OperatorDesc> op : opMap.values()) {
                setInputFormat(work, op);
            }
        }
    } else if (task instanceof ConditionalTask) {
        List<Task<? extends Serializable>> listTasks = ((ConditionalTask) task).getListTasks();
        for (Task<? extends Serializable> tsk : listTasks) {
            setInputFormat(tsk);
        }
    }
    if (task.getChildTasks() != null) {
        for (Task<? extends Serializable> childTask : task.getChildTasks()) {
            setInputFormat(childTask);
        }
    }
}
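
In the compiler, this is presumably invoked once per root task of the generated plan; a minimal sketch of such a call site (rootTasks is an assumption, not shown in the excerpt):

// Hypothetical driver loop (not part of the excerpt above): start the
// recursive traversal at every root task so the whole task DAG is covered.
for (Task<? extends Serializable> rootTask : rootTasks) {
    setInputFormat(rootTask);
}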
Also used: ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator), UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator), FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator), GenMROperator (org.apache.hadoop.hive.ql.optimizer.GenMROperator), TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator), Operator (org.apache.hadoop.hive.ql.exec.Operator), ConditionalTask (org.apache.hadoop.hive.ql.exec.ConditionalTask), MapRedTask (org.apache.hadoop.hive.ql.exec.mr.MapRedTask), Task (org.apache.hadoop.hive.ql.exec.Task), Serializable (java.io.Serializable), MapWork (org.apache.hadoop.hive.ql.plan.MapWork), MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork), ExecDriver (org.apache.hadoop.hive.ql.exec.mr.ExecDriver), List (java.util.List), ArrayList (java.util.ArrayList), OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)

Example 7 with Operator

Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.

The class MapWork, method checkVectorizerSupportedTypes. For each table scan alias, the method checks whether the column types being read are supported by the vectorizer.

private boolean checkVectorizerSupportedTypes(boolean hasLlap) {
    for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : aliasToWork.entrySet()) {
        final String alias = entry.getKey();
        Operator<? extends OperatorDesc> op = entry.getValue();
        PartitionDesc partitionDesc = aliasToPartnInfo.get(alias);
        if (op instanceof TableScanOperator && partitionDesc != null && partitionDesc.getTableDesc() != null) {
            final TableScanOperator tsOp = (TableScanOperator) op;
            final List<String> readColumnNames = tsOp.getNeededColumns();
            final Properties props = partitionDesc.getTableDesc().getProperties();
            final List<TypeInfo> typeInfos = TypeInfoUtils.getTypeInfosFromTypeString(props.getProperty(serdeConstants.LIST_COLUMN_TYPES));
            final List<String> allColumnTypes = TypeInfoUtils.getTypeStringsFromTypeInfo(typeInfos);
            final List<String> allColumnNames = Utilities.getColumnNames(props);
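            // Note: hasLlap is reassigned on every matching table scan, so the
            // returned value reflects only the last alias examined.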
            hasLlap = Utilities.checkVectorizerSupportedTypes(readColumnNames, allColumnNames, allColumnTypes);
        }
    }
    return hasLlap;
}
Also used: FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator), TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator), Operator (org.apache.hadoop.hive.ql.exec.Operator), Properties (java.util.Properties), HashMap (java.util.HashMap), LinkedHashMap (java.util.LinkedHashMap), Map (java.util.Map), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)

Example 8 with Operator

Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.

The class DriverContext, method finished. When a MapRedTask completes, the method finds the FileSinkOperators that gather statistics and registers the task as the source of the corresponding stats tasks.

public void finished(TaskRunner runner) {
    if (statsTasks.isEmpty() || !(runner.getTask() instanceof MapRedTask)) {
        return;
    }
    MapRedTask mapredTask = (MapRedTask) runner.getTask();
    MapWork mapWork = mapredTask.getWork().getMapWork();
    ReduceWork reduceWork = mapredTask.getWork().getReduceWork();
    List<Operator> operators = new ArrayList<Operator>(mapWork.getAliasToWork().values());
    if (reduceWork != null) {
        operators.add(reduceWork.getReducer());
    }
    final List<String> statKeys = new ArrayList<String>(1);
    NodeUtils.iterate(operators, FileSinkOperator.class, new Function<FileSinkOperator>() {

        @Override
        public void apply(FileSinkOperator fsOp) {
            if (fsOp.getConf().isGatherStats()) {
                statKeys.add(fsOp.getConf().getStatsAggPrefix());
            }
        }
    });
    for (String statKey : statKeys) {
        statsTasks.get(statKey).getWork().setSourceTask(mapredTask);
    }
}
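
Since the callback has a single apply method, the same collection step could be written as a lambda on Java 8+; a sketch, assuming NodeUtils.Function qualifies as a functional interface:

// Equivalent lambda form of the NodeUtils.iterate callback above.
NodeUtils.iterate(operators, FileSinkOperator.class, fsOp -> {
    if (fsOp.getConf().isGatherStats()) {
        statKeys.add(fsOp.getConf().getStatsAggPrefix());
    }
});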
Also used: MapRedTask (org.apache.hadoop.hive.ql.exec.mr.MapRedTask), FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator), Operator (org.apache.hadoop.hive.ql.exec.Operator), MapWork (org.apache.hadoop.hive.ql.plan.MapWork), ArrayList (java.util.ArrayList), ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork)

Example 9 with Operator

Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.

The class SemanticAnalyzer, method genJoinReduceSinkChild. The method builds the ReduceSinkOperator that feeds one input of a join, computing the reduce keys and values and the mapping from output columns back to input expressions.

@SuppressWarnings("nls")
private Operator genJoinReduceSinkChild(QB qb, ExprNodeDesc[] joinKeys, Operator<?> child, String[] srcs, int tag) throws SemanticException {
    // dummy for backtracking
    Operator dummy = Operator.createDummy();
    dummy.setParentOperators(Arrays.asList(child));
    RowResolver inputRR = opParseCtx.get(child).getRowResolver();
    RowResolver outputRR = new RowResolver();
    ArrayList<String> outputColumns = new ArrayList<String>();
    ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
    ArrayList<ExprNodeDesc> reduceKeysBack = new ArrayList<ExprNodeDesc>();
    // Compute join keys and store in reduceKeys
    for (ExprNodeDesc joinKey : joinKeys) {
        reduceKeys.add(joinKey);
        reduceKeysBack.add(ExprNodeDescUtils.backtrack(joinKey, dummy, child));
    }
    // Walk over the input row resolver and copy in the output
    ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
    ArrayList<ExprNodeDesc> reduceValuesBack = new ArrayList<ExprNodeDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    List<ColumnInfo> columns = inputRR.getColumnInfos();
    int[] index = new int[columns.size()];
    for (int i = 0; i < columns.size(); i++) {
        ColumnInfo colInfo = columns.get(i);
        String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
        String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
        ExprNodeDesc expr = new ExprNodeColumnDesc(colInfo);
        // backtrack can be null when input is script operator
        ExprNodeDesc exprBack = ExprNodeDescUtils.backtrack(expr, dummy, child);
        int kindex;
        if (exprBack == null) {
            kindex = -1;
        } else if (ExprNodeDescUtils.isConstant(exprBack)) {
            kindex = reduceKeysBack.indexOf(exprBack);
        } else {
            kindex = ExprNodeDescUtils.indexOf(exprBack, reduceKeysBack);
        }
        if (kindex >= 0) {
            ColumnInfo newColInfo = new ColumnInfo(colInfo);
            newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex);
            newColInfo.setTabAlias(nm[0]);
            outputRR.put(nm[0], nm[1], newColInfo);
            if (nm2 != null) {
                outputRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
            }
            index[i] = kindex;
            continue;
        }
        index[i] = -reduceValues.size() - 1;
        String outputColName = getColumnInternalName(reduceValues.size());
        reduceValues.add(expr);
        reduceValuesBack.add(exprBack);
        ColumnInfo newColInfo = new ColumnInfo(colInfo);
        newColInfo.setInternalName(Utilities.ReduceField.VALUE + "." + outputColName);
        newColInfo.setTabAlias(nm[0]);
        outputRR.put(nm[0], nm[1], newColInfo);
        if (nm2 != null) {
            outputRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
        }
        outputColumns.add(outputColName);
    }
    dummy.setParentOperators(null);
    int numReds = -1;
    // Use only 1 reducer in case of cartesian product
    if (reduceKeys.size() == 0) {
        numReds = 1;
        String error = StrictChecks.checkCartesian(conf);
        if (error != null) {
            throw new SemanticException(error);
        }
    }
    ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumns, false, tag, reduceKeys.size(), numReds, AcidUtils.Operation.NOT_ACID);
    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(outputRR.getColumnInfos()), child), outputRR);
    List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
    for (int i = 0; i < keyColNames.size(); i++) {
        colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), reduceKeys.get(i));
    }
    List<String> valColNames = rsDesc.getOutputValueColumnNames();
    for (int i = 0; i < valColNames.size(); i++) {
        colExprMap.put(Utilities.ReduceField.VALUE + "." + valColNames.get(i), reduceValues.get(i));
    }
    rsOp.setValueIndex(index);
    rsOp.setColumnExprMap(colExprMap);
    rsOp.setInputAliases(srcs);
    return rsOp;
}
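
The index array built above records, for each input column, where it lands in the ReduceSink output: a non-negative entry is a key position, and a negative entry encodes a value position as -(position + 1). An illustrative decoding, reusing the naming scheme from the method (not part of the source):

// Decode index[i] back to the internal name assigned in outputRR,
// for some column position i.
int pos = index[i];
String internalName = (pos >= 0)
    ? Utilities.ReduceField.KEY + ".reducesinkkey" + pos
    : Utilities.ReduceField.VALUE + "." + getColumnInternalName(-pos - 1);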
Also used: AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator), SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator), JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator), Operator (org.apache.hadoop.hive.ql.exec.Operator), GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator), FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator), FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator), LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator), ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator), UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator), SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator), RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema), LinkedHashMap (java.util.LinkedHashMap), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo), SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint), CheckConstraint (org.apache.hadoop.hive.ql.metadata.CheckConstraint), NotNullConstraint (org.apache.hadoop.hive.ql.metadata.NotNullConstraint), SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint), SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint), DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint), SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc), CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)

Example 10 with Operator

Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.

The class SemanticAnalyzer, method genLimitPlan. The method inserts a LimitOperator with the given offset and limit above the input operator.

@SuppressWarnings("nls")
private Operator genLimitPlan(String dest, QB qb, Operator input, int offset, int limit) throws SemanticException {
    // A map-only job can be optimized: instead of converting it to a
    // map-reduce job, we can have another map job do the same work and
    // avoid the cost of sorting in the map-reduce phase. A better approach
    // would be to write into a local file and then have a map-only job.
    // Add the limit operator to get the value fields
    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
    LimitDesc limitDesc = new LimitDesc(offset, limit);
    globalLimitCtx.setLastReduceLimitDesc(limitDesc);
    Operator limitMap = putOpInsertMap(OperatorFactory.getAndMakeChild(limitDesc, new RowSchema(inputRR.getColumnInfos()), input), inputRR);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Created LimitOperator Plan for clause: " + dest + " row schema: " + inputRR.toString());
    }
    return limitMap;
}
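
For illustration only, a hypothetical call site (the offset and limit values are invented): a MySQL-style clause such as LIMIT 5,20 would reach this method as offset 5 and limit 20:

// Hypothetical invocation for "... LIMIT 5,20" (values invented for illustration).
Operator limitOp = genLimitPlan(dest, qb, input, 5, 20);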
Also used: AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator), SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator), JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator), Operator (org.apache.hadoop.hive.ql.exec.Operator), GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator), FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator), FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator), LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator), ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator), UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator), SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator), RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema), LimitDesc (org.apache.hadoop.hive.ql.plan.LimitDesc)

Aggregations

Operator (org.apache.hadoop.hive.ql.exec.Operator): 215
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 167
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 156
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 134
ArrayList (java.util.ArrayList): 123
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 119
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 118
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 107
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 103
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 97
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 85
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 85
SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator): 79
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 71
HashMap (java.util.HashMap): 65
LinkedHashMap (java.util.LinkedHashMap): 64
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 62
LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator): 60
AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator): 59
DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator): 52