Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
The class MapReduceCompiler, method setInputFormat.
// loop over all the tasks recursively
@Override
protected void setInputFormat(Task<? extends Serializable> task) {
  if (task instanceof ExecDriver) {
    MapWork work = ((MapredWork) task.getWork()).getMapWork();
    HashMap<String, Operator<? extends OperatorDesc>> opMap = work.getAliasToWork();
    if (!opMap.isEmpty()) {
      for (Operator<? extends OperatorDesc> op : opMap.values()) {
        setInputFormat(work, op);
      }
    }
  } else if (task instanceof ConditionalTask) {
    List<Task<? extends Serializable>> listTasks = ((ConditionalTask) task).getListTasks();
    for (Task<? extends Serializable> tsk : listTasks) {
      setInputFormat(tsk);
    }
  }
  if (task.getChildTasks() != null) {
    for (Task<? extends Serializable> childTask : task.getChildTasks()) {
      setInputFormat(childTask);
    }
  }
}
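The method above walks the task tree three ways: it handles ExecDriver tasks directly, recurses into each branch of a ConditionalTask, and then recurses into every child task. The sketch below illustrates only that traversal pattern; SimpleTask, TaskWalker, and the branches/children fields are hypothetical stand-ins, not Hive classes.

// Minimal, self-contained model of the recursive task-tree traversal above.
import java.util.ArrayList;
import java.util.List;

class SimpleTask {
  final String name;
  final List<SimpleTask> children = new ArrayList<>();   // models getChildTasks()
  final List<SimpleTask> branches = new ArrayList<>();   // models ConditionalTask.getListTasks()

  SimpleTask(String name) { this.name = name; }
}

public class TaskWalker {
  // Visit a task, then its conditional branches, then its children,
  // mirroring the recursion in MapReduceCompiler.setInputFormat.
  static void visit(SimpleTask task) {
    System.out.println("visiting " + task.name);
    for (SimpleTask branch : task.branches) {
      visit(branch);
    }
    for (SimpleTask child : task.children) {
      visit(child);
    }
  }

  public static void main(String[] args) {
    SimpleTask root = new SimpleTask("root");
    SimpleTask cond = new SimpleTask("conditional");
    cond.branches.add(new SimpleTask("branch-1"));
    root.children.add(cond);
    visit(root); // prints root, conditional, branch-1
  }
}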
Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
The class MapWork, method checkVectorizerSupportedTypes.
private boolean checkVectorizerSupportedTypes(boolean hasLlap) {
  for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : aliasToWork.entrySet()) {
    final String alias = entry.getKey();
    Operator<? extends OperatorDesc> op = entry.getValue();
    PartitionDesc partitionDesc = aliasToPartnInfo.get(alias);
    if (op instanceof TableScanOperator && partitionDesc != null && partitionDesc.getTableDesc() != null) {
      final TableScanOperator tsOp = (TableScanOperator) op;
      final List<String> readColumnNames = tsOp.getNeededColumns();
      final Properties props = partitionDesc.getTableDesc().getProperties();
      final List<TypeInfo> typeInfos = TypeInfoUtils.getTypeInfosFromTypeString(props.getProperty(serdeConstants.LIST_COLUMN_TYPES));
      final List<String> allColumnTypes = TypeInfoUtils.getTypeStringsFromTypeInfo(typeInfos);
      final List<String> allColumnNames = Utilities.getColumnNames(props);
      hasLlap = Utilities.checkVectorizerSupportedTypes(readColumnNames, allColumnNames, allColumnTypes);
    }
  }
  return hasLlap;
}
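The check above gathers, for each TableScanOperator, the columns it actually needs plus the full column name and type lists from the table properties, and delegates the type check to Utilities.checkVectorizerSupportedTypes. The following is a minimal sketch of that kind of check under simplifying assumptions; SupportedTypeCheck, the SUPPORTED set, and the Map<String, String> column model are illustrative, not Hive APIs.

// Sketch only: verify that every column a scan reads has a supported type.
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class SupportedTypeCheck {
  // Illustrative set of "vectorizable" type names; not Hive's actual list.
  private static final Set<String> SUPPORTED =
      new HashSet<>(Arrays.asList("int", "bigint", "string", "double"));

  // columnTypes maps column name -> type string; readColumns is what the scan needs.
  static boolean allReadColumnsSupported(List<String> readColumns, Map<String, String> columnTypes) {
    for (String col : readColumns) {
      String type = columnTypes.get(col);
      if (type == null || !SUPPORTED.contains(type)) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args) {
    Map<String, String> types = Map.of("id", "bigint", "name", "string", "payload", "binary");
    System.out.println(allReadColumnsSupported(Arrays.asList("id", "name"), types)); // true
    System.out.println(allReadColumnsSupported(Arrays.asList("payload"), types));    // false
  }
}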
Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
The class DriverContext, method finished.
public void finished(TaskRunner runner) {
  if (statsTasks.isEmpty() || !(runner.getTask() instanceof MapRedTask)) {
    return;
  }
  MapRedTask mapredTask = (MapRedTask) runner.getTask();
  MapWork mapWork = mapredTask.getWork().getMapWork();
  ReduceWork reduceWork = mapredTask.getWork().getReduceWork();
  List<Operator> operators = new ArrayList<Operator>(mapWork.getAliasToWork().values());
  if (reduceWork != null) {
    operators.add(reduceWork.getReducer());
  }
  final List<String> statKeys = new ArrayList<String>(1);
  NodeUtils.iterate(operators, FileSinkOperator.class, new Function<FileSinkOperator>() {

    @Override
    public void apply(FileSinkOperator fsOp) {
      if (fsOp.getConf().isGatherStats()) {
        statKeys.add(fsOp.getConf().getStatsAggPrefix());
      }
    }
  });
  for (String statKey : statKeys) {
    statsTasks.get(statKey).getWork().setSourceTask(mapredTask);
  }
}
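NodeUtils.iterate above applies a callback to every FileSinkOperator found in the given operator trees, collecting the stats-aggregation prefixes so the matching stats tasks can be pointed at the finished MapRedTask. The sketch below shows that collect-by-type traversal in a self-contained form; Node, FileSinkNode, Visitor, and TypedTreeVisitor are stand-ins, not Hive's NodeUtils types.

// Sketch: walk a tree and apply a callback only to nodes of a requested class.
import java.util.ArrayList;
import java.util.List;

public class TypedTreeVisitor {
  interface Visitor<T> { void apply(T node); }

  static class Node {
    final List<Node> children = new ArrayList<>();
    Node add(Node child) { children.add(child); return this; }
  }

  static class FileSinkNode extends Node {
    final String statsPrefix;
    FileSinkNode(String statsPrefix) { this.statsPrefix = statsPrefix; }
  }

  // Recursively visit nodes, invoking the callback only for instances of the given type.
  static <T> void iterate(Node node, Class<T> type, Visitor<T> visitor) {
    if (type.isInstance(node)) {
      visitor.apply(type.cast(node));
    }
    for (Node child : node.children) {
      iterate(child, type, visitor);
    }
  }

  public static void main(String[] args) {
    Node root = new Node().add(new FileSinkNode("stats/query1/"));
    List<String> prefixes = new ArrayList<>();
    iterate(root, FileSinkNode.class, fs -> prefixes.add(fs.statsPrefix));
    System.out.println(prefixes); // [stats/query1/]
  }
}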
Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
The class SemanticAnalyzer, method genJoinReduceSinkChild.
@SuppressWarnings("nls")
private Operator genJoinReduceSinkChild(QB qb, ExprNodeDesc[] joinKeys, Operator<?> child, String[] srcs, int tag) throws SemanticException {
  // dummy for backtracking
  Operator dummy = Operator.createDummy();
  dummy.setParentOperators(Arrays.asList(child));
  RowResolver inputRR = opParseCtx.get(child).getRowResolver();
  RowResolver outputRR = new RowResolver();
  ArrayList<String> outputColumns = new ArrayList<String>();
  ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
  ArrayList<ExprNodeDesc> reduceKeysBack = new ArrayList<ExprNodeDesc>();
  // Compute join keys and store in reduceKeys
  for (ExprNodeDesc joinKey : joinKeys) {
    reduceKeys.add(joinKey);
    reduceKeysBack.add(ExprNodeDescUtils.backtrack(joinKey, dummy, child));
  }
  // Walk over the input row resolver and copy in the output
  ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
  ArrayList<ExprNodeDesc> reduceValuesBack = new ArrayList<ExprNodeDesc>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  List<ColumnInfo> columns = inputRR.getColumnInfos();
  int[] index = new int[columns.size()];
  for (int i = 0; i < columns.size(); i++) {
    ColumnInfo colInfo = columns.get(i);
    String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
    String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
    ExprNodeDesc expr = new ExprNodeColumnDesc(colInfo);
    // backtrack can be null when input is script operator
    ExprNodeDesc exprBack = ExprNodeDescUtils.backtrack(expr, dummy, child);
    int kindex;
    if (exprBack == null) {
      kindex = -1;
    } else if (ExprNodeDescUtils.isConstant(exprBack)) {
      kindex = reduceKeysBack.indexOf(exprBack);
    } else {
      kindex = ExprNodeDescUtils.indexOf(exprBack, reduceKeysBack);
    }
    if (kindex >= 0) {
      ColumnInfo newColInfo = new ColumnInfo(colInfo);
      newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex);
      newColInfo.setTabAlias(nm[0]);
      outputRR.put(nm[0], nm[1], newColInfo);
      if (nm2 != null) {
        outputRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
      }
      index[i] = kindex;
      continue;
    }
    index[i] = -reduceValues.size() - 1;
    String outputColName = getColumnInternalName(reduceValues.size());
    reduceValues.add(expr);
    reduceValuesBack.add(exprBack);
    ColumnInfo newColInfo = new ColumnInfo(colInfo);
    newColInfo.setInternalName(Utilities.ReduceField.VALUE + "." + outputColName);
    newColInfo.setTabAlias(nm[0]);
    outputRR.put(nm[0], nm[1], newColInfo);
    if (nm2 != null) {
      outputRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
    }
    outputColumns.add(outputColName);
  }
  dummy.setParentOperators(null);
  int numReds = -1;
  // Use only 1 reducer in case of cartesian product
  if (reduceKeys.size() == 0) {
    numReds = 1;
    String error = StrictChecks.checkCartesian(conf);
    if (error != null) {
      throw new SemanticException(error);
    }
  }
  ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumns, false, tag, reduceKeys.size(), numReds, AcidUtils.Operation.NOT_ACID);
  ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(outputRR.getColumnInfos()), child), outputRR);
  List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
  for (int i = 0; i < keyColNames.size(); i++) {
    colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), reduceKeys.get(i));
  }
  List<String> valColNames = rsDesc.getOutputValueColumnNames();
  for (int i = 0; i < valColNames.size(); i++) {
    colExprMap.put(Utilities.ReduceField.VALUE + "." + valColNames.get(i), reduceValues.get(i));
  }
  rsOp.setValueIndex(index);
  rsOp.setColumnExprMap(colExprMap);
  rsOp.setInputAliases(srcs);
  return rsOp;
}
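One detail worth calling out is the index array: for each input column the method stores either a non-negative key position (the column becomes KEY.reducesinkkey&lt;N&gt;) or the encoding -valuePosition - 1 when the column is emitted as a value, and the array is later handed to rsOp.setValueIndex. The small decoder below, with hypothetical names, shows how such an encoding is read back.

// Sketch: decode the key/value index encoding used by genJoinReduceSinkChild.
public class KeyValueIndex {
  // Returns a description of where a column ends up, given its encoded index.
  static String describe(int encoded) {
    if (encoded >= 0) {
      return "KEY.reducesinkkey" + encoded;        // non-negative: key position
    }
    return "VALUE position " + (-encoded - 1);     // negative: -valuePosition - 1
  }

  public static void main(String[] args) {
    int[] index = {0, -1, 1, -2}; // two key columns and two value columns
    for (int i = 0; i < index.length; i++) {
      System.out.println("column " + i + " -> " + describe(index[i]));
    }
  }
}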
Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
The class SemanticAnalyzer, method genLimitPlan.
@SuppressWarnings("nls")
private Operator genLimitPlan(String dest, QB qb, Operator input, int offset, int limit) throws SemanticException {
  // A map-only job can be optimized: instead of converting it to a map-reduce job,
  // we can have another map job do the same work and avoid the cost of sorting in
  // the map-reduce phase. A better approach would be to write into a local file and
  // then have a map-only job.
  // Add the limit operator to get the value fields
  RowResolver inputRR = opParseCtx.get(input).getRowResolver();
  LimitDesc limitDesc = new LimitDesc(offset, limit);
  globalLimitCtx.setLastReduceLimitDesc(limitDesc);
  Operator limitMap = putOpInsertMap(OperatorFactory.getAndMakeChild(limitDesc, new RowSchema(inputRR.getColumnInfos()), input), inputRR);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Created LimitOperator Plan for clause: " + dest + " row schema: " + inputRR.toString());
  }
  return limitMap;
}
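The LimitOperator created here enforces the usual offset/limit semantics: skip the first offset rows of its input and pass through at most limit rows. A minimal, Hive-free sketch of those semantics, using plain Java streams, is shown below.

// Sketch: offset/limit over a list of rows, standing in for the operator's behavior.
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class OffsetLimit {
  static <T> List<T> apply(List<T> rows, int offset, int limit) {
    return rows.stream()
               .skip(offset)    // drop the first `offset` rows
               .limit(limit)    // forward at most `limit` rows
               .collect(Collectors.toList());
  }

  public static void main(String[] args) {
    List<Integer> rows = Arrays.asList(1, 2, 3, 4, 5, 6);
    System.out.println(apply(rows, 2, 3)); // [3, 4, 5]
  }
}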