use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class ExecMapper method configure.
@Override
public void configure(JobConf job) {
execContext = new ExecMapperContext(job);
// Allocate the bean at the beginning -
try {
l4j.info("conf classpath = " + Arrays.asList(((URLClassLoader) job.getClassLoader()).getURLs()));
l4j.info("thread classpath = " + Arrays.asList(((URLClassLoader) Thread.currentThread().getContextClassLoader()).getURLs()));
} catch (Exception e) {
l4j.info("cannot get classpath: " + e.getMessage());
}
setDone(false);
try {
jc = job;
execContext.setJc(jc);
// create map and fetch operators
MapWork mrwork = Utilities.getMapWork(job);
CompilationOpContext runtimeCtx = new CompilationOpContext();
if (mrwork.getVectorMode()) {
mo = new VectorMapOperator(runtimeCtx);
} else {
mo = new MapOperator(runtimeCtx);
}
mo.setConf(mrwork);
// initialize map operator
mo.initialize(job, null);
mo.setChildren(job);
l4j.info(mo.dump(0));
// initialize map local work
localWork = mrwork.getMapRedLocalWork();
execContext.setLocalWork(localWork);
MapredContext.init(true, new JobConf(jc));
mo.passExecContext(execContext);
mo.initializeLocalWork(jc);
mo.initializeMapOperator(jc);
if (localWork == null) {
return;
}
//The following code is for mapjoin
//initialize all the dummy ops
l4j.info("Initializing dummy operator");
List<Operator<? extends OperatorDesc>> dummyOps = localWork.getDummyParentOp();
for (Operator<? extends OperatorDesc> dummyOp : dummyOps) {
dummyOp.passExecContext(execContext);
dummyOp.initialize(jc, null);
}
} catch (Throwable e) {
abort = true;
if (e instanceof OutOfMemoryError) {
// Don't create a new object if we are already out of memory
throw (OutOfMemoryError) e;
} else {
throw new RuntimeException("Map operator initialization failed", e);
}
}
}
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class HashTableLoader method loadDirectly.
private void loadDirectly(MapJoinTableContainer[] mapJoinTables, String inputFileName) throws Exception {
MapredLocalWork localWork = context.getLocalWork();
List<Operator<?>> directWorks = localWork.getDirectFetchOp().get(joinOp);
if (directWorks == null || directWorks.isEmpty()) {
return;
}
JobConf job = new JobConf(hconf);
MapredLocalTask localTask = new MapredLocalTask(localWork, job, false);
HashTableSinkOperator sink = new TemporaryHashSinkOperator(new CompilationOpContext(), desc);
sink.setParentOperators(new ArrayList<Operator<? extends OperatorDesc>>(directWorks));
for (Operator<?> operator : directWorks) {
if (operator != null) {
operator.setChildOperators(Arrays.<Operator<? extends OperatorDesc>>asList(sink));
}
}
localTask.setExecContext(context);
localTask.startForward(inputFileName);
MapJoinTableContainer[] tables = sink.getMapJoinTables();
for (int i = 0; i < sink.getNumParent(); i++) {
if (sink.getParentOperators().get(i) != null) {
mapJoinTables[i] = tables[i];
}
}
Arrays.fill(tables, null);
}
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class HiveOutputFormatImpl method checkOutputSpecs.
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
MapredWork work = Utilities.getMapRedWork(job);
List<Operator<?>> opList = work.getAllOperators();
for (Operator<?> op : opList) {
if (op instanceof FileSinkOperator) {
((FileSinkOperator) op).checkOutputSpecs(ignored, job);
}
}
}
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class HiveGBOpConvUtil method genReduceSideGB2.
private static OpAttr genReduceSideGB2(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
ArrayList<String> outputColNames = new ArrayList<String>();
ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
String colOutputName = null;
ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0);
List<ColumnInfo> rsColInfoLst = rs.getSchema().getSignature();
ColumnInfo ci;
// 1. Build GB Keys, grouping set starting position
// 1.1 First Add original GB Keys
ArrayList<ExprNodeDesc> gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0, gbInfo.gbKeys.size() - 1, false, false);
for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
ci = rsColInfoLst.get(i);
colOutputName = gbInfo.outputColNames.get(i);
outputColNames.add(colOutputName);
colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), "", false));
colExprMap.put(colOutputName, gbKeys.get(i));
}
// 1.2 Add GrpSet Col
int groupingSetsPosition = -1;
if (inclGrpSetInReduceSide(gbInfo) && gbInfo.grpIdFunctionNeeded) {
groupingSetsPosition = gbKeys.size();
ExprNodeDesc grpSetColExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, rsColInfoLst.get(groupingSetsPosition).getInternalName(), null, false);
gbKeys.add(grpSetColExpr);
colOutputName = gbInfo.outputColNames.get(gbInfo.outputColNames.size() - 1);
;
outputColNames.add(colOutputName);
colInfoLst.add(new ColumnInfo(colOutputName, TypeInfoFactory.stringTypeInfo, null, true));
colExprMap.put(colOutputName, grpSetColExpr);
}
// 2. Add UDAF
UDAFAttrs udafAttr;
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
int udafStartPosInGBInfOutputColNames = gbInfo.grpSets.isEmpty() ? gbInfo.gbKeys.size() : gbInfo.gbKeys.size() * 2;
int udafStartPosInInputRS = gbInfo.grpSets.isEmpty() ? gbInfo.gbKeys.size() : gbInfo.gbKeys.size() + 1;
for (int i = 0; i < gbInfo.udafAttrs.size(); i++) {
udafAttr = gbInfo.udafAttrs.get(i);
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
aggParameters.add(new ExprNodeColumnDesc(rsColInfoLst.get(udafStartPosInInputRS + i)));
colOutputName = gbInfo.outputColNames.get(udafStartPosInGBInfOutputColNames + i);
outputColNames.add(colOutputName);
Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.FINAL, udafAttr.isDistinctUDAF);
GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, aggParameters);
aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, false, udafMode));
colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false));
}
Operator rsGBOp2 = OperatorFactory.getAndMakeChild(new GroupByDesc(GroupByDesc.Mode.FINAL, outputColNames, gbKeys, aggregations, false, gbInfo.groupByMemoryUsage, gbInfo.memoryThreshold, null, false, groupingSetsPosition, gbInfo.containsDistinctAggr), new RowSchema(colInfoLst), rs);
rsGBOp2.setColumnExprMap(colExprMap);
// TODO: Shouldn't we propgate vc? is it vc col from tab or all vc
return new OpAttr("", new HashSet<Integer>(), rsGBOp2);
}
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class HiveOpConverter method genJoin.
private static JoinOperator genJoin(RelNode join, ExprNodeDesc[][] joinExpressions, List<List<ExprNodeDesc>> filterExpressions, List<Operator<?>> children, String[] baseSrc, String tabAlias) throws SemanticException {
// 1. Extract join type
JoinCondDesc[] joinCondns;
boolean semiJoin;
boolean noOuterJoin;
if (join instanceof HiveMultiJoin) {
HiveMultiJoin hmj = (HiveMultiJoin) join;
joinCondns = new JoinCondDesc[hmj.getJoinInputs().size()];
for (int i = 0; i < hmj.getJoinInputs().size(); i++) {
joinCondns[i] = new JoinCondDesc(new JoinCond(hmj.getJoinInputs().get(i).left, hmj.getJoinInputs().get(i).right, transformJoinType(hmj.getJoinTypes().get(i))));
}
semiJoin = false;
noOuterJoin = !hmj.isOuterJoin();
} else {
joinCondns = new JoinCondDesc[1];
semiJoin = join instanceof SemiJoin;
JoinType joinType;
if (semiJoin) {
joinType = JoinType.LEFTSEMI;
} else {
joinType = extractJoinType((Join) join);
}
joinCondns[0] = new JoinCondDesc(new JoinCond(0, 1, joinType));
noOuterJoin = joinType != JoinType.FULLOUTER && joinType != JoinType.LEFTOUTER && joinType != JoinType.RIGHTOUTER;
}
// 2. We create the join aux structures
ArrayList<ColumnInfo> outputColumns = new ArrayList<ColumnInfo>();
ArrayList<String> outputColumnNames = new ArrayList<String>(join.getRowType().getFieldNames());
Operator<?>[] childOps = new Operator[children.size()];
Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
Map<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
Map<Byte, List<ExprNodeDesc>> filters = new HashMap<Byte, List<ExprNodeDesc>>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
int outputPos = 0;
for (int pos = 0; pos < children.size(); pos++) {
// 2.1. Backtracking from RS
ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
if (inputRS.getNumParent() != 1) {
throw new SemanticException("RS should have single parent");
}
Operator<?> parent = inputRS.getParentOperators().get(0);
ReduceSinkDesc rsDesc = inputRS.getConf();
int[] index = inputRS.getValueIndex();
Byte tag = (byte) rsDesc.getTag();
// 2.1.1. If semijoin...
if (semiJoin && pos != 0) {
exprMap.put(tag, new ArrayList<ExprNodeDesc>());
childOps[pos] = inputRS;
continue;
}
posToAliasMap.put(pos, new HashSet<String>(inputRS.getSchema().getTableNames()));
List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
List<String> valColNames = rsDesc.getOutputValueColumnNames();
Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSinkForJoin(outputPos, outputColumnNames, keyColNames, valColNames, index, parent, baseSrc[pos]);
List<ColumnInfo> parentColumns = parent.getSchema().getSignature();
for (int i = 0; i < index.length; i++) {
ColumnInfo info = new ColumnInfo(parentColumns.get(i));
info.setInternalName(outputColumnNames.get(outputPos));
info.setTabAlias(tabAlias);
outputColumns.add(info);
reversedExprs.put(outputColumnNames.get(outputPos), tag);
outputPos++;
}
exprMap.put(tag, new ArrayList<ExprNodeDesc>(descriptors.values()));
colExprMap.putAll(descriptors);
childOps[pos] = inputRS;
}
// 3. We populate the filters and filterMap structure needed in the join descriptor
List<List<ExprNodeDesc>> filtersPerInput = Lists.newArrayList();
int[][] filterMap = new int[children.size()][];
for (int i = 0; i < children.size(); i++) {
filtersPerInput.add(new ArrayList<ExprNodeDesc>());
}
// 3. We populate the filters structure
for (int i = 0; i < filterExpressions.size(); i++) {
int leftPos = joinCondns[i].getLeft();
int rightPos = joinCondns[i].getRight();
for (ExprNodeDesc expr : filterExpressions.get(i)) {
// We need to update the exprNode, as currently
// they refer to columns in the output of the join;
// they should refer to the columns output by the RS
int inputPos = updateExprNode(expr, reversedExprs, colExprMap);
if (inputPos == -1) {
inputPos = leftPos;
}
filtersPerInput.get(inputPos).add(expr);
if (joinCondns[i].getType() == JoinDesc.FULL_OUTER_JOIN || joinCondns[i].getType() == JoinDesc.LEFT_OUTER_JOIN || joinCondns[i].getType() == JoinDesc.RIGHT_OUTER_JOIN) {
if (inputPos == leftPos) {
updateFilterMap(filterMap, leftPos, rightPos);
} else {
updateFilterMap(filterMap, rightPos, leftPos);
}
}
}
}
for (int pos = 0; pos < children.size(); pos++) {
ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
ReduceSinkDesc rsDesc = inputRS.getConf();
Byte tag = (byte) rsDesc.getTag();
filters.put(tag, filtersPerInput.get(pos));
}
// 4. We create the join operator with its descriptor
JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, noOuterJoin, joinCondns, filters, joinExpressions);
desc.setReversedExprs(reversedExprs);
desc.setFilterMap(filterMap);
JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(childOps[0].getCompilationOpContext(), desc, new RowSchema(outputColumns), childOps);
joinOp.setColumnExprMap(colExprMap);
joinOp.setPosToAliasMap(posToAliasMap);
joinOp.getConf().setBaseSrc(baseSrc);
if (LOG.isDebugEnabled()) {
LOG.debug("Generated " + joinOp + " with row schema: [" + joinOp.getSchema() + "]");
}
return joinOp;
}
Aggregations