Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.
The class QueryPlanTreeTransformation, method applyCorrelation.
/**
 * Based on the correlation, we transform the query plan tree (operator tree).
 * Here, we first create the DemuxOperator; all bottom ReduceSinkOperators
 * (bottom means near the TableScanOperator) in the correlation become
 * the parents of the DemuxOperator. We also reassign tags to those
 * ReduceSinkOperators. Then, we use MuxOperators to replace the
 * ReduceSinkOperators which are not bottom ones in this correlation.
 * Example: the original operator tree is ...
 *        JOIN2
 *        /   \
 *      RS4   RS5
 *      /       \
 *   GBY1      JOIN1
 *     |       /   \
 *    RS1    RS2   RS3
 * If GBY1, JOIN1, and JOIN2 can be executed in the same reducer
 * (as determined by the Correlation Optimizer), the new operator tree
 * will be ...
 *        JOIN2
 *          |
 *         MUX
 *        /   \
 *     GBY1  JOIN1
 *        \   /
 *        DEMUX
 *       /  |  \
 *      /   |   \
 *     /    |    \
 *   RS1   RS2   RS3
 * @param pCtx
 * @param corrCtx
 * @param correlation
 * @throws SemanticException
 */
protected static void applyCorrelation(ParseContext pCtx, CorrelationNodeProcCtx corrCtx,
    IntraQueryCorrelation correlation) throws SemanticException {
  final List<ReduceSinkOperator> bottomReduceSinkOperators =
      correlation.getBottomReduceSinkOperators();
  final int numReducers = correlation.getNumReducers();
  List<Operator<? extends OperatorDesc>> childrenOfDemux =
      new ArrayList<Operator<? extends OperatorDesc>>();
  List<Operator<? extends OperatorDesc>> parentRSsOfDemux =
      new ArrayList<Operator<? extends OperatorDesc>>();
  Map<Integer, Integer> childIndexToOriginalNumParents = new HashMap<Integer, Integer>();
  List<TableDesc> keysSerializeInfos = new ArrayList<TableDesc>();
  List<TableDesc> valuesSerializeInfos = new ArrayList<TableDesc>();
  Map<ReduceSinkOperator, Integer> bottomRSToNewTag = new HashMap<ReduceSinkOperator, Integer>();
  int newTag = 0;
  CompilationOpContext opCtx = null;
  for (ReduceSinkOperator rsop : bottomReduceSinkOperators) {
    if (opCtx == null) {
      opCtx = rsop.getCompilationOpContext();
    }
    rsop.getConf().setNumReducers(numReducers);
    bottomRSToNewTag.put(rsop, newTag);
    parentRSsOfDemux.add(rsop);
    keysSerializeInfos.add(rsop.getConf().getKeySerializeInfo());
    valuesSerializeInfos.add(rsop.getConf().getValueSerializeInfo());
    Operator<? extends OperatorDesc> child = CorrelationUtilities.getSingleChild(rsop, true);
    if (!childrenOfDemux.contains(child)) {
      childrenOfDemux.add(child);
      int childIndex = childrenOfDemux.size() - 1;
      childIndexToOriginalNumParents.put(childIndex, child.getNumParent());
    }
    newTag++;
  }
  for (ReduceSinkOperator rsop : bottomReduceSinkOperators) {
    setNewTag(correlation, childrenOfDemux, rsop, bottomRSToNewTag);
  }
  // Create the DemuxOperator.
  DemuxDesc demuxDesc = new DemuxDesc(correlation.getNewTagToOldTag(),
      correlation.getNewTagToChildIndex(), childIndexToOriginalNumParents,
      keysSerializeInfos, valuesSerializeInfos);
  Operator<? extends OperatorDesc> demuxOp = OperatorFactory.get(opCtx, demuxDesc);
  demuxOp.setChildOperators(childrenOfDemux);
  demuxOp.setParentOperators(parentRSsOfDemux);
  for (Operator<? extends OperatorDesc> child : childrenOfDemux) {
    List<Operator<? extends OperatorDesc>> parentsWithMultipleDemux =
        new ArrayList<Operator<? extends OperatorDesc>>();
    boolean hasBottomReduceSinkOperators = false;
    boolean hasNonBottomReduceSinkOperators = false;
    for (int i = 0; i < child.getParentOperators().size(); i++) {
      Operator<? extends OperatorDesc> p = child.getParentOperators().get(i);
      assert p instanceof ReduceSinkOperator;
      ReduceSinkOperator rsop = (ReduceSinkOperator) p;
      if (bottomReduceSinkOperators.contains(rsop)) {
        hasBottomReduceSinkOperators = true;
        parentsWithMultipleDemux.add(demuxOp);
      } else {
        hasNonBottomReduceSinkOperators = true;
        parentsWithMultipleDemux.add(rsop);
      }
    }
    if (hasBottomReduceSinkOperators && hasNonBottomReduceSinkOperators) {
      child.setParentOperators(parentsWithMultipleDemux);
    } else {
      child.setParentOperators(Utilities.makeList(demuxOp));
    }
  }
  for (Operator<? extends OperatorDesc> parent : parentRSsOfDemux) {
    parent.setChildOperators(Utilities.makeList(demuxOp));
  }
  // Replace all ReduceSinkOperators which are not at the bottom of
  // this correlation with MuxOperators.
  Set<ReduceSinkOperator> handledRSs = new HashSet<ReduceSinkOperator>();
  for (ReduceSinkOperator rsop : correlation.getAllReduceSinkOperators()) {
    if (!bottomReduceSinkOperators.contains(rsop)) {
      if (handledRSs.contains(rsop)) {
        continue;
      }
      Operator<? extends OperatorDesc> childOP = CorrelationUtilities.getSingleChild(rsop, true);
      if (childOP instanceof GroupByOperator) {
        CorrelationUtilities.removeReduceSinkForGroupBy(
            rsop, (GroupByOperator) childOP, pCtx, corrCtx);
        List<Operator<? extends OperatorDesc>> parentsOfMux =
            new ArrayList<Operator<? extends OperatorDesc>>();
        Operator<? extends OperatorDesc> parentOp =
            CorrelationUtilities.getSingleParent(childOP, true);
        parentsOfMux.add(parentOp);
        Operator<? extends OperatorDesc> mux = OperatorFactory.get(
            childOP.getCompilationOpContext(), new MuxDesc(parentsOfMux));
        mux.setChildOperators(Utilities.makeList(childOP));
        mux.setParentOperators(parentsOfMux);
        childOP.setParentOperators(Utilities.makeList(mux));
        parentOp.setChildOperators(Utilities.makeList(mux));
      } else {
        List<Operator<? extends OperatorDesc>> parentsOfMux =
            new ArrayList<Operator<? extends OperatorDesc>>();
        List<Operator<? extends OperatorDesc>> siblingOPs =
            CorrelationUtilities.findSiblingOperators(rsop);
        for (Operator<? extends OperatorDesc> op : siblingOPs) {
          if (op instanceof DemuxOperator) {
            parentsOfMux.add(op);
          } else if (op instanceof ReduceSinkOperator) {
            GroupByOperator pGBYm =
                CorrelationUtilities.getSingleParent(op, GroupByOperator.class);
            if (pGBYm != null && pGBYm.getConf().getMode() == GroupByDesc.Mode.HASH) {
              // We have a semi join here; this map-side GroupByOperator
              // needs to be removed.
              CorrelationUtilities.removeOperator(
                  pGBYm, op, CorrelationUtilities.getSingleParent(pGBYm, true), pCtx);
            }
            handledRSs.add((ReduceSinkOperator) op);
            parentsOfMux.add(CorrelationUtilities.getSingleParent(op, true));
          } else {
            throw new SemanticException("A sibling of ReduceSinkOperator is neither a "
                + "DemuxOperator nor a ReduceSinkOperator");
          }
        }
        MuxDesc muxDesc = new MuxDesc(siblingOPs);
        Operator<? extends OperatorDesc> mux =
            OperatorFactory.get(rsop.getCompilationOpContext(), muxDesc);
        mux.setChildOperators(Utilities.makeList(childOP));
        mux.setParentOperators(parentsOfMux);
        for (Operator<? extends OperatorDesc> op : parentsOfMux) {
          if (op instanceof DemuxOperator) {
            // op is the DemuxOperator, which may have multiple children;
            // replace childOP with the new MuxOperator in its child list.
            if (op.getChildOperators().contains(childOP)) {
              op.replaceChild(childOP, mux);
            }
          } else {
            // op is not a DemuxOperator, so it should have
            // a single child.
            op.setChildOperators(Utilities.makeList(mux));
          }
        }
        childOP.setParentOperators(Utilities.makeList(mux));
      }
    }
  }
  for (ReduceSinkOperator rsop : handledRSs) {
    rsop.setChildOperators(null);
    rsop.setParentOperators(null);
  }
}
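
To see the rewiring that applyCorrelation performs, it can help to dump the operator tree before and after the transformation. The following is a minimal sketch, not part of Hive's source; it assumes only the Operator methods already used above (getName() and getChildOperators()), and it will print shared subtrees more than once since the transformed plan is a DAG.

// Sketch: print an operator tree top-down, two spaces per level, so the
// before/after shapes from the javadoc can be compared on real plans.
private static void dumpTree(Operator<? extends OperatorDesc> op, int depth) {
  StringBuilder indent = new StringBuilder();
  for (int i = 0; i < depth; i++) {
    indent.append("  ");
  }
  System.out.println(indent + op.getName());
  if (op.getChildOperators() != null) {
    for (Operator<? extends OperatorDesc> child : op.getChildOperators()) {
      dumpTree(child, depth + 1);
    }
  }
}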
Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.
The class SerializationUtilities, method clonePlan.
/**
 * Clones using the powers of XML. Do not use unless necessary.
 * @param plan The plan.
 * @return The clone.
 */
public static MapredWork clonePlan(MapredWork plan) {
  // TODO: need proper clone. Meanwhile, let's at least keep this horror in one place
  PerfLogger perfLogger = SessionState.getPerfLogger();
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.CLONE_PLAN);
  Operator<?> op = plan.getAnyOperator();
  CompilationOpContext ctx = (op == null) ? null : op.getCompilationOpContext();
  ByteArrayOutputStream baos = new ByteArrayOutputStream(4096);
  serializePlan(plan, baos, true);
  MapredWork newPlan = deserializePlan(new ByteArrayInputStream(baos.toByteArray()),
      MapredWork.class, true);
  // Restore the context.
  for (Operator<?> newOp : newPlan.getAllOperators()) {
    newOp.setCompilationOpContext(ctx);
  }
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.CLONE_PLAN);
  return newPlan;
}
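
A hypothetical call site (the wrapper name cloneForReuse is illustrative, not Hive API) that spells out what the context restoration above guarantees:

// Sketch: the clone holds new Operator instances, but every one of them
// still points at the original CompilationOpContext, so operator IDs
// issued by that context stay consistent across clones.
public static MapredWork cloneForReuse(MapredWork work) {
  MapredWork copy = SerializationUtilities.clonePlan(work);
  Operator<?> origOp = work.getAnyOperator();
  Operator<?> newOp = copy.getAnyOperator();
  if (origOp != null && newOp != null) {
    assert origOp != newOp;
    assert origOp.getCompilationOpContext() == newOp.getCompilationOpContext();
  }
  return copy;
}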
Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.
The class SerializationUtilities, method cloneBaseWork.
/**
 * Clones using the powers of XML. Do not use unless necessary.
 * @param plan The plan.
 * @return The clone.
 */
public static BaseWork cloneBaseWork(BaseWork plan) {
  PerfLogger perfLogger = SessionState.getPerfLogger();
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.CLONE_PLAN);
  Operator<?> op = plan.getAnyRootOperator();
  CompilationOpContext ctx = (op == null) ? null : op.getCompilationOpContext();
  ByteArrayOutputStream baos = new ByteArrayOutputStream(4096);
  serializePlan(plan, baos, true);
  BaseWork newPlan = deserializePlan(new ByteArrayInputStream(baos.toByteArray()),
      plan.getClass(), true);
  // Restore the context.
  for (Operator<?> newOp : newPlan.getAllOperators()) {
    newOp.setCompilationOpContext(ctx);
  }
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.CLONE_PLAN);
  return newPlan;
}
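
Because the deserializer is handed plan.getClass() rather than a fixed type, the same helper covers any BaseWork subclass. A brief hypothetical example (the mapWork variable is assumed to exist):

// Sketch: MapWork is one of Hive's BaseWork subclasses, so the clone can
// be cast back to the concrete type that went in.
MapWork copy = (MapWork) SerializationUtilities.cloneBaseWork(mapWork);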
Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.
The class TestHCatMultiOutputFormat, method getTableData.
/**
 * Method to fetch table data.
 *
 * @param table table name
 * @param database database name
 * @return list of rows, with columns comma-separated
 * @throws Exception if any error occurs
 */
private List<String> getTableData(String table, String database) throws Exception {
  QueryState queryState = new QueryState(null);
  HiveConf conf = queryState.getConf();
  conf.addResource("hive-site.xml");
  ArrayList<String> results = new ArrayList<String>();
  ArrayList<String> temp = new ArrayList<String>();
  Hive hive = Hive.get(conf);
  org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table);
  FetchWork work;
  if (!tbl.getPartCols().isEmpty()) {
    List<Partition> partitions = hive.getPartitions(tbl);
    List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
    List<Path> partLocs = new ArrayList<Path>();
    TableDesc tableDesc = Utilities.getTableDesc(tbl);
    for (Partition part : partitions) {
      partLocs.add(part.getDataLocation());
      partDesc.add(Utilities.getPartitionDescFromTableDesc(tableDesc, part, true));
    }
    work = new FetchWork(partLocs, partDesc, tableDesc);
    work.setLimit(100);
  } else {
    work = new FetchWork(tbl.getDataLocation(), Utilities.getTableDesc(tbl));
  }
  FetchTask task = new FetchTask();
  task.setWork(work);
  task.initialize(queryState, null, null, new CompilationOpContext());
  task.fetch(temp);
  for (String str : temp) {
    results.add(str.replace("\t", ","));
  }
  return results;
}
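
A hypothetical use inside the same test class; the table name, database, and expected rows are made up, and JUnit's Assert plus java.util.Arrays are assumed to be imported:

// Sketch: fetch the rows of a test table and compare them; tab characters
// have been rewritten to commas by getTableData.
List<String> rows = getTableData("test_table", "default");
Assert.assertEquals(Arrays.asList("1,one", "2,two"), rows);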
Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.
The class SerializationUtilities, method cloneOperatorTree.
public static List<Operator<?>> cloneOperatorTree(List<Operator<?>> roots, int indexForTezUnion) {
  ByteArrayOutputStream baos = new ByteArrayOutputStream(4096);
  CompilationOpContext ctx = roots.isEmpty() ? null : roots.get(0).getCompilationOpContext();
  serializePlan(roots, baos, true);
  @SuppressWarnings("unchecked")
  List<Operator<?>> result = deserializePlan(new ByteArrayInputStream(baos.toByteArray()),
      roots.getClass(), true);
  // Restore the context.
  LinkedList<Operator<?>> newOps = new LinkedList<>(result);
  while (!newOps.isEmpty()) {
    Operator<?> newOp = newOps.poll();
    newOp.setIndexForTezUnion(indexForTezUnion);
    newOp.setCompilationOpContext(ctx);
    List<Operator<?>> children = newOp.getChildOperators();
    if (children != null) {
      newOps.addAll(children);
    }
  }
  return result;
}
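
A hypothetical wrapper (name and arguments are illustrative, not Hive API) that makes the two side effects of the breadth-first walk above explicit:

// Sketch: every cloned operator receives unionIndex via setIndexForTezUnion
// and shares the CompilationOpContext of the first root, as shown above.
public static List<Operator<?>> cloneForUnion(List<Operator<?>> roots, int unionIndex) {
  return SerializationUtilities.cloneOperatorTree(roots, unionIndex);
}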