Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
The class SparkCrossProductCheck, method checkShuffleJoin:
private void checkShuffleJoin(SparkWork sparkWork) throws SemanticException {
  for (ReduceWork reduceWork : sparkWork.getAllReduceWork()) {
    Operator<? extends OperatorDesc> reducer = reduceWork.getReducer();
    if (reducer instanceof JoinOperator || reducer instanceof CommonMergeJoinOperator) {
      Map<Integer, CrossProductCheck.ExtractReduceSinkInfo.Info> rsInfo =
          new HashMap<Integer, CrossProductCheck.ExtractReduceSinkInfo.Info>();
      for (BaseWork parent : sparkWork.getParents(reduceWork)) {
        rsInfo.putAll(new CrossProductCheck.ExtractReduceSinkInfo(null).analyze(parent));
      }
      checkForCrossProduct(reduceWork.getName(), reducer, rsInfo);
    }
  }
}
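The pattern above, iterating over SparkWork.getAllReduceWork() and inspecting each ReduceWork's reducer operator, is how the check scans a Spark plan for shuffle joins. Below is a minimal sketch of the same idea that uses only the accessors shown above; the class ShuffleJoinLister and its method are hypothetical names introduced for illustration.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.hive.ql.plan.SparkWork;

// Hypothetical helper class, not part of Hive.
public class ShuffleJoinLister {
  // Collect the names of all ReduceWork vertices whose reducer is a join operator,
  // using the same SparkWork/ReduceWork accessors as checkShuffleJoin above.
  public static List<String> shuffleJoinWorkNames(SparkWork sparkWork) {
    List<String> names = new ArrayList<String>();
    for (ReduceWork reduceWork : sparkWork.getAllReduceWork()) {
      Operator<? extends OperatorDesc> reducer = reduceWork.getReducer();
      if (reducer instanceof JoinOperator) {
        names.add(reduceWork.getName());
      }
    }
    return names;
  }
}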
Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
The class DagUtils, method createVertex:
/**
 * Create a vertex from a given work object.
 *
 * @param conf JobConf to be used for this execution unit
 * @param work The instance of BaseWork representing the actual work to be performed
 *          by this vertex.
 * @param scratchDir HDFS scratch dir for this execution unit.
 * @param appJarLr Local resource for hive-exec.
 * @param additionalLr Additional local resources for this vertex.
 * @param fileSystem FS corresponding to scratchDir and LocalResources
 * @param ctx This query's context
 * @return Vertex
 */
@SuppressWarnings("deprecation")
public Vertex createVertex(JobConf conf, BaseWork work, Path scratchDir, LocalResource appJarLr,
    List<LocalResource> additionalLr, FileSystem fileSystem, Context ctx, boolean hasChildren,
    TezWork tezWork, VertexType vertexType) throws Exception {
  Vertex v = null;
  // simply dispatch the call to the right method for the actual (sub-)type of BaseWork.
  if (work instanceof MapWork) {
    v = createVertex(conf, (MapWork) work, appJarLr, additionalLr, fileSystem, scratchDir, ctx, vertexType);
  } else if (work instanceof ReduceWork) {
    v = createVertex(conf, (ReduceWork) work, appJarLr, additionalLr, fileSystem, scratchDir, ctx);
  } else if (work instanceof MergeJoinWork) {
    v = createVertex(conf, (MergeJoinWork) work, appJarLr, additionalLr, fileSystem, scratchDir, ctx, vertexType);
  } else {
    // something is seriously wrong if this is happening
    throw new HiveException(ErrorMsg.GENERIC_ERROR.getErrorCodedMsg());
  }
  // initialize stats publisher if necessary
  if (work.isGatheringStats()) {
    StatsPublisher statsPublisher;
    StatsFactory factory = StatsFactory.newFactory(conf);
    if (factory != null) {
      StatsCollectionContext sCntxt = new StatsCollectionContext(conf);
      sCntxt.setStatsTmpDirs(Utilities.getStatsTmpDirs(work, conf));
      statsPublisher = factory.getStatsPublisher();
      if (!statsPublisher.init(sCntxt)) {
        // creating stats table if not exists
        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
          throw new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
        }
      }
    }
  }
  // final vertices need to have at least one output
  if (!hasChildren) {
    v.addDataSink("out_" + work.getName(), new DataSinkDescriptor(
        OutputDescriptor.create(MROutput.class.getName()).setUserPayload(
            TezUtils.createUserPayloadFromConf(conf)), null, null));
  }
  return v;
}
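The instanceof chain at the top of createVertex is how Hive dispatches on the concrete BaseWork subtype (MapWork, ReduceWork, MergeJoinWork). A minimal sketch of that dispatch in isolation follows, assuming only those plan classes; the class WorkTypeLabeler and its labels are hypothetical.

import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.MergeJoinWork;
import org.apache.hadoop.hive.ql.plan.ReduceWork;

// Hypothetical helper class, not part of Hive.
public class WorkTypeLabeler {
  // Mirror the dispatch used in createVertex: MapWork and ReduceWork are the two
  // basic vertex kinds, and MergeJoinWork covers the merge-join case.
  public static String label(BaseWork work) {
    if (work instanceof MapWork) {
      return "map";
    } else if (work instanceof ReduceWork) {
      return "reduce";
    } else if (work instanceof MergeJoinWork) {
      return "mergejoin";
    }
    throw new IllegalArgumentException("Unexpected BaseWork subtype: " + work.getClass().getName());
  }
}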
Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
The class SparkReduceRecordHandler, method init:
@Override
@SuppressWarnings("unchecked")
public void init(JobConf job, OutputCollector output, Reporter reporter) throws Exception {
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
  super.init(job, output, reporter);
  rowObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
  ObjectInspector[] valueObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
  ObjectInspector keyObjectInspector;
  ReduceWork gWork = Utilities.getReduceWork(job);
  reducer = gWork.getReducer();
  vectorized = gWork.getVectorMode();
  // clear out any parents as reducer is the root
  reducer.setParentOperators(null);
  isTagged = gWork.getNeedsTagging();
  try {
    keyTableDesc = gWork.getKeyDesc();
    inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
    SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
    keyObjectInspector = inputKeyDeserializer.getObjectInspector();
    valueTableDesc = new TableDesc[gWork.getTagToValueDesc().size()];
    if (vectorized) {
      final int maxTags = gWork.getTagToValueDesc().size();
      keyStructInspector = (StructObjectInspector) keyObjectInspector;
      batches = new VectorizedRowBatch[maxTags];
      valueStructInspectors = new StructObjectInspector[maxTags];
      valueStringWriters = new List[maxTags];
      keysColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
      buffer = new DataOutputBuffer();
    }
    for (int tag = 0; tag < gWork.getTagToValueDesc().size(); tag++) {
      // We should initialize the SerDe with the TypeInfo when available.
      valueTableDesc[tag] = gWork.getTagToValueDesc().get(tag);
      inputValueDeserializer[tag] = ReflectionUtils.newInstance(valueTableDesc[tag].getDeserializerClass(), null);
      SerDeUtils.initializeSerDe(inputValueDeserializer[tag], null, valueTableDesc[tag].getProperties(), null);
      valueObjectInspector[tag] = inputValueDeserializer[tag].getObjectInspector();
      ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
      if (vectorized) {
        /* vectorization only works with struct object inspectors */
        valueStructInspectors[tag] = (StructObjectInspector) valueObjectInspector[tag];
        final int totalColumns = keysColumnOffset + valueStructInspectors[tag].getAllStructFieldRefs().size();
        valueStringWriters[tag] = new ArrayList<VectorExpressionWriter>(totalColumns);
        valueStringWriters[tag].addAll(Arrays.asList(
            VectorExpressionWriterFactory.genVectorStructExpressionWritables(keyStructInspector)));
        valueStringWriters[tag].addAll(Arrays.asList(
            VectorExpressionWriterFactory.genVectorStructExpressionWritables(valueStructInspectors[tag])));
        rowObjectInspector[tag] = Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspectors[tag]);
        batches[tag] = gWork.getVectorizedRowBatchCtx().createVectorizedRowBatch();
      } else {
        ois.add(keyObjectInspector);
        ois.add(valueObjectInspector[tag]);
        //reducer.setGroupKeyObjectInspector(keyObjectInspector);
        rowObjectInspector[tag] = ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
      }
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  ExecMapperContext execContext = new ExecMapperContext(job);
  localWork = gWork.getMapRedLocalWork();
  execContext.setJc(jc);
  execContext.setLocalWork(localWork);
  reducer.passExecContext(execContext);
  reducer.setReporter(rp);
  OperatorUtils.setChildrenCollector(Arrays.<Operator<? extends OperatorDesc>>asList(reducer), output);
  // initialize reduce operator tree
  try {
    LOG.info(reducer.dump(0));
    reducer.initialize(jc, rowObjectInspector);
    if (localWork != null) {
      for (Operator<? extends OperatorDesc> dummyOp : localWork.getDummyParentOp()) {
        dummyOp.setExecContext(execContext);
        dummyOp.initialize(jc, null);
      }
    }
  } catch (Throwable e) {
    abort = true;
    if (e instanceof OutOfMemoryError) {
      // Don't create a new object if we are already out of memory
      throw (OutOfMemoryError) e;
    } else {
      throw new RuntimeException("Reduce operator initialization failed", e);
    }
  }
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
}
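The key step for ReduceWork here is Utilities.getReduceWork(job), which retrieves the deserialized reduce-side plan from the job configuration; everything else in init() is driven by that object. Below is a small sketch that fetches the plan and prints a few of the fields init() reads, using only accessors that appear above; the class ReduceWorkInspector is a hypothetical name for illustration.

import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.mapred.JobConf;

// Hypothetical helper class, not part of Hive.
public class ReduceWorkInspector {
  // Pull the deserialized ReduceWork out of the job configuration, as init() does,
  // and report a few plan-level facts before any operator is initialized.
  public static void describe(JobConf job) {
    ReduceWork gWork = Utilities.getReduceWork(job);
    System.out.println("work name:     " + gWork.getName());
    System.out.println("reducer class: " + gWork.getReducer().getClass().getSimpleName());
    System.out.println("vectorized:    " + gWork.getVectorMode());
    System.out.println("needs tagging: " + gWork.getNeedsTagging());
    System.out.println("value tags:    " + gWork.getTagToValueDesc().size());
  }
}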
Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
The class SparkPlanGenerator, method generateParentTran:
// Generate (possibly get from a cached result) parent SparkTran
private SparkTran generateParentTran(SparkPlan sparkPlan, SparkWork sparkWork, BaseWork work) throws Exception {
  if (cloneToWork.containsKey(work)) {
    BaseWork originalWork = cloneToWork.get(work);
    if (workToParentWorkTranMap.containsKey(originalWork)) {
      return workToParentWorkTranMap.get(originalWork);
    }
  }
  SparkTran result;
  if (work instanceof MapWork) {
    result = generateMapInput(sparkPlan, (MapWork) work);
    sparkPlan.addTran(result);
  } else if (work instanceof ReduceWork) {
    List<BaseWork> parentWorks = sparkWork.getParents(work);
    result = generate(sparkPlan, sparkWork.getEdgeProperty(parentWorks.get(0), work), cloneToWork.containsKey(work));
    sparkPlan.addTran(result);
    for (BaseWork parentWork : parentWorks) {
      sparkPlan.connect(workToTranMap.get(parentWork), result);
    }
  } else {
    throw new IllegalStateException("AssertionError: expected either MapWork or ReduceWork, but found "
        + work.getClass().getName());
  }
  if (cloneToWork.containsKey(work)) {
    workToParentWorkTranMap.put(cloneToWork.get(work), result);
  }
  return result;
}
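For a ReduceWork, generateParentTran looks up the edge property between the work and its first parent to decide what kind of shuffle transformation to generate, then connects every parent transformation to the result. A small sketch that simply walks those parent edges follows, assuming only the SparkWork accessors used above; the class ReduceParentPrinter is a hypothetical name for illustration.

import java.util.List;

import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.hive.ql.plan.SparkWork;

// Hypothetical helper class, not part of Hive.
public class ReduceParentPrinter {
  // For every ReduceWork in the SparkWork, list its parent works and the edge
  // property that generateParentTran() uses to build the shuffle transformation.
  public static void printParentEdges(SparkWork sparkWork) {
    for (ReduceWork reduceWork : sparkWork.getAllReduceWork()) {
      List<BaseWork> parents = sparkWork.getParents(reduceWork);
      for (BaseWork parent : parents) {
        System.out.println(parent.getName() + " -> " + reduceWork.getName()
            + " : " + sparkWork.getEdgeProperty(parent, reduceWork));
      }
    }
  }
}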