Use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache: class VectorPTFOperator, method initializeOp.
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  if (LOG.isDebugEnabled()) {
    // Determine the name of our map or reduce task for debug tracing.
    BaseWork work = Utilities.getMapWork(hconf);
    if (work == null) {
      work = Utilities.getReduceWork(hconf);
    }
    taskName = work.getName();
  }
  if (!isPartitionOrderBy) {
    currentPartitionIsNull = null;
    currentPartitionLongs = null;
    currentPartitionDoubles = null;
    currentPartitionByteArrays = null;
    currentPartitionByteLengths = null;
    currentPartitionDecimals = null;
    currentPartitionTimestamps = null;
    currentPartitionIntervalDayTimes = null;
  } else {
    final int partitionKeyCount = vectorDesc.getPartitionExprNodeDescs().length;
    currentPartitionIsNull = new boolean[partitionKeyCount];
    currentPartitionLongs = new long[partitionKeyCount];
    currentPartitionDoubles = new double[partitionKeyCount];
    currentPartitionByteArrays = new byte[partitionKeyCount][];
    currentPartitionByteLengths = new int[partitionKeyCount];
    currentPartitionDecimals = new HiveDecimalWritable[partitionKeyCount];
    currentPartitionTimestamps = new Timestamp[partitionKeyCount];
    currentPartitionIntervalDayTimes = new HiveIntervalDayTime[partitionKeyCount];
  }
  evaluators = VectorPTFDesc.getEvaluators(vectorDesc, vectorPTFInfo);
  streamingEvaluatorNums = VectorPTFDesc.getStreamingEvaluatorNums(evaluators);
  allEvaluatorsAreStreaming = (streamingEvaluatorNums.length == evaluatorCount);
  /*
   * Setup the overflow batch.
   */
  overflowBatch = setupOverflowBatch();
  groupBatches = new VectorPTFGroupBatches(hconf, vectorDesc.getVectorizedPTFMaxMemoryBufferingBatchCount());
  groupBatches.init(reducerBatchTypeInfos, evaluators, outputProjectionColumnMap, outputTypeInfos, keyInputColumnMap, nonKeyInputColumnMap, streamingEvaluatorNums, overflowBatch);
  isFirstPartition = true;
  batchCounter = 0;
}
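
The debug block above shows the usual way a vertex's BaseWork is recovered from the task configuration: try the map work first, then fall back to the reduce work. A minimal sketch of that lookup, factored into a hypothetical helper (resolveTaskName is not a Hive method; the Utilities calls and BaseWork.getName() are the ones used above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.BaseWork;

// Hypothetical helper, not part of Hive: resolve the current task name the same way
// initializeOp does, falling back from map work to reduce work.
static String resolveTaskName(Configuration hconf) {
  BaseWork work = Utilities.getMapWork(hconf);
  if (work == null) {
    work = Utilities.getReduceWork(hconf);
  }
  // Guard for contexts where neither work object has been deserialized yet.
  return work == null ? "unknown" : work.getName();
}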
Use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache: class SparkPlanGenerator, method generateParentTran.
// Generate (possibly get from a cached result) parent SparkTran
private SparkTran generateParentTran(SparkPlan sparkPlan, SparkWork sparkWork, BaseWork work) throws Exception {
  if (cloneToWork.containsKey(work)) {
    BaseWork originalWork = cloneToWork.get(work);
    if (workToParentWorkTranMap.containsKey(originalWork)) {
      return workToParentWorkTranMap.get(originalWork);
    }
  }
  SparkTran result;
  if (work instanceof MapWork) {
    result = generateMapInput(sparkPlan, (MapWork) work);
    sparkPlan.addTran(result);
  } else if (work instanceof ReduceWork) {
    boolean toCache = cloneToWork.containsKey(work);
    List<BaseWork> parentWorks = sparkWork.getParents(work);
    SparkEdgeProperty sparkEdgeProperty = sparkWork.getEdgeProperty(parentWorks.get(0), work);
    result = generate(sparkPlan, sparkEdgeProperty, toCache, work.getName());
    sparkPlan.addTran(result);
    for (BaseWork parentWork : parentWorks) {
      sparkPlan.connect(workToTranMap.get(parentWork), result);
    }
  } else {
    throw new IllegalStateException("AssertionError: expected either MapWork or ReduceWork, but found " + work.getClass().getName());
  }
  if (cloneToWork.containsKey(work)) {
    workToParentWorkTranMap.put(cloneToWork.get(work), result);
  }
  return result;
}
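
The method above memoizes the parent SparkTran of cloned work: a clone resolves back to its original through cloneToWork, and the tran built for that original is reused. A generic sketch of the same caching shape, with illustrative names only (CloneAwareCache is not a Hive class):

import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;

// Illustrative only: clones of a work unit share the result computed for their
// original, mirroring the cloneToWork / workToParentWorkTranMap pair above.
final class CloneAwareCache<W, T> {
  private final Map<W, W> cloneToOriginal = new HashMap<>();
  private final Map<W, T> resultByOriginal = new HashMap<>();

  T get(W work, Function<W, T> compute) {
    // A clone resolves to its original; anything else resolves to itself.
    W key = cloneToOriginal.getOrDefault(work, work);
    return resultByOriginal.computeIfAbsent(key, compute);
  }
}

Unlike this sketch, generateParentTran only records the computed tran when the work actually has a registered clone, so plain works are not cached.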
Use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache: class SparkTask, method getOperatorCounters.
private Map<String, List<String>> getOperatorCounters() {
  String groupName = HiveConf.getVar(conf, HiveConf.ConfVars.HIVECOUNTERGROUP);
  Map<String, List<String>> counters = new HashMap<String, List<String>>();
  List<String> hiveCounters = new LinkedList<String>();
  counters.put(groupName, hiveCounters);
  hiveCounters.add(Operator.HIVE_COUNTER_CREATED_FILES);
  // Spark transformation and Hive operators in SparkWork.
  for (MapOperator.Counter counter : MapOperator.Counter.values()) {
    hiveCounters.add(counter.toString());
  }
  SparkWork sparkWork = this.getWork();
  for (BaseWork work : sparkWork.getAllWork()) {
    for (Operator<? extends OperatorDesc> operator : work.getAllOperators()) {
      if (operator instanceof FileSinkOperator) {
        for (FileSinkOperator.Counter counter : FileSinkOperator.Counter.values()) {
          hiveCounters.add(((FileSinkOperator) operator).getCounterName(counter));
        }
      } else if (operator instanceof ReduceSinkOperator) {
        final String contextName = conf.get(Operator.CONTEXT_NAME_KEY, "");
        for (ReduceSinkOperator.Counter counter : ReduceSinkOperator.Counter.values()) {
          hiveCounters.add(Utilities.getVertexCounterName(counter.name(), contextName));
        }
      } else if (operator instanceof ScriptOperator) {
        for (ScriptOperator.Counter counter : ScriptOperator.Counter.values()) {
          hiveCounters.add(counter.toString());
        }
      } else if (operator instanceof JoinOperator) {
        for (JoinOperator.SkewkeyTableCounter counter : JoinOperator.SkewkeyTableCounter.values()) {
          hiveCounters.add(counter.toString());
        }
      }
    }
  }
  return counters;
}
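
getOperatorCounters relies on two BaseWork accessors, SparkWork.getAllWork() and BaseWork.getAllOperators(), to visit every operator in the plan. A minimal sketch of the same traversal, reduced to counting operators per class; the method name and its List<BaseWork> parameter are illustrative, while the accessors are the ones used above:

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;

// Illustrative only: tally operators per class name across the list returned by
// sparkWork.getAllWork().
static Map<String, Integer> countOperators(List<BaseWork> allWork) {
  Map<String, Integer> counts = new HashMap<>();
  for (BaseWork work : allWork) {
    for (Operator<? extends OperatorDesc> operator : work.getAllOperators()) {
      counts.merge(operator.getClass().getSimpleName(), 1, Integer::sum);
    }
  }
  return counts;
}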
Use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache: class DagUtils, method createVertex.
private Vertex createVertex(JobConf conf, MergeJoinWork mergeJoinWork, FileSystem fs, Path mrScratchDir, Context ctx, VertexType vertexType, Map<String, LocalResource> localResources) throws Exception {
  Utilities.setMergeWork(conf, mergeJoinWork, mrScratchDir, false);
  if (mergeJoinWork.getMainWork() instanceof MapWork) {
    List<BaseWork> mapWorkList = mergeJoinWork.getBaseWorkList();
    MapWork mapWork = (MapWork) (mergeJoinWork.getMainWork());
    Vertex mergeVx = createVertex(conf, mapWork, fs, mrScratchDir, ctx, vertexType, localResources);
    conf.setClass("mapred.input.format.class", HiveInputFormat.class, InputFormat.class);
    // mapreduce.tez.input.initializer.serialize.event.payload should be set
    // to false when using this plug-in to avoid getting a serialized event at run-time.
    conf.setBoolean("mapreduce.tez.input.initializer.serialize.event.payload", false);
    for (int i = 0; i < mapWorkList.size(); i++) {
      mapWork = (MapWork) (mapWorkList.get(i));
      conf.set(TEZ_MERGE_CURRENT_MERGE_FILE_PREFIX, mapWork.getName());
      conf.set(Utilities.INPUT_NAME, mapWork.getName());
      LOG.info("Going through each work and adding MultiMRInput");
      mergeVx.addDataSource(mapWork.getName(), MultiMRInput.createConfigBuilder(conf, HiveInputFormat.class).build());
    }
    // To be populated for SMB joins only for all the small tables
    Map<String, Integer> inputToBucketMap = new HashMap<>();
    if (mergeJoinWork.getMergeJoinOperator().getParentOperators().size() == 1 && mergeJoinWork.getMergeJoinOperator().getOpTraits() != null) {
      // This is an SMB join.
      for (BaseWork work : mapWorkList) {
        MapWork mw = (MapWork) work;
        Map<String, Operator<?>> aliasToWork = mw.getAliasToWork();
        Preconditions.checkState(aliasToWork.size() == 1, "More than 1 alias in SMB mapwork");
        inputToBucketMap.put(mw.getName(), mw.getWorks().get(0).getOpTraits().getNumBuckets());
      }
    }
    VertexManagerPluginDescriptor desc = VertexManagerPluginDescriptor.create(CustomPartitionVertex.class.getName());
    // the +1 to the size is because of the main work.
    CustomVertexConfiguration vertexConf = new CustomVertexConfiguration(mergeJoinWork.getMergeJoinOperator().getConf().getNumBuckets(), vertexType, mergeJoinWork.getBigTableAlias(), mapWorkList.size() + 1, inputToBucketMap);
    DataOutputBuffer dob = new DataOutputBuffer();
    vertexConf.write(dob);
    byte[] userPayload = dob.getData();
    desc.setUserPayload(UserPayload.create(ByteBuffer.wrap(userPayload)));
    mergeVx.setVertexManagerPlugin(desc);
    return mergeVx;
  } else {
    return createVertex(conf, (ReduceWork) mergeJoinWork.getMainWork(), fs, mrScratchDir, ctx, localResources);
  }
}
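
The MergeJoinWork branch ends by serializing the custom vertex configuration into the vertex manager's user payload. The same DataOutputBuffer-to-UserPayload steps apply to any Hadoop Writable; a minimal sketch under that assumption (toUserPayload is a hypothetical helper, not Hive code):

import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Writable;
import org.apache.tez.dag.api.UserPayload;

// Illustrative only: serialize a Writable into a Tez UserPayload using the same
// DataOutputBuffer + ByteBuffer.wrap steps as createVertex above.
static UserPayload toUserPayload(Writable writable) throws IOException {
  DataOutputBuffer dob = new DataOutputBuffer();
  writable.write(dob);
  // getData() can return a backing array larger than what was written,
  // so wrap only the valid range.
  return UserPayload.create(ByteBuffer.wrap(dob.getData(), 0, dob.getLength()));
}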
Use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache: class TezTask, method build.
DAG build(JobConf conf, TezWork work, Path scratchDir, Context ctx, Map<String, LocalResource> vertexResources) throws Exception {
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_BUILD_DAG);
  // getAllWork returns a topologically sorted list, which we use to make
  // sure that vertices are created before they are used in edges.
  List<BaseWork> ws = work.getAllWork();
  Collections.reverse(ws);
  FileSystem fs = scratchDir.getFileSystem(conf);
  // the name of the dag is what is displayed in the AM/Job UI
  String dagName = utils.createDagName(conf, queryPlan);
  LOG.info("Dag name: " + dagName);
  DAG dag = DAG.create(dagName);
  // set some info for the query
  JSONObject json = new JSONObject(new LinkedHashMap<>()).put("context", "Hive").put("description", ctx.getCmd());
  String dagInfo = json.toString();
  if (LOG.isDebugEnabled()) {
    LOG.debug("DagInfo: " + dagInfo);
  }
  dag.setDAGInfo(dagInfo);
  dag.setCredentials(conf.getCredentials());
  setAccessControlsForCurrentUser(dag, queryPlan.getQueryId(), conf);
  for (BaseWork w : ws) {
    boolean isFinal = work.getLeaves().contains(w);
    // translate work to vertex
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + w.getName());
    if (w instanceof UnionWork) {
      // Special case for unions. These items translate to VertexGroups
      List<BaseWork> unionWorkItems = new LinkedList<BaseWork>();
      List<BaseWork> children = new LinkedList<BaseWork>();
      // proper children of the union
      for (BaseWork v : work.getChildren(w)) {
        EdgeType type = work.getEdgeProperty(w, v).getEdgeType();
        if (type == EdgeType.CONTAINS) {
          unionWorkItems.add(v);
        } else {
          children.add(v);
        }
      }
      JobConf parentConf = workToConf.get(unionWorkItems.get(0));
      checkOutputSpec(w, parentConf);
      // create VertexGroup
      Vertex[] vertexArray = new Vertex[unionWorkItems.size()];
      int i = 0;
      for (BaseWork v : unionWorkItems) {
        vertexArray[i++] = workToVertex.get(v);
      }
      VertexGroup group = dag.createVertexGroup(w.getName(), vertexArray);
      // now hook up the children
      for (BaseWork v : children) {
        // finally we can create the grouped edge
        GroupInputEdge e = utils.createEdge(group, parentConf, workToVertex.get(v), work.getEdgeProperty(w, v), v, work);
        dag.addEdge(e);
      }
    } else {
      // Regular vertices
      JobConf wxConf = utils.initializeVertexConf(conf, ctx, w);
      checkOutputSpec(w, wxConf);
      Vertex wx = utils.createVertex(wxConf, w, scratchDir, fs, ctx, !isFinal, work, work.getVertexType(w), vertexResources);
      if (w.getReservedMemoryMB() > 0) {
        // If reservedMemoryMB is set, make memory allocation fraction adjustment as needed
        double frac = DagUtils.adjustMemoryReserveFraction(w.getReservedMemoryMB(), super.conf);
        LOG.info("Setting " + TEZ_MEMORY_RESERVE_FRACTION + " to " + frac);
        wx.setConf(TEZ_MEMORY_RESERVE_FRACTION, Double.toString(frac));
      }
      // Otherwise just leave it up to Tez to decide how much memory to allocate
      dag.addVertex(wx);
      utils.addCredentials(w, dag);
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + w.getName());
      workToVertex.put(w, wx);
      workToConf.put(w, wxConf);
      // add all dependencies (i.e.: edges) to the graph
      for (BaseWork v : work.getChildren(w)) {
        assert workToVertex.containsKey(v);
        Edge e = null;
        TezEdgeProperty edgeProp = work.getEdgeProperty(w, v);
        e = utils.createEdge(wxConf, wx, workToVertex.get(v), edgeProp, v, work);
        dag.addEdge(e);
      }
    }
  }
  // Clear the work map after build. TODO: remove caching instead?
  Utilities.clearWorkMap(conf);
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_BUILD_DAG);
  return dag;
}
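
build() depends on the ordering of work.getAllWork(): after the Collections.reverse, each work's children have already been turned into vertices by the time its outgoing edges are created, which is what the assert inside the edge loop checks. A small sketch of that invariant in isolation; listEdgesInCreationOrder is illustrative, while the TezWork accessors are the ones used above:

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.TezWork;

// Illustrative only: walk the reversed topological order and record edges. Every child
// has already been visited when its incoming work is processed, mirroring build().
static List<String> listEdgesInCreationOrder(TezWork work) {
  List<BaseWork> ws = work.getAllWork();
  Collections.reverse(ws);
  Map<BaseWork, String> vertexByWork = new HashMap<>();
  List<String> edges = new ArrayList<>();
  for (BaseWork w : ws) {
    vertexByWork.put(w, w.getName());
    for (BaseWork child : work.getChildren(w)) {
      // The child appeared earlier in the reversed list, so its entry exists.
      edges.add(w.getName() + " -> " + vertexByWork.get(child));
    }
  }
  return edges;
}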