Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.
The class CheckTableAccessHook, method run:
public void run(HookContext hookContext) {
  HiveConf conf = hookContext.getConf();
  if (conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS) == false) {
    return;
  }
  QueryPlan plan = hookContext.getQueryPlan();
  if (plan == null) {
    return;
  }
  TableAccessInfo tableAccessInfo = hookContext.getQueryPlan().getTableAccessInfo();
  if (tableAccessInfo == null ||
      tableAccessInfo.getOperatorToTableAccessMap() == null ||
      tableAccessInfo.getOperatorToTableAccessMap().isEmpty()) {
    return;
  }
  LogHelper console = SessionState.getConsole();
  Map<Operator<? extends OperatorDesc>, Map<String, List<String>>> operatorToTableAccessMap =
      tableAccessInfo.getOperatorToTableAccessMap();
  // Must be deterministic order map for consistent q-test output across Java versions
  Map<String, String> outputOrderedMap = new LinkedHashMap<String, String>();
  for (Map.Entry<Operator<? extends OperatorDesc>, Map<String, List<String>>> tableAccess :
      operatorToTableAccessMap.entrySet()) {
    StringBuilder perOperatorInfo = new StringBuilder();
    perOperatorInfo.append("Operator:").append(tableAccess.getKey().getOperatorId()).append("\n");
    for (Map.Entry<String, List<String>> entry : tableAccess.getValue().entrySet()) {
      perOperatorInfo.append("Table:").append(entry.getKey()).append("\n");
      perOperatorInfo.append("Keys:").append(StringUtils.join(entry.getValue(), ',')).append("\n");
    }
    outputOrderedMap.put(tableAccess.getKey().getOperatorId(), perOperatorInfo.toString());
  }
  for (String perOperatorInfo : outputOrderedMap.values()) {
    console.printError(perOperatorInfo);
  }
}
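For context, hooks like this are plugged into query execution through Hive's hook interfaces. Below is a minimal sketch of a custom post-execution hook built only from the classes the snippet above already uses (HookContext, QueryPlan, TableAccessInfo, Operator, OperatorDesc); the class name and the choice to print to stdout are illustrative assumptions, not part of Hive. Registering it would typically mean listing the class in hive.exec.post.hooks and enabling hive.stats.collect.tablekeys, though the exact property names should be verified against your Hive version.

import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
import org.apache.hadoop.hive.ql.hooks.HookContext;
import org.apache.hadoop.hive.ql.parse.TableAccessInfo;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;

// Hypothetical hook: logs which key columns each operator touches per table.
public class LogTableAccessHook implements ExecuteWithHookContext {

  @Override
  public void run(HookContext hookContext) {
    QueryPlan plan = hookContext.getQueryPlan();
    if (plan == null || plan.getTableAccessInfo() == null) {
      return;
    }
    TableAccessInfo info = plan.getTableAccessInfo();
    Map<Operator<? extends OperatorDesc>, Map<String, List<String>>> accessMap =
        info.getOperatorToTableAccessMap();
    if (accessMap == null) {
      return;
    }
    for (Map.Entry<Operator<? extends OperatorDesc>, Map<String, List<String>>> e : accessMap.entrySet()) {
      for (Map.Entry<String, List<String>> tableKeys : e.getValue().entrySet()) {
        // Operator id, table name, and the key columns that operator accesses.
        System.out.println(e.getKey().getOperatorId() + " " + tableKeys.getKey()
            + " keys=" + tableKeys.getValue());
      }
    }
  }
}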
Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.
The class ExplainTask, method outputPlan:
private JSONObject outputPlan(Object work, PrintStream out, boolean extended, boolean jsonOutput,
    int indent, String appendToHeader) throws Exception {
  // Check if work has an explain annotation
  Annotation note = AnnotationUtils.getAnnotation(work.getClass(), Explain.class);
  String keyJSONObject = null;
  if (note instanceof Explain) {
    Explain xpl_note = (Explain) note;
    boolean invokeFlag = false;
    if (this.work != null && this.work.isUserLevelExplain()) {
      invokeFlag = Level.USER.in(xpl_note.explainLevels());
    } else {
      if (extended) {
        invokeFlag = Level.EXTENDED.in(xpl_note.explainLevels());
      } else {
        invokeFlag = Level.DEFAULT.in(xpl_note.explainLevels());
      }
    }
    if (invokeFlag) {
      Vectorization vectorization = xpl_note.vectorization();
      if (this.work != null && this.work.isVectorization()) {
        // The EXPLAIN VECTORIZATION option was specified.
        final boolean desireOnly = this.work.isVectorizationOnly();
        final VectorizationDetailLevel desiredVecDetailLevel = this.work.isVectorizationDetailLevel();
        switch (vectorization) {
        case NON_VECTORIZED:
          // Display all non-vectorized leaf objects unless ONLY.
          if (desireOnly) {
            invokeFlag = false;
          }
          break;
        case SUMMARY:
        case OPERATOR:
        case EXPRESSION:
        case DETAIL:
          if (vectorization.rank < desiredVecDetailLevel.rank) {
            // This detail not desired.
            invokeFlag = false;
          }
          break;
        case SUMMARY_PATH:
        case OPERATOR_PATH:
          if (desireOnly) {
            if (vectorization.rank < desiredVecDetailLevel.rank) {
              // Suppress headers and all objects below.
              invokeFlag = false;
            }
          }
          break;
        default:
          throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization);
        }
      } else {
        // Do not display vectorization objects.
        switch (vectorization) {
        case SUMMARY:
        case OPERATOR:
        case EXPRESSION:
        case DETAIL:
          invokeFlag = false;
          break;
        case NON_VECTORIZED:
          // No action.
          break;
        case SUMMARY_PATH:
        case OPERATOR_PATH:
          // Always include headers since they contain non-vectorized objects, too.
          break;
        default:
          throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization);
        }
      }
    }
    if (invokeFlag) {
      keyJSONObject = xpl_note.displayName();
      if (out != null) {
        out.print(indentString(indent));
        if (appendToHeader != null && !appendToHeader.isEmpty()) {
          out.println(xpl_note.displayName() + appendToHeader);
        } else {
          out.println(xpl_note.displayName());
        }
      }
    }
  }
  JSONObject json = jsonOutput ? new JSONObject(new LinkedHashMap<>()) : null;
  // conf and then the children
  if (work instanceof Operator) {
    Operator<? extends OperatorDesc> operator = (Operator<? extends OperatorDesc>) work;
    if (operator.getConf() != null) {
      String appender = isLogical ? " (" + operator.getOperatorId() + ")" : "";
      JSONObject jsonOut = outputPlan(operator.getConf(), out, extended, jsonOutput,
          jsonOutput ? 0 : indent, appender);
      if (this.work != null && (this.work.isUserLevelExplain() || this.work.isFormatted())) {
        if (jsonOut != null && jsonOut.length() > 0) {
          ((JSONObject) jsonOut.get(JSONObject.getNames(jsonOut)[0])).put("OperatorId:",
              operator.getOperatorId());
          if (!this.work.isUserLevelExplain() && this.work.isFormatted()
              && operator instanceof ReduceSinkOperator) {
            List<String> outputOperators = ((ReduceSinkOperator) operator).getConf().getOutputOperators();
            if (outputOperators != null) {
              ((JSONObject) jsonOut.get(JSONObject.getNames(jsonOut)[0])).put(OUTPUT_OPERATORS,
                  Arrays.toString(outputOperators.toArray()));
            }
          }
        }
      }
      if (jsonOutput) {
        json = jsonOut;
      }
    }
    if (!visitedOps.contains(operator) || !isLogical) {
      visitedOps.add(operator);
      if (operator.getChildOperators() != null) {
        int cindent = jsonOutput ? 0 : indent + 2;
        for (Operator<? extends OperatorDesc> op : operator.getChildOperators()) {
          JSONObject jsonOut = outputPlan(op, out, extended, jsonOutput, cindent);
          if (jsonOutput) {
            ((JSONObject) json.get(JSONObject.getNames(json)[0])).accumulate("children", jsonOut);
          }
        }
      }
    }
    if (jsonOutput) {
      return json;
    }
    return null;
  }
  // We look at all methods that generate values for explain
  Method[] methods = work.getClass().getMethods();
  Arrays.sort(methods, new MethodComparator());
  for (Method m : methods) {
    int prop_indents = jsonOutput ? 0 : indent + 2;
    note = AnnotationUtils.getAnnotation(m, Explain.class);
    if (note instanceof Explain) {
      Explain xpl_note = (Explain) note;
      boolean invokeFlag = false;
      if (this.work != null && this.work.isUserLevelExplain()) {
        invokeFlag = Level.USER.in(xpl_note.explainLevels());
      } else {
        if (extended) {
          invokeFlag = Level.EXTENDED.in(xpl_note.explainLevels());
        } else {
          invokeFlag = Level.DEFAULT.in(xpl_note.explainLevels());
        }
      }
      if (invokeFlag) {
        Vectorization vectorization = xpl_note.vectorization();
        if (this.work != null && this.work.isVectorization()) {
          // The EXPLAIN VECTORIZATION option was specified.
          final boolean desireOnly = this.work.isVectorizationOnly();
          final VectorizationDetailLevel desiredVecDetailLevel = this.work.isVectorizationDetailLevel();
          switch (vectorization) {
          case NON_VECTORIZED:
            // Display all non-vectorized leaf objects unless ONLY.
            if (desireOnly) {
              invokeFlag = false;
            }
            break;
          case SUMMARY:
          case OPERATOR:
          case EXPRESSION:
          case DETAIL:
            if (vectorization.rank < desiredVecDetailLevel.rank) {
              // This detail not desired.
              invokeFlag = false;
            }
            break;
          case SUMMARY_PATH:
          case OPERATOR_PATH:
            if (desireOnly) {
              if (vectorization.rank < desiredVecDetailLevel.rank) {
                // Suppress headers and all objects below.
                invokeFlag = false;
              }
            }
            break;
          default:
            throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization);
          }
        } else {
          // Do not display vectorization objects.
          switch (vectorization) {
          case SUMMARY:
          case OPERATOR:
          case EXPRESSION:
          case DETAIL:
            invokeFlag = false;
            break;
          case NON_VECTORIZED:
            // No action.
            break;
          case SUMMARY_PATH:
          case OPERATOR_PATH:
            // Always include headers since they contain non-vectorized objects, too.
            break;
          default:
            throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization);
          }
        }
      }
      if (invokeFlag) {
        Object val = null;
        try {
          val = m.invoke(work);
        } catch (InvocationTargetException ex) {
          // Ignore the exception, this may be caused by external jars
          val = null;
        }
        if (val == null) {
          continue;
        }
        String header = null;
        boolean skipHeader = xpl_note.skipHeader();
        boolean emptyHeader = false;
        if (!xpl_note.displayName().equals("")) {
          header = indentString(prop_indents) + xpl_note.displayName() + ":";
        } else {
          emptyHeader = true;
          prop_indents = indent;
          header = indentString(prop_indents);
        }
        // Try the output as a primitive object
        if (isPrintable(val)) {
          if (out != null && shouldPrint(xpl_note, val)) {
            if (!skipHeader) {
              out.print(header);
              out.print(" ");
            }
            out.println(val);
          }
          if (jsonOutput && shouldPrint(xpl_note, val)) {
            json.put(header, val.toString());
          }
          continue;
        }
        int ind = 0;
        if (!jsonOutput) {
          if (!skipHeader) {
            ind = prop_indents + 2;
          } else {
            ind = indent;
          }
        }
        // Try this as a map
        if (val instanceof Map) {
          // Go through the map and print out the stuff
          Map<?, ?> mp = (Map<?, ?>) val;
          if (out != null && !skipHeader && mp != null && !mp.isEmpty()) {
            out.print(header);
          }
          JSONObject jsonOut = outputMap(mp, !skipHeader && !emptyHeader, out, extended, jsonOutput, ind);
          if (jsonOutput && !mp.isEmpty()) {
            json.put(header, jsonOut);
          }
          continue;
        }
        // Try this as a list
        if (val instanceof List || val instanceof Set) {
          List l = val instanceof List ? (List) val : new ArrayList((Set) val);
          if (out != null && !skipHeader && l != null && !l.isEmpty()) {
            out.print(header);
          }
          JSONArray jsonOut = outputList(l, out, !skipHeader && !emptyHeader, extended, jsonOutput, ind);
          if (jsonOutput && !l.isEmpty()) {
            json.put(header, jsonOut);
          }
          continue;
        }
        // Finally check if it is serializable
        try {
          if (!skipHeader && out != null) {
            out.println(header);
          }
          JSONObject jsonOut = outputPlan(val, out, extended, jsonOutput, ind);
          if (jsonOutput && jsonOut != null && jsonOut.length() != 0) {
            if (!skipHeader) {
              json.put(header, jsonOut);
            } else {
              for (String k : JSONObject.getNames(jsonOut)) {
                json.put(k, jsonOut.get(k));
              }
            }
          }
          continue;
        } catch (ClassCastException ce) {
          // Ignore
        }
      }
    }
  }
  if (jsonOutput) {
    if (keyJSONObject != null) {
      JSONObject ret = new JSONObject(new LinkedHashMap<>());
      ret.put(keyJSONObject, json);
      return ret;
    }
    return json;
  }
  return null;
}
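To make the reflection above concrete, here is a toy example of the kind of object outputPlan walks: a plain class whose class-level and getter-level @Explain annotations supply the display names and explain levels that xpl_note.displayName() and xpl_note.explainLevels() read. The class and its fields are hypothetical; only the annotation attributes mirror what the method consumes, and a real plan node would additionally implement OperatorDesc.

import org.apache.hadoop.hive.ql.plan.Explain;
import org.apache.hadoop.hive.ql.plan.Explain.Level;

// Hypothetical plan/desc class used only to illustrate the @Explain contract.
@Explain(displayName = "Example Work", explainLevels = { Level.DEFAULT, Level.EXTENDED })
public class ExampleWorkDesc {

  // Printed as "output path: /tmp/example" for regular and extended EXPLAIN.
  @Explain(displayName = "output path", explainLevels = { Level.DEFAULT, Level.EXTENDED })
  public String getOutputPath() {
    return "/tmp/example";
  }

  // Only printed for EXPLAIN EXTENDED.
  @Explain(displayName = "internal id", explainLevels = { Level.EXTENDED })
  public long getInternalId() {
    return 42L;
  }
}

Given such an object, outputPlan prints the class-level display name as a header, then sorts the getters with MethodComparator and emits each annotated value, exactly as the loop over work.getClass().getMethods() above shows.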
Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.
The class MapredLocalTask, method initializeOperators:
private void initializeOperators(Map<FetchOperator, JobConf> fetchOpJobConfMap) throws HiveException {
  for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : work.getAliasToWork().entrySet()) {
    LOG.debug("initializeOperators: " + entry.getKey() + ", children = " + entry.getValue().getChildOperators());
  }
  // this mapper operator is used to initialize all the operators
  for (Map.Entry<String, FetchWork> entry : work.getAliasToFetchWork().entrySet()) {
    if (entry.getValue() == null) {
      continue;
    }
    JobConf jobClone = new JobConf(job);
    TableScanOperator ts = (TableScanOperator) work.getAliasToWork().get(entry.getKey());
    // push down projections
    ColumnProjectionUtils.appendReadColumns(jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns(),
        ts.getNeededNestedColumnPaths());
    // push down filters
    HiveInputFormat.pushFilters(jobClone, ts);
    AcidUtils.setTransactionalTableScan(jobClone, ts.getConf().isAcidTable());
    AcidUtils.setAcidOperationalProperties(jobClone, ts.getConf().getAcidOperationalProperties());
    // create a fetch operator
    FetchOperator fetchOp = new FetchOperator(entry.getValue(), jobClone);
    fetchOpJobConfMap.put(fetchOp, jobClone);
    fetchOperators.put(entry.getKey(), fetchOp);
    l4j.info("fetchoperator for " + entry.getKey() + " created");
  }
  // initialize all forward operators
  for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
    // get the forward op
    String alias = entry.getKey();
    Operator<? extends OperatorDesc> forwardOp = work.getAliasToWork().get(alias);
    // put the exe context into all the operators
    forwardOp.passExecContext(execContext);
    // All the operators need to be initialized before process
    FetchOperator fetchOp = entry.getValue();
    JobConf jobConf = fetchOpJobConfMap.get(fetchOp);
    if (jobConf == null) {
      jobConf = job;
    }
    // initialize the forward operator
    ObjectInspector objectInspector = fetchOp.getOutputObjectInspector();
    forwardOp.initialize(jobConf, new ObjectInspector[] { objectInspector });
    l4j.info("fetchoperator for " + entry.getKey() + " initialized");
  }
}
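After initializeOperators returns, the fetch/forward operator pairs still have to be driven row by row. The sketch below illustrates that step in isolation; it is modeled on MapredLocalTask's companion forwarding logic, but the helper class is hypothetical and the FetchOperator.getNextRow(), Operator.process(), and Operator.close() calls should be treated as assumptions to verify against your Hive version.

import java.io.IOException;

import org.apache.hadoop.hive.ql.exec.FetchOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;

public final class LocalFetchDriver {

  private LocalFetchDriver() {
  }

  // Pull every row from the fetch operator and push it into the forward operator tree.
  public static void forwardAllRows(FetchOperator fetchOp, Operator<? extends OperatorDesc> forwardOp)
      throws IOException, HiveException {
    while (true) {
      InspectableObject row = fetchOp.getNextRow();
      if (row == null) {
        // End of input for this alias: close the operator tree without aborting.
        forwardOp.close(false);
        return;
      }
      // Tag 0 because the forward operator has a single (fetch) parent here.
      forwardOp.process(row.o, 0);
    }
  }
}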
Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.
The class SparkMapRecordHandler, method init:
@Override
public <K, V> void init(JobConf job, OutputCollector<K, V> output, Reporter reporter) throws Exception {
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
  super.init(job, output, reporter);
  isLogInfoEnabled = LOG.isInfoEnabled();
  try {
    jc = job;
    execContext = new ExecMapperContext(jc);
    // create map and fetch operators
    MapWork mrwork = Utilities.getMapWork(job);
    CompilationOpContext runtimeCtx = new CompilationOpContext();
    if (mrwork.getVectorMode()) {
      mo = new VectorMapOperator(runtimeCtx);
    } else {
      mo = new MapOperator(runtimeCtx);
    }
    mo.setConf(mrwork);
    // initialize map operator
    mo.initialize(jc, null);
    mo.setChildren(job);
    LOG.info(mo.dump(0));
    // initialize map local work
    localWork = mrwork.getMapRedLocalWork();
    execContext.setLocalWork(localWork);
    MapredContext.init(true, new JobConf(jc));
    MapredContext.get().setReporter(reporter);
    mo.passExecContext(execContext);
    mo.initializeLocalWork(jc);
    mo.initializeMapOperator(jc);
    OperatorUtils.setChildrenCollector(mo.getChildOperators(), output);
    mo.setReporter(rp);
    if (localWork == null) {
      return;
    }
    // The following code is for mapjoin
    // initialize all the dummy ops
    LOG.info("Initializing dummy operator");
    List<Operator<? extends OperatorDesc>> dummyOps = localWork.getDummyParentOp();
    for (Operator<? extends OperatorDesc> dummyOp : dummyOps) {
      dummyOp.setExecContext(execContext);
      dummyOp.initialize(jc, null);
    }
  } catch (Throwable e) {
    abort = true;
    if (e instanceof OutOfMemoryError) {
      // Don't create a new object if we are already out of memory
      throw (OutOfMemoryError) e;
    } else {
      throw new RuntimeException("Map operator initialization failed: " + e, e);
    }
  }
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
}
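The mirror image of this initialization is the handler's cleanup: the map operator tree and the map-join dummy parents created above both need to be closed with the same abort flag. The helper below is only a sketch of that shape; the class is hypothetical, getDummyParentOp() appears in the surrounding code, and Operator.close(boolean) is assumed from the Operator API rather than shown here.

import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;

public final class MapHandlerCleanup {

  private MapHandlerCleanup() {
  }

  // Close the map operator tree and any map-join dummy parents, propagating the abort flag.
  public static void closeOperators(Operator<? extends OperatorDesc> mapOp, MapredLocalWork localWork,
      boolean abort) throws HiveException {
    // Closing the root cascades to the children wired up by setChildren()/setChildrenCollector().
    mapOp.close(abort);
    if (localWork != null) {
      // Dummy parents are not children of the map operator, so they are closed explicitly.
      for (Operator<? extends OperatorDesc> dummyOp : localWork.getDummyParentOp()) {
        dummyOp.close(abort);
      }
    }
  }
}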
Use of org.apache.hadoop.hive.ql.plan.OperatorDesc in project hive by apache.
The class VectorMapOperator, method internalSetChildren:
/*
 * Create information for vector map operator.
 * The member oneRootOperator has been set.
 */
private void internalSetChildren(Configuration hconf) throws Exception {
  // The setupPartitionContextVars uses the prior read type to flush the prior deserializerBatch,
  // so set it here to none.
  currentReadType = VectorMapOperatorReadType.NONE;
  batchContext = conf.getVectorizedRowBatchCtx();
  /*
   * Use a different batch for vectorized Input File Format readers so they can do their work
   * overlapped with work of the row collection that vector/row deserialization does. This allows
   * the partitions to mix modes (e.g. for us to flush the previously batched rows on file change).
   */
  vectorizedInputFileFormatBatch = batchContext.createVectorizedRowBatch();
  conf.setVectorizedRowBatch(vectorizedInputFileFormatBatch);
  /*
   * This batch is used by vector/row deserializer readers.
   */
  deserializerBatch = batchContext.createVectorizedRowBatch();
  batchCounter = 0;
  dataColumnCount = batchContext.getDataColumnCount();
  partitionColumnCount = batchContext.getPartitionColumnCount();
  partitionValues = new Object[partitionColumnCount];
  dataColumnNums = batchContext.getDataColumnNums();
  Preconditions.checkState(dataColumnNums != null);
  // Form a truncated boolean include array for our vector/row deserializers.
  determineDataColumnsToIncludeTruncated();
  /*
   * Create table related objects
   */
  final String[] rowColumnNames = batchContext.getRowColumnNames();
  final TypeInfo[] rowColumnTypeInfos = batchContext.getRowColumnTypeInfos();
  tableStructTypeInfo = TypeInfoFactory.getStructTypeInfo(Arrays.asList(rowColumnNames),
      Arrays.asList(rowColumnTypeInfos));
  tableStandardStructObjectInspector = (StandardStructObjectInspector)
      TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(tableStructTypeInfo);
  tableRowTypeInfos = batchContext.getRowColumnTypeInfos();
  /*
   * NOTE: We do not alter the projectedColumns / projectionSize of the batches to just be
   * the included columns (+ partition columns).
   *
   * For now, we need to model the object inspector rows because there are still several
   * vectorized operators that use them.
   *
   * We need to continue to model the Object[] as having null objects for not included columns
   * until the following has been fixed:
   *   o When we have to output a STRUCT for AVG we switch to row GroupBy operators.
   *   o Some variations of VectorMapOperator, VectorReduceSinkOperator, VectorFileSinkOperator
   *     use the row super class to process rows.
   */
  /*
   * The Vectorizer class enforces that there is only one TableScanOperator, so
   * we don't need the more complicated multiple root operator mapping that MapOperator has.
   */
  fileToPartitionContextMap = new HashMap<String, VectorPartitionContext>();
  // Temporary map so we only create one partition context entry.
  HashMap<PartitionDesc, VectorPartitionContext> partitionContextMap =
      new HashMap<PartitionDesc, VectorPartitionContext>();
  for (Map.Entry<Path, ArrayList<String>> entry : conf.getPathToAliases().entrySet()) {
    Path path = entry.getKey();
    PartitionDesc partDesc = conf.getPathToPartitionInfo().get(path);
    VectorPartitionContext vectorPartitionContext;
    if (!partitionContextMap.containsKey(partDesc)) {
      vectorPartitionContext = createAndInitPartitionContext(partDesc, hconf);
      partitionContextMap.put(partDesc, vectorPartitionContext);
    } else {
      vectorPartitionContext = partitionContextMap.get(partDesc);
    }
    fileToPartitionContextMap.put(path.toString(), vectorPartitionContext);
  }
  // Create list of one.
  List<Operator<? extends OperatorDesc>> children = new ArrayList<Operator<? extends OperatorDesc>>();
  children.add(oneRootOperator);
  setChildOperators(children);
}
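The path loop above implements a small de-duplication pattern: many input paths can share one PartitionDesc, so the per-partition context is created once (via the temporary partitionContextMap) and fileToPartitionContextMap simply points each path at the shared instance. A generic, self-contained sketch of the same pattern, with illustrative names and no Hive dependencies, follows.

import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;

// Maps many keys (e.g. input paths) onto contexts that are created once per
// distinct descriptor (e.g. a PartitionDesc), mirroring internalSetChildren.
public final class SharedContextIndex<K, D, C> {

  private final Map<D, C> contextByDescriptor = new HashMap<>();
  private final Map<K, C> contextByKey = new HashMap<>();
  private final Function<D, C> contextFactory;

  public SharedContextIndex(Function<D, C> contextFactory) {
    this.contextFactory = contextFactory;
  }

  // Register a key under its descriptor; the context is built only the first
  // time a descriptor is seen (the role of the temporary partitionContextMap).
  public void register(K key, D descriptor) {
    C context = contextByDescriptor.computeIfAbsent(descriptor, contextFactory);
    contextByKey.put(key, context);
  }

  // Runtime lookup, analogous to fileToPartitionContextMap.get(path.toString()).
  public C lookup(K key) {
    return contextByKey.get(key);
  }
}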