use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class CheckTableAccessHook method run.
public void run(HookContext hookContext) {
  HiveConf conf = hookContext.getConf();
  if (conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS) == false) {
    return;
  }
  QueryPlan plan = hookContext.getQueryPlan();
  if (plan == null) {
    return;
  }
  TableAccessInfo tableAccessInfo = hookContext.getQueryPlan().getTableAccessInfo();
  if (tableAccessInfo == null ||
      tableAccessInfo.getOperatorToTableAccessMap() == null ||
      tableAccessInfo.getOperatorToTableAccessMap().isEmpty()) {
    return;
  }
  LogHelper console = SessionState.getConsole();
  Map<Operator<? extends OperatorDesc>, Map<String, List<String>>> operatorToTableAccessMap =
      tableAccessInfo.getOperatorToTableAccessMap();
  // Must be deterministic order map for consistent q-test output across Java versions
  Map<String, String> outputOrderedMap = new LinkedHashMap<String, String>();
  for (Map.Entry<Operator<? extends OperatorDesc>, Map<String, List<String>>> tableAccess :
      operatorToTableAccessMap.entrySet()) {
    StringBuilder perOperatorInfo = new StringBuilder();
    perOperatorInfo.append("Operator:").append(tableAccess.getKey().getOperatorId()).append("\n");
    for (Map.Entry<String, List<String>> entry : tableAccess.getValue().entrySet()) {
      perOperatorInfo.append("Table:").append(entry.getKey()).append("\n");
      perOperatorInfo.append("Keys:").append(StringUtils.join(entry.getValue(), ',')).append("\n");
    }
    outputOrderedMap.put(tableAccess.getKey().getOperatorId(), perOperatorInfo.toString());
  }
  for (String perOperatorInfo : outputOrderedMap.values()) {
    console.printError(perOperatorInfo);
  }
}
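The hook above only produces output when table-key collection is enabled and the hook is registered to run after query execution. A minimal wiring sketch follows; the fully-qualified hook class name is an assumption, and in q-tests the same thing is typically done with a set hive.exec.post.hooks=... statement in the test script.

HiveConf conf = new HiveConf();
// Enable collection of table key access information, which the hook reads.
conf.setBoolVar(HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS, true);
// Register the hook as a post-execution hook; the class name here is an assumption.
conf.set("hive.exec.post.hooks", "org.apache.hadoop.hive.ql.hooks.CheckTableAccessHook");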
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class LlapInputFormat method findTsOp.
static TableScanOperator findTsOp(MapWork mapWork) throws HiveException {
  if (mapWork.getAliasToWork() == null) {
    throw new HiveException("Unexpected - aliasToWork is missing; " + NONVECTOR_SETTING_MESSAGE);
  }
  Iterator<Operator<?>> ops = mapWork.getAliasToWork().values().iterator();
  TableScanOperator tableScanOperator = null;
  while (ops.hasNext()) {
    Operator<?> op = ops.next();
    if (op instanceof TableScanOperator) {
      if (tableScanOperator != null) {
        throw new HiveException("Unexpected - more than one TSOP; " + NONVECTOR_SETTING_MESSAGE);
      }
      tableScanOperator = (TableScanOperator) op;
    }
  }
  return tableScanOperator;
}
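findTsOp only needs to look at the root operators in aliasToWork, since the map-side roots are the table scans. As a related sketch (not part of LlapInputFormat; the helper name is invented), the same kind of search over an arbitrary subtree can be expressed as a recursive walk over Operator.getChildOperators():

static void collectTableScans(Operator<?> op, List<TableScanOperator> found) {
  // Record this node if it is a table scan.
  if (op instanceof TableScanOperator) {
    found.add((TableScanOperator) op);
  }
  // Recurse into children, if any.
  if (op.getChildOperators() != null) {
    for (Operator<?> child : op.getChildOperators()) {
      collectTableScans(child, found);
    }
  }
}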
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class MapredLocalTask method initializeOperators.
private void initializeOperators(Map<FetchOperator, JobConf> fetchOpJobConfMap) throws HiveException {
  for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : work.getAliasToWork().entrySet()) {
    LOG.debug("initializeOperators: " + entry.getKey() + ", children = " + entry.getValue().getChildOperators());
  }
  // this mapper operator is used to initialize all the operators
  for (Map.Entry<String, FetchWork> entry : work.getAliasToFetchWork().entrySet()) {
    if (entry.getValue() == null) {
      continue;
    }
    JobConf jobClone = new JobConf(job);
    TableScanOperator ts = (TableScanOperator) work.getAliasToWork().get(entry.getKey());
    // push down projections
    ColumnProjectionUtils.appendReadColumns(jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
    // push down filters and as-of information
    HiveInputFormat.pushFiltersAndAsOf(jobClone, ts, null);
    AcidUtils.setAcidOperationalProperties(jobClone, ts.getConf().isTranscationalTable(), ts.getConf().getAcidOperationalProperties());
    AcidUtils.setValidWriteIdList(jobClone, ts.getConf());
    // create a fetch operator
    FetchOperator fetchOp = new FetchOperator(entry.getValue(), jobClone);
    fetchOpJobConfMap.put(fetchOp, jobClone);
    fetchOperators.put(entry.getKey(), fetchOp);
    l4j.info("fetchoperator for " + entry.getKey() + " created");
  }
  // initialize all forward operators
  for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
    // get the forward op
    String alias = entry.getKey();
    Operator<? extends OperatorDesc> forwardOp = work.getAliasToWork().get(alias);
    // put the exec context into all the operators
    forwardOp.passExecContext(execContext);
    // All the operators need to be initialized before process
    FetchOperator fetchOp = entry.getValue();
    JobConf jobConf = fetchOpJobConfMap.get(fetchOp);
    if (jobConf == null) {
      jobConf = job;
    }
    // initialize the forward operator
    ObjectInspector objectInspector = fetchOp.getOutputObjectInspector();
    forwardOp.initialize(jobConf, new ObjectInspector[] { objectInspector });
    l4j.info("fetchoperator for " + entry.getKey() + " initialized");
  }
}
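Once initialized, rows are driven through these operators by a forwarding loop. The sketch below is a simplified illustration of that step, under the assumption that it mirrors MapredLocalTask's own forwarding logic; exception handling and operator shutdown are omitted. InspectableObject pairs the raw row object with its ObjectInspector.

for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
  FetchOperator fetchOp = entry.getValue();
  Operator<? extends OperatorDesc> forwardOp = work.getAliasToWork().get(entry.getKey());
  InspectableObject row;
  while ((row = fetchOp.getNextRow()) != null) {
    // Push each fetched row into the operator pipeline; tag 0 is used since there is a single input.
    forwardOp.process(row.o, 0);
  }
}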
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class VectorMapOperator method internalSetChildren.
/*
 * Create information for vector map operator.
 * The member oneRootOperator has been set.
 */
private void internalSetChildren(Configuration hconf) throws Exception {
  // The setupPartitionContextVars uses the prior read type to flush the prior deserializerBatch,
  // so set it here to none.
  currentReadType = VectorMapOperatorReadType.NONE;
  batchContext = conf.getVectorizedRowBatchCtx();
  /*
   * Use a different batch for vectorized Input File Format readers so they can do their work
   * overlapped with work of the row collection that vector/row deserialization does. This allows
   * the partitions to mix modes (e.g. for us to flush the previously batched rows on file change).
   */
  vectorizedInputFileFormatBatch = batchContext.createVectorizedRowBatch();
  conf.setVectorizedRowBatch(vectorizedInputFileFormatBatch);
  /*
   * This batch is used by vector/row deserializer readers.
   */
  deserializerBatch = batchContext.createVectorizedRowBatch();
  batchCounter = 0;
  dataColumnCount = batchContext.getDataColumnCount();
  partitionColumnCount = batchContext.getPartitionColumnCount();
  partitionValues = new Object[partitionColumnCount];
  virtualColumnCount = batchContext.getVirtualColumnCount();
  rowIdentifierColumnNum = batchContext.findVirtualColumnNum(VirtualColumn.ROWID);
  hasRowIdentifier = (rowIdentifierColumnNum != -1);
  dataColumnNums = batchContext.getDataColumnNums();
  Preconditions.checkState(dataColumnNums != null);
  // Form a truncated boolean include array for our vector/row deserializers.
  determineDataColumnsToIncludeTruncated();
  /*
   * Create table related objects
   */
  final String[] rowColumnNames = batchContext.getRowColumnNames();
  final TypeInfo[] rowColumnTypeInfos = batchContext.getRowColumnTypeInfos();
  tableStructTypeInfo =
      TypeInfoFactory.getStructTypeInfo(Arrays.asList(rowColumnNames), Arrays.asList(rowColumnTypeInfos));
  tableStandardStructObjectInspector =
      (StandardStructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(tableStructTypeInfo);
  tableRowTypeInfos = batchContext.getRowColumnTypeInfos();
  /*
   * NOTE: We do not alter the projectedColumns / projectionSize of the batches to just be
   * the included columns (+ partition columns).
   *
   * For now, we need to model the object inspector rows because there are still several
   * vectorized operators that use them.
   *
   * We need to continue to model the Object[] as having null objects for not included columns
   * until the following has been fixed:
   *   o When we have to output a STRUCT for AVG we switch to row GroupBy operators.
   *   o Some variations of VectorMapOperator, VectorReduceSinkOperator, VectorFileSinkOperator
   *     use the row super class to process rows.
   */
  /*
   * The Vectorizer class enforces that there is only one TableScanOperator, so
   * we don't need the more complicated multiple root operator mapping that MapOperator has.
   */
  fileToPartitionContextMap = new HashMap<>();
  // Temporary map so we only create one partition context entry.
  HashMap<PartitionDesc, VectorPartitionContext> partitionContextMap =
      new HashMap<PartitionDesc, VectorPartitionContext>();
  for (Map.Entry<Path, List<String>> entry : conf.getPathToAliases().entrySet()) {
    Path path = entry.getKey();
    PartitionDesc partDesc = conf.getPathToPartitionInfo().get(path);
    VectorPartitionContext vectorPartitionContext;
    if (!partitionContextMap.containsKey(partDesc)) {
      vectorPartitionContext = createAndInitPartitionContext(partDesc, hconf);
      partitionContextMap.put(partDesc, vectorPartitionContext);
    } else {
      vectorPartitionContext = partitionContextMap.get(partDesc);
    }
    fileToPartitionContextMap.put(path, vectorPartitionContext);
  }
  // Create list of one.
  List<Operator<? extends OperatorDesc>> children = new ArrayList<Operator<? extends OperatorDesc>>();
  children.add(oneRootOperator);
  setChildOperators(children);
}
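The fileToPartitionContextMap built above is keyed by input path so that per-file partition state can be looked up as the reader moves between files. A small illustrative helper follows; the method name is hypothetical, and the real class performs an equivalent lookup when the input file changes (compare the setupPartitionContextVars comment at the top of the method).

private VectorPartitionContext contextForPath(Path path) throws HiveException {
  // Look up the context created during internalSetChildren for this input path.
  VectorPartitionContext vectorPartitionContext = fileToPartitionContextMap.get(path);
  if (vectorPartitionContext == null) {
    throw new HiveException("No partition context registered for path " + path);
  }
  return vectorPartitionContext;
}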
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class TaskQueue method finished.
public void finished(TaskRunner runner) {
  if (statsTasks.isEmpty() || !(runner.getTask() instanceof MapRedTask)) {
    return;
  }
  MapRedTask mapredTask = (MapRedTask) runner.getTask();
  MapWork mapWork = mapredTask.getWork().getMapWork();
  ReduceWork reduceWork = mapredTask.getWork().getReduceWork();
  List<Operator> operators = new ArrayList<Operator>(mapWork.getAliasToWork().values());
  if (reduceWork != null) {
    operators.add(reduceWork.getReducer());
  }
  final List<String> statKeys = new ArrayList<String>(1);
  NodeUtils.iterate(operators, FileSinkOperator.class, new Function<FileSinkOperator>() {

    @Override
    public void apply(FileSinkOperator fsOp) {
      if (fsOp.getConf().isGatherStats()) {
        statKeys.add(fsOp.getConf().getStatsAggPrefix());
      }
    }
  });
  for (String statKey : statKeys) {
    if (statsTasks.containsKey(statKey)) {
      statsTasks.get(statKey).getWork().setSourceTask(mapredTask);
    } else {
      LOG.debug("There is no corresponding statTask for: " + statKey);
    }
  }
}
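Assuming NodeUtils.Function declares a single abstract callback method, the anonymous class above can also be written as a lambda on a Java 8+ build. An illustrative, behavior-equivalent rewrite:

NodeUtils.iterate(operators, FileSinkOperator.class, fsOp -> {
  // Collect the stats aggregation prefix of every file sink that gathers stats.
  if (fsOp.getConf().isGatherStats()) {
    statKeys.add(fsOp.getConf().getStatsAggPrefix());
  }
});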