Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.
The class DynamicPartitionPruningOptimization, method createFinalRsForSemiJoinOp.
private void createFinalRsForSemiJoinOp(ParseContext parseContext, TableScanOperator ts, GroupByOperator gb,
    ExprNodeDesc key, String keyBaseAlias, ExprNodeDesc colExpr, boolean isHint) throws SemanticException {
  ArrayList<String> gbOutputNames = new ArrayList<>();
  // One each for min, max and bloom filter
  gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(0));
  gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(1));
  gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(2));
  int colPos = 0;
  ArrayList<ExprNodeDesc> rsValueCols = new ArrayList<ExprNodeDesc>();
  for (int i = 0; i < gbOutputNames.size() - 1; i++) {
    ExprNodeColumnDesc expr = new ExprNodeColumnDesc(key.getTypeInfo(), gbOutputNames.get(colPos++), "", false);
    rsValueCols.add(expr);
  }
  // Bloom filter uses binary
  ExprNodeColumnDesc colBFExpr = new ExprNodeColumnDesc(TypeInfoFactory.binaryTypeInfo,
      gbOutputNames.get(colPos++), "", false);
  rsValueCols.add(colBFExpr);
  // Create the final ReduceSinkOperator
  ReduceSinkDesc rsDescFinal = PlanUtils.getReduceSinkDesc(new ArrayList<ExprNodeDesc>(), rsValueCols,
      gbOutputNames, false, -1, 0, 1, Operation.NOT_ACID);
  ReduceSinkOperator rsOpFinal = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(rsDescFinal,
      new RowSchema(gb.getSchema()), gb);
  Map<String, ExprNodeDesc> columnExprMap = new HashMap<>();
  rsOpFinal.setColumnExprMap(columnExprMap);
  LOG.debug("DynamicSemiJoinPushdown: Saving RS to TS mapping: " + rsOpFinal + ": " + ts);
  SemiJoinBranchInfo sjInfo = new SemiJoinBranchInfo(ts, isHint);
  parseContext.getRsToSemiJoinBranchInfo().put(rsOpFinal, sjInfo);
  // Save the info that is required at query time to resolve dynamic/runtime values.
  RuntimeValuesInfo runtimeValuesInfo = new RuntimeValuesInfo();
  TableDesc rsFinalTableDesc = PlanUtils.getReduceValueTableDesc(
      PlanUtils.getFieldSchemasFromColumnList(rsValueCols, "_col"));
  List<String> dynamicValueIDs = new ArrayList<String>();
  dynamicValueIDs.add(keyBaseAlias + "_min");
  dynamicValueIDs.add(keyBaseAlias + "_max");
  dynamicValueIDs.add(keyBaseAlias + "_bloom_filter");
  runtimeValuesInfo.setTableDesc(rsFinalTableDesc);
  runtimeValuesInfo.setDynamicValueIDs(dynamicValueIDs);
  runtimeValuesInfo.setColExprs(rsValueCols);
  runtimeValuesInfo.setTsColExpr(colExpr);
  parseContext.getRsToRuntimeValuesInfoMap().put(rsOpFinal, runtimeValuesInfo);
  parseContext.getColExprToGBMap().put(key, gb);
}
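For orientation: the three dynamic value IDs that the runtime uses to look up the min, max and bloom-filter values are derived from keyBaseAlias by fixed suffixes, one per reduce-sink value column. A minimal standalone sketch of that naming convention (the alias value here is hypothetical; Hive derives it from the semijoin branch):

import java.util.ArrayList;
import java.util.List;

public class SemiJoinIdSketch {
  public static void main(String[] args) {
    // Hypothetical base alias; in Hive it comes from the semijoin branch.
    String keyBaseAlias = "RS_10_src_key";
    // One ID per reduce-sink value column: min, max, bloom filter.
    List<String> dynamicValueIDs = new ArrayList<>();
    dynamicValueIDs.add(keyBaseAlias + "_min");
    dynamicValueIDs.add(keyBaseAlias + "_max");
    dynamicValueIDs.add(keyBaseAlias + "_bloom_filter");
    // Prints RS_10_src_key_min, RS_10_src_key_max, RS_10_src_key_bloom_filter
    dynamicValueIDs.forEach(System.out::println);
  }
}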
Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.
The class GenMRUnion1, method processSubQueryUnionCreateIntermediate.
/**
 * Process the union when the parent is a map-reduce job. Create a temporary
 * output, and let the union task read from the temporary output.
 *
 * The files created for all the inputs are collected in the union context and
 * later used to initialize the union plan.
 *
 * @param parent the operator in the parent map-reduce job whose output is materialized
 * @param child the operator that will read the temporary output back
 * @param uTask the union task
 * @param ctx the GenMR processing context
 * @param uCtxTask the union context that accumulates the temporary files
 */
private void processSubQueryUnionCreateIntermediate(Operator<? extends OperatorDesc> parent,
    Operator<? extends OperatorDesc> child, Task<? extends Serializable> uTask, GenMRProcContext ctx,
    GenMRUnionCtx uCtxTask) {
  ParseContext parseCtx = ctx.getParseCtx();
  TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(
      PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));
  // generate the temporary file
  Context baseCtx = parseCtx.getContext();
  Path taskTmpDir = baseCtx.getMRTmpPath();
  // Create the temporary file, its corresponding FileSinkOperator, and
  // its corresponding TableScanOperator.
  TableScanOperator tableScanOp = GenMapRedUtils.createTemporaryFile(parent, child, taskTmpDir, tt_desc, parseCtx);
  // Add the path to alias mapping
  uCtxTask.addTaskTmpDir(taskTmpDir.toUri().toString());
  uCtxTask.addTTDesc(tt_desc);
  uCtxTask.addListTopOperators(tableScanOp);
  // The union task is empty. The files created for all the inputs are
  // assembled in the union context and later used to initialize the union
  // plan.
  Task<? extends Serializable> currTask = ctx.getCurrTask();
  currTask.addDependentTask(uTask);
  if (ctx.getRootTasks().contains(uTask)) {
    ctx.getRootTasks().remove(uTask);
    if (!ctx.getRootTasks().contains(currTask) && shouldBeRootTask(currTask)) {
      ctx.getRootTasks().add(currTask);
    }
  }
}
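The three addTaskTmpDir/addTTDesc/addListTopOperators calls keep parallel lists in the union context that setUnionPlan (further below) later walks in lockstep. A minimal sketch of that accumulation pattern, with hypothetical generic type parameters standing in for TableDesc and TableScanOperator:

import java.util.ArrayList;
import java.util.List;

// Hypothetical stand-in for GenMRUnionCtx: per union input it records the
// temporary directory, the intermediate table descriptor and the table scan
// that reads the directory back, all at the same list index.
class UnionCtxSketch<D, T> {
  final List<String> taskTmpDirs = new ArrayList<>();
  final List<D> ttDescs = new ArrayList<>();
  final List<T> topOps = new ArrayList<>();

  void addInput(String tmpDir, D ttDesc, T topOp) {
    taskTmpDirs.add(tmpDir);
    ttDescs.add(ttDesc);
    topOps.add(topOp);
  }

  int size() {
    return taskTmpDirs.size();
  }
}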
Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.
The class GenMapRedUtils, method createMRWorkForMergingFiles.
/**
 * Create a MapWork based on the input path, the top operator and the input
 * table descriptor.
 *
 * @param conf the Hive configuration
 * @param topOp
 *          the table scan operator that is the root of the MapReduce task.
 * @param fsDesc
 *          the file sink descriptor that serves as the input to this merge task.
 * @return the MapWork for the merge task
 */
private static MapWork createMRWorkForMergingFiles(HiveConf conf, TableScanOperator topOp, FileSinkDesc fsDesc) {
  ArrayList<String> aliases = new ArrayList<String>();
  Path inputDir = StringInternUtils.internUriStringsInPath(fsDesc.getMergeInputDirName());
  String inputDirStr = inputDir.toString().intern();
  TableDesc tblDesc = fsDesc.getTableInfo();
  // dummy alias: just use the input path
  aliases.add(inputDirStr);
  // constructing the default MapredWork
  MapredWork cMrPlan = GenMapRedUtils.getMapRedWorkFromConf(conf);
  MapWork cplan = cMrPlan.getMapWork();
  cplan.addPathToAlias(inputDir, aliases);
  cplan.addPathToPartitionInfo(inputDir, new PartitionDesc(tblDesc, null));
  cplan.getAliasToWork().put(inputDirStr, topOp);
  cplan.setMapperCannotSpanPartns(true);
  return cplan;
}
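The "dummy alias" comment is literal: the alias under which the merge work is registered is the input path string itself, so pathToAliases and aliasToWork share the same key. A small standalone sketch of that convention using plain collections (the path and the operator label are hypothetical):

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class DummyAliasSketch {
  public static void main(String[] args) {
    // Hypothetical merge input directory.
    String inputDir = "hdfs:/tmp/hive/-ext-10002";
    // The alias list for the path holds the path string itself ...
    Map<String, List<String>> pathToAliases = new LinkedHashMap<>();
    List<String> aliases = new ArrayList<>();
    aliases.add(inputDir);
    pathToAliases.put(inputDir, aliases);
    // ... so the alias-to-work map can be keyed by the same string.
    Map<String, String> aliasToWork = new LinkedHashMap<>();
    aliasToWork.put(inputDir, "TS[merge]"); // a TableScanOperator in Hive
    System.out.println(pathToAliases);
    System.out.println(aliasToWork);
  }
}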
Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.
The class GenMapRedUtils, method setUnionPlan.
private static void setUnionPlan(GenMRProcContext opProcCtx, boolean local, Task<? extends Serializable> currTask,
    GenMRUnionCtx uCtx, boolean mergeTask) throws SemanticException {
  TableScanOperator currTopOp = opProcCtx.getCurrTopOp();
  if (currTopOp != null) {
    // The current task scans a real table: attach it to the task plan directly.
    String currAliasId = opProcCtx.getCurrAliasId();
    if (mergeTask || !opProcCtx.isSeenOp(currTask, currTopOp)) {
      setTaskPlan(currAliasId, currTopOp, currTask, local, opProcCtx);
    }
    currTopOp = null;
    opProcCtx.setCurrTopOp(currTopOp);
  } else {
    // Otherwise, wire in the temporary files collected in the union context.
    List<String> taskTmpDirLst = uCtx.getTaskTmpDir();
    if ((taskTmpDirLst != null) && !(taskTmpDirLst.isEmpty())) {
      List<TableDesc> tt_descLst = uCtx.getTTDesc();
      assert !taskTmpDirLst.isEmpty() && !tt_descLst.isEmpty();
      assert taskTmpDirLst.size() == tt_descLst.size();
      int size = taskTmpDirLst.size();
      assert local == false;
      List<TableScanOperator> topOperators = uCtx.getListTopOperators();
      MapredWork plan = (MapredWork) currTask.getWork();
      for (int pos = 0; pos < size; pos++) {
        String taskTmpDir = taskTmpDirLst.get(pos);
        Path taskTmpDirPath = new Path(taskTmpDir);
        MapWork mWork = plan.getMapWork();
        if (!mWork.getPathToAliases().containsKey(taskTmpDirPath)) {
          // Register each temporary directory at most once.
          taskTmpDir = taskTmpDir.intern();
          StringInternUtils.internUriStringsInPath(taskTmpDirPath);
          TableDesc tt_desc = tt_descLst.get(pos);
          mWork.addPathToAlias(taskTmpDirPath, taskTmpDir);
          mWork.addPathToPartitionInfo(taskTmpDirPath, new PartitionDesc(tt_desc, null));
          mWork.getAliasToWork().put(taskTmpDir, topOperators.get(pos));
        }
      }
    }
  }
}
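The loop registers each temporary directory at most once, guarded by containsKey; the alias used is the directory string itself. A standalone sketch of that register-once pattern (directory values are illustrative):

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class RegisterOnceSketch {
  public static void main(String[] args) {
    Map<String, String> pathToAlias = new LinkedHashMap<>();
    // Hypothetical temp dirs; the duplicate is deliberate.
    List<String> tmpDirs = Arrays.asList("/tmp/u1", "/tmp/u2", "/tmp/u1");
    for (String dir : tmpDirs) {
      // Mirrors the containsKey guard above: only the first sighting of a
      // path installs its alias and partition mappings.
      if (!pathToAlias.containsKey(dir)) {
        pathToAlias.put(dir, dir.intern()); // the alias is the dir string itself
      }
    }
    System.out.println(pathToAlias); // {/tmp/u1=/tmp/u1, /tmp/u2=/tmp/u2}
  }
}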
Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.
The class MapJoinProcessor, method genMapJoinLocalWork.
/**
 * Generate the MapredLocalWork for the given map-join operator.
 *
 * @param newWork the MapredWork being converted to a map-join plan
 * @param mapJoinOp
 *          map-join operator for which local work needs to be generated.
 * @param bigTablePos position of the big table among the join parents
 * @throws SemanticException
 */
private static void genMapJoinLocalWork(MapredWork newWork, MapJoinOperator mapJoinOp, int bigTablePos)
    throws SemanticException {
  // keep the small table aliases, to avoid a concurrent modification exception
  ArrayList<String> smallTableAliasList = new ArrayList<String>();
  // create a new MapredLocalWork
  MapredLocalWork newLocalWork = new MapredLocalWork(
      new LinkedHashMap<String, Operator<? extends OperatorDesc>>(), new LinkedHashMap<String, FetchWork>());
  for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : newWork.getMapWork().getAliasToWork().entrySet()) {
    String alias = entry.getKey();
    Operator<? extends OperatorDesc> op = entry.getValue();
    // if the table scan is for the big table, skip it;
    // trace down the operator tree from the table scan operator
    Operator<? extends OperatorDesc> parentOp = op;
    Operator<? extends OperatorDesc> childOp = op.getChildOperators().get(0);
    while ((childOp != null) && (!childOp.equals(mapJoinOp))) {
      parentOp = childOp;
      assert parentOp.getChildOperators().size() == 1;
      childOp = parentOp.getChildOperators().get(0);
    }
    if (childOp == null) {
      throw new SemanticException("Cannot find join op by tracing down the table scan operator tree");
    }
    // skip the big table position
    int i = childOp.getParentOperators().indexOf(parentOp);
    if (i == bigTablePos) {
      continue;
    }
    // set alias to work and put into smallTableAliasList
    newLocalWork.getAliasToWork().put(alias, op);
    smallTableAliasList.add(alias);
    // get the input paths and remove this alias from pathToAliases,
    // because these files will be read by the fetch operator instead
    LinkedHashMap<Path, ArrayList<String>> pathToAliases = newWork.getMapWork().getPathToAliases();
    // record all the input paths for this alias
    HashSet<Path> pathSet = new HashSet<>();
    HashSet<Path> emptyPath = new HashSet<>();
    for (Map.Entry<Path, ArrayList<String>> entry2 : pathToAliases.entrySet()) {
      Path path = entry2.getKey();
      ArrayList<String> list = entry2.getValue();
      if (list.contains(alias)) {
        // add to the path set
        pathSet.add(path);
        // remove this alias from the alias list
        list.remove(alias);
        if (list.size() == 0) {
          emptyPath.add(path);
        }
      }
    }
    // remove the paths that no longer have any alias associated with them
    for (Path path : emptyPath) {
      newWork.getMapWork().removePathToAlias(path);
    }
    // create the fetch work
    FetchWork fetchWork = null;
    List<Path> partDir = new ArrayList<Path>();
    List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
    for (Path tablePath : pathSet) {
      PartitionDesc partitionDesc = newWork.getMapWork().getPathToPartitionInfo().get(tablePath);
      // create FetchWork for a non-partitioned table
      if (partitionDesc.getPartSpec() == null || partitionDesc.getPartSpec().size() == 0) {
        fetchWork = new FetchWork(tablePath, partitionDesc.getTableDesc());
        break;
      }
      // if the table is partitioned, add partDir and partitionDesc
      partDir.add(tablePath);
      partDesc.add(partitionDesc);
    }
    // create FetchWork for a partitioned table
    if (fetchWork == null) {
      TableDesc table = newWork.getMapWork().getAliasToPartnInfo().get(alias).getTableDesc();
      fetchWork = new FetchWork(partDir, partDesc, table);
    }
    // set alias to fetch work
    newLocalWork.getAliasToFetchWork().put(alias, fetchWork);
  }
  // remove the small table aliases from aliasToWork; avoids concurrent modification
  for (String alias : smallTableAliasList) {
    newWork.getMapWork().getAliasToWork().remove(alias);
  }
  // set up the local work
  newWork.getMapWork().setMapRedLocalWork(newLocalWork);
  // remove the reducer
  newWork.setReduceWork(null);
}
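The loop that locates the map-join operator is a plain single-child chain walk from each table scan. A minimal standalone sketch of the same traversal with a hypothetical Node type standing in for Operator<?>:

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class ChainWalkSketch {
  // Hypothetical stand-in for Operator<?>: each node has an ordered child list.
  static class Node {
    final String name;
    List<Node> children = Collections.emptyList();
    Node(String name) { this.name = name; }
  }

  // Walk the single-child chain from start until target is reached; return
  // the node just above target, or null if the chain ends before reaching it.
  static Node parentOfTarget(Node start, Node target) {
    Node parent = start;
    Node child = start.children.isEmpty() ? null : start.children.get(0);
    while (child != null && !child.equals(target)) {
      parent = child;
      child = parent.children.isEmpty() ? null : parent.children.get(0);
    }
    return child == null ? null : parent;
  }

  public static void main(String[] args) {
    Node ts = new Node("TS"), fil = new Node("FIL"), join = new Node("MAPJOIN");
    ts.children = Arrays.asList(fil);
    fil.children = Arrays.asList(join);
    System.out.println(parentOfTarget(ts, join).name); // prints FIL
  }
}

In the Hive code the node just above the join is then looked up in the join's parent list; its index decides whether the branch belongs to the big table (skipped) or to a small table (moved into local work).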