Use of org.apache.hadoop.hive.ql.parse.ParseContext in project hive by apache.
Class SparkSkewJoinProcFactory, method splitTask.
/**
* If the join is not in a leaf ReduceWork, the spark task has to be split into 2 tasks.
*/
private static void splitTask(SparkTask currentTask, ReduceWork reduceWork,
    ParseContext parseContext) throws SemanticException {
  SparkWork currentWork = currentTask.getWork();
  Set<Operator<?>> reduceSinkSet = OperatorUtils.getOp(reduceWork, ReduceSinkOperator.class);
  if (currentWork.getChildren(reduceWork).size() == 1 && canSplit(currentWork)
      && reduceSinkSet.size() == 1) {
    ReduceSinkOperator reduceSink = (ReduceSinkOperator) reduceSinkSet.iterator().next();
    BaseWork childWork = currentWork.getChildren(reduceWork).get(0);
    SparkEdgeProperty originEdge = currentWork.getEdgeProperty(reduceWork, childWork);
    // disconnect the reduce work from its child. this should produce two isolated sub graphs
    currentWork.disconnect(reduceWork, childWork);
    // move works following the current reduce work into a new spark work
    SparkWork newWork =
        new SparkWork(parseContext.getConf().getVar(HiveConf.ConfVars.HIVEQUERYID));
    newWork.add(childWork);
    copyWorkGraph(currentWork, newWork, childWork);
    // remove them from current spark work
    for (BaseWork baseWork : newWork.getAllWorkUnsorted()) {
      currentWork.remove(baseWork);
      currentWork.getCloneToWork().remove(baseWork);
    }
    // create TS to read intermediate data
    Context baseCtx = parseContext.getContext();
    Path taskTmpDir = baseCtx.getMRTmpPath();
    Operator<? extends OperatorDesc> rsParent = reduceSink.getParentOperators().get(0);
    TableDesc tableDesc = PlanUtils.getIntermediateFileTableDesc(
        PlanUtils.getFieldSchemasFromRowSchema(rsParent.getSchema(), "temporarycol"));
    // this will insert FS and TS between the RS and its parent
    TableScanOperator tableScanOp = GenMapRedUtils.createTemporaryFile(
        rsParent, reduceSink, taskTmpDir, tableDesc, parseContext);
    // create new MapWork
    MapWork mapWork = PlanUtils.getMapRedWork().getMapWork();
    mapWork.setName("Map " + GenSparkUtils.getUtils().getNextSeqNumber());
    newWork.add(mapWork);
    newWork.connect(mapWork, childWork, originEdge);
    // setup the new map work
    String streamDesc = taskTmpDir.toUri().toString();
    if (GenMapRedUtils.needsTagging((ReduceWork) childWork)) {
      Operator<? extends OperatorDesc> childReducer = ((ReduceWork) childWork).getReducer();
      String id = null;
      if (childReducer instanceof JoinOperator) {
        if (parseContext.getJoinOps().contains(childReducer)) {
          id = ((JoinOperator) childReducer).getConf().getId();
        }
      } else if (childReducer instanceof MapJoinOperator) {
        if (parseContext.getMapJoinOps().contains(childReducer)) {
          id = ((MapJoinOperator) childReducer).getConf().getId();
        }
      } else if (childReducer instanceof SMBMapJoinOperator) {
        if (parseContext.getSmbMapJoinOps().contains(childReducer)) {
          id = ((SMBMapJoinOperator) childReducer).getConf().getId();
        }
      }
      if (id != null) {
        streamDesc = id + ":$INTNAME";
      } else {
        streamDesc = "$INTNAME";
      }
      String origStreamDesc = streamDesc;
      int pos = 0;
      while (mapWork.getAliasToWork().get(streamDesc) != null) {
        streamDesc = origStreamDesc.concat(String.valueOf(++pos));
      }
    }
    GenMapRedUtils.setTaskPlan(taskTmpDir, streamDesc, tableScanOp, mapWork, false, tableDesc);
    // insert the new task between current task and its child
    @SuppressWarnings("unchecked")
    Task<?> newTask = TaskFactory.get(newWork);
    List<Task<?>> childTasks = currentTask.getChildTasks();
    // must have at most one child
    if (childTasks != null && childTasks.size() > 0) {
      Task<?> childTask = childTasks.get(0);
      currentTask.removeDependentTask(childTask);
      newTask.addDependentTask(childTask);
    }
    currentTask.addDependentTask(newTask);
    newTask.setFetchSource(currentTask.isFetchSource());
  }
}
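The streamDesc loop near the end of the tagging branch guarantees that the alias registered for the new MapWork is unique within mapWork.getAliasToWork() by appending an increasing numeric suffix. Below is a minimal standalone sketch of that pattern; the class and method names (AliasUtil, uniqueAlias) are hypothetical and not part of the Hive code.

import java.util.Map;

public final class AliasUtil {
  // Return "desired" unchanged if it is not yet a key in aliasToWork;
  // otherwise try "desired1", "desired2", ... until a free alias is found,
  // mirroring the streamDesc/origStreamDesc loop above.
  static String uniqueAlias(String desired, Map<String, ?> aliasToWork) {
    String candidate = desired;
    int pos = 0;
    while (aliasToWork.containsKey(candidate)) {
      candidate = desired + (++pos);
    }
    return candidate;
  }
}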
Use of org.apache.hadoop.hive.ql.parse.ParseContext in project hive by apache.
Class GenSparkUtils, method processFileSink.
public void processFileSink(GenSparkProcContext context, FileSinkOperator fileSink)
    throws SemanticException {
  ParseContext parseContext = context.parseContext;
  // is INSERT OVERWRITE TABLE
  boolean isInsertTable = GenMapRedUtils.isInsertInto(parseContext, fileSink);
  HiveConf hconf = parseContext.getConf();
  boolean chDir = GenMapRedUtils.isMergeRequired(
      context.moveTask, hconf, fileSink, context.currentTask, isInsertTable);
  // Set stats config for FileSinkOperators which are cloned from the fileSink
  List<FileSinkOperator> fileSinkList = context.fileSinkMap.get(fileSink);
  if (fileSinkList != null) {
    for (FileSinkOperator fsOp : fileSinkList) {
      fsOp.getConf().setGatherStats(fileSink.getConf().isGatherStats());
      fsOp.getConf().setStatsReliable(fileSink.getConf().isStatsReliable());
    }
  }
  Path finalName = createMoveTask(context.currentTask, chDir, fileSink, parseContext,
      context.moveTask, hconf, context.dependencyTask);
  if (chDir) {
    // Merge the files in the destination table/partitions by creating a map-only merge job.
    // If the underlying data is RCFile, an RCFileBlockMerge task would be created.
    LOG.info("using CombineHiveInputformat for the merge job");
    GenMapRedUtils.createMRWorkForMergingFiles(fileSink, finalName, context.dependencyTask,
        context.moveTask, hconf, context.currentTask,
        parseContext.getQueryState().getLineageState());
  }
  FetchTask fetchTask = parseContext.getFetchTask();
  if (fetchTask != null && context.currentTask.getNumChild() == 0) {
    if (fetchTask.isFetchFrom(fileSink.getConf())) {
      context.currentTask.setFetchSource(true);
    }
  }
}
Use of org.apache.hadoop.hive.ql.parse.ParseContext in project hive by apache.
Class SparkCompiler, method runDynPartitionSortOptimizations.
private void runDynPartitionSortOptimizations(OptimizeSparkProcContext procCtx)
    throws SemanticException {
  // run Sorted dynamic partition optimization
  HiveConf hConf = procCtx.getConf();
  ParseContext parseContext = procCtx.getParseContext();
  runDynPartitionSortOptimizations(parseContext, hConf);
}
Use of org.apache.hadoop.hive.ql.parse.ParseContext in project hive by apache.
Class CommandAuthorizerV1, method getTablePartitionUsedColumns.
private static void getTablePartitionUsedColumns(HiveOperation op, BaseSemanticAnalyzer sem,
    Map<Table, List<String>> tab2Cols, Map<Partition, List<String>> part2Cols,
    Map<String, Boolean> tableUsePartLevelAuth) throws HiveException {
  // table to columns mapping (tab2Cols)
  if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.QUERY)) {
    ParseContext parseCtx = sem.getParseContext();
    for (Map.Entry<String, TableScanOperator> topOpMap : parseCtx.getTopOps().entrySet()) {
      TableScanOperator tableScanOp = topOpMap.getValue();
      if (!tableScanOp.isInsideView()) {
        Table tbl = tableScanOp.getConf().getTableMetadata();
        List<String> cols = new ArrayList<String>();
        for (int id : tableScanOp.getNeededColumnIDs()) {
          cols.add(tbl.getCols().get(id).getName());
        }
        // if it's null then the partition probably doesn't exist so let's use table permission
        if (tbl.isPartitioned()
            && Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName()))) {
          String aliasId = topOpMap.getKey();
          PrunedPartitionList partsList = PartitionPruner.prune(tableScanOp, parseCtx, aliasId);
          Set<Partition> parts = partsList.getPartitions();
          for (Partition part : parts) {
            List<String> existingCols = part2Cols.get(part);
            if (existingCols == null) {
              existingCols = new ArrayList<String>();
            }
            existingCols.addAll(cols);
            part2Cols.put(part, existingCols);
          }
        } else {
          List<String> existingCols = tab2Cols.get(tbl);
          if (existingCols == null) {
            existingCols = new ArrayList<String>();
          }
          existingCols.addAll(cols);
          tab2Cols.put(tbl, existingCols);
        }
      }
    }
  }
}
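Both branches above accumulate column names with the same get-or-create-then-put sequence on part2Cols and tab2Cols. Purely as an illustration, that accumulation step can also be written with Map.computeIfAbsent; the class and helper names here (ColumnAccumulator, addColumns) are hypothetical and do not appear in Hive.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public final class ColumnAccumulator {
  // Append cols to the list stored under key, creating the list on first use.
  // Equivalent to the explicit get / new ArrayList / addAll / put sequence above.
  static <K> void addColumns(Map<K, List<String>> target, K key, List<String> cols) {
    target.computeIfAbsent(key, k -> new ArrayList<>()).addAll(cols);
  }
}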
Use of org.apache.hadoop.hive.ql.parse.ParseContext in project hive by apache.
Class TestExplainTask, method explainToString.
private <K, V> String explainToString(Map<K, V> explainMap) throws Exception {
  ExplainWork work = new ExplainWork();
  ParseContext pCtx = new ParseContext();
  HashMap<String, TableScanOperator> topOps = new HashMap<>();
  TableScanOperator scanOp = new DummyOperator(new DummyExplainDesc<K, V>(explainMap));
  topOps.put("sample", scanOp);
  pCtx.setTopOps(topOps);
  work.setParseContext(pCtx);
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  work.setConfig(new ExplainConfiguration());
  ExplainTask newExplainTask = new ExplainTask();
  newExplainTask.queryState = uut.queryState;
  newExplainTask.getJSONLogicalPlan(new PrintStream(baos), work);
  baos.close();
  return baos.toString();
}
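A hypothetical caller inside the same test class could exercise this helper as follows; the test name, the map contents, and the assertion are illustrative assumptions and are not taken from TestExplainTask. It assumes JUnit's @Test and assertTrue plus java.util.LinkedHashMap are imported, and it is not runnable on its own because explainToString is a private helper of the test class.

// Assumed to live in the same test class so the private explainToString helper is in scope.
@Test
public void testExplainMapIsRenderedAsJson() throws Exception {
  Map<String, String> explainMap = new LinkedHashMap<>();
  explainMap.put("sample-key", "sample-value");
  String json = explainToString(explainMap);
  assertTrue(json.contains("sample-key"));
}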