Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
From the class SharedWorkOptimizer, method areMergeableExtendedCheck:
private static boolean areMergeableExtendedCheck(ParseContext pctx, SharedWorkOptimizerCache optimizerCache,
    TableScanOperator tsOp1, TableScanOperator tsOp2) throws SemanticException {
  // If there is a DPP source, check whether it actually refers to the same target, column, etc.
  // Further, the DPP value needs to be generated from the same subtree.
  List<Operator<?>> dppsOp1 = new ArrayList<>(optimizerCache.tableScanToDPPSource.get(tsOp1));
  List<Operator<?>> dppsOp2 = new ArrayList<>(optimizerCache.tableScanToDPPSource.get(tsOp2));
  if (dppsOp1.isEmpty() && dppsOp2.isEmpty()) {
    return true;
  }
  for (int i = 0; i < dppsOp1.size(); i++) {
    Operator<?> op = dppsOp1.get(i);
    if (op instanceof ReduceSinkOperator) {
      Set<Operator<?>> ascendants = findAscendantWorkOperators(pctx, optimizerCache, op);
      if (ascendants.contains(tsOp2)) {
        // The DPP source of tsOp1 is generated from tsOp2; we cannot merge
        return false;
      }
    }
  }
  for (int i = 0; i < dppsOp2.size(); i++) {
    Operator<?> op = dppsOp2.get(i);
    if (op instanceof ReduceSinkOperator) {
      Set<Operator<?>> ascendants = findAscendantWorkOperators(pctx, optimizerCache, op);
      if (ascendants.contains(tsOp1)) {
        // The DPP source of tsOp2 is generated from tsOp1; we cannot merge
        return false;
      }
    }
  }
  if (dppsOp1.size() != dppsOp2.size()) {
    // Only the first or the second operator has DPP pruning; we cannot merge
    return false;
  }
  // Check whether the DPP branches are equal
  BitSet bs = new BitSet();
  for (int i = 0; i < dppsOp1.size(); i++) {
    Operator<?> dppOp1 = dppsOp1.get(i);
    for (int j = 0; j < dppsOp2.size(); j++) {
      if (!bs.get(j)) {
        // Not visited yet
        Operator<?> dppOp2 = dppsOp2.get(j);
        if (compareAndGatherOps(pctx, dppOp1, dppOp2) != null) {
          // The DPP operators/branches are equal
          bs.set(j);
          break;
        }
      }
    }
    if (bs.cardinality() < i + 1) {
      // dppOp1 found no matching branch in dppsOp2; we cannot merge
      return false;
    }
  }
  return true;
}
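
The nested loop at the end implements a greedy one-to-one matching: each DPP branch of tsOp1 claims the first equal, not-yet-claimed branch of tsOp2, and the BitSet records which branches of tsOp2 are already taken. A minimal standalone sketch of the same pattern, with a generic equivalence predicate standing in for compareAndGatherOps (the names here are illustrative, not Hive APIs):

import java.util.BitSet;
import java.util.List;
import java.util.function.BiPredicate;

public class GreedyBranchMatcher {
  // Returns true if every element of left can be paired with a distinct,
  // not-yet-claimed element of right under the given equivalence predicate.
  // Bit j is set once right.get(j) has been claimed by some element of left.
  static <T> boolean allMatched(List<T> left, List<T> right, BiPredicate<T, T> equal) {
    BitSet claimed = new BitSet();
    for (int i = 0; i < left.size(); i++) {
      for (int j = 0; j < right.size(); j++) {
        if (!claimed.get(j) && equal.test(left.get(i), right.get(j))) {
          claimed.set(j);
          break;
        }
      }
      // Fewer than i + 1 set bits means left.get(i) found no partner.
      if (claimed.cardinality() < i + 1) {
        return false;
      }
    }
    return true;
  }
}

Note that this is first-match greedy rather than an exhaustive bipartite matching; that is sufficient as long as branch equality behaves as an equivalence relation, since any candidates that match the same branch are then interchangeable.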
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
From the class SharedWorkOptimizer, method rankTablesByAccumulatedSize:
private static List<Entry<String, Long>> rankTablesByAccumulatedSize(ParseContext pctx) {
  Map<String, Long> tableToTotalSize = new HashMap<>();
  for (Entry<String, TableScanOperator> e : pctx.getTopOps().entrySet()) {
    TableScanOperator tsOp = e.getValue();
    String tableName = tsOp.getTableName().toString();
    long tableSize = tsOp.getStatistics() != null ? tsOp.getStatistics().getDataSize() : 0L;
    Long totalSize = tableToTotalSize.get(tableName);
    if (totalSize != null) {
      tableToTotalSize.put(tableName, StatsUtils.safeAdd(totalSize, tableSize));
    } else {
      tableToTotalSize.put(tableName, tableSize);
    }
  }
  List<Entry<String, Long>> sortedTables = new ArrayList<>(tableToTotalSize.entrySet());
  Collections.sort(sortedTables, Collections.reverseOrder(new Comparator<Map.Entry<String, Long>>() {
    @Override
    public int compare(Map.Entry<String, Long> o1, Map.Entry<String, Long> o2) {
      return (o1.getValue()).compareTo(o2.getValue());
    }
  }));
  return sortedTables;
}
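
The anonymous Comparator wrapped in Collections.reverseOrder simply orders entries by value, so the result is sorted by accumulated data size, largest table first. On Java 8+ the same ranking can be written with the Map.Entry comparator factories; a compact equivalent sketch (the class name is illustrative):

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;

final class TableRanking {
  // Sorts table names by accumulated size, largest first; equivalent to the
  // Collections.reverseOrder(...) comparator in the snippet above.
  static List<Map.Entry<String, Long>> rankDescending(Map<String, Long> tableToTotalSize) {
    List<Map.Entry<String, Long>> sorted = new ArrayList<>(tableToTotalSize.entrySet());
    sorted.sort(Map.Entry.comparingByValue(Comparator.reverseOrder()));
    return sorted;
  }
}

The get-then-put accumulation loop could likewise be collapsed into a single call, e.g. tableToTotalSize.merge(tableName, tableSize, StatsUtils::safeAdd), assuming StatsUtils.safeAdd keeps its (long, long) signature.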
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
From the class SharedWorkOptimizer, method compareOperator:
private static boolean compareOperator(ParseContext pctx, Operator<?> op1, Operator<?> op2)
    throws SemanticException {
  if (!op1.getClass().getName().equals(op2.getClass().getName())) {
    return false;
  }
  // TODO: move this to logicalEquals
  if (op1 instanceof ReduceSinkOperator) {
    ReduceSinkDesc op1Conf = ((ReduceSinkOperator) op1).getConf();
    ReduceSinkDesc op2Conf = ((ReduceSinkOperator) op2).getConf();
    if (StringUtils.equals(op1Conf.getKeyColString(), op2Conf.getKeyColString())
        && StringUtils.equals(op1Conf.getValueColsString(), op2Conf.getValueColsString())
        && StringUtils.equals(op1Conf.getParitionColsString(), op2Conf.getParitionColsString())
        && op1Conf.getTag() == op2Conf.getTag()
        && StringUtils.equals(op1Conf.getOrder(), op2Conf.getOrder())
        && StringUtils.equals(op1Conf.getNullOrder(), op2Conf.getNullOrder())
        && op1Conf.getTopN() == op2Conf.getTopN()
        && canDeduplicateReduceTraits(op1Conf, op2Conf)) {
      return true;
    } else {
      return false;
    }
  }
  // TODO: move this to logicalEquals
  if (op1 instanceof TableScanOperator) {
    TableScanOperator tsOp1 = (TableScanOperator) op1;
    TableScanOperator tsOp2 = (TableScanOperator) op2;
    TableScanDesc op1Conf = tsOp1.getConf();
    TableScanDesc op2Conf = tsOp2.getConf();
    Table tableMeta1 = op1Conf.getTableMetadata();
    Table tableMeta2 = op2Conf.getTableMetadata();
    if (StringUtils.equals(tableMeta1.getFullyQualifiedName(), tableMeta2.getFullyQualifiedName())
        && op1Conf.getNeededColumns().equals(op2Conf.getNeededColumns())
        && StringUtils.equals(op1Conf.getFilterExprString(), op2Conf.getFilterExprString())
        && pctx.getPrunedPartitions(tsOp1).getPartitions().equals(pctx.getPrunedPartitions(tsOp2).getPartitions())
        && op1Conf.getRowLimit() == op2Conf.getRowLimit()
        && Objects.equals(op1Conf.getIncludedBuckets(), op2Conf.getIncludedBuckets())
        && Objects.equals(op1Conf.getOpProps(), op2Conf.getOpProps())) {
      return true;
    } else {
      return false;
    }
  }
  return op1.logicalEquals(op2);
}
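
Both branches rely on null-safe equality helpers: StringUtils.equals (Apache Commons Lang) and java.util.Objects.equals treat two nulls as equal and never throw, which matters because descriptor fields such as the filter expression string or the included-buckets list may be absent. A tiny illustration (assuming the Commons Lang 3 coordinates):

import java.util.Objects;
import org.apache.commons.lang3.StringUtils;

public class NullSafeEquality {
  public static void main(String[] args) {
    // Both helpers treat (null, null) as equal and (null, "x") as unequal,
    // so optional descriptor fields can be compared without explicit null checks.
    System.out.println(StringUtils.equals(null, null)); // true
    System.out.println(StringUtils.equals(null, "a"));  // false
    System.out.println(Objects.equals(null, null));     // true
    System.out.println(Objects.equals("a", "a"));       // true
  }
}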
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
From the class TableSizeBasedBigTableSelectorForAutoSMJ, method getBigTablePosition:
public int getBigTablePosition(ParseContext parseCtx, JoinOperator joinOp, Set<Integer> bigTableCandidates)
    throws SemanticException {
  int bigTablePos = -1;
  long maxSize = -1;
  HiveConf conf = parseCtx.getConf();
  try {
    List<TableScanOperator> topOps = new ArrayList<TableScanOperator>();
    getListTopOps(joinOp, topOps);
    int currentPos = 0;
    for (TableScanOperator topOp : topOps) {
      if (topOp == null) {
        return -1;
      }
      if (!bigTableCandidates.contains(currentPos)) {
        currentPos++;
        continue;
      }
      Table table = topOp.getConf().getTableMetadata();
      long currentSize = 0;
      if (!table.isPartitioned()) {
        currentSize = getSize(conf, table);
      } else {
        // For partitioned tables, accumulate the size of all the partitions
        PrunedPartitionList partsList = PartitionPruner.prune(topOp, parseCtx, null);
        for (Partition part : partsList.getNotDeniedPartns()) {
          currentSize += getSize(conf, part);
        }
      }
      if (currentSize > maxSize) {
        maxSize = currentSize;
        bigTablePos = currentPos;
      }
      currentPos++;
    }
  } catch (HiveException e) {
    throw new SemanticException(e.getMessage());
  }
  return bigTablePos;
}
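
Stripped of the Hive plumbing, this is an argmax scan restricted to a candidate set: walk the positions, skip non-candidates, and remember the index of the largest element seen so far. A generic restatement (the sizeOf function stands in for the table/partition size lookup; names are illustrative):

import java.util.List;
import java.util.Set;
import java.util.function.ToLongFunction;

final class BigTableSelection {
  // Returns the index of the largest element among the candidate positions,
  // or -1 if no candidate qualifies; ties keep the earliest position.
  static <T> int positionOfLargest(List<T> items, Set<Integer> candidates, ToLongFunction<T> sizeOf) {
    int bigPos = -1;
    long maxSize = -1;
    for (int pos = 0; pos < items.size(); pos++) {
      if (!candidates.contains(pos)) {
        continue;
      }
      long size = sizeOf.applyAsLong(items.get(pos));
      if (size > maxSize) {
        maxSize = size;
        bigPos = pos;
      }
    }
    return bigPos;
  }
}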
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
From the class SharedWorkOptimizer, method areMergeableExcludeSemijoinsExtendedCheck:
private static boolean areMergeableExcludeSemijoinsExtendedCheck(ParseContext pctx,
    SharedWorkOptimizerCache optimizerCache, TableScanOperator tsOp1, TableScanOperator tsOp2)
    throws SemanticException {
  // We remove RS-based semijoins from consideration, then we compare
  List<Operator<?>> dppsOp1 = new ArrayList<>(optimizerCache.tableScanToDPPSource.get(tsOp1));
  boolean removedDppOp1 = false;
  List<ReduceSinkOperator> rsOpsSemijoin1 = new ArrayList<>();
  List<Operator<?>> dppsOp2 = new ArrayList<>(optimizerCache.tableScanToDPPSource.get(tsOp2));
  boolean removedDppOp2 = false;
  List<ReduceSinkOperator> rsOpsSemijoin2 = new ArrayList<>();
  for (int i = 0; i < dppsOp1.size(); i++) {
    Operator<?> op = dppsOp1.get(i);
    if (op instanceof ReduceSinkOperator) {
      ReduceSinkOperator semijoinRSOp = (ReduceSinkOperator) op;
      if (pctx.getRsToSemiJoinBranchInfo().get(semijoinRSOp).getIsHint()) {
        // This semijoin was requested by a hint; we should keep it, hence we bail out
        return false;
      }
      rsOpsSemijoin1.add(semijoinRSOp);
      dppsOp1.remove(i);
      // Step the index back so the element shifted into slot i is not skipped
      i--;
      removedDppOp1 = true;
    }
  }
  for (int i = 0; i < dppsOp2.size(); i++) {
    Operator<?> op = dppsOp2.get(i);
    if (op instanceof ReduceSinkOperator) {
      ReduceSinkOperator semijoinRSOp = (ReduceSinkOperator) op;
      if (pctx.getRsToSemiJoinBranchInfo().get(semijoinRSOp).getIsHint()) {
        // This semijoin was requested by a hint; we should keep it, hence we bail out
        return false;
      }
      rsOpsSemijoin2.add(semijoinRSOp);
      dppsOp2.remove(i);
      // Step the index back so the element shifted into slot i is not skipped
      i--;
      removedDppOp2 = true;
    }
  }
  if (removedDppOp1 && removedDppOp2) {
    // Both table scans are targeted by semijoin edges; this check only handles
    // the case where exactly one of them is
    return false;
  }
  if (!removedDppOp1 && !removedDppOp2) {
    // Neither of them is targeted by a semijoin; we skip them
    return false;
  }
  if (dppsOp1.size() != dppsOp2.size()) {
    // We cannot merge; we move to the next pair
    return false;
  }
  // Check whether the remaining DPP branches are equal
  boolean equalBranches = true;
  BitSet bs = new BitSet();
  for (int i = 0; i < dppsOp1.size(); i++) {
    Operator<?> dppOp1 = dppsOp1.get(i);
    for (int j = 0; j < dppsOp2.size(); j++) {
      if (!bs.get(j)) {
        // Not visited yet
        Operator<?> dppOp2 = dppsOp2.get(j);
        if (compareAndGatherOps(pctx, dppOp1, dppOp2) != null) {
          // The DPP operators/branches are equal
          bs.set(j);
          break;
        }
      }
    }
    if (bs.cardinality() < i + 1) {
      // We cannot merge; we move to the next group
      equalBranches = false;
      break;
    }
  }
  if (!equalBranches) {
    // Skip
    return false;
  }
  // If we reach here, the remaining DPP sources are equal, so these two table scans
  // could potentially be merged, and we perform the last check. To do this, we remove
  // the semijoin operators, remembering their place in the plan so that we can
  // reintroduce them. If the remaining merge preconditions hold, we will merge and
  // drop the semijoins for good; otherwise we abort the shared scan optimization
  // and restore them.
  TableScanOperator targetTSOp;
  List<ReduceSinkOperator> semijoinRsOps;
  List<SemiJoinBranchInfo> sjBranches = new ArrayList<>();
  if (removedDppOp1) {
    targetTSOp = tsOp1;
    semijoinRsOps = rsOpsSemijoin1;
  } else {
    targetTSOp = tsOp2;
    semijoinRsOps = rsOpsSemijoin2;
  }
  optimizerCache.tableScanToDPPSource.get(targetTSOp).removeAll(semijoinRsOps);
  for (ReduceSinkOperator rsOp : semijoinRsOps) {
    sjBranches.add(pctx.getRsToSemiJoinBranchInfo().remove(rsOp));
  }
  boolean validMerge = validPreConditions(pctx, optimizerCache,
      extractSharedOptimizationInfoForRoot(pctx, optimizerCache, tsOp1, tsOp2, true, true));
  if (validMerge) {
    // We are going to merge, hence we remove the semijoins completely
    for (ReduceSinkOperator semijoinRsOp : semijoinRsOps) {
      Operator<?> branchOp = GenTezUtils.removeBranch(semijoinRsOp);
      while (branchOp != null) {
        optimizerCache.removeOp(branchOp);
        branchOp = branchOp.getNumChild() > 0 ? branchOp.getChildOperators().get(0) : null;
      }
      GenTezUtils.removeSemiJoinOperator(pctx, semijoinRsOp, targetTSOp);
    }
  } else {
    // Otherwise, put the semijoins back in the auxiliary data structures
    optimizerCache.tableScanToDPPSource.get(targetTSOp).addAll(semijoinRsOps);
    for (int i = 0; i < semijoinRsOps.size(); i++) {
      pctx.getRsToSemiJoinBranchInfo().put(semijoinRsOps.get(i), sjBranches.get(i));
    }
  }
  return validMerge;
}
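
The tail of this method is an optimistic remove / validate / restore pattern: the semijoin bookkeeping is detached from pctx and the optimizer cache, the merge preconditions are evaluated against the reduced plan, and the state is then either committed (semijoins dropped for good) or rolled back. Reduced to its core, with a single map standing in for the auxiliary structures and the names purely illustrative:

import java.util.Map;
import java.util.function.Supplier;

final class OptimisticCheck {
  // Detach an entry, run a validation that must not see it, and put it back
  // if validation fails; returns whether validation succeeded.
  static <K, V> boolean tryWithDetached(Map<K, V> registry, K key, Supplier<Boolean> validate) {
    V detached = registry.remove(key); // temporarily hide the entry
    boolean ok = validate.get();       // run checks against the reduced state
    if (!ok && detached != null) {
      registry.put(key, detached);     // roll back on failure
    }
    return ok;
  }
}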