Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
In class TestGenTezWork, method setUp.
/**
 * @throws java.lang.Exception
 */
@SuppressWarnings("unchecked")
@Before
public void setUp() throws Exception {
  // Init conf
  final HiveConf conf = new HiveConf(SemanticAnalyzer.class);
  SessionState.start(conf);
  // Init parse context
  final ParseContext pctx = new ParseContext();
  pctx.setContext(new Context(conf));
  ctx = new GenTezProcContext(conf, pctx, Collections.EMPTY_LIST, new ArrayList<Task<?>>(),
      Collections.EMPTY_SET, Collections.EMPTY_SET);
  proc = new GenTezWork(new GenTezUtils() {

    @Override
    protected void setupMapWork(MapWork mapWork, GenTezProcContext context,
        PrunedPartitionList partitions, TableScanOperator root, String alias)
        throws SemanticException {
      LinkedHashMap<String, Operator<? extends OperatorDesc>> map =
          new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
      map.put("foo", root);
      mapWork.setAliasToWork(map);
    }
  });
  CompilationOpContext cCtx = new CompilationOpContext();
  fs = new FileSinkOperator(cCtx);
  fs.setConf(new FileSinkDesc());
  rs = new ReduceSinkOperator(cCtx);
  rs.setConf(new ReduceSinkDesc());
  TableDesc tableDesc = new TableDesc();
  tableDesc.setProperties(new Properties());
  rs.getConf().setKeySerializeInfo(tableDesc);
  ts = new TableScanOperator(cCtx);
  ts.setConf(new TableScanDesc(null));
  // Wire the operator pipeline TS -> RS -> FS in both directions
  ts.getChildOperators().add(rs);
  rs.getParentOperators().add(ts);
  rs.getChildOperators().add(fs);
  fs.getParentOperators().add(rs);
  ctx.preceedingWork = null;
  ctx.currentRootOperator = ts;
}
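The four add() calls above wire the operator chain by hand. A small helper (hypothetical, not part of the Hive test) makes the bidirectional parent/child linking explicit:

// Hypothetical helper, not in the Hive test: links a parent operator to a
// child in both directions, which is what the four add() calls above do
// for ts -> rs and rs -> fs.
private static void connect(Operator<? extends OperatorDesc> parent,
    Operator<? extends OperatorDesc> child) {
  parent.getChildOperators().add(child);
  child.getParentOperators().add(parent);
}

With it, the wiring in setUp reduces to connect(ts, rs); connect(rs, fs);.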
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
In class TestSharedWorkOptimizer, method getTsOp.
private TableScanOperator getTsOp() {
  Table tblMetadata = new Table("db", "table");
  TableScanDesc desc = new TableScanDesc("alias_" + cCtx.nextOperatorId(), tblMetadata);
  Operator<TableScanDesc> ts = OperatorFactory.get(cCtx, desc);
  return (TableScanOperator) ts;
}
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
In class TestSharedWorkOptimizer, method testTSCmpOrdersById.
@Test
public void testTSCmpOrdersById() {
  TableScanOperator ts1 = getTsOp();
  TableScanOperator ts2 = getTsOp();
  ArrayList<TableScanOperator> li1 = Lists.newArrayList(ts1, ts2);
  ArrayList<TableScanOperator> li2 = Lists.newArrayList(ts2, ts1);
  li1.sort(new TSComparator());
  li2.sort(new TSComparator());
  assertTrue(li1.get(0) == li2.get(0));
}
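The test only checks that TSComparator imposes a deterministic total order: sorting the same two operators from either starting order must yield the same first element. A minimal sketch of a comparator with that property, using the inherited getOperatorId() (e.g. "TS_0") as the key; the criteria of the actual Hive TSComparator may differ:

// Sketch only: any comparator keyed on a stable per-operator value gives
// the order-independence the test asserts. Not the real TSComparator.
Comparator<TableScanOperator> byId =
    Comparator.comparing(TableScanOperator::getOperatorId);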
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
In class TestNullScanTaskDispatcher, method verifyNumberOfReads.
private void verifyNumberOfReads(int externalPartitionCount, int managedPartitionCount,
    int upToDateManagedPartitions, int expectedReadOps) throws IOException, SemanticException {
  final String MANAGED_TEST_TABLE = "managedTestTable";
  final String EXTERNAL_TEST_TABLE = "externalTestTable";
  createTable(EXTERNAL_TEST_TABLE, externalPartitionCount, 0);
  createTable(MANAGED_TEST_TABLE, managedPartitionCount, upToDateManagedPartitions);
  // operator setup
  TableScanOperator tsoManaged = createTableScanOperator(false);
  TableScanOperator tsoExternal = createTableScanOperator(true);
  aliasToWork.put(MANAGED_TEST_TABLE, tsoManaged);
  aliasToWork.put(EXTERNAL_TEST_TABLE, tsoExternal);
  PhysicalContext physicalContext = new PhysicalContext(hiveConf, parseContext, context,
      getAsRootTaskList(mapWork, reduceWork), null);
  new MetadataOnlyOptimizer().resolve(physicalContext);
  assertEquals(1, mapWork.getPathToPartitionInfo().size());
  StorageStatistics statistics = FileSystem.getGlobalStorageStatistics().get("mock");
  assertEquals(expectedReadOps, (long) statistics.getLong("readOps"));
}
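A hypothetical caller, to illustrate the contract (the partition counts and the expected readOps value below are illustrative only, not taken from the actual Hive test suite):

@Test
public void testReadOpsAccounting() throws Exception {
  // 2 external partitions, 3 managed partitions with all 3 up to date; the
  // last argument is the read-op count we expect the mock FileSystem to
  // record. The concrete value depends on how the mock FS counts reads.
  verifyNumberOfReads(2, 3, 3, 2);
}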
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
In class SharedWorkOptimizer, method sharedWorkExtendedOptimization.
private static void sharedWorkExtendedOptimization(ParseContext pctx,
    SharedWorkOptimizerCache optimizerCache) throws SemanticException {
  // Gather RS operators that 1) belong to root works, i.e., works containing TS operators,
  // and 2) share the same input operator.
  // These will be the first target for extended shared work optimization.
  Multimap<Operator<?>, ReduceSinkOperator> parentToRsOps = ArrayListMultimap.create();
  Set<Operator<?>> visited = new HashSet<>();
  for (Entry<String, TableScanOperator> e : pctx.getTopOps().entrySet()) {
    gatherReduceSinkOpsByInput(parentToRsOps, visited,
        findWorkOperators(optimizerCache, e.getValue()));
  }
  Set<Operator<?>> removedOps = new HashSet<>();
  while (!parentToRsOps.isEmpty()) {
    // As above, we enforce a certain order when we do the reutilization.
    // In particular, we use the size of the data in the RS x the number of uses.
    List<Entry<Operator<?>, Long>> sortedRSGroups =
        rankOpsByAccumulatedSize(parentToRsOps.keySet());
    LOG.debug("Sorted operators by size: {}", sortedRSGroups);
    // Execute extended optimization.
    // For each RS, check whether other RSs in the same work could be merged into this one.
    // If they are merged, RS operators in the resulting work will be considered
    // mergeable in the next loop iteration.
    Multimap<Operator<?>, ReduceSinkOperator> existingRsOps = ArrayListMultimap.create();
    for (Entry<Operator<?>, Long> rsGroupInfo : sortedRSGroups) {
      Operator<?> rsParent = rsGroupInfo.getKey();
      for (ReduceSinkOperator discardableRsOp : parentToRsOps.get(rsParent)) {
        if (removedOps.contains(discardableRsOp)) {
          LOG.debug("Skip {} as it has already been removed", discardableRsOp);
          continue;
        }
        Collection<ReduceSinkOperator> otherRsOps = existingRsOps.get(rsParent);
        for (ReduceSinkOperator retainableRsOp : otherRsOps) {
          if (retainableRsOp.getChildOperators().size() == 0) {
            // Just skip this RS - it's a semijoin/bloomfilter related RS
            continue;
          }
          if (removedOps.contains(retainableRsOp)) {
            LOG.debug("Skip {} as it has already been removed", retainableRsOp);
            continue;
          }
          // First we quickly check whether the two RS operators can actually be merged.
          // We already know that these two RS operators have the same parent, but
          // we need to check whether both RSs are actually equal. Further, we check
          // whether their child is also equal. If any of these conditions are not
          // met, we are not going to try to merge.
          boolean mergeable = compareOperator(pctx, retainableRsOp, discardableRsOp)
              && compareOperator(pctx, retainableRsOp.getChildOperators().get(0),
                  discardableRsOp.getChildOperators().get(0));
          if (!mergeable) {
            // Skip
            LOG.debug("{} and {} cannot be merged", retainableRsOp, discardableRsOp);
            continue;
          }
          LOG.debug("Checking additional conditions for merging subtree starting at {}"
              + " into subtree starting at {}", discardableRsOp, retainableRsOp);
          // Secondly, we extract information about the part of the tree that can be merged
          // as well as some structural information (memory consumption) that needs to be
          // used to determine whether the merge can happen.
          Operator<?> retainableRsOpChild = retainableRsOp.getChildOperators().get(0);
          Operator<?> discardableRsOpChild = discardableRsOp.getChildOperators().get(0);
          SharedResult sr = extractSharedOptimizationInfo(pctx, optimizerCache, retainableRsOp,
              discardableRsOp, retainableRsOpChild, discardableRsOpChild);
          // Check that the plan meets the required preconditions before merging.
          if (sr.retainableOps.isEmpty() || !validPreConditions(pctx, optimizerCache, sr)) {
            // Skip
            LOG.debug("{} and {} do not meet preconditions", retainableRsOp, discardableRsOp);
            continue;
          }
          deduplicateReduceTraits(retainableRsOp.getConf(), discardableRsOp.getConf());
          // We can merge
          Operator<?> lastRetainableOp = sr.retainableOps.get(sr.retainableOps.size() - 1);
          Operator<?> lastDiscardableOp = sr.discardableOps.get(sr.discardableOps.size() - 1);
          if (lastDiscardableOp.getNumChild() != 0) {
            // Reattach the children of the discardable branch to the retainable one
            List<Operator<? extends OperatorDesc>> allChildren =
                Lists.newArrayList(lastDiscardableOp.getChildOperators());
            for (Operator<? extends OperatorDesc> op : allChildren) {
              lastDiscardableOp.getChildOperators().remove(op);
              op.replaceParent(lastDiscardableOp, lastRetainableOp);
              lastRetainableOp.getChildOperators().add(op);
            }
          }
          LOG.debug("Merging subtree starting at {} into subtree starting at {}",
              discardableRsOp, retainableRsOp);
          // First we remove the input operators of the expression that we are going to eliminate
          for (Operator<?> op : sr.discardableInputOps) {
            OperatorUtils.removeOperator(op);
            optimizerCache.removeOp(op);
            removedOps.add(op);
            // Remove DPP predicates
            if (op instanceof ReduceSinkOperator) {
              SemiJoinBranchInfo sjbi = pctx.getRsToSemiJoinBranchInfo().get(op);
              if (sjbi != null && !sr.discardableOps.contains(sjbi.getTsOp())
                  && !sr.discardableInputOps.contains(sjbi.getTsOp())) {
                GenTezUtils.removeSemiJoinOperator(pctx, (ReduceSinkOperator) op, sjbi.getTsOp());
                optimizerCache.tableScanToDPPSource.remove(sjbi.getTsOp(), op);
              }
            } else if (op instanceof AppMasterEventOperator) {
              DynamicPruningEventDesc dped = (DynamicPruningEventDesc) op.getConf();
              if (!sr.discardableOps.contains(dped.getTableScan())
                  && !sr.discardableInputOps.contains(dped.getTableScan())) {
                GenTezUtils.removeSemiJoinOperator(pctx, (AppMasterEventOperator) op,
                    dped.getTableScan());
                optimizerCache.tableScanToDPPSource.remove(dped.getTableScan(), op);
              }
            }
            LOG.debug("Input operator removed: {}", op);
          }
          // We remove the discardable RS operator
          OperatorUtils.removeOperator(discardableRsOp);
          optimizerCache.removeOp(discardableRsOp);
          removedOps.add(discardableRsOp);
          LOG.debug("Operator removed: {}", discardableRsOp);
          // Then we merge the operators of the works we are going to merge
          optimizerCache.removeOpAndCombineWork(discardableRsOpChild, retainableRsOpChild);
          // Finally we remove the rest of the expression from the tree
          for (Operator<?> op : sr.discardableOps) {
            OperatorUtils.removeOperator(op);
            optimizerCache.removeOp(op);
            removedOps.add(op);
            LOG.debug("Operator removed: {}", op);
          }
          if (pctx.getConf().getBoolVar(ConfVars.HIVE_SHARED_WORK_DOWNSTREAM_MERGE)) {
            if (sr.discardableOps.size() == 1) {
              downStreamMerge(retainableRsOp, optimizerCache, pctx);
            }
          }
          break;
        }
        if (removedOps.contains(discardableRsOp)) {
          // This operator has been removed, remove it from the list of existing operators
          existingRsOps.remove(rsParent, discardableRsOp);
        } else {
          // This operator has not been removed, include it in the list of existing operators
          existingRsOps.put(rsParent, discardableRsOp);
        }
      }
    }
    // We gather the operators that will be used for the next iteration of extended
    // optimization (if any)
    parentToRsOps = ArrayListMultimap.create();
    visited = new HashSet<>();
    for (Entry<Operator<?>, ReduceSinkOperator> e : existingRsOps.entries()) {
      if (removedOps.contains(e.getValue()) || e.getValue().getNumChild() < 1) {
        // If the operator has already been removed or it has no children (e.g., a
        // semijoin RS), we can quickly skip this one
        continue;
      }
      gatherReduceSinkOpsByInput(parentToRsOps, visited,
          findWorkOperators(optimizerCache, e.getValue().getChildOperators().get(0)));
    }
  }
  // Remove unused table scan operators
  pctx.getTopOps().entrySet().removeIf(
      (Entry<String, TableScanOperator> e) -> e.getValue().getNumChild() == 0);
}
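The grouping helper gatherReduceSinkOpsByInput is not shown above. A minimal sketch of what it plausibly does, assuming the signature implied by its two call sites (an assumption, not the actual Hive implementation):

// Sketch only: groups the ReduceSinkOperators found in a work's operator set
// by their (single) parent, so that RSs sharing an input can be compared for
// merging. The visited set prevents re-examining operators across passes.
private static void gatherReduceSinkOpsByInput(
    Multimap<Operator<?>, ReduceSinkOperator> parentToRsOps,
    Set<Operator<?>> visited, Set<Operator<?>> workOperators) {
  for (Operator<?> op : workOperators) {
    // Skip operators we have already examined in a previous pass
    if (!visited.add(op)) {
      continue;
    }
    if (op instanceof ReduceSinkOperator) {
      ReduceSinkOperator rsOp = (ReduceSinkOperator) op;
      parentToRsOps.put(rsOp.getParentOperators().get(0), rsOp);
    }
  }
}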