Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.
The class TestDynamicPartitionPruner, method testMultipleSourcesOrdering1.
@Test(timeout = 5000)
public void testMultipleSourcesOrdering1() throws InterruptedException, SerDeException {
  InputInitializerContext mockInitContext = mock(InputInitializerContext.class);
  doReturn(2).when(mockInitContext).getVertexNumTasks("v1");
  doReturn(3).when(mockInitContext).getVertexNumTasks("v2");
  MapWork mapWork = createMockMapWork(new TestSource("v1", 2), new TestSource("v2", 1));
  DynamicPartitionPruner pruner = new DynamicPartitionPrunerForEventTesting(mockInitContext, mapWork);
  PruneRunnable pruneRunnable = new PruneRunnable(pruner);
  Thread t = new Thread(pruneRunnable);
  t.start();
  try {
    pruneRunnable.start();
    InputInitializerEvent eventV1 = InputInitializerEvent.create("FakeTarget", "TargetInput", ByteBuffer.allocate(0));
    eventV1.setSourceVertexName("v1");
    InputInitializerEvent eventV2 = InputInitializerEvent.create("FakeTarget", "TargetInput", ByteBuffer.allocate(0));
    eventV2.setSourceVertexName("v2");
    // 2 tasks x 2 events for v1; 3 tasks x 1 event for v2.
    pruner.addEvent(eventV1);
    pruner.addEvent(eventV1);
    pruner.addEvent(eventV1);
    pruner.addEvent(eventV1);
    pruner.addEvent(eventV2);
    pruner.addEvent(eventV2);
    pruner.addEvent(eventV2);
    pruner.processVertex("v1");
    pruner.processVertex("v2");
    pruneRunnable.awaitEnd();
    assertFalse(pruneRunnable.inError.get());
  } finally {
    t.interrupt();
    t.join();
  }
}
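PruneRunnable is a helper defined elsewhere in TestDynamicPartitionPruner and is not part of this snippet. Below is a minimal sketch of what such a helper could look like, assuming the pruner exposes a blocking prune() method that returns once all registered sources have delivered their events (imports assumed: java.util.concurrent.CountDownLatch, java.util.concurrent.atomic.AtomicBoolean).

// Hedged sketch of a PruneRunnable-style helper; not the actual test class.
private static class PruneRunnable implements Runnable {
  private final DynamicPartitionPruner pruner;
  private final CountDownLatch startLatch = new CountDownLatch(1);
  private final CountDownLatch endLatch = new CountDownLatch(1);
  final AtomicBoolean inError = new AtomicBoolean(false);

  PruneRunnable(DynamicPartitionPruner pruner) {
    this.pruner = pruner;
  }

  void start() {
    // Release the worker thread so it can invoke prune().
    startLatch.countDown();
  }

  void awaitEnd() throws InterruptedException {
    // Block the test thread until prune() has returned or failed.
    endLatch.await();
  }

  @Override
  public void run() {
    try {
      startLatch.await();
      pruner.prune();
    } catch (InterruptedException ie) {
      // Expected during teardown, when the test calls t.interrupt().
    } catch (Exception e) {
      inError.set(true);
    } finally {
      endLatch.countDown();
    }
  }
}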
Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.
The class TestDynamicPartitionPruner, method testExtraEvents.
@Test(timeout = 5000, expected = IllegalStateException.class)
public void testExtraEvents() throws InterruptedException, IOException, HiveException, SerDeException {
  InputInitializerContext mockInitContext = mock(InputInitializerContext.class);
  doReturn(1).when(mockInitContext).getVertexNumTasks("v1");
  MapWork mapWork = createMockMapWork(new TestSource("v1", 1));
  DynamicPartitionPruner pruner = new DynamicPartitionPrunerForEventTesting(mockInitContext, mapWork);
  PruneRunnable pruneRunnable = new PruneRunnable(pruner);
  Thread t = new Thread(pruneRunnable);
  t.start();
  try {
    pruneRunnable.start();
    InputInitializerEvent event = InputInitializerEvent.create("FakeTarget", "TargetInput", ByteBuffer.allocate(0));
    event.setSourceVertexName("v1");
    // Only 1 task x 1 event is expected for v1; the second event is one too many.
    pruner.addEvent(event);
    pruner.addEvent(event);
    pruner.processVertex("v1");
    pruneRunnable.awaitEnd();
    assertFalse(pruneRunnable.inError.get());
  } finally {
    t.interrupt();
    t.join();
  }
}
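createMockMapWork and TestSource are likewise helpers from the same test class that these snippets omit. A hedged sketch of what createMockMapWork could look like follows, assuming the pruner derives the expected events per source from MapWork's event-source maps; the TestSource field names used here (vertexName, eventsPerTask) are illustrative assumptions.

// Hedged sketch: stub the MapWork accessors the pruner reads. The accessor
// names are real MapWork methods; the stubbed contents are simplified.
private MapWork createMockMapWork(TestSource... testSources) {
  Map<String, List<TableDesc>> tableDescMap = new HashMap<>();
  Map<String, List<String>> columnNameMap = new HashMap<>();
  Map<String, List<ExprNodeDesc>> partKeyExprMap = new HashMap<>();
  for (TestSource source : testSources) {
    List<TableDesc> tableDescs = new ArrayList<>();
    List<String> columnNames = new ArrayList<>();
    List<ExprNodeDesc> partKeyExprs = new ArrayList<>();
    // One entry per expected event from each task of this source vertex.
    for (int i = 0; i < source.eventsPerTask; i++) {
      tableDescs.add(mock(TableDesc.class));
      columnNames.add("col" + i);
      partKeyExprs.add(mock(ExprNodeDesc.class));
    }
    tableDescMap.put(source.vertexName, tableDescs);
    columnNameMap.put(source.vertexName, columnNames);
    partKeyExprMap.put(source.vertexName, partKeyExprs);
  }
  MapWork mapWork = mock(MapWork.class);
  doReturn(tableDescMap).when(mapWork).getEventSourceTableDescMap();
  doReturn(columnNameMap).when(mapWork).getEventSourceColumnNameMap();
  doReturn(partKeyExprMap).when(mapWork).getEventSourcePartKeyExprMap();
  return mapWork;
}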
Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.
The class TestDynamicPartitionPruner, method testMultipleSourcesOrdering3.
@Test(timeout = 5000)
public void testMultipleSourcesOrdering3() throws InterruptedException, SerDeException {
  InputInitializerContext mockInitContext = mock(InputInitializerContext.class);
  doReturn(2).when(mockInitContext).getVertexNumTasks("v1");
  doReturn(3).when(mockInitContext).getVertexNumTasks("v2");
  MapWork mapWork = createMockMapWork(new TestSource("v1", 2), new TestSource("v2", 1));
  DynamicPartitionPruner pruner = new DynamicPartitionPrunerForEventTesting(mockInitContext, mapWork);
  PruneRunnable pruneRunnable = new PruneRunnable(pruner);
  Thread t = new Thread(pruneRunnable);
  t.start();
  try {
    pruneRunnable.start();
    InputInitializerEvent eventV1 = InputInitializerEvent.create("FakeTarget", "TargetInput", ByteBuffer.allocate(0));
    eventV1.setSourceVertexName("v1");
    InputInitializerEvent eventV2 = InputInitializerEvent.create("FakeTarget", "TargetInput", ByteBuffer.allocate(0));
    eventV2.setSourceVertexName("v2");
    // In total, 2 tasks x 2 events for v1 and 3 tasks x 1 event for v2,
    // interleaved with the vertex-completion signals.
    pruner.addEvent(eventV1);
    pruner.addEvent(eventV1);
    pruner.processVertex("v1");
    pruner.addEvent(eventV1);
    pruner.addEvent(eventV1);
    pruner.addEvent(eventV2);
    pruner.processVertex("v2");
    pruner.addEvent(eventV2);
    pruner.addEvent(eventV2);
    pruneRunnable.awaitEnd();
    assertFalse(pruneRunnable.inError.get());
  } finally {
    t.interrupt();
    t.join();
  }
}
Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.
The class TestGenTezWork, method testCreateMap.
@Test
public void testCreateMap() throws SemanticException {
  proc.process(rs, null, ctx, (Object[]) null);
  assertNotNull(ctx.currentTask);
  assertTrue(ctx.rootTasks.contains(ctx.currentTask));
  TezWork work = ctx.currentTask.getWork();
  assertEquals(work.getAllWork().size(), 1);
  BaseWork w = work.getAllWork().get(0);
  assertTrue(w instanceof MapWork);
  MapWork mw = (MapWork) w;
  // Need to make sure names are set for Tez to connect things right.
  assertNotNull(w.getName());
  // Map work should start with our TS operator.
  assertSame(mw.getAliasToWork().entrySet().iterator().next().getValue(), ts);
  // Preceding work must be set to the newly generated map
  // (the context field keeps the historical "preceeding" spelling).
  assertSame(ctx.preceedingWork, mw);
  // fs should now be the current root operator.
  assertSame(ctx.currentRootOperator, fs);
}
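For context, MapWork.getAliasToWork() maps each input alias to the root operator of that alias's operator tree; that relationship is what the assertSame check above relies on. A small illustrative loop over the same map (purely for exposition, not part of the test):

// Each entry maps an input alias to the root operator of its tree;
// in this test the single root is the TableScanOperator ts.
for (Map.Entry<String, Operator<? extends OperatorDesc>> e : mw.getAliasToWork().entrySet()) {
  System.out.println(e.getKey() + " -> " + e.getValue().getName());
}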
Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.
The class CommonJoinTaskDispatcher, method mergeMapJoinTaskIntoItsChildMapRedTask.
/*
 * A task and its child task have been converted from join to mapjoin.
 * See if the two tasks can be merged.
 */
private void mergeMapJoinTaskIntoItsChildMapRedTask(MapRedTask mapJoinTask, Configuration conf) throws SemanticException {
  // Step 1: Check that mapJoinTask has a single child.
  // If so, check if we can merge mapJoinTask into that child.
  if (mapJoinTask.getChildTasks() == null || mapJoinTask.getChildTasks().size() > 1) {
    // No child task, or more than one child task, in which case we don't want to do anything.
    return;
  }
  Task<? extends Serializable> childTask = mapJoinTask.getChildTasks().get(0);
  if (!(childTask instanceof MapRedTask)) {
    // Nothing to do if it is not a MapReduce task.
    return;
  }
  MapRedTask childMapRedTask = (MapRedTask) childTask;
  MapWork mapJoinMapWork = mapJoinTask.getWork().getMapWork();
  MapWork childMapWork = childMapRedTask.getWork().getMapWork();
  Map<String, Operator<? extends OperatorDesc>> mapJoinAliasToWork = mapJoinMapWork.getAliasToWork();
  if (mapJoinAliasToWork.size() > 1) {
    // Do not merge if the MapredWork of MapJoin has multiple input aliases.
    return;
  }
  Entry<String, Operator<? extends OperatorDesc>> mapJoinAliasToWorkEntry = mapJoinAliasToWork.entrySet().iterator().next();
  String mapJoinAlias = mapJoinAliasToWorkEntry.getKey();
  TableScanOperator mapJoinTaskTableScanOperator = OperatorUtils.findSingleOperator(mapJoinAliasToWorkEntry.getValue(), TableScanOperator.class);
  if (mapJoinTaskTableScanOperator == null) {
    throw new SemanticException("Expected a " + TableScanOperator.getOperatorName() + " operator as the work associated with alias " + mapJoinAlias + ". Found a " + mapJoinAliasToWork.get(mapJoinAlias).getName() + " operator.");
  }
  FileSinkOperator mapJoinTaskFileSinkOperator = OperatorUtils.findSingleOperator(mapJoinTaskTableScanOperator, FileSinkOperator.class);
  if (mapJoinTaskFileSinkOperator == null) {
    throw new SemanticException("Cannot find the " + FileSinkOperator.getOperatorName() + " operator at the last operator of the MapJoin Task.");
  }
  // The mapJoinTaskFileSinkOperator writes to a different directory.
  Path childMRPath = mapJoinTaskFileSinkOperator.getConf().getDirName();
  List<String> childMRAliases = childMapWork.getPathToAliases().get(childMRPath);
  if (childMRAliases == null || childMRAliases.size() != 1) {
    return;
  }
  String childMRAlias = childMRAliases.get(0);
  // Sanity check to make sure there is no alias conflict after the merge.
  for (Entry<Path, ArrayList<String>> entry : childMapWork.getPathToAliases().entrySet()) {
    Path path = entry.getKey();
    List<String> aliases = entry.getValue();
    if (path.equals(childMRPath)) {
      continue;
    }
    if (aliases.contains(mapJoinAlias)) {
      // An alias conflict should not happen here.
      return;
    }
  }
  MapredLocalWork mapJoinLocalWork = mapJoinMapWork.getMapRedLocalWork();
  MapredLocalWork childLocalWork = childMapWork.getMapRedLocalWork();
  if ((mapJoinLocalWork != null && mapJoinLocalWork.getBucketMapjoinContext() != null) || (childLocalWork != null && childLocalWork.getBucketMapjoinContext() != null)) {
    // Right now, we do not handle the case where either task involves a bucket map join.
    // We should relax this constraint with a follow-up jira.
    return;
  }
  // Check that the total size of the local tables after the merge is under the limit.
  if (!isLocalTableTotalSizeUnderLimitAfterMerge(conf, mapJoinLocalWork, childLocalWork)) {
    // Do not merge.
    return;
  }
  TableScanOperator childMRTaskTableScanOperator = OperatorUtils.findSingleOperator(childMapWork.getAliasToWork().get(childMRAlias), TableScanOperator.class);
  if (childMRTaskTableScanOperator == null) {
    throw new SemanticException("Expected a " + TableScanOperator.getOperatorName() + " operator as the work associated with alias " + childMRAlias + ". Found a " + childMapWork.getAliasToWork().get(childMRAlias).getName() + " operator.");
  }
  List<Operator<? extends OperatorDesc>> parentsInMapJoinTask = mapJoinTaskFileSinkOperator.getParentOperators();
  List<Operator<? extends OperatorDesc>> childrenInChildMRTask = childMRTaskTableScanOperator.getChildOperators();
  if (parentsInMapJoinTask.size() > 1 || childrenInChildMRTask.size() > 1) {
    // Do not merge if we do not know how to connect the two operator trees.
    return;
  }
  // Step 2: Merge mapJoinTask into the map side of its child.
  // Step 2.1: Connect the operator trees of the two MapRedTasks.
  Operator<? extends OperatorDesc> parentInMapJoinTask = parentsInMapJoinTask.get(0);
  Operator<? extends OperatorDesc> childInChildMRTask = childrenInChildMRTask.get(0);
  parentInMapJoinTask.replaceChild(mapJoinTaskFileSinkOperator, childInChildMRTask);
  childInChildMRTask.replaceParent(childMRTaskTableScanOperator, parentInMapJoinTask);
  // Step 2.2: Replace the corresponding part of childMapWork.
  GenMapRedUtils.replaceMapWork(mapJoinAlias, childMRAlias, mapJoinMapWork, childMapWork);
  // Step 2.3: Fill in the local work.
  if (mapJoinLocalWork != null) {
    if (childLocalWork == null) {
      childMapWork.setMapRedLocalWork(mapJoinLocalWork);
    } else {
      childLocalWork.getAliasToFetchWork().putAll(mapJoinLocalWork.getAliasToFetchWork());
      childLocalWork.getAliasToWork().putAll(mapJoinLocalWork.getAliasToWork());
    }
  }
  // Step 2.4: Remove this MapJoin task from the task graph.
  List<Task<? extends Serializable>> parentTasks = mapJoinTask.getParentTasks();
  mapJoinTask.setParentTasks(null);
  mapJoinTask.setChildTasks(null);
  childMapRedTask.getParentTasks().remove(mapJoinTask);
  if (parentTasks != null) {
    childMapRedTask.getParentTasks().addAll(parentTasks);
    for (Task<? extends Serializable> parentTask : parentTasks) {
      parentTask.getChildTasks().remove(mapJoinTask);
      if (!parentTask.getChildTasks().contains(childMapRedTask)) {
        parentTask.getChildTasks().add(childMapRedTask);
      }
    }
  } else {
    if (physicalContext.getRootTasks().contains(mapJoinTask)) {
      physicalContext.removeFromRootTask(mapJoinTask);
      if (childMapRedTask.getParentTasks() != null && childMapRedTask.getParentTasks().size() == 0 && !physicalContext.getRootTasks().contains(childMapRedTask)) {
        physicalContext.addToRootTask(childMapRedTask);
      }
    }
  }
  if (childMapRedTask.getParentTasks().size() == 0) {
    childMapRedTask.setParentTasks(null);
  }
}
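isLocalTableTotalSizeUnderLimitAfterMerge is defined elsewhere in CommonJoinTaskDispatcher. A minimal sketch of the idea behind such a check, with a hypothetical estimateSizeOnDisk helper and an explicit threshold parameter standing in for the limit the real code reads from HiveConf:

// Hedged sketch: sum the sizes of the small tables fetched by both local
// works and refuse the merge once the total exceeds the threshold.
private boolean isUnderLimitAfterMergeSketch(long thresholdBytes, MapredLocalWork... localWorks) {
  long total = 0;
  for (MapredLocalWork localWork : localWorks) {
    if (localWork == null || localWork.getAliasToFetchWork() == null) {
      continue;
    }
    for (FetchWork fetchWork : localWork.getAliasToFetchWork().values()) {
      // estimateSizeOnDisk is hypothetical: it would stat the table or
      // partition paths referenced by this FetchWork on the filesystem.
      total += estimateSizeOnDisk(fetchWork);
      if (total > thresholdBytes) {
        return false;
      }
    }
  }
  return true;
}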