Use of org.apache.hadoop.hive.ql.session.LineageState in the Apache Hive project.
Class TestGenMapRedUtilsCreateConditionalTask, method testConditionalMoveTaskIsNotOptimized.
/**
 * Builds the merge/move conditional task for S3A paths with
 * {@code HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED} set to {@code "false"} and checks that every
 * branch of the resulting {@link ConditionalTask} still carries its trailing move task.
 *
 * @throws SemanticException if {@code GenMapRedUtils.createMRWorkForMergingFiles} fails
 */
@Test
public void testConditionalMoveTaskIsNotOptimized() throws SemanticException {
  hiveConf.set(HiveConf.ConfVars.HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED.varname, "false");

  // Locations used by the scenario: file sink output, intermediate final dir, table directory.
  final Path scratchSink = new Path("s3a://bucket/scratch/-ext-10002");
  final Path scratchFinal = new Path("s3a://bucket/scratch/-ext-10000");
  final Path warehouseTable = new Path("s3a://bucket/warehouse/table");

  final FileSinkOperator sinkOp = createFileSinkOperator(scratchSink);
  final Task<MoveWork> pendingMove = createMoveTask(scratchFinal, warehouseTable);
  final List<Task<MoveWork>> pendingMoves = Collections.singletonList(pendingMove);

  GenMapRedUtils.createMRWorkForMergingFiles(
      sinkOp, scratchFinal, null, pendingMoves, hiveConf, dummyMRTask, new LineageState());

  // The conditional task is the first child hung off the dummy MR task.
  final ConditionalTask conditional = (ConditionalTask) dummyMRTask.getChildTasks().get(0);
  final Task<? extends Serializable> moveOnlyBranch = conditional.getListTasks().get(0);
  final Task<? extends Serializable> mergeOnlyBranch = conditional.getListTasks().get(1);
  final Task<? extends Serializable> mergeAndMoveBranch = conditional.getListTasks().get(2);

  // Move-only branch: NOT optimized, so it keeps one child move after its own move.
  assertEquals(1, moveOnlyBranch.getChildTasks().size());
  verifyMoveTask(moveOnlyBranch, scratchSink, scratchFinal);
  verifyMoveTask(moveOnlyBranch.getChildTasks().get(0), scratchFinal, warehouseTable);

  // Merge-only branch: NOT optimized, a single child move follows the merge.
  assertEquals(1, mergeOnlyBranch.getChildTasks().size());
  verifyMoveTask(mergeOnlyBranch.getChildTasks().get(0), scratchFinal, warehouseTable);

  // Merge-and-move branch: NOT optimized, two moves are chained below the merge.
  assertEquals(1, mergeAndMoveBranch.getChildTasks().size());
  final Task<? extends Serializable> firstChainedMove = mergeAndMoveBranch.getChildTasks().get(0);
  assertEquals(1, firstChainedMove.getChildTasks().size());
  verifyMoveTask(firstChainedMove, scratchSink, scratchFinal);
  verifyMoveTask(firstChainedMove.getChildTasks().get(0), scratchFinal, warehouseTable);
}
Use of org.apache.hadoop.hive.ql.session.LineageState in the Apache Hive project.
Class TestGenMapRedUtilsCreateConditionalTask, method testConditionalMoveTaskIsOptimized.
/**
 * Builds the merge/move conditional task for S3A paths with
 * {@code HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED} set to {@code "true"} and checks which branches
 * of the resulting {@link ConditionalTask} have their move tasks collapsed.
 *
 * @throws SemanticException if {@code GenMapRedUtils.createMRWorkForMergingFiles} fails
 */
@Test
public void testConditionalMoveTaskIsOptimized() throws SemanticException {
  hiveConf.set(HiveConf.ConfVars.HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED.varname, "true");

  // Locations used by the scenario: file sink output, intermediate final dir, table directory.
  final Path scratchSink = new Path("s3a://bucket/scratch/-ext-10002");
  final Path scratchFinal = new Path("s3a://bucket/scratch/-ext-10000");
  final Path warehouseTable = new Path("s3a://bucket/warehouse/table");

  final FileSinkOperator sinkOp = createFileSinkOperator(scratchSink);
  final Task<MoveWork> pendingMove = createMoveTask(scratchFinal, warehouseTable);
  final List<Task<MoveWork>> pendingMoves = Collections.singletonList(pendingMove);

  GenMapRedUtils.createMRWorkForMergingFiles(
      sinkOp, scratchFinal, null, pendingMoves, hiveConf, dummyMRTask, new LineageState());

  // The conditional task is the first child hung off the dummy MR task.
  final ConditionalTask conditional = (ConditionalTask) dummyMRTask.getChildTasks().get(0);
  final Task<? extends Serializable> moveOnlyBranch = conditional.getListTasks().get(0);
  final Task<? extends Serializable> mergeOnlyBranch = conditional.getListTasks().get(1);
  final Task<? extends Serializable> mergeAndMoveBranch = conditional.getListTasks().get(2);

  /*
   * OPTIMIZATION
   * Linking two MoveTasks is expensive on blobstorage systems, so the ConditionalTask avoids it:
   * rather than chaining them, a single MoveTask is created whose source is the source of the
   * first MoveTask and whose target is the target of the second MoveTask.
   */

  // Move-only branch: optimized, so there is no child list and one move goes sink -> table.
  assertNull(moveOnlyBranch.getChildTasks());
  verifyMoveTask(moveOnlyBranch, scratchSink, warehouseTable);

  // Merge-only branch: NOT optimized (the merge writes directly to the final dir, and a
  // MoveTask is executed afterwards).
  assertEquals(1, mergeOnlyBranch.getChildTasks().size());
  verifyMoveTask(mergeOnlyBranch.getChildTasks().get(0), scratchFinal, warehouseTable);

  // Merge-and-move branch: NOT optimized, two moves are chained below the merge.
  assertEquals(1, mergeAndMoveBranch.getChildTasks().size());
  final Task<? extends Serializable> firstChainedMove = mergeAndMoveBranch.getChildTasks().get(0);
  assertEquals(1, firstChainedMove.getChildTasks().size());
  verifyMoveTask(firstChainedMove, scratchSink, scratchFinal);
  verifyMoveTask(firstChainedMove.getChildTasks().get(0), scratchFinal, warehouseTable);
}
Use of org.apache.hadoop.hive.ql.session.LineageState in the Apache Hive project.
Class TestGenMapRedUtilsCreateConditionalTask, method testMergePathValidMoveWorkReturnsNewMoveWork.
/**
 * Checks that {@code GenMapRedUtils.mergeMovePaths} returns a new {@link MoveWork} (not the
 * instance passed in) whose source path is the conditional input path, both when the supplied
 * work carries a {@link LoadFileDesc} and when it carries a {@link LoadTableDesc}.
 */
@Test
public void testMergePathValidMoveWorkReturnsNewMoveWork() {
  final Path condInputPath = new Path("s3a://bucket/scratch/-ext-10000");
  final Path condOutputPath = new Path("s3a://bucket/scratch/-ext-10002");
  final Path targetMoveWorkPath = new Path("s3a://bucket/scratch/-ext-10003");
  final MoveWork mockWork = mock(MoveWork.class);
  final LineageState lineageState = new LineageState();
  MoveWork newWork;

  // Case 1: the incoming work describes a file load.
  when(mockWork.getLoadFileWork())
      .thenReturn(new LoadFileDesc(condOutputPath, targetMoveWorkPath, false, "", "", false));
  newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork, lineageState);
  assertNotNull(newWork);
  // JUnit's signature is assertNotEquals(unexpected, actual): the mock is the unexpected value.
  assertNotEquals(mockWork, newWork);
  assertEquals(condInputPath, newWork.getLoadFileWork().getSourcePath());
  assertEquals(targetMoveWorkPath, newWork.getLoadFileWork().getTargetDir());

  // Case 2: the incoming work describes a table load. Reset the mock so the file-load stub
  // from case 1 no longer applies.
  final TableDesc tableDesc = new TableDesc();
  reset(mockWork);
  when(mockWork.getLoadTableWork()).thenReturn(new LoadTableDesc(condOutputPath, tableDesc, null));
  newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork, lineageState);
  assertNotNull(newWork);
  assertNotEquals(mockWork, newWork);
  assertEquals(condInputPath, newWork.getLoadTableWork().getSourcePath());
  // assertEquals reports both values on mismatch and fails cleanly if getTable() is null.
  assertEquals(tableDesc, newWork.getLoadTableWork().getTable());
}
Use of org.apache.hadoop.hive.ql.session.LineageState in the Apache Hive project.
Class TestGenMapRedUtilsCreateConditionalTask, method testMergePathWithInvalidMoveWorkThrowsException.
/**
 * Expects {@code GenMapRedUtils.mergeMovePaths} to throw {@link IllegalArgumentException} when
 * the only descriptor stubbed on the supplied {@link MoveWork} mock is a
 * {@link LoadMultiFilesDesc}.
 */
@Test(expected = IllegalArgumentException.class)
public void testMergePathWithInvalidMoveWorkThrowsException() {
  final LineageState lineage = new LineageState();
  final Path inputPath = new Path("s3a://bucket/scratch/-ext-10000");

  // Only the multi-files descriptor is stubbed; other getters keep the mock's defaults.
  final MoveWork multiFilesOnlyWork = mock(MoveWork.class);
  when(multiFilesOnlyWork.getLoadMultiFilesWork()).thenReturn(new LoadMultiFilesDesc());

  GenMapRedUtils.mergeMovePaths(inputPath, multiFilesOnlyWork, lineage);
}
Use of org.apache.hadoop.hive.ql.session.LineageState in the Apache Hive project.
Class TestGenMapRedUtilsCreateConditionalTask, method testConditionalMoveOnHdfsIsNotOptimized.
/**
 * Builds the merge/move conditional task for HDFS paths with
 * {@code HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED} set to {@code "true"} and checks that, despite
 * the flag, every branch of the resulting {@link ConditionalTask} still carries its trailing
 * move task.
 *
 * @throws SemanticException if {@code GenMapRedUtils.createMRWorkForMergingFiles} fails
 */
@Test
public void testConditionalMoveOnHdfsIsNotOptimized() throws SemanticException {
  hiveConf.set(HiveConf.ConfVars.HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED.varname, "true");

  // Locations used by the scenario: file sink output, intermediate final dir, table directory.
  final Path scratchSink = new Path("hdfs://bucket/scratch/-ext-10002");
  final Path scratchFinal = new Path("hdfs://bucket/scratch/-ext-10000");
  final Path warehouseTable = new Path("hdfs://bucket/warehouse/table");

  final FileSinkOperator sinkOp = createFileSinkOperator(scratchSink);
  final Task<MoveWork> pendingMove = createMoveTask(scratchFinal, warehouseTable);
  final List<Task<MoveWork>> pendingMoves = Collections.singletonList(pendingMove);

  GenMapRedUtils.createMRWorkForMergingFiles(
      sinkOp, scratchFinal, null, pendingMoves, hiveConf, dummyMRTask, new LineageState());

  // The conditional task is the first child hung off the dummy MR task.
  final ConditionalTask conditional = (ConditionalTask) dummyMRTask.getChildTasks().get(0);
  final Task<? extends Serializable> moveOnlyBranch = conditional.getListTasks().get(0);
  final Task<? extends Serializable> mergeOnlyBranch = conditional.getListTasks().get(1);
  final Task<? extends Serializable> mergeAndMoveBranch = conditional.getListTasks().get(2);

  // Move-only branch: NOT optimized, so it keeps one child move after its own move.
  assertEquals(1, moveOnlyBranch.getChildTasks().size());
  verifyMoveTask(moveOnlyBranch, scratchSink, scratchFinal);
  verifyMoveTask(moveOnlyBranch.getChildTasks().get(0), scratchFinal, warehouseTable);

  // Merge-only branch: NOT optimized, a single child move follows the merge.
  assertEquals(1, mergeOnlyBranch.getChildTasks().size());
  verifyMoveTask(mergeOnlyBranch.getChildTasks().get(0), scratchFinal, warehouseTable);

  // Merge-and-move branch: NOT optimized, two moves are chained below the merge.
  assertEquals(1, mergeAndMoveBranch.getChildTasks().size());
  final Task<? extends Serializable> firstChainedMove = mergeAndMoveBranch.getChildTasks().get(0);
  assertEquals(1, firstChainedMove.getChildTasks().size());
  verifyMoveTask(firstChainedMove, scratchSink, scratchFinal);
  verifyMoveTask(firstChainedMove.getChildTasks().get(0), scratchFinal, warehouseTable);
}
Aggregations