Search in sources:

Example 36 with MoveWork

Use of org.apache.hadoop.hive.ql.plan.MoveWork in project hive by apache.

The class TruncateTableAnalyzer, method addMoveTask.

/**
 * Chains a MoveTask after the truncate task so the truncated data, staged in
 * a temporary directory, is relocated to its final location in one step —
 * making the overall truncate operation appear atomic. A stats task is then
 * attached to the move via {@code addStatTask}.
 */
private void addMoveTask(ASTNode root, Table table, Map<String, String> partitionSpec, Path oldPartitionLocation, Path newPartitionLocation, ListBucketingCtx lbCtx, Path queryTmpdir, Task<?> truncateTask, TableDesc tableDesc) throws SemanticException {
    // A missing partition spec is treated as an empty one.
    Map<String, String> spec = (partitionSpec == null) ? new HashMap<>() : partitionSpec;
    LoadTableDesc loadDesc = new LoadTableDesc(queryTmpdir, tableDesc, spec);
    loadDesc.setLbCtx(lbCtx);
    MoveWork moveWork = new MoveWork(null, null, loadDesc, null, false);
    Task<MoveWork> moveTask = TaskFactory.get(moveWork);
    truncateTask.addDependentTask(moveTask);
    addStatTask(root, table, oldPartitionLocation, newPartitionLocation, loadDesc, moveTask);
}
Also used : MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) HashMap(java.util.HashMap)

Example 37 with MoveWork

use of org.apache.hadoop.hive.ql.plan.MoveWork in project hive by apache.

The class AlterTableConcatenateAnalyzer, method addMoveTask.

/**
 * Chains a MoveTask after the merge (concatenate) task, loading the merged
 * output from the temporary query directory into the table/partition, then
 * attaches a stats task via {@code addStatTask}. MM tables are not supported
 * on this path.
 */
private void addMoveTask(TableName tableName, Table table, Map<String, String> partitionSpec, Path oldLocation, Path newLocation, ListBucketingCtx lbCtx, TableDesc tableDesc, Path queryTmpDir, Task<?> mergeTask) throws SemanticException {
    // A missing partition spec is treated as an empty one for the load.
    Map<String, String> spec = (partitionSpec == null) ? new HashMap<>() : partitionSpec;
    LoadTableDesc loadDesc = new LoadTableDesc(queryTmpDir, tableDesc, spec);
    loadDesc.setLbCtx(lbCtx);
    loadDesc.setInheritTableSpecs(true);
    Task<MoveWork> moveTask = TaskFactory.get(new MoveWork(null, null, loadDesc, null, false));
    mergeTask.addDependentTask(moveTask);
    // NOTE: the stats task receives the original (possibly null) partitionSpec.
    addStatTask(tableName, table, partitionSpec, oldLocation, newLocation, loadDesc, moveTask);
}
Also used : MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) HashMap(java.util.HashMap)

Example 38 with MoveWork

use of org.apache.hadoop.hive.ql.plan.MoveWork in project hive by apache.

The class TestGenMapRedUtilsCreateConditionalTask, method testMovePathsThatCanBeMerged.

@Test
public void testMovePathsThatCanBeMerged() {
    // The conditional input/output and the MoveWork target all live on the
    // same blob store (s3a), so the two moves should be mergeable.
    Path input = new Path("s3a://bucket/scratch/-ext-10000");
    Path output = new Path("s3a://bucket/scratch/-ext-10002");
    Path moveTarget = new Path("s3a://bucket/scratch/-ext-10003");
    MoveWork work = mock(MoveWork.class);
    LoadFileDesc loadFileDesc = new LoadFileDesc(output, moveTarget, false, "", "", false);
    when(work.getLoadFileWork()).thenReturn(loadFileDesc);
    assertTrue("Merging BlobStore paths should be allowed.", GenMapRedUtils.shouldMergeMovePaths(hiveConf, input, output, work));
}
Also used : Path(org.apache.hadoop.fs.Path) MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) LoadFileDesc(org.apache.hadoop.hive.ql.plan.LoadFileDesc) Test(org.junit.Test)

Example 39 with MoveWork

use of org.apache.hadoop.hive.ql.plan.MoveWork in project hive by apache.

The class TestGenMapRedUtilsCreateConditionalTask, method testConditionalMoveOnHdfsIsNotOptimized.

@Test
public void testConditionalMoveOnHdfsIsNotOptimized() throws SemanticException {
    // The blob-store optimization is switched on, but every path here is on
    // HDFS — so none of the three conditional branches should be collapsed.
    hiveConf.set(HiveConf.ConfVars.HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED.varname, "true");
    Path sinkDir = new Path("hdfs://bucket/scratch/-ext-10002");
    FileSinkOperator sinkOp = createFileSinkOperator(sinkDir);
    Path finalDir = new Path("hdfs://bucket/scratch/-ext-10000");
    Path tableDir = new Path("hdfs://bucket/warehouse/table");
    Task<MoveWork> initialMove = createMoveTask(finalDir, tableDir);
    List<Task<MoveWork>> moveTasks = Collections.singletonList(initialMove);
    GenMapRedUtils.createMRWorkForMergingFiles(sinkOp, finalDir, null, moveTasks, hiveConf, dummyMRTask, new LineageState());
    ConditionalTask condTask = (ConditionalTask) dummyMRTask.getChildTasks().get(0);
    Task<?> moveOnly = condTask.getListTasks().get(0);
    Task<?> mergeOnly = condTask.getListTasks().get(1);
    Task<?> mergeAndMove = condTask.getListTasks().get(2);
    // The move-only branch keeps its two-step move chain (not optimized).
    assertEquals(1, moveOnly.getChildTasks().size());
    verifyMoveTask(moveOnly, sinkDir, finalDir);
    verifyMoveTask(moveOnly.getChildTasks().get(0), finalDir, tableDir);
    // The merge-only branch keeps its follow-up move (not optimized).
    assertEquals(1, mergeOnly.getChildTasks().size());
    verifyMoveTask(mergeOnly.getChildTasks().get(0), finalDir, tableDir);
    // The merge-and-move branch keeps the full move chain (not optimized).
    assertEquals(1, mergeAndMove.getChildTasks().size());
    assertEquals(1, mergeAndMove.getChildTasks().get(0).getChildTasks().size());
    verifyMoveTask(mergeAndMove.getChildTasks().get(0), sinkDir, finalDir);
    verifyMoveTask(mergeAndMove.getChildTasks().get(0).getChildTasks().get(0), finalDir, tableDir);
}
Also used : Path(org.apache.hadoop.fs.Path) MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) Task(org.apache.hadoop.hive.ql.exec.Task) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) MoveTask(org.apache.hadoop.hive.ql.exec.MoveTask) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) LineageState(org.apache.hadoop.hive.ql.session.LineageState) Test(org.junit.Test)

Example 40 with MoveWork

use of org.apache.hadoop.hive.ql.plan.MoveWork in project hive by apache.

The class TestGenMapRedUtilsCreateConditionalTask, method testMergePathValidMoveWorkReturnsNewMoveWork.

/**
 * mergeMovePaths must return a brand-new MoveWork (never the input instance)
 * whose load descriptor reads from the conditional input path while keeping
 * the original target — verified for both the loadFileWork and loadTableWork
 * variants of MoveWork.
 */
@Test
public void testMergePathValidMoveWorkReturnsNewMoveWork() {
    final Path condInputPath = new Path("s3a://bucket/scratch/-ext-10000");
    final Path condOutputPath = new Path("s3a://bucket/scratch/-ext-10002");
    final Path targetMoveWorkPath = new Path("s3a://bucket/scratch/-ext-10003");
    final MoveWork mockWork = mock(MoveWork.class);
    final LineageState lineageState = new LineageState();
    MoveWork newWork;
    // Case 1: MoveWork built around a LoadFileDesc.
    when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc(condOutputPath, targetMoveWorkPath, false, "", "", false));
    newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork, lineageState);
    assertNotNull(newWork);
    assertNotEquals(newWork, mockWork);
    assertEquals(condInputPath, newWork.getLoadFileWork().getSourcePath());
    assertEquals(targetMoveWorkPath, newWork.getLoadFileWork().getTargetDir());
    // Case 2: MoveWork built around a LoadTableDesc.
    TableDesc tableDesc = new TableDesc();
    reset(mockWork);
    when(mockWork.getLoadTableWork()).thenReturn(new LoadTableDesc(condOutputPath, tableDesc, null));
    newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork, lineageState);
    assertNotNull(newWork);
    assertNotEquals(newWork, mockWork);
    assertEquals(condInputPath, newWork.getLoadTableWork().getSourcePath());
    // assertEquals reports both values on failure (and tolerates null),
    // unlike the previous assertTrue(actual.equals(expected)).
    assertEquals(tableDesc, newWork.getLoadTableWork().getTable());
}
Also used : Path(org.apache.hadoop.fs.Path) MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) LoadFileDesc(org.apache.hadoop.hive.ql.plan.LoadFileDesc) LineageState(org.apache.hadoop.hive.ql.session.LineageState) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) Test(org.junit.Test)

Aggregations

MoveWork (org.apache.hadoop.hive.ql.plan.MoveWork)42 Path (org.apache.hadoop.fs.Path)30 LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc)24 LoadFileDesc (org.apache.hadoop.hive.ql.plan.LoadFileDesc)11 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)10 TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)8 Context (org.apache.hadoop.hive.ql.Context)7 ConditionalTask (org.apache.hadoop.hive.ql.exec.ConditionalTask)7 Task (org.apache.hadoop.hive.ql.exec.Task)7 Partition (org.apache.hadoop.hive.ql.metadata.Partition)7 FileSinkDesc (org.apache.hadoop.hive.ql.plan.FileSinkDesc)7 Test (org.junit.Test)7 ArrayList (java.util.ArrayList)6 BasicStatsWork (org.apache.hadoop.hive.ql.plan.BasicStatsWork)6 LoadMultiFilesDesc (org.apache.hadoop.hive.ql.plan.LoadMultiFilesDesc)6 StatsWork (org.apache.hadoop.hive.ql.plan.StatsWork)6 Serializable (java.io.Serializable)5 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)5 MoveTask (org.apache.hadoop.hive.ql.exec.MoveTask)5 DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork)5