Example 66 with MapWork

Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.

From the class VectorizedRowBatchCtx, the method getPartitionValues:

public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, Configuration hiveConf, FileSplit split, Object[] partitionValues) throws IOException {
    // TODO: this is invalid for SMB. Keep this for now for legacy reasons. See the other overload.
    MapWork mapWork = Utilities.getMapWork(hiveConf);
    getPartitionValues(vrbCtx, mapWork, split, partitionValues);
}
Also used: MapWork (org.apache.hadoop.hive.ql.plan.MapWork)
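
For context, a minimal caller sketch (hypothetical, not part of Hive): a vectorized record reader sizes the values array from its batch context and lets this overload resolve the MapWork from the configuration. The helper name is an assumption, and getPartitionColumnCount() is assumed to be available on VectorizedRowBatchCtx; the other types are the ones used above.

static Object[] readPartitionValues(VectorizedRowBatchCtx vrbCtx, Configuration conf,
        FileSplit split) throws IOException {
    // One slot per partition column declared in the batch context.
    Object[] partitionValues = new Object[vrbCtx.getPartitionColumnCount()];
    // Fills the array in place; the MapWork is resolved from conf by the overload above.
    VectorizedRowBatchCtx.getPartitionValues(vrbCtx, conf, split, partitionValues);
    return partitionValues;
}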

Example 67 with MapWork

Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.

From the class TestUtilities, the method runTestGetInputPaths:

private void runTestGetInputPaths(JobConf jobConf, int numOfPartitions) throws Exception {
    MapWork mapWork = new MapWork();
    Path scratchDir = new Path(HiveConf.getVar(jobConf, HiveConf.ConfVars.LOCALSCRATCHDIR));
    LinkedHashMap<Path, ArrayList<String>> pathToAliasTable = new LinkedHashMap<>();
    String testTableName = "testTable";
    Path testTablePath = new Path(testTableName);
    Path[] testPartitionsPaths = new Path[numOfPartitions];
    for (int i = 0; i < numOfPartitions; i++) {
        String testPartitionName = "p=" + i;
        testPartitionsPaths[i] = new Path(testTablePath, testPartitionName);
        pathToAliasTable.put(testPartitionsPaths[i], Lists.newArrayList(testPartitionName));
        mapWork.getAliasToWork().put(testPartitionName, (Operator<?>) mock(Operator.class));
    }
    mapWork.setPathToAliases(pathToAliasTable);
    FileSystem fs = FileSystem.getLocal(jobConf);
    try {
        fs.mkdirs(testTablePath);
        for (int i = 0; i < numOfPartitions; i++) {
            fs.mkdirs(testPartitionsPaths[i]);
            fs.create(new Path(testPartitionsPaths[i], "test1.txt")).close();
        }
        List<Path> inputPaths = Utilities.getInputPaths(jobConf, mapWork, scratchDir, mock(Context.class), false);
        assertEquals(inputPaths.size(), numOfPartitions);
        for (int i = 0; i < numOfPartitions; i++) {
            assertEquals(inputPaths.get(i), testPartitionsPaths[i]);
        }
    } finally {
        if (fs.exists(testTablePath)) {
            fs.delete(testTablePath, true);
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Context (org.apache.hadoop.hive.ql.Context), MapWork (org.apache.hadoop.hive.ql.plan.MapWork), FileSystem (org.apache.hadoop.fs.FileSystem), ArrayList (java.util.ArrayList), LinkedHashMap (java.util.LinkedHashMap)
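
Because the helper is private, concrete tests delegate to it. A plausible pair of callers (a sketch in the style of TestUtilities; the test names are assumptions) pins the input-listing thread pool to one thread and then to two, exercising the same listing logic sequentially and concurrently:

@Test
public void testGetInputPathsWithASingleThread() throws Exception {
    JobConf jobConf = new JobConf();
    // Force sequential listing of the partition directories.
    jobConf.setInt(HiveConf.ConfVars.HIVE_EXEC_INPUT_LISTING_MAX_THREADS.varname, 1);
    runTestGetInputPaths(jobConf, 5);
}

@Test
public void testGetInputPathsWithMultipleThreads() throws Exception {
    JobConf jobConf = new JobConf();
    // List the same directories from two worker threads.
    jobConf.setInt(HiveConf.ConfVars.HIVE_EXEC_INPUT_LISTING_MAX_THREADS.varname, 2);
    runTestGetInputPaths(jobConf, 5);
}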

Example 68 with MapWork

Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.

From the class TestUtilities, the method testGetInputPathsWithMultipleThreadsAndEmptyPartitions:

/**
 * Check that calling {@link Utilities#getInputPaths(JobConf, MapWork, Path, Context, boolean)}
 * can process a table with multiple empty partitions when using multiple threads.
 * Some extra logic is placed at the end of the test to validate no race conditions put the
 * {@link MapWork} object in an invalid state.
 */
@Test
public void testGetInputPathsWithMultipleThreadsAndEmptyPartitions() throws Exception {
    int numPartitions = 15;
    JobConf jobConf = new JobConf();
    jobConf.setInt(HiveConf.ConfVars.HIVE_EXEC_INPUT_LISTING_MAX_THREADS.varname, Runtime.getRuntime().availableProcessors() * 2);
    MapWork mapWork = new MapWork();
    Path testTablePath = new Path("testTable");
    Path[] testPartitionsPaths = new Path[numPartitions];
    PartitionDesc mockPartitionDesc = mock(PartitionDesc.class);
    TableDesc mockTableDesc = mock(TableDesc.class);
    when(mockTableDesc.isNonNative()).thenReturn(false);
    when(mockTableDesc.getProperties()).thenReturn(new Properties());
    when(mockPartitionDesc.getProperties()).thenReturn(new Properties());
    when(mockPartitionDesc.getTableDesc()).thenReturn(mockTableDesc);
    doReturn(HiveSequenceFileOutputFormat.class).when(mockPartitionDesc).getOutputFileFormatClass();
    for (int i = 0; i < numPartitions; i++) {
        String testPartitionName = "p=" + i;
        testPartitionsPaths[i] = new Path(testTablePath, testPartitionName);
        mapWork.getPathToAliases().put(testPartitionsPaths[i], Lists.newArrayList(testPartitionName));
        mapWork.getAliasToWork().put(testPartitionName, (Operator<?>) mock(Operator.class));
        mapWork.getPathToPartitionInfo().put(testPartitionsPaths[i], mockPartitionDesc);
    }
    FileSystem fs = FileSystem.getLocal(jobConf);
    try {
        fs.mkdirs(testTablePath);
        List<Path> inputPaths = Utilities.getInputPaths(jobConf, mapWork, new Path(HiveConf.getVar(jobConf, HiveConf.ConfVars.LOCALSCRATCHDIR)), mock(Context.class), false);
        assertEquals(inputPaths.size(), numPartitions);
        for (int i = 0; i < numPartitions; i++) {
            assertNotEquals(inputPaths.get(i), testPartitionsPaths[i]);
        }
        assertEquals(mapWork.getPathToAliases().size(), numPartitions);
        assertEquals(mapWork.getPathToPartitionInfo().size(), numPartitions);
        assertEquals(mapWork.getAliasToWork().size(), numPartitions);
        for (Map.Entry<Path, ArrayList<String>> entry : mapWork.getPathToAliases().entrySet()) {
            assertNotNull(entry.getKey());
            assertNotNull(entry.getValue());
            assertEquals(entry.getValue().size(), 1);
            assertTrue(entry.getKey().getFileSystem(new Configuration()).exists(entry.getKey()));
        }
    } finally {
        if (fs.exists(testTablePath)) {
            fs.delete(testTablePath, true);
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Context (org.apache.hadoop.hive.ql.Context), Configuration (org.apache.hadoop.conf.Configuration), ArrayList (java.util.ArrayList), Properties (java.util.Properties), MapWork (org.apache.hadoop.hive.ql.plan.MapWork), FileSystem (org.apache.hadoop.fs.FileSystem), PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), JobConf (org.apache.hadoop.mapred.JobConf), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), LinkedHashMap (java.util.LinkedHashMap), Test (org.junit.Test)

Example 69 with MapWork

Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.

From the class MoveTask, the method inferTaskInformation:

private void inferTaskInformation(TaskInformation ti) {
    // Find the first ancestor of this MoveTask that is some form of map reduce task
    // (either standard, local, or a merge)
    while (ti.task.getParentTasks() != null && ti.task.getParentTasks().size() == 1) {
        ti.task = (Task) ti.task.getParentTasks().get(0);
        // If it was a merge task or a local map reduce task, nothing can be inferred
        if (ti.task instanceof MergeFileTask || ti.task instanceof MapredLocalTask) {
            break;
        }
        // If it's a standard map reduce task, check what, if anything, it inferred about
        // the directory this move task is moving
        if (ti.task instanceof MapRedTask) {
            MapredWork work = (MapredWork) ti.task.getWork();
            MapWork mapWork = work.getMapWork();
            ti.bucketCols = mapWork.getBucketedColsByDirectory().get(ti.path);
            ti.sortCols = mapWork.getSortedColsByDirectory().get(ti.path);
            if (work.getReduceWork() != null) {
                ti.numBuckets = work.getReduceWork().getNumReduceTasks();
            }
            if (ti.bucketCols != null || ti.sortCols != null) {
                // This must be a final map reduce task (the task containing the FileSink
                // operator that writes the final output)
                assert work.isFinalMapRed();
            }
            break;
        }
        // If it's a move task, the path being moved is what any preceding map reduce task
        // inferred information about; moving does not invalidate those assumptions. This can
        // happen when a conditional merge is added before the final MoveTask, but the
        // condition for merging is not met, see GenMRFileSink1.
        if (ti.task instanceof MoveTask) {
            MoveTask mt = (MoveTask) ti.task;
            if (mt.getWork().getLoadFileWork() != null) {
                ti.path = mt.getWork().getLoadFileWork().getSourcePath().toUri().toString();
            }
        }
    }
}
Also used: MapRedTask (org.apache.hadoop.hive.ql.exec.mr.MapRedTask), MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork), MapWork (org.apache.hadoop.hive.ql.plan.MapWork), MapredLocalTask (org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask), MergeFileTask (org.apache.hadoop.hive.ql.io.merge.MergeFileTask)
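
For reference, a sketch of the TaskInformation holder that the method mutates, reconstructed from the field accesses above. The real class is a private helper inside MoveTask; BucketCol and SortCol are the element types returned by MapWork.getBucketedColsByDirectory() and getSortedColsByDirectory(), and anything beyond the five fields used above is an assumption.

private static class TaskInformation {
    // The ancestor task currently being inspected.
    Task task;
    // The directory this MoveTask is moving; bucket/sort metadata is keyed by it.
    String path;
    // Bucketing and sort columns inferred for that directory, if any.
    List<BucketCol> bucketCols;
    List<SortCol> sortCols;
    // Number of reduce tasks, which equals the number of buckets written.
    int numBuckets = -1;

    TaskInformation(Task task, String path) {
        this.task = task;
        this.path = path;
    }
}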

Example 70 with MapWork

Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.

From the class LlapRecordReader, the method findMapWork:

private static MapWork findMapWork(JobConf job) throws HiveException {
    String inputName = job.get(Utilities.INPUT_NAME, null);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Initializing for input " + inputName);
    }
    String prefixes = job.get(DagUtils.TEZ_MERGE_WORK_FILE_PREFIXES);
    if (prefixes != null && !StringUtils.isBlank(prefixes)) {
        // Merge prefixes are present (the SMB case), which is not supported here, so we
        // don't use the code below that would get the correct MapWork. See HIVE-16985.
        return null;
    }
    BaseWork work = null;
    // HIVE-16985: try to find the fake merge work for SMB join, that is really another MapWork.
    if (inputName != null) {
        if (prefixes == null || !Lists.newArrayList(prefixes.split(",")).contains(inputName)) {
            inputName = null;
        }
    }
    if (inputName != null) {
        work = Utilities.getMergeWork(job, inputName);
    }
    if (work == null || !(work instanceof MapWork)) {
        work = Utilities.getMapWork(job);
    }
    return (MapWork) work;
}
Also used: MapWork (org.apache.hadoop.hive.ql.plan.MapWork), BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork)
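
A minimal consumer sketch (a hypothetical wrapper, not LlapRecordReader's actual flow): a null result means no usable MapWork could be resolved for this input, e.g. the SMB merge case above, so a caller would fall back to a non-LLAP read path.

static boolean canUseLlapIo(JobConf job) throws HiveException {
    // LLAP IO needs a resolvable MapWork for the current input (see HIVE-16985).
    return findMapWork(job) != null;
}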

Aggregations

MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 79 uses
ArrayList (java.util.ArrayList): 25 uses
Path (org.apache.hadoop.fs.Path): 24 uses
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 23 uses
Operator (org.apache.hadoop.hive.ql.exec.Operator): 21 uses
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 17 uses
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 16 uses
JobConf (org.apache.hadoop.mapred.JobConf): 15 uses
Test (org.junit.Test): 15 uses
BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork): 14 uses
ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork): 14 uses
MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork): 13 uses
Serializable (java.io.Serializable): 12 uses
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 12 uses
Task (org.apache.hadoop.hive.ql.exec.Task): 12 uses
PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 12 uses
Context (org.apache.hadoop.hive.ql.Context): 11 uses
LinkedHashMap (java.util.LinkedHashMap): 10 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 10 uses
ConditionalTask (org.apache.hadoop.hive.ql.exec.ConditionalTask): 10 uses