
Example 11 with MapWork

Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.

From the class TestUtilities, method runTestGetInputSummary.

private ContentSummary runTestGetInputSummary(JobConf jobConf, Properties properties, int numOfPartitions, int bytesPerFile, Class<? extends InputFormat> inputFormatClass) throws IOException {
    // creates scratch directories needed by the Context object
    SessionState.start(new HiveConf());
    MapWork mapWork = new MapWork();
    Context context = new Context(jobConf);
    LinkedHashMap<Path, PartitionDesc> pathToPartitionInfo = new LinkedHashMap<>();
    LinkedHashMap<Path, ArrayList<String>> pathToAliasTable = new LinkedHashMap<>();
    TableScanOperator scanOp = new TableScanOperator();
    PartitionDesc partitionDesc = new PartitionDesc(new TableDesc(inputFormatClass, null, properties), null);
    String testTableName = "testTable";
    Path testTablePath = new Path(testTableName);
    Path[] testPartitionsPaths = new Path[numOfPartitions];
    for (int i = 0; i < numOfPartitions; i++) {
        String testPartitionName = "p=" + i;
        testPartitionsPaths[i] = new Path(testTablePath, "p=" + i);
        pathToPartitionInfo.put(testPartitionsPaths[i], partitionDesc);
        pathToAliasTable.put(testPartitionsPaths[i], Lists.newArrayList(testPartitionName));
        mapWork.getAliasToWork().put(testPartitionName, scanOp);
    }
    mapWork.setPathToAliases(pathToAliasTable);
    mapWork.setPathToPartitionInfo(pathToPartitionInfo);
    FileSystem fs = FileSystem.getLocal(jobConf);
    try {
        fs.mkdirs(testTablePath);
        byte[] data = new byte[bytesPerFile];
        for (int i = 0; i < numOfPartitions; i++) {
            fs.mkdirs(testPartitionsPaths[i]);
            FSDataOutputStream out = fs.create(new Path(testPartitionsPaths[i], "test1.txt"));
            out.write(data);
            out.close();
        }
        return Utilities.getInputSummary(context, mapWork, null);
    } finally {
        if (fs.exists(testTablePath)) {
            fs.delete(testTablePath, true);
        }
    }
}
Also used : Context(org.apache.hadoop.hive.ql.Context) Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) FileSystem(org.apache.hadoop.fs.FileSystem) HiveConf(org.apache.hadoop.hive.conf.HiveConf) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream)
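
A hypothetical caller of this helper (a sketch: TextInputFormat, the partition count, and the byte count are illustrative assumptions, not taken from the original test class) could look like:

@Test
public void testGetInputSummaryPlainText() throws IOException {
    // Illustrative only: 5 partitions, 100 bytes per file, standard mapred TextInputFormat.
    JobConf jobConf = new JobConf();
    Properties properties = new Properties();
    ContentSummary summary = runTestGetInputSummary(jobConf, properties, 5, 100, org.apache.hadoop.mapred.TextInputFormat.class);
    // The helper writes exactly one bytesPerFile-sized file per partition.
    assertEquals(5 * 100, summary.getLength());
    assertEquals(5, summary.getFileCount());
}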

Example 12 with MapWork

Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.

From the class TestUtilities, method testGetInputPathsWithEmptyTables.

/**
   * Check that calling {@link Utilities#getInputPaths(JobConf, MapWork, Path, Context, boolean)}
   * can process two different empty tables without throwing any exceptions.
   */
@Test
public void testGetInputPathsWithEmptyTables() throws Exception {
    String alias1Name = "alias1";
    String alias2Name = "alias2";
    MapWork mapWork1 = new MapWork();
    MapWork mapWork2 = new MapWork();
    JobConf jobConf = new JobConf();
    Path nonExistentPath1 = new Path(UUID.randomUUID().toString());
    Path nonExistentPath2 = new Path(UUID.randomUUID().toString());
    PartitionDesc mockPartitionDesc = mock(PartitionDesc.class);
    TableDesc mockTableDesc = mock(TableDesc.class);
    when(mockTableDesc.isNonNative()).thenReturn(false);
    when(mockTableDesc.getProperties()).thenReturn(new Properties());
    when(mockPartitionDesc.getProperties()).thenReturn(new Properties());
    when(mockPartitionDesc.getTableDesc()).thenReturn(mockTableDesc);
    doReturn(HiveSequenceFileOutputFormat.class).when(mockPartitionDesc).getOutputFileFormatClass();
    mapWork1.setPathToAliases(new LinkedHashMap<>(ImmutableMap.of(nonExistentPath1, Lists.newArrayList(alias1Name))));
    mapWork1.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(ImmutableMap.of(alias1Name, (Operator<?>) mock(Operator.class))));
    mapWork1.setPathToPartitionInfo(new LinkedHashMap<>(ImmutableMap.of(nonExistentPath1, mockPartitionDesc)));
    mapWork2.setPathToAliases(new LinkedHashMap<>(ImmutableMap.of(nonExistentPath2, Lists.newArrayList(alias2Name))));
    mapWork2.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(ImmutableMap.of(alias2Name, (Operator<?>) mock(Operator.class))));
    mapWork2.setPathToPartitionInfo(new LinkedHashMap<>(ImmutableMap.of(nonExistentPath2, mockPartitionDesc)));
    List<Path> inputPaths = new ArrayList<>();
    try {
        Path scratchDir = new Path(HiveConf.getVar(jobConf, HiveConf.ConfVars.LOCALSCRATCHDIR));
        inputPaths.addAll(Utilities.getInputPaths(jobConf, mapWork1, scratchDir, mock(Context.class), false));
        inputPaths.addAll(Utilities.getInputPaths(jobConf, mapWork2, scratchDir, mock(Context.class), false));
        // JUnit's assertEquals takes (expected, actual) in that order.
        assertEquals(2, inputPaths.size());
    } finally {
        File file;
        for (Path path : inputPaths) {
            file = new File(path.toString());
            if (file.exists()) {
                file.delete();
            }
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) Properties(java.util.Properties) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) JobConf(org.apache.hadoop.mapred.JobConf) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) File(java.io.File) Test(org.junit.Test)
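
The test passes because Utilities.getInputPaths does not hand back the missing input paths directly: for an empty or non-existent table it generates a zero-length placeholder file under the scratch directory, which is why the finally block above deletes plain java.io.File objects. A minimal sketch of that call pattern for one of the MapWorks, under the same setup as above:

Path scratchDir = new Path(HiveConf.getVar(jobConf, HiveConf.ConfVars.LOCALSCRATCHDIR));
List<Path> paths = Utilities.getInputPaths(jobConf, mapWork1, scratchDir, mock(Context.class), false);
// nonExistentPath1 was never created, so the single returned path points at a
// generated placeholder under scratchDir, not at the original input path.
assertEquals(1, paths.size());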

Example 13 with MapWork

Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.

From the class TestDynamicPartitionPruner, method testSingleSourceMultipleFiltersOrdering2.

@Test(timeout = 5000)
public void testSingleSourceMultipleFiltersOrdering2() throws InterruptedException, SerDeException {
    InputInitializerContext mockInitContext = mock(InputInitializerContext.class);
    doReturn(2).when(mockInitContext).getVertexNumTasks("v1");
    MapWork mapWork = createMockMapWork(new TestSource("v1", 2));
    DynamicPartitionPruner pruner = new DynamicPartitionPrunerForEventTesting(mockInitContext, mapWork);
    PruneRunnable pruneRunnable = new PruneRunnable(pruner);
    Thread t = new Thread(pruneRunnable);
    t.start();
    try {
        pruneRunnable.start();
        InputInitializerEvent event = InputInitializerEvent.create("FakeTarget", "TargetInput", ByteBuffer.allocate(0));
        event.setSourceVertexName("v1");
        pruner.processVertex("v1");
        pruner.addEvent(event);
        pruner.addEvent(event);
        pruner.addEvent(event);
        pruner.addEvent(event);
        pruneRunnable.awaitEnd();
        assertFalse(pruneRunnable.inError.get());
    } finally {
        t.interrupt();
        t.join();
    }
}
Also used : InputInitializerEvent(org.apache.tez.runtime.api.events.InputInitializerEvent) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) InputInitializerContext(org.apache.tez.runtime.api.InputInitializerContext) Test(org.junit.Test)
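
PruneRunnable and DynamicPartitionPrunerForEventTesting are private helpers defined elsewhere in TestDynamicPartitionPruner; this listing does not include them. A plausible reconstruction of PruneRunnable, assuming latch-based handshaking around the pruner's prune() call (field and method names beyond those used above are guesses, not the shipped code):

// Hypothetical reconstruction; needs java.util.concurrent.CountDownLatch
// and java.util.concurrent.atomic.AtomicBoolean.
private static class PruneRunnable implements Runnable {

    final AtomicBoolean inError = new AtomicBoolean(false);
    private final DynamicPartitionPruner pruner;
    private final CountDownLatch startLatch = new CountDownLatch(1);
    private final CountDownLatch endLatch = new CountDownLatch(1);

    PruneRunnable(DynamicPartitionPruner pruner) {
        this.pruner = pruner;
    }

    void start() {
        // Releases run() so the pruner starts draining queued events.
        startLatch.countDown();
    }

    void awaitEnd() throws InterruptedException {
        // Blocks the test thread until prune() returns or fails.
        endLatch.await();
    }

    @Override
    public void run() {
        try {
            startLatch.await();
            pruner.prune();
        } catch (Exception e) {
            inError.set(true);
        } finally {
            endLatch.countDown();
        }
    }
}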

Example 14 with MapWork

Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.

From the class TestDynamicPartitionPruner, method testMultipleSourcesOrdering2.

@Test(timeout = 5000)
public void testMultipleSourcesOrdering2() throws InterruptedException, SerDeException {
    InputInitializerContext mockInitContext = mock(InputInitializerContext.class);
    doReturn(2).when(mockInitContext).getVertexNumTasks("v1");
    doReturn(3).when(mockInitContext).getVertexNumTasks("v2");
    MapWork mapWork = createMockMapWork(new TestSource("v1", 2), new TestSource("v2", 1));
    DynamicPartitionPruner pruner = new DynamicPartitionPrunerForEventTesting(mockInitContext, mapWork);
    PruneRunnable pruneRunnable = new PruneRunnable(pruner);
    Thread t = new Thread(pruneRunnable);
    t.start();
    try {
        pruneRunnable.start();
        InputInitializerEvent eventV1 = InputInitializerEvent.create("FakeTarget", "TargetInput", ByteBuffer.allocate(0));
        eventV1.setSourceVertexName("v1");
        InputInitializerEvent eventV2 = InputInitializerEvent.create("FakeTarget", "TargetInput", ByteBuffer.allocate(0));
        eventV2.setSourceVertexName("v2");
        // v1: 2 tasks x 2 expressions = 4 events; v2: 3 tasks x 1 expression = 3 events
        pruner.processVertex("v1");
        pruner.processVertex("v2");
        pruner.addEvent(eventV1);
        pruner.addEvent(eventV1);
        pruner.addEvent(eventV1);
        pruner.addEvent(eventV1);
        pruner.addEvent(eventV2);
        pruner.addEvent(eventV2);
        pruner.addEvent(eventV2);
        pruneRunnable.awaitEnd();
        assertFalse(pruneRunnable.inError.get());
    } finally {
        t.interrupt();
        t.join();
    }
}
Also used : InputInitializerEvent(org.apache.tez.runtime.api.events.InputInitializerEvent) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) InputInitializerContext(org.apache.tez.runtime.api.InputInitializerContext) Test(org.junit.Test)

Example 15 with MapWork

Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.

From the class TestDynamicPartitionPruner, method createMockMapWork.

private MapWork createMockMapWork(TestSource... testSources) {
    MapWork mapWork = mock(MapWork.class);
    Map<String, List<TableDesc>> tableMap = new HashMap<>();
    Map<String, List<String>> columnMap = new HashMap<>();
    Map<String, List<String>> typeMap = new HashMap<>();
    Map<String, List<ExprNodeDesc>> exprMap = new HashMap<>();
    int count = 0;
    for (TestSource testSource : testSources) {
        for (int i = 0; i < testSource.numExpressions; i++) {
            List<TableDesc> tableDescList = tableMap.get(testSource.vertexName);
            if (tableDescList == null) {
                tableDescList = new LinkedList<>();
                tableMap.put(testSource.vertexName, tableDescList);
            }
            tableDescList.add(mock(TableDesc.class));
            List<String> columnList = columnMap.get(testSource.vertexName);
            if (columnList == null) {
                columnList = new LinkedList<>();
                columnMap.put(testSource.vertexName, columnList);
            }
            columnList.add(testSource.vertexName + "c_" + count + "_" + i);
            List<String> typeList = typeMap.get(testSource.vertexName);
            if (typeList == null) {
                typeList = new LinkedList<>();
                typeMap.put(testSource.vertexName, typeList);
            }
            typeList.add("string");
            List<ExprNodeDesc> exprNodeDescList = exprMap.get(testSource.vertexName);
            if (exprNodeDescList == null) {
                exprNodeDescList = new LinkedList<>();
                exprMap.put(testSource.vertexName, exprNodeDescList);
            }
            exprNodeDescList.add(mock(ExprNodeDesc.class));
        }
        count++;
    }
    doReturn(tableMap).when(mapWork).getEventSourceTableDescMap();
    doReturn(columnMap).when(mapWork).getEventSourceColumnNameMap();
    doReturn(exprMap).when(mapWork).getEventSourcePartKeyExprMap();
    doReturn(typeMap).when(mapWork).getEventSourceColumnTypeMap();
    return mapWork;
}
Also used : HashMap(java.util.HashMap) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) List(java.util.List) LinkedList(java.util.LinkedList) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
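
The four get/null-check/put blocks in createMockMapWork all follow the same pattern; on Java 8+ each could be collapsed with Map.computeIfAbsent. A sketch of the equivalent loop body (same behavior, not the code Hive ships):

// Register one mock table desc, column name, column type, and partition-key
// expression per (source, expression) pair, creating each list on first use.
tableMap.computeIfAbsent(testSource.vertexName, k -> new LinkedList<>()).add(mock(TableDesc.class));
columnMap.computeIfAbsent(testSource.vertexName, k -> new LinkedList<>()).add(testSource.vertexName + "c_" + count + "_" + i);
typeMap.computeIfAbsent(testSource.vertexName, k -> new LinkedList<>()).add("string");
exprMap.computeIfAbsent(testSource.vertexName, k -> new LinkedList<>()).add(mock(ExprNodeDesc.class));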

Aggregations

MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 79
ArrayList (java.util.ArrayList): 25
Path (org.apache.hadoop.fs.Path): 24
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 23
Operator (org.apache.hadoop.hive.ql.exec.Operator): 21
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 17
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 16
JobConf (org.apache.hadoop.mapred.JobConf): 15
Test (org.junit.Test): 15
BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork): 14
ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork): 14
MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork): 13
Serializable (java.io.Serializable): 12
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 12
Task (org.apache.hadoop.hive.ql.exec.Task): 12
PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 12
Context (org.apache.hadoop.hive.ql.Context): 11
LinkedHashMap (java.util.LinkedHashMap): 10
FileSystem (org.apache.hadoop.fs.FileSystem): 10
ConditionalTask (org.apache.hadoop.hive.ql.exec.ConditionalTask): 10