Search in sources :

Example 26 with InputSpec

use of org.apache.tez.runtime.api.impl.InputSpec in project tez by apache.

In the class TestMapProcessor, method testMapProcessorProgress:

/**
 * Runs a map task built from {@link MRInputLegacy} and
 * {@link OrderedPartitionedKVOutput} and verifies that an intermediate
 * progress value (strictly between 0 and 1) is observed while it runs.
 *
 * <p>A single-threaded scheduler polls {@code task.getProgress()} every
 * millisecond and stores any value in the open interval (0, 1) into the
 * {@code progressUpdate} field, which is asserted on once {@code task.run()}
 * has returned.
 *
 * @throws Exception if the task cannot be set up, initialized, run or closed
 */
@Test(timeout = 30000)
public void testMapProcessorProgress() throws Exception {
    String dagName = "mrdag0";
    String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
    JobConf jobConf = new JobConf(defaultConf);
    setUpJobConf(jobConf);
    MRHelpers.translateMRConfToTez(jobConf);
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
    jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, new Path(workDir, "localized-resources").toUri().toString());
    Path mapInput = new Path(workDir, "map0");
    MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput, 100000);
    InputSpec mapInputSpec = new InputSpec("NullSrcVertex", InputDescriptor.create(MRInputLegacy.class.getName()).setUserPayload(UserPayload.create(ByteBuffer.wrap(MRRuntimeProtos.MRInputUserPayloadProto.newBuilder().setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf)).build().toByteArray()))), 1);
    OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex", OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);
    TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
    ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
    try {
        final LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0, mapInput, new TestUmbilical(), dagName, vertexName, Collections.singletonList(mapInputSpec), Collections.singletonList(mapOutputSpec), sharedExecutor);
        // A plain Runnable is enough: the scheduler supplies the thread, so
        // wrapping the callback in a Thread that is never started adds nothing.
        Runnable monitorProgress = new Runnable() {

            @Override
            public void run() {
                float prog = task.getProgress();
                // Only keep intermediate values; 0 and 1 say nothing about
                // progress having been reported while the task was running.
                if (prog > 0.0f && prog < 1.0f) {
                    progressUpdate = prog;
                }
            }
        };
        task.initialize();
        scheduler.scheduleAtFixedRate(monitorProgress, 0, 1, TimeUnit.MILLISECONDS);
        task.run();
        // NOTE(review): progressUpdate is written from the scheduler thread and
        // read here; confirm the field's declaration guarantees visibility.
        Assert.assertTrue("Progress Updates should be captured!", progressUpdate > 0.0f && progressUpdate < 1.0f);
        task.close();
    } finally {
        // Always stop both executors, even when the task or the assertion
        // fails, so no worker threads outlive this test.
        scheduler.shutdownNow();
        sharedExecutor.shutdownNow();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) LogicalIOProcessorRuntimeTask(org.apache.tez.runtime.LogicalIOProcessorRuntimeTask) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) TestUmbilical(org.apache.tez.mapreduce.TestUmbilical) InputSpec(org.apache.tez.runtime.api.impl.InputSpec) OrderedPartitionedKVOutput(org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput) TezSharedExecutor(org.apache.tez.common.TezSharedExecutor) MRInputLegacy(org.apache.tez.mapreduce.input.MRInputLegacy) JobConf(org.apache.hadoop.mapred.JobConf) OutputSpec(org.apache.tez.runtime.api.impl.OutputSpec) Test(org.junit.Test)

Example 27 with InputSpec

use of org.apache.tez.runtime.api.impl.InputSpec in project tez by apache.

In the class TestMROutput, method createLogicalTask:

/**
 * Builds a {@link LogicalIOProcessorRuntimeTask} for tests.
 *
 * <p>The task spec uses {@code TestProcessor} as its processor, has an empty
 * input-spec list, and has exactly one output spec named {@code "Null"} whose
 * descriptor comes from an {@code MROutput} config builder set up with
 * {@code TestOutputFormat}. The working directory handed to the task is
 * {@code TestMapOutput} under the {@code test.build.data} system property
 * (falling back to {@code /tmp}), qualified against the local file system.
 *
 * @param conf configuration used for the output descriptor, the local file
 *             system lookup and the task itself
 * @param umbilical umbilical passed through to the task
 * @param dagName DAG name recorded in the task spec
 * @param vertexName vertex name recorded in the task spec
 * @param sharedExecutor executor passed through to the task
 * @return the newly constructed task
 * @throws Exception if any of the collaborators fail during construction
 */
public static LogicalIOProcessorRuntimeTask createLogicalTask(Configuration conf, TezUmbilical umbilical, String dagName, String vertexName, TezExecutors sharedExecutor) throws Exception {
    ProcessorDescriptor processorDescriptor = ProcessorDescriptor.create(TestProcessor.class.getName());

    // The task has no inputs and a single MROutput-backed output.
    List<InputSpec> noInputs = Lists.newLinkedList();
    List<OutputSpec> singleOutput = Lists.newLinkedList();
    OutputSpec mrOutputSpec = new OutputSpec(
        "Null",
        MROutput.createConfigBuilder(conf, TestOutputFormat.class).build().getOutputDescriptor(),
        1);
    singleOutput.add(mrOutputSpec);

    TaskSpec spec = new TaskSpec(
        TezTestUtils.getMockTaskAttemptId(0, 0, 0, 0),
        dagName,
        vertexName,
        -1,
        processorDescriptor,
        noInputs,
        singleOutput,
        null,
        null);

    // Resolve the fully qualified working directory on the local file system.
    FileSystem localFileSystem = FileSystem.getLocal(conf);
    Path buildDataRoot = new Path(System.getProperty("test.build.data", "/tmp"));
    Path qualifiedWorkDir = new Path(buildDataRoot, "TestMapOutput")
        .makeQualified(localFileSystem.getUri(), localFileSystem.getWorkingDirectory());
    String[] workDirs = new String[] { qualifiedWorkDir.toString() };

    return new LogicalIOProcessorRuntimeTask(
        spec,
        0,
        conf,
        workDirs,
        umbilical,
        null,
        new HashMap<String, String>(),
        HashMultimap.<String, String>create(),
        null,
        "",
        new ExecutionContextImpl("localhost"),
        Runtime.getRuntime().maxMemory(),
        true,
        new DefaultHadoopShim(),
        sharedExecutor);
}
Also used : Path(org.apache.hadoop.fs.Path) LogicalIOProcessorRuntimeTask(org.apache.tez.runtime.LogicalIOProcessorRuntimeTask) ExecutionContextImpl(org.apache.tez.runtime.api.impl.ExecutionContextImpl) TaskSpec(org.apache.tez.runtime.api.impl.TaskSpec) ProcessorDescriptor(org.apache.tez.dag.api.ProcessorDescriptor) InputSpec(org.apache.tez.runtime.api.impl.InputSpec) DefaultHadoopShim(org.apache.tez.hadoop.shim.DefaultHadoopShim) FileSystem(org.apache.hadoop.fs.FileSystem) OutputSpec(org.apache.tez.runtime.api.impl.OutputSpec)

Aggregations

InputSpec (org.apache.tez.runtime.api.impl.InputSpec) 27, OutputSpec (org.apache.tez.runtime.api.impl.OutputSpec) 18, GroupInputSpec (org.apache.tez.runtime.api.impl.GroupInputSpec) 15, TaskSpec (org.apache.tez.runtime.api.impl.TaskSpec) 12, Test (org.junit.Test) 12, TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID) 10, ProcessorDescriptor (org.apache.tez.dag.api.ProcessorDescriptor) 9, TaskLocationHint (org.apache.tez.dag.api.TaskLocationHint) 7, TezTaskID (org.apache.tez.dag.records.TezTaskID) 7, PlanTaskLocationHint (org.apache.tez.dag.api.records.DAGProtos.PlanTaskLocationHint) 6, StateChangeNotifierForTest (org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest) 6, EdgeManagerForTest (org.apache.tez.test.EdgeManagerForTest) 6, GraceShuffleVertexManagerForTest (org.apache.tez.test.GraceShuffleVertexManagerForTest) 6, VertexManagerPluginForTest (org.apache.tez.test.VertexManagerPluginForTest) 6, Path (org.apache.hadoop.fs.Path) 5, TezSharedExecutor (org.apache.tez.common.TezSharedExecutor) 5, ArrayList (java.util.ArrayList) 4, InputDescriptor (org.apache.tez.dag.api.InputDescriptor) 4, VertexLocationHint (org.apache.tez.dag.api.VertexLocationHint) 4, VertexEvent (org.apache.tez.dag.app.dag.event.VertexEvent) 4