Search in sources :

Example 11 with TezSharedExecutor

Use of org.apache.tez.common.TezSharedExecutor in the Apache Tez project.

From the class TestOnFileUnorderedKVOutput, method testGeneratedDataMovementEvent.

/**
 * Writes generated key/value pairs through an {@link UnorderedKVOutput} and verifies the
 * events returned by {@code close()}.
 *
 * <p>The test checks that {@code initialize()} returns an empty event list, that
 * {@code close()} returns exactly two events, and that the second event is a
 * {@link CompositeDataMovementEvent} whose payload carries the expected path component,
 * port and host. It also checks the IO statistics exposed by {@code task}.
 *
 * <p>NOTE(review): {@code task}, {@code shufflePort} and {@code createOutputContext} are
 * declared outside this method; presumably fixtures of the enclosing test class - confirm.
 *
 * @throws Exception if the output cannot be initialized, written to or closed
 */
@Test(timeout = 5000)
public void testGeneratedDataMovementEvent() throws Exception {
    Configuration conf = new Configuration();
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, Text.class.getName());
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());
    TezSharedExecutor sharedExecutor = new TezSharedExecutor(conf);
    try {
        OutputContext outputContext = createOutputContext(conf, sharedExecutor);
        UnorderedKVOutput kvOutput = new UnorderedKVOutput(outputContext, 1);
        List<Event> events = kvOutput.initialize();
        kvOutput.start();
        // Initialization must not emit any events.
        assertTrue(events != null && events.size() == 0);

        KeyValueWriter kvWriter = kvOutput.getWriter();
        List<KVPair> data = KVDataGen.generateTestData(true, 0);
        for (KVPair kvp : data) {
            kvWriter.write(kvp.getKey(), kvp.getvalue());
        }
        events = kvOutput.close();

        assertEquals(45, task.getTaskStatistics().getIOStatistics().values().iterator().next().getDataSize());
        assertEquals(5, task.getTaskStatistics().getIOStatistics().values().iterator().next().getItemsProcessed());
        assertTrue(events != null && events.size() == 2);

        // The data movement event is expected at index 1 of the events returned by close().
        CompositeDataMovementEvent dmEvent = (CompositeDataMovementEvent) events.get(1);
        assertEquals("Invalid source index", 0, dmEvent.getSourceIndexStart());
        DataMovementEventPayloadProto shufflePayload = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(dmEvent.getUserPayload()));
        assertFalse(shufflePayload.hasEmptyPartitions());
        assertEquals(outputContext.getUniqueIdentifier(), shufflePayload.getPathComponent());
        assertEquals(shufflePort, shufflePayload.getPort());
        assertEquals("localhost", shufflePayload.getHost());
    } finally {
        // Always release the shared executor, even when an assertion above fails.
        sharedExecutor.shutdownNow();
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) Text(org.apache.hadoop.io.Text) OutputContext(org.apache.tez.runtime.api.OutputContext) KeyValueWriter(org.apache.tez.runtime.library.api.KeyValueWriter) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) TezSharedExecutor(org.apache.tez.common.TezSharedExecutor) KVPair(org.apache.tez.runtime.library.testutils.KVDataGen.KVPair) Event(org.apache.tez.runtime.api.Event) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) DataMovementEventPayloadProto(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Example 12 with TezSharedExecutor

Use of org.apache.tez.common.TezSharedExecutor in the Apache Tez project.

From the class TestMapProcessor, method testMapProcessorProgress.

/**
 * Runs a map task and verifies that an intermediate progress value, strictly between
 * 0 and 1, is observed while the task is running.
 *
 * <p>A scheduled monitor samples {@code task.getProgress()} every millisecond and stores
 * any value in the open interval (0, 1) in {@code progressUpdate}, which is asserted on
 * after {@code task.run()} returns.
 *
 * <p>NOTE(review): {@code progressUpdate} is declared outside this method and is written
 * from the scheduler thread but read from the test thread; confirm it is declared so that
 * the write is visible across threads (e.g. {@code volatile}).
 *
 * @throws Exception if the task cannot be set up, run or closed
 */
@Test(timeout = 30000)
public void testMapProcessorProgress() throws Exception {
    String dagName = "mrdag0";
    String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
    JobConf jobConf = new JobConf(defaultConf);
    setUpJobConf(jobConf);
    MRHelpers.translateMRConfToTez(jobConf);
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
    jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, new Path(workDir, "localized-resources").toUri().toString());
    Path mapInput = new Path(workDir, "map0");
    MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput, 100000);
    InputSpec mapInputSpec = new InputSpec("NullSrcVertex", InputDescriptor.create(MRInputLegacy.class.getName()).setUserPayload(UserPayload.create(ByteBuffer.wrap(MRRuntimeProtos.MRInputUserPayloadProto.newBuilder().setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf)).build().toByteArray()))), 1);
    OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex", OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);
    TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
    ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
    try {
        final LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0, new Path(workDir, "map0"), new TestUmbilical(), dagName, vertexName, Collections.singletonList(mapInputSpec), Collections.singletonList(mapOutputSpec), sharedExecutor);
        // A plain Runnable is enough: the scheduler only ever invokes run(), so wrapping
        // the monitor in a Thread that is never started adds nothing.
        Runnable monitorProgress = new Runnable() {

            @Override
            public void run() {
                float prog = task.getProgress();
                // Record only intermediate values; 0 and 1 do not prove a progress update.
                if (prog > 0.0f && prog < 1.0f) {
                    progressUpdate = prog;
                }
            }
        };
        task.initialize();
        scheduler.scheduleAtFixedRate(monitorProgress, 0, 1, TimeUnit.MILLISECONDS);
        task.run();
        Assert.assertTrue("Progress Updates should be captured!", progressUpdate > 0.0f && progressUpdate < 1.0f);
        task.close();
    } finally {
        // Stop the 1 ms polling job; otherwise its worker thread outlives the test.
        scheduler.shutdownNow();
        // Always release the shared executor, even when the test fails.
        sharedExecutor.shutdownNow();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) LogicalIOProcessorRuntimeTask(org.apache.tez.runtime.LogicalIOProcessorRuntimeTask) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) TestUmbilical(org.apache.tez.mapreduce.TestUmbilical) InputSpec(org.apache.tez.runtime.api.impl.InputSpec) OrderedPartitionedKVOutput(org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput) TezSharedExecutor(org.apache.tez.common.TezSharedExecutor) MRInputLegacy(org.apache.tez.mapreduce.input.MRInputLegacy) JobConf(org.apache.hadoop.mapred.JobConf) OutputSpec(org.apache.tez.runtime.api.impl.OutputSpec) Test(org.junit.Test)

Aggregations

TezSharedExecutor (org.apache.tez.common.TezSharedExecutor): 12; Test (org.junit.Test): 9; Configuration (org.apache.hadoop.conf.Configuration): 7; DefaultHadoopShim (org.apache.tez.hadoop.shim.DefaultHadoopShim): 6; LogicalIOProcessorRuntimeTask (org.apache.tez.runtime.LogicalIOProcessorRuntimeTask): 6; TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 5; InputSpec (org.apache.tez.runtime.api.impl.InputSpec): 5; OutputSpec (org.apache.tez.runtime.api.impl.OutputSpec): 5; TaskSpec (org.apache.tez.runtime.api.impl.TaskSpec): 5; ByteBuffer (java.nio.ByteBuffer): 4; HashMap (java.util.HashMap): 4; Path (org.apache.hadoop.fs.Path): 4; Text (org.apache.hadoop.io.Text): 4; ProcessorDescriptor (org.apache.tez.dag.api.ProcessorDescriptor): 4; TezDAGID (org.apache.tez.dag.records.TezDAGID): 4; TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID): 4; TezVertexID (org.apache.tez.dag.records.TezVertexID): 4; TestUmbilical (org.apache.tez.mapreduce.TestUmbilical): 4; OutputContext (org.apache.tez.runtime.api.OutputContext): 4; ExecutionContextImpl (org.apache.tez.runtime.api.impl.ExecutionContextImpl): 4