Search in sources :

Example 1 with TestUmbilical

use of org.apache.tez.mapreduce.TestUmbilical in project tez by apache.

the class TestMapProcessor method testMapProcessor.

@Test(timeout = 5000)
public void testMapProcessor() throws Exception {
    String dagName = "mrdag0";
    String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
    JobConf jobConf = new JobConf(defaultConf);
    setUpJobConf(jobConf);
    MRHelpers.translateMRConfToTez(jobConf);
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
    jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, new Path(workDir, "localized-resources").toUri().toString());
    Path mapInput = new Path(workDir, "map0");
    MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput, 10);
    InputSpec mapInputSpec = new InputSpec("NullSrcVertex", InputDescriptor.create(MRInputLegacy.class.getName()).setUserPayload(UserPayload.create(ByteBuffer.wrap(MRRuntimeProtos.MRInputUserPayloadProto.newBuilder().setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf)).build().toByteArray()))), 1);
    OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex", OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);
    TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
    LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0, new Path(workDir, "map0"), new TestUmbilical(), dagName, vertexName, Collections.singletonList(mapInputSpec), Collections.singletonList(mapOutputSpec), sharedExecutor);
    task.initialize();
    task.run();
    task.close();
    sharedExecutor.shutdownNow();
    OutputContext outputContext = task.getOutputContexts().iterator().next();
    TezTaskOutput mapOutputs = new TezTaskOutputFiles(jobConf, outputContext.getUniqueIdentifier(), outputContext.getDagIdentifier());
    // TODO NEWTEZ FIXME OutputCommitter verification
    // MRTask mrTask = (MRTask)t.getProcessor();
    // Assert.assertEquals(TezNullOutputCommitter.class.getName(), mrTask
    // .getCommitter().getClass().getName());
    // t.close();
    Path mapOutputFile = getMapOutputFile(jobConf, outputContext);
    LOG.info("mapOutputFile = " + mapOutputFile);
    IFile.Reader reader = new IFile.Reader(localFs, mapOutputFile, null, null, null, false, 0, -1);
    LongWritable key = new LongWritable();
    Text value = new Text();
    DataInputBuffer keyBuf = new DataInputBuffer();
    DataInputBuffer valueBuf = new DataInputBuffer();
    long prev = Long.MIN_VALUE;
    while (reader.nextRawKey(keyBuf)) {
        reader.nextRawValue(valueBuf);
        key.readFields(keyBuf);
        value.readFields(valueBuf);
        if (prev != Long.MIN_VALUE) {
            assert (prev <= key.get());
            prev = key.get();
        }
        LOG.info("key = " + key.get() + "; value = " + value);
    }
    reader.close();
}
Also used : Path(org.apache.hadoop.fs.Path) LogicalIOProcessorRuntimeTask(org.apache.tez.runtime.LogicalIOProcessorRuntimeTask) TestUmbilical(org.apache.tez.mapreduce.TestUmbilical) TezTaskOutputFiles(org.apache.tez.runtime.library.common.task.local.output.TezTaskOutputFiles) IFile(org.apache.tez.runtime.library.common.sort.impl.IFile) InputSpec(org.apache.tez.runtime.api.impl.InputSpec) OrderedPartitionedKVOutput(org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput) Text(org.apache.hadoop.io.Text) OutputContext(org.apache.tez.runtime.api.OutputContext) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) TezSharedExecutor(org.apache.tez.common.TezSharedExecutor) LongWritable(org.apache.hadoop.io.LongWritable) MRInputLegacy(org.apache.tez.mapreduce.input.MRInputLegacy) JobConf(org.apache.hadoop.mapred.JobConf) OutputSpec(org.apache.tez.runtime.api.impl.OutputSpec) TezTaskOutput(org.apache.tez.runtime.library.common.task.local.output.TezTaskOutput) Test(org.junit.Test)

Example 2 with TestUmbilical

use of org.apache.tez.mapreduce.TestUmbilical in project tez by apache.

the class TestMROutput method testPerf.

@Ignore
@Test
public void testPerf() throws Exception {
    Configuration conf = new Configuration();
    TezSharedExecutor sharedExecutor = new TezSharedExecutor(conf);
    LogicalIOProcessorRuntimeTask task = createLogicalTask(conf, new TestUmbilical(), "dag", "vertex", sharedExecutor);
    task.initialize();
    task.run();
    task.close();
    sharedExecutor.shutdownNow();
}
Also used : LogicalIOProcessorRuntimeTask(org.apache.tez.runtime.LogicalIOProcessorRuntimeTask) TestUmbilical(org.apache.tez.mapreduce.TestUmbilical) Configuration(org.apache.hadoop.conf.Configuration) TezSharedExecutor(org.apache.tez.common.TezSharedExecutor) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 3 with TestUmbilical

use of org.apache.tez.mapreduce.TestUmbilical in project tez by apache.

the class TestReduceProcessor method testReduceProcessor.

@Test(timeout = 5000)
public void testReduceProcessor() throws Exception {
    final String dagName = "mrdag0";
    String mapVertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
    String reduceVertexName = MultiStageMRConfigUtil.getFinalReduceVertexName();
    JobConf jobConf = new JobConf(defaultConf);
    setUpJobConf(jobConf);
    MRHelpers.translateMRConfToTez(jobConf);
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, new Path(workDir, "localized-resources").toUri().toString());
    jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
    Path mapInput = new Path(workDir, "map0");
    MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput, 10);
    InputSpec mapInputSpec = new InputSpec("NullSrcVertex", InputDescriptor.create(MRInputLegacy.class.getName()).setUserPayload(UserPayload.create(ByteBuffer.wrap(MRRuntimeProtos.MRInputUserPayloadProto.newBuilder().setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf)).build().toByteArray()))), 1);
    OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex", OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);
    // Run a map
    TestUmbilical testUmbilical = new TestUmbilical();
    TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
    LogicalIOProcessorRuntimeTask mapTask = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0, mapInput, testUmbilical, dagName, mapVertexName, Collections.singletonList(mapInputSpec), Collections.singletonList(mapOutputSpec), sharedExecutor);
    mapTask.initialize();
    mapTask.run();
    mapTask.close();
    // One VME, One DME
    Assert.assertEquals(2, testUmbilical.getEvents().size());
    Assert.assertEquals(EventType.VERTEX_MANAGER_EVENT, testUmbilical.getEvents().get(0).getEventType());
    Assert.assertEquals(EventType.COMPOSITE_DATA_MOVEMENT_EVENT, testUmbilical.getEvents().get(1).getEventType());
    CompositeDataMovementEvent cdmEvent = (CompositeDataMovementEvent) testUmbilical.getEvents().get(1).getEvent();
    Assert.assertEquals(1, cdmEvent.getCount());
    DataMovementEvent dme = cdmEvent.getEvents().iterator().next();
    dme.setTargetIndex(0);
    LOG.info("Starting reduce...");
    JobTokenIdentifier identifier = new JobTokenIdentifier(new Text(dagName));
    JobTokenSecretManager jobTokenSecretManager = new JobTokenSecretManager();
    Token<JobTokenIdentifier> shuffleToken = new Token<JobTokenIdentifier>(identifier, jobTokenSecretManager);
    shuffleToken.setService(identifier.getJobId());
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, new Path(workDir, "localized-resources").toUri().toString());
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true);
    FileOutputFormat.setOutputPath(jobConf, new Path(workDir, "output"));
    ProcessorDescriptor reduceProcessorDesc = ProcessorDescriptor.create(ReduceProcessor.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf));
    InputSpec reduceInputSpec = new InputSpec(mapVertexName, InputDescriptor.create(OrderedGroupedInputLegacy.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);
    OutputSpec reduceOutputSpec = new OutputSpec("NullDestinationVertex", OutputDescriptor.create(MROutputLegacy.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);
    // Now run a reduce
    TaskSpec taskSpec = new TaskSpec(TezTestUtils.getMockTaskAttemptId(0, 1, 0, 0), dagName, reduceVertexName, -1, reduceProcessorDesc, Collections.singletonList(reduceInputSpec), Collections.singletonList(reduceOutputSpec), null, null);
    Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<String, ByteBuffer>();
    String auxiliaryService = jobConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
    serviceConsumerMetadata.put(auxiliaryService, ShuffleUtils.convertJobTokenToBytes(shuffleToken));
    Map<String, String> serviceProviderEnvMap = new HashMap<String, String>();
    ByteBuffer shufflePortBb = ByteBuffer.allocate(4).putInt(0, 8000);
    AuxiliaryServiceHelper.setServiceDataIntoEnv(auxiliaryService, shufflePortBb, serviceProviderEnvMap);
    LogicalIOProcessorRuntimeTask task = new LogicalIOProcessorRuntimeTask(taskSpec, 0, jobConf, new String[] { workDir.toString() }, new TestUmbilical(), serviceConsumerMetadata, serviceProviderEnvMap, HashMultimap.<String, String>create(), null, "", new ExecutionContextImpl("localhost"), Runtime.getRuntime().maxMemory(), true, new DefaultHadoopShim(), sharedExecutor);
    List<Event> destEvents = new LinkedList<Event>();
    destEvents.add(dme);
    task.initialize();
    OrderedGroupedInputLegacy sortedOut = (OrderedGroupedInputLegacy) task.getInputs().values().iterator().next();
    sortedOut.handleEvents(destEvents);
    task.run();
    task.close();
    sharedExecutor.shutdownNow();
    // MRTask mrTask = (MRTask)t.getProcessor();
    // TODO NEWTEZ Verify the partitioner has not been created
    // Likely not applicable anymore.
    // Assert.assertNull(mrTask.getPartitioner());
    // Only a task commit happens, hence the data is still in the temporary directory.
    Path reduceOutputDir = new Path(new Path(workDir, "output"), "_temporary/0/" + IDConverter.toMRTaskIdForOutput(TezTestUtils.getMockTaskId(0, 1, 0)));
    Path reduceOutputFile = new Path(reduceOutputDir, "part-v001-o000-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(localFs, reduceOutputFile, jobConf);
    LongWritable key = new LongWritable();
    Text value = new Text();
    long prev = Long.MIN_VALUE;
    while (reader.next(key, value)) {
        if (prev != Long.MIN_VALUE) {
            Assert.assertTrue(prev < key.get());
            prev = key.get();
        }
    }
    reader.close();
}
Also used : OrderedGroupedInputLegacy(org.apache.tez.runtime.library.input.OrderedGroupedInputLegacy) TestUmbilical(org.apache.tez.mapreduce.TestUmbilical) HashMap(java.util.HashMap) MROutputLegacy(org.apache.tez.mapreduce.output.MROutputLegacy) Token(org.apache.hadoop.security.token.Token) DataMovementEvent(org.apache.tez.runtime.api.events.DataMovementEvent) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) DefaultHadoopShim(org.apache.tez.hadoop.shim.DefaultHadoopShim) SequenceFile(org.apache.hadoop.io.SequenceFile) JobTokenSecretManager(org.apache.tez.common.security.JobTokenSecretManager) LongWritable(org.apache.hadoop.io.LongWritable) JobConf(org.apache.hadoop.mapred.JobConf) OutputSpec(org.apache.tez.runtime.api.impl.OutputSpec) Path(org.apache.hadoop.fs.Path) LogicalIOProcessorRuntimeTask(org.apache.tez.runtime.LogicalIOProcessorRuntimeTask) ExecutionContextImpl(org.apache.tez.runtime.api.impl.ExecutionContextImpl) TaskSpec(org.apache.tez.runtime.api.impl.TaskSpec) JobTokenIdentifier(org.apache.tez.common.security.JobTokenIdentifier) ProcessorDescriptor(org.apache.tez.dag.api.ProcessorDescriptor) InputSpec(org.apache.tez.runtime.api.impl.InputSpec) OrderedPartitionedKVOutput(org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput) Text(org.apache.hadoop.io.Text) ByteBuffer(java.nio.ByteBuffer) LinkedList(java.util.LinkedList) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) TezSharedExecutor(org.apache.tez.common.TezSharedExecutor) Event(org.apache.tez.runtime.api.Event) DataMovementEvent(org.apache.tez.runtime.api.events.DataMovementEvent) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) MRInputLegacy(org.apache.tez.mapreduce.input.MRInputLegacy) Test(org.junit.Test)

Example 4 with TestUmbilical

use of org.apache.tez.mapreduce.TestUmbilical in project tez by apache.

the class TestMapProcessor method testMapProcessorProgress.

@Test(timeout = 30000)
public void testMapProcessorProgress() throws Exception {
    String dagName = "mrdag0";
    String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
    JobConf jobConf = new JobConf(defaultConf);
    setUpJobConf(jobConf);
    MRHelpers.translateMRConfToTez(jobConf);
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
    jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, new Path(workDir, "localized-resources").toUri().toString());
    Path mapInput = new Path(workDir, "map0");
    MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput, 100000);
    InputSpec mapInputSpec = new InputSpec("NullSrcVertex", InputDescriptor.create(MRInputLegacy.class.getName()).setUserPayload(UserPayload.create(ByteBuffer.wrap(MRRuntimeProtos.MRInputUserPayloadProto.newBuilder().setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf)).build().toByteArray()))), 1);
    OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex", OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);
    TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
    final LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0, new Path(workDir, "map0"), new TestUmbilical(), dagName, vertexName, Collections.singletonList(mapInputSpec), Collections.singletonList(mapOutputSpec), sharedExecutor);
    ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
    Thread monitorProgress = new Thread(new Runnable() {

        @Override
        public void run() {
            float prog = task.getProgress();
            if (prog > 0.0f && prog < 1.0f)
                progressUpdate = prog;
        }
    });
    task.initialize();
    scheduler.scheduleAtFixedRate(monitorProgress, 0, 1, TimeUnit.MILLISECONDS);
    task.run();
    Assert.assertTrue("Progress Updates should be captured!", progressUpdate > 0.0f && progressUpdate < 1.0f);
    task.close();
    sharedExecutor.shutdownNow();
}
Also used : Path(org.apache.hadoop.fs.Path) LogicalIOProcessorRuntimeTask(org.apache.tez.runtime.LogicalIOProcessorRuntimeTask) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) TestUmbilical(org.apache.tez.mapreduce.TestUmbilical) InputSpec(org.apache.tez.runtime.api.impl.InputSpec) OrderedPartitionedKVOutput(org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput) TezSharedExecutor(org.apache.tez.common.TezSharedExecutor) MRInputLegacy(org.apache.tez.mapreduce.input.MRInputLegacy) JobConf(org.apache.hadoop.mapred.JobConf) OutputSpec(org.apache.tez.runtime.api.impl.OutputSpec) Test(org.junit.Test)

Aggregations

TezSharedExecutor (org.apache.tez.common.TezSharedExecutor)4 TestUmbilical (org.apache.tez.mapreduce.TestUmbilical)4 LogicalIOProcessorRuntimeTask (org.apache.tez.runtime.LogicalIOProcessorRuntimeTask)4 Test (org.junit.Test)4 Path (org.apache.hadoop.fs.Path)3 JobConf (org.apache.hadoop.mapred.JobConf)3 MRInputLegacy (org.apache.tez.mapreduce.input.MRInputLegacy)3 InputSpec (org.apache.tez.runtime.api.impl.InputSpec)3 OutputSpec (org.apache.tez.runtime.api.impl.OutputSpec)3 OrderedPartitionedKVOutput (org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput)3 LongWritable (org.apache.hadoop.io.LongWritable)2 Text (org.apache.hadoop.io.Text)2 ByteBuffer (java.nio.ByteBuffer)1 HashMap (java.util.HashMap)1 LinkedList (java.util.LinkedList)1 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)1 Configuration (org.apache.hadoop.conf.Configuration)1 DataInputBuffer (org.apache.hadoop.io.DataInputBuffer)1 SequenceFile (org.apache.hadoop.io.SequenceFile)1 Token (org.apache.hadoop.security.token.Token)1