Search in sources :

Example 31 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestMultiMRInput method testMultipleSplits.

/**
 * Hands two old-API (mapred) sequence-file splits to a {@link MultiMRInput}
 * as {@link InputDataInformationEvent}s and checks the resulting readers
 * against the data and length reported by {@code createSplits}.
 */
@Test(timeout = 5000)
public void testMultipleSplits() throws Exception {
    final int splitCount = 2;
    Path inputDir = new Path(TEST_ROOT_DIR, "testMultipleSplits");

    // Job configuration using the old-API sequence file input format.
    JobConf conf = new JobConf(defaultConf);
    conf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(conf, inputDir);

    // The input is created and initialized before any split data exists.
    InputContext context = createTezInputContext(conf);
    MultiMRInput multiInput = new MultiMRInput(context, splitCount);
    multiInput.initialize();

    AtomicLong totalLength = new AtomicLong();
    LinkedHashMap<LongWritable, Text> expected = createSplits(splitCount, inputDir, conf, totalLength);

    InputSplit[] splits = new SequenceFileInputFormat<LongWritable, Text>().getSplits(conf, splitCount);
    assertEquals(splitCount, splits.length);

    // One event per split, in split order; every event carries source index 0.
    List<Event> events = new ArrayList<Event>();
    for (InputSplit split : splits) {
        MRSplitProto proto = MRInputHelpers.createSplitProto(split);
        events.add(InputDataInformationEvent.createWithSerializedPayload(0, proto.toByteString().asReadOnlyByteBuffer()));
    }

    multiInput.handleEvents(events);
    assertReaders(multiInput, expected, splitCount, totalLength.get());
}
Also used : Path(org.apache.hadoop.fs.Path) SequenceFileInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat) InputContext(org.apache.tez.runtime.api.InputContext) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) AtomicLong(java.util.concurrent.atomic.AtomicLong) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) LongWritable(org.apache.hadoop.io.LongWritable) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto) Test(org.junit.Test)

Example 32 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestMultiMRInput method testNewFormatSplits.

/**
 * Hands a single new-API (mapreduce) sequence-file split to a
 * {@link MultiMRInput} and checks the resulting reader against the data and
 * length reported by {@code createSplits}.
 */
@Test
public void testNewFormatSplits() throws Exception {
    Path inputDir = new Path(TEST_ROOT_DIR, "testNewFormatSplits");

    // Configure a new-API job that reads sequence files from the input directory.
    Job newApiJob = Job.getInstance(defaultConf);
    newApiJob.setInputFormatClass(org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat.class);
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(newApiJob, inputDir);
    Configuration jobConf = newApiJob.getConfiguration();
    jobConf.setBoolean("mapred.mapper.new-api", true);

    // Produce the input data.
    AtomicLong totalLength = new AtomicLong();
    LinkedHashMap<LongWritable, Text> expected = createSplits(1, inputDir, jobConf, totalLength);

    // Ask the input format for its splits; exactly one is expected.
    List<org.apache.hadoop.mapreduce.InputSplit> splits =
        new org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat<LongWritable, Text>().getSplits(newApiJob);
    assertEquals(1, splits.size());

    // Wrap the serialized split in an event.
    org.apache.hadoop.mapreduce.InputSplit onlySplit = splits.get(0);
    MRSplitProto proto = MRInputHelpers.createSplitProto(onlySplit, new SerializationFactory(jobConf));
    Event splitEvent = InputDataInformationEvent.createWithSerializedPayload(0, proto.toByteString().asReadOnlyByteBuffer());

    // Build the input, deliver the event, and verify the reader.
    MultiMRInput multiInput = new MultiMRInput(createTezInputContext(jobConf), 1);
    multiInput.initialize();
    multiInput.handleEvents(Collections.singletonList(splitEvent));
    assertReaders(multiInput, expected, 1, totalLength.get());
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) SequenceFileInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat) InputContext(org.apache.tez.runtime.api.InputContext) SerializationFactory(org.apache.hadoop.io.serializer.SerializationFactory) Text(org.apache.hadoop.io.Text) AtomicLong(java.util.concurrent.atomic.AtomicLong) LongWritable(org.apache.hadoop.io.LongWritable) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapred.InputSplit) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) Test(org.junit.Test)

Example 33 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestMultiMRInput method createTezInputContext.

/**
 * Builds a Mockito-mocked {@link InputContext} whose user payload is an
 * {@code MRInputUserPayloadProto} carrying the given configuration with
 * grouping disabled.
 *
 * @param conf configuration serialized into the user payload
 * @return the stubbed input context
 * @throws Exception if the configuration cannot be serialized
 */
private InputContext createTezInputContext(Configuration conf) throws Exception {
    byte[] serializedPayload = MRInputUserPayloadProto.newBuilder()
        .setGroupingEnabled(false)
        .setConfigurationBytes(TezUtils.createByteStringFromConf(conf))
        .build()
        .toByteArray();

    InputContext ctx = mock(InputContext.class);

    // Application-level and DAG-level identity.
    doReturn(ApplicationId.newInstance(10000, 1)).when(ctx).getApplicationId();
    doReturn("dagName").when(ctx).getDAGName();
    doReturn(1).when(ctx).getDAGAttemptNumber();
    doReturn(UUID.randomUUID().toString()).when(ctx).getUniqueIdentifier();

    // Vertex, task and input coordinates.
    doReturn("taskVertexName").when(ctx).getTaskVertexName();
    doReturn("srcVertexName").when(ctx).getSourceVertexName();
    doReturn(1).when(ctx).getTaskVertexIndex();
    doReturn(1).when(ctx).getTaskIndex();
    doReturn(1).when(ctx).getTaskAttemptNumber();
    doReturn(1).when(ctx).getInputIndex();

    // Counters and the serialized MR input payload.
    doReturn(new TezCounters()).when(ctx).getCounters();
    doReturn(UserPayload.create(ByteBuffer.wrap(serializedPayload))).when(ctx).getUserPayload();

    return ctx;
}
Also used : MRInputUserPayloadProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto) InputContext(org.apache.tez.runtime.api.InputContext) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) TezCounters(org.apache.tez.common.counters.TezCounters)

Example 34 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestKVReadersWithMR method testWithSpecificNumberOfKV_MapReduce.

/**
 * Drains an {@link MRReaderMapReduce} whose record reader is a
 * {@code DummyRecordReaderMapReduce} and verifies the number of records read,
 * the progress notifications sent to the context, and that a further read
 * fails with an {@link IOException}.
 *
 * @param kvPairs number of key/value pairs passed to the dummy record reader
 * @throws IOException if {@code reader.next()} fails while draining the reader
 */
public void testWithSpecificNumberOfKV_MapReduce(int kvPairs) throws IOException {
    InputContext mockContext = mock(InputContext.class);
    MRReaderMapReduce reader = new MRReaderMapReduce(conf, counters, inputRecordCounter, -1, 1, 10, 20, 30, mockContext);
    reader.recordReader = new DummyRecordReaderMapReduce(kvPairs);
    int records = 0;
    while (reader.next()) {
        records++;
        // The cumulative notifyProgress() count must match the records read so far.
        verify(mockContext, times(records)).notifyProgress();
    }
    assertTrue("Expected " + kvPairs + " records but read " + records, kvPairs == records);
    // reading again should fail
    try {
        reader.next();
        fail("Expected an IOException when reading after the reader was exhausted");
    } catch (IOException e) {
        assertTrue("Unexpected exception message: " + e.getMessage(),
            e.getMessage().contains("For usage, please refer to"));
    }
}
Also used : InputContext(org.apache.tez.runtime.api.InputContext) IOException(java.io.IOException)

Example 35 with InputContext

use of org.apache.tez.runtime.api.InputContext in project tez by apache.

the class TestKVReadersWithMR method testWithSpecificNumberOfKV.

/**
 * Drains an {@link MRReaderMapred} whose record reader is a
 * {@code DummyRecordReader} and verifies the number of records read, the
 * progress notifications sent to the context, and that a further read fails
 * with an {@link IOException}.
 *
 * @param kvPairs number of key/value pairs passed to the dummy record reader
 * @throws IOException if {@code reader.next()} fails while draining the reader
 */
public void testWithSpecificNumberOfKV(int kvPairs) throws IOException {
    InputContext mockContext = mock(InputContext.class);
    MRReaderMapred reader = new MRReaderMapred(conf, counters, inputRecordCounter, mockContext);
    reader.recordReader = new DummyRecordReader(kvPairs);
    int records = 0;
    while (reader.next()) {
        records++;
        // The cumulative notifyProgress() count must match the records read so far.
        verify(mockContext, times(records)).notifyProgress();
    }
    assertTrue("Expected " + kvPairs + " records but read " + records, kvPairs == records);
    // reading again should fail
    try {
        reader.next();
        fail("Expected an IOException when reading after the reader was exhausted");
    } catch (IOException e) {
        assertTrue("Unexpected exception message: " + e.getMessage(),
            e.getMessage().contains("For usage, please refer to"));
    }
}
Also used : InputContext(org.apache.tez.runtime.api.InputContext) IOException(java.io.IOException)

Aggregations

InputContext (org.apache.tez.runtime.api.InputContext) 65, Test (org.junit.Test) 47, Configuration (org.apache.hadoop.conf.Configuration) 30, TezConfiguration (org.apache.tez.dag.api.TezConfiguration) 28, TezCounters (org.apache.tez.common.counters.TezCounters) 19, TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration) 18, CompositeInputAttemptIdentifier (org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier) 17, IOException (java.io.IOException) 16, InputAttemptIdentifier (org.apache.tez.runtime.library.common.InputAttemptIdentifier) 16, Event (org.apache.tez.runtime.api.Event) 14, LinkedList (java.util.LinkedList) 12, Path (org.apache.hadoop.fs.Path) 12, InputDescriptor (org.apache.tez.dag.api.InputDescriptor) 10, InvocationOnMock (org.mockito.invocation.InvocationOnMock) 10, ExecutorService (java.util.concurrent.ExecutorService) 9, OutputContext (org.apache.tez.runtime.api.OutputContext) 9, OutputDescriptor (org.apache.tez.dag.api.OutputDescriptor) 8, DataMovementEvent (org.apache.tez.runtime.api.events.DataMovementEvent) 8, FetchedInputAllocator (org.apache.tez.runtime.library.common.shuffle.FetchedInputAllocator) 8, Text (org.apache.hadoop.io.Text) 7