Search in sources :

Example 1 with TezGroupedSplit

Use of org.apache.hadoop.mapreduce.split.TezGroupedSplit in the Apache Tez project.

From the class TestMRInputAMSplitGenerator, method testGroupSplitsAndSortSplits.

/**
 * Drives {@code MRInputAMSplitGenerator} over a fixed set of 50 split lengths
 * (1000, 2000, ..., 50000) and validates the events it emits.
 *
 * <p>Checks performed:
 * <ul>
 *   <li>the first event is an {@code InputConfigureVertexTasksEvent};</li>
 *   <li>every following event is an {@code InputDataInformationEvent} that carries a
 *       serialized payload and no deserialized payload;</li>
 *   <li>each decoded split is a {@code TezGroupedSplit} wrapping {@code InputSplitForTest}
 *       instances when grouping is on, or an {@code InputSplitForTest} otherwise;</li>
 *   <li>the number of raw splits seen across all events equals the number configured;</li>
 *   <li>split lengths never increase from one event to the next when sorting is on, and
 *       increase at least once when sorting is off.</li>
 * </ul>
 *
 * @param groupSplitsEnabled value passed to {@code groupSplits(...)} on the config builder
 * @param sortSplitsEnabled  value passed to {@code sortSplits(...)} on the config builder
 * @throws Exception if split generation or payload decoding fails
 */
private void testGroupSplitsAndSortSplits(boolean groupSplitsEnabled, boolean sortSplitsEnabled) throws Exception {
    // Configure 50 split lengths: 1000, 2000, ..., 50000.
    Configuration conf = new Configuration();
    String[] configuredLengths = new String[50];
    int slot = 0;
    while (slot < configuredLengths.length) {
        configuredLengths[slot] = String.valueOf(1000 * (slot + 1));
        slot++;
    }
    conf.setStrings(SPLITS_LENGTHS, configuredLengths);

    // Build the input descriptor and run the split generator against a test context.
    DataSourceDescriptor descriptor = MRInput.createConfigBuilder(conf, InputFormatForTest.class)
            .groupSplits(groupSplitsEnabled)
            .sortSplits(sortSplitsEnabled)
            .build();
    UserPayload payload = descriptor.getInputDescriptor().getUserPayload();
    InputInitializerContext initializerContext = new TezTestUtils.TezRootInputInitializerContextForTest(payload);
    MRInputAMSplitGenerator generator = new MRInputAMSplitGenerator(initializerContext);
    List<Event> events = generator.initialize();

    // The leading event configures the vertex tasks; the rest carry one split each.
    assertTrue(events.get(0) instanceof InputConfigureVertexTasksEvent);

    int rawSplitCount = 0;
    boolean sawLengthIncrease = false;
    InputSplit previousSplit = null;
    for (Event event : events.subList(1, events.size())) {
        assertTrue(event instanceof InputDataInformationEvent);
        InputDataInformationEvent dataEvent = (InputDataInformationEvent) event;
        assertNull(dataEvent.getDeserializedUserPayload());
        assertNotNull(dataEvent.getUserPayload());

        // Decode the serialized split carried in the event payload.
        MRSplitProto splitProto = MRSplitProto.parseFrom(ByteString.copyFrom(dataEvent.getUserPayload()));
        InputSplit currentSplit = MRInputUtils.getNewSplitDetailsFromEvent(splitProto, new Configuration());

        if (!groupSplitsEnabled) {
            rawSplitCount++;
            assertTrue(currentSplit instanceof InputSplitForTest);
        } else {
            // A grouped split contributes all of its wrapped splits to the raw count.
            List<? extends InputSplit> members = ((TezGroupedSplit) currentSplit).getGroupedSplits();
            rawSplitCount += members.size();
            for (InputSplit member : members) {
                assertTrue(member instanceof InputSplitForTest);
            }
            assertTrue(members.get(0) instanceof InputSplitForTest);
        }

        // Compare this split's length with its predecessor's: with sorting on, lengths
        // must never increase; with sorting off, record whether any increase occurs.
        if (previousSplit != null) {
            long currentLength = currentSplit.getLength();
            long previousLength = previousSplit.getLength();
            if (sortSplitsEnabled) {
                assertTrue(currentLength <= previousLength);
            } else if (currentLength > previousLength) {
                sawLengthIncrease = true;
            }
        }
        previousSplit = currentSplit;
    }

    // Every configured split must be accounted for exactly once.
    assertEquals(configuredLengths.length, rawSplitCount);
    // Without sorting, at least one length increase must have been observed.
    assertTrue(sortSplitsEnabled || sawLengthIncrease);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) UserPayload(org.apache.tez.dag.api.UserPayload) TezGroupedSplit(org.apache.hadoop.mapreduce.split.TezGroupedSplit) ByteString(com.google.protobuf.ByteString) InputInitializerContext(org.apache.tez.runtime.api.InputInitializerContext) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) InputSplit(org.apache.hadoop.mapreduce.InputSplit) DataSourceDescriptor(org.apache.tez.dag.api.DataSourceDescriptor) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto)

Aggregations

ByteString (com.google.protobuf.ByteString)1 Configuration (org.apache.hadoop.conf.Configuration)1 InputSplit (org.apache.hadoop.mapreduce.InputSplit)1 TezGroupedSplit (org.apache.hadoop.mapreduce.split.TezGroupedSplit)1 DataSourceDescriptor (org.apache.tez.dag.api.DataSourceDescriptor)1 UserPayload (org.apache.tez.dag.api.UserPayload)1 MRSplitProto (org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto)1 Event (org.apache.tez.runtime.api.Event)1 InputInitializerContext (org.apache.tez.runtime.api.InputInitializerContext)1 InputConfigureVertexTasksEvent (org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent)1 InputDataInformationEvent (org.apache.tez.runtime.api.events.InputDataInformationEvent)1