Search in sources :

Example 6 with Input

use of org.apache.tez.runtime.api.Input in project tez by apache.

the class ReduceProcessor method run.

/**
 * Runs the reduce task against exactly one logical input and one logical output.
 *
 * <p>The single input is started and the processor context is asked to wait until it is ready;
 * the single output is then started and the task is initialized with it. Depending on
 * {@code useNewApi}, the work is handed to {@code runNewReducer} or {@code runOldReducer}, and
 * {@code done()} is called once the reducer returns.
 *
 * @param _inputs map of logical inputs; must hold exactly one entry whose value is an
 *     {@code OrderedGroupedInputLegacy}
 * @param _outputs map of logical outputs; must hold exactly one entry whose value is an
 *     {@code MROutputLegacy} or an {@code OrderedPartitionedKVOutput}
 * @throws IOException if the number or type of inputs/outputs is not as described above, or if
 *     {@code runNewReducer} raises a {@code ClassNotFoundException}
 * @throws Exception for any other failure propagated from the calls made here
 */
@Override
public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception {
    this.inputs = _inputs;
    this.outputs = _outputs;
    progressHelper = new ProgressHelper(this.inputs, processorContext, this.getClass().getSimpleName());
    LOG.info("Running reduce: " + processorContext.getUniqueIdentifier());
    // Exactly one output and one input are supported.
    if (_outputs.size() != 1) {
        throw new IOException("Invalid number of _outputs" + ", outputCount=" + _outputs.size());
    }
    if (_inputs.size() != 1) {
        throw new IOException("Invalid number of _inputs" + ", inputCount=" + _inputs.size());
    }
    LogicalInput in = _inputs.values().iterator().next();
    in.start();
    // Block until the processor context reports the (single) input as ready.
    List<Input> pendingInputs = new LinkedList<Input>();
    pendingInputs.add(in);
    processorContext.waitForAllInputsReady(pendingInputs);
    LOG.info("Input is ready for consumption. Starting Output");
    LogicalOutput out = _outputs.values().iterator().next();
    out.start();
    initTask(out);
    progressHelper.scheduleProgressTaskService(0, 100);
    this.statusUpdate();
    Class keyClass = ConfigUtils.getIntermediateInputKeyClass(jobConf);
    Class valueClass = ConfigUtils.getIntermediateInputValueClass(jobConf);
    LOG.info("Using keyClass: " + keyClass);
    LOG.info("Using valueClass: " + valueClass);
    RawComparator comparator = ConfigUtils.getInputKeySecondaryGroupingComparator(jobConf);
    LOG.info("Using comparator: " + comparator);
    reduceInputKeyCounter = mrReporter.getCounter(TaskCounter.REDUCE_INPUT_GROUPS);
    reduceInputValueCounter = mrReporter.getCounter(TaskCounter.REDUCE_INPUT_RECORDS);
    // Sanity check: only an ordered, grouped input can be reduced.
    if (!(in instanceof OrderedGroupedInputLegacy)) {
        throw new IOException("Illegal input to reduce: " + in.getClass());
    }
    OrderedGroupedInputLegacy shuffleInput = (OrderedGroupedInputLegacy) in;
    KeyValuesReader kvReader = shuffleInput.getReader();
    KeyValueWriter kvWriter = null;
    if (out instanceof MROutputLegacy) {
        kvWriter = ((MROutputLegacy) out).getWriter();
    } else if (out instanceof OrderedPartitionedKVOutput) {
        kvWriter = ((OrderedPartitionedKVOutput) out).getWriter();
    } else {
        // Report the offending output's class (previously the input's class was reported here).
        throw new IOException("Illegal output to reduce: " + out.getClass());
    }
    if (useNewApi) {
        try {
            runNewReducer(jobConf, mrReporter, shuffleInput, comparator, keyClass, valueClass, kvWriter);
        } catch (ClassNotFoundException cnfe) {
            throw new IOException(cnfe);
        }
    } else {
        runOldReducer(jobConf, mrReporter, kvReader, comparator, keyClass, valueClass, kvWriter);
    }
    done();
}
Also used : OrderedGroupedInputLegacy(org.apache.tez.runtime.library.input.OrderedGroupedInputLegacy) ProgressHelper(org.apache.tez.common.ProgressHelper) LogicalOutput(org.apache.tez.runtime.api.LogicalOutput) MROutputLegacy(org.apache.tez.mapreduce.output.MROutputLegacy) OrderedPartitionedKVOutput(org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput) IOException(java.io.IOException) LinkedList(java.util.LinkedList) KeyValueWriter(org.apache.tez.runtime.library.api.KeyValueWriter) RawComparator(org.apache.hadoop.io.RawComparator) LogicalInput(org.apache.tez.runtime.api.LogicalInput) Input(org.apache.tez.runtime.api.Input) KeyValuesReader(org.apache.tez.runtime.library.api.KeyValuesReader)

Example 7 with Input

use of org.apache.tez.runtime.api.Input in project tez by apache.

the class LogicalIOProcessorRuntimeTask method initializeGroupInputs.

/**
 * Creates one merged input per entry in {@code groupInputSpecs} and registers it in
 * {@code groupInputsMap} under the spec's group name. When {@code groupInputSpecs} is null or
 * empty nothing is created and {@code groupInputsMap} is left untouched.
 *
 * @throws TezException declared by this method; presumably raised while creating a merged input
 */
private void initializeGroupInputs() throws TezException {
    if (groupInputSpecs == null || groupInputSpecs.isEmpty()) {
        return;
    }
    groupInputsMap = new ConcurrentHashMap<String, MergedLogicalInput>(groupInputSpecs.size());
    for (GroupInputSpec spec : groupInputSpecs) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Initializing GroupInput using GroupInputSpec: " + spec);
        }
        MergedInputContext context = new TezMergedInputContextImpl(
                spec.getMergedInputDescriptor().getUserPayload(),
                spec.getGroupName(),
                groupInputsMap,
                inputReadyTracker,
                localDirs,
                this);
        // Collect the already-created inputs of every vertex that belongs to this group.
        List<Input> memberInputs = Lists.newArrayListWithCapacity(spec.getGroupVertices().size());
        for (String vertexName : spec.getGroupVertices()) {
            memberInputs.add(inputsMap.get(vertexName));
        }
        MergedLogicalInput merged =
                (MergedLogicalInput) createMergedInput(spec.getMergedInputDescriptor(), context, memberInputs);
        groupInputsMap.put(spec.getGroupName(), merged);
    }
}
Also used : LogicalInput(org.apache.tez.runtime.api.LogicalInput) Input(org.apache.tez.runtime.api.Input) MergedLogicalInput(org.apache.tez.runtime.api.MergedLogicalInput) TezMergedInputContextImpl(org.apache.tez.runtime.api.impl.TezMergedInputContextImpl) GroupInputSpec(org.apache.tez.runtime.api.impl.GroupInputSpec) MergedInputContext(org.apache.tez.runtime.api.MergedInputContext)

Example 8 with Input

use of org.apache.tez.runtime.api.Input in project tez by apache.

the class TestInputReadyTracker method testWithoutGrouping2.

/**
 * Exercises {@code waitForAnyInputReady} with three ungrouped, controlled inputs. In each of
 * three rounds one input is handed to {@code setDelayedInputReady} and the test asserts that this
 * is the input returned, that the elapsed {@code nanoTime} is at least {@code SLEEP_TIME}, and
 * that the other inputs' {@code isReady} flags are as expected.
 */
@Test(timeout = 20000)
public void testWithoutGrouping2() throws InterruptedException {
    InputReadyTracker tracker = new InputReadyTracker();
    ControlledReadyInputForTest first = new ControlledReadyInputForTest(tracker);
    ControlledReadyInputForTest second = new ControlledReadyInputForTest(tracker);
    ControlledReadyInputForTest third = new ControlledReadyInputForTest(tracker);

    // Round 1: wait on all three inputs; only the second one is triggered.
    List<Input> pending = new ArrayList<Input>();
    pending.add(first);
    pending.add(second);
    pending.add(third);
    long startTime = System.nanoTime();
    setDelayedInputReady(second);
    Input winner = tracker.waitForAnyInputReady(pending);
    assertEquals(second, winner);
    long readyTime = System.nanoTime();
    // The wait must not have returned before the triggered input was flagged ready.
    assertTrue(second.isReady);
    assertTrue(readyTime >= startTime + SLEEP_TIME);
    assertFalse(first.isReady);
    assertFalse(third.isReady);

    // Round 2: wait on the two remaining inputs; only the first one is triggered.
    pending = new ArrayList<Input>();
    pending.add(first);
    pending.add(third);
    startTime = System.nanoTime();
    setDelayedInputReady(first);
    winner = tracker.waitForAnyInputReady(pending);
    assertEquals(first, winner);
    readyTime = System.nanoTime();
    // The wait must not have returned before the triggered input was flagged ready.
    assertTrue(first.isReady);
    assertTrue(readyTime >= startTime + SLEEP_TIME);
    assertTrue(second.isReady);
    assertFalse(third.isReady);

    // Round 3: only the third input is left. A wait with argument 0 is expected to yield null,
    // and a wait with argument -1 is expected to yield the input once it has been triggered.
    pending = new ArrayList<Input>();
    pending.add(third);
    startTime = System.nanoTime();
    setDelayedInputReady(third);
    winner = tracker.waitForAnyInputReady(pending, 0);
    assertNull(winner);
    winner = tracker.waitForAnyInputReady(pending, -1);
    assertEquals(third, winner);
    readyTime = System.nanoTime();
    // The wait must not have returned before the triggered input was flagged ready.
    assertTrue(third.isReady);
    assertTrue(readyTime >= startTime + SLEEP_TIME);
    assertTrue(first.isReady);
    assertTrue(second.isReady);
}
Also used : Input(org.apache.tez.runtime.api.Input) AbstractLogicalInput(org.apache.tez.runtime.api.AbstractLogicalInput) MergedLogicalInput(org.apache.tez.runtime.api.MergedLogicalInput) Test(org.junit.Test)

Example 9 with Input

use of org.apache.tez.runtime.api.Input in project tez by apache.

the class TestInputReadyTracker method testWithoutGrouping1.

/**
 * Exercises {@code waitForAnyInputReady} and {@code waitForAllInputsReady} with one
 * {@code ImmediatelyReadyInputForTest} and one {@code ControlledReadyInputForTest}, neither of
 * which is part of a group.
 */
@Test(timeout = 20000)
public void testWithoutGrouping1() throws InterruptedException {
    InputReadyTracker tracker = new InputReadyTracker();
    ImmediatelyReadyInputForTest immediate = new ImmediatelyReadyInputForTest(tracker);
    ControlledReadyInputForTest delayed = new ControlledReadyInputForTest(tracker);

    List<Input> pending = new ArrayList<Input>();
    pending.add(immediate);
    pending.add(delayed);

    // Waiting for any input is expected to hand back the immediately-ready one.
    Input firstReady = tracker.waitForAnyInputReady(pending);
    assertTrue(immediate.isReady);
    assertFalse(delayed.isReady);
    assertEquals(immediate, firstReady);

    // Trigger the second input. A wait-for-all with argument 0 is expected to report false,
    // and a wait-for-all with argument -1 is expected to report true.
    long startTime = System.nanoTime();
    setDelayedInputReady(delayed);
    assertFalse(tracker.waitForAllInputsReady(pending, 0));
    assertTrue(tracker.waitForAllInputsReady(pending, -1));
    long readyTime = System.nanoTime();
    // The wait must not have returned before the triggered input was flagged ready.
    assertTrue(delayed.isReady);
    assertTrue(readyTime >= startTime + SLEEP_TIME);
    assertTrue(immediate.isReady);
}
Also used : Input(org.apache.tez.runtime.api.Input) AbstractLogicalInput(org.apache.tez.runtime.api.AbstractLogicalInput) MergedLogicalInput(org.apache.tez.runtime.api.MergedLogicalInput) Test(org.junit.Test)

Example 10 with Input

use of org.apache.tez.runtime.api.Input in project tez by apache.

the class InputReadyTracker method setGroupedInputs.

/**
 * Rebuilds {@code inputToGroupMap}, the mapping from each constituent input to the merged inputs
 * that contain it, while holding {@code lock}. A {@code null} argument leaves the existing
 * mapping untouched.
 *
 * @param inputGroups the merged inputs to index; may be {@code null}
 */
public void setGroupedInputs(Collection<MergedLogicalInput> inputGroups) {
    lock.lock();
    try {
        if (inputGroups == null) {
            return;
        }
        inputToGroupMap = Maps.newConcurrentMap();
        for (MergedLogicalInput group : inputGroups) {
            for (Input member : group.getInputs()) {
                // An input already recorded in readyInputs is reported to its group right away.
                if (readyInputs.containsKey(member)) {
                    group.setConstituentInputIsReady(member);
                }
                List<MergedLogicalInput> owningGroups = inputToGroupMap.get(member);
                if (owningGroups == null) {
                    owningGroups = Lists.newArrayList();
                    inputToGroupMap.put(member, owningGroups);
                }
                owningGroups.add(group);
            }
        }
    } finally {
        lock.unlock();
    }
}
Also used : Input(org.apache.tez.runtime.api.Input) MergedLogicalInput(org.apache.tez.runtime.api.MergedLogicalInput)

Aggregations

Input (org.apache.tez.runtime.api.Input): 20, Test (org.junit.Test): 12, LinkedList (java.util.LinkedList): 10, KeyValuesReader (org.apache.tez.runtime.library.api.KeyValuesReader): 9, LogicalInput (org.apache.tez.runtime.api.LogicalInput): 7, MergedLogicalInput (org.apache.tez.runtime.api.MergedLogicalInput): 6, MergedInputContext (org.apache.tez.runtime.api.MergedInputContext): 5, ArrayList (java.util.ArrayList): 3, AbstractLogicalInput (org.apache.tez.runtime.api.AbstractLogicalInput): 3, IOException (java.io.IOException): 2, ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 2, MRInputLegacy (org.apache.tez.mapreduce.input.MRInputLegacy): 2, TezMergedInputContextImpl (org.apache.tez.runtime.api.impl.TezMergedInputContextImpl): 2, KeyValueReader (org.apache.tez.runtime.library.api.KeyValueReader): 2, HashMap (java.util.HashMap): 1, HashSet (java.util.HashSet): 1, DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator): 1, ExprNodeEvaluator (org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator): 1, HashTableDummyOperator (org.apache.hadoop.hive.ql.exec.HashTableDummyOperator): 1, Operator (org.apache.hadoop.hive.ql.exec.Operator): 1