Example 41 with TaskConfig

Use of org.apache.flink.runtime.operators.util.TaskConfig in project flink by apache.

The class AbstractCachedBuildSideJoinDriver, method initialize().

@Override
public void initialize() throws Exception {
    TaskConfig config = this.taskContext.getTaskConfig();
    final Counter numRecordsIn = taskContext.getMetricGroup().getIOMetricGroup().getNumRecordsInCounter();
    TypeSerializer<IT1> serializer1 = this.taskContext.<IT1>getInputSerializer(0).getSerializer();
    TypeSerializer<IT2> serializer2 = this.taskContext.<IT2>getInputSerializer(1).getSerializer();
    TypeComparator<IT1> comparator1 = this.taskContext.getDriverComparator(0);
    TypeComparator<IT2> comparator2 = this.taskContext.getDriverComparator(1);
    MutableObjectIterator<IT1> input1 = new CountingMutableObjectIterator<>(this.taskContext.<IT1>getInput(0), numRecordsIn);
    MutableObjectIterator<IT2> input2 = new CountingMutableObjectIterator<>(this.taskContext.<IT2>getInput(1), numRecordsIn);
    TypePairComparatorFactory<IT1, IT2> pairComparatorFactory = this.taskContext.getTaskConfig().getPairComparatorFactory(this.taskContext.getUserCodeClassLoader());
    double availableMemory = config.getRelativeMemoryDriver();
    boolean hashJoinUseBitMaps = taskContext.getTaskManagerInfo().getConfiguration().getBoolean(ConfigConstants.RUNTIME_HASH_JOIN_BLOOM_FILTERS_KEY, ConfigConstants.DEFAULT_RUNTIME_HASH_JOIN_BLOOM_FILTERS);
    ExecutionConfig executionConfig = taskContext.getExecutionConfig();
    objectReuseEnabled = executionConfig.isObjectReuseEnabled();
    // object reuse decides between the Reusing* and NonReusing* iterator variants
    if (objectReuseEnabled) {
        if (buildSideIndex == 0 && probeSideIndex == 1) {
            matchIterator = new ReusingBuildFirstReOpenableHashJoinIterator<IT1, IT2, OT>(
                    input1, input2,
                    serializer1, comparator1,
                    serializer2, comparator2,
                    pairComparatorFactory.createComparator21(comparator1, comparator2),
                    this.taskContext.getMemoryManager(),
                    this.taskContext.getIOManager(),
                    this.taskContext.getContainingTask(),
                    availableMemory,
                    false, false, hashJoinUseBitMaps);
        } else if (buildSideIndex == 1 && probeSideIndex == 0) {
            matchIterator = new ReusingBuildSecondReOpenableHashJoinIterator<IT1, IT2, OT>(
                    input1, input2,
                    serializer1, comparator1,
                    serializer2, comparator2,
                    pairComparatorFactory.createComparator12(comparator1, comparator2),
                    this.taskContext.getMemoryManager(),
                    this.taskContext.getIOManager(),
                    this.taskContext.getContainingTask(),
                    availableMemory,
                    false, false, hashJoinUseBitMaps);
        } else {
            throw new Exception("Error: Inconsistent setup for repeatable hash join driver.");
        }
    } else {
        if (buildSideIndex == 0 && probeSideIndex == 1) {
            matchIterator = new NonReusingBuildFirstReOpenableHashJoinIterator<IT1, IT2, OT>(
                    input1, input2,
                    serializer1, comparator1,
                    serializer2, comparator2,
                    pairComparatorFactory.createComparator21(comparator1, comparator2),
                    this.taskContext.getMemoryManager(),
                    this.taskContext.getIOManager(),
                    this.taskContext.getContainingTask(),
                    availableMemory,
                    false, false, hashJoinUseBitMaps);
        } else if (buildSideIndex == 1 && probeSideIndex == 0) {
            matchIterator = new NonReusingBuildSecondReOpenableHashJoinIterator<IT1, IT2, OT>(
                    input1, input2,
                    serializer1, comparator1,
                    serializer2, comparator2,
                    pairComparatorFactory.createComparator12(comparator1, comparator2),
                    this.taskContext.getMemoryManager(),
                    this.taskContext.getIOManager(),
                    this.taskContext.getContainingTask(),
                    availableMemory,
                    false, false, hashJoinUseBitMaps);
        } else {
            throw new Exception("Error: Inconsistent setup for repeatable hash join driver.");
        }
    }
    this.matchIterator.open();
}
Also used : TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) NonReusingBuildSecondReOpenableHashJoinIterator(org.apache.flink.runtime.operators.hash.NonReusingBuildSecondReOpenableHashJoinIterator) ReusingBuildSecondReOpenableHashJoinIterator(org.apache.flink.runtime.operators.hash.ReusingBuildSecondReOpenableHashJoinIterator) Counter(org.apache.flink.metrics.Counter) CountingMutableObjectIterator(org.apache.flink.runtime.operators.util.metrics.CountingMutableObjectIterator)
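
The objectReuseEnabled flag branched on above comes from the job's ExecutionConfig. A minimal sketch of toggling it from the DataSet API; the class name and job body are illustrative, while enableObjectReuse() is the public switch on ExecutionConfig:

import org.apache.flink.api.java.ExecutionEnvironment;

public class ObjectReuseToggle {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // Opting in to object reuse makes runtime drivers choose the
        // Reusing* iterator variants, as in the branch above; the default
        // (object reuse disabled) selects the NonReusing* path.
        env.getConfig().enableObjectReuse();
        env.fromElements(1, 2, 3).print();
    }
}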

Example 42 with TaskConfig

Use of org.apache.flink.runtime.operators.util.TaskConfig in project flink by apache.

The class AllGroupReduceDriver, method prepare().

// --------------------------------------------------------------------------------------------
@Override
public void prepare() throws Exception {
    final TaskConfig config = this.taskContext.getTaskConfig();
    this.strategy = config.getDriverStrategy();
    switch(this.strategy) {
        case ALL_GROUP_REDUCE_COMBINE:
            if (!(this.taskContext.getStub() instanceof GroupCombineFunction)) {
                throw new Exception("Using combiner on a UDF that does not implement the combiner interface " + GroupCombineFunction.class.getName());
            }
            // intentional fall-through: the remaining valid strategies need no extra check
        case ALL_GROUP_REDUCE:
        case ALL_GROUP_COMBINE:
            break;
        default:
            throw new Exception("Unrecognized driver strategy for AllGroupReduce driver: " + this.strategy.name());
    }
    this.serializer = this.taskContext.<IT>getInputSerializer(0).getSerializer();
    this.input = this.taskContext.getInput(0);
    ExecutionConfig executionConfig = taskContext.getExecutionConfig();
    this.objectReuseEnabled = executionConfig.isObjectReuseEnabled();
    if (LOG.isDebugEnabled()) {
        LOG.debug("AllGroupReduceDriver object reuse: " + (this.objectReuseEnabled ? "ENABLED" : "DISABLED") + ".");
    }
}
Also used : GroupCombineFunction(org.apache.flink.api.common.functions.GroupCombineFunction) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig)
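
The ALL_GROUP_REDUCE_COMBINE branch above only verifies that the UDF implements GroupCombineFunction. A minimal sketch of such a UDF that would pass the check; the class name SumAll and the integer element type are illustrative:

import org.apache.flink.api.common.functions.GroupCombineFunction;
import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.util.Collector;

// A group reduce UDF that also implements the combiner interface, so the
// instanceof check in prepare() succeeds for ALL_GROUP_REDUCE_COMBINE.
public class SumAll implements GroupReduceFunction<Integer, Integer>,
        GroupCombineFunction<Integer, Integer> {

    @Override
    public void reduce(Iterable<Integer> values, Collector<Integer> out) {
        int sum = 0;
        for (int v : values) {
            sum += v;
        }
        out.collect(sum);
    }

    @Override
    public void combine(Iterable<Integer> values, Collector<Integer> out) {
        // partial sums are themselves summable, so combine can delegate to reduce
        reduce(values, out);
    }
}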

Example 43 with TaskConfig

Use of org.apache.flink.runtime.operators.util.TaskConfig in project flink by apache.

The class BatchTask, method invoke().

// --------------------------------------------------------------------------------------------
//                                  Task Interface
// --------------------------------------------------------------------------------------------
/**
	 * The main work method.
	 */
@Override
public void invoke() throws Exception {
    // --------------------------------------------------------------------
    if (LOG.isDebugEnabled()) {
        LOG.debug(formatLogString("Start registering input and output."));
    }
    // obtain task configuration (including stub parameters)
    Configuration taskConf = getTaskConfiguration();
    this.config = new TaskConfig(taskConf);
    // now get the operator class which drives the operation
    final Class<? extends Driver<S, OT>> driverClass = this.config.getDriver();
    this.driver = InstantiationUtil.instantiate(driverClass, Driver.class);
    String headName = getEnvironment().getTaskInfo().getTaskName().split("->")[0].trim();
    this.metrics = getEnvironment().getMetricGroup().addOperator(headName.startsWith("CHAIN") ? headName.substring(6) : headName);
    this.metrics.getIOMetricGroup().reuseInputMetricsForTask();
    if (config.getNumberOfChainedStubs() == 0) {
        this.metrics.getIOMetricGroup().reuseOutputMetricsForTask();
    }
    // initialize the readers.
    // this does not yet trigger any stream consuming or processing.
    initInputReaders();
    initBroadcastInputReaders();
    // initialize the writers.
    initOutputs();
    if (LOG.isDebugEnabled()) {
        LOG.debug(formatLogString("Finished registering input and output."));
    }
    // --------------------------------------------------------------------
    if (LOG.isDebugEnabled()) {
        LOG.debug(formatLogString("Start task code."));
    }
    this.runtimeUdfContext = createRuntimeContext(metrics);
    // whatever happens in this scope, make sure that the local strategies are cleaned up.
    // this is especially important, since there may be asynchronous closes (such as through canceling).
    try {
        // the local processing includes building the dams / caches
        try {
            int numInputs = driver.getNumberOfInputs();
            int numComparators = driver.getNumberOfDriverComparators();
            int numBroadcastInputs = this.config.getNumBroadcastInputs();
            initInputsSerializersAndComparators(numInputs, numComparators);
            initBroadcastInputsSerializers(numBroadcastInputs);
            // set the iterative status for inputs and broadcast inputs
            {
                List<Integer> iterativeInputs = new ArrayList<Integer>();
                for (int i = 0; i < numInputs; i++) {
                    final int numberOfEventsUntilInterrupt = getTaskConfig().getNumberOfEventsUntilInterruptInIterativeGate(i);
                    if (numberOfEventsUntilInterrupt < 0) {
                        throw new IllegalArgumentException("Number of events until interrupt must not be negative, but is " + numberOfEventsUntilInterrupt);
                    } else if (numberOfEventsUntilInterrupt > 0) {
                        this.inputReaders[i].setIterativeReader();
                        iterativeInputs.add(i);
                        if (LOG.isDebugEnabled()) {
                            LOG.debug(formatLogString("Input [" + i + "] reads in supersteps with [" + +numberOfEventsUntilInterrupt + "] event(s) till next superstep."));
                        }
                    }
                }
                this.iterativeInputs = asArray(iterativeInputs);
            }
            {
                List<Integer> iterativeBcInputs = new ArrayList<Integer>();
                for (int i = 0; i < numBroadcastInputs; i++) {
                    final int numberOfEventsUntilInterrupt = getTaskConfig().getNumberOfEventsUntilInterruptInIterativeBroadcastGate(i);
                    if (numberOfEventsUntilInterrupt < 0) {
                        throw new IllegalArgumentException("Number of events until interrupt must not be negative, but is " + numberOfEventsUntilInterrupt);
                    } else if (numberOfEventsUntilInterrupt > 0) {
                        this.broadcastInputReaders[i].setIterativeReader();
                        iterativeBcInputs.add(i);
                        if (LOG.isDebugEnabled()) {
                            LOG.debug(formatLogString("Broadcast input [" + i + "] reads in supersteps with [" + +numberOfEventsUntilInterrupt + "] event(s) till next superstep."));
                        }
                    }
                }
                this.iterativeBroadcastInputs = asArray(iterativeBcInputs);
            }
            initLocalStrategies(numInputs);
        } catch (Exception e) {
            throw new RuntimeException("Initializing the input processing failed" + (e.getMessage() == null ? "." : ": " + e.getMessage()), e);
        }
        if (!this.running) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(formatLogString("Task cancelled before task code was started."));
            }
            return;
        }
        // pre main-function initialization
        initialize();
        // read the broadcast variables. they will be released in the finally clause 
        for (int i = 0; i < this.config.getNumBroadcastInputs(); i++) {
            final String name = this.config.getBroadcastInputName(i);
            readAndSetBroadcastInput(i, name, this.runtimeUdfContext, 1);
        }
        // the work goes here
        run();
    } finally {
        // clean up in any case!
        closeLocalStrategiesAndCaches();
        clearReaders(inputReaders);
        clearWriters(eventualOutputs);
    }
    if (this.running) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(formatLogString("Finished task code."));
        }
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug(formatLogString("Task code cancelled."));
        }
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) ChainedDriver(org.apache.flink.runtime.operators.chaining.ChainedDriver) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) ExceptionInChainedStubException(org.apache.flink.runtime.operators.chaining.ExceptionInChainedStubException) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) IOException(java.io.IOException) List(java.util.List) ArrayList(java.util.ArrayList)
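
invoke() wraps the raw task Configuration in a TaskConfig, which is the typed view the rest of the method reads from. A minimal sketch of that wrapping in isolation, using only the accessors that appear above; the class name is illustrative and the printed values assume the defaults of an empty configuration:

import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.operators.util.TaskConfig;

public class TaskConfigView {

    public static void main(String[] args) {
        // TaskConfig is a typed view over the raw key/value Configuration
        // that is shipped with each task, mirroring the wrapping in invoke().
        Configuration raw = new Configuration();
        TaskConfig config = new TaskConfig(raw);
        // With nothing set, a task has no chained stubs and no broadcast inputs.
        System.out.println(config.getNumberOfChainedStubs());
        System.out.println(config.getNumBroadcastInputs());
    }
}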

Example 44 with TaskConfig

Use of org.apache.flink.runtime.operators.util.TaskConfig in project flink by apache.

The class CoGroupDriver, method prepare().

@Override
public void prepare() throws Exception {
    final TaskConfig config = this.taskContext.getTaskConfig();
    if (config.getDriverStrategy() != DriverStrategy.CO_GROUP) {
        throw new Exception("Unrecognized driver strategy for CoGoup driver: " + config.getDriverStrategy().name());
    }
    final Counter numRecordsIn = this.taskContext.getMetricGroup().getIOMetricGroup().getNumRecordsInCounter();
    final MutableObjectIterator<IT1> in1 = new CountingMutableObjectIterator<>(this.taskContext.<IT1>getInput(0), numRecordsIn);
    final MutableObjectIterator<IT2> in2 = new CountingMutableObjectIterator<>(this.taskContext.<IT2>getInput(1), numRecordsIn);
    // get the key positions and types
    final TypeSerializer<IT1> serializer1 = this.taskContext.<IT1>getInputSerializer(0).getSerializer();
    final TypeSerializer<IT2> serializer2 = this.taskContext.<IT2>getInputSerializer(1).getSerializer();
    final TypeComparator<IT1> groupComparator1 = this.taskContext.getDriverComparator(0);
    final TypeComparator<IT2> groupComparator2 = this.taskContext.getDriverComparator(1);
    final TypePairComparatorFactory<IT1, IT2> pairComparatorFactory = config.getPairComparatorFactory(this.taskContext.getUserCodeClassLoader());
    if (pairComparatorFactory == null) {
        throw new Exception("Missing pair comparator factory for CoGroup driver");
    }
    ExecutionConfig executionConfig = taskContext.getExecutionConfig();
    this.objectReuseEnabled = executionConfig.isObjectReuseEnabled();
    if (LOG.isDebugEnabled()) {
        LOG.debug("CoGroupDriver object reuse: " + (this.objectReuseEnabled ? "ENABLED" : "DISABLED") + ".");
    }
    if (objectReuseEnabled) {
        // create the CoGroupTaskIterator according to the provided local strategy.
        this.coGroupIterator = new ReusingSortMergeCoGroupIterator<IT1, IT2>(in1, in2, serializer1, groupComparator1, serializer2, groupComparator2, pairComparatorFactory.createComparator12(groupComparator1, groupComparator2));
    } else {
        // create the CoGroupTaskIterator according to the provided local strategy.
        this.coGroupIterator = new NonReusingSortMergeCoGroupIterator<IT1, IT2>(in1, in2, serializer1, groupComparator1, serializer2, groupComparator2, pairComparatorFactory.createComparator12(groupComparator1, groupComparator2));
    }
    // open CoGroupTaskIterator - this triggers the sorting and blocks until the iterator is ready
    this.coGroupIterator.open();
    if (LOG.isDebugEnabled()) {
        LOG.debug(this.taskContext.formatLogString("CoGroup task iterator ready."));
    }
}
Also used : TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Counter(org.apache.flink.metrics.Counter) CountingMutableObjectIterator(org.apache.flink.runtime.operators.util.metrics.CountingMutableObjectIterator)
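
At the API level, this driver executes a coGroup between two keyed inputs. A minimal sketch of a user job whose plan runs on CoGroupDriver with the CO_GROUP strategy; the data, key choice, and UDF logic are illustrative:

import org.apache.flink.api.common.functions.CoGroupFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public class CoGroupExample {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Integer, String>> left =
                env.fromElements(Tuple2.of(1, "a"), Tuple2.of(2, "b"));
        DataSet<Tuple2<Integer, String>> right =
                env.fromElements(Tuple2.of(1, "x"), Tuple2.of(3, "y"));
        // Each key group from both sides is handed to the UDF together;
        // at runtime this operator is executed by CoGroupDriver.
        left.coGroup(right)
            .where(0)
            .equalTo(0)
            .with(new CoGroupFunction<Tuple2<Integer, String>, Tuple2<Integer, String>, String>() {
                @Override
                public void coGroup(Iterable<Tuple2<Integer, String>> first,
                                    Iterable<Tuple2<Integer, String>> second,
                                    Collector<String> out) {
                    int l = 0, r = 0;
                    for (Tuple2<Integer, String> ignored : first) { l++; }
                    for (Tuple2<Integer, String> ignored : second) { r++; }
                    out.collect(l + " left vs " + r + " right");
                }
            })
            .print();
    }
}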

Example 45 with TaskConfig

Use of org.apache.flink.runtime.operators.util.TaskConfig in project flink by apache.

The class CoGroupWithSolutionSetFirstDriver, method initialize().

// --------------------------------------------------------------------------------------------
@Override
@SuppressWarnings("unchecked")
public void initialize() {
    final TypeComparator<IT1> solutionSetComparator;
    // grab a handle to the hash table from the iteration broker
    if (taskContext instanceof AbstractIterativeTask) {
        AbstractIterativeTask<?, ?> iterativeTaskContext = (AbstractIterativeTask<?, ?>) taskContext;
        String identifier = iterativeTaskContext.brokerKey();
        Object table = SolutionSetBroker.instance().get(identifier);
        if (table instanceof CompactingHashTable) {
            this.hashTable = (CompactingHashTable<IT1>) table;
            solutionSetSerializer = this.hashTable.getBuildSideSerializer();
            solutionSetComparator = this.hashTable.getBuildSideComparator().duplicate();
        } else if (table instanceof JoinHashMap) {
            this.objectMap = (JoinHashMap<IT1>) table;
            solutionSetSerializer = this.objectMap.getBuildSerializer();
            solutionSetComparator = this.objectMap.getBuildComparator().duplicate();
        } else {
            throw new RuntimeException("Unrecognized solution set index: " + table);
        }
    } else {
        throw new RuntimeException("The task context of this driver is no iterative task context.");
    }
    TaskConfig config = taskContext.getTaskConfig();
    ClassLoader classLoader = taskContext.getUserCodeClassLoader();
    TypeComparatorFactory<IT2> probeSideComparatorFactory = config.getDriverComparator(0, classLoader);
    this.probeSideSerializer = taskContext.<IT2>getInputSerializer(0).getSerializer();
    this.probeSideComparator = probeSideComparatorFactory.createComparator();
    ExecutionConfig executionConfig = taskContext.getExecutionConfig();
    objectReuseEnabled = executionConfig.isObjectReuseEnabled();
    if (objectReuseEnabled) {
        solutionSideRecord = solutionSetSerializer.createInstance();
    }
    TypePairComparatorFactory<IT1, IT2> factory = taskContext.getTaskConfig().getPairComparatorFactory(taskContext.getUserCodeClassLoader());
    pairComparator = factory.createComparator21(solutionSetComparator, this.probeSideComparator);
}
Also used : AbstractIterativeTask(org.apache.flink.runtime.iterative.task.AbstractIterativeTask) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) CompactingHashTable(org.apache.flink.runtime.operators.hash.CompactingHashTable) JoinHashMap(org.apache.flink.api.common.operators.util.JoinHashMap)
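
This driver runs inside a delta iteration, where the solution set lives in the hash table that initialize() fetches through the SolutionSetBroker. A minimal sketch of a delta iteration that coGroups the solution set (as the first input) with the workset; the data, key choice, and update logic are illustrative, and mapping this plan onto CoGroupWithSolutionSetFirstDriver is an assumption about how the optimizer translates it:

import org.apache.flink.api.common.functions.CoGroupFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DeltaIteration;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public class SolutionSetCoGroup {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> initial =
                env.fromElements(Tuple2.of(1L, 10L), Tuple2.of(2L, 20L));

        // Delta iteration keyed on field 0; the solution set is kept in the
        // hash table that initialize() above obtains from the SolutionSetBroker.
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> it =
                initial.iterateDelta(initial, 10, 0);

        DataSet<Tuple2<Long, Long>> delta = it.getSolutionSet()
                .coGroup(it.getWorkset())
                .where(0).equalTo(0)
                .with(new CoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                    @Override
                    public void coGroup(Iterable<Tuple2<Long, Long>> solution,
                                        Iterable<Tuple2<Long, Long>> workset,
                                        Collector<Tuple2<Long, Long>> out) {
                        // emit workset records unchanged; real logic would
                        // compare them against the matching solution set entry
                        for (Tuple2<Long, Long> w : workset) {
                            out.collect(w);
                        }
                    }
                });

        it.closeWith(delta, delta).print();
    }
}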

Aggregations

TaskConfig (org.apache.flink.runtime.operators.util.TaskConfig): 48 usages
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 13 usages
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 12 usages
IOException (java.io.IOException): 9 usages
BulkIterationPlanNode (org.apache.flink.optimizer.plan.BulkIterationPlanNode): 9 usages
BulkPartialSolutionPlanNode (org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode): 9 usages
DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode): 9 usages
IterationPlanNode (org.apache.flink.optimizer.plan.IterationPlanNode): 9 usages
NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode): 9 usages
PlanNode (org.apache.flink.optimizer.plan.PlanNode): 9 usages
SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode): 9 usages
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 9 usages
SolutionSetPlanNode (org.apache.flink.optimizer.plan.SolutionSetPlanNode): 9 usages
SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode): 9 usages
WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode): 9 usages
WorksetPlanNode (org.apache.flink.optimizer.plan.WorksetPlanNode): 9 usages
Configuration (org.apache.flink.configuration.Configuration): 8 usages
CompilerException (org.apache.flink.optimizer.CompilerException): 8 usages
Channel (org.apache.flink.optimizer.plan.Channel): 6 usages
NamedChannel (org.apache.flink.optimizer.plan.NamedChannel): 6 usages