Use of org.apache.flink.runtime.operators.sort.UnilateralSortMerger in the Apache Flink project.
Class BatchTask, method initInputLocalStrategy.
/**
 * Installs the local strategy configured for the input with the given index.
 *
 * <p>Without a configured strategy, or with {@code NONE}, the input iterator is used directly.
 * For {@code SORT} and {@code COMBININGSORT} a sort merger is created and registered in
 * {@code localStrategies}, while the entry in {@code inputs} is left {@code null} so that it
 * is fetched lazily from the strategy.
 *
 * @param inputNum index of the input to set up
 * @throws IllegalStateException if a strategy is already registered for the input, or if a
 *     combining sort is requested for an input other than 0, for a driver that reports no
 *     stub type, or for a stub that is not a {@code GroupCombineFunction}
 * @throws RuntimeException if instantiating the combiner stub fails
 * @throws Exception if the configured local strategy is not one of the handled values
 */
private void initInputLocalStrategy(int inputNum) throws Exception {
    // a strategy must never be installed twice for the same input
    if (this.localStrategies[inputNum] != null) {
        throw new IllegalStateException();
    }

    final LocalStrategy configuredStrategy = this.config.getInputLocalStrategy(inputNum);
    if (configuredStrategy == null) {
        // the config names no local strategy: consume the input as it is
        this.inputs[inputNum] = this.inputIterators[inputNum];
        return;
    }

    switch (configuredStrategy) {
        case NONE: {
            // pass the input through unchanged
            this.inputs[inputNum] = this.inputIterators[inputNum];
            return;
        }
        case SORT: {
            @SuppressWarnings({ "rawtypes", "unchecked" })
            UnilateralSortMerger<?> sortMerger = new UnilateralSortMerger(
                    getMemoryManager(),
                    getIOManager(),
                    this.inputIterators[inputNum],
                    this,
                    this.inputSerializers[inputNum],
                    getLocalStrategyComparator(inputNum),
                    this.config.getRelativeMemoryInput(inputNum),
                    this.config.getFilehandlesInput(inputNum),
                    this.config.getSpillingThresholdInput(inputNum),
                    this.config.getUseLargeRecordHandler(),
                    this.getExecutionConfig().isObjectReuseEnabled());
            // leave the input unset; it is obtained lazily from the strategy
            this.inputs[inputNum] = null;
            this.localStrategies[inputNum] = sortMerger;
            return;
        }
        case COMBININGSORT: {
            // nested configurations for the local strategies would make this check unnecessary
            if (inputNum != 0) {
                throw new IllegalStateException("Performing combining sort outside a (group)reduce task!");
            }

            // The combiner gets its own stub instance: sharing the task's stub would mean the
            // sorter and the subsequent (group)reduce use it from multiple threads.
            final Class<S> stubClass = this.driver.getStubType();
            if (stubClass == null) {
                throw new IllegalStateException("Performing combining sort outside a reduce task!");
            }

            final S combinerStub;
            try {
                combinerStub = initStub(stubClass);
            } catch (Exception e) {
                final String detail = e.getMessage() == null ? "." : ": " + e.getMessage();
                throw new RuntimeException("Initializing the user code and the configuration failed" + detail, e);
            }

            if (!(combinerStub instanceof GroupCombineFunction)) {
                throw new IllegalStateException("Performing combining sort outside a reduce task!");
            }

            @SuppressWarnings({ "rawtypes", "unchecked" })
            CombiningUnilateralSortMerger<?> combiningSortMerger = new CombiningUnilateralSortMerger(
                    (GroupCombineFunction) combinerStub,
                    getMemoryManager(),
                    getIOManager(),
                    this.inputIterators[inputNum],
                    this,
                    this.inputSerializers[inputNum],
                    getLocalStrategyComparator(inputNum),
                    this.config.getRelativeMemoryInput(inputNum),
                    this.config.getFilehandlesInput(inputNum),
                    this.config.getSpillingThresholdInput(inputNum),
                    this.getTaskConfig().getUseLargeRecordHandler(),
                    this.getExecutionConfig().isObjectReuseEnabled());
            combiningSortMerger.setUdfConfiguration(this.config.getStubParameters());
            // leave the input unset; it is obtained lazily from the strategy
            this.inputs[inputNum] = null;
            this.localStrategies[inputNum] = combiningSortMerger;
            return;
        }
        default:
            throw new Exception("Unrecognized local strategy provided: " + configuredStrategy.name());
    }
}
Use of org.apache.flink.runtime.operators.sort.UnilateralSortMerger in the Apache Flink project.
Class DataSinkTask, method invoke.
/**
 * Runs the data sink: reads all records of input 0 and writes them to the output format.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Initialize the output format and the input readers.</li>
 *   <li>Create the runtime context, obtain the records-in counter and hand the context to the
 *       format if it is a {@code RichOutputFormat}.</li>
 *   <li>Set up the local strategy for input 0: {@code NONE} reads straight from the reader,
 *       {@code SORT} routes the reader through a {@code UnilateralSortMerger}.</li>
 *   <li>Open the format, write every record, then close the format unless the task was
 *       canceled.</li>
 * </ol>
 *
 * <p>On failure a best-effort cleanup is attempted via {@code CleanupWhenUnsuccessful}. A
 * {@code CancelTaskException} is always rethrown; any other exception is rethrown only if the
 * task was not canceled. The format (if still set), the local strategy and the input reader
 * are released in the {@code finally} block.
 *
 * @throws RuntimeException if initializing the input readers or the sort strategy fails, or if
 *     the configured local strategy is neither {@code NONE} nor {@code SORT}
 * @throws Exception any other failure raised while the task was not canceled
 */
@Override
public void invoke() throws Exception {
// --------------------------------------------------------------------
// Initialize
// --------------------------------------------------------------------
LOG.debug(getLogString("Start registering input and output"));
// initialize OutputFormat
initOutputFormat();
// initialize input readers
try {
initInputReaders();
} catch (Exception e) {
// wrap with context; the original exception is kept as the cause
throw new RuntimeException("Initializing the input streams failed" + (e.getMessage() == null ? "." : ": " + e.getMessage()), e);
}
LOG.debug(getLogString("Finished registering input and output"));
// --------------------------------------------------------------------
// Invoke
// --------------------------------------------------------------------
LOG.debug(getLogString("Starting data sink operator"));
RuntimeContext ctx = createRuntimeContext();
// counter incremented once per record written in the loops below
final Counter numRecordsIn = ((OperatorMetricGroup) ctx.getMetricGroup()).getIOMetricGroup().getNumRecordsInCounter();
((OperatorMetricGroup) ctx.getMetricGroup()).getIOMetricGroup().reuseInputMetricsForTask();
((OperatorMetricGroup) ctx.getMetricGroup()).getIOMetricGroup().reuseOutputMetricsForTask();
// only rich output formats receive the runtime context
if (RichOutputFormat.class.isAssignableFrom(this.format.getClass())) {
((RichOutputFormat) this.format).setRuntimeContext(ctx);
LOG.debug(getLogString("Rich Sink detected. Initializing runtime context."));
}
ExecutionConfig executionConfig = getExecutionConfig();
// decides below whether a record instance is reused across next() calls
boolean objectReuseEnabled = executionConfig.isObjectReuseEnabled();
try {
// initialize local strategies
MutableObjectIterator<IT> input1;
switch(this.config.getInputLocalStrategy(0)) {
case NONE:
// nothing to do
localStrategy = null;
input1 = reader;
break;
case SORT:
// initialize sort local strategy
try {
// get type comparator
TypeComparatorFactory<IT> compFact = this.config.getInputComparator(0, getUserCodeClassLoader());
if (compFact == null) {
throw new Exception("Missing comparator factory for local strategy on input " + 0);
}
// initialize sorter
UnilateralSortMerger<IT> sorter = new UnilateralSortMerger<IT>(getEnvironment().getMemoryManager(), getEnvironment().getIOManager(), this.reader, this, this.inputTypeSerializerFactory, compFact.createComparator(), this.config.getRelativeMemoryInput(0), this.config.getFilehandlesInput(0), this.config.getSpillingThresholdInput(0), this.config.getUseLargeRecordHandler(), this.getExecutionConfig().isObjectReuseEnabled());
// stored in the field so that the finally block can close it
this.localStrategy = sorter;
input1 = sorter.getIterator();
} catch (Exception e) {
// also wraps the "missing comparator factory" exception thrown above
throw new RuntimeException("Initializing the input processing failed" + (e.getMessage() == null ? "." : ": " + e.getMessage()), e);
}
break;
default:
// only NONE and SORT are handled for a data sink
throw new RuntimeException("Invalid local strategy for DataSinkTask");
}
// read the reader and write it to the output
final TypeSerializer<IT> serializer = this.inputTypeSerializerFactory.getSerializer();
final MutableObjectIterator<IT> input = input1;
// local alias of the field; it is only in scope inside this try block
final OutputFormat<IT> format = this.format;
// check if task has been canceled
if (this.taskCanceled) {
return;
}
LOG.debug(getLogString("Starting to produce output"));
// open
format.open(this.getEnvironment().getTaskInfo().getIndexOfThisSubtask(), this.getEnvironment().getTaskInfo().getNumberOfParallelSubtasks());
if (objectReuseEnabled) {
// object reuse: one instance from the serializer is passed back into next(record)
IT record = serializer.createInstance();
// work!
while (!this.taskCanceled && ((record = input.next(record)) != null)) {
numRecordsIn.inc();
format.writeRecord(record);
}
} else {
// no object reuse: the iterator supplies the record on each next() call
IT record;
// work!
while (!this.taskCanceled && ((record = input.next()) != null)) {
numRecordsIn.inc();
format.writeRecord(record);
}
}
// close. We close here such that a regular close throwing an exception marks a task as failed.
if (!this.taskCanceled) {
this.format.close();
// a null field tells the finally block that the format is already closed
this.format = null;
}
} catch (Exception ex) {
// make a best effort to clean up
try {
// the local 'format' from the try block is out of scope here, so this is the field
if (!cleanupCalled && format instanceof CleanupWhenUnsuccessful) {
cleanupCalled = true;
((CleanupWhenUnsuccessful) format).tryCleanupOnError();
}
} catch (Throwable t) {
// a failing cleanup is logged only, so the original exception is still handled below
LOG.error("Cleanup on error failed.", t);
}
ex = ExceptionInChainedStubException.exceptionUnwrap(ex);
if (ex instanceof CancelTaskException) {
// forward canceling exception
throw ex;
} else // drop, if the task was canceled
if (!this.taskCanceled) {
if (LOG.isErrorEnabled()) {
LOG.error(getLogString("Error in user code: " + ex.getMessage()), ex);
}
throw ex;
}
} finally {
if (this.format != null) {
// This should only be the case if we had a previous error, or were canceled.
try {
this.format.close();
} catch (Throwable t) {
// logged only: nothing is thrown from this close attempt
if (LOG.isWarnEnabled()) {
LOG.warn(getLogString("Error closing the output format"), t);
}
}
}
// close local strategy if necessary
if (localStrategy != null) {
try {
this.localStrategy.close();
} catch (Throwable t) {
LOG.error("Error closing local strategy", t);
}
}
// release the input reader regardless of how the task ended
BatchTask.clearReaders(new MutableReader<?>[] { inputReader });
}
// reached on normal completion, or when an exception was dropped because of cancellation
if (!this.taskCanceled) {
LOG.debug(getLogString("Finished data sink operator"));
} else {
LOG.debug(getLogString("Data sink operator cancelled"));
}
}
Use of org.apache.flink.runtime.operators.sort.UnilateralSortMerger in the Apache Flink project.
Class HashVsSortMiniBenchmark, method testSortBothMerge.
/**
 * Mini benchmark of a sort-merge inner join.
 *
 * <p>Both generated inputs are sorted with a {@code UnilateralSortMerger}, joined through a
 * {@code ReusingMergeInnerJoinIterator} with a no-op matcher and a discarding collector, and
 * the elapsed wall-clock time is printed in milliseconds. The measured span covers sorter
 * construction through closing both sorters. Any exception fails the test.
 */
@Test
public void testSortBothMerge() {
    try {
        TestData.TupleGenerator generator1 = new TestData.TupleGenerator(SEED1, INPUT_1_SIZE / 10, 100, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
        TestData.TupleGenerator generator2 = new TestData.TupleGenerator(SEED2, INPUT_2_SIZE, 100, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
        final TestData.TupleGeneratorIterator input1 = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
        final TestData.TupleGeneratorIterator input2 = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
        final FlatJoinFunction matcher = new NoOpMatcher();
        final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

        long start = System.nanoTime();

        final UnilateralSortMerger<Tuple2<Integer, String>> sorter1 = new UnilateralSortMerger<>(
                this.memoryManager, this.ioManager, input1, this.parentTask, this.serializer1,
                this.comparator1.duplicate(), (double) MEMORY_FOR_SORTER / MEMORY_SIZE, 128, 0.8f,
                true, /* use large record handler */
                true);
        final UnilateralSortMerger<Tuple2<Integer, String>> sorter2 = new UnilateralSortMerger<>(
                this.memoryManager, this.ioManager, input2, this.parentTask, this.serializer2,
                this.comparator2.duplicate(), (double) MEMORY_FOR_SORTER / MEMORY_SIZE, 128, 0.8f,
                true, /* use large record handler */
                true);

        final MutableObjectIterator<Tuple2<Integer, String>> sortedInput1 = sorter1.getIterator();
        final MutableObjectIterator<Tuple2<Integer, String>> sortedInput2 = sorter2.getIterator();

        // join the two sorted streams
        ReusingMergeInnerJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
                new ReusingMergeInnerJoinIterator<>(sortedInput1, sortedInput2,
                        this.serializer1.getSerializer(), this.comparator1,
                        this.serializer2.getSerializer(), this.comparator2,
                        this.pairComparator11, this.memoryManager, this.ioManager,
                        MEMORY_PAGES_FOR_MERGE, this.parentTask);

        iterator.open();
        // drain the join; the matcher and the collector discard all results
        while (iterator.callWithNextKey(matcher, collector)) ;
        iterator.close();

        sorter1.close();
        sorter2.close();

        long elapsed = System.nanoTime() - start;
        // Divide in floating point: the former long/int division truncated the result to
        // whole milliseconds before it was widened to double.
        double msecs = elapsed / (1000.0 * 1000.0);
        System.out.println("Sort-Merge Took " + msecs + " msecs.");
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail("An exception occurred during the test: " + e.getMessage());
    }
}
Aggregations