use of org.apache.flink.api.common.io.RichInputFormat in project flink by apache.
the class InputFormatSourceFunction method run.
@Override
public void run(SourceContext<OUT> ctx) throws Exception {
try {
Counter completedSplitsCounter = getRuntimeContext().getMetricGroup().counter("numSplitsProcessed");
if (isRunning && format instanceof RichInputFormat) {
((RichInputFormat) format).openInputFormat();
}
OUT nextElement = serializer.createInstance();
while (isRunning) {
format.open(splitIterator.next());
while (isRunning && !format.reachedEnd()) {
nextElement = format.nextRecord(nextElement);
if (nextElement != null) {
ctx.collect(nextElement);
} else {
break;
}
}
format.close();
completedSplitsCounter.inc();
if (isRunning) {
isRunning = splitIterator.hasNext();
}
}
} finally {
format.close();
if (format instanceof RichInputFormat) {
((RichInputFormat) format).closeInputFormat();
}
isRunning = false;
}
}
use of org.apache.flink.api.common.io.RichInputFormat in project flink by apache.
the class GenericDataSourceBase method executeOnCollections.
// --------------------------------------------------------------------------------------------
protected List<OUT> executeOnCollections(RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
@SuppressWarnings("unchecked") InputFormat<OUT, InputSplit> inputFormat = (InputFormat<OUT, InputSplit>) this.formatWrapper.getUserCodeObject();
//configure the input format
inputFormat.configure(this.parameters);
//open the input format
if (inputFormat instanceof RichInputFormat) {
((RichInputFormat) inputFormat).setRuntimeContext(ctx);
((RichInputFormat) inputFormat).openInputFormat();
}
List<OUT> result = new ArrayList<OUT>();
// splits
InputSplit[] splits = inputFormat.createInputSplits(1);
TypeSerializer<OUT> serializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);
for (InputSplit split : splits) {
inputFormat.open(split);
while (!inputFormat.reachedEnd()) {
OUT next = inputFormat.nextRecord(serializer.createInstance());
if (next != null) {
result.add(serializer.copy(next));
}
}
inputFormat.close();
}
//close the input format
if (inputFormat instanceof RichInputFormat) {
((RichInputFormat) inputFormat).closeInputFormat();
}
return result;
}
use of org.apache.flink.api.common.io.RichInputFormat in project flink by apache.
the class InputFormatSourceFunction method open.
@Override
@SuppressWarnings("unchecked")
public void open(Configuration parameters) throws Exception {
StreamingRuntimeContext context = (StreamingRuntimeContext) getRuntimeContext();
if (format instanceof RichInputFormat) {
((RichInputFormat) format).setRuntimeContext(context);
}
format.configure(parameters);
provider = context.getInputSplitProvider();
serializer = typeInfo.createSerializer(getRuntimeContext().getExecutionConfig());
splitIterator = getInputSplits();
isRunning = splitIterator.hasNext();
}
use of org.apache.flink.api.common.io.RichInputFormat in project flink by apache.
the class DataSourceTask method invoke.
@Override
public void invoke() throws Exception {
// --------------------------------------------------------------------
// Initialize
// --------------------------------------------------------------------
initInputFormat();
LOG.debug(getLogString("Start registering input and output"));
try {
initOutputs(getUserCodeClassLoader());
} catch (Exception ex) {
throw new RuntimeException("The initialization of the DataSource's outputs caused an error: " + ex.getMessage(), ex);
}
LOG.debug(getLogString("Finished registering input and output"));
// --------------------------------------------------------------------
// Invoke
// --------------------------------------------------------------------
LOG.debug(getLogString("Starting data source operator"));
RuntimeContext ctx = createRuntimeContext();
Counter completedSplitsCounter = ctx.getMetricGroup().counter("numSplitsProcessed");
((OperatorMetricGroup) ctx.getMetricGroup()).getIOMetricGroup().reuseInputMetricsForTask();
Counter numRecordsOut = ((OperatorMetricGroup) ctx.getMetricGroup()).getIOMetricGroup().getNumRecordsOutCounter();
if (this.config.getNumberOfChainedStubs() == 0) {
((OperatorMetricGroup) ctx.getMetricGroup()).getIOMetricGroup().reuseOutputMetricsForTask();
}
if (RichInputFormat.class.isAssignableFrom(this.format.getClass())) {
((RichInputFormat) this.format).setRuntimeContext(ctx);
LOG.debug(getLogString("Rich Source detected. Initializing runtime context."));
((RichInputFormat) this.format).openInputFormat();
LOG.debug(getLogString("Rich Source detected. Opening the InputFormat."));
}
ExecutionConfig executionConfig = getExecutionConfig();
boolean objectReuseEnabled = executionConfig.isObjectReuseEnabled();
LOG.debug("DataSourceTask object reuse: " + (objectReuseEnabled ? "ENABLED" : "DISABLED") + ".");
final TypeSerializer<OT> serializer = this.serializerFactory.getSerializer();
try {
// start all chained tasks
BatchTask.openChainedTasks(this.chainedTasks, this);
// get input splits to read
final Iterator<InputSplit> splitIterator = getInputSplits();
// for each assigned input split
while (!this.taskCanceled && splitIterator.hasNext()) {
// get start and end
final InputSplit split = splitIterator.next();
LOG.debug(getLogString("Opening input split " + split.toString()));
final InputFormat<OT, InputSplit> format = this.format;
// open input format
format.open(split);
LOG.debug(getLogString("Starting to read input from split " + split.toString()));
try {
final Collector<OT> output = new CountingCollector<>(this.output, numRecordsOut);
if (objectReuseEnabled) {
OT reuse = serializer.createInstance();
// as long as there is data to read
while (!this.taskCanceled && !format.reachedEnd()) {
OT returned;
if ((returned = format.nextRecord(reuse)) != null) {
output.collect(returned);
}
}
} else {
// as long as there is data to read
while (!this.taskCanceled && !format.reachedEnd()) {
OT returned;
if ((returned = format.nextRecord(serializer.createInstance())) != null) {
output.collect(returned);
}
}
}
if (LOG.isDebugEnabled() && !this.taskCanceled) {
LOG.debug(getLogString("Closing input split " + split.toString()));
}
} finally {
// close. We close here such that a regular close throwing an exception marks a task as failed.
format.close();
}
completedSplitsCounter.inc();
}
// end for all input splits
// close the collector. if it is a chaining task collector, it will close its chained tasks
this.output.close();
// close all chained tasks letting them report failure
BatchTask.closeChainedTasks(this.chainedTasks, this);
} catch (Exception ex) {
// close the input, but do not report any exceptions, since we already have another root cause
try {
this.format.close();
} catch (Throwable ignored) {
}
BatchTask.cancelChainedTasks(this.chainedTasks);
ex = ExceptionInChainedStubException.exceptionUnwrap(ex);
if (ex instanceof CancelTaskException) {
// forward canceling exception
throw ex;
} else if (!this.taskCanceled) {
// drop exception, if the task was canceled
BatchTask.logAndThrowException(ex, this);
}
} finally {
BatchTask.clearWriters(eventualOutputs);
// --------------------------------------------------------------------
if (this.format != null && RichInputFormat.class.isAssignableFrom(this.format.getClass())) {
((RichInputFormat) this.format).closeInputFormat();
LOG.debug(getLogString("Rich Source detected. Closing the InputFormat."));
}
}
if (!this.taskCanceled) {
LOG.debug(getLogString("Finished data source operator"));
} else {
LOG.debug(getLogString("Data source operator cancelled"));
}
}
Aggregations