Search in sources:

Example 11 with RuntimeContext

use of org.apache.flink.api.common.functions.RuntimeContext in project flink by apache.

the class DataSinkTask method invoke.

@Override
public void invoke() throws Exception {
    // --------------------------------------------------------------------
    // Initialize
    // --------------------------------------------------------------------
    LOG.debug(getLogString("Start registering input and output"));
    // initialize OutputFormat
    initOutputFormat();
    // initialize input readers
    try {
        initInputReaders();
    } catch (Exception e) {
        throw new RuntimeException("Initializing the input streams failed" + (e.getMessage() == null ? "." : ": " + e.getMessage()), e);
    }
    LOG.debug(getLogString("Finished registering input and output"));
    // --------------------------------------------------------------------
    // Invoke
    // --------------------------------------------------------------------
    LOG.debug(getLogString("Starting data sink operator"));
    RuntimeContext ctx = createRuntimeContext();
    final Counter numRecordsIn = ((OperatorMetricGroup) ctx.getMetricGroup()).getIOMetricGroup().getNumRecordsInCounter();
    ((OperatorMetricGroup) ctx.getMetricGroup()).getIOMetricGroup().reuseInputMetricsForTask();
    ((OperatorMetricGroup) ctx.getMetricGroup()).getIOMetricGroup().reuseOutputMetricsForTask();
    if (RichOutputFormat.class.isAssignableFrom(this.format.getClass())) {
        ((RichOutputFormat) this.format).setRuntimeContext(ctx);
        LOG.debug(getLogString("Rich Sink detected. Initializing runtime context."));
    }
    ExecutionConfig executionConfig = getExecutionConfig();
    boolean objectReuseEnabled = executionConfig.isObjectReuseEnabled();
    try {
        // initialize local strategies
        MutableObjectIterator<IT> input1;
        switch(this.config.getInputLocalStrategy(0)) {
            case NONE:
                // nothing to do
                localStrategy = null;
                input1 = reader;
                break;
            case SORT:
                // initialize sort local strategy
                try {
                    // get type comparator
                    TypeComparatorFactory<IT> compFact = this.config.getInputComparator(0, getUserCodeClassLoader());
                    if (compFact == null) {
                        throw new Exception("Missing comparator factory for local strategy on input " + 0);
                    }
                    // initialize sorter
                    UnilateralSortMerger<IT> sorter = new UnilateralSortMerger<IT>(
                            getEnvironment().getMemoryManager(),
                            getEnvironment().getIOManager(),
                            this.reader,
                            this,
                            this.inputTypeSerializerFactory,
                            compFact.createComparator(),
                            this.config.getRelativeMemoryInput(0),
                            this.config.getFilehandlesInput(0),
                            this.config.getSpillingThresholdInput(0),
                            this.config.getUseLargeRecordHandler(),
                            this.getExecutionConfig().isObjectReuseEnabled());
                    this.localStrategy = sorter;
                    input1 = sorter.getIterator();
                } catch (Exception e) {
                    throw new RuntimeException("Initializing the input processing failed" + (e.getMessage() == null ? "." : ": " + e.getMessage()), e);
                }
                break;
            default:
                throw new RuntimeException("Invalid local strategy for DataSinkTask");
        }
        // read the reader and write it to the output
        final TypeSerializer<IT> serializer = this.inputTypeSerializerFactory.getSerializer();
        final MutableObjectIterator<IT> input = input1;
        final OutputFormat<IT> format = this.format;
        // check if task has been canceled
        if (this.taskCanceled) {
            return;
        }
        LOG.debug(getLogString("Starting to produce output"));
        // open
        format.open(this.getEnvironment().getTaskInfo().getIndexOfThisSubtask(), this.getEnvironment().getTaskInfo().getNumberOfParallelSubtasks());
        if (objectReuseEnabled) {
            IT record = serializer.createInstance();
            // work!
            while (!this.taskCanceled && ((record = input.next(record)) != null)) {
                numRecordsIn.inc();
                format.writeRecord(record);
            }
        } else {
            IT record;
            // work!
            while (!this.taskCanceled && ((record = input.next()) != null)) {
                numRecordsIn.inc();
                format.writeRecord(record);
            }
        }
        // close. We close here such that a regular close throwing an exception marks a task as failed.
        if (!this.taskCanceled) {
            this.format.close();
            this.format = null;
        }
    } catch (Exception ex) {
        // make a best effort to clean up
        try {
            if (!cleanupCalled && format instanceof CleanupWhenUnsuccessful) {
                cleanupCalled = true;
                ((CleanupWhenUnsuccessful) format).tryCleanupOnError();
            }
        } catch (Throwable t) {
            LOG.error("Cleanup on error failed.", t);
        }
        ex = ExceptionInChainedStubException.exceptionUnwrap(ex);
        if (ex instanceof CancelTaskException) {
            // forward canceling exception
            throw ex;
        } else if (!this.taskCanceled) {
            // the task was not canceled, so log and rethrow;
            // if it was canceled, the exception is dropped
            if (LOG.isErrorEnabled()) {
                LOG.error(getLogString("Error in user code: " + ex.getMessage()), ex);
            }
            throw ex;
        }
    } finally {
        if (this.format != null) {
            // This should only be the case if we had a previous error, or were canceled.
            try {
                this.format.close();
            } catch (Throwable t) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn(getLogString("Error closing the output format"), t);
                }
            }
        }
        // close local strategy if necessary
        if (localStrategy != null) {
            try {
                this.localStrategy.close();
            } catch (Throwable t) {
                LOG.error("Error closing local strategy", t);
            }
        }
        BatchTask.clearReaders(new MutableReader<?>[] { inputReader });
    }
    if (!this.taskCanceled) {
        LOG.debug(getLogString("Finished data sink operator"));
    } else {
        LOG.debug(getLogString("Data sink operator cancelled"));
    }
}
Also used: ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) ExceptionInChainedStubException(org.apache.flink.runtime.operators.chaining.ExceptionInChainedStubException) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) Counter(org.apache.flink.metrics.Counter) UnilateralSortMerger(org.apache.flink.runtime.operators.sort.UnilateralSortMerger) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) RichOutputFormat(org.apache.flink.api.common.io.RichOutputFormat) CleanupWhenUnsuccessful(org.apache.flink.api.common.io.CleanupWhenUnsuccessful)
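
For context, here is a minimal illustrative RichOutputFormat (not from the Flink source) showing why the branch above matters: because DataSinkTask calls setRuntimeContext(ctx) before open(), the format can read subtask information from its runtime context. The class name and output are assumptions for this sketch.

import org.apache.flink.api.common.io.RichOutputFormat;
import org.apache.flink.configuration.Configuration;

public class LoggingOutputFormat extends RichOutputFormat<String> {

    @Override
    public void configure(Configuration parameters) {
        // no configuration needed for this sketch
    }

    @Override
    public void open(int taskNumber, int numTasks) {
        // the runtime context has already been set by DataSinkTask at this point
        System.out.printf("opened sink subtask %d of %d%n",
                getRuntimeContext().getIndexOfThisSubtask() + 1,
                getRuntimeContext().getNumberOfParallelSubtasks());
    }

    @Override
    public void writeRecord(String record) {
        System.out.println(record);
    }

    @Override
    public void close() {
        // nothing to release in this sketch
    }
}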

Example 12 with RuntimeContext

use of org.apache.flink.api.common.functions.RuntimeContext in project flink by apache.

the class ElasticsearchSinkExample method main.

public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<String> source = env.generateSequence(0, 20).map(new MapFunction<Long, String>() {

        @Override
        public String map(Long value) throws Exception {
            return "message #" + value;
        }
    });
    Map<String, String> userConfig = new HashMap<>();
    userConfig.put("cluster.name", "elasticsearch");
    // This instructs the sink to emit after every element, otherwise they would be buffered
    userConfig.put(ElasticsearchSink.CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS, "1");
    List<TransportAddress> transports = new ArrayList<>();
    transports.add(new InetSocketTransportAddress(InetAddress.getByName("127.0.0.1"), 9300));
    source.addSink(new ElasticsearchSink<>(userConfig, transports, new ElasticsearchSinkFunction<String>() {

        @Override
        public void process(String element, RuntimeContext ctx, RequestIndexer indexer) {
            indexer.add(createIndexRequest(element));
        }
    }));
    env.execute("Elasticsearch Sink Example");
}
Also used: HashMap(java.util.HashMap) InetSocketTransportAddress(org.elasticsearch.common.transport.InetSocketTransportAddress) TransportAddress(org.elasticsearch.common.transport.TransportAddress) ArrayList(java.util.ArrayList) RequestIndexer(org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer) ElasticsearchSinkFunction(org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext)
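
The createIndexRequest helper is elided above; a plausible sketch follows. The index and type names are placeholders, not taken from the snippet.

import java.util.HashMap;
import java.util.Map;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;

private static IndexRequest createIndexRequest(String element) {
    Map<String, Object> json = new HashMap<>();
    json.put("data", element);
    // "my-index" and "my-type" are illustrative placeholder names
    return Requests.indexRequest()
            .index("my-index")
            .type("my-type")
            .source(json);
}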

Example 13 with RuntimeContext

use of org.apache.flink.api.common.functions.RuntimeContext in project flink by apache.

the class ElasticsearchSinkExample method main.

public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<String> source = env.generateSequence(0, 20).map(new MapFunction<Long, String>() {

        @Override
        public String map(Long value) throws Exception {
            return "message #" + value;
        }
    });
    Map<String, String> userConfig = new HashMap<>();
    userConfig.put("cluster.name", "elasticsearch");
    // This instructs the sink to emit after every element, otherwise they would be buffered
    userConfig.put(ElasticsearchSink.CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS, "1");
    List<InetSocketAddress> transports = new ArrayList<>();
    transports.add(new InetSocketAddress(InetAddress.getByName("127.0.0.1"), 9300));
    source.addSink(new ElasticsearchSink<>(userConfig, transports, new ElasticsearchSinkFunction<String>() {

        @Override
        public void process(String element, RuntimeContext ctx, RequestIndexer indexer) {
            indexer.add(createIndexRequest(element));
        }
    }));
    env.execute("Elasticsearch Sink Example");
}
Also used: HashMap(java.util.HashMap) InetSocketAddress(java.net.InetSocketAddress) ArrayList(java.util.ArrayList) RequestIndexer(org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer) ElasticsearchSinkFunction(org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext)
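
The only material difference from Example 12 is the address type: this variant supplies plain java.net.InetSocketAddress values, whereas Example 12 wraps them in Elasticsearch's InetSocketTransportAddress. Judging by the imports, the two snippets target different Elasticsearch connector versions (the 5.x connector takes java.net addresses directly); that version mapping is an inference, not stated in the source.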

Example 14 with RuntimeContext

use of org.apache.flink.api.common.functions.RuntimeContext in project flink by apache.

the class FlinkKafkaProducerBase method open.

// ----------------------------------- Utilities --------------------------
/**
 * Initializes the connection to Kafka.
 */
@Override
public void open(Configuration configuration) {
    producer = getKafkaProducer(this.producerConfig);
    RuntimeContext ctx = getRuntimeContext();
    if (partitioner != null) {
        // the fetched list is immutable, so we're creating a mutable copy in order to sort it
        List<PartitionInfo> partitionsList = new ArrayList<>(producer.partitionsFor(defaultTopicId));
        // sort the partitions by partition id to make sure the fetched partition list is the same across subtasks
        Collections.sort(partitionsList, new Comparator<PartitionInfo>() {

            @Override
            public int compare(PartitionInfo o1, PartitionInfo o2) {
                return Integer.compare(o1.partition(), o2.partition());
            }
        });
        partitions = new int[partitionsList.size()];
        for (int i = 0; i < partitions.length; i++) {
            partitions[i] = partitionsList.get(i).partition();
        }
        partitioner.open(ctx.getIndexOfThisSubtask(), ctx.getNumberOfParallelSubtasks(), partitions);
    }
    LOG.info("Starting FlinkKafkaProducer ({}/{}) to produce into topic {}", ctx.getIndexOfThisSubtask() + 1, ctx.getNumberOfParallelSubtasks(), defaultTopicId);
    // register Kafka metrics to Flink accumulators
    if (!Boolean.parseBoolean(producerConfig.getProperty(KEY_DISABLE_METRICS, "false"))) {
        Map<MetricName, ? extends Metric> metrics = this.producer.metrics();
        if (metrics == null) {
            // MapR's Kafka implementation returns null here.
            LOG.info("Producer implementation does not support metrics");
        } else {
            final MetricGroup kafkaMetricGroup = getRuntimeContext().getMetricGroup().addGroup("KafkaProducer");
            for (Map.Entry<MetricName, ? extends Metric> metric : metrics.entrySet()) {
                kafkaMetricGroup.gauge(metric.getKey().name(), new KafkaMetricWrapper(metric.getValue()));
            }
        }
    }
    if (flushOnCheckpoint && !((StreamingRuntimeContext) this.getRuntimeContext()).isCheckpointingEnabled()) {
        LOG.warn("Flushing on checkpoint is enabled, but checkpointing is not enabled. Disabling flushing.");
        flushOnCheckpoint = false;
    }
    if (logFailuresOnly) {
        callback = new Callback() {

            @Override
            public void onCompletion(RecordMetadata metadata, Exception e) {
                if (e != null) {
                    LOG.error("Error while sending record to Kafka: " + e.getMessage(), e);
                }
                acknowledgeMessage();
            }
        };
    } else {
        callback = new Callback() {

            @Override
            public void onCompletion(RecordMetadata metadata, Exception exception) {
                if (exception != null && asyncException == null) {
                    asyncException = exception;
                }
                acknowledgeMessage();
            }
        };
    }
}
Also used: StreamingRuntimeContext(org.apache.flink.streaming.api.operators.StreamingRuntimeContext) ArrayList(java.util.ArrayList) MetricGroup(org.apache.flink.metrics.MetricGroup) RecordMetadata(org.apache.kafka.clients.producer.RecordMetadata) MetricName(org.apache.kafka.common.MetricName) Callback(org.apache.kafka.clients.producer.Callback) KafkaMetricWrapper(org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaMetricWrapper) PartitionInfo(org.apache.kafka.common.PartitionInfo) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) Map(java.util.Map)
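
For illustration, a minimal custom partitioner sketch whose open() receives exactly the three values computed above: the subtask index, the parallelism, and the sorted partition id array. The class name and pinning strategy are assumptions, and the partition() signature should be checked against the KafkaPartitioner base class of the connector version in use.

import org.apache.flink.streaming.connectors.kafka.partitioner.KafkaPartitioner;

public class PinnedPartitioner<T> extends KafkaPartitioner<T> {

    private int targetPartition = -1;

    @Override
    public void open(int parallelInstanceId, int parallelInstances, int[] partitions) {
        // pin this subtask to one Kafka partition, wrapping around
        // if there are more subtasks than partitions
        this.targetPartition = partitions[parallelInstanceId % partitions.length];
    }

    @Override
    public int partition(T next, byte[] serializedKey, byte[] serializedValue, int numPartitions) {
        // every record from this subtask goes to the pinned partition
        return targetPartition;
    }
}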

Example 15 with RuntimeContext

use of org.apache.flink.api.common.functions.RuntimeContext in project flink by apache.

the class RMQSource method open.

@Override
public void open(Configuration config) throws Exception {
    super.open(config);
    ConnectionFactory factory = setupConnectionFactory();
    try {
        connection = factory.newConnection();
        channel = connection.createChannel();
        if (channel == null) {
            throw new RuntimeException("None of RabbitMQ channels are available");
        }
        setupQueue();
        consumer = new QueueingConsumer(channel);
        RuntimeContext runtimeContext = getRuntimeContext();
        if (runtimeContext instanceof StreamingRuntimeContext && ((StreamingRuntimeContext) runtimeContext).isCheckpointingEnabled()) {
            autoAck = false;
            // enables transaction mode
            channel.txSelect();
        } else {
            autoAck = true;
        }
        LOG.debug("Starting RabbitMQ source with autoAck status: " + autoAck);
        channel.basicConsume(queueName, autoAck, consumer);
    } catch (IOException e) {
        throw new RuntimeException("Cannot create RMQ connection with " + queueName + " at " + rmqConnectionConfig.getHost(), e);
    }
    running = true;
}
Also used: ConnectionFactory(com.rabbitmq.client.ConnectionFactory) StreamingRuntimeContext(org.apache.flink.streaming.api.operators.StreamingRuntimeContext) QueueingConsumer(com.rabbitmq.client.QueueingConsumer) IOException(java.io.IOException) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext)
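
A hedged usage sketch: with checkpointing enabled on the environment, the open() above takes the transactional branch (autoAck = false). The host, port, credentials, and queue name are placeholders, and the import paths follow the Flink 1.x connector layout; adjust for other versions.

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.rabbitmq.RMQSource;
import org.apache.flink.streaming.connectors.rabbitmq.common.RMQConnectionConfig;
import org.apache.flink.streaming.util.serialization.SimpleStringSchema;

public class RMQSourceUsage {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // checkpointing makes RMQSource#open() disable autoAck and use channel transactions
        env.enableCheckpointing(5000);

        RMQConnectionConfig connectionConfig = new RMQConnectionConfig.Builder()
                .setHost("localhost")
                .setPort(5672)
                .setUserName("guest")
                .setPassword("guest")
                .setVirtualHost("/")
                .build();

        DataStream<String> stream = env.addSource(
                new RMQSource<>(connectionConfig, "myQueue", new SimpleStringSchema()));
        stream.print();
        env.execute("RMQ Source Usage");
    }
}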

Aggregations

RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext): 30
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 19
Test (org.junit.Test): 17
Configuration (org.apache.flink.configuration.Configuration): 16
Collector (org.apache.flink.util.Collector): 14
ArrayList (java.util.ArrayList): 10
HashMap (java.util.HashMap): 10
TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow): 10
HashSet (java.util.HashSet): 5
TaskInfo (org.apache.flink.api.common.TaskInfo): 5
RuntimeUDFContext (org.apache.flink.api.common.functions.util.RuntimeUDFContext): 5
UnregisteredMetricsGroup (org.apache.flink.metrics.groups.UnregisteredMetricsGroup): 5
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 4
Accumulator (org.apache.flink.api.common.accumulators.Accumulator): 4
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 4
Map (java.util.Map): 3
Set (java.util.Set): 3
Path (org.apache.flink.core.fs.Path): 3
StreamingRuntimeContext (org.apache.flink.streaming.api.operators.StreamingRuntimeContext): 3
InetSocketAddress (java.net.InetSocketAddress): 2