Search in sources :

Example 1 with CloseableIterator

use of org.apache.flink.util.CloseableIterator in project flink by apache.

the class StateChangeFormat method read.

/**
 * Opens the stream behind {@code handle} and returns an iterator over the state changes it
 * contains.
 *
 * <p>The data is consumed as follows: an int with the number of groups; then, per group, an int
 * with the number of changes in the group and an int with the key group, followed by the changes
 * themselves, each stored as an int length and that many bytes.
 *
 * <p>If anything fails before the iterator is handed to the caller, the stream opened here is
 * closed before the exception propagates; otherwise closing the returned iterator closes it.
 *
 * @param handle handle providing the input stream to read from
 * @param offset expected stream position; if the stream is not already there, {@code offset}
 *     bytes are skipped on the wrapped input before reading
 * @return iterator over the changes; must be closed by the caller
 * @throws IOException if opening the stream, skipping, or reading the initial header fails
 * @throws ArithmeticException if a skip is required and {@code offset} does not fit into an int
 */
@Override
public CloseableIterator<StateChange> read(StreamStateHandle handle, long offset) throws IOException {
    FSDataInputStream stream = handle.openInputStream();
    try {
        DataInputViewStreamWrapper input = wrap(stream);
        if (stream.getPos() != offset) {
            LOG.debug("seek from {} to {}", stream.getPos(), offset);
            // skipBytesToRead only accepts an int: fail loudly rather than truncate the offset
            // and continue reading from a wrong position.
            // NOTE(review): this skips `offset` bytes, not `offset - stream.getPos()` - confirm
            // that this is intended given what wrap() does to the stream.
            input.skipBytesToRead(Math.toIntExact(offset));
        }
        return new CloseableIterator<StateChange>() {

            // Groups that have not been started yet (the first one is started right below).
            int numUnreadGroups = input.readInt();

            // Changes still to be returned from the current group.
            int numLeftInGroup = numUnreadGroups-- == 0 ? 0 : input.readInt();

            // Key group of the current group.
            // NOTE(review): unlike advance(), the key group is not read here when the first
            // group is empty - presumably empty groups are never written; verify.
            int keyGroup = numLeftInGroup == 0 ? 0 : input.readInt();

            @Override
            public boolean hasNext() {
                advance();
                return numLeftInGroup > 0;
            }

            /** Moves on to the next group header once the current group is exhausted. */
            private void advance() {
                if (numLeftInGroup == 0 && numUnreadGroups > 0) {
                    numUnreadGroups--;
                    try {
                        numLeftInGroup = input.readInt();
                        keyGroup = input.readInt();
                    } catch (IOException e) {
                        ExceptionUtils.rethrow(e);
                    }
                }
            }

            @Override
            public StateChange next() {
                advance();
                if (numLeftInGroup == 0) {
                    throw new NoSuchElementException();
                }
                numLeftInGroup--;
                try {
                    return readChange();
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }

            /** Reads one length-prefixed change and tags it with the current key group. */
            private StateChange readChange() throws IOException {
                int size = input.readInt();
                byte[] bytes = new byte[size];
                IOUtils.readFully(input, bytes, 0, size);
                return new StateChange(keyGroup, bytes);
            }

            @Override
            public void close() throws Exception {
                LOG.trace("close {}", stream);
                stream.close();
            }
        };
    } catch (IOException | RuntimeException e) {
        // No iterator was returned, so nobody else can close the stream: release it here and
        // keep the original failure as the primary exception.
        try {
            stream.close();
        } catch (Exception closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
Also used : CloseableIterator(org.apache.flink.util.CloseableIterator) StateChange(org.apache.flink.runtime.state.changelog.StateChange) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream) IOException(java.io.IOException) DataInputViewStreamWrapper(org.apache.flink.core.memory.DataInputViewStreamWrapper) NoSuchElementException(java.util.NoSuchElementException)

Example 2 with CloseableIterator

use of org.apache.flink.util.CloseableIterator in project flink by apache.

the class DataStreamBatchExecutionITCase method batchKeyedNonKeyedTwoInputOperator.

/**
 * Checks that in BATCH runtime mode the whole regular (non-keyed) input is processed before the
 * keyed input.
 *
 * <p>In this scenario the operator's first input is keyed and its second input is not.
 */
@Test
public void batchKeyedNonKeyedTwoInputOperator() throws Exception {
    StreamExecutionEnvironment batchEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    batchEnv.setParallelism(1);
    batchEnv.setRuntimeMode(RuntimeExecutionMode.BATCH);

    // First input: will be keyed by the string field; timestamps come from the integer field.
    DataStream<Tuple2<String, Integer>> firstInput =
            batchEnv.fromElements(
                            Tuple2.of("regular2", 4),
                            Tuple2.of("regular1", 3),
                            Tuple2.of("regular1", 2),
                            Tuple2.of("regular2", 1))
                    .assignTimestampsAndWatermarks(
                            WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                                    .withTimestampAssigner((in, ts) -> in.f1));

    // Second input: stays non-keyed.
    DataStream<Tuple2<String, Integer>> secondInput =
            batchEnv.fromElements(
                            Tuple2.of("regular4", 4),
                            Tuple2.of("regular3", 3),
                            Tuple2.of("regular3", 2),
                            Tuple2.of("regular4", 1))
                    .assignTimestampsAndWatermarks(
                            WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                                    .withTimestampAssigner((in, ts) -> in.f1));

    DataStream<String> output =
            firstInput
                    .keyBy(in -> in.f0)
                    .connect(secondInput)
                    .transform("operator", BasicTypeInfo.STRING_TYPE_INFO, new TwoInputIdentityOperator());

    try (CloseableIterator<String> collected = output.executeAndCollect()) {
        List<String> actual = CollectionUtil.iteratorToList(collected);
        // Non-keyed records first, in their original order; keyed records follow.
        List<String> expected =
                Arrays.asList(
                        "(regular4,4)",
                        "(regular3,3)",
                        "(regular3,2)",
                        "(regular4,1)",
                        "(regular1,2)",
                        "(regular1,3)",
                        "(regular2,1)",
                        "(regular2,4)");
        assertThat(actual, equalTo(expected));
    }
}
Also used : BroadcastState(org.apache.flink.api.common.state.BroadcastState) Arrays(java.util.Arrays) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BroadcastStream(org.apache.flink.streaming.api.datastream.BroadcastStream) MultipleConnectedStreams(org.apache.flink.streaming.api.datastream.MultipleConnectedStreams) CollectionUtil.iteratorToList(org.apache.flink.util.CollectionUtil.iteratorToList) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) AbstractStreamOperatorV2(org.apache.flink.streaming.api.operators.AbstractStreamOperatorV2) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) DataStreamSource(org.apache.flink.streaming.api.datastream.DataStreamSource) KeyedBroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) AbstractInput(org.apache.flink.streaming.api.operators.AbstractInput) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) Assert.assertThat(org.junit.Assert.assertThat) ListState(org.apache.flink.api.common.state.ListState) ReadOnlyBroadcastState(org.apache.flink.api.common.state.ReadOnlyBroadcastState) AbstractStreamOperatorFactory(org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) Collector(org.apache.flink.util.Collector) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) ClassRule(org.junit.ClassRule) MiniClusterWithClientResource(org.apache.flink.test.util.MiniClusterWithClientResource) TwoInputStreamOperator(org.apache.flink.streaming.api.operators.TwoInputStreamOperator) 
TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) KeyedMultipleInputTransformation(org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) KeyedStream(org.apache.flink.streaming.api.datastream.KeyedStream) StreamOperatorParameters(org.apache.flink.streaming.api.operators.StreamOperatorParameters) Test(org.junit.Test) CollectionUtil(org.apache.flink.util.CollectionUtil) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) DataStream(org.apache.flink.streaming.api.datastream.DataStream) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) MultipleInputStreamOperator(org.apache.flink.streaming.api.operators.MultipleInputStreamOperator) CloseableIterator(org.apache.flink.util.CloseableIterator) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) BroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction) RuntimeExecutionMode(org.apache.flink.api.common.RuntimeExecutionMode) Time(org.apache.flink.api.common.time.Time) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Input(org.apache.flink.streaming.api.operators.Input) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 3 with CloseableIterator

use of org.apache.flink.util.CloseableIterator in project flink by apache.

the class RemoteInputChannelTest method testExceptionOnPersisting.

/**
 * Checks that a buffer passed to {@code onBuffer} is not recycled when persisting it fails, and
 * that it is recycled once the channel releases all its resources.
 */
@Test
public void testExceptionOnPersisting() throws Exception {
    // Setup: a state writer that closes the data it receives and then fails on purpose.
    final SingleInputGate gate = createSingleInputGate(1);
    final ChannelStateWriter.NoOpChannelStateWriter failingWriter = new ChannelStateWriter.NoOpChannelStateWriter() {

        @Override
        public void addInputData(long checkpointId, InputChannelInfo info, int startSeqNum, CloseableIterator<Buffer> data) {
            try {
                data.close();
            } catch (Exception closeFailure) {
                throw new RuntimeException(closeFailure);
            }
            throw new ExpectedTestException();
        }
    };
    final RemoteInputChannel channel =
            InputChannelBuilder.newBuilder()
                    .setStateWriter(failingWriter)
                    .buildRemoteChannel(gate);

    channel.checkpointStarted(
            new CheckpointBarrier(
                    42,
                    System.currentTimeMillis(),
                    CheckpointOptions.unaligned(CheckpointType.CHECKPOINT, getDefault())));

    final Buffer payload = createBuffer(TestBufferFactory.BUFFER_SIZE);
    assertFalse(payload.isRecycled());

    try {
        channel.onBuffer(payload, 0, -1);
        fail("This should have failed");
    } catch (ExpectedTestException expected) {
        // ignore
    }

    // Strictly speaking this check is optional. When persisting throws, either of these
    // outcomes would be correct:
    // 1. the buffer is recycled exactly once, inside #onBuffer while handling the exception
    // 2. the buffer is kept by RemoteInputChannel and recycled in releaseAllResources.
    // The unacceptable outcome is recycling it in both places. Without the assertion below the
    // test would only be relying on Buffer throwing IllegalReferenceCountException. The
    // assertion pins the current implementation (which is unlikely to change) so that the test
    // does not have to rely on that exception.
    //
    // Put differently: if RemoteInputChannel has been refactored and this assertion gets in the
    // way, it is probably safe to drop it - as long as recycling the same buffer twice still
    // throws IllegalReferenceCountException.
    assertFalse(payload.isRecycled());

    channel.releaseAllResources();
    assertTrue(payload.isRecycled());
}
Also used : CheckpointBarrier(org.apache.flink.runtime.io.network.api.CheckpointBarrier) NetworkBuffer(org.apache.flink.runtime.io.network.buffer.NetworkBuffer) TestBufferFactory.createBuffer(org.apache.flink.runtime.io.network.util.TestBufferFactory.createBuffer) Buffer(org.apache.flink.runtime.io.network.buffer.Buffer) EventSerializer.toBuffer(org.apache.flink.runtime.io.network.api.serialization.EventSerializer.toBuffer) BufferBuilderTestUtils.buildSingleBuffer(org.apache.flink.runtime.io.network.buffer.BufferBuilderTestUtils.buildSingleBuffer) CloseableIterator(org.apache.flink.util.CloseableIterator) InputChannelInfo(org.apache.flink.runtime.checkpoint.channel.InputChannelInfo) ExpectedTestException(org.apache.flink.runtime.operators.testutils.ExpectedTestException) InputChannelTestUtils.createSingleInputGate(org.apache.flink.runtime.io.network.partition.InputChannelTestUtils.createSingleInputGate) ProducerFailedException(org.apache.flink.runtime.io.network.partition.ProducerFailedException) TimeoutException(java.util.concurrent.TimeoutException) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) ExpectedTestException(org.apache.flink.runtime.operators.testutils.ExpectedTestException) PartitionNotFoundException(org.apache.flink.runtime.io.network.partition.PartitionNotFoundException) IOException(java.io.IOException) Test(org.junit.Test)

Example 4 with CloseableIterator

use of org.apache.flink.util.CloseableIterator in project flink by apache.

the class StateChangelogStorageTest method extract.

/**
 * Reads every change available for {@code handle} through {@code reader} and groups the raw
 * change payloads by key group, preserving the order in which they were returned.
 *
 * @param handle handle to read the changes for
 * @param reader reader used to obtain the changes; the iterator it returns is closed here
 * @return map from key group to the payloads of the changes read for that key group
 * @throws Exception if obtaining or closing the iterator fails
 */
private Map<Integer, List<byte[]>> extract(T handle, StateChangelogHandleReader<T> reader) throws Exception {
    Map<Integer, List<byte[]>> payloadsByKeyGroup = new HashMap<>();
    try (CloseableIterator<StateChange> iterator = reader.getChanges(handle)) {
        iterator.forEachRemaining(
                stateChange -> {
                    List<byte[]> payloads =
                            payloadsByKeyGroup.computeIfAbsent(
                                    stateChange.getKeyGroup(), unused -> new ArrayList<>());
                    payloads.add(stateChange.getChange());
                });
    }
    return payloadsByKeyGroup;
}
Also used : StateChangelogStorage(org.apache.flink.runtime.state.changelog.StateChangelogStorage) HashMap(java.util.HashMap) Random(java.util.Random) ArrayList(java.util.ArrayList) ChangelogStateHandle(org.apache.flink.runtime.state.changelog.ChangelogStateHandle) Collectors.toMap(java.util.stream.Collectors.toMap) Map(java.util.Map) Assert.assertArrayEquals(org.junit.Assert.assertArrayEquals) StateChangelogHandleReader(org.apache.flink.runtime.state.changelog.StateChangelogHandleReader) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) Iterator(java.util.Iterator) StateChange(org.apache.flink.runtime.state.changelog.StateChange) Test(org.junit.Test) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) CloseableIterator(org.apache.flink.util.CloseableIterator) List(java.util.List) Rule(org.junit.Rule) Stream(java.util.stream.Stream) StreamSupport.stream(java.util.stream.StreamSupport.stream) Assert.assertFalse(org.junit.Assert.assertFalse) Function.identity(java.util.function.Function.identity) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) SequenceNumber(org.apache.flink.runtime.state.changelog.SequenceNumber) StateChangelogWriter(org.apache.flink.runtime.state.changelog.StateChangelogWriter) TemporaryFolder(org.junit.rules.TemporaryFolder) Assert.assertEquals(org.junit.Assert.assertEquals) HashMap(java.util.HashMap) StateChange(org.apache.flink.runtime.state.changelog.StateChange) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List)

Example 5 with CloseableIterator

use of org.apache.flink.util.CloseableIterator in project flink by apache.

the class DataStreamBatchExecutionITCase method batchBroadcastExecution.

/**
 * Checks that in BATCH runtime mode the whole broadcast input is processed before the regular
 * input.
 */
@Test
public void batchBroadcastExecution() throws Exception {
    StreamExecutionEnvironment batchEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    batchEnv.setParallelism(1);
    batchEnv.setRuntimeMode(RuntimeExecutionMode.BATCH);

    // Side that gets broadcast; timestamps come from the integer field.
    DataStream<Tuple2<String, Integer>> broadcastSide =
            batchEnv.fromElements(Tuple2.of("bc1", 1), Tuple2.of("bc2", 2), Tuple2.of("bc3", 3))
                    .assignTimestampsAndWatermarks(
                            WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                                    .withTimestampAssigner((in, ts) -> in.f1));

    // Regular side; its elements are deliberately not ordered by timestamp.
    DataStream<Tuple2<String, Integer>> regularSide =
            batchEnv.fromElements(
                            Tuple2.of("regular1", 1),
                            Tuple2.of("regular1", 2),
                            Tuple2.of("regular1", 3),
                            Tuple2.of("regular1", 4),
                            Tuple2.of("regular1", 3),
                            Tuple2.of("regular1", 5),
                            Tuple2.of("regular1", 3))
                    .assignTimestampsAndWatermarks(
                            WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                                    .withTimestampAssigner((in, ts) -> in.f1));

    BroadcastStream<Tuple2<String, Integer>> broadcast = broadcastSide.broadcast(STATE_DESCRIPTOR);
    DataStream<String> output = regularSide.connect(broadcast).process(new TestBroadcastFunction());

    try (CloseableIterator<String> collected = output.executeAndCollect()) {
        List<String> actual = CollectionUtil.iteratorToList(collected);
        // The regular (non-keyed) input is not sorted by timestamp. For keyed inputs the
        // sorting is only a by-product of the grouping/sorting used to form the keyed groups.
        List<String> expected =
                Arrays.asList(
                        "(regular1,1): [bc2=bc2, bc1=bc1, bc3=bc3]",
                        "(regular1,2): [bc2=bc2, bc1=bc1, bc3=bc3]",
                        "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]",
                        "(regular1,4): [bc2=bc2, bc1=bc1, bc3=bc3]",
                        "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]",
                        "(regular1,5): [bc2=bc2, bc1=bc1, bc3=bc3]",
                        "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]");
        assertThat(actual, equalTo(expected));
    }
}
Also used : BroadcastState(org.apache.flink.api.common.state.BroadcastState) Arrays(java.util.Arrays) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BroadcastStream(org.apache.flink.streaming.api.datastream.BroadcastStream) MultipleConnectedStreams(org.apache.flink.streaming.api.datastream.MultipleConnectedStreams) CollectionUtil.iteratorToList(org.apache.flink.util.CollectionUtil.iteratorToList) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) AbstractStreamOperatorV2(org.apache.flink.streaming.api.operators.AbstractStreamOperatorV2) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) DataStreamSource(org.apache.flink.streaming.api.datastream.DataStreamSource) KeyedBroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) AbstractInput(org.apache.flink.streaming.api.operators.AbstractInput) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) Assert.assertThat(org.junit.Assert.assertThat) ListState(org.apache.flink.api.common.state.ListState) ReadOnlyBroadcastState(org.apache.flink.api.common.state.ReadOnlyBroadcastState) AbstractStreamOperatorFactory(org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) Collector(org.apache.flink.util.Collector) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) ClassRule(org.junit.ClassRule) MiniClusterWithClientResource(org.apache.flink.test.util.MiniClusterWithClientResource) TwoInputStreamOperator(org.apache.flink.streaming.api.operators.TwoInputStreamOperator) 
TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) KeyedMultipleInputTransformation(org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) KeyedStream(org.apache.flink.streaming.api.datastream.KeyedStream) StreamOperatorParameters(org.apache.flink.streaming.api.operators.StreamOperatorParameters) Test(org.junit.Test) CollectionUtil(org.apache.flink.util.CollectionUtil) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) DataStream(org.apache.flink.streaming.api.datastream.DataStream) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) MultipleInputStreamOperator(org.apache.flink.streaming.api.operators.MultipleInputStreamOperator) CloseableIterator(org.apache.flink.util.CloseableIterator) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) BroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction) RuntimeExecutionMode(org.apache.flink.api.common.RuntimeExecutionMode) Time(org.apache.flink.api.common.time.Time) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Input(org.apache.flink.streaming.api.operators.Input) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Aggregations

CloseableIterator (org.apache.flink.util.CloseableIterator)7 List (java.util.List)5 Arrays (java.util.Arrays)4 RuntimeExecutionMode (org.apache.flink.api.common.RuntimeExecutionMode)4 WatermarkStrategy (org.apache.flink.api.common.eventtime.WatermarkStrategy)4 RichMapFunction (org.apache.flink.api.common.functions.RichMapFunction)4 RestartStrategies (org.apache.flink.api.common.restartstrategy.RestartStrategies)4 BroadcastState (org.apache.flink.api.common.state.BroadcastState)4 ListState (org.apache.flink.api.common.state.ListState)4 ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor)4 MapStateDescriptor (org.apache.flink.api.common.state.MapStateDescriptor)4 ReadOnlyBroadcastState (org.apache.flink.api.common.state.ReadOnlyBroadcastState)4 ValueStateDescriptor (org.apache.flink.api.common.state.ValueStateDescriptor)4 Time (org.apache.flink.api.common.time.Time)4 BasicTypeInfo (org.apache.flink.api.common.typeinfo.BasicTypeInfo)4 StringSerializer (org.apache.flink.api.common.typeutils.base.StringSerializer)4 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)4 IOException (java.io.IOException)3 MiniClusterResourceConfiguration (org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration)3 BroadcastStream (org.apache.flink.streaming.api.datastream.BroadcastStream)3