Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
Example from the class AbstractQueryableStateTestBase, method testCustomKryoSerializerHandling.
/**
 * This test checks that custom Kryo serializers are loaded with the correct classloader.
 */
@Test
public void testCustomKryoSerializerHandling() throws Exception {
    final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
    final long numElements = 1;
    final String stateName = "teriberka";
    final String customSerializerClassName = "CustomKryo";
    final URLClassLoader userClassLoader =
            createLoaderWithCustomKryoSerializer(customSerializerClassName);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(stateBackend);
    env.setParallelism(maxParallelism);
    // Very important, because the cluster is shared between tests and we
    // don't explicitly check that all slots are available before submitting.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));

    // The custom serializer is not strictly needed; it is used only to check
    // that serialization works.
    env.getConfig()
            .addDefaultKryoSerializer(
                    Byte.class, (Serializer<?> & Serializable) createSerializer(userClassLoader));

    // Here we *force* using Kryo, to check that custom serializers are handled
    // correctly with respect to classloading.
    @SuppressWarnings({"rawtypes", "unchecked"})
    ValueStateDescriptor<Tuple2<Integer, Long>> valueState =
            new ValueStateDescriptor<>("any", new GenericTypeInfo(Tuple2.class));

    env.addSource(new TestAscendingValueSource(numElements))
            .keyBy(
                    new KeySelector<Tuple2<Integer, Long>, Integer>() {
                        private static final long serialVersionUID = 7662520075515707428L;

                        @Override
                        public Integer getKey(Tuple2<Integer, Long> value) {
                            return value.f0;
                        }
                    })
            .asQueryableState(stateName, valueState);

    try (AutoCancellableJob autoCancellableJob =
            new AutoCancellableJob(deadline, clusterClient, env)) {
        final JobID jobId = autoCancellableJob.getJobId();
        final JobGraph jobGraph = autoCancellableJob.getJobGraph();
        jobGraph.setClasspaths(Arrays.asList(userClassLoader.getURLs()));
        clusterClient.submitJob(jobGraph).get();
        try {
            client.setUserClassLoader(userClassLoader);
            executeValueQuery(deadline, client, jobId, stateName, valueState, numElements);
        } finally {
            client.setUserClassLoader(null);
        }
    }
}
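The CustomKryo class itself is compiled on the fly by the createLoaderWithCustomKryoSerializer helper and is therefore visible only through userClassLoader, which is what makes the classloading check meaningful. Its generated source is not shown on this page; a hypothetical sketch of such a serializer, using Kryo's Serializer API, could look like this (the intersection cast above exists because addDefaultKryoSerializer requires the instance to be Serializable so it can be shipped inside the ExecutionConfig):

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.Serializer;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;

import java.io.Serializable;

// Hypothetical reconstruction of the generated "CustomKryo" class: a trivial
// Kryo serializer for Byte. Serializable is required so the instance can be
// stored in the ExecutionConfig and shipped with the job.
public class CustomKryo extends Serializer<Byte> implements Serializable {

    @Override
    public void write(Kryo kryo, Output output, Byte object) {
        output.writeByte(object);
    }

    @Override
    public Byte read(Kryo kryo, Input input, Class<Byte> type) {
        return input.readByte();
    }
}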
Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
Example from the class AbstractQueryableStateTestBase, method testAggregatingState.
@Test
public void testAggregatingState() throws Exception {
    final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
    final long numElements = 1024L;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(stateBackend);
    env.setParallelism(maxParallelism);
    // Very important, because the cluster is shared between tests and we
    // don't explicitly check that all slots are available before submitting.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));

    DataStream<Tuple2<Integer, Long>> source =
            env.addSource(new TestAscendingValueSource(numElements));

    final AggregatingStateDescriptor<Tuple2<Integer, Long>, String, String> aggrStateDescriptor =
            new AggregatingStateDescriptor<>("aggregates", new SumAggr(), String.class);
    aggrStateDescriptor.setQueryable("aggr-queryable");

    source.keyBy(
                    new KeySelector<Tuple2<Integer, Long>, Integer>() {
                        private static final long serialVersionUID = 8470749712274833552L;

                        @Override
                        public Integer getKey(Tuple2<Integer, Long> value) {
                            return value.f0;
                        }
                    })
            .transform(
                    "TestAggregatingOperator",
                    BasicTypeInfo.STRING_TYPE_INFO,
                    new AggregatingTestOperator(aggrStateDescriptor));

    try (AutoCancellableJob autoCancellableJob =
            new AutoCancellableJob(deadline, clusterClient, env)) {
        final JobID jobId = autoCancellableJob.getJobId();
        final JobGraph jobGraph = autoCancellableJob.getJobGraph();
        clusterClient.submitJob(jobGraph).get();

        for (int key = 0; key < maxParallelism; key++) {
            boolean success = false;
            while (deadline.hasTimeLeft() && !success) {
                CompletableFuture<AggregatingState<Tuple2<Integer, Long>, String>> future =
                        getKvState(
                                deadline,
                                client,
                                jobId,
                                "aggr-queryable",
                                key,
                                BasicTypeInfo.INT_TYPE_INFO,
                                aggrStateDescriptor,
                                false,
                                executor);
                String value =
                        future.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS).get();
                if (Long.parseLong(value) == numElements * (numElements + 1L) / 2L) {
                    success = true;
                } else {
                    // Retry
                    Thread.sleep(RETRY_TIMEOUT);
                }
            }
            assertTrue("Did not succeed query", success);
        }
    }
}
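SumAggr is a helper defined elsewhere in AbstractQueryableStateTestBase and is not shown on this page. Based on the descriptor's type parameters (input Tuple2<Integer, Long>, accumulator and result both String) and the Gauss sum numElements * (numElements + 1) / 2 asserted above, a plausible sketch consistent with Flink's AggregateFunction interface is:

import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// Hypothetical reconstruction of SumAggr: sums the Long field of each tuple,
// carrying the running sum as a String accumulator to match the descriptor's
// <Tuple2<Integer, Long>, String, String> type parameters.
public class SumAggr implements AggregateFunction<Tuple2<Integer, Long>, String, String> {

    private static final long serialVersionUID = 1L;

    @Override
    public String createAccumulator() {
        return "0";
    }

    @Override
    public String add(Tuple2<Integer, Long> value, String accumulator) {
        return Long.toString(Long.parseLong(accumulator) + value.f1);
    }

    @Override
    public String getResult(String accumulator) {
        return accumulator;
    }

    @Override
    public String merge(String a, String b) {
        return Long.toString(Long.parseLong(a) + Long.parseLong(b));
    }
}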
Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
Example from the class AbstractQueryableStateTestBase, method testListState.
/**
 * Tests a simple list state queryable state instance. Each source emits
 * (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then queried.
 * The list state instance adds the values to the list. The test succeeds after
 * each subtask index has been queried and the list contains the correct number
 * of distinct elements.
 */
@Test
public void testListState() throws Exception {
    final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
    final long numElements = 1024L;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(stateBackend);
    env.setParallelism(maxParallelism);
    // Very important, because the cluster is shared between tests and we
    // don't explicitly check that all slots are available before submitting.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));

    DataStream<Tuple2<Integer, Long>> source =
            env.addSource(new TestAscendingValueSource(numElements));

    final ListStateDescriptor<Long> listStateDescriptor =
            new ListStateDescriptor<Long>("list", BasicTypeInfo.LONG_TYPE_INFO);
    listStateDescriptor.setQueryable("list-queryable");

    source.keyBy(
                    new KeySelector<Tuple2<Integer, Long>, Integer>() {
                        private static final long serialVersionUID = 8470749712274833552L;

                        @Override
                        public Integer getKey(Tuple2<Integer, Long> value) {
                            return value.f0;
                        }
                    })
            .process(
                    new ProcessFunction<Tuple2<Integer, Long>, Object>() {
                        private static final long serialVersionUID = -805125545438296619L;

                        private transient ListState<Long> listState;

                        @Override
                        public void open(Configuration parameters) throws Exception {
                            super.open(parameters);
                            listState = getRuntimeContext().getListState(listStateDescriptor);
                        }

                        @Override
                        public void processElement(
                                Tuple2<Integer, Long> value, Context ctx, Collector<Object> out)
                                throws Exception {
                            listState.add(value.f1);
                        }
                    });

    try (AutoCancellableJob autoCancellableJob =
            new AutoCancellableJob(deadline, clusterClient, env)) {
        final JobID jobId = autoCancellableJob.getJobId();
        final JobGraph jobGraph = autoCancellableJob.getJobGraph();
        clusterClient.submitJob(jobGraph).get();

        final Map<Integer, Set<Long>> results = new HashMap<>();
        for (int key = 0; key < maxParallelism; key++) {
            boolean success = false;
            while (deadline.hasTimeLeft() && !success) {
                final CompletableFuture<ListState<Long>> future =
                        getKvState(
                                deadline,
                                client,
                                jobId,
                                "list-queryable",
                                key,
                                BasicTypeInfo.INT_TYPE_INFO,
                                listStateDescriptor,
                                false,
                                executor);
                Iterable<Long> value =
                        future.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS).get();
                Set<Long> res = new HashSet<>();
                for (Long v : value) {
                    res.add(v);
                }
                // The source starts at 0, hence the +1.
                if (res.size() == numElements + 1L) {
                    success = true;
                    results.put(key, res);
                } else {
                    // Retry
                    Thread.sleep(RETRY_TIMEOUT);
                }
            }
            assertTrue("Did not succeed query", success);
        }

        for (int key = 0; key < maxParallelism; key++) {
            Set<Long> values = results.get(key);
            for (long i = 0L; i <= numElements; i++) {
                assertTrue(values.contains(i));
            }
        }
    }
}
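The getKvState and executeValueQuery calls above are retry helpers of the test base, not public API. With Flink's public QueryableStateClient, the underlying lookup for the list state would look roughly like the following sketch; host and port are assumptions (9069 is the default queryable state proxy port), and the method name queryListState is illustrative:

import java.util.concurrent.CompletableFuture;

import org.apache.flink.api.common.JobID;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.queryablestate.client.QueryableStateClient;

// Sketch: a direct lookup of the queryable list state with the public client.
// The test base's getKvState helper essentially wraps this call in retry logic.
static void queryListState(
        JobID jobId, int key, ListStateDescriptor<Long> listStateDescriptor) throws Exception {
    QueryableStateClient qsClient = new QueryableStateClient("localhost", 9069);
    try {
        CompletableFuture<ListState<Long>> future =
                qsClient.getKvState(
                        jobId,
                        "list-queryable",            // name passed to setQueryable(...)
                        key,                         // key to look up
                        BasicTypeInfo.INT_TYPE_INFO, // type information of the key
                        listStateDescriptor);        // same descriptor the job uses
        for (Long v : future.get().get()) {          // outer get(): future, inner get(): ListState
            System.out.println(v);
        }
    } finally {
        qsClient.shutdownAndWait();
    }
}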
Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
Example from the class StreamMultipleInputProcessorFactory, method create.
@SuppressWarnings({"unchecked", "rawtypes"})
public static StreamMultipleInputProcessor create(
        TaskInvokable ownerTask,
        CheckpointedInputGate[] checkpointedInputGates,
        StreamConfig.InputConfig[] configuredInputs,
        IOManager ioManager,
        MemoryManager memoryManager,
        TaskIOMetricGroup ioMetricGroup,
        Counter mainOperatorRecordsIn,
        MultipleInputStreamOperator<?> mainOperator,
        WatermarkGauge[] inputWatermarkGauges,
        StreamConfig streamConfig,
        Configuration taskManagerConfig,
        Configuration jobConfig,
        ExecutionConfig executionConfig,
        ClassLoader userClassloader,
        OperatorChain<?, ?> operatorChain,
        InflightDataRescalingDescriptor inflightDataRescalingDescriptor,
        Function<Integer, StreamPartitioner<?>> gatePartitioners,
        TaskInfo taskInfo) {
    checkNotNull(operatorChain);
    List<Input> operatorInputs = mainOperator.getInputs();
    int inputsCount = operatorInputs.size();
    StreamOneInputProcessor<?>[] inputProcessors = new StreamOneInputProcessor[inputsCount];
    Counter networkRecordsIn = new SimpleCounter();
    ioMetricGroup.reuseRecordsInputCounter(networkRecordsIn);
    checkState(
            configuredInputs.length == inputsCount,
            "Number of configured inputs in StreamConfig [%s] doesn't match the main operator's number of inputs [%s]",
            configuredInputs.length, inputsCount);
    StreamTaskInput[] inputs = new StreamTaskInput[inputsCount];
    for (int i = 0; i < inputsCount; i++) {
        StreamConfig.InputConfig configuredInput = configuredInputs[i];
        if (configuredInput instanceof StreamConfig.NetworkInputConfig) {
            StreamConfig.NetworkInputConfig networkInput =
                    (StreamConfig.NetworkInputConfig) configuredInput;
            inputs[i] = StreamTaskNetworkInputFactory.create(
                    checkpointedInputGates[networkInput.getInputGateIndex()],
                    networkInput.getTypeSerializer(),
                    ioManager,
                    new StatusWatermarkValve(
                            checkpointedInputGates[networkInput.getInputGateIndex()].getNumberOfInputChannels()),
                    i,
                    inflightDataRescalingDescriptor,
                    gatePartitioners,
                    taskInfo);
        } else if (configuredInput instanceof StreamConfig.SourceInputConfig) {
            StreamConfig.SourceInputConfig sourceInput =
                    (StreamConfig.SourceInputConfig) configuredInput;
            inputs[i] = operatorChain.getSourceTaskInput(sourceInput);
        } else {
            throw new UnsupportedOperationException("Unknown input type: " + configuredInput);
        }
    }
    InputSelectable inputSelectable =
            mainOperator instanceof InputSelectable ? (InputSelectable) mainOperator : null;
    StreamConfig.InputConfig[] inputConfigs = streamConfig.getInputs(userClassloader);
    boolean anyRequiresSorting = Arrays.stream(inputConfigs).anyMatch(StreamConfig::requiresSorting);
    if (anyRequiresSorting) {
        if (inputSelectable != null) {
            throw new IllegalStateException(
                    "The InputSelectable interface is not supported with sorting inputs");
        }
        StreamTaskInput[] sortingInputs = IntStream.range(0, inputsCount)
                .filter(idx -> requiresSorting(inputConfigs[idx]))
                .mapToObj(idx -> inputs[idx])
                .toArray(StreamTaskInput[]::new);
        KeySelector[] sortingInputKeySelectors = IntStream.range(0, inputsCount)
                .filter(idx -> requiresSorting(inputConfigs[idx]))
                .mapToObj(idx -> streamConfig.getStatePartitioner(idx, userClassloader))
                .toArray(KeySelector[]::new);
        TypeSerializer[] sortingInputKeySerializers = IntStream.range(0, inputsCount)
                .filter(idx -> requiresSorting(inputConfigs[idx]))
                .mapToObj(idx -> streamConfig.getTypeSerializerIn(idx, userClassloader))
                .toArray(TypeSerializer[]::new);
        StreamTaskInput[] passThroughInputs = IntStream.range(0, inputsCount)
                .filter(idx -> !requiresSorting(inputConfigs[idx]))
                .mapToObj(idx -> inputs[idx])
                .toArray(StreamTaskInput[]::new);
        SelectableSortingInputs selectableSortingInputs = MultiInputSortingDataInput.wrapInputs(
                ownerTask,
                sortingInputs,
                sortingInputKeySelectors,
                sortingInputKeySerializers,
                streamConfig.getStateKeySerializer(userClassloader),
                passThroughInputs,
                memoryManager,
                ioManager,
                executionConfig.isObjectReuseEnabled(),
                streamConfig.getManagedMemoryFractionOperatorUseCaseOfSlot(
                        ManagedMemoryUseCase.OPERATOR, taskManagerConfig, userClassloader),
                jobConfig,
                executionConfig);
        StreamTaskInput<?>[] sortedInputs = selectableSortingInputs.getSortedInputs();
        StreamTaskInput<?>[] passedThroughInputs = selectableSortingInputs.getPassThroughInputs();
        int sortedIndex = 0;
        int passThroughIndex = 0;
        for (int i = 0; i < inputs.length; i++) {
            if (requiresSorting(inputConfigs[i])) {
                inputs[i] = sortedInputs[sortedIndex++];
            } else {
                inputs[i] = passedThroughInputs[passThroughIndex++];
            }
        }
        inputSelectable = selectableSortingInputs.getInputSelectable();
    }
    for (int i = 0; i < inputsCount; i++) {
        StreamConfig.InputConfig configuredInput = configuredInputs[i];
        if (configuredInput instanceof StreamConfig.NetworkInputConfig) {
            StreamTaskNetworkOutput dataOutput = new StreamTaskNetworkOutput<>(
                    operatorChain.getFinishedOnRestoreInputOrDefault(operatorInputs.get(i)),
                    inputWatermarkGauges[i],
                    mainOperatorRecordsIn,
                    networkRecordsIn);
            inputProcessors[i] = new StreamOneInputProcessor(inputs[i], dataOutput, operatorChain);
        } else if (configuredInput instanceof StreamConfig.SourceInputConfig) {
            StreamConfig.SourceInputConfig sourceInput =
                    (StreamConfig.SourceInputConfig) configuredInput;
            OperatorChain.ChainedSource chainedSource = operatorChain.getChainedSource(sourceInput);
            inputProcessors[i] = new StreamOneInputProcessor(
                    inputs[i],
                    new StreamTaskSourceOutput(
                            chainedSource.getSourceOutput(),
                            inputWatermarkGauges[i],
                            chainedSource.getSourceTaskInput().getOperator().getSourceMetricGroup()),
                    operatorChain);
        } else {
            throw new UnsupportedOperationException("Unknown input type: " + configuredInput);
        }
    }
    return new StreamMultipleInputProcessor(
            new MultipleInputSelectionHandler(inputSelectable, inputsCount), inputProcessors);
}
Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
Example from the class LargeSortingDataInputITCase, method multiInputKeySorting.
@Test
@SuppressWarnings({"rawtypes", "unchecked"})
public void multiInputKeySorting() throws Exception {
    int numberOfRecords = 500_000;
    GeneratedRecordsDataInput input1 = new GeneratedRecordsDataInput(numberOfRecords, 0);
    GeneratedRecordsDataInput input2 = new GeneratedRecordsDataInput(numberOfRecords, 1);
    KeySelector<Tuple3<Integer, String, byte[]>, String> keySelector = value -> value.f1;
    try (MockEnvironment environment = MockEnvironment.builder().build()) {
        SelectableSortingInputs selectableSortingInputs = MultiInputSortingDataInput.wrapInputs(
                new DummyInvokable(),
                new StreamTaskInput[] {input1, input2},
                new KeySelector[] {keySelector, keySelector},
                new TypeSerializer[] {
                    GeneratedRecordsDataInput.SERIALIZER, GeneratedRecordsDataInput.SERIALIZER
                },
                new StringSerializer(),
                new StreamTaskInput[0],
                environment.getMemoryManager(),
                environment.getIOManager(),
                true,
                1.0,
                new Configuration(),
                new ExecutionConfig());
        StreamTaskInput<?>[] sortingDataInputs = selectableSortingInputs.getSortedInputs();
        try (StreamTaskInput<Tuple3<Integer, String, byte[]>> sortedInput1 =
                        (StreamTaskInput<Tuple3<Integer, String, byte[]>>) sortingDataInputs[0];
                StreamTaskInput<Tuple3<Integer, String, byte[]>> sortedInput2 =
                        (StreamTaskInput<Tuple3<Integer, String, byte[]>>) sortingDataInputs[1]) {
            VerifyingOutput<String> output = new VerifyingOutput<>(keySelector);
            StreamMultipleInputProcessor multiSortedProcessor = new StreamMultipleInputProcessor(
                    new MultipleInputSelectionHandler(selectableSortingInputs.getInputSelectable(), 2),
                    new StreamOneInputProcessor[] {
                        new StreamOneInputProcessor(sortedInput1, output, new DummyOperatorChain()),
                        new StreamOneInputProcessor(sortedInput2, output, new DummyOperatorChain())
                    });
            DataInputStatus inputStatus;
            do {
                inputStatus = multiSortedProcessor.processInput();
            } while (inputStatus != DataInputStatus.END_OF_INPUT);
            assertThat(output.getSeenRecords(), equalTo(numberOfRecords * 2));
        }
    }
}
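A note on the two KeySelector styles on this page: the lambda value -> value.f1 works in this test because the key and input serializers are handed to wrapInputs explicitly. In API methods like keyBy, where Flink must extract type information from the function itself, a lambda can lose its generic type parameters to erasure, which is one reason the queryable state tests above use anonymous KeySelector classes. A minimal sketch of the two equivalent forms (class and field names are illustrative):

import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;

public class KeySelectorForms {

    // Anonymous-class form: the generic parameters survive erasure, so Flink
    // can derive the key type information from the class signature.
    static final KeySelector<Tuple2<Integer, Long>, Integer> ANONYMOUS =
            new KeySelector<Tuple2<Integer, Long>, Integer>() {
                @Override
                public Integer getKey(Tuple2<Integer, Long> value) {
                    return value.f0;
                }
            };

    // Lambda form: more concise, but carries no generic type information of
    // its own; fine wherever serializers or type info are supplied explicitly.
    static final KeySelector<Tuple2<Integer, Long>, Integer> LAMBDA = value -> value.f0;
}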