Use of org.apache.flink.api.common.functions.ReduceFunction in project flink by apache.
From the class WordCountSubclassInterfacePOJOITCase, the method testProgram:
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> text = env.readTextFile(textPath);

    DataSet<WCBase> counts = text
            .flatMap(new Tokenizer())
            .groupBy("word")
            .reduce(new ReduceFunction<WCBase>() {
                private static final long serialVersionUID = 1L;

                @Override
                public WCBase reduce(WCBase value1, WCBase value2) {
                    // the count lives in a nested field of the WC subclass
                    WC wc1 = (WC) value1;
                    WC wc2 = (WC) value2;
                    int c = wc1.secretCount.getCount() + wc2.secretCount.getCount();
                    wc1.secretCount.setCount(c);
                    return wc1;
                }
            })
            .map(new MapFunction<WCBase, WCBase>() {
                @Override
                public WCBase map(WCBase value) throws Exception {
                    // copy the nested count back into the public field for output
                    WC wc = (WC) value;
                    wc.count = wc.secretCount.getCount();
                    return wc;
                }
            });

    counts.writeAsText(resultPath);
    env.execute("WordCount with custom data types example");
}
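The reduce above is keyed on the POJO field "word" via groupBy("word"). For comparison, a minimal self-contained sketch of the same grouped-reduce pattern on tuples (the class name TupleWordCount and the sample data are illustrative, not taken from the Flink sources):

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;

public class TupleWordCount {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<String, Integer>> words = env.fromElements(
                new Tuple2<>("hello", 1), new Tuple2<>("world", 1), new Tuple2<>("hello", 1));
        // group by the word (field 0) and sum the counts (field 1)
        DataSet<Tuple2<String, Integer>> counts = words
                .groupBy(0)
                .reduce(new ReduceFunction<Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> reduce(Tuple2<String, Integer> a, Tuple2<String, Integer> b) {
                        return new Tuple2<>(a.f0, a.f1 + b.f1);
                    }
                });
        counts.print();
    }
}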
Use of org.apache.flink.api.common.functions.ReduceFunction in project flink by apache.
From the class JobManagerHAProcessFailureBatchRecoveryITCase, the method testJobManagerFailure:
/**
 * Test program with JobManager failure.
 *
 * @param zkQuorum ZooKeeper quorum to connect to
 * @param coordinateDir Coordination directory
 * @throws Exception
 */
public void testJobManagerFailure(String zkQuorum, final File coordinateDir) throws Exception {
    Configuration config = new Configuration();
    config.setString(HighAvailabilityOptions.HA_MODE, "ZOOKEEPER");
    config.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, zkQuorum);

    ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment("leader", 1, config);
    env.setParallelism(PARALLELISM);
    env.setNumberOfExecutionRetries(1);
    env.getConfig().setExecutionMode(executionMode);
    env.getConfig().disableSysoutLogging();

    final long NUM_ELEMENTS = 100000L;
    final DataSet<Long> result = env.generateSequence(1, NUM_ELEMENTS)
            // spread the sequence evenly across all parallel mapper subtasks
            .rebalance()
            .map(new RichMapFunction<Long, Long>() {
                private final File proceedFile = new File(coordinateDir, PROCEED_MARKER_FILE);
                private boolean markerCreated = false;
                private boolean checkForProceedFile = true;

                @Override
                public Long map(Long value) throws Exception {
                    // signal readiness once per subtask by creating a marker file
                    if (!markerCreated) {
                        int taskIndex = getRuntimeContext().getIndexOfThisSubtask();
                        AbstractTaskManagerProcessFailureRecoveryTest.touchFile(
                                new File(coordinateDir, READY_MARKER_FILE_PREFIX + taskIndex));
                        markerCreated = true;
                    }
                    // check if the proceed file exists
                    if (checkForProceedFile) {
                        if (proceedFile.exists()) {
                            checkForProceedFile = false;
                        } else {
                            // otherwise wait so that we make slow progress
                            Thread.sleep(100);
                        }
                    }
                    return value;
                }
            })
            // the ungrouped reduce folds all elements into a single sum
            .reduce(new ReduceFunction<Long>() {
                @Override
                public Long reduce(Long value1, Long value2) {
                    return value1 + value2;
                }
            })
            // verify the sum and signal completion via a marker file
            .flatMap(new RichFlatMapFunction<Long, Long>() {
                @Override
                public void flatMap(Long value, Collector<Long> out) throws Exception {
                    assertEquals(NUM_ELEMENTS * (NUM_ELEMENTS + 1L) / 2L, (long) value);
                    int taskIndex = getRuntimeContext().getIndexOfThisSubtask();
                    AbstractTaskManagerProcessFailureRecoveryTest.touchFile(
                            new File(coordinateDir, FINISH_MARKER_FILE_PREFIX + taskIndex));
                }
            });

    result.output(new DiscardingOutputFormat<Long>());
    env.execute();
}
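Note that the reduce here runs on the whole, ungrouped DataSet, so Flink folds every element into a single sum, which the final flatMap then checks against the closed form NUM_ELEMENTS * (NUM_ELEMENTS + 1) / 2. A stripped-down sketch of that ungrouped pattern without the failure-recovery machinery (class name SumSequence is invented for illustration):

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class SumSequence {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // an ungrouped reduce folds the entire DataSet into a single element
        DataSet<Long> sum = env.generateSequence(1, 100).reduce(new ReduceFunction<Long>() {
            @Override
            public Long reduce(Long a, Long b) {
                return a + b;
            }
        });
        sum.print(); // 5050 = 100 * 101 / 2
    }
}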
Use of org.apache.flink.api.common.functions.ReduceFunction in project flink by apache.
From the class WindowedStream, the method createFastTimeOperatorIfValid:
private <R> SingleOutputStreamOperator<R> createFastTimeOperatorIfValid(
        ReduceFunction<?> function,
        TypeInformation<R> resultType,
        String functionName) {

    if (windowAssigner.getClass() == SlidingAlignedProcessingTimeWindows.class && trigger == null && evictor == null) {
        SlidingAlignedProcessingTimeWindows timeWindows = (SlidingAlignedProcessingTimeWindows) windowAssigner;
        final long windowLength = timeWindows.getSize();
        final long windowSlide = timeWindows.getSlide();
        String opName = "Fast " + timeWindows + " of " + functionName;

        @SuppressWarnings("unchecked")
        ReduceFunction<T> reducer = (ReduceFunction<T>) function;

        @SuppressWarnings("unchecked")
        OneInputStreamOperator<T, R> op = (OneInputStreamOperator<T, R>) new AggregatingProcessingTimeWindowOperator<>(
                reducer,
                input.getKeySelector(),
                input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()),
                input.getType().createSerializer(getExecutionEnvironment().getConfig()),
                windowLength,
                windowSlide);
        return input.transform(opName, resultType, op);
    } else if (windowAssigner.getClass() == TumblingAlignedProcessingTimeWindows.class && trigger == null && evictor == null) {
        TumblingAlignedProcessingTimeWindows timeWindows = (TumblingAlignedProcessingTimeWindows) windowAssigner;
        final long windowLength = timeWindows.getSize();
        // for tumbling windows the slide equals the window size
        final long windowSlide = timeWindows.getSize();
        String opName = "Fast " + timeWindows + " of " + functionName;

        @SuppressWarnings("unchecked")
        ReduceFunction<T> reducer = (ReduceFunction<T>) function;

        @SuppressWarnings("unchecked")
        OneInputStreamOperator<T, R> op = (OneInputStreamOperator<T, R>) new AggregatingProcessingTimeWindowOperator<>(
                reducer,
                input.getKeySelector(),
                input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()),
                input.getType().createSerializer(getExecutionEnvironment().getConfig()),
                windowLength,
                windowSlide);
        return input.transform(opName, resultType, op);
    }
    return null;
}
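This method is internal to WindowedStream: when the assigner is one of the aligned processing-time variants and no custom trigger or evictor is set, it swaps in the specialized AggregatingProcessingTimeWindowOperator; otherwise it returns null and the caller falls back to the generic window operator. A rough sketch of user code that can take this fast path, assuming the of(...) factory on the aligned assigner mirrors the regular sliding-window assigner and that input is a pre-existing DataStream of tuples:

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.windowing.assigners.SlidingAlignedProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;

// per-key counts over a 10s window sliding by 5s
DataStream<Tuple2<String, Long>> windowedCounts = input
        .keyBy(0)
        .window(SlidingAlignedProcessingTimeWindows.of(Time.seconds(10), Time.seconds(5)))
        .reduce(new ReduceFunction<Tuple2<String, Long>>() {
            @Override
            public Tuple2<String, Long> reduce(Tuple2<String, Long> a, Tuple2<String, Long> b) {
                return new Tuple2<>(a.f0, a.f1 + b.f1);
            }
        });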
Use of org.apache.flink.api.common.functions.ReduceFunction in project flink by apache.
From the class ReducingStateDescriptorTest, the method testReducingStateDescriptor:
@Test
public void testReducingStateDescriptor() throws Exception {
    ReduceFunction<String> reducer = (a, b) -> a;

    TypeSerializer<String> serializer = new KryoSerializer<>(String.class, new ExecutionConfig());

    ReducingStateDescriptor<String> descr =
            new ReducingStateDescriptor<>("testName", reducer, serializer);

    assertEquals("testName", descr.getName());
    assertNotNull(descr.getSerializer());
    assertEquals(serializer, descr.getSerializer());
    assertEquals(reducer, descr.getReduceFunction());

    // the descriptor must survive a Java serialization round trip intact
    ReducingStateDescriptor<String> copy = CommonTestUtils.createCopySerializable(descr);

    assertEquals("testName", copy.getName());
    assertNotNull(copy.getSerializer());
    assertEquals(serializer, copy.getSerializer());
}
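The descriptor itself only carries metadata (name, reduce function, serializer); in a running job it is handed to the runtime context to obtain the actual ReducingState, which applies the reduce function on every add. A hypothetical sketch of that usage (the class name RunningMax and state name "max" are invented; this must run on a keyed stream):

import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.state.ReducingState;
import org.apache.flink.api.common.state.ReducingStateDescriptor;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.Collector;

// emits the running maximum seen so far for the current key
public class RunningMax extends RichFlatMapFunction<Long, Long> {
    private transient ReducingState<Long> max;

    @Override
    public void open(Configuration parameters) {
        ReducingStateDescriptor<Long> descriptor =
                new ReducingStateDescriptor<>("max", (a, b) -> Math.max(a, b), Long.class);
        max = getRuntimeContext().getReducingState(descriptor);
    }

    @Override
    public void flatMap(Long value, Collector<Long> out) throws Exception {
        max.add(value); // applies the reduce function against the stored value
        out.collect(max.get());
    }
}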
Use of org.apache.flink.api.common.functions.ReduceFunction in project flink by apache.
From the class ReduceOperatorTest, the method testReduceCollectionWithRuntimeContext:
@Test
public void testReduceCollectionWithRuntimeContext() {
    try {
        final String taskName = "Test Task";
        final AtomicBoolean opened = new AtomicBoolean();
        final AtomicBoolean closed = new AtomicBoolean();

        final ReduceFunction<Tuple2<String, Integer>> reducer = new RichReduceFunction<Tuple2<String, Integer>>() {

            @Override
            public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception {
                return new Tuple2<>(value1.f0, value1.f1 + value2.f1);
            }

            @Override
            public void open(Configuration parameters) throws Exception {
                opened.set(true);
                RuntimeContext ctx = getRuntimeContext();
                assertEquals(0, ctx.getIndexOfThisSubtask());
                assertEquals(1, ctx.getNumberOfParallelSubtasks());
                assertEquals(taskName, ctx.getTaskName());
            }

            @Override
            public void close() throws Exception {
                closed.set(true);
            }
        };

        ReduceOperatorBase<Tuple2<String, Integer>, ReduceFunction<Tuple2<String, Integer>>> op =
                new ReduceOperatorBase<>(
                        reducer,
                        new UnaryOperatorInformation<>(STRING_INT_TUPLE, STRING_INT_TUPLE),
                        new int[] { 0 },
                        "TestReducer");

        List<Tuple2<String, Integer>> input = new ArrayList<>(asList(
                new Tuple2<>("foo", 1),
                new Tuple2<>("foo", 3),
                new Tuple2<>("bar", 2),
                new Tuple2<>("bar", 4)));

        final TaskInfo taskInfo = new TaskInfo(taskName, 1, 0, 1, 0);

        ExecutionConfig executionConfig = new ExecutionConfig();

        // run once with object reuse disabled ...
        executionConfig.disableObjectReuse();
        List<Tuple2<String, Integer>> resultMutableSafe = op.executeOnCollections(
                input,
                new RuntimeUDFContext(taskInfo, null, executionConfig,
                        new HashMap<>(), new HashMap<>(), UnregisteredMetricsGroup.createOperatorMetricGroup()),
                executionConfig);

        // ... and once with object reuse enabled
        executionConfig.enableObjectReuse();
        List<Tuple2<String, Integer>> resultRegular = op.executeOnCollections(
                input,
                new RuntimeUDFContext(taskInfo, null, executionConfig,
                        new HashMap<>(), new HashMap<>(), UnregisteredMetricsGroup.createOperatorMetricGroup()),
                executionConfig);

        Set<Tuple2<String, Integer>> resultSetMutableSafe = new HashSet<>(resultMutableSafe);
        Set<Tuple2<String, Integer>> resultSetRegular = new HashSet<>(resultRegular);

        Set<Tuple2<String, Integer>> expectedResult = new HashSet<>(asList(
                new Tuple2<>("foo", 4),
                new Tuple2<>("bar", 6)));

        assertEquals(expectedResult, resultSetMutableSafe);
        assertEquals(expectedResult, resultSetRegular);
        assertTrue(opened.get());
        assertTrue(closed.get());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
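executeOnCollections drives the full rich-function lifecycle (open, reduce, close) without a cluster, in both object-reuse modes. When lifecycle hooks are not involved, a plain ReduceFunction can be exercised even more directly by calling reduce by hand; a minimal sketch (the surrounding test method is assumed to declare throws Exception):

ReduceFunction<Tuple2<String, Integer>> sumCounts = new ReduceFunction<Tuple2<String, Integer>>() {
    @Override
    public Tuple2<String, Integer> reduce(Tuple2<String, Integer> v1, Tuple2<String, Integer> v2) {
        return new Tuple2<>(v1.f0, v1.f1 + v2.f1);
    }
};
// no runtime context needed: invoke the function like any plain Java object
assertEquals(new Tuple2<>("foo", 4), sumCounts.reduce(new Tuple2<>("foo", 1), new Tuple2<>("foo", 3)));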