Use of org.apache.flink.api.common.functions.ReduceFunction in project flink by apache.
The class ReduceDriverTest, method testReduceDriverImmutableEmpty.
/**
 * Checks that a {@code ReduceDriver} configured with {@code DriverStrategy.SORTED_REDUCE}
 * collects no records when its input iterator is empty.
 *
 * <p>Exceptions are deliberately not caught: letting them propagate makes the test runner
 * report the original exception type, message and stack trace, instead of a bare
 * (possibly {@code null}) message passed to {@code Assert.fail}.
 *
 * @throws Exception if setting up, preparing, or running the driver fails
 */
@Test
public void testReduceDriverImmutableEmpty() throws Exception {
    TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>> context =
            new TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>>();

    // The sample records are only used to derive the tuple type information.
    List<Tuple2<String, Integer>> data = DriverTestData.createReduceImmutableData();
    TupleTypeInfo<Tuple2<String, Integer>> typeInfo =
            (TupleTypeInfo<Tuple2<String, Integer>>) TypeExtractor.getForObject(data.get(0));

    // The driver itself is fed from an iterator that yields no elements.
    MutableObjectIterator<Tuple2<String, Integer>> input = EmptyMutableObjectIterator.get();
    context.setDriverStrategy(DriverStrategy.SORTED_REDUCE);

    // Comparator created for field position 0 of the tuple type.
    TypeComparator<Tuple2<String, Integer>> comparator =
            typeInfo.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());

    // Collector that gathers everything the driver emits so it can be inspected afterwards.
    GatheringCollector<Tuple2<String, Integer>> result =
            new GatheringCollector<Tuple2<String, Integer>>(typeInfo.createSerializer(new ExecutionConfig()));

    context.setInput1(input, typeInfo.createSerializer(new ExecutionConfig()));
    context.setComparator1(comparator);
    context.setCollector(result);

    ReduceDriver<Tuple2<String, Integer>> driver = new ReduceDriver<Tuple2<String, Integer>>();
    driver.setup(context);
    driver.prepare();
    driver.run();

    // Empty input must lead to empty output.
    Assert.assertEquals(0, result.getList().size());
}
Use of org.apache.flink.api.common.functions.ReduceFunction in project flink by apache.
The class WordCountSubclassInterfacePOJOITCase, method testProgram.
/**
 * Builds and executes a word-count job whose records are declared as {@code WCBase}
 * but are cast to {@code WC} inside the user functions.
 *
 * <p>The job reads {@code textPath}, tokenizes it, groups by the {@code "word"} field,
 * sums the counters held in {@code secretCount}, copies the summed value into the
 * {@code count} field and writes the result as text to {@code resultPath}.
 *
 * @throws Exception if the job cannot be executed
 */
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> text = env.readTextFile(textPath);

    // Step 1: split the input lines into word records.
    DataSet<WCBase> tokens = text.flatMap(new Tokenizer());

    // Step 2: per word, fold the second record's secret counter into the first record.
    DataSet<WCBase> summed = tokens.groupBy("word").reduce(new ReduceFunction<WCBase>() {
        private static final long serialVersionUID = 1L;

        public WCBase reduce(WCBase left, WCBase right) {
            WC accumulator = (WC) left;
            WC addend = (WC) right;
            int total = accumulator.secretCount.getCount() + addend.secretCount.getCount();
            accumulator.secretCount.setCount(total);
            return accumulator;
        }
    });

    // Step 3: expose the summed secret counter through the regular count field.
    DataSet<WCBase> counts = summed.map(new MapFunction<WCBase, WCBase>() {
        @Override
        public WCBase map(WCBase value) throws Exception {
            WC record = (WC) value;
            record.count = record.secretCount.getCount();
            return record;
        }
    });

    counts.writeAsText(resultPath);
    env.execute("WordCount with custom data types example");
}
Use of org.apache.flink.api.common.functions.ReduceFunction in project flink by apache.
The class ReduceOperatorTest, method testReduceCollectionWithRuntimeContext.
/**
 * Runs a {@code ReduceOperatorBase} with a rich reduce function on an in-memory collection,
 * once with object reuse disabled and once with it enabled, and checks that both runs
 * produce the per-key sums.
 *
 * <p>The rich function's {@code open} method verifies the subtask index, the number of
 * parallel subtasks and the task name seen through its runtime context; the test also
 * asserts that both {@code open} and {@code close} were invoked.
 *
 * <p>Exceptions are not caught: propagating them lets the test runner report the original
 * exception with its stack trace, instead of only its (possibly {@code null}) message.
 *
 * @throws Exception if executing the operator on the collection fails
 */
@Test
public void testReduceCollectionWithRuntimeContext() throws Exception {
    final String taskName = "Test Task";
    final AtomicBoolean opened = new AtomicBoolean();
    final AtomicBoolean closed = new AtomicBoolean();

    final ReduceFunction<Tuple2<String, Integer>> reducer = new RichReduceFunction<Tuple2<String, Integer>>() {
        @Override
        public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception {
            // Keep the first record's key and add up the integer fields in a fresh tuple.
            return new Tuple2<String, Integer>(value1.f0, value1.f1 + value2.f1);
        }

        @Override
        public void open(Configuration parameters) throws Exception {
            opened.set(true);
            RuntimeContext ctx = getRuntimeContext();
            assertEquals(0, ctx.getIndexOfThisSubtask());
            assertEquals(1, ctx.getNumberOfParallelSubtasks());
            assertEquals(taskName, ctx.getTaskName());
        }

        @Override
        public void close() throws Exception {
            closed.set(true);
        }
    };

    ReduceOperatorBase<Tuple2<String, Integer>, ReduceFunction<Tuple2<String, Integer>>> op =
            new ReduceOperatorBase<Tuple2<String, Integer>, ReduceFunction<Tuple2<String, Integer>>>(
                    reducer,
                    new UnaryOperatorInformation<Tuple2<String, Integer>, Tuple2<String, Integer>>(
                            TypeInfoParser.<Tuple2<String, Integer>>parse("Tuple2<String, Integer>"),
                            TypeInfoParser.<Tuple2<String, Integer>>parse("Tuple2<String, " + "Integer>")),
                    new int[] { 0 },
                    "TestReducer");

    List<Tuple2<String, Integer>> input = new ArrayList<Tuple2<String, Integer>>(asList(
            new Tuple2<String, Integer>("foo", 1),
            new Tuple2<String, Integer>("foo", 3),
            new Tuple2<String, Integer>("bar", 2),
            new Tuple2<String, Integer>("bar", 4)));

    final TaskInfo taskInfo = new TaskInfo(taskName, 1, 0, 1, 0);
    ExecutionConfig executionConfig = new ExecutionConfig();

    // First run: object reuse disabled.
    executionConfig.disableObjectReuse();
    List<Tuple2<String, Integer>> resultMutableSafe = op.executeOnCollections(
            input,
            new RuntimeUDFContext(taskInfo, null, executionConfig, new HashMap<String, Future<Path>>(), new HashMap<String, Accumulator<?, ?>>(), new UnregisteredMetricsGroup()),
            executionConfig);

    // Second run: same configuration object, now with object reuse enabled.
    executionConfig.enableObjectReuse();
    List<Tuple2<String, Integer>> resultRegular = op.executeOnCollections(
            input,
            new RuntimeUDFContext(taskInfo, null, executionConfig, new HashMap<String, Future<Path>>(), new HashMap<String, Accumulator<?, ?>>(), new UnregisteredMetricsGroup()),
            executionConfig);

    // Compare as sets so that the order of the result records does not matter.
    Set<Tuple2<String, Integer>> resultSetMutableSafe = new HashSet<Tuple2<String, Integer>>(resultMutableSafe);
    Set<Tuple2<String, Integer>> resultSetRegular = new HashSet<Tuple2<String, Integer>>(resultRegular);
    Set<Tuple2<String, Integer>> expectedResult = new HashSet<Tuple2<String, Integer>>(asList(
            new Tuple2<String, Integer>("foo", 4),
            new Tuple2<String, Integer>("bar", 6)));

    assertEquals(expectedResult, resultSetMutableSafe);
    assertEquals(expectedResult, resultSetRegular);
    assertTrue(opened.get());
    assertTrue(closed.get());
}
Use of org.apache.flink.api.common.functions.ReduceFunction in project flink by apache.
The class ReduceOperatorTest, method testReduceCollection.
/**
 * Runs a {@code ReduceOperatorBase} with a plain (non-rich) reduce function on an in-memory
 * collection, once with object reuse disabled and once with it enabled, and checks that
 * both runs produce the per-key sums.
 *
 * <p>Exceptions are not caught: propagating them lets the test runner report the original
 * exception with its stack trace, instead of only its (possibly {@code null}) message.
 *
 * @throws Exception if executing the operator on the collection fails
 */
@Test
public void testReduceCollection() throws Exception {
    final ReduceFunction<Tuple2<String, Integer>> reducer = new ReduceFunction<Tuple2<String, Integer>>() {
        @Override
        public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception {
            // Keep the first record's key and add up the integer fields in a fresh tuple.
            return new Tuple2<String, Integer>(value1.f0, value1.f1 + value2.f1);
        }
    };

    ReduceOperatorBase<Tuple2<String, Integer>, ReduceFunction<Tuple2<String, Integer>>> op =
            new ReduceOperatorBase<Tuple2<String, Integer>, ReduceFunction<Tuple2<String, Integer>>>(
                    reducer,
                    new UnaryOperatorInformation<Tuple2<String, Integer>, Tuple2<String, Integer>>(
                            TypeInfoParser.<Tuple2<String, Integer>>parse("Tuple2<String, Integer>"),
                            TypeInfoParser.<Tuple2<String, Integer>>parse("Tuple2<String, " + "Integer>")),
                    new int[] { 0 },
                    "TestReducer");

    List<Tuple2<String, Integer>> input = new ArrayList<Tuple2<String, Integer>>(asList(
            new Tuple2<String, Integer>("foo", 1),
            new Tuple2<String, Integer>("foo", 3),
            new Tuple2<String, Integer>("bar", 2),
            new Tuple2<String, Integer>("bar", 4)));

    ExecutionConfig executionConfig = new ExecutionConfig();

    // First run: object reuse disabled. No runtime context is passed (null).
    executionConfig.disableObjectReuse();
    List<Tuple2<String, Integer>> resultMutableSafe = op.executeOnCollections(input, null, executionConfig);

    // Second run: same configuration object, now with object reuse enabled.
    executionConfig.enableObjectReuse();
    List<Tuple2<String, Integer>> resultRegular = op.executeOnCollections(input, null, executionConfig);

    // Compare as sets so that the order of the result records does not matter.
    Set<Tuple2<String, Integer>> resultSetMutableSafe = new HashSet<Tuple2<String, Integer>>(resultMutableSafe);
    Set<Tuple2<String, Integer>> resultSetRegular = new HashSet<Tuple2<String, Integer>>(resultRegular);
    Set<Tuple2<String, Integer>> expectedResult = new HashSet<Tuple2<String, Integer>>(asList(
            new Tuple2<String, Integer>("foo", 4),
            new Tuple2<String, Integer>("bar", 6)));

    assertEquals(expectedResult, resultSetMutableSafe);
    assertEquals(expectedResult, resultSetRegular);
}
Use of org.apache.flink.api.common.functions.ReduceFunction in project flink by apache.
The class DistinctOperator, method translateToDataFlow.
/**
 * Translates this distinct operation into a reduce-based operator that uses a
 * {@code DistinctFunction} as its reduce function.
 *
 * <p>For expression keys a {@code ReduceOperatorBase} is created directly on the logical key
 * positions; for selector-function keys the translation is delegated to
 * {@code translateSelectorFunctionDistinct}.
 *
 * @param input the operator that produces the input of the distinct operation
 * @return the operator implementing the distinct
 * @throws UnsupportedOperationException if the keys are neither expression keys nor
 *         selector-function keys
 */
@Override
protected org.apache.flink.api.common.operators.SingleInputOperator<?, T, ?> translateToDataFlow(Operator<T> input) {
    final ReduceFunction<T> function = new DistinctFunction<>();

    // Fall back to a generated name when none was set explicitly.
    String name = getName();
    if (name == null) {
        name = "Distinct at " + distinctLocationName;
    }

    if (keys instanceof Keys.ExpressionKeys) {
        int[] keyPositions = keys.computeLogicalKeyPositions();
        UnaryOperatorInformation<T, T> typeInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());

        ReduceOperatorBase<T, ReduceFunction<T>> reduceOp = new ReduceOperatorBase<>(function, typeInfo, keyPositions, name);
        reduceOp.setCombineHint(hint);
        reduceOp.setInput(input);
        reduceOp.setParallelism(getParallelism());

        // For tuple types, mark every key field as forwarded to itself so that distinct
        // preserves the partitioning on the fields it operates on.
        if (getType().isTupleType()) {
            SingleInputSemanticProperties forwarded = new SingleInputSemanticProperties();
            int[] forwardedFields = keys.computeLogicalKeyPositions();
            for (int i = 0; i < forwardedFields.length; i++) {
                forwarded.addForwardedField(forwardedFields[i], forwardedFields[i]);
            }
            reduceOp.setSemanticProperties(forwarded);
        }
        return reduceOp;
    }

    if (keys instanceof SelectorFunctionKeys) {
        @SuppressWarnings("unchecked")
        SelectorFunctionKeys<T, ?> selectorKeys = (SelectorFunctionKeys<T, ?>) keys;
        return translateSelectorFunctionDistinct(selectorKeys, function, getResultType(), name, input, parallelism, hint);
    }

    throw new UnsupportedOperationException("Unrecognized key type.");
}
Aggregations