Example use of org.apache.flink.api.java.tuple.Tuple in the Apache Flink project.
Class DataSetUtils, method summarize.
// --------------------------------------------------------------------------------------------
// Summarize
// --------------------------------------------------------------------------------------------
/**
 * Computes single-pass summary statistics for every column of a DataSet of Tuples.
 *
 * <p>Each partition is folded into one {@code TupleSummaryAggregator}; the per-partition
 * aggregators are then combined pairwise and the result of the first collected aggregator
 * is returned.
 *
 * <p>Example usage:
 * <pre>
 * {@code
 * DataSet<Tuple3<Double, String, Boolean>> input = // [...]
 * Tuple3<NumericColumnSummary, StringColumnSummary, BooleanColumnSummary> summary = DataSetUtils.summarize(input);
 *
 * summary.f0.getStandardDeviation();
 * summary.f1.getMaxLength();
 * }
 * </pre>
 *
 * @param input the DataSet to summarize; its type must be a tuple type
 * @return the summary as a Tuple the same width as input rows
 * @throws IllegalArgumentException if the type of {@code input} is not a tuple type
 * @throws Exception declared by the signature; presumably propagated from collecting the result
 */
public static <R extends Tuple, T extends Tuple> R summarize(DataSet<T> input) throws Exception {
    if (!input.getType().isTupleType()) {
        throw new IllegalArgumentException("summarize() is only implemented for DataSet's of Tuples");
    }
    final TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getType();

    // Step 1: fold every row of a partition into a single aggregator and emit it.
    final MapPartitionFunction<T, TupleSummaryAggregator<R>> summarizePartition =
        new MapPartitionFunction<T, TupleSummaryAggregator<R>>() {
            @Override
            public void mapPartition(Iterable<T> values, Collector<TupleSummaryAggregator<R>> out) throws Exception {
                TupleSummaryAggregator<R> partitionSummary = SummaryAggregatorFactory.create(inType);
                for (T row : values) {
                    partitionSummary.aggregate(row);
                }
                out.collect(partitionSummary);
            }
        };

    // Step 2: merge the right-hand aggregator into the left-hand one and keep the left.
    final ReduceFunction<TupleSummaryAggregator<R>> mergeSummaries =
        new ReduceFunction<TupleSummaryAggregator<R>>() {
            @Override
            public TupleSummaryAggregator<R> reduce(TupleSummaryAggregator<R> left, TupleSummaryAggregator<R> right) throws Exception {
                left.combine(right);
                return left;
            }
        };

    final DataSet<TupleSummaryAggregator<R>> merged = input.mapPartition(summarizePartition).reduce(mergeSummaries);
    return merged.collect().get(0).result();
}
Example use of org.apache.flink.api.java.tuple.Tuple in the Apache Flink project.
Class FieldsFromTupleTest, method testUserSpecifiedOrder.
/**
 * Checks that FieldsFromTuple returns the requested fields in exactly the order the
 * indexes were given, including reversed, boundary and repeated indexes.
 */
@Test
public void testUserSpecifiedOrder() throws InstantiationException, IllegalAccessException {
    // Tuple of maximal arity whose i-th field holds testDouble[i].
    final Tuple fullTuple = (Tuple) CLASSES[Tuple.MAX_ARITY - 1].newInstance();
    int position = 0;
    while (position < Tuple.MAX_ARITY) {
        fullTuple.setField(testDouble[position], position);
        position++;
    }

    // Arbitrary, non-monotonic selection.
    final double[] shuffledExpected = { testDouble[5], testDouble[3], testDouble[6], testDouble[7], testDouble[0] };
    final FieldsFromTuple shuffled = new FieldsFromTuple(5, 3, 6, 7, 0);
    arrayEqualityCheck(shuffledExpected, shuffled.extract(fullTuple));

    // First and last field, ascending.
    final double[] boundsExpected = { testDouble[0], testDouble[Tuple.MAX_ARITY - 1] };
    final FieldsFromTuple bounds = new FieldsFromTuple(0, Tuple.MAX_ARITY - 1);
    arrayEqualityCheck(boundsExpected, bounds.extract(fullTuple));

    // Last and first field, descending.
    final double[] reversedBoundsExpected = { testDouble[Tuple.MAX_ARITY - 1], testDouble[0] };
    final FieldsFromTuple reversedBounds = new FieldsFromTuple(Tuple.MAX_ARITY - 1, 0);
    arrayEqualityCheck(reversedBoundsExpected, reversedBounds.extract(fullTuple));

    // Long selection in which several indexes occur more than once.
    final double[] repeatedExpected = {
        testDouble[13], testDouble[4], testDouble[5], testDouble[4], testDouble[2], testDouble[8],
        testDouble[6], testDouble[2], testDouble[8], testDouble[3], testDouble[5], testDouble[2],
        testDouble[16], testDouble[4], testDouble[3], testDouble[2], testDouble[6], testDouble[4],
        testDouble[7], testDouble[4], testDouble[2], testDouble[8], testDouble[7], testDouble[2]
    };
    final FieldsFromTuple repeated = new FieldsFromTuple(
        13, 4, 5, 4, 2, 8, 6, 2, 8, 3, 5, 2, 16, 4, 3, 2, 6, 4, 7, 4, 2, 8, 7, 2);
    arrayEqualityCheck(repeatedExpected, repeated.extract(fullTuple));
}
Example use of org.apache.flink.api.java.tuple.Tuple in the Apache Flink project.
Class TestBaseUtils, method compareResult.
/**
 * Compares a list of result elements against the newline-separated lines of {@code expected}.
 *
 * <p>Each result element is turned into a string: either via {@code toString()} (with
 * {@code "null"} for null elements), or, when {@code asTuples} is set, by joining the tuple's
 * fields with commas (null fields are rendered as {@code "null"}). A tuple without fields is
 * rendered as the empty string.
 *
 * @param result the elements that were produced
 * @param expected the expected elements, one per line, separated by {@code '\n'}
 * @param asTuples whether every element must be a {@code Tuple} and is rendered field by field
 * @param sort whether both sides are sorted before the element-wise comparison
 * @throws IllegalArgumentException if {@code asTuples} is set and an element is not a {@code Tuple}
 */
private static <T> void compareResult(List<T> result, String expected, boolean asTuples, boolean sort) {
    String[] expectedStrings = expected.split("\n");
    String[] resultStrings = new String[result.size()];
    for (int i = 0; i < resultStrings.length; i++) {
        T val = result.get(i);
        if (asTuples) {
            if (!(val instanceof Tuple)) {
                throw new IllegalArgumentException(val + " is no tuple");
            }
            Tuple t = (Tuple) val;
            StringBuilder bld = new StringBuilder();
            // Field 0 is read inside the loop (not before it) so that a zero-arity tuple
            // yields an empty line instead of failing on an out-of-range field access.
            for (int pos = 0; pos < t.getArity(); pos++) {
                if (pos > 0) {
                    bld.append(',');
                }
                // Bind to Object explicitly; getField is generic and must not be inferred
                // to a more specific type here.
                Object field = t.getField(pos);
                bld.append(field == null ? "null" : field.toString());
            }
            resultStrings[i] = bld.toString();
        } else {
            resultStrings[i] = (val == null) ? "null" : val.toString();
        }
    }
    if (sort) {
        Arrays.sort(expectedStrings);
        Arrays.sort(resultStrings);
    }
    // Include content of both arrays to provide more context in case of a test failure
    String msg = String.format("Different elements in arrays: expected %d elements and received %d\n expected: %s\n received: %s", expectedStrings.length, resultStrings.length, Arrays.toString(expectedStrings), Arrays.toString(resultStrings));
    assertEquals(msg, expectedStrings.length, resultStrings.length);
    for (int i = 0; i < expectedStrings.length; i++) {
        assertEquals(msg, expectedStrings[i], resultStrings[i]);
    }
}
Example use of org.apache.flink.api.java.tuple.Tuple in the Apache Flink project.
Class AbstractEventTimeWindowCheckpointingITCase, method testTumblingTimeWindow.
// ------------------------------------------------------------------------
/**
 * Runs a checkpointed job that keys the output of a FailingSource by field 0, groups it into
 * tumbling event-time windows of {@code windowSize()} milliseconds, sums the values per window
 * and hands the per-window results to a ValidatingSink.
 *
 * <p>Any exception raised while building or executing the job fails the test.
 */
@Test
public void testTumblingTimeWindow() {
    final int elementsPerKey = numElementsPerKey();
    final int windowMillis = windowSize();
    final int keyCount = numKeys();
    FailingSource.reset();

    try {
        StreamExecutionEnvironment env =
            StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());

        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 0));
        env.getConfig().disableSysoutLogging();
        env.setStateBackend(this.stateBackend);

        env
            .addSource(new FailingSource(keyCount, elementsPerKey, elementsPerKey / 3))
            .rebalance()
            .keyBy(0)
            .timeWindow(Time.of(windowMillis, MILLISECONDS))
            .apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

                // Set by open(); lets apply() verify that open() ran first.
                private boolean opened = false;

                @Override
                public void open(Configuration parameters) {
                    assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                    opened = true;
                }

                @Override
                public void apply(
                        Tuple tuple,
                        TimeWindow window,
                        Iterable<Tuple2<Long, IntType>> values,
                        Collector<Tuple4<Long, Long, Long, IntType>> out) {
                    // validate that the function has been opened properly
                    assertTrue(opened);

                    // Sum all values of the window; the emitted key is taken from the last
                    // element seen (-1 if the window delivered no elements).
                    int total = 0;
                    long lastKey = -1;
                    for (Tuple2<Long, IntType> element : values) {
                        total += element.f1.value;
                        lastKey = element.f0;
                    }
                    out.collect(new Tuple4<>(lastKey, window.getStart(), window.getEnd(), new IntType(total)));
                }
            })
            .addSink(new ValidatingSink(keyCount, elementsPerKey / windowMillis))
            .setParallelism(1);

        tryExecute(env, "Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example use of org.apache.flink.api.java.tuple.Tuple in the Apache Flink project.
Class AbstractEventTimeWindowCheckpointingITCase, method testSlidingTimeWindow.
/**
 * Runs a checkpointed job that keys the output of a FailingSource by field 0, groups it into
 * sliding event-time windows of {@code windowSize()} milliseconds that advance every
 * {@code windowSlide()} milliseconds, sums the values per window and hands the per-window
 * results to a ValidatingSink.
 *
 * <p>Any exception raised while building or executing the job fails the test.
 */
@Test
public void testSlidingTimeWindow() {
    final int NUM_ELEMENTS_PER_KEY = numElementsPerKey();
    final int WINDOW_SIZE = windowSize();
    final int WINDOW_SLIDE = windowSlide();
    final int NUM_KEYS = numKeys();
    FailingSource.reset();
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
        env.setParallelism(PARALLELISM);
        env.setMaxParallelism(2 * PARALLELISM);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 0));
        env.getConfig().disableSysoutLogging();
        env.setStateBackend(this.stateBackend);
        env.addSource(new FailingSource(NUM_KEYS, NUM_ELEMENTS_PER_KEY, NUM_ELEMENTS_PER_KEY / 3))
            .rebalance()
            .keyBy(0)
            .timeWindow(Time.of(WINDOW_SIZE, MILLISECONDS), Time.of(WINDOW_SLIDE, MILLISECONDS))
            .apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {
                // Set by open(); lets apply() verify that open() ran first.
                private boolean open = false;

                @Override
                public void open(Configuration parameters) {
                    assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                    open = true;
                }

                @Override
                public void apply(Tuple tuple, TimeWindow window, Iterable<Tuple2<Long, IntType>> values, Collector<Tuple4<Long, Long, Long, IntType>> out) {
                    // validate that the function has been opened properly
                    assertTrue(open);
                    // Sum all values of the window; the emitted key is taken from the last
                    // element seen (-1 if the window delivered no elements).
                    int sum = 0;
                    long key = -1;
                    for (Tuple2<Long, IntType> value : values) {
                        sum += value.f1.value;
                        key = value.f0;
                    }
                    out.collect(new Tuple4<>(key, window.getStart(), window.getEnd(), new IntType(sum)));
                }
            })
            .addSink(new ValidatingSink(NUM_KEYS, NUM_ELEMENTS_PER_KEY / WINDOW_SLIDE))
            .setParallelism(1);
        // Job name matches the window type under test (was a copy-pasted "Tumbling Window Test").
        tryExecute(env, "Sliding Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Aggregations