Use of org.apache.flink.util.Collector in project flink by apache.
The class DataSetUtils, method summarize:
// --------------------------------------------------------------------------------------------
// Summarize
// --------------------------------------------------------------------------------------------
/**
* Summarize a DataSet of Tuples by collecting single pass statistics for all columns.
*
* <p>Example usage:
*
* <pre>{@code
* DataSet<Tuple3<Double, String, Boolean>> input = // [...]
* Tuple3<NumericColumnSummary, StringColumnSummary, BooleanColumnSummary> summary = DataSetUtils.summarize(input)
*
* summary.f0.getStandardDeviation()
* summary.f1.getMaxLength()
* }</pre>
*
* @return the summary as a Tuple the same width as input rows
*/
public static <R extends Tuple, T extends Tuple> R summarize(DataSet<T> input) throws Exception {
    if (!input.getType().isTupleType()) {
        throw new IllegalArgumentException("summarize() is only implemented for DataSet's of Tuples");
    }
    final TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getType();
    DataSet<TupleSummaryAggregator<R>> result = input.mapPartition(new MapPartitionFunction<T, TupleSummaryAggregator<R>>() {

        @Override
        public void mapPartition(Iterable<T> values, Collector<TupleSummaryAggregator<R>> out) throws Exception {
            TupleSummaryAggregator<R> aggregator = SummaryAggregatorFactory.create(inType);
            for (Tuple value : values) {
                aggregator.aggregate(value);
            }
            out.collect(aggregator);
        }
    }).reduce(new ReduceFunction<TupleSummaryAggregator<R>>() {

        @Override
        public TupleSummaryAggregator<R> reduce(TupleSummaryAggregator<R> agg1, TupleSummaryAggregator<R> agg2) throws Exception {
            agg1.combine(agg2);
            return agg1;
        }
    });
    return result.collect().get(0).result();
}
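For reference, a possible call site for this utility is sketched below. The ExecutionEnvironment setup, the sample tuples, and the accessor calls are illustrative assumptions based on the Javadoc above, not part of the original snippet.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// Build a small DataSet of tuples matching the Javadoc example types (hypothetical sample data).
DataSet<Tuple3<Double, String, Boolean>> input = env.fromElements(
        Tuple3.of(1.0, "a", true),
        Tuple3.of(2.5, "bb", false));
// summarize() returns one summary column per input column, in the same order.
Tuple3<NumericColumnSummary<Double>, StringColumnSummary, BooleanColumnSummary> summary =
        DataSetUtils.summarize(input);
Double stdDev = summary.f0.getStandardDeviation();
Integer maxLength = summary.f1.getMaxLength();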
Use of org.apache.flink.util.Collector in project flink by apache.
The class KubernetesHighAvailabilityRecoverFromSavepointITCase, method createJobGraph:
private JobGraph createJobGraph() throws Exception {
    final StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    final StateBackend stateBackend = new FsStateBackend(temporaryFolder.newFolder().toURI(), 1);
    sEnv.setStateBackend(stateBackend);
    sEnv.addSource(new InfiniteSourceFunction())
        .keyBy(e -> e)
        .flatMap(new RichFlatMapFunction<Integer, Integer>() {

            private static final long serialVersionUID = 1L;

            ValueState<Integer> state;

            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                ValueStateDescriptor<Integer> descriptor = new ValueStateDescriptor<>("total", Types.INT);
                state = getRuntimeContext().getState(descriptor);
            }

            @Override
            public void flatMap(Integer value, Collector<Integer> out) throws Exception {
                final Integer current = state.value();
                if (current != null) {
                    value += current;
                }
                state.update(value);
                out.collect(value);
            }
        })
        .uid(FLAT_MAP_UID)
        .addSink(new DiscardingSink<>());
    return sEnv.getStreamGraph().getJobGraph();
}
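The InfiniteSourceFunction referenced above is not part of this snippet. A minimal stand-in that keeps the pipeline busy until cancellation could look like the following; the class body is an assumption for illustration, not the test's actual source.

private static final class InfiniteSourceFunction implements SourceFunction<Integer> {

    private static final long serialVersionUID = 1L;

    private volatile boolean running = true;

    @Override
    public void run(SourceContext<Integer> ctx) throws Exception {
        int next = 0;
        while (running) {
            // Emit under the checkpoint lock so snapshots see a consistent stream position.
            synchronized (ctx.getCheckpointLock()) {
                ctx.collect(next++ % 10);
            }
            Thread.sleep(10L);
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}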
Use of org.apache.flink.util.Collector in project flink by apache.
The class KinesisConsumerTest, method testKinesisConsumerThrowsExceptionIfSchemaImplementsCollector:
@Test
public void testKinesisConsumerThrowsExceptionIfSchemaImplementsCollector() {
    DeserializationSchema<Object> schemaWithCollector = new DeserializationSchema<Object>() {

        @Override
        public Object deserialize(byte[] message) throws IOException {
            return null;
        }

        @Override
        public void deserialize(byte[] message, Collector<Object> out) throws IOException {
            // we do not care about the implementation. we should just check if this
            // method is declared
        }

        @Override
        public boolean isEndOfStream(Object nextElement) {
            return false;
        }

        @Override
        public TypeInformation<Object> getProducedType() {
            return null;
        }
    };
    thrown.expect(IllegalArgumentException.class);
    thrown.expectMessage(
            "Kinesis consumer does not support DeserializationSchema that implements deserialization with a"
                    + " Collector. Unsupported DeserializationSchema: "
                    + "org.apache.flink.streaming.connectors.kinesis.KinesisConsumerTest");
    new FlinkKinesisConsumer<>("fakeStream", schemaWithCollector, new Properties());
}
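For contrast, a DeserializationSchema that only overrides the single-argument deserialize(byte[]) does not declare the Collector-based variant and therefore does not trigger this exception. The sketch below is an illustrative assumption, not part of the test; actually constructing the consumer still requires properly configured Kinesis properties.

DeserializationSchema<String> plainSchema = new AbstractDeserializationSchema<String>() {
    @Override
    public String deserialize(byte[] message) {
        // Only the single-argument variant is declared, so the consumer's schema check passes.
        return new String(message, StandardCharsets.UTF_8);
    }
};
// plainSchema could then be handed to FlinkKinesisConsumer together with valid consumer Properties.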
Use of org.apache.flink.util.Collector in project flink by apache.
The class OggJsonSerDeSchemaTest, method testDeserializationWithMetadata:
public void testDeserializationWithMetadata(String resourceFile) throws Exception {
    // we only read the first line for keeping the test simple
    final String firstLine = readLines(resourceFile).get(0);
    final List<ReadableMetadata> requestedMetadata = Arrays.asList(ReadableMetadata.values());
    final DataType producedDataTypes = DataTypeUtils.appendRowFields(
            PHYSICAL_DATA_TYPE,
            requestedMetadata.stream()
                    .map(m -> DataTypes.FIELD(m.key, m.dataType))
                    .collect(Collectors.toList()));
    final OggJsonDeserializationSchema deserializationSchema = new OggJsonDeserializationSchema(
            PHYSICAL_DATA_TYPE,
            requestedMetadata,
            InternalTypeInfo.of(producedDataTypes.getLogicalType()),
            false,
            TimestampFormat.ISO_8601);
    final SimpleCollector collector = new SimpleCollector();
    deserializationSchema.deserialize(firstLine.getBytes(StandardCharsets.UTF_8), collector);
    assertEquals(1, collector.list.size());
    Consumer<RowData> consumer = row -> {
        assertEquals(101, row.getInt(0));
        assertEquals("scooter", row.getString(1).toString());
        assertEquals("Small 2-wheel scooter", row.getString(2).toString());
        assertEquals(3.140000104904175, row.getFloat(3), 1e-15);
        assertEquals("OGG.TBL_TEST", row.getString(4).toString());
        assertEquals("id", row.getArray(5).getString(0).toString());
        assertEquals(1589377175766L, row.getTimestamp(6, 6).getMillisecond());
        assertEquals(1589384406000L, row.getTimestamp(7, 6).getMillisecond());
    };
    consumer.accept(collector.list.get(0));
}
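The SimpleCollector used by this test (and by the Maxwell test below) is not shown in the snippet. It is presumably a small Collector<RowData> that buffers emitted rows in a list; a sketch along those lines, written as an assumption rather than the test's actual helper, could be:

private static final class SimpleCollector implements Collector<RowData> {

    // Rows emitted by the deserialization schema, in arrival order.
    private final List<RowData> list = new ArrayList<>();

    @Override
    public void collect(RowData record) {
        list.add(record);
    }

    @Override
    public void close() {
        // nothing to release
    }
}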
Use of org.apache.flink.util.Collector in project flink by apache.
The class MaxwellJsonSerDerTest, method testDeserializationWithMetadata:
@Test
public void testDeserializationWithMetadata() throws Exception {
    // we only read the first line for keeping the test simple
    final String firstLine = readLines("maxwell-data.txt").get(0);
    final List<ReadableMetadata> requestedMetadata = Arrays.asList(ReadableMetadata.values());
    final DataType producedDataType = DataTypeUtils.appendRowFields(
            PHYSICAL_DATA_TYPE,
            requestedMetadata.stream()
                    .map(m -> DataTypes.FIELD(m.key, m.dataType))
                    .collect(Collectors.toList()));
    final MaxwellJsonDeserializationSchema deserializationSchema = new MaxwellJsonDeserializationSchema(
            PHYSICAL_DATA_TYPE,
            requestedMetadata,
            InternalTypeInfo.of(producedDataType.getLogicalType()),
            false,
            TimestampFormat.ISO_8601);
    final SimpleCollector collector = new SimpleCollector();
    deserializationSchema.deserialize(firstLine.getBytes(StandardCharsets.UTF_8), collector);
    assertEquals(1, collector.list.size());
    Consumer<RowData> consumer = row -> {
        assertThat(row.getInt(0), equalTo(101));
        assertThat(row.getString(1).toString(), equalTo("scooter"));
        assertThat(row.getString(2).toString(), equalTo("Small 2-wheel scooter"));
        assertThat(row.getFloat(3), equalTo(3.14f));
        assertThat(row.getString(4).toString(), equalTo("test"));
        assertThat(row.getString(5).toString(), equalTo("product"));
        assertThat(row.getArray(6).getString(0).toString(), equalTo("id"));
        assertThat(row.getTimestamp(7, 3).getMillisecond(), equalTo(1596684883000L));
    };
    consumer.accept(collector.list.get(0));
}
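The readLines helper used by both metadata tests is likewise omitted from the snippet. A plausible implementation reads the named file from the test classpath; the version below is an assumption for illustration only.

private static List<String> readLines(String resource) throws IOException {
    final URL url = MaxwellJsonSerDerTest.class.getClassLoader().getResource(resource);
    Objects.requireNonNull(url, "test resource not found: " + resource);
    final Path path = new File(url.getFile()).toPath();
    return Files.readAllLines(path);
}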