Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
In the class SolutionSetDuplicatesITCase, method testProgram.
@Test
public void testProgram() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> data = env.generateSequence(0, 10).flatMap(new FlatMapFunction<Long, Tuple2<Long, Long>>() {

            @Override
            public void flatMap(Long value, Collector<Tuple2<Long, Long>> out) {
                out.collect(new Tuple2<Long, Long>(value, value));
                out.collect(new Tuple2<Long, Long>(value, value));
                out.collect(new Tuple2<Long, Long>(value, value));
            }
        }).rebalance();

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = data.iterateDelta(data, 10, 0);

        List<Integer> result = iter.closeWith(iter.getWorkset(), iter.getWorkset()).map(new MapFunction<Tuple2<Long, Long>, Integer>() {

            @Override
            public Integer map(Tuple2<Long, Long> value) {
                return value.f0.intValue();
            }
        }).collect();

        assertEquals(11, result.size());

        Collections.sort(result);
        assertEquals(Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), result);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
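For comparison, the same projection of the tuple's first field can also be written as a Java 8 lambda. The sketch below is not part of the test above; it reuses the iter variable from the snippet and assumes the returns(...) type hint on Flink's DataSet operators to compensate for the type information lost to lambda erasure.

// Sketch only (not from the Flink test): lambda equivalent of the anonymous MapFunction above.
DataSet<Integer> componentIds = iter.closeWith(iter.getWorkset(), iter.getWorkset())
        .map(value -> value.f0.intValue())
        .returns(Integer.class);
List<Integer> collected = componentIds.collect();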
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
In the class ConnectedComponentsWithDeferredUpdateITCase, method testProgram.
@Override
protected void testProgram() throws Exception {
    boolean extraMapper = config.getBoolean("ExtraMapper", false);
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class).flatMap(new ConnectedComponents.UndirectEdge());
    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());
    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin()).groupBy(0).aggregate(Aggregations.MIN, 1).join(iteration.getSolutionSet()).where(0).equalTo(0).with(new UpdateComponentIdMatchNonPreserving());
    DataSet<Tuple2<Long, Long>> delta;
    if (extraMapper) {
        delta = changes.map(
                // ID Mapper
                new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {

                    @Override
                    public Tuple2<Long, Long> map(Tuple2<Long, Long> v) throws Exception {
                        return v;
                    }
                });
    } else {
        delta = changes;
    }
    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, changes);
    result.writeAsCsv(resultPath, "\n", " ");
    // execute program
    env.execute("Connected Components Example");
}
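The in-line "ID Mapper" forwards every record unchanged; it is presumably there only to exercise a plan variant in which the delta is produced by an additional operator. If reused, it could be factored into a named class. A minimal sketch, with a hypothetical name that does not appear in the Flink sources:

// Hypothetical helper, shown for illustration only: forwards every record unchanged.
public static class IdentityMapper<T> implements MapFunction<T, T> {

    @Override
    public T map(T value) {
        return value;
    }
}

With such a class, the branch above reduces to delta = extraMapper ? changes.map(new IdentityMapper<Tuple2<Long, Long>>()) : changes;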
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
In the class RollingSinkITCase, method testNonRollingStringWriter.
/**
* This tests {@link StringWriter} with
* non-rolling output.
*/
@Test
public void testNonRollingStringWriter() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/string-non-rolling-out";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);
    DataStream<Tuple2<Integer, String>> source = env.addSource(new TestSourceFunction(NUM_ELEMENTS)).broadcast().filter(new OddEvenFilter());
    RollingSink<String> sink = new RollingSink<String>(outPath).setBucketer(new NonRollingBucketer()).setPartPrefix("part").setPendingPrefix("").setPendingSuffix("");
    source.map(new MapFunction<Tuple2<Integer, String>, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String map(Tuple2<Integer, String> value) throws Exception {
            return value.f1;
        }
    }).addSink(sink);
    env.execute("RollingSink String Write Test");
    FSDataInputStream inStream = dfs.open(new Path(outPath + "/part-0-0"));
    BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
    for (int i = 0; i < NUM_ELEMENTS; i += 2) {
        String line = br.readLine();
        Assert.assertEquals("message #" + i, line);
    }
    inStream.close();
    inStream = dfs.open(new Path(outPath + "/part-1-0"));
    br = new BufferedReader(new InputStreamReader(inStream));
    for (int i = 1; i < NUM_ELEMENTS; i += 2) {
        String line = br.readLine();
        Assert.assertEquals("message #" + i, line);
    }
    inStream.close();
}
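The Tuple2-to-String projection above reappears verbatim in the next test. As a sketch, using the same imports as the test and a hypothetical class name that is not part of the Flink sources, it could be factored into one reusable, serializable MapFunction:

// Hypothetical reusable mapper: extracts the String payload from the (id, message) tuple.
public static class MessageExtractor implements MapFunction<Tuple2<Integer, String>, String> {

    private static final long serialVersionUID = 1L;

    @Override
    public String map(Tuple2<Integer, String> value) {
        return value.f1;
    }
}

With it, source.map(new MessageExtractor()).addSink(sink); behaves exactly like the anonymous class above.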
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
In the class RollingSinkITCase, method testUserDefinedConfiguration.
/**
 * This tests a user-defined HDFS configuration.
 *
 * @throws Exception
 */
@Test
public void testUserDefinedConfiguration() throws Exception {
    final int NUM_ELEMENTS = 20;
    final int PARALLELISM = 2;
    final String outPath = hdfsURI + "/string-non-rolling-with-config";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);
    DataStream<Tuple2<Integer, String>> source = env.addSource(new TestSourceFunction(NUM_ELEMENTS)).broadcast().filter(new OddEvenFilter());
    Configuration conf = new Configuration();
    conf.set("io.file.buffer.size", "40960");
    RollingSink<String> sink = new RollingSink<String>(outPath).setFSConfig(conf).setWriter(new StreamWriterWithConfigCheck<String>("io.file.buffer.size", "40960")).setBucketer(new NonRollingBucketer()).setPartPrefix("part").setPendingPrefix("").setPendingSuffix("");
    source.map(new MapFunction<Tuple2<Integer, String>, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String map(Tuple2<Integer, String> value) throws Exception {
            return value.f1;
        }
    }).addSink(sink);
    env.execute("RollingSink with configuration Test");
    FSDataInputStream inStream = dfs.open(new Path(outPath + "/part-0-0"));
    BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
    for (int i = 0; i < NUM_ELEMENTS; i += 2) {
        String line = br.readLine();
        Assert.assertEquals("message #" + i, line);
    }
    inStream.close();
    inStream = dfs.open(new Path(outPath + "/part-1-0"));
    br = new BufferedReader(new InputStreamReader(inStream));
    for (int i = 1; i < NUM_ELEMENTS; i += 2) {
        String line = br.readLine();
        Assert.assertEquals("message #" + i, line);
    }
    inStream.close();
}
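The verification loops in both RollingSink tests open the HDFS part files and close them only on the success path, so a failing assertion would leak the stream. A minimal sketch of the same check with try-with-resources, assuming the surrounding test fields dfs, outPath and NUM_ELEMENTS:

// Sketch only: close the HDFS stream even when an assertion fails.
try (FSDataInputStream in = dfs.open(new Path(outPath + "/part-0-0"));
        BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
    for (int i = 0; i < NUM_ELEMENTS; i += 2) {
        Assert.assertEquals("message #" + i, reader.readLine());
    }
}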
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
In the class KafkaConsumerTestBase, method runStartFromKafkaCommitOffsets.
/**
 * This test first writes a total of 300 records to a test topic, reads the first 150 so that some offsets are
 * committed to Kafka, and then starts the consumer again to read the remaining records starting from the
 * committed offsets. The test ensures that whatever offsets were committed to Kafka, the consumer correctly
 * picks them up and starts at the correct position.
 */
public void runStartFromKafkaCommitOffsets() throws Exception {
    final int parallelism = 3;
    final int recordsInEachPartition = 300;
    final String topicName = writeSequence("testStartFromKafkaCommitOffsetsTopic", recordsInEachPartition, parallelism, 1);
    KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = kafkaServer.createOffsetHandler();
    Long o1;
    Long o2;
    Long o3;
    int attempt = 0;
    // make sure that o1, o2, o3 are not all null before proceeding
    do {
        attempt++;
        LOG.info("Attempt " + attempt + " to read records and commit some offsets to Kafka");
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
        env.getConfig().disableSysoutLogging();
        env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
        env.setParallelism(parallelism);
        // fast checkpoints to make sure we commit some offsets
        env.enableCheckpointing(20);
        env.addSource(kafkaServer.getConsumer(topicName, new SimpleStringSchema(), standardProps)).map(new ThrottledMapper<String>(50)).map(new MapFunction<String, Object>() {

            int count = 0;

            @Override
            public Object map(String value) throws Exception {
                count++;
                if (count == 150) {
                    throw new SuccessException();
                }
                return null;
            }
        }).addSink(new DiscardingSink<>());
        tryExecute(env, "Read some records to commit offsets to Kafka");
        o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0);
        o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1);
        o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2);
    } while (o1 == null && o2 == null && o3 == null && attempt < 3);
    if (o1 == null && o2 == null && o3 == null) {
        throw new RuntimeException("No offsets have been committed after 3 attempts");
    }
    LOG.info("Got final committed offsets from Kafka o1={}, o2={}, o3={}", o1, o2, o3);
    final StreamExecutionEnvironment env2 = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env2.getConfig().disableSysoutLogging();
    env2.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env2.setParallelism(parallelism);
    // whatever offsets were committed for each partition, the consumer should pick
    // them up and start from the correct position so that the remaining records are all read
    HashMap<Integer, Tuple2<Integer, Integer>> partitionsToValuesCountAndStartOffset = new HashMap<>();
    partitionsToValuesCountAndStartOffset.put(0, new Tuple2<>((o1 != null) ? (int) (recordsInEachPartition - o1) : recordsInEachPartition, (o1 != null) ? o1.intValue() : 0));
    partitionsToValuesCountAndStartOffset.put(1, new Tuple2<>((o2 != null) ? (int) (recordsInEachPartition - o2) : recordsInEachPartition, (o2 != null) ? o2.intValue() : 0));
    partitionsToValuesCountAndStartOffset.put(2, new Tuple2<>((o3 != null) ? (int) (recordsInEachPartition - o3) : recordsInEachPartition, (o3 != null) ? o3.intValue() : 0));
    readSequence(env2, StartupMode.GROUP_OFFSETS, null, standardProps, topicName, partitionsToValuesCountAndStartOffset);
    kafkaOffsetHandler.close();
    deleteTestTopic(topicName);
}
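The anonymous counting mapper above keeps per-subtask state in a plain instance field: each parallel subtask counts only the records it sees and throws SuccessException, which the test harness interprets as a successful run, once its own count reaches 150. A hypothetical named equivalent (illustration only, not part of the Flink sources), using the same imports as the test:

// Hypothetical named version of the in-line counting mapper above.
private static class SignalAfter150Mapper implements MapFunction<String, Object> {

    private static final long serialVersionUID = 1L;

    private int count = 0;

    @Override
    public Object map(String value) throws Exception {
        if (++count == 150) {
            // stop the job once this subtask has read 150 records
            throw new SuccessException();
        }
        return null;
    }
}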