Use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.
From class KafkaProducerTestBase, method runCustomPartitioningTest.
/**
*
* <pre>
*             +------> (sink) --+--> [KAFKA-1] --> (source) -> (map) --+
*            /                  |                                       \
*           /                   |                                        \
* (source) ----------> (sink) --+--> [KAFKA-2] --> (source) -> (map) -----+-> (sink)
*           \                   |                                        /
*            \                  |                                       /
*             +------> (sink) --+--> [KAFKA-3] --> (source) -> (map) --+
* </pre>
*
* The mapper validates that the values come consistently from the correct Kafka partition.
*
* The final sink validates that there are no duplicates and that all partitions are present.
*/
public void runCustomPartitioningTest() {
	try {
		LOG.info("Starting KafkaProducerITCase.testCustomPartitioning()");

		final String topic = "customPartitioningTestTopic";
		final int parallelism = 3;

		createTestTopic(topic, parallelism, 1);

		TypeInformation<Tuple2<Long, String>> longStringInfo = TypeInfoParser.parse("Tuple2<Long, String>");

		StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
		env.setRestartStrategy(RestartStrategies.noRestart());
		env.getConfig().disableSysoutLogging();

		TypeInformationSerializationSchema<Tuple2<Long, String>> serSchema =
				new TypeInformationSerializationSchema<>(longStringInfo, env.getConfig());
		TypeInformationSerializationSchema<Tuple2<Long, String>> deserSchema =
				new TypeInformationSerializationSchema<>(longStringInfo, env.getConfig());

		// ------ producing topology ---------

		// source has DOP 1 to make sure it generates no duplicates
		DataStream<Tuple2<Long, String>> stream = env.addSource(new SourceFunction<Tuple2<Long, String>>() {

			private boolean running = true;

			@Override
			public void run(SourceContext<Tuple2<Long, String>> ctx) throws Exception {
				long cnt = 0;
				while (running) {
					ctx.collect(new Tuple2<Long, String>(cnt, "kafka-" + cnt));
					cnt++;
				}
			}

			@Override
			public void cancel() {
				running = false;
			}
		}).setParallelism(1);

		Properties props = new Properties();
		props.putAll(FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings));
		props.putAll(secureProps);

		// the sink writes into Kafka, routing each record to a partition via the custom partitioner
		kafkaServer.produceIntoKafka(stream, topic,
				new KeyedSerializationSchemaWrapper<>(serSchema), props,
				new CustomPartitioner(parallelism)).setParallelism(parallelism);

		// ------ consuming topology ---------

		Properties consumerProps = new Properties();
		consumerProps.putAll(standardProps);
		consumerProps.putAll(secureProps);

		FlinkKafkaConsumerBase<Tuple2<Long, String>> source = kafkaServer.getConsumer(topic, deserSchema, consumerProps);

		env.addSource(source).setParallelism(parallelism)
			.map(new RichMapFunction<Tuple2<Long, String>, Integer>() {

				private int ourPartition = -1;

				@Override
				public Integer map(Tuple2<Long, String> value) {
					int partition = value.f0.intValue() % parallelism;
					if (ourPartition != -1) {
						assertEquals("inconsistent partitioning", ourPartition, partition);
					} else {
						ourPartition = partition;
					}
					return partition;
				}
			}).setParallelism(parallelism)
			.addSink(new SinkFunction<Integer>() {

				private int[] valuesPerPartition = new int[parallelism];

				@Override
				public void invoke(Integer value) throws Exception {
					valuesPerPartition[value]++;

					boolean missing = false;
					for (int i : valuesPerPartition) {
						if (i < 100) {
							missing = true;
							break;
						}
					}
					if (!missing) {
						throw new SuccessException();
					}
				}
			}).setParallelism(1);

		tryExecute(env, "custom partitioning test");

		deleteTestTopic(topic);

		LOG.info("Finished KafkaProducerITCase.testCustomPartitioning()");
	} catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
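The CustomPartitioner passed to produceIntoKafka is defined elsewhere in KafkaProducerTestBase and is not shown in this snippet. As a rough, hypothetical sketch of what such a partitioner can look like against the KafkaPartitioner base class of this connector generation (the field and routing rule below are assumptions, not the verbatim class):

	import org.apache.flink.api.java.tuple.Tuple2;
	import org.apache.flink.streaming.connectors.kafka.partitioner.KafkaPartitioner;

	// Hypothetical sketch only; the real CustomPartitioner in KafkaProducerTestBase may differ.
	public static class CustomPartitioner extends KafkaPartitioner<Tuple2<Long, String>> {

		private final int expectedPartitions;

		public CustomPartitioner(int expectedPartitions) {
			this.expectedPartitions = expectedPartitions;
		}

		@Override
		public int partition(Tuple2<Long, String> next, byte[] serializedKey, byte[] serializedValue, int numPartitions) {
			// route by the long field: every record with the same (f0 % numPartitions)
			// lands in the same Kafka partition, which is what the downstream mapper verifies
			return (int) (next.f0 % numPartitions);
		}
	}

With three partitions and values 0, 1, 2, ..., partition i then receives exactly the values with value % 3 == i, so each parallel instance of the RichMapFunction above observes a single consistent partition index.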
Use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.
From class RocksDBKeyedStateBackend, method getColumnFamily.
// ------------------------------------------------------------------------
// State factories
// ------------------------------------------------------------------------
/**
* Creates a column family handle for use with a k/v state. When restoring from a snapshot
* we don't restore the individual k/v states, just the global RocksDB database and the
* list of column families. When a k/v state is first requested we check here whether we
* already have a column family for that and return it or create a new one if it doesn't exist.
*
* <p>This also checks whether the {@link StateDescriptor} for a state matches the one
* that we checkpointed, i.e. is already in the map of column families.
*/
@SuppressWarnings({"rawtypes", "unchecked"})
protected <N, S> ColumnFamilyHandle getColumnFamily(
		StateDescriptor<?, S> descriptor, TypeSerializer<N> namespaceSerializer) throws IOException {

	Tuple2<ColumnFamilyHandle, RegisteredBackendStateMetaInfo<?, ?>> stateInfo =
			kvStateInformation.get(descriptor.getName());

	RegisteredBackendStateMetaInfo<N, S> newMetaInfo = new RegisteredBackendStateMetaInfo<>(
			descriptor.getType(), descriptor.getName(), namespaceSerializer, descriptor.getSerializer());

	if (stateInfo != null) {
		if (newMetaInfo.isCompatibleWith(stateInfo.f1)) {
			stateInfo.f1 = newMetaInfo;
			return stateInfo.f0;
		} else {
			throw new IOException("Trying to access state using wrong meta info, was " +
					stateInfo.f1 + " trying access with " + newMetaInfo);
		}
	}

	ColumnFamilyDescriptor columnDescriptor = new ColumnFamilyDescriptor(
			descriptor.getName().getBytes(ConfigConstants.DEFAULT_CHARSET), columnOptions);

	try {
		ColumnFamilyHandle columnFamily = db.createColumnFamily(columnDescriptor);
		Tuple2<ColumnFamilyHandle, RegisteredBackendStateMetaInfo<N, S>> tuple =
				new Tuple2<>(columnFamily, newMetaInfo);

		// use a raw view of the map so the tuple with concrete <N, S> arguments can be
		// stored under kvStateInformation's wildcard-typed value
		Map rawAccess = kvStateInformation;
		rawAccess.put(descriptor.getName(), tuple);

		return columnFamily;
	} catch (RocksDBException e) {
		throw new IOException("Error creating ColumnFamilyHandle.", e);
	}
}
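Outside of Flink, the same look-up-or-create pattern can be sketched with the plain RocksDB Java API. The class below is illustrative only (name and structure are invented for this example) and omits Flink's meta-info compatibility check:

	import java.nio.charset.StandardCharsets;
	import java.util.HashMap;
	import java.util.Map;

	import org.rocksdb.ColumnFamilyDescriptor;
	import org.rocksdb.ColumnFamilyHandle;
	import org.rocksdb.RocksDB;
	import org.rocksdb.RocksDBException;

	// Illustrative sketch of the look-up-or-create pattern used by getColumnFamily().
	public class ColumnFamilyRegistry implements AutoCloseable {

		private final RocksDB db;
		private final Map<String, ColumnFamilyHandle> handles = new HashMap<>();

		public ColumnFamilyRegistry(String path) throws RocksDBException {
			RocksDB.loadLibrary();
			this.db = RocksDB.open(path);
		}

		/** Returns the column family registered under the given state name, creating it on first access. */
		public ColumnFamilyHandle getOrCreate(String stateName) throws RocksDBException {
			ColumnFamilyHandle handle = handles.get(stateName);
			if (handle == null) {
				handle = db.createColumnFamily(
						new ColumnFamilyDescriptor(stateName.getBytes(StandardCharsets.UTF_8)));
				handles.put(stateName, handle);
			}
			return handle;
		}

		@Override
		public void close() {
			db.close();
		}
	}

The Flink method additionally checks, via RegisteredBackendStateMetaInfo.isCompatibleWith, that the requested StateDescriptor matches the meta info recorded at checkpoint time before handing back an existing handle.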
Use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.
From class RocksDBMergeIteratorTest, method testMergeIterator.
public void testMergeIterator(int maxParallelism) throws Exception {
	Random random = new Random(1234);

	File tmpDir = CommonTestUtils.createTempDirectory();

	RocksDB rocksDB = RocksDB.open(tmpDir.getAbsolutePath());
	try {
		List<Tuple2<RocksIterator, Integer>> rocksIteratorsWithKVStateId = new ArrayList<>();
		List<Tuple2<ColumnFamilyHandle, Integer>> columnFamilyHandlesWithKeyCount = new ArrayList<>();

		int totalKeysExpected = 0;

		for (int c = 0; c < NUM_KEY_VAL_STATES; ++c) {
			ColumnFamilyHandle handle = rocksDB.createColumnFamily(
					new ColumnFamilyDescriptor(("column-" + c).getBytes(ConfigConstants.DEFAULT_CHARSET)));

			ByteArrayOutputStreamWithPos bos = new ByteArrayOutputStreamWithPos();
			DataOutputStream dos = new DataOutputStream(bos);

			int numKeys = random.nextInt(MAX_NUM_KEYS + 1);

			for (int i = 0; i < numKeys; ++i) {
				if (maxParallelism <= Byte.MAX_VALUE) {
					dos.writeByte(i);
				} else {
					dos.writeShort(i);
				}
				dos.writeInt(i);

				byte[] key = bos.toByteArray();
				byte[] val = new byte[] { 42 };
				rocksDB.put(handle, key, val);

				bos.reset();
			}

			columnFamilyHandlesWithKeyCount.add(new Tuple2<>(handle, numKeys));
			totalKeysExpected += numKeys;
		}

		int id = 0;
		for (Tuple2<ColumnFamilyHandle, Integer> columnFamilyHandle : columnFamilyHandlesWithKeyCount) {
			rocksIteratorsWithKVStateId.add(new Tuple2<>(rocksDB.newIterator(columnFamilyHandle.f0), id));
			++id;
		}

		RocksDBKeyedStateBackend.RocksDBMergeIterator mergeIterator =
				new RocksDBKeyedStateBackend.RocksDBMergeIterator(
						rocksIteratorsWithKVStateId, maxParallelism <= Byte.MAX_VALUE ? 1 : 2);

		int prevKVState = -1;
		int prevKey = -1;
		int prevKeyGroup = -1;
		int totalKeysActual = 0;

		while (mergeIterator.isValid()) {
			ByteBuffer bb = ByteBuffer.wrap(mergeIterator.key());

			int keyGroup = maxParallelism > Byte.MAX_VALUE ? bb.getShort() : bb.get();
			int key = bb.getInt();

			Assert.assertTrue(keyGroup >= prevKeyGroup);
			Assert.assertTrue(key >= prevKey);
			Assert.assertEquals(prevKeyGroup != keyGroup, mergeIterator.isNewKeyGroup());
			Assert.assertEquals(prevKVState != mergeIterator.kvStateId(), mergeIterator.isNewKeyValueState());

			prevKeyGroup = keyGroup;
			prevKVState = mergeIterator.kvStateId();

			//System.out.println(keyGroup + " " + key + " " + mergeIterator.kvStateId());
			mergeIterator.next();
			++totalKeysActual;
		}

		Assert.assertEquals(totalKeysExpected, totalKeysActual);

		for (Tuple2<ColumnFamilyHandle, Integer> handleWithCount : columnFamilyHandlesWithKeyCount) {
			rocksDB.dropColumnFamily(handleWithCount.f0);
		}
	} finally {
		rocksDB.close();
	}
}
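The writeByte/writeShort switch above mirrors how each RocksDB key is prefixed with its key group: one byte if maxParallelism fits into a byte, two bytes otherwise, followed by the serialized key. A small, self-contained round trip of that layout (class and method names invented here for illustration) looks like this:

	import java.io.ByteArrayOutputStream;
	import java.io.DataOutputStream;
	import java.io.IOException;
	import java.nio.ByteBuffer;

	// Illustrative helper: writes and reads the (key group, key) layout exercised by the test,
	// where the key-group prefix is 1 byte if maxParallelism fits into a byte, otherwise 2 bytes.
	public final class CompositeKeyDemo {

		static byte[] write(int keyGroup, int key, int maxParallelism) throws IOException {
			ByteArrayOutputStream bos = new ByteArrayOutputStream();
			DataOutputStream dos = new DataOutputStream(bos);
			if (maxParallelism <= Byte.MAX_VALUE) {
				dos.writeByte(keyGroup);
			} else {
				dos.writeShort(keyGroup);
			}
			dos.writeInt(key);
			return bos.toByteArray();
		}

		static int[] read(byte[] composite, int maxParallelism) {
			ByteBuffer bb = ByteBuffer.wrap(composite);
			int keyGroup = maxParallelism > Byte.MAX_VALUE ? bb.getShort() : bb.get();
			int key = bb.getInt();
			return new int[] { keyGroup, key };
		}

		public static void main(String[] args) throws IOException {
			int[] decoded = read(write(5, 42, 128), 128);
			System.out.println(decoded[0] + " / " + decoded[1]); // prints 5 / 42
		}
	}

Because the key-group prefix is the most significant part of the byte-wise ordering, the merge iterator can emit keys grouped by key group first, which is exactly what the assertions in the test check.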
Use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.
From class HBaseWriteExample, method main.
// *************************************************************************
// PROGRAM
// *************************************************************************
public static void main(String[] args) throws Exception {
	if (!parseParameters(args)) {
		return;
	}

	// set up the execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// get input data
	DataSet<String> text = getTextDataSet(env);

	DataSet<Tuple2<String, Integer>> counts =
			// split up the lines in pairs (2-tuples) containing: (word,1)
			text.flatMap(new Tokenizer())
			.groupBy(0)
			.sum(1);

	// emit result
	Job job = Job.getInstance();
	job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, outputTableName);
	// TODO is "mapred.output.dir" really useful?
	job.getConfiguration().set("mapred.output.dir", HBaseFlinkTestConstants.TMP_DIR);

	counts.map(new RichMapFunction<Tuple2<String, Integer>, Tuple2<Text, Mutation>>() {

		private transient Tuple2<Text, Mutation> reuse;

		@Override
		public void open(Configuration parameters) throws Exception {
			super.open(parameters);
			reuse = new Tuple2<Text, Mutation>();
		}

		@Override
		public Tuple2<Text, Mutation> map(Tuple2<String, Integer> t) throws Exception {
			reuse.f0 = new Text(t.f0);
			Put put = new Put(t.f0.getBytes(ConfigConstants.DEFAULT_CHARSET));
			put.add(HBaseFlinkTestConstants.CF_SOME, HBaseFlinkTestConstants.Q_SOME, Bytes.toBytes(t.f1));
			reuse.f1 = put;
			return reuse;
		}
	}).output(new HadoopOutputFormat<Text, Mutation>(new TableOutputFormat<Text>(), job));

	// execute program
	env.execute("WordCount (HBase sink) Example");
}
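The Tokenizer passed to flatMap is not part of this snippet; it is the usual WordCount tokenizer that splits each line into lowercase words and emits (word, 1) pairs. A minimal sketch (not necessarily line-for-line identical to the class in the example project):

	import org.apache.flink.api.common.functions.FlatMapFunction;
	import org.apache.flink.api.java.tuple.Tuple2;
	import org.apache.flink.util.Collector;

	// Sketch of a WordCount-style tokenizer; the Tokenizer referenced above may differ in detail.
	public static final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {

		@Override
		public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
			// normalize the line and split it into words
			String[] tokens = value.toLowerCase().split("\\W+");
			for (String token : tokens) {
				if (token.length() > 0) {
					out.collect(new Tuple2<String, Integer>(token, 1));
				}
			}
		}
	}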
Use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.
From class SpoutSplitExample, method main.
// *************************************************************************
// PROGRAM
// *************************************************************************
public static void main(final String[] args) throws Exception {
	boolean useFile = SpoutSplitExample.parseParameters(args);

	// set up the execution environment
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	String[] rawOutputs = new String[] { RandomSpout.EVEN_STREAM, RandomSpout.ODD_STREAM };

	final DataStream<SplitStreamType<Integer>> numbers = env.addSource(
			new SpoutWrapper<SplitStreamType<Integer>>(new RandomSpout(true, seed), rawOutputs, 1000),
			TypeExtractor.getForObject(new SplitStreamType<Integer>()));

	SplitStream<SplitStreamType<Integer>> splitStream = numbers.split(new StormStreamSelector<Integer>());

	DataStream<SplitStreamType<Integer>> evenStream = splitStream.select(RandomSpout.EVEN_STREAM);
	DataStream<SplitStreamType<Integer>> oddStream = splitStream.select(RandomSpout.ODD_STREAM);

	DataStream<Tuple2<String, Integer>> evenResult = evenStream
			.map(new SplitStreamMapper<Integer>())
			.returns(Integer.class)
			.map(new Enrich(true));
	DataStream<Tuple2<String, Integer>> oddResult = oddStream
			.map(new SplitStreamMapper<Integer>())
			.transform("oddBolt",
					TypeExtractor.getForObject(new Tuple2<String, Integer>("", 0)),
					new BoltWrapper<Integer, Tuple2<String, Integer>>(new VerifyAndEnrichBolt(false)));

	if (useFile) {
		evenResult.writeAsText(outputPath + "/even");
		oddResult.writeAsText(outputPath + "/odd");
	} else {
		evenResult.print();
		oddResult.print();
	}

	// execute program
	env.execute("Spout split stream example");
}
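The Enrich mapper used on the even stream pairs each number with the name of the stream it came from, producing the same Tuple2<String, Integer> type that the BoltWrapper path emits. A plausible sketch (field names and exact wiring are assumptions, not the verbatim class from the Storm compatibility examples):

	import org.apache.flink.api.common.functions.MapFunction;
	import org.apache.flink.api.java.tuple.Tuple2;

	// Sketch of the Enrich mapper referenced above: tags each value with its stream name.
	public static class Enrich implements MapFunction<Integer, Tuple2<String, Integer>> {

		private final String streamName;

		public Enrich(boolean isEven) {
			this.streamName = isEven ? "even" : "odd";
		}

		@Override
		public Tuple2<String, Integer> map(Integer value) {
			return new Tuple2<String, Integer>(streamName, value);
		}
	}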