Example 76 with Tuple2

Use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.

From class KafkaProducerTestBase, method runCustomPartitioningTest.

/**
	 * 
	 * <pre>
	 *             +------> (sink) --+--> [KAFKA-1] --> (source) -> (map) --+
	 *            /                  |                                       \
	 *           /                   |                                        \
	 * (source) ----------> (sink) --+--> [KAFKA-2] --> (source) -> (map) -----+-> (sink)
	 *           \                   |                                        /
	 *            \                  |                                       /
	 *             +------> (sink) --+--> [KAFKA-3] --> (source) -> (map) --+
	 * </pre>
	 * 
	 * The mapper validates that the values come consistently from the correct Kafka partition.
	 * 
	 * The final sink validates that there are no duplicates and that all partitions are present.
	 */
public void runCustomPartitioningTest() {
    try {
        LOG.info("Starting KafkaProducerITCase.testCustomPartitioning()");
        final String topic = "customPartitioningTestTopic";
        final int parallelism = 3;
        createTestTopic(topic, parallelism, 1);
        TypeInformation<Tuple2<Long, String>> longStringInfo = TypeInfoParser.parse("Tuple2<Long, String>");
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
        env.setRestartStrategy(RestartStrategies.noRestart());
        env.getConfig().disableSysoutLogging();
        TypeInformationSerializationSchema<Tuple2<Long, String>> serSchema = new TypeInformationSerializationSchema<>(longStringInfo, env.getConfig());
        TypeInformationSerializationSchema<Tuple2<Long, String>> deserSchema = new TypeInformationSerializationSchema<>(longStringInfo, env.getConfig());
        // ------ producing topology ---------
        // source has DOP 1 to make sure it generates no duplicates
        DataStream<Tuple2<Long, String>> stream = env.addSource(new SourceFunction<Tuple2<Long, String>>() {

            private boolean running = true;

            @Override
            public void run(SourceContext<Tuple2<Long, String>> ctx) throws Exception {
                long cnt = 0;
                while (running) {
                    ctx.collect(new Tuple2<Long, String>(cnt, "kafka-" + cnt));
                    cnt++;
                }
            }

            @Override
            public void cancel() {
                running = false;
            }
        }).setParallelism(1);
        Properties props = new Properties();
        props.putAll(FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings));
        props.putAll(secureProps);
        // sink: partition the stream into the Kafka topic using the custom partitioner
        kafkaServer.produceIntoKafka(stream, topic, new KeyedSerializationSchemaWrapper<>(serSchema), props, new CustomPartitioner(parallelism)).setParallelism(parallelism);
        // ------ consuming topology ---------
        Properties consumerProps = new Properties();
        consumerProps.putAll(standardProps);
        consumerProps.putAll(secureProps);
        FlinkKafkaConsumerBase<Tuple2<Long, String>> source = kafkaServer.getConsumer(topic, deserSchema, consumerProps);
        env.addSource(source).setParallelism(parallelism).map(new RichMapFunction<Tuple2<Long, String>, Integer>() {

            private int ourPartition = -1;

            @Override
            public Integer map(Tuple2<Long, String> value) {
                int partition = value.f0.intValue() % parallelism;
                if (ourPartition != -1) {
                    assertEquals("inconsistent partitioning", ourPartition, partition);
                } else {
                    ourPartition = partition;
                }
                return partition;
            }
        }).setParallelism(parallelism).addSink(new SinkFunction<Integer>() {

            private int[] valuesPerPartition = new int[parallelism];

            @Override
            public void invoke(Integer value) throws Exception {
                valuesPerPartition[value]++;
                boolean missing = false;
                for (int i : valuesPerPartition) {
                    if (i < 100) {
                        missing = true;
                        break;
                    }
                }
                if (!missing) {
                    throw new SuccessException();
                }
            }
        }).setParallelism(1);
        tryExecute(env, "custom partitioning test");
        deleteTestTopic(topic);
        LOG.info("Finished KafkaProducerITCase.testCustomPartitioning()");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) KeyedSerializationSchemaWrapper(org.apache.flink.streaming.util.serialization.KeyedSerializationSchemaWrapper) Properties(java.util.Properties) SuccessException(org.apache.flink.test.util.SuccessException) TypeInformationSerializationSchema(org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema) SinkFunction(org.apache.flink.streaming.api.functions.sink.SinkFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) SuccessException(org.apache.flink.test.util.SuccessException) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)
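
The CustomPartitioner(parallelism) passed to produceIntoKafka above is not shown in this snippet. Below is a minimal sketch of what such a partitioner could look like, assuming the KafkaPartitioner base class that FlinkKafkaProducerBase accepts in this Flink version (the base class and method signature are assumptions and may differ between releases). The only property the consuming mapper relies on is that a record with key f0 lands in partition f0 % numPartitions.

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.connectors.kafka.partitioner.KafkaPartitioner;

// Hypothetical sketch, not the partitioner from the Flink test suite:
// routes each record to partition (f0 % numPartitions), which is exactly the
// invariant the mapper in the consuming topology asserts.
public class CustomPartitioner extends KafkaPartitioner<Tuple2<Long, String>> {

    private final int expectedPartitions;

    public CustomPartitioner(int expectedPartitions) {
        this.expectedPartitions = expectedPartitions;
    }

    @Override
    public int partition(Tuple2<Long, String> next, byte[] serializedKey, byte[] serializedValue, int numPartitions) {
        // The test creates the topic with 'parallelism' partitions, so expectedPartitions == numPartitions here.
        return (int) (next.f0 % numPartitions);
    }
}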

Example 77 with Tuple2

Use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.

From class RocksDBKeyedStateBackend, method getColumnFamily.

// ------------------------------------------------------------------------
//  State factories
// ------------------------------------------------------------------------
/**
	 * Creates a column family handle for use with a k/v state. When restoring from a snapshot
	 * we don't restore the individual k/v states, just the global RocksDB database and the
	 * list of column families. When a k/v state is first requested we check here whether we
	 * already have a column family for that and return it or create a new one if it doesn't exist.
	 *
	 * <p>This also checks whether the {@link StateDescriptor} for a state matches the one
	 * that we checkpointed, i.e. is already in the map of column families.
	 */
@SuppressWarnings({"rawtypes", "unchecked"})
protected <N, S> ColumnFamilyHandle getColumnFamily(StateDescriptor<?, S> descriptor, TypeSerializer<N> namespaceSerializer) throws IOException {
    Tuple2<ColumnFamilyHandle, RegisteredBackendStateMetaInfo<?, ?>> stateInfo = kvStateInformation.get(descriptor.getName());
    RegisteredBackendStateMetaInfo<N, S> newMetaInfo = new RegisteredBackendStateMetaInfo<>(descriptor.getType(), descriptor.getName(), namespaceSerializer, descriptor.getSerializer());
    if (stateInfo != null) {
        if (newMetaInfo.isCompatibleWith(stateInfo.f1)) {
            stateInfo.f1 = newMetaInfo;
            return stateInfo.f0;
        } else {
            throw new IOException("Trying to access state using wrong meta info, was " + stateInfo.f1 + " trying access with " + newMetaInfo);
        }
    }
    ColumnFamilyDescriptor columnDescriptor = new ColumnFamilyDescriptor(descriptor.getName().getBytes(ConfigConstants.DEFAULT_CHARSET), columnOptions);
    try {
        ColumnFamilyHandle columnFamily = db.createColumnFamily(columnDescriptor);
        Tuple2<ColumnFamilyHandle, RegisteredBackendStateMetaInfo<N, S>> tuple = new Tuple2<>(columnFamily, newMetaInfo);
        Map rawAccess = kvStateInformation;
        rawAccess.put(descriptor.getName(), tuple);
        return columnFamily;
    } catch (RocksDBException e) {
        throw new IOException("Error creating ColumnFamilyHandle.", e);
    }
}
Also used : RocksDBException(org.rocksdb.RocksDBException) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RegisteredBackendStateMetaInfo(org.apache.flink.runtime.state.RegisteredBackendStateMetaInfo) IOException(java.io.IOException) ColumnFamilyDescriptor(org.rocksdb.ColumnFamilyDescriptor) Map(java.util.Map) HashMap(java.util.HashMap) ColumnFamilyHandle(org.rocksdb.ColumnFamilyHandle)
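
The control flow above (look the state up by name, reuse the existing column family only if the requested meta info is compatible with the registered one, fail if it is not, and create a new column family on first access) can be illustrated without RocksDB. The following is a simplified, hypothetical registry with the same get-or-create-and-validate shape; all names are illustrative and not part of the Flink API, and MetaInfo.isCompatibleWith stands in for RegisteredBackendStateMetaInfo.isCompatibleWith.

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Supplier;

// Simplified, hypothetical illustration of the get-or-create pattern in getColumnFamily():
// reuse a registered entry only if the requested meta info is compatible with it,
// otherwise fail; create and register a new entry on first access.
class StateRegistry<H> {

    interface MetaInfo {
        boolean isCompatibleWith(MetaInfo other);
    }

    private static final class Entry<T> {
        final T handle;
        MetaInfo metaInfo;
        Entry(T handle, MetaInfo metaInfo) { this.handle = handle; this.metaInfo = metaInfo; }
    }

    private final Map<String, Entry<H>> entries = new HashMap<>();

    H getOrCreate(String name, MetaInfo requested, Supplier<H> factory) throws IOException {
        Entry<H> existing = entries.get(name);
        if (existing != null) {
            if (requested.isCompatibleWith(existing.metaInfo)) {
                // keep the freshly requested meta info, as getColumnFamily() does with stateInfo.f1
                existing.metaInfo = requested;
                return existing.handle;
            }
            throw new IOException("Trying to access state '" + name + "' with incompatible meta info");
        }
        Entry<H> created = new Entry<>(factory.get(), requested);
        entries.put(name, created);
        return created.handle;
    }
}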

Example 78 with Tuple2

Use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.

From class RocksDBMergeIteratorTest, method testMergeIterator.

public void testMergeIterator(int maxParallelism) throws Exception {
    Random random = new Random(1234);
    File tmpDir = CommonTestUtils.createTempDirectory();
    RocksDB rocksDB = RocksDB.open(tmpDir.getAbsolutePath());
    try {
        List<Tuple2<RocksIterator, Integer>> rocksIteratorsWithKVStateId = new ArrayList<>();
        List<Tuple2<ColumnFamilyHandle, Integer>> columnFamilyHandlesWithKeyCount = new ArrayList<>();
        int totalKeysExpected = 0;
        for (int c = 0; c < NUM_KEY_VAL_STATES; ++c) {
            ColumnFamilyHandle handle = rocksDB.createColumnFamily(new ColumnFamilyDescriptor(("column-" + c).getBytes(ConfigConstants.DEFAULT_CHARSET)));
            ByteArrayOutputStreamWithPos bos = new ByteArrayOutputStreamWithPos();
            DataOutputStream dos = new DataOutputStream(bos);
            int numKeys = random.nextInt(MAX_NUM_KEYS + 1);
            for (int i = 0; i < numKeys; ++i) {
                if (maxParallelism <= Byte.MAX_VALUE) {
                    dos.writeByte(i);
                } else {
                    dos.writeShort(i);
                }
                dos.writeInt(i);
                byte[] key = bos.toByteArray();
                byte[] val = new byte[] { 42 };
                rocksDB.put(handle, key, val);
                bos.reset();
            }
            columnFamilyHandlesWithKeyCount.add(new Tuple2<>(handle, numKeys));
            totalKeysExpected += numKeys;
        }
        int id = 0;
        for (Tuple2<ColumnFamilyHandle, Integer> columnFamilyHandle : columnFamilyHandlesWithKeyCount) {
            rocksIteratorsWithKVStateId.add(new Tuple2<>(rocksDB.newIterator(columnFamilyHandle.f0), id));
            ++id;
        }
        RocksDBKeyedStateBackend.RocksDBMergeIterator mergeIterator = new RocksDBKeyedStateBackend.RocksDBMergeIterator(rocksIteratorsWithKVStateId, maxParallelism <= Byte.MAX_VALUE ? 1 : 2);
        int prevKVState = -1;
        int prevKey = -1;
        int prevKeyGroup = -1;
        int totalKeysActual = 0;
        while (mergeIterator.isValid()) {
            ByteBuffer bb = ByteBuffer.wrap(mergeIterator.key());
            int keyGroup = maxParallelism > Byte.MAX_VALUE ? bb.getShort() : bb.get();
            int key = bb.getInt();
            Assert.assertTrue(keyGroup >= prevKeyGroup);
            Assert.assertTrue(key >= prevKey);
            Assert.assertEquals(prevKeyGroup != keyGroup, mergeIterator.isNewKeyGroup());
            Assert.assertEquals(prevKVState != mergeIterator.kvStateId(), mergeIterator.isNewKeyValueState());
            prevKeyGroup = keyGroup;
            prevKVState = mergeIterator.kvStateId();
            //System.out.println(keyGroup + " " + key + " " + mergeIterator.kvStateId());
            mergeIterator.next();
            ++totalKeysActual;
        }
        Assert.assertEquals(totalKeysExpected, totalKeysActual);
        for (Tuple2<ColumnFamilyHandle, Integer> handleWithCount : columnFamilyHandlesWithKeyCount) {
            rocksDB.dropColumnFamily(handleWithCount.f0);
        }
    } finally {
        rocksDB.close();
    }
}
Also used : RocksDB(org.rocksdb.RocksDB) DataOutputStream(java.io.DataOutputStream) ArrayList(java.util.ArrayList) ColumnFamilyDescriptor(org.rocksdb.ColumnFamilyDescriptor) ByteBuffer(java.nio.ByteBuffer) ColumnFamilyHandle(org.rocksdb.ColumnFamilyHandle) Random(java.util.Random) Tuple2(org.apache.flink.api.java.tuple.Tuple2) File(java.io.File) ByteArrayOutputStreamWithPos(org.apache.flink.core.memory.ByteArrayOutputStreamWithPos)
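
The test encodes each RocksDB key as a key-group prefix (one byte if maxParallelism fits in a byte, two bytes otherwise) followed by a 4-byte int key, and later decodes it with a ByteBuffer. The self-contained sketch below makes that round trip explicit; the helper names are hypothetical, and a plain ByteArrayOutputStream stands in for Flink's ByteArrayOutputStreamWithPos.

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;

// Hypothetical helpers showing the composite key layout used in the test:
// [key-group prefix (1 or 2 bytes, big-endian)] + [4-byte big-endian int key].
final class CompositeKeys {

    static byte[] encode(int keyGroup, int key, int maxParallelism) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(bos);
        if (maxParallelism <= Byte.MAX_VALUE) {
            dos.writeByte(keyGroup);   // 1-byte prefix when the key group fits in a byte
        } else {
            dos.writeShort(keyGroup);  // 2-byte prefix otherwise
        }
        dos.writeInt(key);
        return bos.toByteArray();
    }

    static int[] decode(byte[] composite, int maxParallelism) {
        ByteBuffer bb = ByteBuffer.wrap(composite);
        int keyGroup = maxParallelism > Byte.MAX_VALUE ? bb.getShort() : bb.get();
        int key = bb.getInt();
        return new int[] { keyGroup, key };
    }

    public static void main(String[] args) throws IOException {
        byte[] k = encode(3, 42, 128);        // 128 > Byte.MAX_VALUE, so a 2-byte prefix is used
        int[] decoded = decode(k, 128);
        System.out.println(decoded[0] + " / " + decoded[1]);  // prints "3 / 42"
    }
}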

Example 79 with Tuple2

Use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.

From class HBaseWriteExample, method main.

// *************************************************************************
//     PROGRAM
// *************************************************************************
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }
    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // get input data
    DataSet<String> text = getTextDataSet(env);
    // split up the lines in pairs (2-tuples) containing: (word,1),
    // then group by the word (field 0) and sum up the counts (field 1)
    DataSet<Tuple2<String, Integer>> counts = text.flatMap(new Tokenizer()).groupBy(0).sum(1);
    // emit result
    Job job = Job.getInstance();
    job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, outputTableName);
    // TODO is "mapred.output.dir" really useful?
    job.getConfiguration().set("mapred.output.dir", HBaseFlinkTestConstants.TMP_DIR);
    counts.map(new RichMapFunction<Tuple2<String, Integer>, Tuple2<Text, Mutation>>() {

        private transient Tuple2<Text, Mutation> reuse;

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            reuse = new Tuple2<Text, Mutation>();
        }

        @Override
        public Tuple2<Text, Mutation> map(Tuple2<String, Integer> t) throws Exception {
            reuse.f0 = new Text(t.f0);
            Put put = new Put(t.f0.getBytes(ConfigConstants.DEFAULT_CHARSET));
            put.add(HBaseFlinkTestConstants.CF_SOME, HBaseFlinkTestConstants.Q_SOME, Bytes.toBytes(t.f1));
            reuse.f1 = put;
            return reuse;
        }
    }).output(new HadoopOutputFormat<Text, Mutation>(new TableOutputFormat<Text>(), job));
    // execute program
    env.execute("WordCount (HBase sink) Example");
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Configuration(org.apache.flink.configuration.Configuration) Text(org.apache.hadoop.io.Text) Put(org.apache.hadoop.hbase.client.Put) TableOutputFormat(org.apache.hadoop.hbase.mapreduce.TableOutputFormat) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) Mutation(org.apache.hadoop.hbase.client.Mutation) Job(org.apache.hadoop.mapreduce.Job)
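
The Tokenizer referenced in the flatMap above is not part of this snippet. It is the standard WordCount splitter; a minimal sketch, assuming the usual shape of that class in the Flink examples (where it is typically a static nested class), looks like this:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Sketch of the Tokenizer used above: splits each line into lowercase words
// and emits a (word, 1) pair per word.
public final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {

    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
        for (String token : value.toLowerCase().split("\\W+")) {
            if (token.length() > 0) {
                out.collect(new Tuple2<>(token, 1));
            }
        }
    }
}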

Example 80 with Tuple2

Use of org.apache.flink.api.java.tuple.Tuple2 in project flink by apache.

From class SpoutSplitExample, method main.

// *************************************************************************
// PROGRAM
// *************************************************************************
public static void main(final String[] args) throws Exception {
    boolean useFile = SpoutSplitExample.parseParameters(args);
    // set up the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    String[] rawOutputs = new String[] { RandomSpout.EVEN_STREAM, RandomSpout.ODD_STREAM };
    final DataStream<SplitStreamType<Integer>> numbers = env.addSource(new SpoutWrapper<SplitStreamType<Integer>>(new RandomSpout(true, seed), rawOutputs, 1000), TypeExtractor.getForObject(new SplitStreamType<Integer>()));
    SplitStream<SplitStreamType<Integer>> splitStream = numbers.split(new StormStreamSelector<Integer>());
    DataStream<SplitStreamType<Integer>> evenStream = splitStream.select(RandomSpout.EVEN_STREAM);
    DataStream<SplitStreamType<Integer>> oddStream = splitStream.select(RandomSpout.ODD_STREAM);
    DataStream<Tuple2<String, Integer>> evenResult = evenStream.map(new SplitStreamMapper<Integer>()).returns(Integer.class).map(new Enrich(true));
    DataStream<Tuple2<String, Integer>> oddResult = oddStream.map(new SplitStreamMapper<Integer>()).transform("oddBolt", TypeExtractor.getForObject(new Tuple2<String, Integer>("", 0)), new BoltWrapper<Integer, Tuple2<String, Integer>>(new VerifyAndEnrichBolt(false)));
    if (useFile) {
        evenResult.writeAsText(outputPath + "/even");
        oddResult.writeAsText(outputPath + "/odd");
    } else {
        evenResult.print();
        oddResult.print();
    }
    // execute program
    env.execute("Spout split stream example");
}
Also used : RandomSpout(org.apache.flink.storm.split.operators.RandomSpout) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SplitStreamMapper(org.apache.flink.storm.util.SplitStreamMapper) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) VerifyAndEnrichBolt(org.apache.flink.storm.split.operators.VerifyAndEnrichBolt) SplitStreamType(org.apache.flink.storm.util.SplitStreamType)
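
The Enrich mapper applied to the even stream above is not shown in this snippet. A plausible minimal sketch follows; the tag strings and constructor argument are assumptions for illustration, not the exact values used by the example.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// Hypothetical sketch of the Enrich mapper: tags each number with the stream it was
// selected from ("even" or "odd"), producing the Tuple2<String, Integer> shown above.
public final class Enrich implements MapFunction<Integer, Tuple2<String, Integer>> {

    private final String tag;

    public Enrich(boolean isEven) {
        this.tag = isEven ? "even" : "odd";
    }

    @Override
    public Tuple2<String, Integer> map(Integer value) {
        return new Tuple2<>(tag, value);
    }
}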

Aggregations

Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 1159 usages
Test (org.junit.Test): 871 usages
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 486 usages
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 266 usages
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 195 usages
TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow): 137 usages
ArrayList (java.util.ArrayList): 136 usages
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 103 usages
Plan (org.apache.flink.api.common.Plan): 103 usages
TypeHint (org.apache.flink.api.common.typeinfo.TypeHint): 103 usages
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 99 usages
Configuration (org.apache.flink.configuration.Configuration): 87 usages
List (java.util.List): 82 usages
IOException (java.io.IOException): 79 usages
OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation): 77 usages
ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor): 74 usages
HashMap (java.util.HashMap): 72 usages
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 66 usages
Collection (java.util.Collection): 61 usages
ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue): 60 usages