Search in sources:

Example 11 with JoinedRowData

use of org.apache.flink.table.data.utils.JoinedRowData in project flink by apache.

The following is the buildSortMergeJoin method of the String2SortMergeJoinOperatorTest class.

/**
 * Builds and starts a two-input stream task test harness wired with the given
 * sort-merge join operator, then pushes a small fixed set of string rows into
 * both inputs and ends them.
 *
 * @param operator the join operator under test
 * @return the running harness with all input already fed and ended
 * @throws Exception if the harness fails to start or to process an element
 */
private TwoInputStreamTaskTestHarness<BinaryRowData, BinaryRowData, JoinedRowData> buildSortMergeJoin(StreamOperator operator) throws Exception {
    final TwoInputStreamTaskTestHarness<BinaryRowData, BinaryRowData, JoinedRowData> harness = new TwoInputStreamTaskTestHarness<>(TwoInputStreamTask::new, 2, 2, new int[] { 1, 2 }, typeInfo, (TypeInformation) typeInfo, joinedInfo);
    harness.memorySize = 36 * 1024 * 1024;
    harness.setupOutputForSingletonOperatorChain();
    harness.getStreamConfig().setStreamOperator(operator);
    harness.getStreamConfig().setOperatorID(new OperatorID());
    // Hand nearly all managed memory to the operator so the join can sort/spill.
    harness.getStreamConfig().setManagedMemoryFractionOperatorOfUseCase(ManagedMemoryUseCase.OPERATOR, 0.99);
    final long timestamp = 0L;
    harness.invoke();
    harness.waitForTaskRunning();
    // Interleave records across both inputs and both gates/channels.
    harness.processElement(new StreamRecord<>(newRow("a", "0"), timestamp), 0, 0);
    harness.processElement(new StreamRecord<>(newRow("d", "0"), timestamp), 0, 0);
    harness.processElement(new StreamRecord<>(newRow("a", "2"), timestamp), 1, 1);
    harness.processElement(new StreamRecord<>(newRow("b", "1"), timestamp), 0, 1);
    harness.processElement(new StreamRecord<>(newRow("c", "2"), timestamp), 1, 1);
    harness.processElement(new StreamRecord<>(newRow("b", "4"), timestamp), 1, 0);
    harness.waitForInputProcessing();
    harness.endInput();
    return harness;
}
Also used : TwoInputStreamTask(org.apache.flink.streaming.runtime.tasks.TwoInputStreamTask) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) TwoInputStreamTaskTestHarness(org.apache.flink.streaming.runtime.tasks.TwoInputStreamTaskTestHarness)

Example 12 with JoinedRowData

use of org.apache.flink.table.data.utils.JoinedRowData in project flink by apache.

The following is the open method of the RankOperator class.

@Override
public void open() throws Exception {
    super.open();
    // Instantiate the code-generated comparators with the user classloader,
    // then drop the generated-code holders so they can be garbage collected.
    ClassLoader classLoader = getUserCodeClassloader();
    inputSer = (AbstractRowDataSerializer) getOperatorConfig().getTypeSerializerIn1(classLoader);
    orderByComp = orderByGenComp.newInstance(classLoader);
    orderByGenComp = null;
    partitionByComp = partitionByGenComp.newInstance(classLoader);
    partitionByGenComp = null;
    // Rows are only joined with a one-field rank value when the rank
    // function column is part of the output.
    if (outputRankFunColumn) {
        rankValueRow = new GenericRowData(1);
        joinedRow = new JoinedRowData();
    }
    collector = new StreamRecordCollector<>(output);
}
Also used : JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) GenericRowData(org.apache.flink.table.data.GenericRowData)

Example 13 with JoinedRowData

use of org.apache.flink.table.data.utils.JoinedRowData in project flink by apache.

The following is the join method of the RandomSortMergeInnerJoinTest class.

/**
 * Runs the given join operator in a two-input stream task test harness, drains both
 * inputs into it (one fully before the other), and returns the emitted records.
 *
 * @param operator the join operator under test
 * @param input1 records for the first (left) input
 * @param input2 records for the second (right) input
 * @param input1First whether input1 is fed completely before input2
 * @return the queue of records emitted by the operator
 * @throws Exception if the harness fails to start, process, or finish
 */
public static LinkedBlockingQueue<Object> join(StreamOperator operator, MutableObjectIterator<Tuple2<Integer, String>> input1, MutableObjectIterator<Tuple2<Integer, String>> input2, boolean input1First) throws Exception {
    InternalTypeInfo<RowData> typeInfo = InternalTypeInfo.ofFields(new IntType(), VarCharType.STRING_TYPE);
    InternalTypeInfo<RowData> joinedInfo = InternalTypeInfo.ofFields(new IntType(), VarCharType.STRING_TYPE, new IntType(), VarCharType.STRING_TYPE);
    final TwoInputStreamTaskTestHarness<BinaryRowData, BinaryRowData, JoinedRowData> testHarness = new TwoInputStreamTaskTestHarness<>(TwoInputStreamTask::new, 2, 1, new int[] { 1, 2 }, typeInfo, (TypeInformation) typeInfo, joinedInfo);
    // TwoInputStreamTaskTestHarness puts one record per buffer, so the buffer
    // must be large enough to hold a full serialized record.
    testHarness.bufferSize = 10 * 1024;
    testHarness.getExecutionConfig().enableObjectReuse();
    testHarness.memorySize = 36 * 1024 * 1024;
    testHarness.setupOutputForSingletonOperatorChain();
    testHarness.getStreamConfig().setStreamOperator(operator);
    testHarness.getStreamConfig().setOperatorID(new OperatorID());
    testHarness.getStreamConfig().setManagedMemoryFractionOperatorOfUseCase(ManagedMemoryUseCase.OPERATOR, 0.99);
    testHarness.invoke();
    testHarness.waitForTaskRunning();
    if (input1First) {
        feedInput(testHarness, input1, 0);
        feedInput(testHarness, input2, 1);
    } else {
        feedInput(testHarness, input2, 1);
        feedInput(testHarness, input1, 0);
    }
    testHarness.endInput();
    testHarness.waitForTaskCompletion();
    return testHarness.getOutput();
}

/**
 * Pushes every record of {@code input} into gate {@code inputGate} (channel 0) of the
 * harness and waits until the task has processed them all.
 */
private static void feedInput(TwoInputStreamTaskTestHarness<BinaryRowData, BinaryRowData, JoinedRowData> testHarness, MutableObjectIterator<Tuple2<Integer, String>> input, int inputGate) throws Exception {
    final long initialTime = 0L;
    Tuple2<Integer, String> tuple2 = new Tuple2<>();
    while ((tuple2 = input.next(tuple2)) != null) {
        testHarness.processElement(new StreamRecord<>(newRow(tuple2.f0, tuple2.f1), initialTime), inputGate, 0);
    }
    testHarness.waitForInputProcessing();
}
Also used : OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) TwoInputStreamTaskTestHarness(org.apache.flink.streaming.runtime.tasks.TwoInputStreamTaskTestHarness) IntType(org.apache.flink.table.types.logical.IntType) RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) TwoInputStreamTask(org.apache.flink.streaming.runtime.tasks.TwoInputStreamTask) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData)

Example 14 with JoinedRowData

use of org.apache.flink.table.data.utils.JoinedRowData in project flink by apache.

The following is the joinAndAssert method of the Int2HashJoinOperatorTest class.

/**
 * Runs the given join operator (or operator factory) in a two-input stream task test
 * harness, feeding the two inputs in a random interleaving, and asserts the output.
 *
 * @param operator a {@code StreamOperator} or {@code StreamOperatorFactory} under test
 * @param input1 records for the first (build) input
 * @param input2 records for the second (probe) input
 * @param expectOutSize expected total number of output records
 * @param expectOutKeySize expected number of distinct output keys
 * @param expectOutVal expected per-key value sum, or -1 to skip value verification
 * @param semiJoin whether output rows have semi-join layout (key, value) rather than
 *                 the joined four-column layout
 * @throws Exception if the harness fails to start, process, or finish
 */
@SuppressWarnings("unchecked")
static void joinAndAssert(Object operator, MutableObjectIterator<BinaryRowData> input1, MutableObjectIterator<BinaryRowData> input2, int expectOutSize, int expectOutKeySize, int expectOutVal, boolean semiJoin) throws Exception {
    InternalTypeInfo<RowData> typeInfo = InternalTypeInfo.ofFields(new IntType(), new IntType());
    InternalTypeInfo<RowData> rowDataTypeInfo = InternalTypeInfo.ofFields(new IntType(), new IntType(), new IntType(), new IntType());
    TwoInputStreamTaskTestHarness<BinaryRowData, BinaryRowData, JoinedRowData> testHarness = new TwoInputStreamTaskTestHarness<>(TwoInputStreamTask::new, 2, 1, new int[] { 1, 2 }, typeInfo, (TypeInformation) typeInfo, rowDataTypeInfo);
    testHarness.memorySize = 36 * 1024 * 1024;
    testHarness.getExecutionConfig().enableObjectReuse();
    testHarness.setupOutputForSingletonOperatorChain();
    if (operator instanceof StreamOperator) {
        testHarness.getStreamConfig().setStreamOperator((StreamOperator<?>) operator);
    } else {
        testHarness.getStreamConfig().setStreamOperatorFactory((StreamOperatorFactory<?>) operator);
    }
    testHarness.getStreamConfig().setOperatorID(new OperatorID());
    testHarness.getStreamConfig().setManagedMemoryFractionOperatorOfUseCase(ManagedMemoryUseCase.OPERATOR, 0.99);
    testHarness.invoke();
    testHarness.waitForTaskRunning();
    // Feed the two inputs in a (deliberately unseeded) random interleaving to
    // exercise different arrival orders; once one input is exhausted, drain the other.
    Random random = new Random();
    do {
        BinaryRowData row1 = null;
        BinaryRowData row2 = null;
        if (random.nextInt(2) == 0) {
            row1 = input1.next();
            if (row1 == null) {
                row2 = input2.next();
            }
        } else {
            row2 = input2.next();
            if (row2 == null) {
                row1 = input1.next();
            }
        }
        if (row1 == null && row2 == null) {
            break;
        }
        if (row1 != null) {
            testHarness.processElement(new StreamRecord<>(row1), 0, 0);
        } else {
            testHarness.processElement(new StreamRecord<>(row2), 1, 0);
        }
    } while (true);
    testHarness.endInput(0, 0);
    testHarness.endInput(1, 0);
    testHarness.waitForInputProcessing();
    testHarness.waitForTaskCompletion();
    Queue<Object> actual = testHarness.getOutput();
    Assert.assertEquals("Output was not correct.", expectOutSize, actual.size());
    // Don't verify the output values when expectOutVal is -1.
    if (expectOutVal != -1) {
        assertKeySums(actual, expectOutKeySize, expectOutVal, semiJoin);
    }
}

/**
 * Sums the output values per key and asserts both the number of distinct keys and the
 * per-key sum. For semi-join layout the row is (key, value); for joined layout the key
 * comes from column 0 (or 2 when 0 is null) and the value is the sum of the non-null
 * value columns 1 and 3.
 */
@SuppressWarnings("unchecked")
private static void assertKeySums(Queue<Object> actual, int expectOutKeySize, int expectOutVal, boolean semiJoin) {
    HashMap<Integer, Long> sumPerKey = new HashMap<>(expectOutKeySize);
    for (Object o : actual) {
        RowData row = ((StreamRecord<RowData>) o).getValue();
        int key;
        int val;
        if (semiJoin) {
            key = row.getInt(0);
            val = row.getInt(1);
        } else {
            key = row.isNullAt(0) ? row.getInt(2) : row.getInt(0);
            int val1 = row.isNullAt(1) ? 0 : row.getInt(1);
            int val2 = row.isNullAt(3) ? 0 : row.getInt(3);
            val = val1 + val2;
        }
        sumPerKey.merge(key, (long) val, Long::sum);
    }
    Assert.assertEquals("Wrong number of keys", expectOutKeySize, sumPerKey.size());
    for (Map.Entry<Integer, Long> entry : sumPerKey.entrySet()) {
        long val = entry.getValue();
        int key = entry.getKey();
        Assert.assertEquals("Wrong number of values in per-key cross product for key " + key, expectOutVal, val);
    }
}
Also used : StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) HashMap(java.util.HashMap) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) TwoInputStreamTaskTestHarness(org.apache.flink.streaming.runtime.tasks.TwoInputStreamTaskTestHarness) IntType(org.apache.flink.table.types.logical.IntType) RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) TwoInputStreamTask(org.apache.flink.streaming.runtime.tasks.TwoInputStreamTask) Random(java.util.Random) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) HashMap(java.util.HashMap) Map(java.util.Map)

Example 15 with JoinedRowData

use of org.apache.flink.table.data.utils.JoinedRowData in project flink by apache.

The following is the open method of the MiniBatchGroupAggFunction class.

@Override
public void open(ExecutionContext ctx) throws Exception {
    super.open(ctx);
    StateTtlConfig ttl = createTtlConfig(stateRetentionTime);
    // Compile and open the generated aggregate handler against per-key state views.
    function = genAggsHandler.newInstance(ctx.getRuntimeContext().getUserCodeClassLoader());
    function.open(new PerKeyStateDataViewStore(ctx.getRuntimeContext(), ttl));
    // Compile the generated record equaliser.
    equaliser = genRecordEqualiser.newInstance(ctx.getRuntimeContext().getUserCodeClassLoader());
    // Value state holding the accumulator row per key; expire it when TTL is configured.
    InternalTypeInfo<RowData> accTypeInfo = InternalTypeInfo.ofFields(accTypes);
    ValueStateDescriptor<RowData> accDesc = new ValueStateDescriptor<>("accState", accTypeInfo);
    if (ttl.isEnabled()) {
        accDesc.enableTimeToLive(ttl);
    }
    accState = ctx.getRuntimeContext().getState(accDesc);
    inputRowSerializer = InternalSerializers.create(inputType);
    resultRow = new JoinedRowData();
}
Also used : ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) RowData(org.apache.flink.table.data.RowData) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) StateTtlConfig(org.apache.flink.api.common.state.StateTtlConfig) PerKeyStateDataViewStore(org.apache.flink.table.runtime.dataview.PerKeyStateDataViewStore)

Aggregations

JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData)47 RowData (org.apache.flink.table.data.RowData)22 GenericRowData (org.apache.flink.table.data.GenericRowData)17 ValueStateDescriptor (org.apache.flink.api.common.state.ValueStateDescriptor)12 PerKeyStateDataViewStore (org.apache.flink.table.runtime.dataview.PerKeyStateDataViewStore)12 BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData)11 ArrayList (java.util.ArrayList)7 List (java.util.List)7 MapStateDescriptor (org.apache.flink.api.common.state.MapStateDescriptor)7 ListTypeInfo (org.apache.flink.api.java.typeutils.ListTypeInfo)5 StreamOperator (org.apache.flink.streaming.api.operators.StreamOperator)5 Test (org.junit.Test)5 ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)4 StateTtlConfig (org.apache.flink.api.common.state.StateTtlConfig)4 Configuration (org.apache.flink.configuration.Configuration)4 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)3 TwoInputStreamTask (org.apache.flink.streaming.runtime.tasks.TwoInputStreamTask)3 TwoInputStreamTaskTestHarness (org.apache.flink.streaming.runtime.tasks.TwoInputStreamTaskTestHarness)3 StreamRecordRowDataWrappingCollector (org.apache.flink.table.runtime.operators.python.utils.StreamRecordRowDataWrappingCollector)3 RowDataSerializer (org.apache.flink.table.runtime.typeutils.RowDataSerializer)3