Search in sources:

Example 1 with Tuple

Usage of org.apache.flink.api.java.tuple.Tuple in the Apache Flink project.

Source: class PythonOperationInfo, method normalizeKeys.

//====Utility=======================================================================================================
/**
 * Normalizes a user-supplied key specification into an array of field expressions.
 *
 * <p>Accepted inputs are a {@link Tuple} whose fields are all Integers (field
 * positions) or all Strings (field expressions), or a plain {@code int[]} of
 * positions. Integer positions {@code n} are rewritten to the nested expression
 * {@code "f0.fn"}; String keys are converted via {@code tupleToStringArray}.
 *
 * @param keys the key specification; a Tuple of Integers/Strings or an int[]
 * @return the keys as string field expressions (empty array for an empty tuple)
 * @throws RuntimeException if {@code keys} is null, of an unsupported type, or a
 *         tuple containing a field that is neither an int nor a String
 */
private static String[] normalizeKeys(Object keys) {
    if (keys instanceof Tuple) {
        Tuple tupleKeys = (Tuple) keys;
        if (tupleKeys.getArity() == 0) {
            return new String[0];
        }
        // The type of the first field decides how the entire tuple is interpreted.
        if (tupleKeys.getField(0) instanceof Integer) {
            String[] stringKeys = new String[tupleKeys.getArity()];
            for (int x = 0; x < stringKeys.length; x++) {
                // Cast kept deliberately: a mixed int/String tuple fails fast here.
                stringKeys[x] = "f0.f" + (Integer) tupleKeys.getField(x);
            }
            return stringKeys;
        }
        if (tupleKeys.getField(0) instanceof String) {
            return tupleToStringArray(tupleKeys);
        }
        throw new RuntimeException("Key argument contains field that is neither an int nor a String: " + tupleKeys);
    }
    if (keys instanceof int[]) {
        int[] intKeys = (int[]) keys;
        String[] stringKeys = new String[intKeys.length];
        for (int x = 0; x < stringKeys.length; x++) {
            stringKeys[x] = "f0.f" + intKeys[x];
        }
        return stringKeys;
    }
    // String.valueOf: a null 'keys' must surface this message, not a NullPointerException.
    throw new RuntimeException("Key argument is neither an int[] nor a Tuple: " + String.valueOf(keys));
}
Also used : Tuple(org.apache.flink.api.java.tuple.Tuple)

Example 2 with Tuple

Usage of org.apache.flink.api.java.tuple.Tuple in the Apache Flink project.

Source: class LargeRecordHandler, method finishWriteAndSortKeys.

/**
 * Closes the spill files and starts sorting the spilled (key..., offset) tuples.
 *
 * <p>The given memory is split between a reader for the key file, a reader for the
 * record file, and the sorter itself. The returned iterator yields the full records
 * in sorted key order by seeking to each record's spilled offset.
 *
 * @param memory the memory segments to distribute among the readers and the sorter
 * @return an iterator over the spilled records, ordered by their keys
 * @throws IOException if closing the spill files or reading them back fails
 * @throws IllegalStateException if no records have been spilled yet
 */
public MutableObjectIterator<T> finishWriteAndSortKeys(List<MemorySegment> memory) throws IOException {
    if (recordsOutFile == null || keysOutFile == null) {
        throw new IllegalStateException("The LargeRecordHandler has not spilled any records");
    }
    // close the writers and remember how many bytes the last (partially filled) block of each file holds,
    // so the readers know where the valid data in the final block ends
    final int lastBlockBytesKeys;
    final int lastBlockBytesRecords;
    recordsOutFile.close();
    keysOutFile.close();
    lastBlockBytesKeys = keysOutFile.getBytesInLatestSegment();
    lastBlockBytesRecords = recordsOutFile.getBytesInLatestSegment();
    recordsOutFile = null;
    keysOutFile = null;
    // budget a small, bounded share of the memory for the two file readers;
    // whatever is not removed below stays in 'memory' and goes to the sorter
    final int pagesForReaders = Math.max(3 * MIN_SEGMENTS_FOR_KEY_SPILLING, Math.min(2 * MAX_SEGMENTS_FOR_KEY_SPILLING, memory.size() / 50));
    final int pagesForKeyReader = Math.min(pagesForReaders - MIN_SEGMENTS_FOR_KEY_SPILLING, MAX_SEGMENTS_FOR_KEY_SPILLING);
    final int pagesForRecordReader = pagesForReaders - pagesForKeyReader;
    // grab memory for the record reader
    ArrayList<MemorySegment> memForRecordReader = new ArrayList<MemorySegment>();
    ArrayList<MemorySegment> memForKeysReader = new ArrayList<MemorySegment>();
    for (int i = 0; i < pagesForRecordReader; i++) {
        memForRecordReader.add(memory.remove(memory.size() - 1));
    }
    for (int i = 0; i < pagesForKeyReader; i++) {
        memForKeysReader.add(memory.remove(memory.size() - 1));
    }
    keysReader = new FileChannelInputView(ioManager.createBlockChannelReader(keysChannel), memManager, memForKeysReader, lastBlockBytesKeys);
    InputViewIterator<Tuple> keyIterator = new InputViewIterator<Tuple>(keysReader, keySerializer);
    // sort only the compact key/offset tuples, not the large records themselves
    keySorter = new UnilateralSortMerger<Tuple>(memManager, memory, ioManager, keyIterator, memoryOwner, keySerializerFactory, keyComparator, 1, maxFilehandles, 1.0f, false, this.executionConfig.isObjectReuseEnabled());
    // wait for the sorter to sort the keys
    MutableObjectIterator<Tuple> result;
    try {
        result = keySorter.getIterator();
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
    // seekable view: the fetching iterator jumps to each record's offset from the sorted key tuples
    recordsReader = new SeekableFileChannelInputView(ioManager, recordsChannel, memManager, memForRecordReader, lastBlockBytesRecords);
    return new FetchingIterator<T>(serializer, result, recordsReader, keySerializer, numKeyFields);
}
Also used : SeekableFileChannelInputView(org.apache.flink.runtime.io.disk.SeekableFileChannelInputView) ArrayList(java.util.ArrayList) IOException(java.io.IOException) MemorySegment(org.apache.flink.core.memory.MemorySegment) InputViewIterator(org.apache.flink.runtime.io.disk.InputViewIterator) SeekableFileChannelInputView(org.apache.flink.runtime.io.disk.SeekableFileChannelInputView) FileChannelInputView(org.apache.flink.runtime.io.disk.FileChannelInputView) Tuple(org.apache.flink.api.java.tuple.Tuple)

Example 3 with Tuple

Usage of org.apache.flink.api.java.tuple.Tuple in the Apache Flink project.

Source: class LargeRecordHandler, method addRecord.

// --------------------------------------------------------------------------------------------
/**
 * Spills a single large record to disk and returns its byte offset in the records file.
 *
 * <p>On the first call this lazily initializes the spilling machinery: it derives a
 * key tuple type of (key fields..., long offset) from the record's comparator, builds
 * a serializer/comparator for it, splits the available memory between the key writer
 * and the record writer, and opens both spill channels.
 *
 * @param record the record to spill
 * @return the offset at which the record was written to the records spill file
 * @throws IOException if writing to either spill file fails
 * @throws IllegalStateException if the handler is closed or already switched to sorting
 */
@SuppressWarnings("unchecked")
public long addRecord(T record) throws IOException {
    if (recordsOutFile == null) {
        if (closed) {
            throw new IllegalStateException("The large record handler has been closed.");
        }
        if (recordsReader != null) {
            throw new IllegalStateException("The handler has already switched to sorting.");
        }
        LOG.debug("Initializing the large record spilling...");
        // initialize the utilities
        {
            // build a Tuple type holding the flat key fields plus a trailing long offset;
            // the first record's extracted keys determine the per-field serializers
            final TypeComparator<?>[] keyComps = comparator.getFlatComparators();
            numKeyFields = keyComps.length;
            Object[] keyHolder = new Object[numKeyFields];
            comparator.extractKeys(record, keyHolder, 0);
            TypeSerializer<?>[] keySers = new TypeSerializer<?>[numKeyFields];
            TypeSerializer<?>[] tupleSers = new TypeSerializer<?>[numKeyFields + 1];
            int[] keyPos = new int[numKeyFields];
            for (int i = 0; i < numKeyFields; i++) {
                keyPos[i] = i;
                keySers[i] = createSerializer(keyHolder[i], i);
                tupleSers[i] = keySers[i];
            }
            // add the long serializer for the offset
            tupleSers[numKeyFields] = LongSerializer.INSTANCE;
            keySerializer = new TupleSerializer<Tuple>((Class<Tuple>) Tuple.getTupleClass(numKeyFields + 1), tupleSers);
            keyComparator = new TupleComparator<Tuple>(keyPos, keyComps, keySers);
            keySerializerFactory = new RuntimeSerializerFactory<Tuple>(keySerializer, keySerializer.getTupleClass());
            // reused for every spilled record (fields are overwritten per call below)
            keyTuple = keySerializer.createInstance();
        }
        // initialize the spilling: give the key writer a small bounded share of the
        // segments and the record writer everything else
        final int totalNumSegments = memory.size();
        final int segmentsForKeys = (totalNumSegments >= 2 * MAX_SEGMENTS_FOR_KEY_SPILLING) ? MAX_SEGMENTS_FOR_KEY_SPILLING : Math.max(MIN_SEGMENTS_FOR_KEY_SPILLING, totalNumSegments - MAX_SEGMENTS_FOR_KEY_SPILLING);
        List<MemorySegment> recordsMemory = new ArrayList<MemorySegment>();
        List<MemorySegment> keysMemory = new ArrayList<MemorySegment>();
        for (int i = 0; i < segmentsForKeys; i++) {
            keysMemory.add(memory.get(i));
        }
        for (int i = segmentsForKeys; i < totalNumSegments; i++) {
            recordsMemory.add(memory.get(i));
        }
        recordsChannel = ioManager.createChannel();
        keysChannel = ioManager.createChannel();
        recordsOutFile = new FileChannelOutputView(ioManager.createBlockChannelWriter(recordsChannel), memManager, recordsMemory, memManager.getPageSize());
        keysOutFile = new FileChannelOutputView(ioManager.createBlockChannelWriter(keysChannel), memManager, keysMemory, memManager.getPageSize());
    }
    // capture the offset BEFORE serializing the record — it is stored with the keys
    // so the record can be fetched after the keys are sorted
    final long offset = recordsOutFile.getWriteOffset();
    if (offset < 0) {
        throw new RuntimeException("wrong offset");
    }
    Object[] keyHolder = new Object[numKeyFields];
    comparator.extractKeys(record, keyHolder, 0);
    for (int i = 0; i < numKeyFields; i++) {
        keyTuple.setField(keyHolder[i], i);
    }
    keyTuple.setField(offset, numKeyFields);
    keySerializer.serialize(keyTuple, keysOutFile);
    serializer.serialize(record, recordsOutFile);
    recordCounter++;
    return offset;
}
Also used : FileChannelOutputView(org.apache.flink.runtime.io.disk.FileChannelOutputView) RuntimeSerializerFactory(org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory) ArrayList(java.util.ArrayList) TupleComparator(org.apache.flink.api.java.typeutils.runtime.TupleComparator) MemorySegment(org.apache.flink.core.memory.MemorySegment) TupleSerializer(org.apache.flink.api.java.typeutils.runtime.TupleSerializer) Tuple(org.apache.flink.api.java.tuple.Tuple)

Example 4 with Tuple

Usage of org.apache.flink.api.java.tuple.Tuple in the Apache Flink project.

Source: class TimeWindowTranslationTest, method testApplyAlignedTimeWindows.

/**
	 * These tests ensure that the fast aligned time windows operator is used if the
	 * conditions are right.
	 */
@Test
public void testApplyAlignedTimeWindows() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

    final DataStream<Tuple2<String, Integer>> input =
            env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

    // Aligned tumbling processing-time windows applied with a no-op WindowFunction;
    // only the resulting operator type is of interest here.
    final DataStream<Tuple2<String, Integer>> windowed = input
            .keyBy(0)
            .window(TumblingAlignedProcessingTimeWindows.of(Time.of(1000, TimeUnit.MILLISECONDS)))
            .apply(new WindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple, TimeWindow>() {

                private static final long serialVersionUID = 1L;

                @Override
                public void apply(Tuple tuple, TimeWindow window, Iterable<Tuple2<String, Integer>> values, Collector<Tuple2<String, Integer>> out) throws Exception {
                }
            });

    final OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transformation =
            (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) windowed.getTransformation();
    final OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> windowOperator =
            transformation.getOperator();

    // The fast aligned-window operator must have been chosen under these conditions.
    Assert.assertTrue(windowOperator instanceof AccumulatingProcessingTimeWindowOperator);
}
Also used : TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Tuple(org.apache.flink.api.java.tuple.Tuple) Test(org.junit.Test)

Example 5 with Tuple

Usage of org.apache.flink.api.java.tuple.Tuple in the Apache Flink project.

Source: class WindowTranslationTest, method testReduceWithEvictorAndProcessFunction.

@Test
@SuppressWarnings("rawtypes")
public void testReduceWithEvictorAndProcessFunction() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

    final DataStream<Tuple2<String, Integer>> input =
            env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));
    final DummyReducer reduceFunction = new DummyReducer();

    // Sliding event-time windows with a count evictor, reduced and then forwarded
    // unchanged by a pass-through ProcessWindowFunction.
    final DataStream<Tuple2<String, Integer>> windowed = input
            .keyBy(0)
            .window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS)))
            .evictor(CountEvictor.of(100))
            .reduce(reduceFunction, new ProcessWindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple, TimeWindow>() {

                @Override
                public void process(Tuple tuple, Context context, Iterable<Tuple2<String, Integer>> elements, Collector<Tuple2<String, Integer>> out) throws Exception {
                    for (Tuple2<String, Integer> element : elements) {
                        out.collect(element);
                    }
                }
            });

    final OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transformation =
            (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) windowed.getTransformation();
    final OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator =
            transformation.getOperator();

    // An evictor forces the evicting operator; verify its configuration end to end.
    Assert.assertTrue(operator instanceof EvictingWindowOperator);
    final EvictingWindowOperator<String, Tuple2<String, Integer>, ?, ?> winOperator =
            (EvictingWindowOperator<String, Tuple2<String, Integer>, ?, ?>) operator;
    Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
    Assert.assertTrue(winOperator.getEvictor() instanceof CountEvictor);
    Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows);
    Assert.assertTrue(winOperator.getStateDescriptor() instanceof ListStateDescriptor);

    processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
Also used : SlidingEventTimeWindows(org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) CountEvictor(org.apache.flink.streaming.api.windowing.evictors.CountEvictor) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Tuple(org.apache.flink.api.java.tuple.Tuple) EventTimeTrigger(org.apache.flink.streaming.api.windowing.triggers.EventTimeTrigger) Test(org.junit.Test)

Aggregations

Tuple (org.apache.flink.api.java.tuple.Tuple)59 Test (org.junit.Test)38 AbstractTest (org.apache.flink.storm.util.AbstractTest)17 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)14 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)14 TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow)13 Tuple5 (org.apache.flink.api.java.tuple.Tuple5)10 ArrayList (java.util.ArrayList)9 Configuration (org.apache.flink.configuration.Configuration)8 SuccessException (org.apache.flink.test.util.SuccessException)7 IOException (java.io.IOException)6 HashMap (java.util.HashMap)6 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)6 Fields (org.apache.storm.tuple.Fields)6 Tuple4 (org.apache.flink.api.java.tuple.Tuple4)5 OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation)5 Keys (org.apache.flink.api.common.operators.Keys)4 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)4 ComparableAggregator (org.apache.flink.streaming.api.functions.aggregation.ComparableAggregator)4 Values (org.apache.storm.tuple.Values)4