Search in sources :

Example 1 with InputFile

use of org.apache.flink.connector.file.table.stream.compact.CompactMessages.InputFile in project flink by apache.

the class CompactCoordinator method processElement.

/**
 * Dispatches a single coordinator input message.
 *
 * <ul>
 *   <li>An {@link InputFile} is appended to the list kept in {@code currentInputFiles} under the
 *       file's partition; the list is created on first use.
 *   <li>An {@link EndCheckpoint} is registered with {@code inputTaskTracker} (created lazily from
 *       the number of tasks carried by the first such message). When the tracker reports that
 *       the checkpoint can be triggered, {@code commitUpToCheckpoint} is invoked with that
 *       checkpoint id.
 * </ul>
 *
 * @param element the stream record wrapping the coordinator input
 * @throws UnsupportedOperationException if the message is neither an {@link InputFile} nor an
 *     {@link EndCheckpoint}
 */
@Override
public void processElement(StreamRecord<CoordinatorInput> element) throws Exception {
    final CoordinatorInput message = element.getValue();

    if (message instanceof InputFile) {
        final InputFile inputFile = (InputFile) message;
        currentInputFiles
                .computeIfAbsent(inputFile.getPartition(), partition -> new ArrayList<>())
                .add(inputFile.getFile());
        return;
    }

    if (!(message instanceof EndCheckpoint)) {
        throw new UnsupportedOperationException("Unsupported input message: " + message);
    }

    final EndCheckpoint barrier = (EndCheckpoint) message;
    if (inputTaskTracker == null) {
        inputTaskTracker = new TaskTracker(barrier.getNumberOfTasks());
    }

    // Only commit once the tracker says so, so that all files are ready to be compacted.
    if (inputTaskTracker.add(barrier.getCheckpointId(), barrier.getTaskId())) {
        commitUpToCheckpoint(barrier.getCheckpointId());
    }
}
Also used : EndCompaction(org.apache.flink.connector.file.table.stream.compact.CompactMessages.EndCompaction) CompactionUnit(org.apache.flink.connector.file.table.stream.compact.CompactMessages.CompactionUnit) BinPacking(org.apache.flink.connector.file.table.BinPacking) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) Function(java.util.function.Function) ArrayList(java.util.ArrayList) ListState(org.apache.flink.api.common.state.ListState) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) Path(org.apache.flink.core.fs.Path) Map(java.util.Map) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) LongSerializer(org.apache.flink.api.common.typeutils.base.LongSerializer) EndCheckpoint(org.apache.flink.connector.file.table.stream.compact.CompactMessages.EndCheckpoint) TaskTracker(org.apache.flink.connector.file.table.stream.TaskTracker) CoordinatorOutput(org.apache.flink.connector.file.table.stream.compact.CompactMessages.CoordinatorOutput) StateSnapshotContext(org.apache.flink.runtime.state.StateSnapshotContext) Logger(org.slf4j.Logger) KryoSerializer(org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer) IOException(java.io.IOException) CoordinatorInput(org.apache.flink.connector.file.table.stream.compact.CompactMessages.CoordinatorInput) InputFile(org.apache.flink.connector.file.table.stream.compact.CompactMessages.InputFile) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) UncheckedIOException(java.io.UncheckedIOException) List(java.util.List) FileSystem(org.apache.flink.core.fs.FileSystem) TreeMap(java.util.TreeMap) MapSerializer(org.apache.flink.api.common.typeutils.base.MapSerializer) Internal(org.apache.flink.annotation.Internal) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) Comparator(java.util.Comparator) 
ListSerializer(org.apache.flink.api.common.typeutils.base.ListSerializer) StateInitializationContext(org.apache.flink.runtime.state.StateInitializationContext) SupplierWithException(org.apache.flink.util.function.SupplierWithException) TaskTracker(org.apache.flink.connector.file.table.stream.TaskTracker) CoordinatorInput(org.apache.flink.connector.file.table.stream.compact.CompactMessages.CoordinatorInput) EndCheckpoint(org.apache.flink.connector.file.table.stream.compact.CompactMessages.EndCheckpoint) ArrayList(java.util.ArrayList) InputFile(org.apache.flink.connector.file.table.stream.compact.CompactMessages.InputFile)

Example 2 with InputFile

use of org.apache.flink.connector.file.table.stream.compact.CompactMessages.InputFile in project flink by apache.

the class CompactFileWriterTest method testEmitEndCheckpointAfterEndInput.

/**
 * Checks that the writer emits an {@link InputFile} followed by an {@link EndCheckpoint} for
 * checkpoint 1, and that after {@code endInput()} the last emitted message is an {@link
 * EndCheckpoint} whose checkpoint id is {@code Long.MAX_VALUE}.
 */
@Test
public void testEmitEndCheckpointAfterEndInput() throws Exception {
    CompactFileWriter<RowData> writer =
            new CompactFileWriter<>(
                    1000, StreamingFileSink.forRowFormat(folder, new SimpleStringEncoder<>()));

    try (OneInputStreamOperatorTestHarness<RowData, CoordinatorInput> testHarness =
            new OneInputStreamOperatorTestHarness<>(writer)) {
        testHarness.setup();
        testHarness.open();

        // One record, then snapshot and complete checkpoint 1.
        testHarness.processElement(row("test"), 0);
        testHarness.snapshot(1, 1);
        testHarness.notifyOfCompletedCheckpoint(1);

        List<CoordinatorInput> emitted = testHarness.extractOutputValues();
        Assert.assertEquals(2, emitted.size());

        // First message must be the InputFile ...
        CoordinatorInput first = emitted.get(0);
        Assert.assertTrue(first instanceof InputFile);

        // ... and the second the EndCheckpoint of checkpoint 1.
        EndCheckpoint firstBarrier = (EndCheckpoint) emitted.get(1);
        Assert.assertEquals(1, firstBarrier.getCheckpointId());

        // Feed more records, discard what has been emitted so far, then end the input.
        testHarness.processElement(row("test1"), 0);
        testHarness.processElement(row("test2"), 0);
        testHarness.getOutput().clear();
        testHarness.endInput();

        // The very last message must be an EndCheckpoint carrying Long.MAX_VALUE.
        emitted = testHarness.extractOutputValues();
        int lastIndex = emitted.size() - 1;
        EndCheckpoint finalBarrier = (EndCheckpoint) emitted.get(lastIndex);
        Assert.assertEquals(Long.MAX_VALUE, finalBarrier.getCheckpointId());
    }
}
Also used : GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) CoordinatorInput(org.apache.flink.connector.file.table.stream.compact.CompactMessages.CoordinatorInput) EndCheckpoint(org.apache.flink.connector.file.table.stream.compact.CompactMessages.EndCheckpoint) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) SimpleStringEncoder(org.apache.flink.api.common.serialization.SimpleStringEncoder) InputFile(org.apache.flink.connector.file.table.stream.compact.CompactMessages.InputFile) Test(org.junit.Test)

Example 3 with InputFile

use of org.apache.flink.connector.file.table.stream.compact.CompactMessages.InputFile in project flink by apache.

the class CompactCoordinatorTest method testCoordinatorCrossCheckpoints.

/**
 * Runs the coordinator three times, handing the snapshot of each run to the next one, and
 * checks that a single {@link EndCheckpoint} in the last run produces the compaction units and
 * end-of-compaction markers for both snapshotted checkpoints (1 and 2).
 */
@Test
public void testCoordinatorCrossCheckpoints() throws Exception {
    AtomicReference<OperatorSubtaskState> snapshotHolder = new AtomicReference<>();

    // Run 1: seven input files across partitions p0 and p1, snapshotted as checkpoint 1.
    // NOTE(review): the int passed to newFile is presumably the file size - confirm.
    runCoordinator(h -> {
        h.setup();
        h.open();
        h.processElement(new InputFile("p0", newFile("f0", 3)), 0);
        h.processElement(new InputFile("p0", newFile("f1", 2)), 0);
        h.processElement(new InputFile("p1", newFile("f2", 2)), 0);
        h.processElement(new InputFile("p0", newFile("f3", 5)), 0);
        h.processElement(new InputFile("p0", newFile("f4", 1)), 0);
        h.processElement(new InputFile("p1", newFile("f5", 5)), 0);
        h.processElement(new InputFile("p1", newFile("f6", 4)), 0);
        snapshotHolder.set(h.snapshot(1, 0));
    });

    // Run 2: restore, add two more p0 files, snapshot as checkpoint 2.
    runCoordinator(h -> {
        h.setup();
        h.initializeState(snapshotHolder.get());
        h.open();
        h.processElement(new InputFile("p0", newFile("f7", 3)), 0);
        h.processElement(new InputFile("p0", newFile("f8", 2)), 0);
        snapshotHolder.set(h.snapshot(2, 0));
    });

    // Run 3: restore and send a single EndCheckpoint; output for both checkpoints is expected.
    runCoordinator(h -> {
        h.setup();
        h.initializeState(snapshotHolder.get());
        h.open();
        h.processElement(new EndCheckpoint(2, 0, 1), 0);

        List<CoordinatorOutput> emitted = h.extractOutputValues();
        Assert.assertEquals(7, emitted.size());

        // The first four outputs are the units of checkpoint 1; collect them for sorting since
        // the assertions below address them by partition and unit id.
        List<CompactionUnit> firstCheckpointUnits = new ArrayList<>();
        for (CoordinatorOutput candidate : emitted.subList(0, 4)) {
            Assert.assertTrue(candidate instanceof CompactionUnit);
            firstCheckpointUnits.add((CompactionUnit) candidate);
        }
        Comparator<CompactionUnit> byPartitionThenUnitId =
                Comparator.comparing(CompactionUnit::getPartition)
                        .thenComparingInt(CompactionUnit::getUnitId);
        firstCheckpointUnits.sort(byPartitionThenUnitId);

        assertUnit(firstCheckpointUnits.get(0), 0, "p0", Arrays.asList("f0", "f1", "f4"));
        assertUnit(firstCheckpointUnits.get(1), 1, "p0", Collections.singletonList("f3"));
        assertUnit(firstCheckpointUnits.get(2), 2, "p1", Arrays.asList("f2", "f5"));
        assertUnit(firstCheckpointUnits.get(3), 3, "p1", Collections.singletonList("f6"));
        assertEndCompaction(emitted.get(4), 1);

        // Checkpoint 2 contributes one unit followed by its end-of-compaction marker.
        assertUnit(emitted.get(5), 0, "p0", Arrays.asList("f7", "f8"));
        assertEndCompaction(emitted.get(6), 2);
    });
}
Also used : CoordinatorOutput(org.apache.flink.connector.file.table.stream.compact.CompactMessages.CoordinatorOutput) EndCheckpoint(org.apache.flink.connector.file.table.stream.compact.CompactMessages.EndCheckpoint) CompactionUnit(org.apache.flink.connector.file.table.stream.compact.CompactMessages.CompactionUnit) ArrayList(java.util.ArrayList) AtomicReference(java.util.concurrent.atomic.AtomicReference) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) EndCheckpoint(org.apache.flink.connector.file.table.stream.compact.CompactMessages.EndCheckpoint) InputFile(org.apache.flink.connector.file.table.stream.compact.CompactMessages.InputFile) Test(org.junit.Test)

Aggregations

EndCheckpoint (org.apache.flink.connector.file.table.stream.compact.CompactMessages.EndCheckpoint)3 InputFile (org.apache.flink.connector.file.table.stream.compact.CompactMessages.InputFile)3 ArrayList (java.util.ArrayList)2 CompactionUnit (org.apache.flink.connector.file.table.stream.compact.CompactMessages.CompactionUnit)2 CoordinatorInput (org.apache.flink.connector.file.table.stream.compact.CompactMessages.CoordinatorInput)2 CoordinatorOutput (org.apache.flink.connector.file.table.stream.compact.CompactMessages.CoordinatorOutput)2 Test (org.junit.Test)2 IOException (java.io.IOException)1 UncheckedIOException (java.io.UncheckedIOException)1 Comparator (java.util.Comparator)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 TreeMap (java.util.TreeMap)1 AtomicReference (java.util.concurrent.atomic.AtomicReference)1 Function (java.util.function.Function)1 Internal (org.apache.flink.annotation.Internal)1 SimpleStringEncoder (org.apache.flink.api.common.serialization.SimpleStringEncoder)1 ListState (org.apache.flink.api.common.state.ListState)1 ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor)1