Example 6 with WriteMetadataEvent

Use of org.apache.hudi.sink.event.WriteMetadataEvent in project hudi by apache.

Class BulkInsertWriteFunction, method endInput().

/**
 * End input action for batch source.
 */
public void endInput() {
    final List<WriteStatus> writeStatus = this.writerHelper.getWriteStatuses(this.taskID);
    final WriteMetadataEvent event = WriteMetadataEvent.builder()
            .taskID(taskID)
            .instantTime(this.writerHelper.getInstantTime())
            .writeStatus(writeStatus)
            .lastBatch(true)
            .endInput(true)
            .build();
    this.eventGateway.sendEventToCoordinator(event);
}
Also used : WriteMetadataEvent(org.apache.hudi.sink.event.WriteMetadataEvent) WriteStatus(org.apache.hudi.client.WriteStatus)

Example 7 with WriteMetadataEvent

Use of org.apache.hudi.sink.event.WriteMetadataEvent in project hudi by apache.

Class AbstractStreamWriteFunction, method restoreWriteMetadata().

// -------------------------------------------------------------------------
// Utilities
// -------------------------------------------------------------------------
private void restoreWriteMetadata() throws Exception {
    String lastInflight = lastPendingInstant();
    boolean eventSent = false;
    for (WriteMetadataEvent event : this.writeMetadataState.get()) {
        if (Objects.equals(lastInflight, event.getInstantTime())) {
            // The checkpoint succeeded but the metadata was not committed,
            // re-commit the inflight instant
            this.eventGateway.sendEventToCoordinator(event);
            LOG.info("Send uncommitted write metadata event to coordinator, task[{}].", taskID);
            eventSent = true;
        }
    }
    if (!eventSent) {
        sendBootstrapEvent();
    }
}
Also used : WriteMetadataEvent(org.apache.hudi.sink.event.WriteMetadataEvent)
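
The fallback sendBootstrapEvent() is not shown on this page. Below is a minimal sketch of what that branch plausibly does, assuming it only wraps the WriteMetadataEvent.emptyBootstrap factory that the coordinator tests further down also use; the actual method in AbstractStreamWriteFunction may carry more state.

private void sendBootstrapEvent() {
    // assumption: an empty bootstrap event is enough to (re-)register this task with the coordinator
    WriteMetadataEvent event = WriteMetadataEvent.emptyBootstrap(taskID);
    this.eventGateway.sendEventToCoordinator(event);
    LOG.info("Send bootstrap write metadata event to coordinator, task[{}].", taskID);
}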

Example 8 with WriteMetadataEvent

Use of org.apache.hudi.sink.event.WriteMetadataEvent in project hudi by apache.

Class StreamWriteFunction, method flushBucket().

@SuppressWarnings({"unchecked", "rawtypes"})
private boolean flushBucket(DataBucket bucket) {
    String instant = instantToWrite(true);
    if (instant == null) {
        // in case there are empty checkpoints that have no input data
        LOG.info("No inflight instant when flushing data, skip.");
        return false;
    }
    List<HoodieRecord> records = bucket.writeBuffer();
    ValidationUtils.checkState(records.size() > 0, "Data bucket to flush has no buffered records");
    if (config.getBoolean(FlinkOptions.PRE_COMBINE)) {
        records = FlinkWriteHelper.newInstance().deduplicateRecords(records, (HoodieIndex) null, -1);
    }
    bucket.preWrite(records);
    final List<WriteStatus> writeStatus = new ArrayList<>(writeFunction.apply(records, instant));
    records.clear();
    final WriteMetadataEvent event = WriteMetadataEvent.builder()
            .taskID(taskID)
            // the write instant may shift but the event still uses the current instant
            .instantTime(instant)
            .writeStatus(writeStatus)
            .lastBatch(false)
            .endInput(false)
            .build();
    this.eventGateway.sendEventToCoordinator(event);
    writeStatuses.addAll(writeStatus);
    return true;
}
Also used : HoodieIndex(org.apache.hudi.index.HoodieIndex) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) ArrayList(java.util.ArrayList) WriteMetadataEvent(org.apache.hudi.sink.event.WriteMetadataEvent) WriteStatus(org.apache.hudi.client.WriteStatus)
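
flushBucket reports each mid-checkpoint flush with lastBatch(false) and endInput(false). For comparison, here is a hypothetical variant of the same builder call for the flush that closes out an instant at checkpoint time; the flag values are an assumption modelled on the end-of-input case in example 6, where both flags are set to true.

    // hypothetical: flushing the remaining buffers for the current instant at checkpoint time
    final WriteMetadataEvent event = WriteMetadataEvent.builder()
            .taskID(taskID)
            .instantTime(instant)
            .writeStatus(writeStatus)
            // assumption: lastBatch(true) tells the coordinator this task has sent
            // everything it has for the current instant
            .lastBatch(true)
            // endInput stays false for an unbounded streaming source
            .endInput(false)
            .build();
    this.eventGateway.sendEventToCoordinator(event);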

Example 9 with WriteMetadataEvent

Use of org.apache.hudi.sink.event.WriteMetadataEvent in project hudi by apache.

Class TestStreamWriteOperatorCoordinator, method testSyncMetadataTable().

@Test
void testSyncMetadataTable() throws Exception {
    // reset
    reset();
    // override the default configuration
    Configuration conf = TestConfigurations.getDefaultConf(tempFile.getAbsolutePath());
    conf.setBoolean(FlinkOptions.METADATA_ENABLED, true);
    conf.setInteger(FlinkOptions.METADATA_COMPACTION_DELTA_COMMITS, 5);
    OperatorCoordinator.Context context = new MockOperatorCoordinatorContext(new OperatorID(), 1);
    coordinator = new StreamWriteOperatorCoordinator(conf, context);
    coordinator.start();
    coordinator.setExecutor(new MockCoordinatorExecutor(context));
    final WriteMetadataEvent event0 = WriteMetadataEvent.emptyBootstrap(0);
    coordinator.handleEventFromOperator(0, event0);
    String instant = coordinator.getInstant();
    assertNotEquals("", instant);
    final String metadataTableBasePath = HoodieTableMetadata.getMetadataTableBasePath(tempFile.getAbsolutePath());
    HoodieTableMetaClient metadataTableMetaClient = StreamerUtil.createMetaClient(metadataTableBasePath);
    HoodieTimeline completedTimeline = metadataTableMetaClient.getActiveTimeline().filterCompletedInstants();
    assertThat("One instant need to sync to metadata table", completedTimeline.getInstants().count(), is(1L));
    assertThat(completedTimeline.lastInstant().get().getTimestamp(), is(HoodieTableMetadata.SOLO_COMMIT_TIMESTAMP));
    // write another 3 commits
    for (int i = 1; i < 4; i++) {
        instant = mockWriteWithMetadata();
        metadataTableMetaClient.reloadActiveTimeline();
        completedTimeline = metadataTableMetaClient.getActiveTimeline().filterCompletedInstants();
        assertThat("One instant need to sync to metadata table", completedTimeline.getInstants().count(), is(i + 1L));
        assertThat(completedTimeline.lastInstant().get().getTimestamp(), is(instant));
    }
    // the 5th commit triggers the compaction
    instant = mockWriteWithMetadata();
    metadataTableMetaClient.reloadActiveTimeline();
    completedTimeline = metadataTableMetaClient.getActiveTimeline().filterCompletedAndCompactionInstants();
    assertThat("One instant need to sync to metadata table", completedTimeline.getInstants().count(), is(6L));
    assertThat(completedTimeline.lastInstant().get().getTimestamp(), is(instant + "001"));
    assertThat(completedTimeline.lastInstant().get().getAction(), is(HoodieTimeline.COMMIT_ACTION));
    // write another 2 commits
    for (int i = 6; i < 8; i++) {
        instant = mockWriteWithMetadata();
        metadataTableMetaClient.reloadActiveTimeline();
        completedTimeline = metadataTableMetaClient.getActiveTimeline().filterCompletedInstants();
        assertThat("One instant need to sync to metadata table", completedTimeline.getInstants().count(), is(i + 1L));
        assertThat(completedTimeline.lastInstant().get().getTimestamp(), is(instant));
    }
    // write another commit to trigger clean
    instant = mockWriteWithMetadata();
    metadataTableMetaClient.reloadActiveTimeline();
    completedTimeline = metadataTableMetaClient.getActiveTimeline().filterCompletedAndCompactionInstants();
    assertThat("One instant need to sync to metadata table", completedTimeline.getInstants().count(), is(10L));
    assertThat(completedTimeline.lastInstant().get().getTimestamp(), is(instant + "002"));
    assertThat(completedTimeline.lastInstant().get().getAction(), is(HoodieTimeline.CLEAN_ACTION));
    // write another commit
    mockWriteWithMetadata();
    // write another commit to trigger compaction
    instant = mockWriteWithMetadata();
    metadataTableMetaClient.reloadActiveTimeline();
    completedTimeline = metadataTableMetaClient.getActiveTimeline().filterCompletedAndCompactionInstants();
    assertThat("One instant need to sync to metadata table", completedTimeline.getInstants().count(), is(13L));
    assertThat(completedTimeline.lastInstant().get().getTimestamp(), is(instant + "001"));
    assertThat(completedTimeline.lastInstant().get().getAction(), is(HoodieTimeline.COMMIT_ACTION));
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Configuration(org.apache.flink.configuration.Configuration) MockOperatorCoordinatorContext(org.apache.flink.runtime.operators.coordination.MockOperatorCoordinatorContext) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) OperatorCoordinator(org.apache.flink.runtime.operators.coordination.OperatorCoordinator) MockCoordinatorExecutor(org.apache.hudi.sink.utils.MockCoordinatorExecutor) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) WriteMetadataEvent(org.apache.hudi.sink.event.WriteMetadataEvent) Test(org.junit.jupiter.api.Test)
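
The helper mockWriteWithMetadata() is not shown here. The sketch below is a hypothetical version assembled only from calls that appear in these examples (coordinator.getInstant(), handleEventFromOperator, notifyCheckpointComplete and the WriteMetadataEvent builder); the real helper in the Hudi test suite fabricates non-empty WriteStatus results, so the empty write-status list and the checkpoint counter are placeholders.

private long checkpointId = 1L;

private String mockWriteWithMetadata() {
    final String instant = coordinator.getInstant();
    // placeholder: the real test helper sends fabricated, non-empty write statuses
    final WriteMetadataEvent event = WriteMetadataEvent.builder()
            .taskID(0)
            .instantTime(instant)
            .writeStatus(java.util.Collections.emptyList())
            .lastBatch(true)
            .endInput(false)
            .build();
    coordinator.handleEventFromOperator(0, event);
    // completing a checkpoint is what triggers the coordinator to commit the instant
    coordinator.notifyCheckpointComplete(checkpointId++);
    return instant;
}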

Example 10 with WriteMetadataEvent

Use of org.apache.hudi.sink.event.WriteMetadataEvent in project hudi by apache.

Class TestStreamWriteOperatorCoordinator, method testHiveSyncInvoked().

@Test
public void testHiveSyncInvoked() throws Exception {
    // reset
    reset();
    // override the default configuration
    Configuration conf = TestConfigurations.getDefaultConf(tempFile.getAbsolutePath());
    conf.setBoolean(FlinkOptions.HIVE_SYNC_ENABLED, true);
    OperatorCoordinator.Context context = new MockOperatorCoordinatorContext(new OperatorID(), 1);
    coordinator = new StreamWriteOperatorCoordinator(conf, context);
    coordinator.start();
    coordinator.setExecutor(new MockCoordinatorExecutor(context));
    final WriteMetadataEvent event0 = WriteMetadataEvent.emptyBootstrap(0);
    coordinator.handleEventFromOperator(0, event0);
    String instant = mockWriteWithMetadata();
    assertNotEquals("", instant);
    // hive synchronization should never throw here
    assertDoesNotThrow(() -> coordinator.notifyCheckpointComplete(1));
}
Also used : Configuration(org.apache.flink.configuration.Configuration) MockOperatorCoordinatorContext(org.apache.flink.runtime.operators.coordination.MockOperatorCoordinatorContext) OperatorCoordinator(org.apache.flink.runtime.operators.coordination.OperatorCoordinator) MockCoordinatorExecutor(org.apache.hudi.sink.utils.MockCoordinatorExecutor) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) WriteMetadataEvent(org.apache.hudi.sink.event.WriteMetadataEvent) Test(org.junit.jupiter.api.Test)

Aggregations

WriteMetadataEvent (org.apache.hudi.sink.event.WriteMetadataEvent) 10
WriteStatus (org.apache.hudi.client.WriteStatus) 4
Configuration (org.apache.flink.configuration.Configuration) 2
OperatorID (org.apache.flink.runtime.jobgraph.OperatorID) 2
MockOperatorCoordinatorContext (org.apache.flink.runtime.operators.coordination.MockOperatorCoordinatorContext) 2
OperatorCoordinator (org.apache.flink.runtime.operators.coordination.OperatorCoordinator) 2
HoodieRecord (org.apache.hudi.common.model.HoodieRecord) 2
HoodieIndex (org.apache.hudi.index.HoodieIndex) 2
MockCoordinatorExecutor (org.apache.hudi.sink.utils.MockCoordinatorExecutor) 2
Test (org.junit.jupiter.api.Test) 2
ArrayList (java.util.ArrayList) 1
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient) 1
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline) 1
HoodieException (org.apache.hudi.exception.HoodieException) 1