Use of org.apache.hudi.sink.event.WriteMetadataEvent in project hudi by apache.
The class BulkInsertWriteFunction, method endInput.
/**
 * End input action for batch source.
 */
public void endInput() {
  final List<WriteStatus> writeStatus = this.writerHelper.getWriteStatuses(this.taskID);
  final WriteMetadataEvent event = WriteMetadataEvent.builder()
      .taskID(taskID)
      .instantTime(this.writerHelper.getInstantTime())
      .writeStatus(writeStatus)
      .lastBatch(true)
      .endInput(true)
      .build();
  this.eventGateway.sendEventToCoordinator(event);
}
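For reference, the builder call above can be exercised in isolation. The following minimal sketch constructs the same kind of end-of-input event; the instant string and the empty write-status list are placeholder values, not taken from the source:

import java.util.Collections;
import java.util.List;

import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.sink.event.WriteMetadataEvent;

public class EndInputEventSketch {
  public static void main(String[] args) {
    // Placeholder: a real subtask would carry the statuses of its flushed writes.
    List<WriteStatus> writeStatus = Collections.emptyList();
    // lastBatch=true and endInput=true tell the coordinator this subtask is done,
    // so the inflight instant can be committed.
    WriteMetadataEvent event = WriteMetadataEvent.builder()
        .taskID(0)
        .instantTime("20220101000000") // hypothetical instant time
        .writeStatus(writeStatus)
        .lastBatch(true)
        .endInput(true)
        .build();
    System.out.println("event for instant: " + event.getInstantTime());
  }
}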
Use of org.apache.hudi.sink.event.WriteMetadataEvent in project hudi by apache.
The class AbstractStreamWriteFunction, method restoreWriteMetadata.
// -------------------------------------------------------------------------
//  Utilities
// -------------------------------------------------------------------------
private void restoreWriteMetadata() throws Exception {
  String lastInflight = lastPendingInstant();
  boolean eventSent = false;
  for (WriteMetadataEvent event : this.writeMetadataState.get()) {
    if (Objects.equals(lastInflight, event.getInstantTime())) {
      // The checkpoint succeeded but the metadata was not committed;
      // re-commit the inflight instant.
      this.eventGateway.sendEventToCoordinator(event);
      LOG.info("Send uncommitted write metadata event to coordinator, task[{}].", taskID);
      eventSent = true;
    }
  }
  if (!eventSent) {
    sendBootstrapEvent();
  }
}
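The sendBootstrapEvent() fallback is not part of this listing. Based on the WriteMetadataEvent.emptyBootstrap(...) factory that the tests further down use, a plausible sketch is the following; the real method may carry additional state:

// Plausible shape of the bootstrap fallback (an assumption, not the verified
// source): emptyBootstrap builds an event with no write statuses that merely
// announces the subtask, letting the coordinator (re)start an instant.
private void sendBootstrapEvent() {
  this.eventGateway.sendEventToCoordinator(WriteMetadataEvent.emptyBootstrap(taskID));
  LOG.info("Send bootstrap write metadata event to coordinator, task[{}].", taskID);
}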
Use of org.apache.hudi.sink.event.WriteMetadataEvent in project hudi by apache.
The class StreamWriteFunction, method flushBucket.
@SuppressWarnings("unchecked, rawtypes")
private boolean flushBucket(DataBucket bucket) {
  String instant = instantToWrite(true);
  if (instant == null) {
    // in case there are empty checkpoints that have no input data
    LOG.info("No inflight instant when flushing data, skip.");
    return false;
  }
  List<HoodieRecord> records = bucket.writeBuffer();
  ValidationUtils.checkState(records.size() > 0, "Data bucket to flush has no buffered records");
  if (config.getBoolean(FlinkOptions.PRE_COMBINE)) {
    records = FlinkWriteHelper.newInstance().deduplicateRecords(records, (HoodieIndex) null, -1);
  }
  bucket.preWrite(records);
  final List<WriteStatus> writeStatus = new ArrayList<>(writeFunction.apply(records, instant));
  records.clear();
  final WriteMetadataEvent event = WriteMetadataEvent.builder()
      .taskID(taskID)
      // the write instant may shift, but the event still uses the current instant
      .instantTime(instant)
      .writeStatus(writeStatus)
      .lastBatch(false)
      .endInput(false)
      .build();
  this.eventGateway.sendEventToCoordinator(event);
  writeStatuses.addAll(writeStatus);
  return true;
}
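Note the snapshot-then-clear buffering pattern: the method copies out what will be sent, clears the live buffer, then emits the event. A toy illustration of the same pattern; every name here is hypothetical, not Hudi API:

import java.util.ArrayList;
import java.util.List;

// Toy illustration of flushBucket's snapshot-then-clear pattern.
public class ToyBucket {
  private final List<String> buffer = new ArrayList<>();

  void add(String record) {
    buffer.add(record);
  }

  // Returns false when there is nothing to flush, mirroring flushBucket's
  // early exit for empty checkpoints.
  boolean flush() {
    if (buffer.isEmpty()) {
      return false;
    }
    List<String> snapshot = new ArrayList<>(buffer); // copy, like new ArrayList<>(writeFunction.apply(...))
    buffer.clear();                                  // like records.clear()
    System.out.println("flushed " + snapshot.size() + " records");
    return true;
  }

  public static void main(String[] args) {
    ToyBucket bucket = new ToyBucket();
    System.out.println(bucket.flush()); // false: nothing buffered yet
    bucket.add("record-1");
    System.out.println(bucket.flush()); // true: one record flushed
  }
}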
Use of org.apache.hudi.sink.event.WriteMetadataEvent in project hudi by apache.
The class TestStreamWriteOperatorCoordinator, method testSyncMetadataTable.
@Test
void testSyncMetadataTable() throws Exception {
  // reset
  reset();
  // override the default configuration
  Configuration conf = TestConfigurations.getDefaultConf(tempFile.getAbsolutePath());
  conf.setBoolean(FlinkOptions.METADATA_ENABLED, true);
  conf.setInteger(FlinkOptions.METADATA_COMPACTION_DELTA_COMMITS, 5);
  OperatorCoordinator.Context context = new MockOperatorCoordinatorContext(new OperatorID(), 1);
  coordinator = new StreamWriteOperatorCoordinator(conf, context);
  coordinator.start();
  coordinator.setExecutor(new MockCoordinatorExecutor(context));
  final WriteMetadataEvent event0 = WriteMetadataEvent.emptyBootstrap(0);
  coordinator.handleEventFromOperator(0, event0);
  String instant = coordinator.getInstant();
  assertNotEquals("", instant);
  final String metadataTableBasePath = HoodieTableMetadata.getMetadataTableBasePath(tempFile.getAbsolutePath());
  HoodieTableMetaClient metadataTableMetaClient = StreamerUtil.createMetaClient(metadataTableBasePath);
  HoodieTimeline completedTimeline = metadataTableMetaClient.getActiveTimeline().filterCompletedInstants();
  assertThat("One instant needs to sync to the metadata table", completedTimeline.getInstants().count(), is(1L));
  assertThat(completedTimeline.lastInstant().get().getTimestamp(), is(HoodieTableMetadata.SOLO_COMMIT_TIMESTAMP));
  // write another 3 commits
  for (int i = 1; i < 4; i++) {
    instant = mockWriteWithMetadata();
    metadataTableMetaClient.reloadActiveTimeline();
    completedTimeline = metadataTableMetaClient.getActiveTimeline().filterCompletedInstants();
    assertThat("One instant needs to sync to the metadata table", completedTimeline.getInstants().count(), is(i + 1L));
    assertThat(completedTimeline.lastInstant().get().getTimestamp(), is(instant));
  }
  // the 5th commit triggers the compaction
  instant = mockWriteWithMetadata();
  metadataTableMetaClient.reloadActiveTimeline();
  completedTimeline = metadataTableMetaClient.getActiveTimeline().filterCompletedAndCompactionInstants();
  assertThat("One instant needs to sync to the metadata table", completedTimeline.getInstants().count(), is(6L));
  assertThat(completedTimeline.lastInstant().get().getTimestamp(), is(instant + "001"));
  assertThat(completedTimeline.lastInstant().get().getAction(), is(HoodieTimeline.COMMIT_ACTION));
  // write another 2 commits
  for (int i = 6; i < 8; i++) {
    instant = mockWriteWithMetadata();
    metadataTableMetaClient.reloadActiveTimeline();
    completedTimeline = metadataTableMetaClient.getActiveTimeline().filterCompletedInstants();
    assertThat("One instant needs to sync to the metadata table", completedTimeline.getInstants().count(), is(i + 1L));
    assertThat(completedTimeline.lastInstant().get().getTimestamp(), is(instant));
  }
  // write another commit to trigger clean
  instant = mockWriteWithMetadata();
  metadataTableMetaClient.reloadActiveTimeline();
  completedTimeline = metadataTableMetaClient.getActiveTimeline().filterCompletedAndCompactionInstants();
  assertThat("One instant needs to sync to the metadata table", completedTimeline.getInstants().count(), is(10L));
  assertThat(completedTimeline.lastInstant().get().getTimestamp(), is(instant + "002"));
  assertThat(completedTimeline.lastInstant().get().getAction(), is(HoodieTimeline.CLEAN_ACTION));
  // write another commit
  mockWriteWithMetadata();
  // write another commit to trigger compaction
  instant = mockWriteWithMetadata();
  metadataTableMetaClient.reloadActiveTimeline();
  completedTimeline = metadataTableMetaClient.getActiveTimeline().filterCompletedAndCompactionInstants();
  assertThat("One instant needs to sync to the metadata table", completedTimeline.getInstants().count(), is(13L));
  assertThat(completedTimeline.lastInstant().get().getTimestamp(), is(instant + "001"));
  assertThat(completedTimeline.lastInstant().get().getAction(), is(HoodieTimeline.COMMIT_ACTION));
}
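The test leans on a mockWriteWithMetadata() helper that is not part of this listing. A plausible reconstruction, consistent with the coordinator calls shown above; the empty write-status list and the checkpointId counter field are assumptions, not the verified helper:

private long checkpointId = 1; // assumed counter for checkpoint completion

// Hypothetical reconstruction of the helper: fetch the coordinator's current
// instant, send a write-metadata event for it, and complete a checkpoint so
// the instant commits and the metadata table syncs.
private String mockWriteWithMetadata() {
  final String instant = coordinator.getInstant();
  final WriteMetadataEvent event = WriteMetadataEvent.builder()
      .taskID(0)
      .instantTime(instant)
      .writeStatus(Collections.emptyList()) // assumption: no real data needed for the sync path
      .lastBatch(true)
      .endInput(false)
      .build();
  coordinator.handleEventFromOperator(0, event);
  coordinator.notifyCheckpointComplete(checkpointId++);
  return instant;
}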
Use of org.apache.hudi.sink.event.WriteMetadataEvent in project hudi by apache.
The class TestStreamWriteOperatorCoordinator, method testHiveSyncInvoked.
@Test
public void testHiveSyncInvoked() throws Exception {
  // reset
  reset();
  // override the default configuration
  Configuration conf = TestConfigurations.getDefaultConf(tempFile.getAbsolutePath());
  conf.setBoolean(FlinkOptions.HIVE_SYNC_ENABLED, true);
  OperatorCoordinator.Context context = new MockOperatorCoordinatorContext(new OperatorID(), 1);
  coordinator = new StreamWriteOperatorCoordinator(conf, context);
  coordinator.start();
  coordinator.setExecutor(new MockCoordinatorExecutor(context));
  final WriteMetadataEvent event0 = WriteMetadataEvent.emptyBootstrap(0);
  coordinator.handleEventFromOperator(0, event0);
  String instant = mockWriteWithMetadata();
  assertNotEquals("", instant);
  // hive synchronization should never throw here
  assertDoesNotThrow(() -> coordinator.notifyCheckpointComplete(1));
}