use of org.apache.hudi.client.HoodieFlinkWriteClient in project hudi by apache.
the class FlinkHoodieBackedTableMetadataWriter method commit.
@Override
protected void commit(String instantTime, Map<MetadataPartitionType, HoodieData<HoodieRecord>> partitionRecordsMap, boolean canTriggerTableService) {
  ValidationUtils.checkState(enabled, "Metadata table cannot be committed to as it is not enabled");
  ValidationUtils.checkState(metadataMetaClient != null, "Metadata table is not fully initialized yet.");
  HoodieData<HoodieRecord> preppedRecords = prepRecords(partitionRecordsMap);
  List<HoodieRecord> preppedRecordList = HoodieList.getList(preppedRecords);

  try (HoodieFlinkWriteClient writeClient = new HoodieFlinkWriteClient(engineContext, metadataWriteConfig)) {
    if (!metadataMetaClient.getActiveTimeline().filterCompletedInstants().containsInstant(instantTime)) {
      // This is a new commit being applied to the metadata table for the first time.
      writeClient.startCommitWithTime(instantTime);
      metadataMetaClient.getActiveTimeline().transitionRequestedToInflight(HoodieActiveTimeline.DELTA_COMMIT_ACTION, instantTime);
    } else {
      // This code path handles a re-attempted commit that succeeded on the metadata table but failed on the data table.
      // For example, say compaction c1 succeeded on the metadata table on its first attempt but failed before committing
      // to the data table. On retry, the data table first rolls back the pending compaction; that rollback is applied to
      // the metadata table, but since all changes to the metadata table are upserts, only a new delta commit is created.
      // Once the rollback completes, the compaction is retried and eventually reaches this block, where the instant is
      // already part of the completed timeline. We therefore manually remove the completed instant and proceed, which is
      // also why withAllowMultiWriteOnSameInstant is enabled for the metadata table.
      HoodieInstant alreadyCompletedInstant = metadataMetaClient.getActiveTimeline().filterCompletedInstants()
          .filter(entry -> entry.getTimestamp().equals(instantTime)).lastInstant().get();
      HoodieActiveTimeline.deleteInstantFile(metadataMetaClient.getFs(), metadataMetaClient.getMetaPath(), alreadyCompletedInstant);
      metadataMetaClient.reloadActiveTimeline();
    }

    List<WriteStatus> statuses = preppedRecordList.size() > 0
        ? writeClient.upsertPreppedRecords(preppedRecordList, instantTime)
        : Collections.emptyList();
    statuses.forEach(writeStatus -> {
      if (writeStatus.hasErrors()) {
        throw new HoodieMetadataException("Failed to commit metadata table records at instant " + instantTime);
      }
    });

    // Flink does not support auto-commit yet, and its auto-commit logic is not as complete as BaseHoodieWriteClient's,
    // so commit explicitly.
    writeClient.commit(instantTime, statuses, Option.empty(), HoodieActiveTimeline.DELTA_COMMIT_ACTION, Collections.emptyMap());
    // Reload the timeline.
    metadataMetaClient.reloadActiveTimeline();

    if (canTriggerTableService) {
      compactIfNecessary(writeClient, instantTime);
      cleanIfNecessary(writeClient, instantTime);
      writeClient.archive();
    }
  }

  // Update the total size of the metadata table and the count of base/log files.
  metrics.ifPresent(m -> m.updateSizeMetrics(metadataMetaClient, metadata));
}
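The same start-commit / upsert / explicit-commit sequence applies to any Flink write path that bypasses auto-commit. Below is a minimal sketch of that pattern, assuming engineContext, writeConfig, and a prepared record list are already available; the calls mirror the ones used above, and HoodieException stands in for whatever failure type a caller prefers.

// Minimal sketch of the explicit-commit pattern, assuming engineContext,
// writeConfig, and preppedRecordList are already built.
String instantTime = HoodieActiveTimeline.createNewInstantTime();
try (HoodieFlinkWriteClient writeClient = new HoodieFlinkWriteClient(engineContext, writeConfig)) {
  // Begin a new delta commit on the timeline.
  writeClient.startCommitWithTime(instantTime);
  // Write the already-prepared records.
  List<WriteStatus> statuses = writeClient.upsertPreppedRecords(preppedRecordList, instantTime);
  // Fail fast on any per-record write errors before committing.
  if (statuses.stream().anyMatch(WriteStatus::hasErrors)) {
    throw new HoodieException("Write failed at instant " + instantTime);
  }
  // Flink has no auto-commit, so complete the instant explicitly.
  writeClient.commit(instantTime, statuses, Option.empty(), HoodieActiveTimeline.DELTA_COMMIT_ACTION, Collections.emptyMap());
}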
use of org.apache.hudi.client.HoodieFlinkWriteClient in project hudi by apache.
the class StreamerUtil method createWriteClient.
/**
 * Creates the Flink write client.
 *
 * <p>This is expected to be used on the client (task) side. Set the flag {@code loadFsViewStorageConfig}
 * to load the remote filesystem view storage config; otherwise an in-memory filesystem view storage is used.
 */
@SuppressWarnings("rawtypes")
public static HoodieFlinkWriteClient createWriteClient(Configuration conf, RuntimeContext runtimeContext, boolean loadFsViewStorageConfig) {
  HoodieFlinkEngineContext context = new HoodieFlinkEngineContext(
      new SerializableConfiguration(getHadoopConf()), new FlinkTaskContextSupplier(runtimeContext));
  HoodieWriteConfig writeConfig = getHoodieClientConfig(conf, loadFsViewStorageConfig);
  return new HoodieFlinkWriteClient<>(context, writeConfig);
}
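A hypothetical task-side usage, as a sketch: a rich function can create its per-task client in open() with the runtime context and release it in close(). The class and field names here are illustrative, not Hudi code.

// Illustrative only: a rich sink that owns a per-task write client.
public class ExampleHudiSink extends RichSinkFunction<RowData> {
  private final Configuration conf; // Flink configuration carrying the Hudi options
  private transient HoodieFlinkWriteClient writeClient;

  public ExampleHudiSink(Configuration conf) {
    this.conf = conf;
  }

  @Override
  public void open(Configuration parameters) {
    // Pass 'true' so the task loads the remote filesystem view storage
    // config written by the driver instead of an in-memory view.
    this.writeClient = StreamerUtil.createWriteClient(conf, getRuntimeContext(), true);
  }

  // invoke(...) would buffer rows and flush them via writeClient (omitted).

  @Override
  public void close() {
    if (writeClient != null) {
      writeClient.close();
    }
  }
}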
use of org.apache.hudi.client.HoodieFlinkWriteClient in project hudi by apache.
the class StreamerUtil method createWriteClient.
/**
 * Creates the Flink write client.
 *
 * <p>This is expected to be used by the driver; task-side clients can then send requests
 * for the filesystem view to the embedded timeline server.
 *
 * <p>The task context supplier is a constant: the write token is always '0-1-0'.
 */
@SuppressWarnings("rawtypes")
public static HoodieFlinkWriteClient createWriteClient(Configuration conf) throws IOException {
  HoodieWriteConfig writeConfig = getHoodieClientConfig(conf, true, false);
  // build the write client to start the embedded timeline server
  final HoodieFlinkWriteClient writeClient = new HoodieFlinkWriteClient<>(HoodieFlinkEngineContext.DEFAULT, writeConfig);
  // create the filesystem view storage properties for the client
  final FileSystemViewStorageConfig viewStorageConfig = writeConfig.getViewStorageConfig();
  // rebuild the view storage config with simplified options
  FileSystemViewStorageConfig rebuilt = FileSystemViewStorageConfig.newBuilder()
      .withStorageType(viewStorageConfig.getStorageType())
      .withRemoteServerHost(viewStorageConfig.getRemoteViewServerHost())
      .withRemoteServerPort(viewStorageConfig.getRemoteViewServerPort())
      .withRemoteTimelineClientTimeoutSecs(viewStorageConfig.getRemoteTimelineClientTimeoutSecs())
      .build();
  ViewStorageProperties.createProperties(conf.getString(FlinkOptions.PATH), rebuilt);
  return writeClient;
}
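On the driver side, a typical lifecycle is: create the client once to boot the embedded timeline server and persist the view storage properties, then close it on shutdown so the server is released. A sketch, assuming the table path below is a placeholder and the remaining required Hudi options are already set on the configuration:

// Driver-side sketch: the first client boots the embedded timeline server
// and writes the view storage properties under the table's base path.
Configuration conf = new Configuration();
conf.setString(FlinkOptions.PATH, "/tmp/hudi/t1"); // placeholder table path
HoodieFlinkWriteClient writeClient = StreamerUtil.createWriteClient(conf);
try {
  // ... coordinate instants; tasks resolve the filesystem view remotely ...
} finally {
  // Closing the client also stops the embedded timeline server.
  writeClient.close();
}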
use of org.apache.hudi.client.HoodieFlinkWriteClient in project hudi by apache.
the class TestWriteCopyOnWrite method testReuseEmbeddedServer.
@Test
public void testReuseEmbeddedServer() throws IOException {
  conf.setInteger("hoodie.filesystem.view.remote.timeout.secs", 500);
  HoodieFlinkWriteClient writeClient = StreamerUtil.createWriteClient(conf);
  FileSystemViewStorageConfig viewStorageConfig = writeClient.getConfig().getViewStorageConfig();
  assertSame(FileSystemViewStorageType.REMOTE_FIRST, viewStorageConfig.getStorageType());
  // get another write client; it should reuse the embedded server of the first
  writeClient = StreamerUtil.createWriteClient(conf);
  assertSame(FileSystemViewStorageType.REMOTE_FIRST, writeClient.getConfig().getViewStorageConfig().getStorageType());
  assertEquals(viewStorageConfig.getRemoteViewServerPort(), writeClient.getConfig().getViewStorageConfig().getRemoteViewServerPort());
  assertEquals(500, viewStorageConfig.getRemoteTimelineClientTimeoutSecs());
}
use of org.apache.hudi.client.HoodieFlinkWriteClient in project hudi by apache.
the class ITTestHoodieFlinkCompactor method testHoodieFlinkCompactor.
@ParameterizedTest
@ValueSource(booleans = {true, false})
public void testHoodieFlinkCompactor(boolean enableChangelog) throws Exception {
  // Create the hoodie table and insert data.
  EnvironmentSettings settings = EnvironmentSettings.newInstance().inBatchMode().build();
  TableEnvironment tableEnv = TableEnvironmentImpl.create(settings);
  tableEnv.getConfig().getConfiguration().setInteger(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 1);
  Map<String, String> options = new HashMap<>();
  options.put(FlinkOptions.COMPACTION_ASYNC_ENABLED.key(), "false");
  options.put(FlinkOptions.PATH.key(), tempFile.getAbsolutePath());
  options.put(FlinkOptions.TABLE_TYPE.key(), "MERGE_ON_READ");
  options.put(FlinkOptions.CHANGELOG_ENABLED.key(), String.valueOf(enableChangelog));
  String hoodieTableDDL = TestConfigurations.getCreateHoodieTableDDL("t1", options);
  tableEnv.executeSql(hoodieTableDDL);
  tableEnv.executeSql(TestSQL.INSERT_T1).await();

  // Wait for the asynchronous commit to finish.
  TimeUnit.SECONDS.sleep(3);

  // Build the compaction configuration and set the Avro schema.
  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  FlinkCompactionConfig cfg = new FlinkCompactionConfig();
  cfg.path = tempFile.getAbsolutePath();
  Configuration conf = FlinkCompactionConfig.toFlinkConfig(cfg);
  conf.setString(FlinkOptions.TABLE_TYPE.key(), "MERGE_ON_READ");

  // Create the meta client.
  HoodieTableMetaClient metaClient = StreamerUtil.createMetaClient(conf);
  // Set the table name.
  conf.setString(FlinkOptions.TABLE_NAME, metaClient.getTableConfig().getTableName());
  // Set the table schema.
  CompactionUtil.setAvroSchema(conf, metaClient);
  // Infer the changelog mode.
  CompactionUtil.inferChangelogMode(conf, metaClient);

  HoodieFlinkWriteClient writeClient = StreamerUtil.createWriteClient(conf);
  // Compute the compaction instant time and, if there is a pending operation, schedule the compaction.
  boolean scheduled = false;
  Option<String> compactionInstantTimeOption = CompactionUtil.getCompactionInstantTime(metaClient);
  if (compactionInstantTimeOption.isPresent()) {
    scheduled = writeClient.scheduleCompactionAtInstant(compactionInstantTimeOption.get(), Option.empty());
  }
  assertTrue(scheduled, "The compaction plan should be scheduled");
  String compactionInstantTime = compactionInstantTimeOption.get();

  HoodieFlinkTable<?> table = writeClient.getHoodieTable();
  // Generate the compaction plan; this should support configurable commit metadata.
  HoodieCompactionPlan compactionPlan = CompactionUtils.getCompactionPlan(table.getMetaClient(), compactionInstantTime);
  // Mark the instant as compaction inflight.
  HoodieInstant instant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);
  table.getActiveTimeline().transitionCompactionRequestedToInflight(instant);

  env.addSource(new CompactionPlanSourceFunction(compactionPlan, compactionInstantTime))
      .name("compaction_source")
      .uid("uid_compaction_source")
      .rebalance()
      .transform("compact_task", TypeInformation.of(CompactionCommitEvent.class), new ProcessOperator<>(new CompactFunction(conf)))
      .setParallelism(compactionPlan.getOperations().size())
      .addSink(new CompactionCommitSink(conf))
      .name("clean_commits")
      .uid("uid_clean_commits")
      .setParallelism(1);
  env.execute("flink_hudi_compaction");
  writeClient.close();

  TestData.checkWrittenFullData(tempFile, EXPECTED1);
}
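Stripped of the test harness, the scheduling half of the flow above reduces to roughly the following sketch; it reuses the same utility calls as the test, with the table path as a placeholder.

// Condensed sketch of the compaction-scheduling flow from the test above.
FlinkCompactionConfig cfg = new FlinkCompactionConfig();
cfg.path = "/path/to/table"; // placeholder base path
Configuration conf = FlinkCompactionConfig.toFlinkConfig(cfg);
HoodieTableMetaClient metaClient = StreamerUtil.createMetaClient(conf);
conf.setString(FlinkOptions.TABLE_NAME, metaClient.getTableConfig().getTableName());
CompactionUtil.setAvroSchema(conf, metaClient);

try (HoodieFlinkWriteClient writeClient = StreamerUtil.createWriteClient(conf)) {
  Option<String> instant = CompactionUtil.getCompactionInstantTime(metaClient);
  // Schedule a compaction plan at the computed instant, if there is one.
  if (instant.isPresent() && writeClient.scheduleCompactionAtInstant(instant.get(), Option.empty())) {
    // Hand the plan off to the Flink pipeline, as in the test above.
  }
}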