Use of org.apache.hudi.exception.HoodieMetadataException in project hudi by apache.
The class FlinkHoodieBackedTableMetadataWriter, method commit.
@Override
protected void commit(String instantTime, Map<MetadataPartitionType, HoodieData<HoodieRecord>> partitionRecordsMap, boolean canTriggerTableService) {
  ValidationUtils.checkState(enabled, "Metadata table cannot be committed to as it is not enabled");
  ValidationUtils.checkState(metadataMetaClient != null, "Metadata table is not fully initialized yet.");
  HoodieData<HoodieRecord> preppedRecords = prepRecords(partitionRecordsMap);
  List<HoodieRecord> preppedRecordList = HoodieList.getList(preppedRecords);
  try (HoodieFlinkWriteClient writeClient = new HoodieFlinkWriteClient(engineContext, metadataWriteConfig)) {
    if (!metadataMetaClient.getActiveTimeline().filterCompletedInstants().containsInstant(instantTime)) {
      // This is a new commit being applied to the metadata table for the first time.
      writeClient.startCommitWithTime(instantTime);
      metadataMetaClient.getActiveTimeline().transitionRequestedToInflight(HoodieActiveTimeline.DELTA_COMMIT_ACTION, instantTime);
    } else {
      // This code path handles a re-attempted commit that was committed to the metadata table but failed in the data table.
      // For example, say compaction c1 succeeded in the metadata table on its first attempt but failed before committing to the data table.
      // When retried, the data table first rolls back the pending compaction. That rollback is applied to the metadata table, but all
      // changes to the metadata table are upserts, so only a new delta commit is created.
      // Once the rollback completes, the compaction is retried and eventually reaches this block, where the respective commit is
      // already among the completed commits. So we have to manually remove the completed instant and proceed.
      // For the same reason, withAllowMultiWriteOnSameInstant is enabled for the metadata table.
      HoodieInstant alreadyCompletedInstant = metadataMetaClient.getActiveTimeline().filterCompletedInstants()
          .filter(entry -> entry.getTimestamp().equals(instantTime)).lastInstant().get();
      HoodieActiveTimeline.deleteInstantFile(metadataMetaClient.getFs(), metadataMetaClient.getMetaPath(), alreadyCompletedInstant);
      metadataMetaClient.reloadActiveTimeline();
    }
    List<WriteStatus> statuses = !preppedRecordList.isEmpty()
        ? writeClient.upsertPreppedRecords(preppedRecordList, instantTime)
        : Collections.emptyList();
    statuses.forEach(writeStatus -> {
      if (writeStatus.hasErrors()) {
        throw new HoodieMetadataException("Failed to commit metadata table records at instant " + instantTime);
      }
    });
    // Flink does not support auto-commit yet; the auto-commit logic is also not as complete as in BaseHoodieWriteClient.
    writeClient.commit(instantTime, statuses, Option.empty(), HoodieActiveTimeline.DELTA_COMMIT_ACTION, Collections.emptyMap());
    // Reload the timeline.
    metadataMetaClient.reloadActiveTimeline();
    if (canTriggerTableService) {
      compactIfNecessary(writeClient, instantTime);
      cleanIfNecessary(writeClient, instantTime);
      writeClient.archive();
    }
  }
  // Update the total size of the metadata table and the count of base/log files.
  metrics.ifPresent(m -> m.updateSizeMetrics(metadataMetaClient, metadata));
}
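The per-status error check above throws from inside a forEach lambda. A minimal equivalent sketch using a stream predicate instead, built only from the types the snippet already uses:

// Equivalent error guard: fail the metadata commit if any write status has errors.
if (statuses.stream().anyMatch(WriteStatus::hasErrors)) {
  throw new HoodieMetadataException("Failed to commit metadata table records at instant " + instantTime);
}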
Use of org.apache.hudi.exception.HoodieMetadataException in project hudi by apache.
The class HoodieBackedTableMetadataWriter, method createMetadataWriteConfig.
/**
* Create a {@code HoodieWriteConfig} to use for the Metadata Table.
*
* @param writeConfig {@code HoodieWriteConfig} of the main dataset writer
*/
private HoodieWriteConfig createMetadataWriteConfig(HoodieWriteConfig writeConfig) {
  int parallelism = writeConfig.getMetadataInsertParallelism();
  int minCommitsToKeep = Math.max(writeConfig.getMetadataMinCommitsToKeep(), writeConfig.getMinCommitsToKeep());
  int maxCommitsToKeep = Math.max(writeConfig.getMetadataMaxCommitsToKeep(), writeConfig.getMaxCommitsToKeep());
  // Create the write config for the metadata table by borrowing options from the main write config.
  HoodieWriteConfig.Builder builder = HoodieWriteConfig.newBuilder()
      .withTimelineLayoutVersion(TimelineLayoutVersion.CURR_VERSION)
      .withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder()
          .withConsistencyCheckEnabled(writeConfig.getConsistencyGuardConfig().isConsistencyCheckEnabled())
          .withInitialConsistencyCheckIntervalMs(writeConfig.getConsistencyGuardConfig().getInitialConsistencyCheckIntervalMs())
          .withMaxConsistencyCheckIntervalMs(writeConfig.getConsistencyGuardConfig().getMaxConsistencyCheckIntervalMs())
          .withMaxConsistencyChecks(writeConfig.getConsistencyGuardConfig().getMaxConsistencyChecks())
          .build())
      .withWriteConcurrencyMode(WriteConcurrencyMode.SINGLE_WRITER)
      .withMetadataConfig(HoodieMetadataConfig.newBuilder()
          .enable(false)
          .withFileListingParallelism(writeConfig.getFileListingParallelism())
          .build())
      .withAutoCommit(true)
      .withAvroSchemaValidate(true)
      .withEmbeddedTimelineServerEnabled(false)
      .withMarkersType(MarkerType.DIRECT.name())
      .withRollbackUsingMarkers(false)
      .withPath(HoodieTableMetadata.getMetadataTableBasePath(writeConfig.getBasePath()))
      .withSchema(HoodieMetadataRecord.getClassSchema().toString())
      .forTable(tableName)
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .withAsyncClean(writeConfig.isMetadataAsyncClean())
          .withAutoClean(false)
          .withCleanerParallelism(parallelism)
          .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
          .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY)
          .retainCommits(writeConfig.getMetadataCleanerCommitsRetained())
          .archiveCommitsWith(minCommitsToKeep, maxCommitsToKeep)
          .withInlineCompaction(false)
          .withMaxNumDeltaCommitsBeforeCompaction(writeConfig.getMetadataCompactDeltaCommitMax())
          .withAutoArchive(false)
          .build())
      .withParallelism(parallelism, parallelism)
      .withDeleteParallelism(parallelism)
      .withRollbackParallelism(parallelism)
      .withFinalizeWriteParallelism(parallelism)
      .withAllowMultiWriteOnSameInstant(true)
      .withKeyGenerator(HoodieTableMetadataKeyGenerator.class.getCanonicalName())
      .withPopulateMetaFields(dataWriteConfig.getMetadataConfig().populateMetaFields());
  // Record-key properties are needed for the metadata table records.
  final Properties properties = new Properties();
  properties.put(HoodieTableConfig.RECORDKEY_FIELDS.key(), RECORD_KEY_FIELD_NAME);
  properties.put("hoodie.datasource.write.recordkey.field", RECORD_KEY_FIELD_NAME);
  builder.withProperties(properties);
  if (writeConfig.isMetricsOn()) {
    builder.withMetricsConfig(HoodieMetricsConfig.newBuilder()
        .withReporterType(writeConfig.getMetricsReporterType().toString())
        .withExecutorMetrics(writeConfig.isExecutorMetricsEnabled())
        .on(true)
        .build());
    switch (writeConfig.getMetricsReporterType()) {
      case GRAPHITE:
        builder.withMetricsGraphiteConfig(HoodieMetricsGraphiteConfig.newBuilder()
            .onGraphitePort(writeConfig.getGraphiteServerPort())
            .toGraphiteHost(writeConfig.getGraphiteServerHost())
            .usePrefix(writeConfig.getGraphiteMetricPrefix())
            .build());
        break;
      case JMX:
        builder.withMetricsJmxConfig(HoodieMetricsJmxConfig.newBuilder()
            .onJmxPort(writeConfig.getJmxPort())
            .toJmxHost(writeConfig.getJmxHost())
            .build());
        break;
      case DATADOG:
      case PROMETHEUS:
      case PROMETHEUS_PUSHGATEWAY:
      case CONSOLE:
      case INMEMORY:
      case CLOUDWATCH:
        break;
      default:
        throw new HoodieMetadataException("Unsupported Metrics Reporter type " + writeConfig.getMetricsReporterType());
    }
  }
  return builder.build();
}
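The switch above accepts only a fixed set of reporter types and throws HoodieMetadataException for anything else. A hedged caller-side sketch of that same guard, built from the enum constants the switch names (the EnumSet probe itself is illustrative, not part of the Hudi API):

// Reporter types handled by createMetadataWriteConfig; anything else would make it
// throw HoodieMetadataException("Unsupported Metrics Reporter type ..."). Uses java.util.EnumSet.
EnumSet<MetricsReporterType> handled = EnumSet.of(
    MetricsReporterType.GRAPHITE, MetricsReporterType.JMX, MetricsReporterType.DATADOG,
    MetricsReporterType.PROMETHEUS, MetricsReporterType.PROMETHEUS_PUSHGATEWAY,
    MetricsReporterType.CONSOLE, MetricsReporterType.INMEMORY, MetricsReporterType.CLOUDWATCH);
boolean supported = handled.contains(writeConfig.getMetricsReporterType());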
Use of org.apache.hudi.exception.HoodieMetadataException in project hudi by apache.
The class HoodieBloomIndex, method loadColumnRangesFromMetaIndex.
/**
* Load the column stats index as BloomIndexFileInfo for all the involved files in the given partitions.
*
* @param partitions - List of partitions for which column stats need to be loaded
* @param context - Engine context
* @param hoodieTable - Hoodie table
* @return List of partition and file column range info pairs
*/
protected List<Pair<String, BloomIndexFileInfo>> loadColumnRangesFromMetaIndex(List<String> partitions, final HoodieEngineContext context, final HoodieTable hoodieTable) {
  // Also obtain file ranges, if range pruning is enabled.
  context.setJobStatus(this.getClass().getName(), "Load meta index key ranges for file slices");
  final String keyField = hoodieTable.getMetaClient().getTableConfig().getRecordKeyFieldProp();
  return context.flatMap(partitions, partitionName -> {
    // Partition and file name pairs
    List<Pair<String, String>> partitionFileNameList = HoodieIndexUtils.getLatestBaseFilesForPartition(partitionName, hoodieTable).stream()
        .map(baseFile -> Pair.of(partitionName, baseFile.getFileName()))
        .sorted()
        .collect(toList());
    if (partitionFileNameList.isEmpty()) {
      return Stream.empty();
    }
    try {
      Map<Pair<String, String>, HoodieMetadataColumnStats> fileToColumnStatsMap =
          hoodieTable.getMetadataTable().getColumnStats(partitionFileNameList, keyField);
      List<Pair<String, BloomIndexFileInfo>> result = new ArrayList<>();
      for (Map.Entry<Pair<String, String>, HoodieMetadataColumnStats> entry : fileToColumnStatsMap.entrySet()) {
        result.add(Pair.of(entry.getKey().getLeft(),
            new BloomIndexFileInfo(FSUtils.getFileId(entry.getKey().getRight()), entry.getValue().getMinValue(), entry.getValue().getMaxValue())));
      }
      return result.stream();
    } catch (MetadataNotFoundException me) {
      throw new HoodieMetadataException("Unable to find column range metadata for partition:" + partitionName, me);
    }
  }, Math.max(partitions.size(), 1));
}
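For orientation, a sketch of how the returned pairs feed record-key range pruning; the getters correspond to the min/max keys BloomIndexFileInfo is constructed with above, while recordKey and rangeInfos are hypothetical local names:

// Prune candidate files by record-key range. A null min key means the file has
// no range info and cannot be pruned by range alone.
String recordKey = "key-0001"; // hypothetical lookup key
for (Pair<String, BloomIndexFileInfo> pair : rangeInfos) { // rangeInfos: result of loadColumnRangesFromMetaIndex
  BloomIndexFileInfo info = pair.getRight();
  boolean inRange = info.getMinRecordKey() == null
      || (recordKey.compareTo(info.getMinRecordKey()) >= 0
          && recordKey.compareTo(info.getMaxRecordKey()) <= 0);
}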
Use of org.apache.hudi.exception.HoodieMetadataException in project hudi by apache.
The class HoodieBackedTableMetadata, method getRollbackedCommits.
/**
 * Returns a list of commits which were rolled back as part of a Rollback or Restore operation.
 *
 * @param instant The rollback or restore instant to read
 * @param timeline Active timeline of the dataset
 */
private List<String> getRollbackedCommits(HoodieInstant instant, HoodieActiveTimeline timeline) {
  try {
    if (instant.getAction().equals(HoodieTimeline.ROLLBACK_ACTION)) {
      HoodieRollbackMetadata rollbackMetadata = TimelineMetadataUtils.deserializeHoodieRollbackMetadata(timeline.getInstantDetails(instant).get());
      return rollbackMetadata.getCommitsRollback();
    }
    List<String> rollbackedCommits = new LinkedList<>();
    if (instant.getAction().equals(HoodieTimeline.RESTORE_ACTION)) {
      // A restore is made up of several rollbacks.
      HoodieRestoreMetadata restoreMetadata = TimelineMetadataUtils.deserializeHoodieRestoreMetadata(timeline.getInstantDetails(instant).get());
      restoreMetadata.getHoodieRestoreMetadata().values().forEach(rms ->
          rms.forEach(rm -> rollbackedCommits.addAll(rm.getCommitsRollback())));
    }
    return rollbackedCommits;
  } catch (IOException e) {
    throw new HoodieMetadataException("Error retrieving rollback commits for instant " + instant, e);
  }
}
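The nested forEach over the restore metadata can equivalently be written as a single stream pipeline; a small sketch over the same types (the restore metadata map's values are lists of HoodieRollbackMetadata, as the forEach above shows):

// Flatten restore metadata into the list of rolled-back commit times.
List<String> rolledBack = restoreMetadata.getHoodieRestoreMetadata().values().stream()
    .flatMap(List::stream)
    .flatMap(rm -> rm.getCommitsRollback().stream())
    .collect(Collectors.toList());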
Use of org.apache.hudi.exception.HoodieMetadataException in project hudi by apache.
The class TestHoodieBackedMetadata, method testManualRollbacks.
/**
* Test that manual rollbacks work correctly and enough timeline history is maintained on the metadata table
* timeline.
*/
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testManualRollbacks(final boolean populateMetaFields) throws Exception {
  HoodieTableType tableType = COPY_ON_WRITE;
  init(tableType, false);
  // Set up archival to be more aggressive on the metadata table than on the dataset.
  final int maxDeltaCommitsBeforeCompaction = 4;
  final int minArchiveCommitsMetadata = 2;
  final int minArchiveCommitsDataset = 4;
  writeConfig = getWriteConfigBuilder(true, true, false)
      .withMetadataConfig(HoodieMetadataConfig.newBuilder()
          .enable(true)
          .archiveCommitsWith(minArchiveCommitsMetadata, minArchiveCommitsMetadata + 1)
          .retainCommits(1)
          .withMaxNumDeltaCommitsBeforeCompaction(maxDeltaCommitsBeforeCompaction)
          .withPopulateMetaFields(populateMetaFields)
          .build())
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .archiveCommitsWith(minArchiveCommitsDataset, minArchiveCommitsDataset + 1)
          .retainCommits(1)
          .retainFileVersions(1)
          .withAutoClean(false)
          .withAsyncClean(true)
          .build())
      .build();
  initWriteConfigAndMetatableWriter(writeConfig, true);
  doWriteInsertAndUpsert(testTable, "000001", "000002", false);
  for (int i = 3; i < 10; i++) {
    doWriteOperation(testTable, "00000" + i);
    archiveDataTable(writeConfig, metaClient);
  }
  validateMetadata(testTable);
  // We can only roll back those commits whose deltacommits have not been archived yet.
  int numRollbacks = 0;
  boolean exceptionRaised = false;
  List<HoodieInstant> allInstants = metaClient.reloadActiveTimeline().getCommitsTimeline().getReverseOrderedInstants().collect(Collectors.toList());
  for (HoodieInstant instantToRollback : allInstants) {
    try {
      testTable.doRollback(instantToRollback.getTimestamp(), String.valueOf(Time.now()));
      validateMetadata(testTable);
      ++numRollbacks;
    } catch (HoodieMetadataException e) {
      exceptionRaised = true;
      break;
    }
  }
  assertTrue(exceptionRaised, "Rollback of archived instants should fail");
  // Since each rollback also creates a deltacommit, we can only support rolling back half of the
  // original instants present before the rollbacks started (here Math.max(4, 2) / 2 = 2).
  assertTrue(numRollbacks >= Math.max(minArchiveCommitsDataset, minArchiveCommitsMetadata) / 2, "Rollbacks of non archived instants should work");
}
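A hedged sketch of the boundary this test exercises: a manual rollback can only succeed while the matching deltacommit is still on the metadata table's active timeline (metadataMetaClient below is assumed to point at the metadata table):

// Illustrative pre-check before attempting a manual rollback.
boolean stillActive = metadataMetaClient.reloadActiveTimeline()
    .getDeltaCommitTimeline()
    .filterCompletedInstants()
    .containsInstant(instantToRollback.getTimestamp());
// If stillActive is false, the rollback surfaces a HoodieMetadataException, as asserted above.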