
Example 1 with HoodieMetadataException

Use of org.apache.hudi.exception.HoodieMetadataException in project hudi by apache.

The class FlinkHoodieBackedTableMetadataWriter, method commit.

@Override
protected void commit(String instantTime, Map<MetadataPartitionType, HoodieData<HoodieRecord>> partitionRecordsMap, boolean canTriggerTableService) {
    ValidationUtils.checkState(enabled, "Metadata table cannot be committed to as it is not enabled");
    ValidationUtils.checkState(metadataMetaClient != null, "Metadata table is not fully initialized yet.");
    HoodieData<HoodieRecord> preppedRecords = prepRecords(partitionRecordsMap);
    List<HoodieRecord> preppedRecordList = HoodieList.getList(preppedRecords);
    try (HoodieFlinkWriteClient writeClient = new HoodieFlinkWriteClient(engineContext, metadataWriteConfig)) {
        if (!metadataMetaClient.getActiveTimeline().filterCompletedInstants().containsInstant(instantTime)) {
            // This is a new commit being applied to the metadata table for the first time.
            writeClient.startCommitWithTime(instantTime);
            metadataMetaClient.getActiveTimeline().transitionRequestedToInflight(HoodieActiveTimeline.DELTA_COMMIT_ACTION, instantTime);
        } else {
            // This code path handles a re-attempted commit that was already committed to the metadata table
            // but then failed in the data table. For example, suppose compaction c1 succeeded in the metadata
            // table on its first attempt but failed before committing to the data table. When retried, the data
            // table first rolls back the pending compaction; that rollback is applied to the metadata table, but
            // since all changes to the metadata table are upserts, only a new delta commit is created. Once the
            // rollback completes, the compaction is retried and eventually reaches this block, where the instant
            // is already part of a completed commit. So we manually remove the completed instant and proceed;
            // this is also why withAllowMultiWriteOnSameInstant is enabled for the metadata table.
            HoodieInstant alreadyCompletedInstant = metadataMetaClient.getActiveTimeline().filterCompletedInstants().filter(entry -> entry.getTimestamp().equals(instantTime)).lastInstant().get();
            HoodieActiveTimeline.deleteInstantFile(metadataMetaClient.getFs(), metadataMetaClient.getMetaPath(), alreadyCompletedInstant);
            metadataMetaClient.reloadActiveTimeline();
        }
        List<WriteStatus> statuses = preppedRecordList.size() > 0 ? writeClient.upsertPreppedRecords(preppedRecordList, instantTime) : Collections.emptyList();
        statuses.forEach(writeStatus -> {
            if (writeStatus.hasErrors()) {
                throw new HoodieMetadataException("Failed to commit metadata table records at instant " + instantTime);
            }
        });
        // Flink does not support auto-commit yet; the auto-commit logic is also not as complete as in BaseHoodieWriteClient.
        writeClient.commit(instantTime, statuses, Option.empty(), HoodieActiveTimeline.DELTA_COMMIT_ACTION, Collections.emptyMap());
        // reload timeline
        metadataMetaClient.reloadActiveTimeline();
        if (canTriggerTableService) {
            compactIfNecessary(writeClient, instantTime);
            cleanIfNecessary(writeClient, instantTime);
            writeClient.archive();
        }
    }
    // Update total size of the metadata and count of base/log files
    metrics.ifPresent(m -> m.updateSizeMetrics(metadataMetaClient, metadata));
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieMetadataException(org.apache.hudi.exception.HoodieMetadataException) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieFlinkWriteClient(org.apache.hudi.client.HoodieFlinkWriteClient) WriteStatus(org.apache.hudi.client.WriteStatus)
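
The else branch above is the subtle part of this method: a commit that already completed on the metadata table (because a data-table retry re-uses the same instant time) has to be removed before the delta commit can be re-applied. Below is a minimal sketch of that retry handling in isolation, reusing only the calls shown above; the helper class and method names are illustrative and not part of the Hudi codebase.

import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;

public class MetadataCommitRetryHelper {

    /**
     * Sketch only: if {@code instantTime} was already completed on the metadata table
     * (a re-attempted commit from the data table), remove the completed instant file so
     * the delta commit can be re-applied; otherwise do nothing.
     */
    static void removeCompletedInstantIfPresent(HoodieTableMetaClient metadataMetaClient, String instantTime) {
        HoodieActiveTimeline timeline = metadataMetaClient.getActiveTimeline();
        if (timeline.filterCompletedInstants().containsInstant(instantTime)) {
            // Same lookup as in the commit method above: find the completed instant with this timestamp.
            HoodieInstant alreadyCompleted = timeline.filterCompletedInstants()
                .filter(instant -> instant.getTimestamp().equals(instantTime))
                .lastInstant()
                .get();
            // Delete its instant file and reload so the timeline no longer considers it completed.
            HoodieActiveTimeline.deleteInstantFile(
                metadataMetaClient.getFs(), metadataMetaClient.getMetaPath(), alreadyCompleted);
            metadataMetaClient.reloadActiveTimeline();
        }
    }
}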

Example 2 with HoodieMetadataException

Use of org.apache.hudi.exception.HoodieMetadataException in project hudi by apache.

The class HoodieBackedTableMetadataWriter, method createMetadataWriteConfig.

/**
 * Create a {@code HoodieWriteConfig} to use for the Metadata Table.
 *
 * @param writeConfig {@code HoodieWriteConfig} of the main dataset writer
 */
private HoodieWriteConfig createMetadataWriteConfig(HoodieWriteConfig writeConfig) {
    int parallelism = writeConfig.getMetadataInsertParallelism();
    int minCommitsToKeep = Math.max(writeConfig.getMetadataMinCommitsToKeep(), writeConfig.getMinCommitsToKeep());
    int maxCommitsToKeep = Math.max(writeConfig.getMetadataMaxCommitsToKeep(), writeConfig.getMaxCommitsToKeep());
    // Create the write config for the metadata table by borrowing options from the main write config.
    HoodieWriteConfig.Builder builder = HoodieWriteConfig.newBuilder()
        .withTimelineLayoutVersion(TimelineLayoutVersion.CURR_VERSION)
        .withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder()
            .withConsistencyCheckEnabled(writeConfig.getConsistencyGuardConfig().isConsistencyCheckEnabled())
            .withInitialConsistencyCheckIntervalMs(writeConfig.getConsistencyGuardConfig().getInitialConsistencyCheckIntervalMs())
            .withMaxConsistencyCheckIntervalMs(writeConfig.getConsistencyGuardConfig().getMaxConsistencyCheckIntervalMs())
            .withMaxConsistencyChecks(writeConfig.getConsistencyGuardConfig().getMaxConsistencyChecks())
            .build())
        .withWriteConcurrencyMode(WriteConcurrencyMode.SINGLE_WRITER)
        .withMetadataConfig(HoodieMetadataConfig.newBuilder()
            .enable(false)
            .withFileListingParallelism(writeConfig.getFileListingParallelism())
            .build())
        .withAutoCommit(true)
        .withAvroSchemaValidate(true)
        .withEmbeddedTimelineServerEnabled(false)
        .withMarkersType(MarkerType.DIRECT.name())
        .withRollbackUsingMarkers(false)
        .withPath(HoodieTableMetadata.getMetadataTableBasePath(writeConfig.getBasePath()))
        .withSchema(HoodieMetadataRecord.getClassSchema().toString())
        .forTable(tableName)
        .withCompactionConfig(HoodieCompactionConfig.newBuilder()
            .withAsyncClean(writeConfig.isMetadataAsyncClean())
            .withAutoClean(false)
            .withCleanerParallelism(parallelism)
            .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
            .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY)
            .retainCommits(writeConfig.getMetadataCleanerCommitsRetained())
            .archiveCommitsWith(minCommitsToKeep, maxCommitsToKeep)
            .withInlineCompaction(false)
            .withMaxNumDeltaCommitsBeforeCompaction(writeConfig.getMetadataCompactDeltaCommitMax())
            .withAutoArchive(false)
            .build())
        .withParallelism(parallelism, parallelism)
        .withDeleteParallelism(parallelism)
        .withRollbackParallelism(parallelism)
        .withFinalizeWriteParallelism(parallelism)
        .withAllowMultiWriteOnSameInstant(true)
        .withKeyGenerator(HoodieTableMetadataKeyGenerator.class.getCanonicalName())
        .withPopulateMetaFields(dataWriteConfig.getMetadataConfig().populateMetaFields());
    // RecordKey properties are needed for the metadata table records
    final Properties properties = new Properties();
    properties.put(HoodieTableConfig.RECORDKEY_FIELDS.key(), RECORD_KEY_FIELD_NAME);
    properties.put("hoodie.datasource.write.recordkey.field", RECORD_KEY_FIELD_NAME);
    builder.withProperties(properties);
    if (writeConfig.isMetricsOn()) {
        builder.withMetricsConfig(HoodieMetricsConfig.newBuilder().withReporterType(writeConfig.getMetricsReporterType().toString()).withExecutorMetrics(writeConfig.isExecutorMetricsEnabled()).on(true).build());
        switch(writeConfig.getMetricsReporterType()) {
            case GRAPHITE:
                builder.withMetricsGraphiteConfig(HoodieMetricsGraphiteConfig.newBuilder().onGraphitePort(writeConfig.getGraphiteServerPort()).toGraphiteHost(writeConfig.getGraphiteServerHost()).usePrefix(writeConfig.getGraphiteMetricPrefix()).build());
                break;
            case JMX:
                builder.withMetricsJmxConfig(HoodieMetricsJmxConfig.newBuilder().onJmxPort(writeConfig.getJmxPort()).toJmxHost(writeConfig.getJmxHost()).build());
                break;
            case DATADOG:
            case PROMETHEUS:
            case PROMETHEUS_PUSHGATEWAY:
            case CONSOLE:
            case INMEMORY:
            case CLOUDWATCH:
                break;
            default:
                throw new HoodieMetadataException("Unsupported Metrics Reporter type " + writeConfig.getMetricsReporterType());
        }
    }
    return builder.build();
}
Also used : HoodieMetadataException(org.apache.hudi.exception.HoodieMetadataException) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Properties(java.util.Properties)
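
The default arm of the metrics switch is where HoodieMetadataException enters this method: any reporter type the metadata writer does not explicitly handle is rejected rather than silently ignored. Below is a condensed sketch of that guard as a standalone check, assuming only the calls visible above; the class and method names here are hypothetical.

import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieMetadataException;

public class MetadataMetricsGuard {

    /**
     * Hypothetical guard: fail fast if the data table's metrics reporter type is one the
     * metadata write config does not know how to configure, mirroring the default branch
     * of the switch in createMetadataWriteConfig above.
     */
    static void requireSupportedReporter(HoodieWriteConfig writeConfig) {
        if (!writeConfig.isMetricsOn()) {
            return; // Metrics disabled: the reporter type is irrelevant for the metadata table.
        }
        switch (writeConfig.getMetricsReporterType()) {
            case GRAPHITE:
            case JMX:
            case DATADOG:
            case PROMETHEUS:
            case PROMETHEUS_PUSHGATEWAY:
            case CONSOLE:
            case INMEMORY:
            case CLOUDWATCH:
                return; // Handled (or intentionally left unconfigured) by createMetadataWriteConfig.
            default:
                throw new HoodieMetadataException(
                    "Unsupported Metrics Reporter type " + writeConfig.getMetricsReporterType());
        }
    }
}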

Example 3 with HoodieMetadataException

Use of org.apache.hudi.exception.HoodieMetadataException in project hudi by apache.

The class HoodieBloomIndex, method loadColumnRangesFromMetaIndex.

/**
 * Load the column stats index as BloomIndexFileInfo for all the involved files in the given partitions.
 *
 * @param partitions  - List of partitions for which column stats need to be loaded
 * @param context     - Engine context
 * @param hoodieTable - Hoodie table
 * @return List of partition and file column range info pairs
 */
protected List<Pair<String, BloomIndexFileInfo>> loadColumnRangesFromMetaIndex(List<String> partitions, final HoodieEngineContext context, final HoodieTable hoodieTable) {
    // also obtain file ranges, if range pruning is enabled
    context.setJobStatus(this.getClass().getName(), "Load meta index key ranges for file slices");
    final String keyField = hoodieTable.getMetaClient().getTableConfig().getRecordKeyFieldProp();
    return context.flatMap(partitions, partitionName -> {
        // Partition and file name pairs
        List<Pair<String, String>> partitionFileNameList = HoodieIndexUtils.getLatestBaseFilesForPartition(partitionName, hoodieTable).stream()
            .map(baseFile -> Pair.of(partitionName, baseFile.getFileName()))
            .sorted()
            .collect(toList());
        if (partitionFileNameList.isEmpty()) {
            return Stream.empty();
        }
        try {
            Map<Pair<String, String>, HoodieMetadataColumnStats> fileToColumnStatsMap = hoodieTable.getMetadataTable().getColumnStats(partitionFileNameList, keyField);
            List<Pair<String, BloomIndexFileInfo>> result = new ArrayList<>();
            for (Map.Entry<Pair<String, String>, HoodieMetadataColumnStats> entry : fileToColumnStatsMap.entrySet()) {
                result.add(Pair.of(entry.getKey().getLeft(),
                    new BloomIndexFileInfo(FSUtils.getFileId(entry.getKey().getRight()),
                        entry.getValue().getMinValue(), entry.getValue().getMaxValue())));
            }
            return result.stream();
        } catch (MetadataNotFoundException me) {
            throw new HoodieMetadataException("Unable to find column range metadata for partition:" + partitionName, me);
        }
    }, Math.max(partitions.size(), 1));
}
Also used : HoodieMetadataException(org.apache.hudi.exception.HoodieMetadataException) MetadataNotFoundException(org.apache.hudi.exception.MetadataNotFoundException) ArrayList(java.util.ArrayList) HoodieMetadataColumnStats(org.apache.hudi.avro.model.HoodieMetadataColumnStats) Map(java.util.Map) ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) Pair(org.apache.hudi.common.util.collection.Pair)
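
The method returns a flat list of (partition, BloomIndexFileInfo) pairs, while key-range pruning code usually wants them keyed by partition. Here is a small usage sketch with plain Java streams; the class and method names are illustrative, and the only Hudi type used is the Pair shown above.

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.hudi.common.util.collection.Pair;

public class ColumnRangeGrouping {

    /**
     * Group the (partition, file range info) pairs returned by loadColumnRangesFromMetaIndex
     * into a per-partition map, which is the shape most key-range pruning code works with.
     */
    static <T> Map<String, List<T>> groupByPartition(List<Pair<String, T>> partitionFilePairs) {
        return partitionFilePairs.stream()
            .collect(Collectors.groupingBy(
                pair -> pair.getLeft(),                 // partition path
                Collectors.mapping(pair -> pair.getRight(), Collectors.toList()))); // range infos per partition
    }
}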

Example 4 with HoodieMetadataException

Use of org.apache.hudi.exception.HoodieMetadataException in project hudi by apache.

The class HoodieBackedTableMetadata, method getRollbackedCommits.

/**
 * Returns a list of commits which were rolled back as part of a Rollback or Restore operation.
 *
 * @param instant  The rollback or restore instant to read
 * @param timeline The active timeline of the dataset
 */
private List<String> getRollbackedCommits(HoodieInstant instant, HoodieActiveTimeline timeline) {
    try {
        if (instant.getAction().equals(HoodieTimeline.ROLLBACK_ACTION)) {
            HoodieRollbackMetadata rollbackMetadata = TimelineMetadataUtils.deserializeHoodieRollbackMetadata(timeline.getInstantDetails(instant).get());
            return rollbackMetadata.getCommitsRollback();
        }
        List<String> rollbackedCommits = new LinkedList<>();
        if (instant.getAction().equals(HoodieTimeline.RESTORE_ACTION)) {
            // Restore is made up of several rollbacks
            HoodieRestoreMetadata restoreMetadata = TimelineMetadataUtils.deserializeHoodieRestoreMetadata(timeline.getInstantDetails(instant).get());
            restoreMetadata.getHoodieRestoreMetadata().values().forEach(rms -> {
                rms.forEach(rm -> rollbackedCommits.addAll(rm.getCommitsRollback()));
            });
        }
        return rollbackedCommits;
    } catch (IOException e) {
        throw new HoodieMetadataException("Error retrieving rollback commits for instant " + instant, e);
    }
}
Also used : HoodieMetadataException(org.apache.hudi.exception.HoodieMetadataException) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) HoodieRestoreMetadata(org.apache.hudi.avro.model.HoodieRestoreMetadata) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LinkedList(java.util.LinkedList)
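
A typical caller walks a set of rollback and restore instants and collects every commit they invalidated. Below is a hedged sketch of such an aggregation, reusing only calls from the example; the collector class itself is hypothetical and simply delegates to the method above.

import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;

public class RolledBackCommitCollector {

    /**
     * Hypothetical aggregation: collect all commits invalidated by the given rollback or
     * restore instants, delegating per-instant parsing to getRollbackedCommits above.
     */
    Set<String> collectRolledBackCommits(List<HoodieInstant> instants, HoodieActiveTimeline timeline) {
        Set<String> rolledBack = new HashSet<>();
        for (HoodieInstant instant : instants) {
            if (instant.getAction().equals(HoodieTimeline.ROLLBACK_ACTION)
                || instant.getAction().equals(HoodieTimeline.RESTORE_ACTION)) {
                rolledBack.addAll(getRollbackedCommits(instant, timeline));
            }
        }
        return rolledBack;
    }

    // Placeholder so the sketch compiles standalone; in HoodieBackedTableMetadata this is
    // the private method shown in the example above.
    private List<String> getRollbackedCommits(HoodieInstant instant, HoodieActiveTimeline timeline) {
        throw new UnsupportedOperationException("see getRollbackedCommits in the example above");
    }
}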

Example 5 with HoodieMetadataException

Use of org.apache.hudi.exception.HoodieMetadataException in project hudi by apache.

The class TestHoodieBackedMetadata, method testManualRollbacks.

/**
 * Test that manual rollbacks work correctly and enough timeline history is maintained on the metadata table
 * timeline.
 */
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testManualRollbacks(final boolean populateMetaFields) throws Exception {
    HoodieTableType tableType = COPY_ON_WRITE;
    init(tableType, false);
    // Configure the metadata table to archive more aggressively than the dataset.
    final int maxDeltaCommitsBeforeCompaction = 4;
    final int minArchiveCommitsMetadata = 2;
    final int minArchiveCommitsDataset = 4;
    writeConfig = getWriteConfigBuilder(true, true, false)
        .withMetadataConfig(HoodieMetadataConfig.newBuilder()
            .enable(true)
            .archiveCommitsWith(minArchiveCommitsMetadata, minArchiveCommitsMetadata + 1)
            .retainCommits(1)
            .withMaxNumDeltaCommitsBeforeCompaction(maxDeltaCommitsBeforeCompaction)
            .withPopulateMetaFields(populateMetaFields)
            .build())
        .withCompactionConfig(HoodieCompactionConfig.newBuilder()
            .archiveCommitsWith(minArchiveCommitsDataset, minArchiveCommitsDataset + 1)
            .retainCommits(1)
            .retainFileVersions(1)
            .withAutoClean(false)
            .withAsyncClean(true)
            .build())
        .build();
    initWriteConfigAndMetatableWriter(writeConfig, true);
    doWriteInsertAndUpsert(testTable, "000001", "000002", false);
    for (int i = 3; i < 10; i++) {
        doWriteOperation(testTable, "00000" + i);
        archiveDataTable(writeConfig, metaClient);
    }
    validateMetadata(testTable);
    // We can only roll back commits whose deltacommits have not been archived yet.
    int numRollbacks = 0;
    boolean exceptionRaised = false;
    List<HoodieInstant> allInstants = metaClient.reloadActiveTimeline().getCommitsTimeline().getReverseOrderedInstants().collect(Collectors.toList());
    for (HoodieInstant instantToRollback : allInstants) {
        try {
            testTable.doRollback(instantToRollback.getTimestamp(), String.valueOf(Time.now()));
            validateMetadata(testTable);
            ++numRollbacks;
        } catch (HoodieMetadataException e) {
            exceptionRaised = true;
            break;
        }
    }
    assertTrue(exceptionRaised, "Rollback of archived instants should fail");
    // Since each rollback also creates a deltacommit, we can only roll back about half of the
    // instants that were present before the rollbacks started.
    assertTrue(numRollbacks >= Math.max(minArchiveCommitsDataset, minArchiveCommitsMetadata) / 2, "Rollbacks of non archived instants should work");
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieMetadataException(org.apache.hudi.exception.HoodieMetadataException) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
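
The test tracks the expected failure with a boolean flag and a break. With JUnit 5, which the test already uses via @ParameterizedTest, the same expectation can be stated directly with assertThrows. Below is a minimal sketch under that assumption; the Rollback interface is a stand-in for testTable.doRollback and is not part of the Hudi test utilities.

import static org.junit.jupiter.api.Assertions.assertThrows;

import org.apache.hudi.exception.HoodieMetadataException;

public class ManualRollbackAssertionSketch {

    /** Minimal stand-in for the rollback call made through testTable in the test above. */
    interface Rollback {
        void rollback(String instantTime) throws Exception;
    }

    /**
     * Sketch only: assert that rolling back an instant whose deltacommit has already been
     * archived fails with HoodieMetadataException, instead of tracking the failure manually.
     */
    static void assertRollbackOfArchivedInstantFails(Rollback rollback, String archivedInstantTime) {
        assertThrows(HoodieMetadataException.class, () -> rollback.rollback(archivedInstantTime));
    }
}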

Aggregations

HoodieMetadataException (org.apache.hudi.exception.HoodieMetadataException): 10
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 4
IOException (java.io.IOException): 3
Path (org.apache.hadoop.fs.Path): 3
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 3
Pair (org.apache.hudi.common.util.collection.Pair): 3
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 3
ArrayList (java.util.ArrayList): 2
HashMap (java.util.HashMap): 2
LinkedList (java.util.LinkedList): 2
Map (java.util.Map): 2
Properties (java.util.Properties): 2
FileSystem (org.apache.hadoop.fs.FileSystem): 2
HoodieAvroUtils.getNestedFieldValAsString (org.apache.hudi.avro.HoodieAvroUtils.getNestedFieldValAsString): 2
HoodieMetadataColumnStats (org.apache.hudi.avro.model.HoodieMetadataColumnStats): 2
HoodieRestoreMetadata (org.apache.hudi.avro.model.HoodieRestoreMetadata): 2
HoodieRollbackMetadata (org.apache.hudi.avro.model.HoodieRollbackMetadata): 2
WriteStatus (org.apache.hudi.client.WriteStatus): 2
Option (org.apache.hudi.common.util.Option): 2
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 2