Search in sources :

Example 96 with Option

use of org.apache.hudi.common.util.Option in project hudi by apache.

the class HoodieTestReplaceCommitMetadataGenerator method generateReplaceCommitMetadata.

/**
 * Builds a {@link HoodieReplaceCommitMetadata} populated with one default-valued
 * {@link HoodieWriteStat} per entry in each partition's file list.
 *
 * <p>The individual file path strings are not used; only the number of entries per partition
 * determines how many write stats are registered for that partition.
 *
 * @param partitionToFilePaths partition path mapped to the list of file paths in that partition
 * @param writes               number of writes to record on every stat; {@code DEFAULT_NUM_WRITES} when empty
 * @param updates              number of update writes to record on every stat; {@code DEFAULT_NUM_UPDATE_WRITES} when empty
 * @return the populated replace-commit metadata
 */
private static HoodieReplaceCommitMetadata generateReplaceCommitMetadata(HashMap<String, List<String>> partitionToFilePaths, Option<Integer> writes, Option<Integer> updates) {
    HoodieReplaceCommitMetadata metadata = new HoodieReplaceCommitMetadata();
    partitionToFilePaths.forEach((partitionPath, filePaths) -> filePaths.forEach(ignoredFilePath -> {
        HoodieWriteStat writeStat = new HoodieWriteStat();
        writeStat.setPartitionPath(partitionPath);
        writeStat.setPath(DEFAULT_PATH);
        writeStat.setFileId(DEFAULT_FILEID);
        writeStat.setTotalWriteBytes(DEFAULT_TOTAL_WRITE_BYTES);
        writeStat.setPrevCommit(DEFAULT_PRE_COMMIT);
        writeStat.setNumWrites(writes.orElse(DEFAULT_NUM_WRITES));
        writeStat.setNumUpdateWrites(updates.orElse(DEFAULT_NUM_UPDATE_WRITES));
        writeStat.setTotalLogBlocks(DEFAULT_TOTAL_LOG_BLOCKS);
        writeStat.setTotalLogRecords(DEFAULT_TOTAL_LOG_RECORDS);
        metadata.addWriteStat(partitionPath, writeStat);
    }));
    // A plain HashMap replaces the former double-brace initializer, which declared an
    // anonymous HashMap subclass just to insert a single entry.
    HashMap<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
    // TODO fix
    // NOTE(review): the replaced file IDs are hard-coded to DEFAULT_FIRST_PARTITION_PATH and do not
    // depend on partitionToFilePaths; presumably this is what the TODO refers to -- confirm.
    partitionToReplaceFileIds.put(DEFAULT_FIRST_PARTITION_PATH, createImmutableList(baseFileName(DEFAULT_FIRST_PARTITION_PATH, "1")));
    metadata.setPartitionToReplaceFileIds(partitionToReplaceFileIds);
    return metadata;
}
Also used : HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) UUID(java.util.UUID) FileCreateUtils.baseFileName(org.apache.hudi.common.testutils.FileCreateUtils.baseFileName) FileCreateUtils(org.apache.hudi.common.testutils.FileCreateUtils) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) List(java.util.List) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) CollectionUtils.createImmutableList(org.apache.hudi.common.util.CollectionUtils.createImmutableList) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) Collections(java.util.Collections) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) List(java.util.List) CollectionUtils.createImmutableList(org.apache.hudi.common.util.CollectionUtils.createImmutableList) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata)

Example 97 with Option

use of org.apache.hudi.common.util.Option in project hudi by apache.

the class CompactionUtil method rollbackEarliestCompaction.

/**
 * Rolls back the earliest inflight compaction instant, if one exists and has timed out.
 *
 * <p>The strategy is intentionally conservative: only the first inflight instant on the pending
 * compaction timeline is examined, and it is rolled back only when its age has reached the
 * configured timeout. Consequently, when several instants have timed out, each invocation
 * rolls back at most one of them.
 *
 * @param table the table whose pending compaction timeline is inspected
 * @param conf  the configuration from which {@code FlinkOptions.COMPACTION_TIMEOUT_SECONDS} is read
 */
public static void rollbackEarliestCompaction(HoodieFlinkTable<?> table, Configuration conf) {
    Option<HoodieInstant> earliestInflight = table.getActiveTimeline()
        .filterPendingCompactionTimeline()
        .filter(candidate -> candidate.getState() == HoodieInstant.State.INFLIGHT)
        .firstInstant();
    if (!earliestInflight.isPresent()) {
        // No inflight compaction on the timeline: nothing to roll back.
        return;
    }
    HoodieInstant inflightInstant = earliestInflight.get();
    String now = HoodieActiveTimeline.createNewInstantTime();
    int timeoutSeconds = conf.getInteger(FlinkOptions.COMPACTION_TIMEOUT_SECONDS);
    if (StreamerUtil.instantTimeDiffSeconds(now, inflightInstant.getTimestamp()) < timeoutSeconds) {
        // The instant has not yet reached the timeout; leave it alone.
        return;
    }
    LOG.info("Rollback the inflight compaction instant: " + inflightInstant + " for timeout(" + timeoutSeconds + "s)");
    table.rollbackInflightCompaction(inflightInstant);
    // Refresh the timeline after the rollback call.
    table.getMetaClient().reloadActiveTimeline();
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) TableSchemaResolver(org.apache.hudi.common.table.TableSchemaResolver) Schema(org.apache.avro.Schema) Logger(org.slf4j.Logger) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) Configuration(org.apache.flink.configuration.Configuration) LoggerFactory(org.slf4j.LoggerFactory) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) HoodieFlinkTable(org.apache.hudi.table.HoodieFlinkTable) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Locale(java.util.Locale) Path(org.apache.hadoop.fs.Path) HoodieFlinkWriteClient(org.apache.hudi.client.HoodieFlinkWriteClient) HoodieIOException(org.apache.hudi.exception.HoodieIOException) FlinkCompactionConfig(org.apache.hudi.sink.compact.FlinkCompactionConfig) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) FlinkOptions(org.apache.hudi.configuration.FlinkOptions)

Example 98 with Option

use of org.apache.hudi.common.util.Option in project hudi by apache.

the class CompactionPlanOperator method scheduleCompaction.

/**
 * Picks the first requested compaction instant from the pending compaction timeline, transitions
 * it to inflight and emits one {@link CompactionPlanEvent} per compaction operation of its plan.
 *
 * <p>Nothing is emitted when there is no requested instant or when its plan has no operations.
 *
 * @param table        the table whose timeline is inspected and updated
 * @param checkpointId the checkpoint id, used only in the log message
 * @throws IOException declared by the method signature
 */
private void scheduleCompaction(HoodieFlinkTable<?> table, long checkpointId) throws IOException {
    // The earliest requested instant has the highest priority.
    Option<HoodieInstant> firstRequested = table.getActiveTimeline()
        .filterPendingCompactionTimeline()
        .filter(pending -> pending.getState() == HoodieInstant.State.REQUESTED)
        .firstInstant();
    if (!firstRequested.isPresent()) {
        LOG.info("No compaction plan for checkpoint " + checkpointId);
        return;
    }

    String compactionInstantTime = firstRequested.get().getTimestamp();
    // Load the compaction plan for that instant.
    // should support configurable commit metadata
    HoodieCompactionPlan plan = CompactionUtils.getCompactionPlan(table.getMetaClient(), compactionInstantTime);
    boolean planIsEmpty = plan == null || plan.getOperations() == null || plan.getOperations().isEmpty();
    if (planIsEmpty) {
        LOG.info("Empty compaction plan for instant " + compactionInstantTime);
        return;
    }

    // Mark the requested instant as inflight, then refresh the timeline.
    HoodieInstant requestedInstant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);
    table.getActiveTimeline().transitionCompactionRequestedToInflight(requestedInstant);
    table.getMetaClient().reloadActiveTimeline();

    List<CompactionOperation> operations = plan.getOperations().stream()
        .map(CompactionOperation::convertFromAvroRecordInstance)
        .collect(toList());
    LOG.info("Execute compaction plan for instant {} as {} file groups", compactionInstantTime, operations.size());
    // Emit one event per operation.
    operations.forEach(operation ->
        output.collect(new StreamRecord<>(new CompactionPlanEvent(compactionInstantTime, operation))));
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) Configuration(org.apache.flink.configuration.Configuration) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) HoodieFlinkTable(org.apache.hudi.table.HoodieFlinkTable) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) FlinkTables(org.apache.hudi.util.FlinkTables) Output(org.apache.flink.streaming.api.operators.Output) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) CompactionUtil(org.apache.hudi.util.CompactionUtil) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) CompactionOperation(org.apache.hudi.common.model.CompactionOperation)

Example 99 with Option

use of org.apache.hudi.common.util.Option in project hudi by apache.

the class SparkBootstrapCommitActionExecutor method metadataBootstrap.

/**
 * Performs the metadata bootstrap for the given partitions.
 *
 * <p>Creates the requested instant at {@code HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS},
 * transitions it to inflight, runs the metadata bootstrap and then hands the resulting write
 * statuses to {@code updateIndexAndCommitIfNeeded}.
 *
 * @param partitionFilesList list of partitions, each paired with the files within that partition
 * @return an empty {@link Option} when {@code partitionFilesList} is {@code null} or empty;
 *         otherwise an {@link Option} holding the write metadata of the bootstrap
 */
protected Option<HoodieWriteMetadata<HoodieData<WriteStatus>>> metadataBootstrap(List<Pair<String, List<HoodieFileStatus>>> partitionFilesList) {
    boolean nothingToBootstrap = partitionFilesList == null || partitionFilesList.isEmpty();
    if (nothingToBootstrap) {
        return Option.empty();
    }

    HoodieTableMetaClient metaClient = table.getMetaClient();
    // The same requested instant is first created and then moved to inflight.
    HoodieInstant requestedInstant = new HoodieInstant(
        State.REQUESTED, metaClient.getCommitActionType(), HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS);
    metaClient.getActiveTimeline().createNewInstant(requestedInstant);
    table.getActiveTimeline().transitionRequestedToInflight(requestedInstant, Option.empty());

    HoodieData<BootstrapWriteStatus> bootstrapWriteStatuses = runMetadataBootstrap(partitionFilesList);
    HoodieWriteMetadata<HoodieData<WriteStatus>> writeMetadata = new HoodieWriteMetadata<>();
    // The identity map re-types the data from BootstrapWriteStatus to WriteStatus elements.
    updateIndexAndCommitIfNeeded(bootstrapWriteStatuses.map(status -> status), writeMetadata);
    return Option.of(writeMetadata);
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieData(org.apache.hudi.common.data.HoodieData) HoodieTable(org.apache.hudi.table.HoodieTable) BootstrapMode(org.apache.hudi.client.bootstrap.BootstrapMode) FileSystem(org.apache.hadoop.fs.FileSystem) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) BaseCommitActionExecutor(org.apache.hudi.table.action.commit.BaseCommitActionExecutor) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) KeyGeneratorInterface(org.apache.hudi.keygen.KeyGeneratorInterface) Logger(org.apache.log4j.Logger) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) Duration(java.time.Duration) Map(java.util.Map) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieSparkKeyGeneratorFactory(org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) BootstrapPartitionPathTranslator(org.apache.hudi.client.bootstrap.translator.BootstrapPartitionPathTranslator) Collection(java.util.Collection) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) List(java.util.List) WRITE_STATUS_STORAGE_LEVEL_VALUE(org.apache.hudi.config.HoodieWriteConfig.WRITE_STATUS_STORAGE_LEVEL_VALUE) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) BootstrapWriteStatus(org.apache.hudi.client.bootstrap.BootstrapWriteStatus) SparkBulkInsertCommitActionExecutor(org.apache.hudi.table.action.commit.SparkBulkInsertCommitActionExecutor) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) ReflectionUtils(org.apache.hudi.common.util.ReflectionUtils) 
FullRecordBootstrapDataProvider(org.apache.hudi.client.bootstrap.FullRecordBootstrapDataProvider) MetadataBootstrapHandlerFactory.getMetadataHandler(org.apache.hudi.table.action.bootstrap.MetadataBootstrapHandlerFactory.getMetadataHandler) HoodieBootstrapSchemaProvider(org.apache.hudi.client.bootstrap.HoodieBootstrapSchemaProvider) Option(org.apache.hudi.common.util.Option) HoodieCommitException(org.apache.hudi.exception.HoodieCommitException) HoodieKeyGeneratorException(org.apache.hudi.exception.HoodieKeyGeneratorException) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) BaseSparkCommitActionExecutor(org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) JavaRDD(org.apache.spark.api.java.JavaRDD) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) BootstrapModeSelector(org.apache.hudi.client.bootstrap.selector.BootstrapModeSelector) HoodieData(org.apache.hudi.common.data.HoodieData) BootstrapIndex(org.apache.hudi.common.bootstrap.index.BootstrapIndex) TypedProperties(org.apache.hudi.common.config.TypedProperties) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) WriteMarkersFactory(org.apache.hudi.table.marker.WriteMarkersFactory) Iterator(java.util.Iterator) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) HoodieSparkBootstrapSchemaProvider(org.apache.hudi.client.bootstrap.HoodieSparkBootstrapSchemaProvider) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) SparkValidatorUtils(org.apache.hudi.client.utils.SparkValidatorUtils) HoodieKey(org.apache.hudi.common.model.HoodieKey) 
HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair) BootstrapWriteStatus(org.apache.hudi.client.bootstrap.BootstrapWriteStatus) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata)

Example 100 with Option

use of org.apache.hudi.common.util.Option in project hudi by apache.

the class SparkBootstrapCommitActionExecutor method commit.

/**
 * Writes the bootstrap index and then commits the bootstrap write.
 *
 * <p>The write statuses are collected and grouped by the partition path of their bootstrap
 * source file mapping. Each partition's mappings are appended to the bootstrap index, after
 * which the write stats of all partitions are passed to the three-argument {@code commit}.
 *
 * @param extraMetadata optional extra metadata forwarded to the commit
 * @param result        write metadata whose write statuses are expected to be
 *                      {@link BootstrapWriteStatus} instances (they are cast unconditionally)
 */
@Override
protected void commit(Option<Map<String, String>> extraMetadata, HoodieWriteMetadata<HoodieData<WriteStatus>> result) {
    // Perform bootstrap index write and then commit. Make sure both record-key and bootstrap-index
    // is all done in a single job DAG.
    Map<String, List<Pair<BootstrapFileMapping, HoodieWriteStat>>> mappingsAndStatsByPartition =
        result.getWriteStatuses().collectAsList().stream()
            .map(status -> {
                BootstrapWriteStatus bootstrapStatus = (BootstrapWriteStatus) status;
                return Pair.of(bootstrapStatus.getBootstrapSourceFileMapping(), bootstrapStatus.getStat());
            })
            .collect(Collectors.groupingBy(pair -> pair.getKey().getPartitionPath()));

    HoodieTableMetaClient metaClient = table.getMetaClient();
    try (BootstrapIndex.IndexWriter indexWriter = BootstrapIndex.getBootstrapIndex(metaClient).createWriter(metaClient.getTableConfig().getBootstrapBasePath().get())) {
        LOG.info("Starting to write bootstrap index for source " + config.getBootstrapSourceBasePath() + " in table " + config.getBasePath());
        indexWriter.begin();
        for (Map.Entry<String, List<Pair<BootstrapFileMapping, HoodieWriteStat>>> partitionEntry : mappingsAndStatsByPartition.entrySet()) {
            // Only the source file mappings go into the index; the stats are used for the commit below.
            List<BootstrapFileMapping> partitionMappings = partitionEntry.getValue().stream()
                .map(Pair::getKey)
                .collect(Collectors.toList());
            indexWriter.appendNextPartition(partitionEntry.getKey(), partitionMappings);
        }
        indexWriter.finish();
        LOG.info("Finished writing bootstrap index for source " + config.getBootstrapSourceBasePath() + " in table " + config.getBasePath());
    }

    List<HoodieWriteStat> allWriteStats = mappingsAndStatsByPartition.values().stream()
        .flatMap(List::stream)
        .map(Pair::getValue)
        .collect(Collectors.toList());
    commit(extraMetadata, result, allWriteStats);
    LOG.info("Committing metadata bootstrap !!");
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) BootstrapMode(org.apache.hudi.client.bootstrap.BootstrapMode) FileSystem(org.apache.hadoop.fs.FileSystem) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) BaseCommitActionExecutor(org.apache.hudi.table.action.commit.BaseCommitActionExecutor) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) KeyGeneratorInterface(org.apache.hudi.keygen.KeyGeneratorInterface) Logger(org.apache.log4j.Logger) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) Duration(java.time.Duration) Map(java.util.Map) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieSparkKeyGeneratorFactory(org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) BootstrapPartitionPathTranslator(org.apache.hudi.client.bootstrap.translator.BootstrapPartitionPathTranslator) Collection(java.util.Collection) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) List(java.util.List) WRITE_STATUS_STORAGE_LEVEL_VALUE(org.apache.hudi.config.HoodieWriteConfig.WRITE_STATUS_STORAGE_LEVEL_VALUE) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) BootstrapWriteStatus(org.apache.hudi.client.bootstrap.BootstrapWriteStatus) SparkBulkInsertCommitActionExecutor(org.apache.hudi.table.action.commit.SparkBulkInsertCommitActionExecutor) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) ReflectionUtils(org.apache.hudi.common.util.ReflectionUtils) FullRecordBootstrapDataProvider(org.apache.hudi.client.bootstrap.FullRecordBootstrapDataProvider) MetadataBootstrapHandlerFactory.getMetadataHandler(org.apache.hudi.table.action.bootstrap.MetadataBootstrapHandlerFactory.getMetadataHandler) 
HoodieBootstrapSchemaProvider(org.apache.hudi.client.bootstrap.HoodieBootstrapSchemaProvider) Option(org.apache.hudi.common.util.Option) HoodieCommitException(org.apache.hudi.exception.HoodieCommitException) HoodieKeyGeneratorException(org.apache.hudi.exception.HoodieKeyGeneratorException) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) BaseSparkCommitActionExecutor(org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) JavaRDD(org.apache.spark.api.java.JavaRDD) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) BootstrapModeSelector(org.apache.hudi.client.bootstrap.selector.BootstrapModeSelector) HoodieData(org.apache.hudi.common.data.HoodieData) BootstrapIndex(org.apache.hudi.common.bootstrap.index.BootstrapIndex) TypedProperties(org.apache.hudi.common.config.TypedProperties) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) WriteMarkersFactory(org.apache.hudi.table.marker.WriteMarkersFactory) Iterator(java.util.Iterator) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) HoodieSparkBootstrapSchemaProvider(org.apache.hudi.client.bootstrap.HoodieSparkBootstrapSchemaProvider) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) SparkValidatorUtils(org.apache.hudi.client.utils.SparkValidatorUtils) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) 
HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) BootstrapWriteStatus(org.apache.hudi.client.bootstrap.BootstrapWriteStatus) List(java.util.List) BootstrapIndex(org.apache.hudi.common.bootstrap.index.BootstrapIndex) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping)

Aggregations

Option (org.apache.hudi.common.util.Option)105 List (java.util.List)84 IOException (java.io.IOException)70 Collectors (java.util.stream.Collectors)69 Map (java.util.Map)67 ArrayList (java.util.ArrayList)61 Path (org.apache.hadoop.fs.Path)59 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)59 Pair (org.apache.hudi.common.util.collection.Pair)59 HashMap (java.util.HashMap)58 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)58 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)56 LogManager (org.apache.log4j.LogManager)54 Logger (org.apache.log4j.Logger)54 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)53 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)46 HoodieIOException (org.apache.hudi.exception.HoodieIOException)44 Arrays (java.util.Arrays)43 FSUtils (org.apache.hudi.common.fs.FSUtils)43 Collections (java.util.Collections)39