
Example 76 with HoodieInstant

Use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.

From the class HoodieBackedTableMetadataWriter, method initializeIfNeeded.

/**
 * Initialize the metadata table if needed.
 *
 * @param dataMetaClient           - meta client for the data table
 * @param actionMetadata           - optional action metadata
 * @param inflightInstantTimestamp - timestamp of an instant in progress on the dataset
 * @param <T>                      - action metadata types extending Avro generated SpecificRecordBase
 * @throws IOException on errors accessing the file system
 */
protected <T extends SpecificRecordBase> void initializeIfNeeded(HoodieTableMetaClient dataMetaClient, Option<T> actionMetadata, Option<String> inflightInstantTimestamp) throws IOException {
    HoodieTimer timer = new HoodieTimer().startTimer();
    boolean exists = dataMetaClient.getFs().exists(new Path(metadataWriteConfig.getBasePath(), HoodieTableMetaClient.METAFOLDER_NAME));
    boolean reInitialize = false;
    // If the un-synced instants have been archived, then
    // the metadata table will need to be initialized again.
    if (exists) {
        HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf.get()).setBasePath(metadataWriteConfig.getBasePath()).build();
        if (dataWriteConfig.getMetadataConfig().populateMetaFields() != metadataMetaClient.getTableConfig().populateMetaFields()) {
            LOG.info("Re-initiating metadata table properties since populate meta fields have changed");
            metadataMetaClient = initializeMetaClient(dataWriteConfig.getMetadataConfig().populateMetaFields());
        }
        final Option<HoodieInstant> latestMetadataInstant = metadataMetaClient.getActiveTimeline().filterCompletedInstants().lastInstant();
        reInitialize = isBootstrapNeeded(latestMetadataInstant, actionMetadata);
    }
    if (reInitialize) {
        metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.REBOOTSTRAP_STR, 1));
        LOG.info("Deleting Metadata Table directory so that it can be re-initialized");
        dataMetaClient.getFs().delete(new Path(metadataWriteConfig.getBasePath()), true);
        exists = false;
    }
    if (!exists) {
        // Initialize for the first time by listing partitions and files directly from the file system
        if (initializeFromFilesystem(dataMetaClient, inflightInstantTimestamp)) {
            metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.INITIALIZE_STR, timer.endTimer()));
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTimer(org.apache.hudi.common.util.HoodieTimer)
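For context, the existence probe above is just a filesystem check for the .hoodie metafolder under the metadata table's base path. A minimal standalone sketch of that check, assuming a local Hadoop configuration and a hypothetical base path (both illustrative, not taken from the snippet above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.table.HoodieTableMetaClient;

public class MetadataTableExistsCheck {
    public static void main(String[] args) throws Exception {
        // Hypothetical metadata table base path; in Hudi this lives under
        // <data table base path>/.hoodie/metadata.
        String metadataBasePath = "/tmp/hudi_trips/.hoodie/metadata";
        FileSystem fs = FileSystem.get(new Configuration());
        // Same probe as initializeIfNeeded: does <basePath>/.hoodie exist?
        boolean exists = fs.exists(new Path(metadataBasePath, HoodieTableMetaClient.METAFOLDER_NAME));
        System.out.println("Metadata table already initialized: " + exists);
    }
}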

Example 77 with HoodieInstant

Use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.

From the class HoodieJavaMergeOnReadTableCompactor, method preCompact.

@Override
public void preCompact(HoodieTable table, HoodieTimeline pendingCompactionTimeline, String compactionInstantTime) {
    HoodieInstant inflightInstant = HoodieTimeline.getCompactionInflightInstant(compactionInstantTime);
    if (pendingCompactionTimeline.containsInstant(inflightInstant)) {
        table.rollbackInflightCompaction(inflightInstant);
        table.getMetaClient().reloadActiveTimeline();
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant)
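The guard above (build the inflight instant for the requested compaction time, then test timeline membership) can be factored out on its own. A hedged sketch, assuming an already-built HoodieTableMetaClient; the helper name is illustrative:

import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;

public class PendingCompactionGuard {
    // Returns true when the given compaction instant is still inflight on the
    // timeline and would need a rollback before re-attempting, as preCompact does.
    static boolean needsRollback(HoodieTableMetaClient metaClient, String compactionInstantTime) {
        HoodieTimeline pending = metaClient.getActiveTimeline().filterPendingCompactionTimeline();
        HoodieInstant inflight = HoodieTimeline.getCompactionInflightInstant(compactionInstantTime);
        return pending.containsInstant(inflight);
    }
}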

Example 78 with HoodieInstant

Use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.

From the class SparkRDDWriteClient, method cluster.

@Override
public HoodieWriteMetadata<JavaRDD<WriteStatus>> cluster(String clusteringInstant, boolean shouldComplete) {
    HoodieSparkTable<T> table = HoodieSparkTable.create(config, context, config.isMetadataTableEnabled());
    preWrite(clusteringInstant, WriteOperationType.CLUSTER, table.getMetaClient());
    HoodieTimeline pendingClusteringTimeline = table.getActiveTimeline().filterPendingReplaceTimeline();
    HoodieInstant inflightInstant = HoodieTimeline.getReplaceCommitInflightInstant(clusteringInstant);
    if (pendingClusteringTimeline.containsInstant(inflightInstant)) {
        rollbackInflightClustering(inflightInstant, table);
        table.getMetaClient().reloadActiveTimeline();
    }
    clusteringTimer = metrics.getClusteringCtx();
    LOG.info("Starting clustering at " + clusteringInstant);
    HoodieWriteMetadata<HoodieData<WriteStatus>> writeMetadata = table.cluster(context, clusteringInstant);
    HoodieWriteMetadata<JavaRDD<WriteStatus>> clusteringMetadata = writeMetadata.clone(HoodieJavaRDD.getJavaRDD(writeMetadata.getWriteStatuses()));
    // TODO : Where is shouldComplete used ?
    if (shouldComplete && clusteringMetadata.getCommitMetadata().isPresent()) {
        completeTableService(TableServiceType.CLUSTER, clusteringMetadata.getCommitMetadata().get(), table, clusteringInstant);
    }
    return clusteringMetadata;
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieData(org.apache.hudi.common.data.HoodieData) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) JavaRDD(org.apache.spark.api.java.JavaRDD)
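A plausible end-to-end invocation of this API first schedules a clustering plan and then executes it. A sketch, assuming an already-configured SparkRDDWriteClient; scheduleClustering is the client's public scheduling API, and the class and method names below are illustrative:

import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.spark.api.java.JavaRDD;

public class ClusteringRunner {
    // Schedules a clustering plan (if any file groups qualify) and executes it inline.
    static void runClustering(SparkRDDWriteClient<?> writeClient) {
        Option<String> instantTime = writeClient.scheduleClustering(Option.empty());
        if (instantTime.isPresent()) {
            // shouldComplete = true lets cluster() commit the replacecommit on success.
            HoodieWriteMetadata<JavaRDD<WriteStatus>> result =
                writeClient.cluster(instantTime.get(), true);
            System.out.println("Clustering produced " + result.getWriteStatuses().count() + " write statuses");
        }
    }
}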

Example 79 with HoodieInstant

Use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.

From the class SparkRDDWriteClient, method completeClustering.

private void completeClustering(HoodieReplaceCommitMetadata metadata, HoodieTable table, String clusteringCommitTime) {
    List<HoodieWriteStat> writeStats = metadata.getPartitionToWriteStats().entrySet().stream().flatMap(e -> e.getValue().stream()).collect(Collectors.toList());
    if (writeStats.stream().mapToLong(s -> s.getTotalWriteErrors()).sum() > 0) {
        throw new HoodieClusteringException("Clustering failed to write to files:" + writeStats.stream().filter(s -> s.getTotalWriteErrors() > 0L).map(s -> s.getFileId()).collect(Collectors.joining(",")));
    }
    final HoodieInstant clusteringInstant = new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.REPLACE_COMMIT_ACTION, clusteringCommitTime);
    try {
        this.txnManager.beginTransaction(Option.of(clusteringInstant), Option.empty());
        finalizeWrite(table, clusteringCommitTime, writeStats);
        // Update the table's metadata (the metadata table)
        updateTableMetadata(table, metadata, clusteringInstant);
        // Update the table's metadata indexes
        // NOTE: This overlaps w/ metadata table (above) and will be reconciled in the future
        table.updateMetadataIndexes(context, writeStats, clusteringCommitTime);
        LOG.info("Committing Clustering " + clusteringCommitTime + ". Finished with result " + metadata);
        table.getActiveTimeline().transitionReplaceInflightToComplete(HoodieTimeline.getReplaceCommitInflightInstant(clusteringCommitTime), Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    } catch (Exception e) {
        throw new HoodieClusteringException("unable to transition clustering inflight to complete: " + clusteringCommitTime, e);
    } finally {
        this.txnManager.endTransaction(Option.of(clusteringInstant));
    }
    WriteMarkersFactory.get(config.getMarkersType(), table, clusteringCommitTime).quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
    if (clusteringTimer != null) {
        long durationInMs = metrics.getDurationInMs(clusteringTimer.stop());
        try {
            metrics.updateCommitMetrics(HoodieActiveTimeline.parseDateFromInstantTime(clusteringCommitTime).getTime(), durationInMs, metadata, HoodieActiveTimeline.REPLACE_COMMIT_ACTION);
        } catch (ParseException e) {
            throw new HoodieCommitException("Commit time is not of valid format. Failed to commit compaction " + config.getBasePath() + " at time " + clusteringCommitTime, e);
        }
    }
    LOG.info("Clustering successfully on commit " + clusteringCommitTime);
}
Also used : DistributedRegistry(org.apache.hudi.metrics.DistributedRegistry) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWrapperFileSystem(org.apache.hudi.common.fs.HoodieWrapperFileSystem) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Option(org.apache.hudi.common.util.Option) HoodieCommitException(org.apache.hudi.exception.HoodieCommitException) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) Logger(org.apache.log4j.Logger) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) BulkInsertPartitioner(org.apache.hudi.table.BulkInsertPartitioner) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) SparkHoodieBackedTableMetadataWriter(org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) Registry(org.apache.hudi.common.metrics.Registry) ParseException(java.text.ParseException) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) JavaRDD(org.apache.spark.api.java.JavaRDD) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieData(org.apache.hudi.common.data.HoodieData) TableServiceType(org.apache.hudi.common.model.TableServiceType) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) WriteMarkersFactory(org.apache.hudi.table.marker.WriteMarkersFactory) SparkUpgradeDowngradeHelper(org.apache.hudi.table.upgrade.SparkUpgradeDowngradeHelper) CompactHelpers(org.apache.hudi.table.action.compact.CompactHelpers) SparkConf(org.apache.spark.SparkConf) HoodieClusteringException(org.apache.hudi.exception.HoodieClusteringException) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) Collectors(java.util.stream.Collectors) HoodieIndex(org.apache.hudi.index.HoodieIndex) StandardCharsets(java.nio.charset.StandardCharsets) TransactionUtils(org.apache.hudi.client.utils.TransactionUtils) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) List(java.util.List) EmbeddedTimelineService(org.apache.hudi.client.embedded.EmbeddedTimelineService) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) SparkHoodieIndexFactory(org.apache.hudi.index.SparkHoodieIndexFactory) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) Timer(com.codahale.metrics.Timer) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) LogManager(org.apache.log4j.LogManager)
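The transaction in completeClustering is scoped by an explicitly constructed inflight HoodieInstant. A minimal sketch of building and inspecting such an instant, with an illustrative commit time:

import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;

public class ReplaceCommitInstantDemo {
    public static void main(String[] args) {
        // Illustrative commit time in Hudi's yyyyMMddHHmmss instant format.
        String commitTime = "20220101123045";
        HoodieInstant inflight = new HoodieInstant(
            HoodieInstant.State.INFLIGHT, HoodieTimeline.REPLACE_COMMIT_ACTION, commitTime);
        // Timeline state transitions are keyed by (state, action, timestamp).
        System.out.println(inflight.getTimestamp() + " " + inflight.getAction()
            + " inflight=" + inflight.isInflight());
    }
}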

Example 80 with HoodieInstant

Use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.

From the class SparkRDDWriteClient, method compact.

@Override
protected HoodieWriteMetadata<JavaRDD<WriteStatus>> compact(String compactionInstantTime, boolean shouldComplete) {
    HoodieSparkTable<T> table = HoodieSparkTable.create(config, context, true);
    preWrite(compactionInstantTime, WriteOperationType.COMPACT, table.getMetaClient());
    HoodieTimeline pendingCompactionTimeline = table.getActiveTimeline().filterPendingCompactionTimeline();
    HoodieInstant inflightInstant = HoodieTimeline.getCompactionInflightInstant(compactionInstantTime);
    if (pendingCompactionTimeline.containsInstant(inflightInstant)) {
        table.rollbackInflightCompaction(inflightInstant, commitToRollback -> getPendingRollbackInfo(table.getMetaClient(), commitToRollback, false));
        table.getMetaClient().reloadActiveTimeline();
    }
    compactionTimer = metrics.getCompactionCtx();
    HoodieWriteMetadata<HoodieData<WriteStatus>> writeMetadata = table.compact(context, compactionInstantTime);
    HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = writeMetadata.clone(HoodieJavaRDD.getJavaRDD(writeMetadata.getWriteStatuses()));
    if (shouldComplete && compactionMetadata.getCommitMetadata().isPresent()) {
        completeTableService(TableServiceType.COMPACT, compactionMetadata.getCommitMetadata().get(), table, compactionInstantTime);
    }
    return compactionMetadata;
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieData(org.apache.hudi.common.data.HoodieData) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) JavaRDD(org.apache.spark.api.java.JavaRDD)
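As with clustering, compaction is typically driven by scheduling a plan first. A sketch, assuming an already-configured SparkRDDWriteClient on a merge-on-read table; because compact(String, boolean) above is protected, this uses the public single-argument compact(String) overload, which delegates with the client's auto-commit setting. Class and method names are illustrative:

import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.spark.api.java.JavaRDD;

public class CompactionRunner {
    // Schedules a compaction (if enough delta commits have accrued) and runs it inline.
    static void runCompaction(SparkRDDWriteClient<?> writeClient) {
        Option<String> instantTime = writeClient.scheduleCompaction(Option.empty());
        if (instantTime.isPresent()) {
            HoodieWriteMetadata<JavaRDD<WriteStatus>> result =
                writeClient.compact(instantTime.get());
            System.out.println("Compaction produced " + result.getWriteStatuses().count() + " write statuses");
        }
    }
}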

Aggregations

HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 323 usages
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 129 usages
ArrayList (java.util.ArrayList): 118 usages
List (java.util.List): 116 usages
IOException (java.io.IOException): 112 usages
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 104 usages
Test (org.junit.jupiter.api.Test): 97 usages
HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata): 96 usages
HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline): 89 usages
Map (java.util.Map): 84 usages
Option (org.apache.hudi.common.util.Option): 84 usages
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 84 usages
Collectors (java.util.stream.Collectors): 83 usages
HashMap (java.util.HashMap): 81 usages
Path (org.apache.hadoop.fs.Path): 78 usages
Pair (org.apache.hudi.common.util.collection.Pair): 71 usages
Logger (org.apache.log4j.Logger): 67 usages
LogManager (org.apache.log4j.LogManager): 66 usages
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 65 usages
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 61 usages