
Example 11 with HoodieReplaceCommitMetadata

use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.

the class TestHoodieRealtimeRecordReader method createReplaceCommitFile.

private void createReplaceCommitFile(java.nio.file.Path basePath, String commitNumber, String partitionPath, String filePath, String fileId, Map<String, List<String>> partitionToReplaceFileIds) throws IOException {
    List<HoodieWriteStat> writeStats = new ArrayList<>();
    HoodieWriteStat writeStat = createHoodieWriteStat(basePath, commitNumber, partitionPath, filePath, fileId);
    writeStats.add(writeStat);
    HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata();
    replaceMetadata.setPartitionToReplaceFileIds(partitionToReplaceFileIds);
    writeStats.forEach(stat -> replaceMetadata.addWriteStat(partitionPath, stat));
    // Serialize the metadata as JSON into the timeline folder (.hoodie) as <instant>.replacecommit
    File file = basePath.resolve(".hoodie").resolve(commitNumber + ".replacecommit").toFile();
    file.createNewFile();
    try (FileOutputStream fileOutputStream = new FileOutputStream(file)) {
        fileOutputStream.write(replaceMetadata.toJsonString().getBytes(StandardCharsets.UTF_8));
        fileOutputStream.flush();
    }
}
Also used : HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) FileOutputStream(java.io.FileOutputStream) ArrayList(java.util.ArrayList) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) File(java.io.File) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata)
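
Below is a minimal usage sketch, not taken from the Hudi test itself: the temp directory, instant time, partition, and file names are illustrative placeholders. Note that the .hoodie timeline folder must exist before the helper writes into it.

@Test
public void writesReplaceCommitFile() throws IOException {
    // Hypothetical test: verify the helper materializes <instant>.replacecommit in the timeline folder
    java.nio.file.Path basePath = java.nio.file.Files.createTempDirectory("hudi-test");
    java.nio.file.Files.createDirectories(basePath.resolve(".hoodie"));
    Map<String, List<String>> partitionToReplaceFileIds =
        Collections.singletonMap("2016/05/01", Collections.singletonList("fileid0"));
    createReplaceCommitFile(basePath, "003", "2016/05/01",
        "2016/05/01/fileid1_1-0-1_003.parquet", "fileid1", partitionToReplaceFileIds);
    assertTrue(java.nio.file.Files.exists(basePath.resolve(".hoodie").resolve("003.replacecommit")));
}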

Example 12 with HoodieReplaceCommitMetadata

use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.

the class HoodieTestTable method doCluster.

public HoodieReplaceCommitMetadata doCluster(String commitTime, Map<String, List<String>> partitionToReplaceFileIds, List<String> partitions, int filesPerPartition) throws Exception {
    HoodieTestTableState testTableState = getTestTableStateWithPartitionFileInfo(CLUSTER, metaClient.getTableType(), commitTime, partitions, filesPerPartition);
    this.currentInstantTime = commitTime;
    // Assign a pseudo-random length (100-599) to each file id slated for replacement
    Map<String, List<Pair<String, Integer>>> partitionToReplaceFileIdsWithLength = new HashMap<>();
    for (Map.Entry<String, List<String>> entry : partitionToReplaceFileIds.entrySet()) {
        String partition = entry.getKey();
        partitionToReplaceFileIdsWithLength.put(partition, new ArrayList<>());
        for (String fileId : entry.getValue()) {
            int length = 100 + RANDOM.nextInt(500);
            partitionToReplaceFileIdsWithLength.get(partition).add(Pair.of(fileId, length));
        }
    }
    // Generate write stats for the new base files and register them with the test table
    List<HoodieWriteStat> writeStats = generateHoodieWriteStatForPartition(testTableState.getPartitionToBaseFileInfoMap(commitTime), commitTime, false);
    for (String partition : testTableState.getPartitionToBaseFileInfoMap(commitTime).keySet()) {
        this.withBaseFilesInPartition(partition, testTableState.getPartitionToBaseFileInfoMap(commitTime).get(partition));
    }
    // For the REPLACE_COMMIT action, buildMetadata returns a HoodieReplaceCommitMetadata instance
    HoodieReplaceCommitMetadata replaceMetadata = (HoodieReplaceCommitMetadata) buildMetadata(writeStats, partitionToReplaceFileIds, Option.empty(), CLUSTER, EMPTY_STRING, REPLACE_COMMIT_ACTION);
    addReplaceCommit(commitTime, Option.empty(), Option.empty(), replaceMetadata);
    return replaceMetadata;
}
Also used : HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HashMap(java.util.HashMap) List(java.util.List) ArrayList(java.util.ArrayList) Map(java.util.Map) CollectionUtils.createImmutableMap(org.apache.hudi.common.util.CollectionUtils.createImmutableMap) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata)
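
A hedged sketch of driving doCluster from a test; it assumes a HoodieTestTable obtained via the HoodieTestTable.of(metaClient) factory, and the instant time, partition, and file id below are placeholders.

@Test
public void clusterRecordsReplacedFileIds() throws Exception {
    HoodieTestTable testTable = HoodieTestTable.of(metaClient);
    Map<String, List<String>> toReplace =
        Collections.singletonMap("p1", Collections.singletonList("file-1"));
    // Request a cluster commit that writes 2 new base files into partition p1
    HoodieReplaceCommitMetadata metadata =
        testTable.doCluster("20220101000000", toReplace, Collections.singletonList("p1"), 2);
    // The returned metadata should carry the replaced file ids that were passed in
    assertEquals(toReplace, metadata.getPartitionToReplaceFileIds());
}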

Example 13 with HoodieReplaceCommitMetadata

use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.

the class TestCommitUtils method testReplaceMetadataCreation.

@Test
public void testReplaceMetadataCreation() {
    List<HoodieWriteStat> writeStats = new ArrayList<>();
    writeStats.add(createWriteStat("p1", "f1"));
    writeStats.add(createWriteStat("p2", "f2"));
    Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
    List<String> replacedFileIds = new ArrayList<>();
    replacedFileIds.add("f0");
    partitionToReplaceFileIds.put("p1", replacedFileIds);
    HoodieCommitMetadata commitMetadata = CommitUtils.buildMetadata(writeStats, partitionToReplaceFileIds, Option.empty(), WriteOperationType.INSERT, TRIP_SCHEMA, HoodieTimeline.REPLACE_COMMIT_ACTION);
    assertTrue(commitMetadata instanceof HoodieReplaceCommitMetadata);
    HoodieReplaceCommitMetadata replaceCommitMetadata = (HoodieReplaceCommitMetadata) commitMetadata;
    assertEquals(1, replaceCommitMetadata.getPartitionToReplaceFileIds().size());
    assertEquals("f0", replaceCommitMetadata.getPartitionToReplaceFileIds().get("p1").get(0));
    assertEquals(2, commitMetadata.getPartitionToWriteStats().size());
    assertEquals("f1", commitMetadata.getPartitionToWriteStats().get("p1").get(0).getFileId());
    assertEquals("f2", commitMetadata.getPartitionToWriteStats().get("p2").get(0).getFileId());
    assertEquals(WriteOperationType.INSERT, commitMetadata.getOperationType());
    assertEquals(TRIP_SCHEMA, commitMetadata.getMetadata(HoodieCommitMetadata.SCHEMA_KEY));
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) List(java.util.List) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) Test(org.junit.jupiter.api.Test)
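
As a complementary check (a sketch, not part of the Hudi test above), the same CommitUtils.buildMetadata call with a plain COMMIT_ACTION is expected to return the base HoodieCommitMetadata type rather than the replace-commit subclass; writeStats here is the list built in the test above.

HoodieCommitMetadata plainMetadata = CommitUtils.buildMetadata(writeStats, Collections.emptyMap(),
    Option.empty(), WriteOperationType.INSERT, TRIP_SCHEMA, HoodieTimeline.COMMIT_ACTION);
// No partitions are replaced, so the plain metadata class should suffice
assertFalse(plainMetadata instanceof HoodieReplaceCommitMetadata);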

Example 14 with HoodieReplaceCommitMetadata

use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.

the class TestHoodieDeltaStreamer method testCleanerDeleteReplacedDataWithArchive.

@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testCleanerDeleteReplacedDataWithArchive(Boolean asyncClean) throws Exception {
    String tableBasePath = dfsBasePath + "/cleanerDeleteReplacedDataWithArchive" + asyncClean;
    int totalRecords = 3000;
    // Step 1 : Prepare and insert data without archival and cleaning.
    // Ensure at least 6 commits, including 2 replacecommits, have completed.
    HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.INSERT);
    cfg.continuousMode = true;
    cfg.tableType = HoodieTableType.COPY_ON_WRITE.name();
    cfg.configs.addAll(getAsyncServicesConfigs(totalRecords, "false", "true", "2", "", ""));
    cfg.configs.add(String.format("%s=%s", HoodieCompactionConfig.PARQUET_SMALL_FILE_LIMIT.key(), "0"));
    cfg.configs.add(String.format("%s=%s", HoodieMetadataConfig.COMPACT_NUM_DELTA_COMMITS.key(), "1"));
    HoodieDeltaStreamer ds = new HoodieDeltaStreamer(cfg, jsc);
    deltaStreamerTestRunner(ds, cfg, (r) -> {
        TestHelpers.assertAtLeastNReplaceCommits(2, tableBasePath, dfs);
        return true;
    });
    TestHelpers.assertAtLeastNCommits(6, tableBasePath, dfs);
    TestHelpers.assertAtLeastNReplaceCommits(2, tableBasePath, dfs);
    // Step 2 : Get the first replacecommit and extract the corresponding replaced file IDs.
    HoodieTableMetaClient meta = HoodieTableMetaClient.builder().setConf(dfs.getConf()).setBasePath(tableBasePath).build();
    HoodieTimeline replacedTimeline = meta.reloadActiveTimeline().getCompletedReplaceTimeline();
    Option<HoodieInstant> firstReplaceHoodieInstant = replacedTimeline.nthFromLastInstant(1);
    assertTrue(firstReplaceHoodieInstant.isPresent());
    Option<byte[]> firstReplaceHoodieInstantDetails = replacedTimeline.getInstantDetails(firstReplaceHoodieInstant.get());
    HoodieReplaceCommitMetadata firstReplaceMetadata = HoodieReplaceCommitMetadata.fromBytes(firstReplaceHoodieInstantDetails.get(), HoodieReplaceCommitMetadata.class);
    Map<String, List<String>> partitionToReplaceFileIds = firstReplaceMetadata.getPartitionToReplaceFileIds();
    String partitionName = null;
    List<String> replacedFileIDs = null;
    for (Map.Entry<String, List<String>> entry : partitionToReplaceFileIds.entrySet()) {
        // keeps whichever entry is iterated last
        partitionName = entry.getKey();
        replacedFileIDs = entry.getValue();
    }
    assertNotNull(partitionName);
    assertNotNull(replacedFileIDs);
    // Step 3 : Based on replacedFileIDs, collect the corresponding full file paths.
    ArrayList<String> replacedFilePaths = new ArrayList<>();
    Path partitionPath = new Path(meta.getBasePath(), partitionName);
    RemoteIterator<LocatedFileStatus> hoodieFiles = meta.getFs().listFiles(partitionPath, true);
    while (hoodieFiles.hasNext()) {
        LocatedFileStatus f = hoodieFiles.next();
        String file = f.getPath().toUri().toString();
        for (String replacedFileID : replacedFileIDs) {
            if (file.contains(replacedFileID)) {
                replacedFilePaths.add(file);
            }
        }
    }
    assertFalse(replacedFilePaths.isEmpty());
    // Step 4 : Insert 1 record and trigger sync/async cleaner and archive.
    List<String> configs = getAsyncServicesConfigs(1, "true", "true", "2", "", "");
    configs.add(String.format("%s=%s", HoodieCompactionConfig.CLEANER_POLICY.key(), "KEEP_LATEST_COMMITS"));
    configs.add(String.format("%s=%s", HoodieCompactionConfig.CLEANER_COMMITS_RETAINED.key(), "1"));
    configs.add(String.format("%s=%s", HoodieCompactionConfig.MIN_COMMITS_TO_KEEP.key(), "2"));
    configs.add(String.format("%s=%s", HoodieCompactionConfig.MAX_COMMITS_TO_KEEP.key(), "3"));
    configs.add(String.format("%s=%s", HoodieCompactionConfig.ASYNC_CLEAN.key(), asyncClean));
    configs.add(String.format("%s=%s", HoodieMetadataConfig.COMPACT_NUM_DELTA_COMMITS.key(), "1"));
    if (asyncClean) {
        configs.add(String.format("%s=%s", HoodieWriteConfig.WRITE_CONCURRENCY_MODE.key(), WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.name()));
        configs.add(String.format("%s=%s", HoodieCompactionConfig.FAILED_WRITES_CLEANER_POLICY.key(), HoodieFailedWritesCleaningPolicy.LAZY.name()));
        configs.add(String.format("%s=%s", HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), InProcessLockProvider.class.getName()));
    }
    cfg.configs = configs;
    cfg.continuousMode = false;
    ds = new HoodieDeltaStreamer(cfg, jsc);
    ds.sync();
    // Step 5 : Make sure that firstReplaceHoodieInstant is archived.
    long count = meta.reloadActiveTimeline().getCompletedReplaceTimeline().getInstants().filter(instant -> firstReplaceHoodieInstant.get().equals(instant)).count();
    assertEquals(0, count);
    // Step 6 : All the replaced files in firstReplaceHoodieInstant should be deleted through sync/async cleaner.
    for (String replacedFilePath : replacedFilePaths) {
        assertFalse(meta.getFs().exists(new Path(replacedFilePath)));
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieDeltaStreamer(org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer) Path(org.apache.hadoop.fs.Path) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) Option(org.apache.hudi.common.util.Option) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
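
The core of Step 2, extracted as a small helper sketch: fetch a completed replacecommit instant from the timeline and deserialize its metadata. The name readReplaceMetadata is illustrative, not a Hudi API.

private static Option<HoodieReplaceCommitMetadata> readReplaceMetadata(HoodieTableMetaClient meta) throws IOException {
    HoodieTimeline replacedTimeline = meta.reloadActiveTimeline().getCompletedReplaceTimeline();
    Option<HoodieInstant> latest = replacedTimeline.lastInstant();
    if (!latest.isPresent()) {
        return Option.empty();
    }
    // getInstantDetails returns the raw bytes of the commit file; fromBytes parses the JSON body
    byte[] details = replacedTimeline.getInstantDetails(latest.get()).get();
    return Option.of(HoodieReplaceCommitMetadata.fromBytes(details, HoodieReplaceCommitMetadata.class));
}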

Example 15 with HoodieReplaceCommitMetadata

use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.

the class MetadataConversionUtils method createMetaWrapper.

public static HoodieArchivedMetaEntry createMetaWrapper(HoodieInstant hoodieInstant, HoodieTableMetaClient metaClient) throws IOException {
    HoodieArchivedMetaEntry archivedMetaWrapper = new HoodieArchivedMetaEntry();
    archivedMetaWrapper.setCommitTime(hoodieInstant.getTimestamp());
    archivedMetaWrapper.setActionState(hoodieInstant.getState().name());
    switch(hoodieInstant.getAction()) {
        case HoodieTimeline.CLEAN_ACTION:
            {
                if (hoodieInstant.isCompleted()) {
                    archivedMetaWrapper.setHoodieCleanMetadata(CleanerUtils.getCleanerMetadata(metaClient, hoodieInstant));
                } else {
                    archivedMetaWrapper.setHoodieCleanerPlan(CleanerUtils.getCleanerPlan(metaClient, hoodieInstant));
                }
                archivedMetaWrapper.setActionType(ActionType.clean.name());
                break;
            }
        case HoodieTimeline.COMMIT_ACTION:
            {
                HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(metaClient.getActiveTimeline().getInstantDetails(hoodieInstant).get(), HoodieCommitMetadata.class);
                archivedMetaWrapper.setHoodieCommitMetadata(convertCommitMetadata(commitMetadata));
                archivedMetaWrapper.setActionType(ActionType.commit.name());
                break;
            }
        case HoodieTimeline.DELTA_COMMIT_ACTION:
            {
                HoodieCommitMetadata deltaCommitMetadata = HoodieCommitMetadata.fromBytes(metaClient.getActiveTimeline().getInstantDetails(hoodieInstant).get(), HoodieCommitMetadata.class);
                archivedMetaWrapper.setHoodieCommitMetadata(convertCommitMetadata(deltaCommitMetadata));
                archivedMetaWrapper.setActionType(ActionType.deltacommit.name());
                break;
            }
        case HoodieTimeline.REPLACE_COMMIT_ACTION:
            {
                if (hoodieInstant.isCompleted()) {
                    HoodieReplaceCommitMetadata replaceCommitMetadata = HoodieReplaceCommitMetadata.fromBytes(metaClient.getActiveTimeline().getInstantDetails(hoodieInstant).get(), HoodieReplaceCommitMetadata.class);
                    archivedMetaWrapper.setHoodieReplaceCommitMetadata(ReplaceArchivalHelper.convertReplaceCommitMetadata(replaceCommitMetadata));
                } else if (hoodieInstant.isInflight()) {
                    // Inflight replacecommit files share the same metadata body as HoodieCommitMetadata,
                    // so it can be reused without defining a separate inflight extension.
                    // Under clustering, however, inflight replacecommit files may be empty.
                    Option<HoodieCommitMetadata> inflightCommitMetadata = getInflightReplaceMetadata(metaClient, hoodieInstant);
                    if (inflightCommitMetadata.isPresent()) {
                        archivedMetaWrapper.setHoodieInflightReplaceMetadata(convertCommitMetadata(inflightCommitMetadata.get()));
                    }
                } else {
                    // HoodieRequestedReplaceMetadata may be empty, e.g. for insert_overwrite_table or
                    // insert_overwrite without clustering; the requested commit file format still needs standardization.
                    Option<HoodieRequestedReplaceMetadata> requestedReplaceMetadata = getRequestedReplaceMetadata(metaClient, hoodieInstant);
                    if (requestedReplaceMetadata.isPresent()) {
                        archivedMetaWrapper.setHoodieRequestedReplaceMetadata(requestedReplaceMetadata.get());
                    }
                }
                archivedMetaWrapper.setActionType(ActionType.replacecommit.name());
                break;
            }
        case HoodieTimeline.ROLLBACK_ACTION:
            {
                if (hoodieInstant.isCompleted()) {
                    archivedMetaWrapper.setHoodieRollbackMetadata(TimelineMetadataUtils.deserializeAvroMetadata(metaClient.getActiveTimeline().getInstantDetails(hoodieInstant).get(), HoodieRollbackMetadata.class));
                }
                archivedMetaWrapper.setActionType(ActionType.rollback.name());
                break;
            }
        case HoodieTimeline.SAVEPOINT_ACTION:
            {
                archivedMetaWrapper.setHoodieSavePointMetadata(TimelineMetadataUtils.deserializeAvroMetadata(metaClient.getActiveTimeline().getInstantDetails(hoodieInstant).get(), HoodieSavepointMetadata.class));
                archivedMetaWrapper.setActionType(ActionType.savepoint.name());
                break;
            }
        case HoodieTimeline.COMPACTION_ACTION:
            {
                HoodieCompactionPlan plan = CompactionUtils.getCompactionPlan(metaClient, hoodieInstant.getTimestamp());
                archivedMetaWrapper.setHoodieCompactionPlan(plan);
                archivedMetaWrapper.setActionType(ActionType.compaction.name());
                break;
            }
        default:
            {
                throw new UnsupportedOperationException("Action not fully supported yet");
            }
    }
    return archivedMetaWrapper;
}
Also used : HoodieArchivedMetaEntry(org.apache.hudi.avro.model.HoodieArchivedMetaEntry) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata)
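
A sketch of how an archival flow might apply createMetaWrapper over the instants leaving the active timeline; toArchivedEntries and its instantsToArchive argument are hypothetical, not part of MetadataConversionUtils.

static List<HoodieArchivedMetaEntry> toArchivedEntries(List<HoodieInstant> instantsToArchive, HoodieTableMetaClient metaClient) throws IOException {
    List<HoodieArchivedMetaEntry> entries = new ArrayList<>();
    for (HoodieInstant instant : instantsToArchive) {
        // Each action type gets the matching metadata payload, as the switch above shows
        entries.add(MetadataConversionUtils.createMetaWrapper(instant, metaClient));
    }
    return entries;
}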

Aggregations

HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata) 19
List (java.util.List) 14
HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat) 13
HashMap (java.util.HashMap) 12
ArrayList (java.util.ArrayList) 11
HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata) 10
Map (java.util.Map) 8
HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) 8
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant) 6
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline) 6
Option (org.apache.hudi.common.util.Option) 6
LogManager (org.apache.log4j.LogManager) 6
Logger (org.apache.log4j.Logger) 6
Collectors (java.util.stream.Collectors) 5
WriteOperationType (org.apache.hudi.common.model.WriteOperationType) 5
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient) 5
IOException (java.io.IOException) 4
Path (org.apache.hadoop.fs.Path) 4
FSUtils (org.apache.hudi.common.fs.FSUtils) 4
Set (java.util.Set) 3