Search in sources :

Example 6 with HoodieReplaceCommitMetadata

use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.

From the class CommitUtils, method buildMetadataFromStats:

/**
 * Builds commit metadata from a list of write stats.
 *
 * <p>When {@code commitActionType} equals {@code HoodieTimeline.REPLACE_COMMIT_ACTION}, a
 * {@link HoodieReplaceCommitMetadata} carrying {@code partitionToReplaceFileIds} is created;
 * for any other action type a plain {@link HoodieCommitMetadata} is created. Each write stat
 * is then registered under the partition path it reports.
 *
 * @param writeStats                write stats to add to the metadata
 * @param partitionToReplaceFileIds replaced file ids keyed by partition; only stored on the
 *                                  metadata for replace commits. A {@code null} map is counted
 *                                  as zero replaced file ids in the log message.
 * @param commitActionType          timeline action type used to pick the metadata class
 * @param operationType             write operation type; used here only in the log message
 * @return the populated commit metadata
 */
private static HoodieCommitMetadata buildMetadataFromStats(List<HoodieWriteStat> writeStats, Map<String, List<String>> partitionToReplaceFileIds, String commitActionType, WriteOperationType operationType) {
    final HoodieCommitMetadata commitMetadata;
    // Compare by value: '==' on strings is a reference check and only succeeds when both
    // operands happen to be the same interned instance. Constant-first also tolerates null.
    if (HoodieTimeline.REPLACE_COMMIT_ACTION.equals(commitActionType)) {
        HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata();
        replaceMetadata.setPartitionToReplaceFileIds(partitionToReplaceFileIds);
        commitMetadata = replaceMetadata;
    } else {
        commitMetadata = new HoodieCommitMetadata();
    }
    for (HoodieWriteStat writeStat : writeStats) {
        String partition = writeStat.getPartitionPath();
        commitMetadata.addWriteStat(partition, writeStat);
    }
    // Guard the count so that logging never fails when no replace map was supplied.
    int numReplaceFileIds = partitionToReplaceFileIds == null
            ? 0
            : partitionToReplaceFileIds.values().stream().mapToInt(List::size).sum();
    LOG.info("Creating  metadata for " + operationType + " numWriteStats:" + writeStats.size() + " numReplaceFileIds:" + numReplaceFileIds);
    return commitMetadata;
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) Schema(org.apache.avro.Schema) HoodieException(org.apache.hudi.exception.HoodieException) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HashMap(java.util.HashMap) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) List(java.util.List) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) Map(java.util.Map) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) LogManager(org.apache.log4j.LogManager) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata)

Example 7 with HoodieReplaceCommitMetadata

use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.

From the class HiveTestUtil, method createReplaceCommit:

/**
 * Creates a replace-commit file for {@code instantTime}.
 *
 * <p>The metadata is given a schema through {@code addSchemaToCommitMetadata}, the supplied
 * operation type, and a single entry mapping {@code partitions} to an empty list of replaced
 * file ids, and is then handed to {@code createReplaceCommitFile}.
 *
 * @param instantTime                 instant time passed to {@code createReplaceCommitFile}
 * @param partitions                  key used for the single replaced-file-ids entry
 * @param type                        operation type recorded on the metadata
 * @param isParquetSchemaSimple       forwarded to {@code addSchemaToCommitMetadata}
 * @param useSchemaFromCommitMetadata forwarded to {@code addSchemaToCommitMetadata}
 * @throws IOException declared by the signature; presumably raised by the helper calls
 */
public static void createReplaceCommit(String instantTime, String partitions, WriteOperationType type, boolean isParquetSchemaSimple, boolean useSchemaFromCommitMetadata) throws IOException {
    // A single entry with no replaced file ids for the given key.
    Map<String, List<String>> replacedFileIdsByPartition = new HashMap<>();
    replacedFileIdsByPartition.put(partitions, new ArrayList<>());

    HoodieReplaceCommitMetadata metadata = new HoodieReplaceCommitMetadata();
    addSchemaToCommitMetadata(metadata, isParquetSchemaSimple, useSchemaFromCommitMetadata);
    metadata.setOperationType(type);
    metadata.setPartitionToReplaceFileIds(replacedFileIdsByPartition);

    createReplaceCommitFile(metadata, instantTime);
}
Also used : HashMap(java.util.HashMap) List(java.util.List) ArrayList(java.util.ArrayList) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata)

Example 8 with HoodieReplaceCommitMetadata

use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.

From the class SparkRDDWriteClient, method completeClustering:

/**
 * Completes a clustering operation for the given commit time.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Flatten all write stats out of {@code metadata} and fail if any reports write errors.</li>
 *   <li>Inside a transaction on the inflight replace-commit instant: finalize the write,
 *       update the table metadata and metadata indexes, and transition the replace commit
 *       from inflight to complete with the metadata serialized as UTF-8 JSON.</li>
 *   <li>Delete the marker directory for the commit.</li>
 *   <li>If a clustering timer is set, stop it and publish commit metrics.</li>
 * </ol>
 *
 * @param metadata             replace-commit metadata produced by clustering
 * @param table                table being clustered
 * @param clusteringCommitTime instant time of the clustering commit
 * @throws HoodieClusteringException if any write stat reports errors, or if anything inside
 *                                   the transactional section fails
 * @throws HoodieCommitException     if the commit time cannot be parsed for metrics
 */
private void completeClustering(HoodieReplaceCommitMetadata metadata, HoodieTable table, String clusteringCommitTime) {
    List<HoodieWriteStat> writeStats = metadata.getPartitionToWriteStats().values().stream()
            .flatMap(statsOfPartition -> statsOfPartition.stream())
            .collect(Collectors.toList());

    long totalWriteErrors = writeStats.stream().mapToLong(stat -> stat.getTotalWriteErrors()).sum();
    if (totalWriteErrors > 0) {
        String failedFileIds = writeStats.stream()
                .filter(stat -> stat.getTotalWriteErrors() > 0L)
                .map(stat -> stat.getFileId())
                .collect(Collectors.joining(","));
        throw new HoodieClusteringException("Clustering failed to write to files:" + failedFileIds);
    }

    final HoodieInstant clusteringInstant = new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.REPLACE_COMMIT_ACTION, clusteringCommitTime);
    try {
        this.txnManager.beginTransaction(Option.of(clusteringInstant), Option.empty());
        finalizeWrite(table, clusteringCommitTime, writeStats);
        // Update table's metadata (table)
        updateTableMetadata(table, metadata, clusteringInstant);
        // Update tables' metadata indexes
        // NOTE: This overlaps w/ metadata table (above) and will be reconciled in the future
        table.updateMetadataIndexes(context, writeStats, clusteringCommitTime);
        LOG.info("Committing Clustering " + clusteringCommitTime + ". Finished with result " + metadata);
        table.getActiveTimeline().transitionReplaceInflightToComplete(
                HoodieTimeline.getReplaceCommitInflightInstant(clusteringCommitTime),
                Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    } catch (Exception failure) {
        throw new HoodieClusteringException("unable to transition clustering inflight to complete: " + clusteringCommitTime, failure);
    } finally {
        // Always paired with the beginTransaction call above, on success or failure.
        this.txnManager.endTransaction(Option.of(clusteringInstant));
    }

    WriteMarkersFactory.get(config.getMarkersType(), table, clusteringCommitTime)
            .quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());

    if (clusteringTimer != null) {
        long elapsedMs = metrics.getDurationInMs(clusteringTimer.stop());
        try {
            metrics.updateCommitMetrics(
                    HoodieActiveTimeline.parseDateFromInstantTime(clusteringCommitTime).getTime(),
                    elapsedMs,
                    metadata,
                    HoodieActiveTimeline.REPLACE_COMMIT_ACTION);
        } catch (ParseException parseFailure) {
            throw new HoodieCommitException("Commit time is not of valid format. Failed to commit compaction " + config.getBasePath() + " at time " + clusteringCommitTime, parseFailure);
        }
    }
    LOG.info("Clustering successfully on commit " + clusteringCommitTime);
}
Also used : DistributedRegistry(org.apache.hudi.metrics.DistributedRegistry) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWrapperFileSystem(org.apache.hudi.common.fs.HoodieWrapperFileSystem) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Option(org.apache.hudi.common.util.Option) HoodieCommitException(org.apache.hudi.exception.HoodieCommitException) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) Logger(org.apache.log4j.Logger) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) BulkInsertPartitioner(org.apache.hudi.table.BulkInsertPartitioner) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) SparkHoodieBackedTableMetadataWriter(org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) Registry(org.apache.hudi.common.metrics.Registry) ParseException(java.text.ParseException) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) JavaRDD(org.apache.spark.api.java.JavaRDD) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieData(org.apache.hudi.common.data.HoodieData) TableServiceType(org.apache.hudi.common.model.TableServiceType) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) WriteMarkersFactory(org.apache.hudi.table.marker.WriteMarkersFactory) SparkUpgradeDowngradeHelper(org.apache.hudi.table.upgrade.SparkUpgradeDowngradeHelper) CompactHelpers(org.apache.hudi.table.action.compact.CompactHelpers) SparkConf(org.apache.spark.SparkConf) HoodieClusteringException(org.apache.hudi.exception.HoodieClusteringException) 
HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) Collectors(java.util.stream.Collectors) HoodieIndex(org.apache.hudi.index.HoodieIndex) StandardCharsets(java.nio.charset.StandardCharsets) TransactionUtils(org.apache.hudi.client.utils.TransactionUtils) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) List(java.util.List) EmbeddedTimelineService(org.apache.hudi.client.embedded.EmbeddedTimelineService) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) SparkHoodieIndexFactory(org.apache.hudi.index.SparkHoodieIndexFactory) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) Timer(com.codahale.metrics.Timer) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) LogManager(org.apache.log4j.LogManager) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieCommitException(org.apache.hudi.exception.HoodieCommitException) HoodieClusteringException(org.apache.hudi.exception.HoodieClusteringException) ParseException(java.text.ParseException) HoodieCommitException(org.apache.hudi.exception.HoodieCommitException) ParseException(java.text.ParseException) HoodieClusteringException(org.apache.hudi.exception.HoodieClusteringException)

Example 9 with HoodieReplaceCommitMetadata

use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.

From the class TestIncrementalFSViewSync, method addReplaceInstant:

/**
 * Adds a completed replace-commit instant to the active timeline, walking it through the
 * requested, inflight and completed states.
 *
 * @param metaClient                meta client whose active timeline is modified
 * @param instant                   instant time of the replace commit
 * @param writeStats                (partition, write stat) pairs recorded in the commit metadata
 * @param partitionToReplaceFileIds replaced file ids keyed by partition
 * @return the path reported by each write stat, in the order of {@code writeStats}
 * @throws IOException declared by the signature; presumably raised while serializing metadata
 */
private List<String> addReplaceInstant(HoodieTableMetaClient metaClient, String instant, List<Pair<String, HoodieWriteStat>> writeStats, Map<String, List<String>> partitionToReplaceFileIds) throws IOException {
    // Step 1: save the instant in REQUESTED state with an UNKNOWN operation type.
    HoodieInstant requested = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, instant);
    HoodieRequestedReplaceMetadata requestedMetadata = HoodieRequestedReplaceMetadata.newBuilder()
            .setOperationType(WriteOperationType.UNKNOWN.name())
            .build();
    metaClient.getActiveTimeline().saveToPendingReplaceCommit(requested, TimelineMetadataUtils.serializeRequestedReplaceMetadata(requestedMetadata));
    metaClient.reloadActiveTimeline();

    // Step 2: move it to INFLIGHT.
    HoodieInstant inflight = metaClient.getActiveTimeline().transitionReplaceRequestedToInflight(requested, Option.empty());

    // Step 3: complete it with the replace-commit metadata serialized as UTF-8 JSON.
    HoodieReplaceCommitMetadata completedMetadata = new HoodieReplaceCommitMetadata();
    for (Pair<String, HoodieWriteStat> partitionAndStat : writeStats) {
        completedMetadata.addWriteStat(partitionAndStat.getKey(), partitionAndStat.getValue());
    }
    completedMetadata.setPartitionToReplaceFileIds(partitionToReplaceFileIds);
    byte[] serializedMetadata = completedMetadata.toJsonString().getBytes(StandardCharsets.UTF_8);
    metaClient.getActiveTimeline().saveAsComplete(inflight, Option.of(serializedMetadata));

    return writeStats.stream()
            .map(partitionAndStat -> partitionAndStat.getValue().getPath())
            .collect(Collectors.toList());
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieException(org.apache.hudi.exception.HoodieException) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) COMPACTION_ACTION(org.apache.hudi.common.table.timeline.HoodieTimeline.COMPACTION_ACTION) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) Path(org.apache.hadoop.fs.Path) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) Set(java.util.Set) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) UUID(java.util.UUID) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) Test(org.junit.jupiter.api.Test) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieRestoreMetadata(org.apache.hudi.avro.model.HoodieRestoreMetadata) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) IntStream(java.util.stream.IntStream) HoodieCleaningPolicy(org.apache.hudi.common.model.HoodieCleaningPolicy) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) 
HashMap(java.util.HashMap) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) ArrayList(java.util.ArrayList) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) CleanerUtils(org.apache.hudi.common.util.CleanerUtils) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) Files(java.nio.file.Files) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) File(java.io.File) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) Paths(java.nio.file.Paths) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) Comparator(java.util.Comparator) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata)

Example 10 with HoodieReplaceCommitMetadata

use of org.apache.hudi.common.model.HoodieReplaceCommitMetadata in project hudi by apache.

From the class TestCommitUtils, method testCommitMetadataCreation:

/**
 * Checks that {@code CommitUtils.buildMetadata} with a delta-commit action type returns a
 * plain {@link HoodieCommitMetadata} (not a {@link HoodieReplaceCommitMetadata}) even though
 * replaced file ids are supplied, and that write stats, operation type and schema are recorded.
 */
@Test
public void testCommitMetadataCreation() {
    // Replaced file ids are supplied but the asserted result is not replace metadata.
    List<String> replacedFileIds = new ArrayList<>();
    replacedFileIds.add("f0");
    Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
    partitionToReplaceFileIds.put("p1", replacedFileIds);

    List<HoodieWriteStat> stats = new ArrayList<>();
    stats.add(createWriteStat("p1", "f1"));
    stats.add(createWriteStat("p2", "f2"));

    HoodieCommitMetadata result = CommitUtils.buildMetadata(
            stats,
            partitionToReplaceFileIds,
            Option.empty(),
            WriteOperationType.INSERT,
            TRIP_SCHEMA,
            HoodieTimeline.DELTA_COMMIT_ACTION);

    assertFalse(result instanceof HoodieReplaceCommitMetadata);
    Map<String, List<HoodieWriteStat>> statsByPartition = result.getPartitionToWriteStats();
    assertEquals(2, statsByPartition.size());
    assertEquals("f1", statsByPartition.get("p1").get(0).getFileId());
    assertEquals("f2", statsByPartition.get("p2").get(0).getFileId());
    assertEquals(WriteOperationType.INSERT, result.getOperationType());
    assertEquals(TRIP_SCHEMA, result.getMetadata(HoodieCommitMetadata.SCHEMA_KEY));
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) Test(org.junit.jupiter.api.Test)

Aggregations

HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata)19 List (java.util.List)14 HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat)13 HashMap (java.util.HashMap)12 ArrayList (java.util.ArrayList)11 HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata)10 Map (java.util.Map)8 HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata)8 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)6 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)6 Option (org.apache.hudi.common.util.Option)6 LogManager (org.apache.log4j.LogManager)6 Logger (org.apache.log4j.Logger)6 Collectors (java.util.stream.Collectors)5 WriteOperationType (org.apache.hudi.common.model.WriteOperationType)5 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)5 IOException (java.io.IOException)4 Path (org.apache.hadoop.fs.Path)4 FSUtils (org.apache.hudi.common.fs.FSUtils)4 Set (java.util.Set)3