Search in sources :

Example 21 with HoodieWriteStat

use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

the class TestIncrementalFSViewSync method addInstant.

private List<String> addInstant(HoodieTableMetaClient metaClient, String instant, boolean deltaCommit, String baseInstant) throws IOException {
    List<Pair<String, HoodieWriteStat>> writeStats = generateDataForInstant(baseInstant, instant, deltaCommit);
    HoodieCommitMetadata metadata = new HoodieCommitMetadata();
    writeStats.forEach(e -> metadata.addWriteStat(e.getKey(), e.getValue()));
    HoodieInstant inflightInstant = new HoodieInstant(true, deltaCommit ? HoodieTimeline.DELTA_COMMIT_ACTION : HoodieTimeline.COMMIT_ACTION, instant);
    metaClient.getActiveTimeline().createNewInstant(inflightInstant);
    metaClient.getActiveTimeline().saveAsComplete(inflightInstant, Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    /*
    // Delete pending compaction if present
    metaClient.getFs().delete(new Path(metaClient.getMetaPath(),
        new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, instant).getFileName()));
     */
    return writeStats.stream().map(e -> e.getValue().getPath()).collect(Collectors.toList());
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieException(org.apache.hudi.exception.HoodieException) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) COMPACTION_ACTION(org.apache.hudi.common.table.timeline.HoodieTimeline.COMPACTION_ACTION) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) Path(org.apache.hadoop.fs.Path) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) Set(java.util.Set) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) UUID(java.util.UUID) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) Test(org.junit.jupiter.api.Test) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieRestoreMetadata(org.apache.hudi.avro.model.HoodieRestoreMetadata) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) IntStream(java.util.stream.IntStream) HoodieCleaningPolicy(org.apache.hudi.common.model.HoodieCleaningPolicy) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) ArrayList(java.util.ArrayList) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) CleanerUtils(org.apache.hudi.common.util.CleanerUtils) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) Files(java.nio.file.Files) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) File(java.io.File) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) Paths(java.nio.file.Paths) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) Comparator(java.util.Comparator) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair) Pair(org.apache.hudi.common.util.collection.Pair)

Example 22 with HoodieWriteStat

use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

the class TestIncrementalFSViewSync method generateDataForInstant.

private List<Pair<String, HoodieWriteStat>> generateDataForInstant(String baseInstant, String instant, boolean deltaCommit, List<String> fileIds) {
    return partitions.stream().flatMap(p -> fileIds.stream().map(f -> {
        try {
            File file = new File(basePath + "/" + p + "/" + (deltaCommit ? FSUtils.makeLogFileName(f, ".log", baseInstant, Integer.parseInt(instant), TEST_WRITE_TOKEN) : FSUtils.makeDataFileName(instant, TEST_WRITE_TOKEN, f)));
            file.createNewFile();
            HoodieWriteStat w = new HoodieWriteStat();
            w.setFileId(f);
            w.setPath(String.format("%s/%s", p, file.getName()));
            return Pair.of(p, w);
        } catch (IOException e) {
            throw new HoodieException(e);
        }
    })).collect(Collectors.toList());
}
Also used : BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieException(org.apache.hudi.exception.HoodieException) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) COMPACTION_ACTION(org.apache.hudi.common.table.timeline.HoodieTimeline.COMPACTION_ACTION) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) Path(org.apache.hadoop.fs.Path) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) Set(java.util.Set) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) UUID(java.util.UUID) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) Test(org.junit.jupiter.api.Test) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieRestoreMetadata(org.apache.hudi.avro.model.HoodieRestoreMetadata) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) IntStream(java.util.stream.IntStream) HoodieCleaningPolicy(org.apache.hudi.common.model.HoodieCleaningPolicy) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) ArrayList(java.util.ArrayList) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) CleanerUtils(org.apache.hudi.common.util.CleanerUtils) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) Files(java.nio.file.Files) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) File(java.io.File) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) Paths(java.nio.file.Paths) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) Comparator(java.util.Comparator) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) File(java.io.File)

Example 23 with HoodieWriteStat

use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

the class TestTimelineUtils method getCommitMetadata.

private byte[] getCommitMetadata(String basePath, String partition, String commitTs, int count, Map<String, String> extraMetadata) throws IOException {
    HoodieCommitMetadata commit = new HoodieCommitMetadata();
    for (int i = 1; i <= count; i++) {
        HoodieWriteStat stat = new HoodieWriteStat();
        stat.setFileId(i + "");
        stat.setPartitionPath(Paths.get(basePath, partition).toString());
        stat.setPath(commitTs + "." + i + metaClient.getTableConfig().getBaseFileFormat().getFileExtension());
        commit.addWriteStat(partition, stat);
    }
    for (Map.Entry<String, String> extraEntries : extraMetadata.entrySet()) {
        commit.addMetadata(extraEntries.getKey(), extraEntries.getValue());
    }
    return commit.toJsonString().getBytes(StandardCharsets.UTF_8);
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HashMap(java.util.HashMap) Map(java.util.Map)

Example 24 with HoodieWriteStat

use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

the class HoodieTestTable method generateHoodieWriteStatForPartition.

public static List<HoodieWriteStat> generateHoodieWriteStatForPartition(Map<String, List<Pair<String, Integer>>> partitionToFileIdMap, String commitTime, boolean bootstrap) {
    List<HoodieWriteStat> writeStats = new ArrayList<>();
    for (Map.Entry<String, List<Pair<String, Integer>>> entry : partitionToFileIdMap.entrySet()) {
        String partition = entry.getKey();
        for (Pair<String, Integer> fileIdInfo : entry.getValue()) {
            HoodieWriteStat writeStat = new HoodieWriteStat();
            String fileName = bootstrap ? fileIdInfo.getKey() : FileCreateUtils.baseFileName(commitTime, fileIdInfo.getKey());
            writeStat.setFileId(fileName);
            writeStat.setPartitionPath(partition);
            writeStat.setPath(partition + "/" + fileName);
            writeStat.setTotalWriteBytes(fileIdInfo.getValue());
            writeStat.setFileSizeInBytes(fileIdInfo.getValue());
            writeStats.add(writeStat);
        }
    }
    return writeStats;
}
Also used : HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) ArrayList(java.util.ArrayList) List(java.util.List) ArrayList(java.util.ArrayList) Map(java.util.Map) CollectionUtils.createImmutableMap(org.apache.hudi.common.util.CollectionUtils.createImmutableMap) HashMap(java.util.HashMap)

Example 25 with HoodieWriteStat

use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

the class HoodieTestTable method generateHoodieWriteStatForPartitionLogFiles.

/**
 * Returns the write stats for log files in the partition. Since log file has version associated with it, the {@param partitionToFileIdMap}
 * contains list of Pair<String, Integer[]> where the Integer[] array has both file version and file size.
 */
private static List<HoodieWriteStat> generateHoodieWriteStatForPartitionLogFiles(Map<String, List<Pair<String, Integer[]>>> partitionToFileIdMap, String commitTime, boolean bootstrap) {
    List<HoodieWriteStat> writeStats = new ArrayList<>();
    if (partitionToFileIdMap == null) {
        return writeStats;
    }
    for (Map.Entry<String, List<Pair<String, Integer[]>>> entry : partitionToFileIdMap.entrySet()) {
        String partition = entry.getKey();
        for (Pair<String, Integer[]> fileIdInfo : entry.getValue()) {
            HoodieWriteStat writeStat = new HoodieWriteStat();
            String fileName = bootstrap ? fileIdInfo.getKey() : FileCreateUtils.logFileName(commitTime, fileIdInfo.getKey(), fileIdInfo.getValue()[0]);
            writeStat.setFileId(fileName);
            writeStat.setPartitionPath(partition);
            writeStat.setPath(partition + "/" + fileName);
            writeStat.setTotalWriteBytes(fileIdInfo.getValue()[1]);
            writeStat.setFileSizeInBytes(fileIdInfo.getValue()[1]);
            writeStats.add(writeStat);
        }
    }
    return writeStats;
}
Also used : HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) ArrayList(java.util.ArrayList) List(java.util.List) ArrayList(java.util.ArrayList) Map(java.util.Map) CollectionUtils.createImmutableMap(org.apache.hudi.common.util.CollectionUtils.createImmutableMap) HashMap(java.util.HashMap)

Aggregations

HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat)74 HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata)42 List (java.util.List)38 ArrayList (java.util.ArrayList)33 HashMap (java.util.HashMap)32 Map (java.util.Map)32 Path (org.apache.hadoop.fs.Path)28 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)24 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)23 IOException (java.io.IOException)22 Option (org.apache.hudi.common.util.Option)19 Collectors (java.util.stream.Collectors)18 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)18 WriteStatus (org.apache.hudi.client.WriteStatus)17 HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata)17 LogManager (org.apache.log4j.LogManager)16 Logger (org.apache.log4j.Logger)16 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)15 FileSlice (org.apache.hudi.common.model.FileSlice)14 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)14