
Example 16 with HoodieWriteStat

Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

From the class TestCluster, method createPartitions.

private HoodieCommitMetadata createPartitions(int numberOfPartitions, boolean isParquetSchemaSimple, ZonedDateTime startFrom, String commitTime, String basePath) throws IOException, URISyntaxException {
    startFrom = startFrom.truncatedTo(ChronoUnit.DAYS);
    HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
    for (int i = 0; i < numberOfPartitions; i++) {
        String partitionPath = startFrom.format(dtfOut);
        Path partPath = new Path(basePath + "/" + partitionPath);
        partPath = dfsCluster.getFileSystem().makeQualified(partPath); // makeQualified returns a new Path; keep the result
        dfsCluster.getFileSystem().mkdirs(partPath);
        List<HoodieWriteStat> writeStats = createTestData(partPath, isParquetSchemaSimple, commitTime);
        startFrom = startFrom.minusDays(1);
        writeStats.forEach(s -> commitMetadata.addWriteStat(partitionPath, s));
    }
    return commitMetadata;
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) Path(org.apache.hadoop.fs.Path) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat)
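
A minimal sketch of the HoodieWriteStat API exercised above, assuming only the common setters (the field values are illustrative, not what createTestData produces):

import java.io.IOException;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieWriteStat;

public class WriteStatSketch {

    // Builds one write stat and registers it under its partition, the same
    // shape of metadata that createPartitions accumulates per commit.
    static HoodieCommitMetadata buildCommitMetadata() throws IOException {
        HoodieWriteStat stat = new HoodieWriteStat();
        stat.setFileId("f3936b66-b3db-4fc8-a6d0-b1a7559016e6");
        stat.setPartitionPath("2020/06/27");
        // the path is relative to the table base path
        stat.setPath("2020/06/27/f3936b66-b3db-4fc8-a6d0-b1a7559016e6_1-0-1_1.parquet");
        stat.setNumWrites(100L);
        stat.setTotalWriteBytes(4096L);
        HoodieCommitMetadata metadata = new HoodieCommitMetadata();
        metadata.addWriteStat("2020/06/27", stat);
        // toJsonString() is what ultimately lands in the .commit file
        metadata.toJsonString();
        return metadata;
    }
}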

Example 17 with HoodieWriteStat

Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

From the class HoodieMergeHandle, method init.

/**
 * Extract old file path, initialize StorageWriter and WriteStatus.
 */
private void init(String fileId, String partitionPath, HoodieBaseFile baseFileToMerge) {
    LOG.info("partitionPath:" + partitionPath + ", fileId to be merged:" + fileId);
    this.baseFileToMerge = baseFileToMerge;
    this.writtenRecordKeys = new HashSet<>();
    writeStatus.setStat(new HoodieWriteStat());
    try {
        String latestValidFilePath = baseFileToMerge.getFileName();
        writeStatus.getStat().setPrevCommit(FSUtils.getCommitTime(latestValidFilePath));
        HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, instantTime, new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath));
        partitionMetadata.trySave(getPartitionId());
        String newFileName = FSUtils.makeDataFileName(instantTime, writeToken, fileId, hoodieTable.getBaseFileExtension());
        makeOldAndNewFilePaths(partitionPath, latestValidFilePath, newFileName);
        LOG.info(String.format("Merging new data into oldPath %s, as newPath %s", oldFilePath.toString(), newFilePath.toString()));
        // the file name is the same for all records in this batch
        writeStatus.setFileId(fileId);
        writeStatus.setPartitionPath(partitionPath);
        writeStatus.getStat().setPartitionPath(partitionPath);
        writeStatus.getStat().setFileId(fileId);
        setWriteStatusPath();
        // Create Marker file
        createMarkerFile(partitionPath, newFileName);
        // Create the writer for writing the new version file
        fileWriter = createNewFileWriter(instantTime, newFilePath, hoodieTable, config, writeSchemaWithMetaFields, taskContextSupplier);
    } catch (IOException io) {
        LOG.error("Error in update task at commit " + instantTime, io);
        writeStatus.setGlobalError(io);
        throw new HoodieUpsertException("Failed to initialize HoodieUpdateHandle for FileId: " + fileId + " on commit " + instantTime + " on path " + hoodieTable.getMetaClient().getBasePath(), io);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieUpsertException(org.apache.hudi.exception.HoodieUpsertException) HoodiePartitionMetadata(org.apache.hudi.common.model.HoodiePartitionMetadata) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException)
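
The prevCommit stored above is recovered by parsing the instant time back out of the base file's name. A small round-trip sketch, assuming the four-argument makeDataFileName overload used in init(); the instant, token and fileId are illustrative:

import org.apache.hudi.common.fs.FSUtils;

public class FileNameRoundTrip {

    public static void main(String[] args) {
        // Hudi base file names encode the fileId, write token and instant time.
        String fileName = FSUtils.makeDataFileName(
            "20220101010101", "1-0-1", "e04b0e2d-1467-46b2-8ea6-f4fe950965a5", ".parquet");
        // getCommitTime parses the instant back out, which init() records as prevCommit.
        String prevCommit = FSUtils.getCommitTime(fileName);
        System.out.println(prevCommit); // 20220101010101
    }
}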

Example 18 with HoodieWriteStat

Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

From the class SparkRDDWriteClient, method completeClustering.

private void completeClustering(HoodieReplaceCommitMetadata metadata, HoodieTable table, String clusteringCommitTime) {
    List<HoodieWriteStat> writeStats = metadata.getPartitionToWriteStats().entrySet().stream().flatMap(e -> e.getValue().stream()).collect(Collectors.toList());
    if (writeStats.stream().mapToLong(s -> s.getTotalWriteErrors()).sum() > 0) {
        throw new HoodieClusteringException("Clustering failed to write to files:" + writeStats.stream().filter(s -> s.getTotalWriteErrors() > 0L).map(s -> s.getFileId()).collect(Collectors.joining(",")));
    }
    final HoodieInstant clusteringInstant = new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.REPLACE_COMMIT_ACTION, clusteringCommitTime);
    try {
        this.txnManager.beginTransaction(Option.of(clusteringInstant), Option.empty());
        finalizeWrite(table, clusteringCommitTime, writeStats);
        // Update the table's metadata (metadata table)
        updateTableMetadata(table, metadata, clusteringInstant);
        // Update the table's metadata indexes
        // NOTE: This overlaps w/ metadata table (above) and will be reconciled in the future
        table.updateMetadataIndexes(context, writeStats, clusteringCommitTime);
        LOG.info("Committing Clustering " + clusteringCommitTime + ". Finished with result " + metadata);
        table.getActiveTimeline().transitionReplaceInflightToComplete(HoodieTimeline.getReplaceCommitInflightInstant(clusteringCommitTime), Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    } catch (Exception e) {
        throw new HoodieClusteringException("unable to transition clustering inflight to complete: " + clusteringCommitTime, e);
    } finally {
        this.txnManager.endTransaction(Option.of(clusteringInstant));
    }
    WriteMarkersFactory.get(config.getMarkersType(), table, clusteringCommitTime).quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
    if (clusteringTimer != null) {
        long durationInMs = metrics.getDurationInMs(clusteringTimer.stop());
        try {
            metrics.updateCommitMetrics(HoodieActiveTimeline.parseDateFromInstantTime(clusteringCommitTime).getTime(), durationInMs, metadata, HoodieActiveTimeline.REPLACE_COMMIT_ACTION);
        } catch (ParseException e) {
            throw new HoodieCommitException("Commit time is not of valid format. Failed to commit clustering " + config.getBasePath() + " at time " + clusteringCommitTime, e);
        }
    }
    LOG.info("Clustering successfully on commit " + clusteringCommitTime);
}
Also used : DistributedRegistry(org.apache.hudi.metrics.DistributedRegistry) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWrapperFileSystem(org.apache.hudi.common.fs.HoodieWrapperFileSystem) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Option(org.apache.hudi.common.util.Option) HoodieCommitException(org.apache.hudi.exception.HoodieCommitException) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) Logger(org.apache.log4j.Logger) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) BulkInsertPartitioner(org.apache.hudi.table.BulkInsertPartitioner) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) SparkHoodieBackedTableMetadataWriter(org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) Registry(org.apache.hudi.common.metrics.Registry) ParseException(java.text.ParseException) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) JavaRDD(org.apache.spark.api.java.JavaRDD) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieData(org.apache.hudi.common.data.HoodieData) TableServiceType(org.apache.hudi.common.model.TableServiceType) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) WriteMarkersFactory(org.apache.hudi.table.marker.WriteMarkersFactory) SparkUpgradeDowngradeHelper(org.apache.hudi.table.upgrade.SparkUpgradeDowngradeHelper) CompactHelpers(org.apache.hudi.table.action.compact.CompactHelpers) SparkConf(org.apache.spark.SparkConf) HoodieClusteringException(org.apache.hudi.exception.HoodieClusteringException) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) Collectors(java.util.stream.Collectors) HoodieIndex(org.apache.hudi.index.HoodieIndex) StandardCharsets(java.nio.charset.StandardCharsets) TransactionUtils(org.apache.hudi.client.utils.TransactionUtils) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) List(java.util.List) EmbeddedTimelineService(org.apache.hudi.client.embedded.EmbeddedTimelineService) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) SparkHoodieIndexFactory(org.apache.hudi.index.SparkHoodieIndexFactory) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) Timer(com.codahale.metrics.Timer) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) LogManager(org.apache.log4j.LogManager)
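
The guard at the top of completeClustering is a reusable pattern; a standalone sketch of the same check (the class and method names here are mine, not Hudi's):

import java.util.List;
import java.util.stream.Collectors;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.exception.HoodieClusteringException;

public class WriteErrorGuard {

    // Fail fast if any stat recorded write errors, naming the offending file groups.
    static void ensureNoWriteErrors(List<HoodieWriteStat> writeStats) {
        long totalErrors = writeStats.stream().mapToLong(HoodieWriteStat::getTotalWriteErrors).sum();
        if (totalErrors > 0) {
            String badFileIds = writeStats.stream()
                    .filter(s -> s.getTotalWriteErrors() > 0L)
                    .map(HoodieWriteStat::getFileId)
                    .collect(Collectors.joining(","));
            throw new HoodieClusteringException("Clustering failed to write to files:" + badFileIds);
        }
    }
}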

Example 19 with HoodieWriteStat

Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

From the class TestHoodieTableFileSystemView, method testHoodieTableFileSystemViewWithPendingClustering.

/**
 * create hoodie table like
 * .
 * ├── .hoodie
 * │   ├── .aux
 * │   │   └── .bootstrap
 * │   │       ├── .fileids
 * │   │       └── .partitions
 * │   ├── .temp
 * │   ├── 1.commit
 * │   ├── 1.commit.requested
 * │   ├── 1.inflight
 * │   ├── 2.replacecommit
 * │   ├── 2.replacecommit.inflight
 * │   ├── 2.replacecommit.requested
 * │   ├── 3.commit
 * │   ├── 3.commit.requested
 * │   ├── 3.inflight
 * │   ├── archived
 * │   └── hoodie.properties
 * └── 2020
 *     └── 06
 *         └── 27
 *             ├── 5fe477d2-0150-46d4-833c-1e9cc8da9948_1-0-1_3.parquet
 *             ├── 7e3208c8-fdec-4254-9682-8fff1e51ee8d_1-0-1_2.parquet
 *             ├── e04b0e2d-1467-46b2-8ea6-f4fe950965a5_1-0-1_1.parquet
 *             └── f3936b66-b3db-4fc8-a6d0-b1a7559016e6_1-0-1_1.parquet
 *
 * First, test the fsView APIs with clustering committed:
 *  1. getLatestBaseFilesBeforeOrOn
 *  2. getBaseFileOn
 *  3. getLatestBaseFilesInRange
 *  4. getAllBaseFiles
 *  5. getLatestBaseFiles
 *
 * Then remove 2.replacecommit, 1.commit, 1.commit.requested, 1.inflight to simulate
 * pending clustering at the earliest position in the active timeline and test these APIs again.
 *
 * @throws IOException
 */
@Test
public void testHoodieTableFileSystemViewWithPendingClustering() throws IOException {
    List<String> latestBaseFilesBeforeOrOn;
    Option<HoodieBaseFile> baseFileOn;
    List<String> latestBaseFilesInRange;
    List<String> allBaseFiles;
    List<String> latestBaseFiles;
    List<String> latestBaseFilesPerPartition;
    String partitionPath = "2020/06/27";
    new File(basePath + "/" + partitionPath).mkdirs();
    HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
    // will create 5 fileIds in the partition.
    // fileId1 and fileId2 will be replaced by fileId3
    // fileId4 and fileId5 will be committed after clustering finishes.
    String fileId1 = UUID.randomUUID().toString();
    String fileId2 = UUID.randomUUID().toString();
    String fileId3 = UUID.randomUUID().toString();
    String fileId4 = UUID.randomUUID().toString();
    String fileId5 = UUID.randomUUID().toString();
    assertFalse(roView.getLatestBaseFiles(partitionPath).anyMatch(dfile -> dfile.getFileId().equals(fileId1) || dfile.getFileId().equals(fileId2) || dfile.getFileId().equals(fileId3) || dfile.getFileId().equals(fileId4) || dfile.getFileId().equals(fileId5)), "No commit, should not find any data file");
    // first insert commit
    String commitTime1 = "1";
    String fileName1 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1);
    String fileName2 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2);
    new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile();
    new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile();
    HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime1);
    // build writeStats
    HashMap<String, List<String>> partitionToFile1 = new HashMap<>();
    ArrayList<String> files1 = new ArrayList<>();
    files1.add(fileId1);
    files1.add(fileId2);
    partitionToFile1.put(partitionPath, files1);
    List<HoodieWriteStat> writeStats1 = buildWriteStats(partitionToFile1, commitTime1);
    HoodieCommitMetadata commitMetadata1 = CommitUtils.buildMetadata(writeStats1, new HashMap<>(), Option.empty(), WriteOperationType.INSERT, "", HoodieTimeline.COMMIT_ACTION);
    saveAsComplete(commitTimeline, instant1, Option.of(commitMetadata1.toJsonString().getBytes(StandardCharsets.UTF_8)));
    commitTimeline.reload();
    // replace commit
    String commitTime2 = "2";
    String fileName3 = FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId3);
    new File(basePath + "/" + partitionPath + "/" + fileName3).createNewFile();
    HoodieInstant instant2 = new HoodieInstant(true, HoodieTimeline.REPLACE_COMMIT_ACTION, commitTime2);
    Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
    List<String> replacedFileIds = new ArrayList<>();
    replacedFileIds.add(fileId1);
    replacedFileIds.add(fileId2);
    partitionToReplaceFileIds.put(partitionPath, replacedFileIds);
    HashMap<String, List<String>> partitionToFile2 = new HashMap<>();
    ArrayList<String> files2 = new ArrayList<>();
    files2.add(fileId3);
    partitionToFile2.put(partitionPath, files2);
    List<HoodieWriteStat> writeStats2 = buildWriteStats(partitionToFile2, commitTime2);
    HoodieCommitMetadata commitMetadata2 = CommitUtils.buildMetadata(writeStats2, partitionToReplaceFileIds, Option.empty(), WriteOperationType.INSERT_OVERWRITE, "", HoodieTimeline.REPLACE_COMMIT_ACTION);
    saveAsComplete(commitTimeline, instant2, Option.of(commitMetadata2.toJsonString().getBytes(StandardCharsets.UTF_8)));
    // another insert commit
    String commitTime3 = "3";
    String fileName4 = FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId4);
    new File(basePath + "/" + partitionPath + "/" + fileName4).createNewFile();
    HoodieInstant instant3 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime3);
    // build writeStats
    HashMap<String, List<String>> partitionToFile3 = new HashMap<>();
    ArrayList<String> files3 = new ArrayList<>();
    files3.add(fileId4);
    partitionToFile3.put(partitionPath, files3);
    List<HoodieWriteStat> writeStats3 = buildWriteStats(partitionToFile3, commitTime3);
    HoodieCommitMetadata commitMetadata3 = CommitUtils.buildMetadata(writeStats3, new HashMap<>(), Option.empty(), WriteOperationType.INSERT, "", HoodieTimeline.COMMIT_ACTION);
    saveAsComplete(commitTimeline, instant3, Option.of(commitMetadata3.toJsonString().getBytes(StandardCharsets.UTF_8)));
    metaClient.reloadActiveTimeline();
    refreshFsView();
    ArrayList<String> commits = new ArrayList<>();
    commits.add(commitTime1);
    commits.add(commitTime2);
    commits.add(commitTime3);
    // do check
    latestBaseFilesBeforeOrOn = fsView.getLatestBaseFilesBeforeOrOn(partitionPath, commitTime3).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
    assertEquals(2, latestBaseFilesBeforeOrOn.size());
    assertTrue(latestBaseFilesBeforeOrOn.contains(fileId3));
    assertTrue(latestBaseFilesBeforeOrOn.contains(fileId4));
    // could see fileId3 because clustering is committed.
    baseFileOn = fsView.getBaseFileOn(partitionPath, commitTime2, fileId3);
    assertTrue(baseFileOn.isPresent());
    assertEquals(baseFileOn.get().getFileId(), fileId3);
    latestBaseFilesInRange = fsView.getLatestBaseFilesInRange(commits).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
    assertEquals(2, latestBaseFilesInRange.size());
    assertTrue(latestBaseFilesInRange.contains(fileId3));
    assertTrue(latestBaseFilesInRange.contains(fileId4));
    allBaseFiles = fsView.getAllBaseFiles(partitionPath).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
    assertEquals(2, allBaseFiles.size());
    assertTrue(allBaseFiles.contains(fileId3));
    assertTrue(allBaseFiles.contains(fileId4));
    // could see fileId3 because clustering is committed.
    latestBaseFiles = fsView.getLatestBaseFiles().map(HoodieBaseFile::getFileId).collect(Collectors.toList());
    assertEquals(2, latestBaseFiles.size());
    assertTrue(latestBaseFiles.contains(fileId3));
    assertTrue(latestBaseFiles.contains(fileId4));
    // could see fileId3 because clustering is committed.
    latestBaseFilesPerPartition = fsView.getLatestBaseFiles(partitionPath).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
    assertEquals(2, latestBaseFilesPerPartition.size());
    assertTrue(latestBaseFilesPerPartition.contains(fileId3));
    assertTrue(latestBaseFilesPerPartition.contains(fileId4));
    HoodieWrapperFileSystem fs = metaClient.getFs();
    fs.delete(new Path(basePath + "/.hoodie", "1.commit"), false);
    fs.delete(new Path(basePath + "/.hoodie", "1.inflight"), false);
    fs.delete(new Path(basePath + "/.hoodie", "1.commit.requested"), false);
    fs.delete(new Path(basePath + "/.hoodie", "2.replacecommit"), false);
    metaClient.reloadActiveTimeline();
    refreshFsView();
    // do check after delete some commit file
    latestBaseFilesBeforeOrOn = fsView.getLatestBaseFilesBeforeOrOn(partitionPath, commitTime3).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
    assertEquals(3, latestBaseFilesBeforeOrOn.size());
    assertTrue(latestBaseFilesBeforeOrOn.contains(fileId1));
    assertTrue(latestBaseFilesBeforeOrOn.contains(fileId2));
    assertTrue(latestBaseFilesBeforeOrOn.contains(fileId4));
    // couldn't see fileId3 because clustering is not committed.
    baseFileOn = fsView.getBaseFileOn(partitionPath, commitTime2, fileId3);
    assertFalse(baseFileOn.isPresent());
    latestBaseFilesInRange = fsView.getLatestBaseFilesInRange(commits).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
    assertEquals(3, latestBaseFilesInRange.size());
    assertTrue(latestBaseFilesInRange.contains(fileId1));
    assertTrue(latestBaseFilesInRange.contains(fileId2));
    assertTrue(latestBaseFilesInRange.contains(fileId4));
    allBaseFiles = fsView.getAllBaseFiles(partitionPath).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
    assertEquals(3, allBaseFiles.size());
    assertTrue(allBaseFiles.contains(fileId1));
    assertTrue(allBaseFiles.contains(fileId2));
    assertTrue(allBaseFiles.contains(fileId4));
    // couldn't see fileId3 because clustering is not committed.
    latestBaseFiles = fsView.getLatestBaseFiles().map(HoodieBaseFile::getFileId).collect(Collectors.toList());
    assertEquals(3, latestBaseFiles.size());
    assertTrue(latestBaseFiles.contains(fileId1));
    assertTrue(latestBaseFiles.contains(fileId2));
    assertTrue(latestBaseFiles.contains(fileId4));
    // couldn't see fileId3 because clustering is not committed.
    latestBaseFilesPerPartition = fsView.getLatestBaseFiles(partitionPath).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
    assertEquals(3, latestBaseFilesPerPartition.size());
    assertTrue(latestBaseFilesPerPartition.contains(fileId1));
    assertTrue(latestBaseFilesPerPartition.contains(fileId2));
    assertTrue(latestBaseFilesPerPartition.contains(fileId4));
}
Also used : BeforeEach(org.junit.jupiter.api.BeforeEach) HoodieWrapperFileSystem(org.apache.hudi.common.fs.HoodieWrapperFileSystem) Arrays(java.util.Arrays) Date(java.util.Date) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileStatus(org.apache.hadoop.fs.FileStatus) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) BaseFile(org.apache.hudi.common.model.BaseFile) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) MethodSource(org.junit.jupiter.params.provider.MethodSource) HoodiePath(org.apache.hudi.avro.model.HoodiePath) HoodieFSPermission(org.apache.hudi.avro.model.HoodieFSPermission) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) Set(java.util.Set) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) UUID(java.util.UUID) Arguments(org.junit.jupiter.params.provider.Arguments) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) Test(org.junit.jupiter.api.Test) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) Stream(java.util.stream.Stream) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) ClusteringUtils(org.apache.hudi.common.util.ClusteringUtils) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) SliceView(org.apache.hudi.common.table.view.TableFileSystemView.SliceView) Assertions.assertDoesNotThrow(org.junit.jupiter.api.Assertions.assertDoesNotThrow) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) FileSlice(org.apache.hudi.common.model.FileSlice) IndexWriter(org.apache.hudi.common.bootstrap.index.BootstrapIndex.IndexWriter) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) CommitUtils(org.apache.hudi.common.util.CommitUtils) FsAction(org.apache.hadoop.fs.permission.FsAction) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) HFileBootstrapIndex(org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping) BaseFileOnlyView(org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) File(java.io.File) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Paths(java.nio.file.Paths) FileStatusUtils(org.apache.hudi.common.bootstrap.FileStatusUtils) LogManager(org.apache.log4j.LogManager) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair)
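
The test above leans on a buildWriteStats helper that this excerpt omits. A hypothetical reconstruction, assuming the file-system view only needs the fileId, partition and path filled in (the write token and extension in the path are illustrative):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.apache.hudi.common.model.HoodieWriteStat;

public class WriteStatsHelper {

    // Hypothetical sketch of buildWriteStats: one stat per fileId, with a path
    // derived from the partition, fileId and commit time.
    static List<HoodieWriteStat> buildWriteStats(HashMap<String, List<String>> partitionToFileIds, String commitTime) {
        List<HoodieWriteStat> writeStats = new ArrayList<>();
        partitionToFileIds.forEach((partition, fileIds) -> {
            for (String fileId : fileIds) {
                HoodieWriteStat stat = new HoodieWriteStat();
                stat.setFileId(fileId);
                stat.setPartitionPath(partition);
                stat.setPath(partition + "/" + fileId + "_1-0-1_" + commitTime + ".parquet");
                writeStats.add(stat);
            }
        });
        return writeStats;
    }
}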

Example 20 with HoodieWriteStat

Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

From the class TestIncrementalFSViewSync, method addReplaceInstant.

private List<String> addReplaceInstant(HoodieTableMetaClient metaClient, String instant, List<Pair<String, HoodieWriteStat>> writeStats, Map<String, List<String>> partitionToReplaceFileIds) throws IOException {
    // create the requested instant
    HoodieInstant newRequestedInstant = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, instant);
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = HoodieRequestedReplaceMetadata.newBuilder().setOperationType(WriteOperationType.UNKNOWN.name()).build();
    metaClient.getActiveTimeline().saveToPendingReplaceCommit(newRequestedInstant, TimelineMetadataUtils.serializeRequestedReplaceMetadata(requestedReplaceMetadata));
    metaClient.reloadActiveTimeline();
    // transition to inflight
    HoodieInstant inflightInstant = metaClient.getActiveTimeline().transitionReplaceRequestedToInflight(newRequestedInstant, Option.empty());
    // transition to replacecommit
    HoodieReplaceCommitMetadata replaceCommitMetadata = new HoodieReplaceCommitMetadata();
    writeStats.forEach(e -> replaceCommitMetadata.addWriteStat(e.getKey(), e.getValue()));
    replaceCommitMetadata.setPartitionToReplaceFileIds(partitionToReplaceFileIds);
    metaClient.getActiveTimeline().saveAsComplete(inflightInstant, Option.of(replaceCommitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    return writeStats.stream().map(e -> e.getValue().getPath()).collect(Collectors.toList());
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) HoodieException(org.apache.hudi.exception.HoodieException) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) COMPACTION_ACTION(org.apache.hudi.common.table.timeline.HoodieTimeline.COMPACTION_ACTION) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) Path(org.apache.hadoop.fs.Path) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) Set(java.util.Set) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) UUID(java.util.UUID) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) Test(org.junit.jupiter.api.Test) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieRestoreMetadata(org.apache.hudi.avro.model.HoodieRestoreMetadata) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) IntStream(java.util.stream.IntStream) HoodieCleaningPolicy(org.apache.hudi.common.model.HoodieCleaningPolicy) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) ArrayList(java.util.ArrayList) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) CleanerUtils(org.apache.hudi.common.util.CleanerUtils) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) Files(java.nio.file.Files) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) File(java.io.File) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) Paths(java.nio.file.Paths) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) Comparator(java.util.Comparator) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair)
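
A hedged usage sketch for the helper above. Since addReplaceInstant is private, this would live inside TestIncrementalFSViewSync itself; the partition, fileIds and instant are illustrative, and the imports match the "Also used" list:

// Assumed to sit inside TestIncrementalFSViewSync, next to addReplaceInstant.
private void replaceOneFileGroup(HoodieTableMetaClient metaClient) throws IOException {
    HoodieWriteStat stat = new HoodieWriteStat();
    stat.setFileId("new-file-id");
    stat.setPath("2020/06/27/new-file-id_1-0-1_3.parquet");
    List<Pair<String, HoodieWriteStat>> writeStats =
        Collections.singletonList(Pair.of("2020/06/27", stat));
    Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();
    partitionToReplaceFileIds.put("2020/06/27", Collections.singletonList("old-file-id"));
    // Returns the paths written under the new replacecommit.
    List<String> writtenPaths = addReplaceInstant(metaClient, "3", writeStats, partitionToReplaceFileIds);
}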

Aggregations

HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat) 74
HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata) 42
List (java.util.List) 38
ArrayList (java.util.ArrayList) 33
HashMap (java.util.HashMap) 32
Map (java.util.Map) 32
Path (org.apache.hadoop.fs.Path) 28
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant) 24
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline) 23
IOException (java.io.IOException) 22
Option (org.apache.hudi.common.util.Option) 19
Collectors (java.util.stream.Collectors) 18
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient) 18
WriteStatus (org.apache.hudi.client.WriteStatus) 17
HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata) 17
LogManager (org.apache.log4j.LogManager) 16
Logger (org.apache.log4j.Logger) 16
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig) 15
FileSlice (org.apache.hudi.common.model.FileSlice) 14
HoodieRecord (org.apache.hudi.common.model.HoodieRecord) 14