Example 6 with HoodieTestTable

Use of org.apache.hudi.common.testutils.HoodieTestTable in project hudi by apache.

From the class HoodieClientTestHarness, the method validateMetadata:

/**
 * Validate the metadata table's contents to ensure they match what is on the file system.
 */
public void validateMetadata(HoodieTestTable testTable, List<String> inflightCommits, HoodieWriteConfig writeConfig, String metadataTableBasePath, boolean doFullValidation) throws IOException {
    HoodieTableMetadata tableMetadata = metadata(writeConfig, context);
    assertNotNull(tableMetadata, "MetadataReader should have been initialized");
    if (!writeConfig.isMetadataTableEnabled()) {
        return;
    }
    if (!tableMetadata.getSyncedInstantTime().isPresent() || tableMetadata instanceof FileSystemBackedTableMetadata) {
        throw new IllegalStateException("Metadata should have synced some commits or tableMetadata should not be an instance " + "of FileSystemBackedTableMetadata");
    }
    assertEquals(inflightCommits, testTable.inflightCommits());
    HoodieTimer timer = new HoodieTimer().startTimer();
    HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
    // Partitions should match
    List<java.nio.file.Path> fsPartitionPaths = testTable.getAllPartitionPaths();
    List<String> fsPartitions = new ArrayList<>();
    fsPartitionPaths.forEach(entry -> fsPartitions.add(entry.getFileName().toString()));
    if (fsPartitions.isEmpty()) {
        fsPartitions.add("");
    }
    List<String> metadataPartitions = tableMetadata.getAllPartitionPaths();
    Collections.sort(fsPartitions);
    Collections.sort(metadataPartitions);
    assertEquals(fsPartitions.size(), metadataPartitions.size(), "Partitions should match");
    assertEquals(fsPartitions, metadataPartitions, "Partitions should match");
    // Files within each partition should match
    metaClient = HoodieTableMetaClient.reload(metaClient);
    HoodieTable table = HoodieSparkTable.create(writeConfig, engineContext, true);
    TableFileSystemView tableView = table.getHoodieView();
    List<String> fullPartitionPaths = fsPartitions.stream().map(partition -> basePath + "/" + partition).collect(Collectors.toList());
    Map<String, FileStatus[]> partitionToFilesMap = tableMetadata.getAllFilesInPartitions(fullPartitionPaths);
    assertEquals(fsPartitions.size(), partitionToFilesMap.size());
    fsPartitions.forEach(partition -> {
        try {
            validateFilesPerPartition(testTable, tableMetadata, tableView, partitionToFilesMap, partition);
        } catch (IOException e) {
            fail("Exception should not be raised: " + e);
        }
    });
    if (doFullValidation) {
        runFullValidation(writeConfig, metadataTableBasePath, engineContext);
    }
    LOG.info("Validation time=" + timer.endTimer());
}
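For orientation, here is a minimal sketch of how a harness test might drive this helper. It assumes the surrounding test class provides metaClient, writeConfig, and basePath just as the method above does; the .hoodie/metadata location is the conventional metadata table path and is stated here as an assumption rather than read from config.

// Sketch only: build some table state through HoodieTestTable, then validate.
HoodieTestTable testTable = HoodieTestTable.of(metaClient);
testTable.addCommit("001").withBaseFilesInPartition("2020/01/01", "file-1");

// Assumed convention: the metadata table lives under <basePath>/.hoodie/metadata.
String metadataTableBasePath = basePath + "/.hoodie/metadata";

// No commits should be left inflight; request the full validation pass.
validateMetadata(testTable, Collections.emptyList(), writeConfig, metadataTableBasePath, true);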

Example 7 with HoodieTestTable

Use of org.apache.hudi.common.testutils.HoodieTestTable in project hudi by apache.

From the class HoodieClientTestHarness, the method validateFilesPerPartition:

protected void validateFilesPerPartition(HoodieTestTable testTable, HoodieTableMetadata tableMetadata, TableFileSystemView tableView, Map<String, FileStatus[]> partitionToFilesMap, String partition) throws IOException {
    Path partitionPath;
    if (partition.equals("")) {
        // Should be the non-partitioned case
        partitionPath = new Path(basePath);
    } else {
        partitionPath = new Path(basePath, partition);
    }
    FileStatus[] fsStatuses = testTable.listAllFilesInPartition(partition);
    FileStatus[] metaStatuses = tableMetadata.getAllFilesInPartition(partitionPath);
    List<String> fsFileNames = Arrays.stream(fsStatuses).map(s -> s.getPath().getName()).collect(Collectors.toList());
    List<String> metadataFilenames = Arrays.stream(metaStatuses).map(s -> s.getPath().getName()).collect(Collectors.toList());
    Collections.sort(fsFileNames);
    Collections.sort(metadataFilenames);
    assertLinesMatch(fsFileNames, metadataFilenames);
    assertEquals(fsStatuses.length, partitionToFilesMap.get(partitionPath.toString()).length);
    // Block sizes should be valid
    Arrays.stream(metaStatuses).forEach(s -> assertTrue(s.getBlockSize() > 0));
    List<Long> fsBlockSizes = Arrays.stream(fsStatuses).map(FileStatus::getBlockSize).sorted().collect(Collectors.toList());
    List<Long> metadataBlockSizes = Arrays.stream(metaStatuses).map(FileStatus::getBlockSize).sorted().collect(Collectors.toList());
    assertEquals(fsBlockSizes, metadataBlockSizes);
    assertEquals(fsFileNames.size(), metadataFilenames.size(), "Files within partition " + partition + " should match");
    assertEquals(fsFileNames, metadataFilenames, "Files within partition " + partition + " should match");
    // FileSystemView should expose the same data
    List<HoodieFileGroup> fileGroups = tableView.getAllFileGroups(partition).collect(Collectors.toList());
    fileGroups.addAll(tableView.getAllReplacedFileGroups(partition).collect(Collectors.toList()));
    fileGroups.forEach(g -> LogManager.getLogger(getClass()).info(g));
    fileGroups.forEach(g -> g.getAllBaseFiles().forEach(b -> LogManager.getLogger(getClass()).info(b)));
    fileGroups.forEach(g -> g.getAllFileSlices().forEach(s -> LogManager.getLogger(getClass()).info(s)));
    long numFiles = fileGroups.stream().mapToLong(g -> g.getAllBaseFiles().count() + g.getAllFileSlices().mapToLong(s -> s.getLogFiles().count()).sum()).sum();
    assertEquals(metadataFilenames.size(), numFiles);
}
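Stripped of the Hudi types, the validation pattern above reduces to: obtain the same listing from two sources, normalize ordering, then compare. A self-contained restatement of that core step in plain Java (the class and method names are hypothetical):

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

final class ListingCheck {
    // Mirrors the sort-then-compare step used in validateFilesPerPartition.
    static void assertSameListing(List<String> fsNames, List<String> metadataNames) {
        // Copy before sorting so the callers' lists are not mutated.
        List<String> fs = new ArrayList<>(fsNames);
        List<String> md = new ArrayList<>(metadataNames);
        Collections.sort(fs);
        Collections.sort(md);
        if (!fs.equals(md)) {
            throw new AssertionError("Listings diverge: fs=" + fs + ", metadata=" + md);
        }
    }
}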

Example 8 with HoodieTestTable

Use of org.apache.hudi.common.testutils.HoodieTestTable in project hudi by apache.

From the class TestMarkerBasedRollbackStrategy, the method testCopyOnWriteRollbackWithTestTable:

@Test
public void testCopyOnWriteRollbackWithTestTable() throws Exception {
    // given: wrote some base files and corresponding markers
    HoodieTestTable testTable = HoodieTestTable.of(metaClient);
    String f0 = testTable.addRequestedCommit("000")
        .getFileIdsWithBaseFilesInPartitions("partA").get("partA");
    String f1 = testTable.addCommit("001")
        .withBaseFilesInPartition("partA", f0)
        .getFileIdsWithBaseFilesInPartitions("partB").get("partB");
    String f2 = "f2";
    testTable.forCommit("001")
        .withMarkerFile("partA", f0, IOType.MERGE)
        .withMarkerFile("partB", f1, IOType.CREATE)
        .withMarkerFile("partA", f2, IOType.CREATE);
    // when
    HoodieTable hoodieTable = HoodieSparkTable.create(getConfig(), context, metaClient);
    List<HoodieRollbackRequest> rollbackRequests = new MarkerBasedRollbackStrategy(hoodieTable, context, getConfig(), "002").getRollbackRequests(new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "001"));
    List<HoodieRollbackStat> stats = new BaseRollbackHelper(hoodieTable.getMetaClient(), getConfig()).performRollback(context, new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "001"), rollbackRequests);
    // then: ensure files are deleted correctly, non-existent files reported as failed deletes
    assertEquals(2, stats.size());
    FileStatus[] partAFiles = testTable.listAllFilesInPartition("partA");
    FileStatus[] partBFiles = testTable.listAllFilesInPartition("partB");
    assertEquals(0, partBFiles.length);
    assertEquals(1, partAFiles.length);
    assertEquals(2, stats.stream().mapToInt(r -> r.getSuccessDeleteFiles().size()).sum());
    assertEquals(1, stats.stream().mapToInt(r -> r.getFailedDeleteFiles().size()).sum());
}
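The arithmetic behind the last two assertions deserves a note: f0 (MERGE marker) and f1 (CREATE marker) both point at base files that exist, so their deletes succeed, while f2 only ever had a marker and no data file, so its delete is reported as failed. A compact restatement of that accounting, using the HoodieRollbackStat getters already shown above:

// Aggregate rollback outcomes across partitions (stats comes from performRollback above).
int deleted = stats.stream().mapToInt(s -> s.getSuccessDeleteFiles().size()).sum();
int missing = stats.stream().mapToInt(s -> s.getFailedDeleteFiles().size()).sum();
assertEquals(2, deleted); // f0's and f1's base files existed and were removed
assertEquals(1, missing); // f2 had a marker but no file on disk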

Example 9 with HoodieTestTable

Use of org.apache.hudi.common.testutils.HoodieTestTable in project hudi by apache.

From the class TestCopyOnWriteRollbackActionExecutor, the method testCopyOnWriteRollbackActionExecutorForFileListingAsGenerateFile:

@Test
public void testCopyOnWriteRollbackActionExecutorForFileListingAsGenerateFile() throws Exception {
    final String p1 = "2015/03/16";
    final String p2 = "2015/03/17";
    final String p3 = "2016/03/15";
    // Let's create some commit files and base files
    HoodieTestTable testTable = HoodieTestTable.of(metaClient)
        .withPartitionMetaFiles(p1, p2, p3)
        .addCommit("001")
        .withBaseFilesInPartition(p1, "id11")
        .withBaseFilesInPartition(p2, "id12")
        .withLogFile(p1, "id11", 3)
        .addCommit("002")
        .withBaseFilesInPartition(p1, "id21")
        .withBaseFilesInPartition(p2, "id22");
    HoodieWriteConfig writeConfig = getConfigBuilder().withRollbackUsingMarkers(false).build();
    HoodieTable table = this.getHoodieTable(metaClient, writeConfig);
    HoodieInstant needRollBackInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "002");
    // execute CopyOnWriteRollbackActionExecutor with filelisting mode
    BaseRollbackPlanActionExecutor copyOnWriteRollbackPlanActionExecutor = new BaseRollbackPlanActionExecutor(context, table.getConfig(), table, "003", needRollBackInstant, false, table.getConfig().shouldRollbackUsingMarkers());
    HoodieRollbackPlan rollbackPlan = (HoodieRollbackPlan) copyOnWriteRollbackPlanActionExecutor.execute().get();
    CopyOnWriteRollbackActionExecutor copyOnWriteRollbackActionExecutor = new CopyOnWriteRollbackActionExecutor(context, table.getConfig(), table, "003", needRollBackInstant, true, false);
    List<HoodieRollbackStat> hoodieRollbackStats = copyOnWriteRollbackActionExecutor.executeRollback(rollbackPlan);
    // verify the per-partition rollback stats (expected value first)
    assertEquals(3, hoodieRollbackStats.size());
    for (HoodieRollbackStat stat : hoodieRollbackStats) {
        switch(stat.getPartitionPath()) {
            case p1:
                assertEquals(1, stat.getSuccessDeleteFiles().size());
                assertEquals(0, stat.getFailedDeleteFiles().size());
                assertEquals(Collections.EMPTY_MAP, stat.getCommandBlocksCount());
                assertEquals(testTable.forCommit("002").getBaseFilePath(p1, "id21").toString(), this.fs.getScheme() + ":" + stat.getSuccessDeleteFiles().get(0));
                break;
            case p2:
                assertEquals(1, stat.getSuccessDeleteFiles().size());
                assertEquals(0, stat.getFailedDeleteFiles().size());
                assertEquals(Collections.EMPTY_MAP, stat.getCommandBlocksCount());
                assertEquals(testTable.forCommit("002").getBaseFilePath(p2, "id22").toString(), this.fs.getScheme() + ":" + stat.getSuccessDeleteFiles().get(0));
                break;
            case p3:
                assertEquals(0, stat.getSuccessDeleteFiles().size());
                assertEquals(0, stat.getFailedDeleteFiles().size());
                assertEquals(Collections.EMPTY_MAP, stat.getCommandBlocksCount());
                break;
            default:
                fail("Unexpected partition: " + stat.getPartitionPath());
        }
    }
    assertTrue(testTable.inflightCommitExists("001"));
    assertTrue(testTable.commitExists("001"));
    assertTrue(testTable.baseFileExists(p1, "001", "id11"));
    assertTrue(testTable.baseFileExists(p2, "001", "id12"));
    assertFalse(testTable.inflightCommitExists("002"));
    assertFalse(testTable.commitExists("002"));
    assertFalse(testTable.baseFileExists(p1, "002", "id21"));
    assertFalse(testTable.baseFileExists(p2, "002", "id22"));
}
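The withRollbackUsingMarkers(false) setting is what forces the file-listing mode exercised here. As a minimal sketch of the toggle, assuming the same getConfigBuilder() harness helper used above:

// File-listing rollback: scan partitions to discover files written by the failed instant.
HoodieWriteConfig listingConfig = getConfigBuilder().withRollbackUsingMarkers(false).build();

// Marker-based rollback: consult the marker files written alongside the data files instead.
HoodieWriteConfig markerConfig = getConfigBuilder().withRollbackUsingMarkers(true).build();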

Example 10 with HoodieTestTable

Use of org.apache.hudi.common.testutils.HoodieTestTable in project hudi by apache.

From the class TestCleanPlanExecutor, the method testKeepXHoursWithCleaning:

/**
 * Tests the cleaning service based on the number of hours retained.
 */
@ParameterizedTest
@MethodSource("argumentsForTestKeepLatestCommits")
public void testKeepXHoursWithCleaning(boolean simulateFailureRetry, boolean enableIncrementalClean, boolean enableBootstrapSourceClean) throws Exception {
    HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
        .withPath(basePath)
        .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build())
        .withCompactionConfig(HoodieCompactionConfig.newBuilder()
            .withIncrementalCleaningMode(enableIncrementalClean)
            .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER)
            .withCleanBootstrapBaseFileEnabled(enableBootstrapSourceClean)
            .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS)
            .cleanerNumHoursRetained(2)
            .build())
        .build();
    HoodieTestTable testTable = HoodieTestTable.of(metaClient);
    String p0 = "2020/01/01";
    String p1 = "2020/01/02";
    Map<String, List<BootstrapFileMapping>> bootstrapMapping = enableBootstrapSourceClean ? generateBootstrapIndexAndSourceData(p0, p1) : null;
    String file1P0C0 = enableBootstrapSourceClean ? bootstrapMapping.get(p0).get(0).getFileId() : UUID.randomUUID().toString();
    String file1P1C0 = enableBootstrapSourceClean ? bootstrapMapping.get(p1).get(0).getFileId() : UUID.randomUUID().toString();
    Instant instant = Instant.now();
    ZonedDateTime commitDateTime = ZonedDateTime.ofInstant(instant, ZoneId.systemDefault());
    int minutesForFirstCommit = 150;
    String firstCommitTs = HoodieActiveTimeline.formatDate(Date.from(commitDateTime.minusMinutes(minutesForFirstCommit).toInstant()));
    testTable.addInflightCommit(firstCommitTs).withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p1, file1P1C0);
    HoodieCommitMetadata commitMetadata = generateCommitMetadata(firstCommitTs, Collections.unmodifiableMap(new HashMap<String, List<String>>() {

        {
            put(p0, CollectionUtils.createImmutableList(file1P0C0));
            put(p1, CollectionUtils.createImmutableList(file1P1C0));
        }
    }));
    metaClient.getActiveTimeline().saveAsComplete(new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, firstCommitTs), Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    metaClient = HoodieTableMetaClient.reload(metaClient);
    List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config, simulateFailureRetry);
    assertEquals(0, hoodieCleanStatsOne.size(), "Must not scan any partitions or clean any files");
    assertTrue(testTable.baseFileExists(p0, firstCommitTs, file1P0C0));
    assertTrue(testTable.baseFileExists(p1, firstCommitTs, file1P1C0));
    // make next commit, with 1 insert & 1 update per partition
    int minutesForSecondCommit = 90;
    String secondCommitTs = HoodieActiveTimeline.formatDate(Date.from(commitDateTime.minusMinutes(minutesForSecondCommit).toInstant()));
    Map<String, String> partitionAndFileId002 = testTable.addInflightCommit(secondCommitTs).getFileIdsWithBaseFilesInPartitions(p0, p1);
    String file2P0C1 = partitionAndFileId002.get(p0);
    String file2P1C1 = partitionAndFileId002.get(p1);
    testTable.forCommit(secondCommitTs).withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p1, file1P1C0);
    commitMetadata = generateCommitMetadata(secondCommitTs, new HashMap<String, List<String>>() {

        {
            put(p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1));
            put(p1, CollectionUtils.createImmutableList(file1P1C0, file2P1C1));
        }
    });
    metaClient.getActiveTimeline().saveAsComplete(new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, secondCommitTs), Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config, simulateFailureRetry);
    assertEquals(2, hoodieCleanStatsTwo.size(), "Should clean one file each from both the partitions");
    assertTrue(testTable.baseFileExists(p0, secondCommitTs, file2P0C1));
    assertTrue(testTable.baseFileExists(p1, secondCommitTs, file2P1C1));
    assertTrue(testTable.baseFileExists(p0, secondCommitTs, file1P0C0));
    assertTrue(testTable.baseFileExists(p1, secondCommitTs, file1P1C0));
    assertFalse(testTable.baseFileExists(p0, firstCommitTs, file1P0C0));
    assertFalse(testTable.baseFileExists(p1, firstCommitTs, file1P1C0));
}
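The timing arithmetic drives these assertions: with cleanerNumHoursRetained(2), the 150-minute-old first commit falls outside the two-hour retention window while the 90-minute-old second commit stays inside it, so the older versions of file1P0C0 and file1P1C0 are cleaned once the second commit has rewritten them. A small sketch of how the back-dated commit timestamps are derived, using the same calls as the test:

// Anchor "now", then back-date commit times relative to it.
Instant now = Instant.now();
ZonedDateTime base = ZonedDateTime.ofInstant(now, ZoneId.systemDefault());

// 150 minutes ago: older than the 2-hour retention window, eligible for cleaning.
String firstCommitTs = HoodieActiveTimeline.formatDate(Date.from(base.minusMinutes(150).toInstant()));

// 90 minutes ago: inside the window, so its files must be retained.
String secondCommitTs = HoodieActiveTimeline.formatDate(Date.from(base.minusMinutes(90).toInstant()));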

Aggregations

Classes most frequently used together with HoodieTestTable across the indexed examples (usage counts):

HoodieTestTable (org.apache.hudi.common.testutils.HoodieTestTable): 22
Test (org.junit.jupiter.api.Test): 17
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 14
List (java.util.List): 12
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 11
HashMap (java.util.HashMap): 10
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 10
HoodieCleanStat (org.apache.hudi.common.HoodieCleanStat): 9
Map (java.util.Map): 8
FileStatus (org.apache.hadoop.fs.FileStatus): 8
IOException (java.io.IOException): 7
Arrays (java.util.Arrays): 7
Collections (java.util.Collections): 7
Collectors (java.util.stream.Collectors): 7
Path (org.apache.hadoop.fs.Path): 7
HoodieCommonTestHarness (org.apache.hudi.common.testutils.HoodieCommonTestHarness): 7
HoodieTableMetadataWriter (org.apache.hudi.metadata.HoodieTableMetadataWriter): 7
Assertions (org.junit.jupiter.api.Assertions): 7
BeforeEach (org.junit.jupiter.api.BeforeEach): 7
ArrayList (java.util.ArrayList): 5