Example 16 with HoodieCompactionPlan

use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.

the class TestCompactionUtils method testGetAllPendingCompactionOperationsWithDupFileId.

@Test
public void testGetAllPendingCompactionOperationsWithDupFileId() throws IOException {
    // Case where there are duplicate fileIds across compaction requests
    HoodieCompactionPlan plan1 = createCompactionPlan(metaClient, "000", "001", 10, true, true);
    HoodieCompactionPlan plan2 = createCompactionPlan(metaClient, "002", "003", 0, false, false);
    scheduleCompaction(metaClient, "001", plan1);
    scheduleCompaction(metaClient, "003", plan2);
    // Schedule a similar plan again, mutating one operation's data file path so the
    // duplicate entry conflicts with the original rather than matching it exactly
    plan1.getOperations().get(0).setDataFilePath("bla");
    scheduleCompaction(metaClient, "005", plan1);
    metaClient = HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath).setLoadActiveTimelineOnLoad(true).build();
    assertThrows(IllegalStateException.class,
        () -> CompactionUtils.getAllPendingCompactionOperations(metaClient));
}
Also used : HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
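
For context, the failure above comes from merging two pending plans that claim the same file group with different contents. A minimal sketch of that fail-fast pattern (not Hudi's actual implementation; PendingOpsMerger and putChecked are hypothetical names) tolerates an exact duplicate but rejects a conflicting one:

import java.util.Map;

// Hypothetical sketch of duplicate handling when merging pending compaction
// operations keyed by file group; not Hudi's actual implementation.
public class PendingOpsMerger {

    // Inserts an operation for a file group, failing fast when the same file
    // group is already claimed by a *different* operation. An exact duplicate
    // (equals() returns true) is silently tolerated.
    public static <K, V> void putChecked(Map<K, V> pending, K fileGroupId, V operation) {
        V existing = pending.putIfAbsent(fileGroupId, operation);
        if (existing != null && !existing.equals(operation)) {
            throw new IllegalStateException(
                "Conflicting pending compaction for file group " + fileGroupId);
        }
    }
}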

Example 17 with HoodieCompactionPlan

use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.

the class TestCompactionUtils method testCompactionTransformation.

@Test
public void testCompactionTransformation() {
    // check HoodieCompactionOperation <=> CompactionOperation transformation function
    Pair<List<Pair<String, FileSlice>>, HoodieCompactionPlan> inputAndPlan = buildCompactionPlan();
    HoodieCompactionPlan plan = inputAndPlan.getRight();
    List<HoodieCompactionOperation> originalOps = plan.getOperations();
    // Convert to CompactionOperation
    // Convert back to HoodieCompactionOperation and check for equality
    List<HoodieCompactionOperation> regeneratedOps = originalOps.stream().map(CompactionUtils::buildCompactionOperation).map(CompactionUtils::buildHoodieCompactionOperation).collect(Collectors.toList());
    assertTrue(originalOps.size() > 0, "Operations list must be non-empty so the transformation is actually exercised");
    assertEquals(originalOps, regeneratedOps, "All fields set correctly in transformations");
}
Also used : HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) ArrayList(java.util.ArrayList) List(java.util.List) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
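
The test above is a simple round-trip property check: map each operation through the forward converter and back through the inverse, then compare against the originals. A generic sketch of the same idiom (the helper name assertRoundTrip is hypothetical; only the JUnit and stream APIs already used above are assumed):

import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors;
import static org.junit.jupiter.api.Assertions.assertEquals;

// Hypothetical generic helper capturing the round-trip idiom: for inverse
// mappers to: A -> B and from: B -> A, mapping there and back must reproduce
// the original list element for element.
static <A, B> void assertRoundTrip(List<A> inputs, Function<A, B> to, Function<B, A> from) {
    List<A> regenerated = inputs.stream().map(to).map(from).collect(Collectors.toList());
    assertEquals(inputs, regenerated, "Round trip should preserve all fields");
}

Applied to the test above, this would read: assertRoundTrip(originalOps, CompactionUtils::buildCompactionOperation, CompactionUtils::buildHoodieCompactionOperation);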

Example 18 with HoodieCompactionPlan

use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.

the class TestCompactionUtils method buildCompactionPlan.

/**
 * Generate input for compaction plan tests.
 */
private Pair<List<Pair<String, FileSlice>>, HoodieCompactionPlan> buildCompactionPlan() {
    String extension = metaClient.getTableConfig().getBaseFileFormat().getFileExtension();
    Path fullPartitionPath = new Path(new Path(metaClient.getBasePath()), DEFAULT_PARTITION_PATHS[0]);
    // Slice with neither base file nor log files
    FileSlice emptyFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "empty1");
    // Slice with a base file and two log files
    FileSlice fileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noData1");
    fileSlice.setBaseFile(new DummyHoodieBaseFile(fullPartitionPath.toString() + "/data1_1_000" + extension));
    fileSlice.addLogFile(new HoodieLogFile(new Path(fullPartitionPath, new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN)))));
    fileSlice.addLogFile(new HoodieLogFile(new Path(fullPartitionPath, new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN)))));
    // Slice with a base file but no log files
    FileSlice noLogFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noLog1");
    noLogFileSlice.setBaseFile(new DummyHoodieBaseFile(fullPartitionPath.toString() + "/noLog_1_000" + extension));
    // Slice with log files but no base file
    FileSlice noDataFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noData1");
    noDataFileSlice.addLogFile(new HoodieLogFile(new Path(fullPartitionPath, new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN)))));
    noDataFileSlice.addLogFile(new HoodieLogFile(new Path(fullPartitionPath, new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN)))));
    List<FileSlice> fileSliceList = Arrays.asList(emptyFileSlice, noDataFileSlice, fileSlice, noLogFileSlice);
    List<Pair<String, FileSlice>> input = fileSliceList.stream().map(f -> Pair.of(DEFAULT_PARTITION_PATHS[0], f)).collect(Collectors.toList());
    return Pair.of(input, CompactionUtils.buildFromFileSlices(input, Option.empty(), Option.of(metricsCaptureFn)));
}
Also used : Path(org.apache.hadoop.fs.Path) IntStream(java.util.stream.IntStream) Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) CompactionPlanMigrator(org.apache.hudi.common.table.timeline.versioning.compaction.CompactionPlanMigrator) CompactionTestUtils.setupAndValidateCompactionOperations(org.apache.hudi.common.testutils.CompactionTestUtils.setupAndValidateCompactionOperations) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) HashMap(java.util.HashMap) COMPACTION_METADATA_VERSION_1(org.apache.hudi.common.util.CompactionUtils.COMPACTION_METADATA_VERSION_1) LATEST_COMPACTION_METADATA_VERSION(org.apache.hudi.common.util.CompactionUtils.LATEST_COMPACTION_METADATA_VERSION) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) CompactionTestUtils.createCompactionPlan(org.apache.hudi.common.testutils.CompactionTestUtils.createCompactionPlan) ValueSource(org.junit.jupiter.params.provider.ValueSource) DummyHoodieBaseFile(org.apache.hudi.common.testutils.CompactionTestUtils.DummyHoodieBaseFile) IOException(java.io.IOException) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) List(java.util.List) Stream(java.util.stream.Stream) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) CompactionTestUtils.scheduleCompaction(org.apache.hudi.common.testutils.CompactionTestUtils.scheduleCompaction) DEFAULT_PARTITION_PATHS(org.apache.hudi.common.testutils.HoodieTestUtils.DEFAULT_PARTITION_PATHS) Comparator(java.util.Comparator) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair)
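
The helper drives CompactionUtils.buildFromFileSlices, which takes (partitionPath, FileSlice) pairs plus two optional arguments (extra metadata and a metrics-capture function, per the call above). A minimal usage sketch, where slices and partition are assumed inputs (hypothetical names) and both optional arguments are left empty:

// Hypothetical usage sketch; `slices` and `partition` are assumed inputs.
// Uses only the buildFromFileSlices call shape shown above.
List<Pair<String, FileSlice>> input = slices.stream()
    .map(slice -> Pair.of(partition, slice))
    .collect(Collectors.toList());
HoodieCompactionPlan plan =
    CompactionUtils.buildFromFileSlices(input, Option.empty(), Option.empty());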

Example 19 with HoodieCompactionPlan

use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.

the class TestCompactionUtils method testGetAllPendingCompactionOperationsWithFullDupFileId.

@Test
public void testGetAllPendingCompactionOperationsWithFullDupFileId() throws IOException {
    // Case where there are duplicate fileIds across compaction requests
    HoodieCompactionPlan plan1 = createCompactionPlan(metaClient, "000", "001", 10, true, true);
    HoodieCompactionPlan plan2 = createCompactionPlan(metaClient, "002", "003", 0, false, false);
    scheduleCompaction(metaClient, "001", plan1);
    scheduleCompaction(metaClient, "003", plan2);
    // Schedule the same plan again so that there are duplicates; it should not fail, as the entry is a full (exact) duplicate
    scheduleCompaction(metaClient, "005", plan1);
    metaClient = HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath).setLoadActiveTimelineOnLoad(true).build();
    // Must not throw: the duplicated entries are exact copies of each other
    Map<HoodieFileGroupId, Pair<String, HoodieCompactionOperation>> res = CompactionUtils.getAllPendingCompactionOperations(metaClient);
}
Also used : HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) Pair(org.apache.hudi.common.util.collection.Pair) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
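
Contrast this with Example 16: here the rescheduled plan is an exact copy, so no exception is raised. In terms of the hypothetical PendingOpsMerger sketch from Example 16, a full duplicate is a no-op while a mutated one throws:

// Usage of the hypothetical putChecked sketch from Example 16:
Map<String, String> pending = new HashMap<>();
PendingOpsMerger.putChecked(pending, "fg-1", "op-A");
PendingOpsMerger.putChecked(pending, "fg-1", "op-A"); // full duplicate: tolerated
PendingOpsMerger.putChecked(pending, "fg-1", "op-B"); // conflict: IllegalStateException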

Example 20 with HoodieCompactionPlan

use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.

the class TestCompactionUtils method testUpgradeDowngrade.

@Test
public void testUpgradeDowngrade() {
    Pair<List<Pair<String, FileSlice>>, HoodieCompactionPlan> inputAndPlan = buildCompactionPlan();
    testFileSlicesCompactionPlanEquality(inputAndPlan.getKey(), inputAndPlan.getValue());
    CompactionPlanMigrator migrator = new CompactionPlanMigrator(metaClient);
    HoodieCompactionPlan plan = inputAndPlan.getRight();
    System.out.println("Plan=" + plan.getOperations());
    assertEquals(LATEST_COMPACTION_METADATA_VERSION, plan.getVersion());
    HoodieCompactionPlan oldPlan = migrator.migrateToVersion(plan, plan.getVersion(), COMPACTION_METADATA_VERSION_1);
    // Check with older version of compaction plan
    assertEquals(COMPACTION_METADATA_VERSION_1, oldPlan.getVersion());
    testFileSlicesCompactionPlanEquality(inputAndPlan.getKey(), oldPlan);
    HoodieCompactionPlan newPlan = migrator.upgradeToLatest(plan, plan.getVersion());
    assertEquals(LATEST_COMPACTION_METADATA_VERSION, newPlan.getVersion());
    testFileSlicesCompactionPlanEquality(inputAndPlan.getKey(), newPlan);
    HoodieCompactionPlan latestPlan = migrator.migrateToVersion(oldPlan, oldPlan.getVersion(), newPlan.getVersion());
    testFileSlicesCompactionPlanEquality(inputAndPlan.getKey(), latestPlan);
}
Also used : HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) FileSlice(org.apache.hudi.common.model.FileSlice) ArrayList(java.util.ArrayList) List(java.util.List) CompactionPlanMigrator(org.apache.hudi.common.table.timeline.versioning.compaction.CompactionPlanMigrator) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
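
Note that the test asserts only the version constants and file-slice equality, not full plan equality, so a downgrade may legitimately drop newer fields. A conservative distillation of the property, as a hypothetical helper using only the CompactionPlanMigrator calls shown above:

// Hypothetical helper; relies only on the migrator calls used in the test.
private void assertMigrationRoundTrip(HoodieCompactionPlan plan) {
    CompactionPlanMigrator migrator = new CompactionPlanMigrator(metaClient);
    HoodieCompactionPlan downgraded =
        migrator.migrateToVersion(plan, plan.getVersion(), COMPACTION_METADATA_VERSION_1);
    HoodieCompactionPlan restored =
        migrator.upgradeToLatest(downgraded, downgraded.getVersion());
    assertEquals(LATEST_COMPACTION_METADATA_VERSION, restored.getVersion(),
        "Upgrading a downgraded plan should restore the latest metadata version");
}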

Aggregations

HoodieCompactionPlan (org.apache.hudi.avro.model.HoodieCompactionPlan): 41 usages
IOException (java.io.IOException): 20 usages
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 18 usages
List (java.util.List): 17 usages
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 16 usages
Pair (org.apache.hudi.common.util.collection.Pair): 16 usages
ArrayList (java.util.ArrayList): 15 usages
HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId): 15 usages
Path (org.apache.hadoop.fs.Path): 14 usages
HoodieCompactionOperation (org.apache.hudi.avro.model.HoodieCompactionOperation): 13 usages
Option (org.apache.hudi.common.util.Option): 13 usages
LogManager (org.apache.log4j.LogManager): 13 usages
Logger (org.apache.log4j.Logger): 13 usages
Map (java.util.Map): 12 usages
Set (java.util.Set): 12 usages
Collectors (java.util.stream.Collectors): 12 usages
CompactionOperation (org.apache.hudi.common.model.CompactionOperation): 12 usages
CompactionUtils (org.apache.hudi.common.util.CompactionUtils): 12 usages
FileSlice (org.apache.hudi.common.model.FileSlice): 11 usages
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 11 usages