Search in sources :

Example 1 with HoodieCompactionOperation

use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.

the class TestSimpleConcurrentFileWritesConflictResolutionStrategy method createCompactionRequested.

private void createCompactionRequested(String instantTime) throws Exception {
    String fileId1 = "file-1";
    HoodieCompactionPlan compactionPlan = new HoodieCompactionPlan();
    compactionPlan.setVersion(TimelineLayoutVersion.CURR_VERSION);
    HoodieCompactionOperation operation = new HoodieCompactionOperation();
    operation.setFileId(fileId1);
    operation.setPartitionPath(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH);
    operation.setDataFilePath("/file-1");
    operation.setDeltaFilePaths(Arrays.asList("/file-1"));
    compactionPlan.setOperations(Arrays.asList(operation));
    HoodieTestTable.of(metaClient).addRequestedCompaction(instantTime, compactionPlan);
}
Also used : HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation)

Example 2 with HoodieCompactionOperation

use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.

the class CompactionTestBase method validateDeltaCommit.

/**
 * HELPER METHODS FOR TESTING.
 */
protected void validateDeltaCommit(String latestDeltaCommit, final Map<HoodieFileGroupId, Pair<String, HoodieCompactionOperation>> fgIdToCompactionOperation, HoodieWriteConfig cfg) {
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    HoodieTable table = getHoodieTable(metaClient, cfg);
    List<FileSlice> fileSliceList = getCurrentLatestFileSlices(table);
    fileSliceList.forEach(fileSlice -> {
        Pair<String, HoodieCompactionOperation> opPair = fgIdToCompactionOperation.get(fileSlice.getFileGroupId());
        if (opPair != null) {
            assertEquals(fileSlice.getBaseInstantTime(), opPair.getKey(), "Expect baseInstant to match compaction Instant");
            assertTrue(fileSlice.getLogFiles().count() > 0, "Expect atleast one log file to be present where the latest delta commit was written");
            assertFalse(fileSlice.getBaseFile().isPresent(), "Expect no data-file to be present");
        } else {
            assertTrue(fileSlice.getBaseInstantTime().compareTo(latestDeltaCommit) <= 0, "Expect baseInstant to be less than or equal to latestDeltaCommit");
        }
    });
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation)

Example 3 with HoodieCompactionOperation

use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.

the class TestHoodieCompactionStrategy method testUnboundedPartitionAwareCompactionSimple.

@Test
public void testUnboundedPartitionAwareCompactionSimple() {
    Map<Long, List<Long>> sizesMap = new HashMap<>();
    sizesMap.put(120 * MB, Arrays.asList(60 * MB, 10 * MB, 80 * MB));
    sizesMap.put(110 * MB, new ArrayList<>());
    sizesMap.put(100 * MB, Collections.singletonList(MB));
    sizesMap.put(80 * MB, Collections.singletonList(MB));
    sizesMap.put(70 * MB, Collections.singletonList(MB));
    sizesMap.put(90 * MB, Collections.singletonList(1024 * MB));
    SimpleDateFormat format = new SimpleDateFormat("yyyy/MM/dd");
    Date today = new Date();
    String currentDay = format.format(today);
    String currentDayMinus1 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(-1));
    String currentDayMinus2 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(-2));
    String currentDayMinus3 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(-3));
    String currentDayPlus1 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(1));
    String currentDayPlus5 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(5));
    Map<Long, String> keyToPartitionMap = Collections.unmodifiableMap(new HashMap<Long, String>() {

        {
            put(120 * MB, currentDay);
            put(110 * MB, currentDayMinus1);
            put(100 * MB, currentDayMinus2);
            put(80 * MB, currentDayMinus3);
            put(90 * MB, currentDayPlus1);
            put(70 * MB, currentDayPlus5);
        }
    });
    UnBoundedPartitionAwareCompactionStrategy strategy = new UnBoundedPartitionAwareCompactionStrategy();
    HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp").withCompactionConfig(HoodieCompactionConfig.newBuilder().withCompactionStrategy(strategy).withTargetPartitionsPerDayBasedCompaction(2).build()).build();
    List<HoodieCompactionOperation> operations = createCompactionOperations(writeConfig, sizesMap, keyToPartitionMap);
    List<HoodieCompactionOperation> returned = strategy.orderAndFilter(writeConfig, operations, new ArrayList<>());
    assertTrue(returned.size() < operations.size(), "UnBoundedPartitionAwareCompactionStrategy should not include last " + writeConfig.getTargetPartitionsPerDayBasedCompaction() + " partitions or later partitions from today");
    assertEquals(1, returned.size(), "BoundedPartitionAwareCompactionStrategy should have resulted in 1 compaction");
}
Also used : HashMap(java.util.HashMap) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Date(java.util.Date) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) ArrayList(java.util.ArrayList) List(java.util.List) SimpleDateFormat(java.text.SimpleDateFormat) Test(org.junit.jupiter.api.Test)

Example 4 with HoodieCompactionOperation

use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.

the class TestHoodieCompactionStrategy method createCompactionOperations.

private List<HoodieCompactionOperation> createCompactionOperations(HoodieWriteConfig config, Map<Long, List<Long>> sizesMap, Map<Long, String> keyToPartitionMap) {
    List<HoodieCompactionOperation> operations = new ArrayList<>(sizesMap.size());
    sizesMap.forEach((k, v) -> {
        HoodieBaseFile df = TestHoodieBaseFile.newDataFile(k);
        String partitionPath = keyToPartitionMap.get(k);
        List<HoodieLogFile> logFiles = v.stream().map(TestHoodieLogFile::newLogFile).collect(Collectors.toList());
        FileSlice slice = new FileSlice(new HoodieFileGroupId(partitionPath, df.getFileId()), df.getCommitTime());
        slice.setBaseFile(df);
        logFiles.stream().forEach(f -> slice.addLogFile(f));
        operations.add(new HoodieCompactionOperation(df.getCommitTime(), logFiles.stream().map(s -> s.getPath().toString()).collect(Collectors.toList()), df.getPath(), df.getFileId(), partitionPath, config.getCompactionStrategy().captureMetrics(config, slice), df.getBootstrapBaseFile().map(BaseFile::getPath).orElse(null)));
    });
    return operations;
}
Also used : Arrays(java.util.Arrays) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Date(java.util.Date) FileSlice(org.apache.hudi.common.model.FileSlice) SimpleDateFormat(java.text.SimpleDateFormat) HashMap(java.util.HashMap) Random(java.util.Random) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) Test(org.junit.jupiter.api.Test) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) BaseFile(org.apache.hudi.common.model.BaseFile) HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig) Map(java.util.Map) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) Collections(java.util.Collections) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) Pair(org.apache.hudi.common.util.collection.Pair) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) ArrayList(java.util.ArrayList) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile)

Example 5 with HoodieCompactionOperation

use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.

the class TestHoodieCompactionStrategy method testUnBounded.

@Test
public void testUnBounded() {
    Map<Long, List<Long>> sizesMap = new HashMap<>();
    sizesMap.put(120 * MB, Arrays.asList(60 * MB, 10 * MB, 80 * MB));
    sizesMap.put(110 * MB, new ArrayList<>());
    sizesMap.put(100 * MB, Collections.singletonList(MB));
    sizesMap.put(90 * MB, Collections.singletonList(1024 * MB));
    UnBoundedCompactionStrategy strategy = new UnBoundedCompactionStrategy();
    HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp").withCompactionConfig(HoodieCompactionConfig.newBuilder().withCompactionStrategy(strategy).build()).build();
    List<HoodieCompactionOperation> operations = createCompactionOperations(writeConfig, sizesMap);
    List<HoodieCompactionOperation> returned = strategy.orderAndFilter(writeConfig, operations, new ArrayList<>());
    assertEquals(operations, returned, "UnBounded should not re-order or filter");
}
Also used : HashMap(java.util.HashMap) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) ArrayList(java.util.ArrayList) List(java.util.List) Test(org.junit.jupiter.api.Test)

Aggregations

HoodieCompactionOperation (org.apache.hudi.avro.model.HoodieCompactionOperation)21 ArrayList (java.util.ArrayList)16 List (java.util.List)13 HashMap (java.util.HashMap)11 Test (org.junit.jupiter.api.Test)10 HoodieCompactionPlan (org.apache.hudi.avro.model.HoodieCompactionPlan)9 FileSlice (org.apache.hudi.common.model.FileSlice)9 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)9 Map (java.util.Map)7 Collectors (java.util.stream.Collectors)7 Path (org.apache.hadoop.fs.Path)7 HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId)7 HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile)7 Pair (org.apache.hudi.common.util.collection.Pair)7 HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile)6 IOException (java.io.IOException)5 SimpleDateFormat (java.text.SimpleDateFormat)5 Date (java.util.Date)5 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)5 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)5