Search in sources :

Example 16 with HoodieCompactionOperation

use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.

the class TestHoodieCompactionStrategy method testLogFileSizeCompactionSimple.

/**
 * Runs {@link LogFileSizeBasedCompactionStrategy#orderAndFilter} over four generated compaction
 * operations and checks that only two of them are retained, with a combined
 * {@code TOTAL_IO_MB} metric of 1594.
 */
@Test
public void testLogFileSizeCompactionSimple() {
    // Size fixture handed to createCompactionOperations
    // (presumably base file size -> log file sizes; confirm against that helper).
    Map<Long, List<Long>> sizesMap = new HashMap<>();
    sizesMap.put(120 * MB, Arrays.asList(60 * MB, 10 * MB, 80 * MB));
    sizesMap.put(110 * MB, new ArrayList<>());
    sizesMap.put(100 * MB, Collections.singletonList(MB));
    sizesMap.put(90 * MB, Collections.singletonList(1024 * MB));

    LogFileSizeBasedCompactionStrategy strategy = new LogFileSizeBasedCompactionStrategy();
    HoodieCompactionConfig compactionConfig = HoodieCompactionConfig.newBuilder()
        .withCompactionStrategy(strategy)
        .withTargetIOPerCompactionInMB(1205)
        .withLogFileSizeThresholdBasedCompaction(100 * 1024 * 1024)
        .build();
    HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
        .withPath("/tmp")
        .withCompactionConfig(compactionConfig)
        .build();

    List<HoodieCompactionOperation> candidates = createCompactionOperations(writeConfig, sizesMap);
    List<HoodieCompactionOperation> selected = strategy.orderAndFilter(writeConfig, candidates, new ArrayList<>());

    assertTrue(selected.size() < candidates.size(), "LogFileSizeBasedCompactionStrategy should have resulted in fewer compactions");
    assertEquals(2, selected.size(), "LogFileSizeBasedCompactionStrategy should have resulted in 2 compaction");

    // Add up the TOTAL_IO_MB metric (truncated to whole MB) of every selected operation.
    long selectedIoMb = 0L;
    for (HoodieCompactionOperation operation : selected) {
        selectedIoMb += operation.getMetrics().get(BoundedIOCompactionStrategy.TOTAL_IO_MB).longValue();
    }
    assertEquals(1594, selectedIoMb, "Should chose the first 2 compactions which should result in a total IO of 1594 MB");
}
Also used : Arrays(java.util.Arrays) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Date(java.util.Date) FileSlice(org.apache.hudi.common.model.FileSlice) SimpleDateFormat(java.text.SimpleDateFormat) HashMap(java.util.HashMap) Random(java.util.Random) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) Test(org.junit.jupiter.api.Test) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) BaseFile(org.apache.hudi.common.model.BaseFile) HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig) Map(java.util.Map) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) Collections(java.util.Collections) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) Pair(org.apache.hudi.common.util.collection.Pair) HashMap(java.util.HashMap) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) ArrayList(java.util.ArrayList) List(java.util.List) Test(org.junit.jupiter.api.Test)

Example 17 with HoodieCompactionOperation

use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.

the class TestHoodieCompactionStrategy method testBoundedPartitionAwareCompactionSimple.

/**
 * Runs {@link BoundedPartitionAwareCompactionStrategy#orderAndFilter} over six generated
 * compaction operations, each assigned to a day-formatted partition path around today
 * (three days back up to five days ahead), and checks that exactly five are retained and
 * that the strategy's comparator does not rank the last returned partition before the first.
 */
@Test
public void testBoundedPartitionAwareCompactionSimple() {
    // Size fixture handed to createCompactionOperations
    // (presumably base file size -> log file sizes; confirm against that helper).
    Map<Long, List<Long>> sizesMap = new HashMap<>();
    sizesMap.put(120 * MB, Arrays.asList(60 * MB, 10 * MB, 80 * MB));
    sizesMap.put(110 * MB, new ArrayList<>());
    sizesMap.put(100 * MB, Collections.singletonList(MB));
    sizesMap.put(70 * MB, Collections.singletonList(MB));
    sizesMap.put(80 * MB, Collections.singletonList(MB));
    sizesMap.put(90 * MB, Collections.singletonList(1024 * MB));

    SimpleDateFormat format = new SimpleDateFormat("yyyy/MM/dd");
    Date today = new Date();
    String currentDay = format.format(today);
    String currentDayMinus1 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(-1));
    String currentDayMinus2 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(-2));
    String currentDayMinus3 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(-3));
    String currentDayPlus1 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(1));
    String currentDayPlus5 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(5));

    // Populate a plain HashMap rather than using double-brace initialization, which would
    // create an anonymous HashMap subclass holding a reference to this test instance.
    Map<Long, String> partitionsByKey = new HashMap<>();
    partitionsByKey.put(120 * MB, currentDay);
    partitionsByKey.put(110 * MB, currentDayMinus1);
    partitionsByKey.put(100 * MB, currentDayMinus2);
    partitionsByKey.put(80 * MB, currentDayMinus3);
    partitionsByKey.put(90 * MB, currentDayPlus1);
    partitionsByKey.put(70 * MB, currentDayPlus5);
    Map<Long, String> keyToPartitionMap = Collections.unmodifiableMap(partitionsByKey);

    BoundedPartitionAwareCompactionStrategy strategy = new BoundedPartitionAwareCompactionStrategy();
    HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp").withCompactionConfig(HoodieCompactionConfig.newBuilder().withCompactionStrategy(strategy).withTargetPartitionsPerDayBasedCompaction(2).build()).build();
    List<HoodieCompactionOperation> operations = createCompactionOperations(writeConfig, sizesMap, keyToPartitionMap);
    List<HoodieCompactionOperation> returned = strategy.orderAndFilter(writeConfig, operations, new ArrayList<>());

    assertTrue(returned.size() < operations.size(), "BoundedPartitionAwareCompactionStrategy should have resulted in fewer compactions");
    // The failure message names the exact expected count, since this assertion can fail in
    // either direction.
    assertEquals(5, returned.size(), "BoundedPartitionAwareCompactionStrategy should have resulted in 5 compactions");

    int comparison = strategy.getComparator().compare(returned.get(returned.size() - 1).getPartitionPath(), returned.get(0).getPartitionPath());
    // Either the partition paths are sorted in descending order or they are equal
    assertTrue(comparison >= 0, "BoundedPartitionAwareCompactionStrategy should sort partitions in descending order");
}
Also used : HashMap(java.util.HashMap) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Date(java.util.Date) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) ArrayList(java.util.ArrayList) List(java.util.List) SimpleDateFormat(java.text.SimpleDateFormat) Test(org.junit.jupiter.api.Test)

Example 18 with HoodieCompactionOperation

use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.

the class TestHoodieCompactionStrategy method testDayBasedCompactionSimple.

/**
 * Runs {@link DayBasedCompactionStrategy#orderAndFilter} over four generated compaction
 * operations spread across three entries of {@code partitionPaths}, with the target
 * partitions-per-day setting at 1, and checks that exactly two operations are retained and
 * that the strategy's comparator does not rank the last returned partition before the first.
 */
@Test
public void testDayBasedCompactionSimple() {
    // Size fixture handed to createCompactionOperations
    // (presumably base file size -> log file sizes; confirm against that helper).
    Map<Long, List<Long>> sizesMap = new HashMap<>();
    sizesMap.put(120 * MB, Arrays.asList(60 * MB, 10 * MB, 80 * MB));
    sizesMap.put(110 * MB, new ArrayList<>());
    sizesMap.put(100 * MB, Collections.singletonList(MB));
    sizesMap.put(90 * MB, Collections.singletonList(1024 * MB));

    // Populate a plain HashMap rather than using double-brace initialization, which would
    // create an anonymous HashMap subclass holding a reference to this test instance.
    Map<Long, String> partitionsByKey = new HashMap<>();
    partitionsByKey.put(120 * MB, partitionPaths[2]);
    partitionsByKey.put(110 * MB, partitionPaths[2]);
    partitionsByKey.put(100 * MB, partitionPaths[1]);
    partitionsByKey.put(90 * MB, partitionPaths[0]);
    Map<Long, String> keyToPartitionMap = Collections.unmodifiableMap(partitionsByKey);

    DayBasedCompactionStrategy strategy = new DayBasedCompactionStrategy();
    HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp").withCompactionConfig(HoodieCompactionConfig.newBuilder().withCompactionStrategy(strategy).withTargetPartitionsPerDayBasedCompaction(1).build()).build();
    List<HoodieCompactionOperation> operations = createCompactionOperations(writeConfig, sizesMap, keyToPartitionMap);
    List<HoodieCompactionOperation> returned = strategy.orderAndFilter(writeConfig, operations, new ArrayList<>());

    assertTrue(returned.size() < operations.size(), "DayBasedCompactionStrategy should have resulted in fewer compactions");
    // The failure message names the exact expected count, since this assertion can fail in
    // either direction.
    assertEquals(2, returned.size(), "DayBasedCompactionStrategy should have resulted in 2 compactions");

    int comparison = strategy.getComparator().compare(returned.get(returned.size() - 1).getPartitionPath(), returned.get(0).getPartitionPath());
    // Either the partition paths are sorted in descending order or they are equal
    assertTrue(comparison >= 0, "DayBasedCompactionStrategy should sort partitions in descending order");
}
Also used : HashMap(java.util.HashMap) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) ArrayList(java.util.ArrayList) List(java.util.List) Test(org.junit.jupiter.api.Test)

Example 19 with HoodieCompactionOperation

use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.

the class CompactionOperation method convertFromAvroRecordInstance.

/**
 * Builds a {@link CompactionOperation} POJO from an Avro-generated
 * {@link HoodieCompactionOperation} (the POJO form is meant for Spark RDD operations).
 *
 * <p>The delta file paths and metrics are copied into fresh collections; a {@code null}
 * metrics map is replaced by an empty one. The data file path and bootstrap file path are
 * wrapped with {@code Option.ofNullable}, and the data file commit time is derived from the
 * data file's name via {@code FSUtils.getCommitTime}.
 *
 * @param operation Avro compaction operation to convert
 * @return a newly created {@link CompactionOperation} populated from {@code operation}
 */
public static CompactionOperation convertFromAvroRecordInstance(HoodieCompactionOperation operation) {
    CompactionOperation converted = new CompactionOperation();

    // Identity and timing of the file group being compacted.
    converted.id = new HoodieFileGroupId(operation.getPartitionPath(), operation.getFileId());
    converted.baseInstantTime = operation.getBaseInstantTime();

    // Base (data) file; the commit time is parsed from the file name only.
    converted.dataFileName = Option.ofNullable(operation.getDataFilePath());
    converted.dataFileCommitTime = converted.dataFileName.map(filePath -> {
        String fileName = new Path(filePath).getName();
        return FSUtils.getCommitTime(fileName);
    });
    converted.bootstrapFilePath = Option.ofNullable(operation.getBootstrapFilePath());

    // Defensive copies so the POJO does not share collections with the Avro record.
    converted.deltaFileNames = new ArrayList<>(operation.getDeltaFilePaths());
    if (operation.getMetrics() == null) {
        converted.metrics = new HashMap<>();
    } else {
        converted.metrics = new HashMap<>(operation.getMetrics());
    }
    return converted;
}
Also used : Objects(java.util.Objects) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) List(java.util.List) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) Collectors(java.util.stream.Collectors) FSUtils(org.apache.hudi.common.fs.FSUtils) Serializable(java.io.Serializable) ArrayList(java.util.ArrayList) Path(org.apache.hadoop.fs.Path) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) HashMap(java.util.HashMap)

Example 20 with HoodieCompactionOperation

use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.

the class CompactionCommand method printCompaction.

/**
 * Renders the operations of a compaction plan as a table via {@code HoodiePrintHelper.print}.
 *
 * <p>Each operation contributes one row: partition path, file id, base instant time, data
 * file path, number of delta files and the metrics map as a string (empty string when the
 * metrics are {@code null}). A {@code null} plan, or a plan whose operations are
 * {@code null}, yields a table with no rows.
 *
 * @param compactionPlan plan whose operations are printed; may be {@code null}
 * @param sortByField    passed through to {@code HoodiePrintHelper.print}
 * @param descending     passed through to {@code HoodiePrintHelper.print}
 * @param limit          passed through to {@code HoodiePrintHelper.print}
 * @param headerOnly     passed through to {@code HoodiePrintHelper.print}
 * @return the string produced by {@code HoodiePrintHelper.print}
 */
protected String printCompaction(HoodieCompactionPlan compactionPlan, String sortByField, boolean descending, int limit, boolean headerOnly) {
    List<Comparable[]> rows = new ArrayList<>();
    boolean hasOperations = compactionPlan != null && compactionPlan.getOperations() != null;
    if (hasOperations) {
        for (HoodieCompactionOperation operation : compactionPlan.getOperations()) {
            int deltaFileCount = operation.getDeltaFilePaths().size();
            String metricsText = operation.getMetrics() == null ? "" : operation.getMetrics().toString();
            rows.add(new Comparable[] {
                operation.getPartitionPath(),
                operation.getFileId(),
                operation.getBaseInstantTime(),
                operation.getDataFilePath(),
                deltaFileCount,
                metricsText
            });
        }
    }

    // Column order must match the order of the values in each row above.
    TableHeader header = new TableHeader()
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION_PATH)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_BASE_INSTANT)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_DATA_FILE_PATH)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_DELTA_FILES)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_METRICS);

    // No per-field converters are registered; values are printed as-is.
    Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
    return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
}
Also used : BiFunction(java.util.function.BiFunction) Function(java.util.function.Function) TableHeader(org.apache.hudi.cli.TableHeader) HashMap(java.util.HashMap) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) ArrayList(java.util.ArrayList)

Aggregations

HoodieCompactionOperation (org.apache.hudi.avro.model.HoodieCompactionOperation): 21, ArrayList (java.util.ArrayList): 16, List (java.util.List): 13, HashMap (java.util.HashMap): 11, Test (org.junit.jupiter.api.Test): 10, HoodieCompactionPlan (org.apache.hudi.avro.model.HoodieCompactionPlan): 9, FileSlice (org.apache.hudi.common.model.FileSlice): 9, HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 9, Map (java.util.Map): 7, Collectors (java.util.stream.Collectors): 7, Path (org.apache.hadoop.fs.Path): 7, HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId): 7, HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 7, Pair (org.apache.hudi.common.util.collection.Pair): 7, HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile): 6, IOException (java.io.IOException): 5, SimpleDateFormat (java.text.SimpleDateFormat): 5, Date (java.util.Date): 5, HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 5, HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 5