use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.
the class TestHoodieCompactionStrategy method testLogFileSizeCompactionSimple.
@Test
public void testLogFileSizeCompactionSimple() {
  // Each entry maps a base file size to the sizes of its delta log files (all in bytes).
  Map<Long, List<Long>> fileToLogSizes = new HashMap<>();
  fileToLogSizes.put(120 * MB, Arrays.asList(60 * MB, 10 * MB, 80 * MB));
  fileToLogSizes.put(110 * MB, new ArrayList<>());
  fileToLogSizes.put(100 * MB, Collections.singletonList(MB));
  fileToLogSizes.put(90 * MB, Collections.singletonList(1024 * MB));
  LogFileSizeBasedCompactionStrategy strategy = new LogFileSizeBasedCompactionStrategy();
  // IO budget of 1205 MB with a 100 MB log-size threshold drives the selection below.
  HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
      .withPath("/tmp")
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .withCompactionStrategy(strategy)
          .withTargetIOPerCompactionInMB(1205)
          .withLogFileSizeThresholdBasedCompaction(100 * 1024 * 1024)
          .build())
      .build();
  List<HoodieCompactionOperation> allOperations = createCompactionOperations(writeConfig, fileToLogSizes);
  List<HoodieCompactionOperation> selected = strategy.orderAndFilter(writeConfig, allOperations, new ArrayList<>());
  assertTrue(selected.size() < allOperations.size(), "LogFileSizeBasedCompactionStrategy should have resulted in fewer compactions");
  assertEquals(2, selected.size(), "LogFileSizeBasedCompactionStrategy should have resulted in 2 compaction");
  // Total size of all the log files
  long totalIoMb = 0L;
  for (HoodieCompactionOperation selectedOp : selected) {
    totalIoMb += selectedOp.getMetrics().get(BoundedIOCompactionStrategy.TOTAL_IO_MB).longValue();
  }
  assertEquals(1594, totalIoMb, "Should chose the first 2 compactions which should result in a total IO of 1594 MB");
}
use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.
the class TestHoodieCompactionStrategy method testBoundedPartitionAwareCompactionSimple.
@Test
public void testBoundedPartitionAwareCompactionSimple() {
  // Each entry maps a base file size to the sizes of its delta log files (all in bytes).
  Map<Long, List<Long>> fileToLogSizes = new HashMap<>();
  fileToLogSizes.put(120 * MB, Arrays.asList(60 * MB, 10 * MB, 80 * MB));
  fileToLogSizes.put(110 * MB, new ArrayList<>());
  fileToLogSizes.put(100 * MB, Collections.singletonList(MB));
  fileToLogSizes.put(70 * MB, Collections.singletonList(MB));
  fileToLogSizes.put(80 * MB, Collections.singletonList(MB));
  fileToLogSizes.put(90 * MB, Collections.singletonList(1024 * MB));
  // Build day-style partition paths around today so the strategy's day window applies.
  SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd");
  String currentDay = dateFormat.format(new Date());
  String currentDayMinus1 = dateFormat.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(-1));
  String currentDayMinus2 = dateFormat.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(-2));
  String currentDayMinus3 = dateFormat.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(-3));
  String currentDayPlus1 = dateFormat.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(1));
  String currentDayPlus5 = dateFormat.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(5));
  // Assign each file group (keyed by base file size) to a partition day.
  Map<Long, String> partitionByKey = new HashMap<>();
  partitionByKey.put(120 * MB, currentDay);
  partitionByKey.put(110 * MB, currentDayMinus1);
  partitionByKey.put(100 * MB, currentDayMinus2);
  partitionByKey.put(80 * MB, currentDayMinus3);
  partitionByKey.put(90 * MB, currentDayPlus1);
  partitionByKey.put(70 * MB, currentDayPlus5);
  Map<Long, String> keyToPartitionMap = Collections.unmodifiableMap(partitionByKey);
  BoundedPartitionAwareCompactionStrategy strategy = new BoundedPartitionAwareCompactionStrategy();
  HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
      .withPath("/tmp")
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .withCompactionStrategy(strategy)
          .withTargetPartitionsPerDayBasedCompaction(2)
          .build())
      .build();
  List<HoodieCompactionOperation> allOperations = createCompactionOperations(writeConfig, fileToLogSizes, keyToPartitionMap);
  List<HoodieCompactionOperation> selected = strategy.orderAndFilter(writeConfig, allOperations, new ArrayList<>());
  assertTrue(selected.size() < allOperations.size(), "BoundedPartitionAwareCompactionStrategy should have resulted in fewer compactions");
  assertEquals(5, selected.size(), "BoundedPartitionAwareCompactionStrategy should have resulted in fewer compactions");
  // Either the partition paths are sorted in descending order or they are equal
  String firstPartition = selected.get(0).getPartitionPath();
  String lastPartition = selected.get(selected.size() - 1).getPartitionPath();
  int comparison = strategy.getComparator().compare(lastPartition, firstPartition);
  assertTrue(comparison >= 0, "BoundedPartitionAwareCompactionStrategy should sort partitions in descending order");
}
use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.
the class TestHoodieCompactionStrategy method testDayBasedCompactionSimple.
@Test
public void testDayBasedCompactionSimple() {
  // Each entry maps a base file size to the sizes of its delta log files (all in bytes).
  Map<Long, List<Long>> fileToLogSizes = new HashMap<>();
  fileToLogSizes.put(120 * MB, Arrays.asList(60 * MB, 10 * MB, 80 * MB));
  fileToLogSizes.put(110 * MB, new ArrayList<>());
  fileToLogSizes.put(100 * MB, Collections.singletonList(MB));
  fileToLogSizes.put(90 * MB, Collections.singletonList(1024 * MB));
  // Spread the file groups (keyed by base file size) over three partitions.
  Map<Long, String> partitionByKey = new HashMap<>();
  partitionByKey.put(120 * MB, partitionPaths[2]);
  partitionByKey.put(110 * MB, partitionPaths[2]);
  partitionByKey.put(100 * MB, partitionPaths[1]);
  partitionByKey.put(90 * MB, partitionPaths[0]);
  Map<Long, String> keyToPartitionMap = Collections.unmodifiableMap(partitionByKey);
  DayBasedCompactionStrategy strategy = new DayBasedCompactionStrategy();
  // Limit compaction to one partition per day.
  HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
      .withPath("/tmp")
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .withCompactionStrategy(strategy)
          .withTargetPartitionsPerDayBasedCompaction(1)
          .build())
      .build();
  List<HoodieCompactionOperation> allOperations = createCompactionOperations(writeConfig, fileToLogSizes, keyToPartitionMap);
  List<HoodieCompactionOperation> selected = strategy.orderAndFilter(writeConfig, allOperations, new ArrayList<>());
  assertTrue(selected.size() < allOperations.size(), "DayBasedCompactionStrategy should have resulted in fewer compactions");
  assertEquals(2, selected.size(), "DayBasedCompactionStrategy should have resulted in fewer compactions");
  // Either the partition paths are sorted in descending order or they are equal
  String firstPartition = selected.get(0).getPartitionPath();
  String lastPartition = selected.get(selected.size() - 1).getPartitionPath();
  int comparison = strategy.getComparator().compare(lastPartition, firstPartition);
  assertTrue(comparison >= 0, "DayBasedCompactionStrategy should sort partitions in descending order");
}
use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.
the class CompactionOperation method convertFromAvroRecordInstance.
/**
 * Convert Avro generated Compaction operation to POJO for Spark RDD operation.
 *
 * @param operation Hoodie Compaction Operation
 * @return the equivalent {@link CompactionOperation} POJO
 */
public static CompactionOperation convertFromAvroRecordInstance(HoodieCompactionOperation operation) {
  CompactionOperation result = new CompactionOperation();
  result.baseInstantTime = operation.getBaseInstantTime();
  // The data file path may be null; wrap it so downstream code handles absence explicitly.
  result.dataFileName = Option.ofNullable(operation.getDataFilePath());
  // Commit time is derived from the data file's name, only when a data file is present.
  result.dataFileCommitTime = result.dataFileName.map(path -> FSUtils.getCommitTime(new Path(path).getName()));
  result.deltaFileNames = new ArrayList<>(operation.getDeltaFilePaths());
  result.id = new HoodieFileGroupId(operation.getPartitionPath(), operation.getFileId());
  // Normalize a null metrics map to an empty mutable one; copy defensively otherwise.
  result.metrics = operation.getMetrics() == null ? new HashMap<>() : new HashMap<>(operation.getMetrics());
  result.bootstrapFilePath = Option.ofNullable(operation.getBootstrapFilePath());
  return result;
}
use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.
the class CompactionCommand method printCompaction.
/**
 * Render the operations of a compaction plan as a printable table.
 *
 * @param compactionPlan plan whose operations are listed; may be null
 * @param sortByField    field name to sort rows by
 * @param descending     whether to sort in descending order
 * @param limit          maximum number of rows to print
 * @param headerOnly     print only the table header when true
 * @return the formatted table produced by {@code HoodiePrintHelper}
 */
protected String printCompaction(HoodieCompactionPlan compactionPlan, String sortByField, boolean descending, int limit, boolean headerOnly) {
  List<Comparable[]> rows = new ArrayList<>();
  // A plan (or its operation list) can be absent; render an empty table in that case.
  if (compactionPlan != null && compactionPlan.getOperations() != null) {
    for (HoodieCompactionOperation op : compactionPlan.getOperations()) {
      String metricsText = op.getMetrics() == null ? "" : op.getMetrics().toString();
      rows.add(new Comparable[] {
          op.getPartitionPath(),
          op.getFileId(),
          op.getBaseInstantTime(),
          op.getDataFilePath(),
          op.getDeltaFilePaths().size(),
          metricsText});
    }
  }
  // No per-field converters: every value is rendered with its default string form.
  Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
  TableHeader header = new TableHeader()
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION_PATH)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_BASE_INSTANT)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_DATA_FILE_PATH)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_DELTA_FILES)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_METRICS);
  return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
}
Aggregations