Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
From the class TestRepairUtils, the method testGetBaseAndLogFilePathsFromTimeline.
@Test
public void testGetBaseAndLogFilePathsFromTimeline() throws IOException {
  setupTimelineInFS();
  HoodieTimeline timeline = metaClient.getActiveTimeline();
  HoodieInstant commitInstant = new HoodieInstant(
      HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "001");
  HoodieInstant inflightInstant = new HoodieInstant(
      HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "005");
  HoodieInstant compactionInstant = new HoodieInstant(
      HoodieInstant.State.COMPLETED, HoodieTimeline.COMPACTION_ACTION, "006");

  Map<String, List<Pair<String, String>>> partitionToFileIdAndNameMap =
      instantInfoMap.get(commitInstant.getTimestamp());
  Set<String> expectedPaths = partitionToFileIdAndNameMap.entrySet().stream()
      .flatMap(entry -> entry.getValue().stream()
          .map(fileInfo -> new Path(entry.getKey(), fileInfo.getValue()).toString()))
      .collect(Collectors.toSet());

  assertEquals(Option.of(expectedPaths),
      RepairUtils.getBaseAndLogFilePathsFromTimeline(timeline, commitInstant));
  assertThrows(HoodieException.class,
      () -> RepairUtils.getBaseAndLogFilePathsFromTimeline(timeline, inflightInstant));
  assertEquals(Option.empty(),
      RepairUtils.getBaseAndLogFilePathsFromTimeline(timeline, compactionInstant));
}
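The assertions above hinge on the instant's state: only completed instants have commit metadata from which base and log file paths can be read, so an inflight instant triggers a HoodieException. A minimal sketch of that guard using the same HoodieTimeline API (the guardCompleted helper name is hypothetical, not part of RepairUtils):

static void guardCompleted(HoodieTimeline timeline, HoodieInstant instant) {
  // An inflight instant has no completed metadata file yet, so there are
  // no base/log file paths to extract from it.
  if (!instant.isCompleted()) {
    throw new IllegalStateException("Instant " + instant.getTimestamp() + " is not completed");
  }
  // The instant must also actually exist on the timeline being inspected.
  if (!timeline.containsInstant(instant)) {
    throw new IllegalStateException("Instant " + instant.getTimestamp() + " not found on timeline");
  }
}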
Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
From the class TestUpsertPartitioner, the method testAverageBytesPerRecordForEmptyCommitTimeLine.
@Test
public void testAverageBytesPerRecordForEmptyCommitTimeLine() throws Exception {
  HoodieTimeline commitTimeLine = mock(HoodieTimeline.class);
  HoodieWriteConfig config = makeHoodieClientConfigBuilder().build();
  when(commitTimeLine.empty()).thenReturn(true);
  long expectAvgSize = config.getCopyOnWriteRecordSizeEstimate();
  long actualAvgSize = averageBytesPerRecord(commitTimeLine, config);
  assertEquals(expectAvgSize, actualAvgSize);
}
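With an empty commit timeline there is no past commit to sample, so averageBytesPerRecord can only fall back to the hoodie.copyonwrite.record.size.estimate value from the write config. A hypothetical re-statement of that fallback (estimateAvgRecordSize is illustrative, not the real method):

static long estimateAvgRecordSize(HoodieTimeline commitTimeline, long configuredEstimate,
                                  long totalBytesWritten, long totalRecordsWritten) {
  // No completed commits (or no records) means nothing to measure yet:
  // return the configured default record size.
  if (commitTimeline.empty() || totalRecordsWritten == 0) {
    return configuredEstimate;
  }
  // Otherwise derive the average from the last commit's write stats.
  return totalBytesWritten / totalRecordsWritten;
}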
Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
From the class CompactionTestBase, the method executeCompaction.
protected void executeCompaction(String compactionInstantTime, SparkRDDWriteClient client, HoodieTable table,
    HoodieWriteConfig cfg, int expectedNumRecs, boolean hasDeltaCommitAfterPendingCompaction) throws IOException {
  client.compact(compactionInstantTime);
  assertFalse(WriteMarkersFactory.get(cfg.getMarkersType(), table, compactionInstantTime).doesMarkerDirExist());
  List<FileSlice> fileSliceList = getCurrentLatestFileSlices(table);
  assertTrue(fileSliceList.stream().findAny().isPresent(), "Ensure latest file-slices are not empty");
  assertTrue(fileSliceList.stream().allMatch(fs -> fs.getBaseInstantTime().equals(compactionInstantTime)),
      "Verify all file-slices have base-instant same as compaction instant");
  assertTrue(fileSliceList.stream().allMatch(fs -> fs.getBaseFile().isPresent()),
      "Verify all file-slices have data-files");
  if (hasDeltaCommitAfterPendingCompaction) {
    assertTrue(fileSliceList.stream().allMatch(fs -> fs.getLogFiles().count() > 0),
        "Verify all file-slices have at least one log-file");
  } else {
    assertTrue(fileSliceList.stream().noneMatch(fs -> fs.getLogFiles().count() > 0),
        "Verify all file-slices have no log-files");
  }
  // verify that the compaction commit made it onto the timeline
  table = getHoodieTable(HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath())
      .setLoadActiveTimelineOnLoad(true).build(), cfg);
  HoodieTimeline timeline = table.getMetaClient().getCommitTimeline().filterCompletedInstants();
  String latestCompactionCommitTime = timeline.lastInstant().get().getTimestamp();
  assertEquals(compactionInstantTime, latestCompactionCommitTime,
      "Expect compaction instant time to be the latest commit time");
  assertEquals(expectedNumRecs,
      HoodieClientTestUtils.countRecordsOptionallySince(jsc, basePath, sqlContext, timeline, Option.of("000")),
      "Must contain expected records");
}
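The compactionInstantTime passed in above is normally obtained by scheduling a compaction first. A hedged sketch of that schedule-then-execute flow, assuming the scheduleCompaction/compact pair exposed by SparkRDDWriteClient in this Hudi version:

// Schedule a compaction plan; whether an instant is produced depends on
// pending delta commits and the configured compaction strategy.
Option<String> scheduled = client.scheduleCompaction(Option.empty());
if (scheduled.isPresent()) {
  String compactionInstantTime = scheduled.get();
  // Execute the plan; executeCompaction(...) above would then validate
  // the resulting file slices and the completed commit timeline.
  client.compact(compactionInstantTime);
}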
Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
From the class TestHoodieSparkMergeOnReadTableClustering, the method doClusteringAndValidate.
private void doClusteringAndValidate(SparkRDDWriteClient client, String clusteringCommitTime,
    HoodieTableMetaClient metaClient, HoodieWriteConfig cfg, HoodieTestDataGenerator dataGen) {
  client.cluster(clusteringCommitTime, true);
  metaClient = HoodieTableMetaClient.reload(metaClient);
  final HoodieTable clusteredTable = HoodieSparkTable.create(cfg, context(), metaClient);
  clusteredTable.getHoodieView().sync();
  Stream<HoodieBaseFile> dataFilesToRead = Arrays.stream(dataGen.getPartitionPaths())
      .flatMap(p -> clusteredTable.getBaseFileOnlyView().getLatestBaseFiles(p));
  assertEquals(dataGen.getPartitionPaths().length, dataFilesToRead.count());
  HoodieTimeline timeline = metaClient.getCommitTimeline().filterCompletedInstants();
  assertEquals(1, timeline.findInstantsAfter("003", Integer.MAX_VALUE).countInstants(),
      "Expecting a single commit.");
  assertEquals(clusteringCommitTime, timeline.lastInstant().get().getTimestamp());
  assertEquals(HoodieTimeline.REPLACE_COMMIT_ACTION, timeline.lastInstant().get().getAction());
  if (cfg.populateMetaFields()) {
    assertEquals(400,
        HoodieClientTestUtils.countRecordsOptionallySince(jsc(), basePath(), sqlContext(), timeline, Option.of("000")),
        "Must contain 400 records");
  } else {
    assertEquals(400,
        HoodieClientTestUtils.countRecordsOptionallySince(jsc(), basePath(), sqlContext(), timeline, Option.empty()));
  }
}
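Because clustering completes as a replace commit, it can be located on the completed commit timeline by its action string alone. A small sketch, assuming timeline is the filtered timeline built above (getInstants() returns a Stream<HoodieInstant> in this Hudi version):

// Count completed replace commits, e.g. to confirm exactly one
// clustering action landed on the timeline.
long replaceCommits = timeline.getInstants()
    .filter(i -> HoodieTimeline.REPLACE_COMMIT_ACTION.equals(i.getAction()))
    .count();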
Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
From the class CompactionTestBase, the method executeCompactionWithReplacedFiles.
protected void executeCompactionWithReplacedFiles(String compactionInstantTime, SparkRDDWriteClient client,
    HoodieTable table, HoodieWriteConfig cfg, String[] partitions,
    Set<HoodieFileGroupId> replacedFileIds) throws IOException {
  client.compact(compactionInstantTime);
  List<FileSlice> fileSliceList = getCurrentLatestFileSlices(table);
  assertTrue(fileSliceList.stream().findAny().isPresent(), "Ensure latest file-slices are not empty");
  assertFalse(fileSliceList.stream().anyMatch(fs -> replacedFileIds.contains(fs.getFileGroupId())),
      "Compacted files should not show up in latest slices");
  // verify that the compaction commit is visible on the completed timeline
  table = getHoodieTable(HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath())
      .setLoadActiveTimelineOnLoad(true).build(), cfg);
  HoodieTimeline timeline = table.getMetaClient().getCommitTimeline().filterCompletedInstants();
  assertTrue(timeline.getInstants()
      .anyMatch(instant -> compactionInstantTime.equals(instant.getTimestamp())));
  for (String partition : partitions) {
    table.getSliceView().getLatestFileSlicesBeforeOrOn(partition, compactionInstantTime, true).forEach(fs -> {
      // verify that all log files were merged into the new base file
      assertEquals(0, fs.getLogFiles().count());
      assertTrue(fs.getBaseFile().isPresent());
    });
  }
}
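The replaced-file check above generalizes to any action that swaps out file groups (clustering, insert_overwrite). A hypothetical helper capturing it, using only the FileSlice and HoodieFileGroupId types already imported by this test:

// True when no latest file slice belongs to a file group that was
// replaced before the compaction ran.
static boolean latestSlicesExcludeReplaced(List<FileSlice> latestSlices,
                                           Set<HoodieFileGroupId> replacedFileIds) {
  return latestSlices.stream()
      .noneMatch(fs -> replacedFileIds.contains(fs.getFileGroupId()));
}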