Use of org.apache.hudi.common.table.view.SyncableFileSystemView in project hudi by apache.
In class ClusteringPlanStrategy, method getFileSlicesEligibleForClustering:
/**
 * Return file slices eligible for clustering. FileIds in pending clustering/compaction are not eligible for clustering.
 */
protected Stream<FileSlice> getFileSlicesEligibleForClustering(String partition) {
  SyncableFileSystemView fileSystemView = (SyncableFileSystemView) getHoodieTable().getSliceView();
  Set<HoodieFileGroupId> fgIdsInPendingCompactionAndClustering = fileSystemView.getPendingCompactionOperations()
      .map(instantTimeOpPair -> instantTimeOpPair.getValue().getFileGroupId())
      .collect(Collectors.toSet());
  fgIdsInPendingCompactionAndClustering.addAll(fileSystemView.getFileGroupsInPendingClustering()
      .map(Pair::getKey)
      .collect(Collectors.toSet()));
  return hoodieTable.getSliceView().getLatestFileSlices(partition)
      .filter(slice -> !fgIdsInPendingCompactionAndClustering.contains(slice.getFileGroupId()));
}
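The exclusion pattern above can be lifted into a small standalone helper. This is a minimal sketch that reuses only the calls already shown (getPendingCompactionOperations, getFileGroupsInPendingClustering, getLatestFileSlices); the EligibleSliceFilter class and its method names are illustrative, not Hudi code.

import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.table.view.SyncableFileSystemView;
import org.apache.hudi.common.util.collection.Pair;

public class EligibleSliceFilter {

  // Collect the file group ids to skip: anything in pending compaction or pending clustering.
  static Set<HoodieFileGroupId> pendingFileGroupIds(SyncableFileSystemView view) {
    Set<HoodieFileGroupId> pending = view.getPendingCompactionOperations()
        .map(op -> op.getValue().getFileGroupId())
        .collect(Collectors.toSet());
    pending.addAll(view.getFileGroupsInPendingClustering()
        .map(Pair::getKey)
        .collect(Collectors.toSet()));
    return pending;
  }

  // Latest file slices of the given partitions, minus slices whose file group is pending.
  static List<FileSlice> eligibleSlices(SyncableFileSystemView view, List<String> partitions) {
    Set<HoodieFileGroupId> pending = pendingFileGroupIds(view);
    return partitions.stream()
        .flatMap(view::getLatestFileSlices)
        .filter(slice -> !pending.contains(slice.getFileGroupId()))
        .collect(Collectors.toList());
  }
}

Computing the pending set once and reusing it across partitions avoids re-scanning the pending compaction and clustering timelines for every partition.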
Use of org.apache.hudi.common.table.view.SyncableFileSystemView in project hudi by apache.
In class ScheduleCompactionActionExecutor, method scheduleCompaction:
private HoodieCompactionPlan scheduleCompaction() {
  LOG.info("Checking if compaction needs to be run on " + config.getBasePath());
  // Decide whether compaction is needed based on the number of delta commits and the time elapsed.
  boolean compactable = needCompact(config.getInlineCompactTriggerStrategy());
  if (compactable) {
    LOG.info("Generating compaction plan for merge on read table " + config.getBasePath());
    try {
      SyncableFileSystemView fileSystemView = (SyncableFileSystemView) table.getSliceView();
      Set<HoodieFileGroupId> fgInPendingCompactionAndClustering = fileSystemView.getPendingCompactionOperations()
          .map(instantTimeOpPair -> instantTimeOpPair.getValue().getFileGroupId())
          .collect(Collectors.toSet());
      // Exclude file groups in pending clustering from compaction.
      fgInPendingCompactionAndClustering.addAll(fileSystemView.getFileGroupsInPendingClustering()
          .map(Pair::getLeft)
          .collect(Collectors.toSet()));
      context.setJobStatus(this.getClass().getSimpleName(), "Compaction: generating compaction plan");
      return compactor.generateCompactionPlan(context, table, config, instantTime, fgInPendingCompactionAndClustering);
    } catch (IOException e) {
      throw new HoodieCompactionException("Could not schedule compaction " + config.getBasePath(), e);
    }
  }
  return new HoodieCompactionPlan();
}
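The trigger check behind needCompact is not shown here. As a rough, hypothetical sketch of what such a decision can look like: the Strategy enum below is a local stand-in loosely mirroring Hudi's compaction trigger strategies, and the two counters are assumed inputs rather than real Hudi calls.

public final class CompactionTrigger {

  // Local stand-in for the trigger strategy; not Hudi's CompactionTriggerStrategy itself.
  enum Strategy { NUM_COMMITS, TIME_ELAPSED, NUM_AND_TIME, NUM_OR_TIME }

  // Hypothetical inputs: how many delta commits and how much time have accumulated
  // since the last completed compaction.
  static boolean shouldCompact(Strategy strategy,
                               int deltaCommitsSinceLastCompaction,
                               long secondsSinceLastCompaction,
                               int maxDeltaCommits,
                               long maxDeltaSeconds) {
    boolean enoughCommits = deltaCommitsSinceLastCompaction >= maxDeltaCommits;
    boolean enoughTime = secondsSinceLastCompaction >= maxDeltaSeconds;
    switch (strategy) {
      case NUM_COMMITS:  return enoughCommits;
      case TIME_ELAPSED: return enoughTime;
      case NUM_AND_TIME: return enoughCommits && enoughTime;
      case NUM_OR_TIME:  return enoughCommits || enoughTime;
      default:           return false;
    }
  }
}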
Use of org.apache.hudi.common.table.view.SyncableFileSystemView in project hudi by apache.
In class TimelineServerPerf, method run:
public void run() throws IOException {
  JavaSparkContext jsc = UtilHelpers.buildSparkContext("hudi-view-perf-" + cfg.basePath, cfg.sparkMaster);
  HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
  List<String> allPartitionPaths = FSUtils.getAllPartitionPaths(engineContext, cfg.basePath, cfg.useFileListingFromMetadata, true);
  Collections.shuffle(allPartitionPaths);
  List<String> selected = allPartitionPaths.stream()
      .filter(p -> !p.contains("error"))
      .limit(cfg.maxPartitions)
      .collect(Collectors.toList());
  if (!useExternalTimelineServer) {
    this.timelineServer.startService();
    setHostAddrFromSparkConf(jsc.getConf());
  } else {
    this.hostAddr = cfg.serverHost;
  }
  HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
      .setConf(timelineServer.getConf())
      .setBasePath(cfg.basePath)
      .setLoadActiveTimelineOnLoad(true)
      .build();
  SyncableFileSystemView fsView = new RemoteHoodieTableFileSystemView(this.hostAddr, cfg.serverPort, metaClient);
  String reportDir = cfg.reportDir;
  metaClient.getFs().mkdirs(new Path(reportDir));
  String dumpPrefix = UUID.randomUUID().toString();
  System.out.println("First Iteration to load all partitions");
  Dumper d = new Dumper(metaClient.getFs(), new Path(reportDir, String.format("1_%s.csv", dumpPrefix)));
  d.init();
  d.dump(runLookups(jsc, selected, fsView, 1, 0));
  d.close();
  System.out.println("\n\n\n First Iteration is done");
  Dumper d2 = new Dumper(metaClient.getFs(), new Path(reportDir, String.format("2_%s.csv", dumpPrefix)));
  d2.init();
  d2.dump(runLookups(jsc, selected, fsView, cfg.numIterations, cfg.numCoresPerExecutor));
  d2.close();
  System.out.println("\n\n\nDumping all File Slices");
  selected.forEach(p -> fsView.getAllFileSlices(p).forEach(s -> System.out.println("\tMyFileSlice=" + s)));
  // Waiting for curl queries
  if (!useExternalTimelineServer && cfg.waitForManualQueries) {
    System.out.println("Timeline Server Host Address=" + hostAddr + ", port=" + timelineServer.getServerPort());
    while (true) {
      try {
        Thread.sleep(60000);
      } catch (InterruptedException e) {
        // skip it
      }
    }
  }
}
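Once the perf tool prints the server host and port, the same remote view can be queried from any small client. A minimal sketch, assuming a timeline server is already running and reachable and the table at basePath exists; RemoteViewProbe and its argument handling are illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView;
import org.apache.hudi.common.table.view.SyncableFileSystemView;

public class RemoteViewProbe {
  public static void main(String[] args) {
    // args: host (as printed by the perf tool), port, table base path, partition path
    String host = args[0];
    int port = Integer.parseInt(args[1]);
    String basePath = args[2];
    String partition = args[3];

    // Same builder calls as in the run() method above.
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(new Configuration())
        .setBasePath(basePath)
        .setLoadActiveTimelineOnLoad(true)
        .build();

    // The remote view delegates every lookup to the running timeline server.
    SyncableFileSystemView view = new RemoteHoodieTableFileSystemView(host, port, metaClient);
    view.getLatestFileSlices(partition)
        .forEach(slice -> System.out.println("slice=" + slice));
  }
}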
Use of org.apache.hudi.common.table.view.SyncableFileSystemView in project hudi by apache.
In class RequestHandler, method syncIfLocalViewBehind:
/**
 * Syncs data-set view if local view is behind.
 */
private boolean syncIfLocalViewBehind(Context ctx) {
  if (isLocalViewBehind(ctx)) {
    String basePath = ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM);
    String lastKnownInstantFromClient = ctx.queryParam(RemoteHoodieTableFileSystemView.LAST_INSTANT_TS, HoodieTimeline.INVALID_INSTANT_TS);
    SyncableFileSystemView view = viewManager.getFileSystemView(basePath);
    synchronized (view) {
      if (isLocalViewBehind(ctx)) {
        HoodieTimeline localTimeline = viewManager.getFileSystemView(basePath).getTimeline();
        LOG.info("Syncing view as client passed last known instant " + lastKnownInstantFromClient
            + " as last known instant but server has the following last instant on timeline :" + localTimeline.lastInstant());
        view.sync();
        return true;
      }
    }
  }
  return false;
}
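The double-checked pattern above (cheap check, lock, re-check, then sync) can be summarized in isolation. A generic sketch rather than Hudi code; the isBehind supplier stands in for the timeline comparison performed by isLocalViewBehind.

import java.util.function.BooleanSupplier;

import org.apache.hudi.common.table.view.SyncableFileSystemView;

public class ViewSyncHelper {

  // Returns true only if a sync was actually performed. Checking once without the lock keeps
  // the fast path cheap; re-checking under the view's monitor ensures that concurrent requests
  // trigger at most one refresh.
  static boolean syncIfBehind(SyncableFileSystemView view, BooleanSupplier isBehind) {
    if (isBehind.getAsBoolean()) {
      synchronized (view) {
        if (isBehind.getAsBoolean()) {
          view.sync();
          return true;
        }
      }
    }
    return false;
  }
}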
Use of org.apache.hudi.common.table.view.SyncableFileSystemView in project hudi by apache.
In class HoodieClientRollbackTestBase, method twoUpsertCommitDataWithTwoPartitions:
protected void twoUpsertCommitDataWithTwoPartitions(List<FileSlice> firstPartitionCommit2FileSlices,
                                                    List<FileSlice> secondPartitionCommit2FileSlices,
                                                    HoodieWriteConfig cfg,
                                                    boolean commitSecondUpsert) throws IOException {
  // just generate two partitions
  dataGen = new HoodieTestDataGenerator(new String[] {DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH});
  // 1. prepare data
  HoodieTestDataGenerator.writePartitionMetadataDeprecated(fs, new String[] {DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH}, basePath);
  SparkRDDWriteClient client = getHoodieWriteClient(cfg);

  /**
   * Write 1 (only inserts)
   */
  String newCommitTime = "001";
  client.startCommitWithTime(newCommitTime);
  List<HoodieRecord> records = dataGen.generateInsertsContainsAllPartitions(newCommitTime, 2);
  JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
  JavaRDD<WriteStatus> statuses = client.upsert(writeRecords, newCommitTime);
  Assertions.assertNoWriteErrors(statuses.collect());
  client.commit(newCommitTime, statuses);

  /**
   * Write 2 (updates)
   */
  newCommitTime = "002";
  client.startCommitWithTime(newCommitTime);
  records = dataGen.generateUpdates(newCommitTime, records);
  statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime);
  Assertions.assertNoWriteErrors(statuses.collect());
  if (commitSecondUpsert) {
    client.commit(newCommitTime, statuses);
  }

  // 2. assert file group and get the first partition file slice
  HoodieTable table = this.getHoodieTable(metaClient, cfg);
  SyncableFileSystemView fsView = getFileSystemViewWithUnCommittedSlices(table.getMetaClient());
  List<HoodieFileGroup> firstPartitionCommit2FileGroups = fsView.getAllFileGroups(DEFAULT_FIRST_PARTITION_PATH).collect(Collectors.toList());
  assertEquals(1, firstPartitionCommit2FileGroups.size());
  firstPartitionCommit2FileSlices.addAll(firstPartitionCommit2FileGroups.get(0).getAllFileSlices().collect(Collectors.toList()));

  // 3. assert file group and get the second partition file slice
  List<HoodieFileGroup> secondPartitionCommit2FileGroups = fsView.getAllFileGroups(DEFAULT_SECOND_PARTITION_PATH).collect(Collectors.toList());
  assertEquals(1, secondPartitionCommit2FileGroups.size());
  secondPartitionCommit2FileSlices.addAll(secondPartitionCommit2FileGroups.get(0).getAllFileSlices().collect(Collectors.toList()));

  // 4. assert file slice counts: COPY_ON_WRITE produces two slices per file group (one base file
  // per commit), while MERGE_ON_READ keeps one slice because updates go to log files
  HoodieTableType tableType = this.getTableType();
  if (tableType.equals(HoodieTableType.COPY_ON_WRITE)) {
    assertEquals(2, firstPartitionCommit2FileSlices.size());
    assertEquals(2, secondPartitionCommit2FileSlices.size());
  } else {
    assertEquals(1, firstPartitionCommit2FileSlices.size());
    assertEquals(1, secondPartitionCommit2FileSlices.size());
  }
}
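When the slice-count assertions above fail, it helps to see exactly what the view returns per partition. A small debugging sketch, assuming the same fsView as in the test; FileSliceDebug is illustrative and not part of Hudi.

import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieFileGroup;
import org.apache.hudi.common.table.view.SyncableFileSystemView;

public class FileSliceDebug {

  // Dump every file group in the partition, each of its file slices, and whether the slice
  // carries a base file and how many log files it has.
  static void dumpPartition(SyncableFileSystemView fsView, String partition) {
    fsView.getAllFileGroups(partition).forEach((HoodieFileGroup group) -> {
      System.out.println("fileGroup=" + group.getFileGroupId());
      group.getAllFileSlices().forEach((FileSlice slice) ->
          System.out.println("  slice=" + slice
              + " hasBaseFile=" + slice.getBaseFile().isPresent()
              + " logFiles=" + slice.getLogFiles().count()));
    });
  }
}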