Usage of org.apache.hudi.common.table.timeline.HoodieDefaultTimeline in the Apache Hudi project:
class HoodieInputFormatUtils, method filterInstantsTimeline.
/**
 * Filters out any instants that incremental readers should not yet process.
 *
 * Example timeline:
 *
 * t0 -> create bucket1.parquet
 * t1 -> create and append updates bucket1.log
 * t2 -> request compaction
 * t3 -> create bucket2.parquet
 *
 * If the compaction requested at t2 takes a long time, incremental readers on RO tables
 * could move on to t3 and silently skip the updates made at t1.
 *
 * To work around this problem, we stop returning data belonging to commits > t2.
 * After the compaction completes, incremental readers will see the updates in t2, t3, and so on.
 *
 * @param timeline the active timeline to filter
 * @return the timeline truncated just before the earliest pending compaction instant,
 *         or the input timeline unchanged when no compaction is pending
 */
public static HoodieDefaultTimeline filterInstantsTimeline(HoodieDefaultTimeline timeline) {
  HoodieDefaultTimeline commitsAndCompactionTimeline = timeline.getWriteTimeline();
  Option<HoodieInstant> pendingCompactionInstant = commitsAndCompactionTimeline.filterPendingCompactionTimeline().firstInstant();
  // No pending compaction: nothing to hide from incremental readers.
  if (!pendingCompactionInstant.isPresent()) {
    return timeline;
  }
  // Truncate the timeline so everything at or after the earliest pending compaction is excluded.
  HoodieDefaultTimeline instantsTimeline = commitsAndCompactionTimeline.findInstantsBefore(pendingCompactionInstant.get().getTimestamp());
  int numCommitsFilteredByCompaction = commitsAndCompactionTimeline.getCommitsTimeline().countInstants() - instantsTimeline.getCommitsTimeline().countInstants();
  LOG.info("Earliest pending compaction instant is: " + pendingCompactionInstant.get().getTimestamp() + " skipping " + numCommitsFilteredByCompaction + " commits");
  return instantsTimeline;
}
Usage of org.apache.hudi.common.table.timeline.HoodieDefaultTimeline in the Apache Hudi project:
class HoodieInputFormatUtils, method getFilteredCommitsTimeline.
/**
 * Extracts the completed-commits timeline for the table described by the given meta client,
 * optionally truncated at the earliest pending compaction (see {@code filterInstantsTimeline}).
 *
 * @param job             the job context, consulted for the stop-at-compaction setting
 * @param tableMetaClient meta client of the table whose timeline is extracted
 * @return the completed commits timeline, wrapped in an {@link Option}
 */
public static Option<HoodieTimeline> getFilteredCommitsTimeline(JobContext job, HoodieTableMetaClient tableMetaClient) {
  String tableName = tableMetaClient.getTableConfig().getTableName();
  // When the job is configured to stop at pending compactions, truncate the active
  // timeline so incremental readers do not skip past an in-flight compaction.
  HoodieDefaultTimeline baseTimeline = HoodieHiveUtils.stopAtCompaction(job, tableName)
      ? filterInstantsTimeline(tableMetaClient.getActiveTimeline())
      : tableMetaClient.getActiveTimeline();
  return Option.of(baseTimeline.getCommitsTimeline().filterCompletedInstants());
}
Usage of org.apache.hudi.common.table.timeline.HoodieDefaultTimeline in the Apache Hudi project:
class FileSystemViewCommand, method buildFileSystemView.
/**
 * Builds a file-system view over the partitions matched by the given glob.
 *
 * @param globRegex         path regex appended to the table base path
 * @param maxInstant        upper bound on instant timestamps shown (empty string means no bound)
 * @param basefileOnly      restrict to the base-file (commit) timeline only
 * @param includeMaxInstant whether the bound itself is included
 * @param includeInflight   whether inflight instants are retained
 * @param excludeCompaction whether compaction instants are excluded
 * @return a {@link HoodieTableFileSystemView} over the matched files and filtered timeline
 * @throws IOException if listing the matched paths fails
 */
private HoodieTableFileSystemView buildFileSystemView(String globRegex, String maxInstant, boolean basefileOnly, boolean includeMaxInstant, boolean includeInflight, boolean excludeCompaction) throws IOException {
  HoodieTableMetaClient cliClient = HoodieCLI.getTableMetaClient();
  HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
      .setConf(cliClient.getHadoopConf())
      .setBasePath(cliClient.getBasePath())
      .setLoadActiveTimelineOnLoad(true)
      .build();
  FileSystem fs = HoodieCLI.fs;
  String globPath = String.format("%s/%s/*", cliClient.getBasePath(), globRegex);
  List<FileStatus> statuses = FSUtils.getGlobStatusExcludingMetaFolder(fs, new Path(globPath));

  // Pick the base timeline according to the requested action filters.
  HoodieTimeline timeline;
  if (basefileOnly) {
    timeline = metaClient.getActiveTimeline().getCommitTimeline();
  } else if (excludeCompaction) {
    timeline = metaClient.getActiveTimeline().getCommitsTimeline();
  } else {
    timeline = metaClient.getActiveTimeline().getWriteTimeline();
  }
  if (!includeInflight) {
    timeline = timeline.filterCompletedInstants();
  }

  Stream<HoodieInstant> instants = timeline.getInstants();
  if (!maxInstant.isEmpty()) {
    // Keep only instants at or below (or strictly below) the requested bound.
    BiPredicate<String, String> bound = includeMaxInstant
        ? HoodieTimeline.GREATER_THAN_OR_EQUALS
        : HoodieTimeline.GREATER_THAN;
    instants = instants.filter(instant -> bound.test(maxInstant, instant.getTimestamp()));
  }

  HoodieTimeline filteredTimeline = new HoodieDefaultTimeline(instants, (Function<HoodieInstant, Option<byte[]>> & Serializable) metaClient.getActiveTimeline()::getInstantDetails);
  return new HoodieTableFileSystemView(metaClient, filteredTimeline, statuses.toArray(new FileStatus[0]));
}
Usage of org.apache.hudi.common.table.timeline.HoodieDefaultTimeline in the Apache Hudi project:
class CompactionCommand, method printAllCompactions.
/**
 * Renders every compaction in the given timeline as a printable table, newest first.
 *
 * @param timeline             timeline whose write instants are scanned for compaction plans
 * @param compactionPlanReader resolves an instant to its compaction plan (null when none exists)
 * @param includeExtraMetadata whether to add the plan's extra-metadata column
 * @param sortByField          column to sort the output by
 * @param descending           sort direction
 * @param limit                maximum number of rows to print
 * @param headerOnly           print only the header when true
 * @return the formatted table produced by {@code HoodiePrintHelper}
 */
private String printAllCompactions(HoodieDefaultTimeline timeline, Function<HoodieInstant, HoodieCompactionPlan> compactionPlanReader, boolean includeExtraMetadata, String sortByField, boolean descending, int limit, boolean headerOnly) {
  // Pair each write instant (reverse chronological) with its plan; drop instants without one.
  List<Pair<HoodieInstant, HoodieCompactionPlan>> compactionPlans = timeline.getWriteTimeline()
      .getReverseOrderedInstants()
      .map(instant -> Pair.of(instant, compactionPlanReader.apply(instant)))
      .filter(entry -> entry.getRight() != null)
      .collect(Collectors.toList());
  // Timestamps of compactions whose commit has already completed.
  Set<String> completedTimestamps = timeline.getCommitTimeline()
      .filterCompletedInstants()
      .getInstants()
      .map(HoodieInstant::getTimestamp)
      .collect(Collectors.toSet());

  List<Comparable[]> rows = new ArrayList<>();
  for (Pair<HoodieInstant, HoodieCompactionPlan> entry : compactionPlans) {
    HoodieInstant instant = entry.getLeft();
    HoodieCompactionPlan plan = entry.getRight();
    // A compaction whose commit completed is reported as COMPLETED regardless of its own state.
    HoodieInstant.State state = completedTimestamps.contains(instant.getTimestamp())
        ? HoodieInstant.State.COMPLETED
        : instant.getState();
    int fileCount = plan.getOperations() == null ? 0 : plan.getOperations().size();
    if (includeExtraMetadata) {
      rows.add(new Comparable[] { instant.getTimestamp(), state.toString(), fileCount, plan.getExtraMetadata().toString() });
    } else {
      rows.add(new Comparable[] { instant.getTimestamp(), state.toString(), fileCount });
    }
  }

  Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
  TableHeader header = new TableHeader()
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_COMPACTION_INSTANT_TIME)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_STATE)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_TO_BE_COMPACTED);
  if (includeExtraMetadata) {
    header = header.addTableHeaderField(HoodieTableHeaderFields.HEADER_EXTRA_METADATA);
  }
  return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
}
Usage of org.apache.hudi.common.table.timeline.HoodieDefaultTimeline in the Apache Hudi project:
class HoodieTableMetadataUtil, method getFileSystemView.
/**
 * Gets a file system view for the metadata table.
 *
 * @param metaClient meta client of the metadata table
 * @return a {@link HoodieTableFileSystemView} over the metadata table
 */
public static HoodieTableFileSystemView getFileSystemView(HoodieTableMetaClient metaClient) {
  HoodieTimeline timeline = metaClient.getActiveTimeline();
  // With no commits on the metadata table, the default file system view would
  // return no file slices even though some may have been initialized; substitute
  // a synthetic delta-commit instant so initialized slices remain visible.
  if (timeline.empty()) {
    HoodieInstant placeholder = new HoodieInstant(false, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieActiveTimeline.createNewInstantTime());
    timeline = new HoodieDefaultTimeline(Stream.of(placeholder), metaClient.getActiveTimeline()::getInstantDetails);
  }
  return new HoodieTableFileSystemView(metaClient, timeline);
}
Aggregations