Use of org.apache.gobblin.time.TimeIterator in the project incubator-gobblin by Apache.
Class IcebergMetadataWriter, method computeCompletenessWatermark.
/**
 * Computes the updated completion watermark for a table.
 *
 * <p>NOTE: the completion watermark for a window [t1, t2] is marked as t2 if audit counts match
 * for that window (i.e. it is set to the beginning of the next window).
 *
 * <p>Algorithm:
 * <pre>
 * if no full unit of granularity has elapsed between previousWatermark and now
 *   return previousWatermark
 * for each timestamp produced by the {@link TimeIterator}
 *   if timestamp is after previousWatermark
 *     check audit counts for completeness between a source and reference tier
 *     for [timestamp - 1 unit of granularity, timestamp]
 *     if the audit counts match, update the watermark to the timestamp and stop
 *     else continue with the next timestamp
 *   else
 *     stop
 * </pre>
 *
 * <p>The {@link TimeIterator} steps over the range {@code [timestamps.first(), timestamps.last()]}
 * one unit of granularity at a time. NOTE(review): the trailing {@code true} constructor argument
 * presumably selects reverse iteration, matching the descending order of {@code timestamps};
 * confirm against {@link TimeIterator}.
 *
 * <p>If the audit check throws an {@link IOException}, the failure is logged as a warning and the
 * previous watermark is returned unchanged.
 *
 * @param table name of the table whose audit counts are verified
 * @param timestamps a sorted set of timestamps in decreasing order; when {@code null} or empty,
 *        {@code previousWatermark} is returned
 * @param previousWatermark previous completion watermark for the table, in epoch milliseconds
 * @return updated completion watermark in epoch milliseconds, or {@code previousWatermark} if it
 *         could not be advanced
 */
private long computeCompletenessWatermark(String table, SortedSet<ZonedDateTime> timestamps, long previousWatermark) {
  log.info(String.format("Compute completion watermark for %s and timestamps %s with previous watermark %s", table, timestamps, previousWatermark));
  // Resolve the zone once, up front, so an invalid time-zone setting still fails fast.
  ZoneId zone = ZoneId.of(this.timeZone);
  if (timestamps == null || timestamps.isEmpty()) {
    log.error("Cannot create time iterator. Empty or null timestamps");
    return previousWatermark;
  }
  long completionWatermark = previousWatermark;
  ZonedDateTime now = ZonedDateTime.now(zone);
  try {
    TimeIterator.Granularity granularity = TimeIterator.Granularity.valueOf(this.auditCheckGranularity);
    ZonedDateTime prevWatermarkDT = Instant.ofEpochMilli(previousWatermark).atZone(zone);
    // This condition does not depend on the iterated timestamp: if less than one unit of
    // granularity separates the previous watermark from now, the watermark cannot advance.
    if (TimeIterator.durationBetween(prevWatermarkDT, now, granularity) <= 0) {
      return previousWatermark;
    }
    ZonedDateTime startDT = timestamps.first();
    ZonedDateTime endDT = timestamps.last();
    TimeIterator iterator = new TimeIterator(startDT, endDT, granularity, true);
    while (iterator.hasNext()) {
      ZonedDateTime timestampDT = iterator.next();
      if (!timestampDT.isAfter(prevWatermarkDT)) {
        // Reached a timestamp at or before the previous watermark: nothing newer left to check.
        break;
      }
      long timestampMillis = timestampDT.toInstant().toEpochMilli();
      long windowStartMillis = TimeIterator.dec(timestampDT, granularity, 1).toInstant().toEpochMilli();
      if (auditCountVerifier.get().isComplete(table, windowStartMillis, timestampMillis)) {
        // First complete window found: its end becomes the new watermark.
        completionWatermark = timestampMillis;
        break;
      }
    }
  } catch (IOException e) {
    // Best effort: keep the watermark computed so far rather than failing the caller.
    log.warn("Exception during audit count check: ", e);
  }
  return completionWatermark;
}
Use of org.apache.gobblin.time.TimeIterator in the project incubator-gobblin by Apache.
Class TimePartitionGlobFinder, method computePartitions.
/**
 * Builds one partition path string for every time step between {@code startTime} and
 * {@code endTime}.
 *
 * <p>A {@link TimeIterator} walks the range at the configured {@code granularity}; each value it
 * yields is formatted with {@code timeFormatter}, prefixed with {@code partitionPrefix}, and
 * resolved against the dataset root.
 *
 * @param dataset dataset whose root is the parent of every generated partition path
 * @return the partition paths as strings, in the order the iterator produces them
 */
private Collection<String> computePartitions(FileSystemDataset dataset) {
  List<String> result = new ArrayList<>();
  for (TimeIterator times = new TimeIterator(startTime, endTime, granularity); times.hasNext(); ) {
    Path partitionPath = new Path(dataset.datasetRoot(), partitionPrefix + timeFormatter.format(times.next()));
    result.add(partitionPath.toString());
  }
  return result;
}
Aggregations