Use of org.apache.beam.sdk.io.gcp.spanner.changestreams.model.PartitionMetadata in project beam by apache.
The class DetectNewPartitionsAction, method outputBatch.
private void outputBatch(
    OutputReceiver<PartitionMetadata> receiver,
    Timestamp minWatermark,
    List<PartitionMetadata> batchPartitions,
    Timestamp scheduledAt) {
  for (PartitionMetadata partition : batchPartitions) {
    final Timestamp createdAt = partition.getCreatedAt();
    final PartitionMetadata updatedPartition =
        partition.toBuilder().setScheduledAt(scheduledAt).build();
    LOG.info("[" + updatedPartition.getPartitionToken() + "] Scheduled partition at "
        + updatedPartition.getScheduledAt() + " with start time "
        + updatedPartition.getStartTimestamp() + " and end time "
        + updatedPartition.getEndTimestamp());
    // Emit the updated partition (with scheduledAt set); emitting the original
    // element here would drop the scheduledAt that is read downstream.
    receiver.outputWithTimestamp(updatedPartition, new Instant(minWatermark.toSqlTimestamp()));
    metrics.incPartitionRecordCount();
    metrics.updatePartitionCreatedToScheduled(new Duration(
        createdAt.toSqlTimestamp().getTime(), scheduledAt.toSqlTimestamp().getTime()));
  }
}
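Note that the element timestamp handed to outputWithTimestamp goes through java.sql.Timestamp, so it resolves at millisecond precision in Joda time. A minimal standalone sketch of that conversion (the class name is hypothetical, and the values are made up):

import com.google.cloud.Timestamp;
import org.joda.time.Instant;

public class WatermarkConversionExample {
  public static void main(String[] args) {
    // A Cloud Spanner timestamp carries nanosecond precision...
    Timestamp minWatermark = Timestamp.ofTimeSecondsAndNanos(1_700_000_000L, 123_456_789);
    // ...but becomes a millisecond-precision Joda Instant when emitted;
    // sub-millisecond digits are dropped. Passing the java.sql.Timestamp
    // directly to new Instant(Object), as the snippet above does, resolves
    // to the same millisecond value.
    Instant elementTimestamp = new Instant(minWatermark.toSqlTimestamp().getTime());
    System.out.println(elementTimestamp);
  }
}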
Use of org.apache.beam.sdk.io.gcp.spanner.changestreams.model.PartitionMetadata in project beam by apache.
The class DetectNewPartitionsAction, method schedulePartitions.
private ProcessContinuation schedulePartitions(
    RestrictionTracker<OffsetRange, Long> tracker,
    OutputReceiver<PartitionMetadata> receiver,
    Timestamp minWatermark,
    TreeMap<Timestamp, List<PartitionMetadata>> batches) {
  for (Map.Entry<Timestamp, List<PartitionMetadata>> batch : batches.entrySet()) {
    final Timestamp batchCreatedAt = batch.getKey();
    final List<PartitionMetadata> batchPartitions = batch.getValue();
    final Timestamp scheduledAt = updateBatchToScheduled(batchPartitions);
    // FIXME: Should be nanos precision
    if (!tracker.tryClaim(TimestampConverter.timestampToMicros(batchCreatedAt))) {
      return ProcessContinuation.stop();
    }
    outputBatch(receiver, minWatermark, batchPartitions, scheduledAt);
  }
  return ProcessContinuation.resume().withResumeDelay(resumeDuration);
}
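The TreeMap of batches keyed by creation timestamp is built elsewhere and is not shown in this snippet. A hypothetical sketch of one way to assemble it, grouping partitions by their createdAt so that tryClaim sees monotonically increasing offsets:

import com.google.cloud.Timestamp;
import java.util.ArrayList;
import java.util.List;
import java.util.TreeMap;
import org.apache.beam.sdk.io.gcp.spanner.changestreams.model.PartitionMetadata;

// Hypothetical helper, not part of DetectNewPartitionsAction: groups partitions
// into batches keyed by creation timestamp, the shape schedulePartitions expects.
// The TreeMap keeps batches in ascending createdAt order.
static TreeMap<Timestamp, List<PartitionMetadata>> batchByCreatedAt(
    List<PartitionMetadata> partitions) {
  TreeMap<Timestamp, List<PartitionMetadata>> batches = new TreeMap<>();
  for (PartitionMetadata partition : partitions) {
    batches
        .computeIfAbsent(partition.getCreatedAt(), createdAt -> new ArrayList<>())
        .add(partition);
  }
  return batches;
}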
Use of org.apache.beam.sdk.io.gcp.spanner.changestreams.model.PartitionMetadata in project beam by apache.
The class ReadChangeStreamPartitionDoFn, method initialRestriction.
/**
* The restriction for a partition will be defined from the start and end timestamp to query the
* partition for. These timestamps are converted to microseconds. The {@link OffsetRange}
* restriction represents a closed-open interval, while the start / end timestamps represent a
* closed-closed interval, so we add 1 microsecond to the end timestamp to convert it to
* closed-open.
*
* <p>In this function we also update the partition state to {@link
* PartitionMetadata.State#RUNNING}.
*
* @param partition the partition to be queried
* @return the offset range from the partition start timestamp to the partition end timestamp + 1
* microsecond
*/
@GetInitialRestriction
public OffsetRange initialRestriction(@Element PartitionMetadata partition) {
  final String token = partition.getPartitionToken();
  final com.google.cloud.Timestamp startTimestamp = partition.getStartTimestamp();
  final long startMicros = TimestampConverter.timestampToMicros(startTimestamp);
  // The offset range represents a closed-open interval, so the end timestamp is
  // shifted by 1 microsecond; a partition without an end timestamp is unbounded.
  final long endMicros =
      Optional.ofNullable(partition.getEndTimestamp())
          .map(TimestampConverter::timestampToMicros)
          .map(micros -> micros + 1)
          .orElse(TimestampConverter.MAX_MICROS + 1);
  final com.google.cloud.Timestamp partitionScheduledAt = partition.getScheduledAt();
  final com.google.cloud.Timestamp partitionRunningAt =
      daoFactory.getPartitionMetadataDao().updateToRunning(token);
  if (partitionScheduledAt != null && partitionRunningAt != null) {
    metrics.updatePartitionScheduledToRunning(new Duration(
        partitionScheduledAt.toSqlTimestamp().getTime(),
        partitionRunningAt.toSqlTimestamp().getTime()));
  }
  return new OffsetRange(startMicros, endMicros);
}
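To make the interval conversion concrete, here is a hand-worked sketch with made-up timestamps; the micros arithmetic (seconds * 10^6 + nanos / 10^3) is assumed to mirror what TimestampConverter.timestampToMicros is described to do:

import com.google.cloud.Timestamp;
import org.apache.beam.sdk.io.range.OffsetRange;

public class RestrictionExample {
  public static void main(String[] args) {
    Timestamp start = Timestamp.parseTimestamp("2023-01-01T00:00:00.000001Z");
    Timestamp end = Timestamp.parseTimestamp("2023-01-01T00:00:10.000000Z");
    // Micros since epoch: seconds * 10^6 + nanos / 10^3 (assumed conversion).
    long startMicros = start.getSeconds() * 1_000_000L + start.getNanos() / 1_000L;
    long endMicros = end.getSeconds() * 1_000_000L + end.getNanos() / 1_000L;
    // The closed-closed timestamp interval [start, end] becomes the closed-open
    // offset interval [startMicros, endMicros + 1), so the end timestamp itself
    // remains claimable.
    OffsetRange restriction = new OffsetRange(startMicros, endMicros + 1);
    System.out.println(restriction); // [1672531200000001, 1672531210000001)
  }
}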
Use of org.apache.beam.sdk.io.gcp.spanner.changestreams.model.PartitionMetadata in project beam by apache.
The class InitializeDoFn, method createFakeParentPartition.
/**
* Creates an initial partition in the partition metadata table to serve as the parent of all the
* partitions in the change stream query. This initial partition will be used to dispatch the
* first change streams query in the job. The heartbeat interval to be used will be the one
* specified in {@link InitializeDoFn#DEFAULT_HEARTBEAT_MILLIS}.
*/
private void createFakeParentPartition() {
  PartitionMetadata parentPartition =
      PartitionMetadata.newBuilder()
          .setPartitionToken(InitialPartition.PARTITION_TOKEN)
          .setParentTokens(InitialPartition.PARENT_TOKENS)
          .setStartTimestamp(startTimestamp)
          .setEndTimestamp(endTimestamp)
          .setHeartbeatMillis(DEFAULT_HEARTBEAT_MILLIS)
          .setState(State.CREATED)
          .setWatermark(startTimestamp)
          .build();
  daoFactory.getPartitionMetadataDao().insert(parentPartition);
}
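For comparison, the same builder can assemble an ordinary (non-initial) partition record. A sketch with made-up token values and a made-up heartbeat, leaving endTimestamp unset since initialRestriction above tolerates a null end timestamp:

import com.google.cloud.Timestamp;
import java.util.Collections;
import java.util.HashSet;
import org.apache.beam.sdk.io.gcp.spanner.changestreams.model.PartitionMetadata;
import org.apache.beam.sdk.io.gcp.spanner.changestreams.model.PartitionMetadata.State;

// Hypothetical child partition record; "childToken1" and "Parent0" are made up.
static PartitionMetadata exampleChildPartition() {
  return PartitionMetadata.newBuilder()
      .setPartitionToken("childToken1")
      .setParentTokens(new HashSet<>(Collections.singletonList("Parent0")))
      .setStartTimestamp(Timestamp.now())
      .setHeartbeatMillis(2_000L)
      .setState(State.CREATED)
      .setWatermark(Timestamp.now())
      .build();
}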