Search in sources :

Example 16 with PartitionMetadata

use of org.apache.beam.sdk.io.gcp.spanner.changestreams.model.PartitionMetadata in project beam by apache.

the class DetectNewPartitionsAction method outputBatch.

/**
 * Emits every partition of a batch to the given receiver and records the related metrics.
 *
 * <p>For each partition a copy is built with its scheduled-at timestamp set to {@code
 * scheduledAt}. That copy is the one that is logged and emitted, so that the element seen
 * downstream carries the scheduling time. Every element is output with {@code minWatermark} as
 * its timestamp.
 *
 * <p>After each output the partition record counter is incremented and the created-to-scheduled
 * duration (from the partition's created-at timestamp to {@code scheduledAt}) is reported.
 *
 * @param receiver the receiver the partitions are output to
 * @param minWatermark the timestamp used as the output timestamp of every emitted partition
 * @param batchPartitions the partitions to emit
 * @param scheduledAt the timestamp at which the batch was scheduled
 */
private void outputBatch(OutputReceiver<PartitionMetadata> receiver, Timestamp minWatermark, List<PartitionMetadata> batchPartitions, Timestamp scheduledAt) {
    for (PartitionMetadata partition : batchPartitions) {
        final Timestamp createdAt = partition.getCreatedAt();
        final PartitionMetadata updatedPartition = partition.toBuilder().setScheduledAt(scheduledAt).build();
        LOG.info("[" + updatedPartition.getPartitionToken() + "] Scheduled partition at " + updatedPartition.getScheduledAt() + " with start time " + updatedPartition.getStartTimestamp() + " and end time " + updatedPartition.getEndTimestamp());
        // Emit the copy that carries scheduledAt rather than the original element; otherwise
        // the scheduling time set above is only logged and never reaches consumers that read
        // getScheduledAt() on the element.
        receiver.outputWithTimestamp(updatedPartition, new Instant(minWatermark.toSqlTimestamp()));
        metrics.incPartitionRecordCount();
        metrics.updatePartitionCreatedToScheduled(new Duration(createdAt.toSqlTimestamp().getTime(), scheduledAt.toSqlTimestamp().getTime()));
    }
}
Also used : Instant(org.joda.time.Instant) PartitionMetadata(org.apache.beam.sdk.io.gcp.spanner.changestreams.model.PartitionMetadata) Duration(org.joda.time.Duration) Timestamp(com.google.cloud.Timestamp)

Example 17 with PartitionMetadata

use of org.apache.beam.sdk.io.gcp.spanner.changestreams.model.PartitionMetadata in project beam by apache.

the class DetectNewPartitionsAction method schedulePartitions.

/**
 * Processes the given batches in the iteration order of the map, scheduling and emitting the
 * partitions of each one.
 *
 * <p>For every batch the partitions are first passed to {@code updateBatchToScheduled}, then the
 * tracker is asked to claim the batch creation timestamp converted to microseconds. If the claim
 * is refused, processing stops immediately; otherwise the batch is handed to {@code
 * outputBatch}.
 *
 * <p>NOTE(review): the batch is updated to scheduled before the claim is attempted, so a batch
 * whose claim is refused has been updated but is not output here - confirm this is intended.
 *
 * @param tracker the restriction tracker used to claim each batch position
 * @param receiver the receiver the partitions are output to
 * @param minWatermark the timestamp forwarded to {@code outputBatch}
 * @param batches the partitions to schedule, grouped by batch creation timestamp
 * @return {@code stop()} when a claim is refused, otherwise {@code resume()} with a resume delay
 *     of {@code resumeDuration}
 */
private ProcessContinuation schedulePartitions(RestrictionTracker<OffsetRange, Long> tracker, OutputReceiver<PartitionMetadata> receiver, Timestamp minWatermark, TreeMap<Timestamp, List<PartitionMetadata>> batches) {
    for (Map.Entry<Timestamp, List<PartitionMetadata>> entry : batches.entrySet()) {
        final List<PartitionMetadata> partitionsInBatch = entry.getValue();
        final Timestamp scheduledTimestamp = updateBatchToScheduled(partitionsInBatch);
        // FIXME: Should be nanos precision
        final boolean claimed = tracker.tryClaim(TimestampConverter.timestampToMicros(entry.getKey()));
        if (claimed) {
            outputBatch(receiver, minWatermark, partitionsInBatch, scheduledTimestamp);
        } else {
            return ProcessContinuation.stop();
        }
    }
    return ProcessContinuation.resume().withResumeDelay(resumeDuration);
}
Also used : PartitionMetadata(org.apache.beam.sdk.io.gcp.spanner.changestreams.model.PartitionMetadata) ArrayList(java.util.ArrayList) List(java.util.List) TreeMap(java.util.TreeMap) Map(java.util.Map) Timestamp(com.google.cloud.Timestamp)

Example 18 with PartitionMetadata

use of org.apache.beam.sdk.io.gcp.spanner.changestreams.model.PartitionMetadata in project beam by apache.

the class ReadChangeStreamPartitionDoFn method initialRestriction.

/**
 * Builds the initial {@link OffsetRange} restriction for a partition from its start and end
 * timestamps, both converted to microseconds.
 *
 * <p>The partition timestamps describe a closed-closed interval while {@link OffsetRange} is
 * closed-open, so the upper bound of the range is the end timestamp plus 1 microsecond. When the
 * partition has no end timestamp, {@code TimestampConverter.MAX_MICROS + 1} is used as the upper
 * bound instead.
 *
 * <p>As part of this call the partition is also moved to {@link
 * PartitionMetadata.State#RUNNING} through the partition metadata DAO. If both the partition's
 * scheduled-at timestamp and the running-at timestamp returned by the DAO are available, the
 * scheduled-to-running duration metric is updated.
 *
 * @param partition the partition to be queried
 * @return the offset range from the partition start timestamp to the partition end timestamp + 1
 *     microsecond
 */
@GetInitialRestriction
public OffsetRange initialRestriction(@Element PartitionMetadata partition) {
    final String partitionToken = partition.getPartitionToken();
    final long rangeStart = TimestampConverter.timestampToMicros(partition.getStartTimestamp());
    // The range is closed-open: its exclusive upper bound is one microsecond past the end.
    final long rangeEnd =
        Optional.ofNullable(partition.getEndTimestamp())
            .map(TimestampConverter::timestampToMicros)
            .map(inclusiveEnd -> inclusiveEnd + 1)
            .orElse(TimestampConverter.MAX_MICROS + 1);
    final com.google.cloud.Timestamp scheduledAt = partition.getScheduledAt();
    final com.google.cloud.Timestamp runningAt = daoFactory.getPartitionMetadataDao().updateToRunning(partitionToken);
    // The latency metric needs both endpoints; skip it when either one is missing.
    final boolean bothTimestampsKnown = !(scheduledAt == null || runningAt == null);
    if (bothTimestampsKnown) {
        final long scheduledMillis = scheduledAt.toSqlTimestamp().getTime();
        final long runningMillis = runningAt.toSqlTimestamp().getTime();
        metrics.updatePartitionScheduledToRunning(new Duration(scheduledMillis, runningMillis));
    }
    return new OffsetRange(rangeStart, rangeEnd);
}
Also used : AttributeValue(io.opencensus.trace.AttributeValue) DaoFactory(org.apache.beam.sdk.io.gcp.spanner.changestreams.dao.DaoFactory) Manual(org.apache.beam.sdk.transforms.splittabledofn.WatermarkEstimators.Manual) ChangeStreamMetrics(org.apache.beam.sdk.io.gcp.spanner.changestreams.ChangeStreamMetrics) PartitionMetadataDao(org.apache.beam.sdk.io.gcp.spanner.changestreams.dao.PartitionMetadataDao) Duration(org.joda.time.Duration) LoggerFactory(org.slf4j.LoggerFactory) PARTITION_ID_ATTRIBUTE_LABEL(org.apache.beam.sdk.io.gcp.spanner.changestreams.ChangeStreamMetrics.PARTITION_ID_ATTRIBUTE_LABEL) TimestampConverter(org.apache.beam.sdk.io.gcp.spanner.changestreams.TimestampConverter) DataChangeRecord(org.apache.beam.sdk.io.gcp.spanner.changestreams.model.DataChangeRecord) DataChangeRecordAction(org.apache.beam.sdk.io.gcp.spanner.changestreams.action.DataChangeRecordAction) QueryChangeStreamAction(org.apache.beam.sdk.io.gcp.spanner.changestreams.action.QueryChangeStreamAction) Tracing(io.opencensus.trace.Tracing) RestrictionTracker(org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker) DoFn(org.apache.beam.sdk.transforms.DoFn) Tracer(io.opencensus.trace.Tracer) ChangeStreamDao(org.apache.beam.sdk.io.gcp.spanner.changestreams.dao.ChangeStreamDao) Logger(org.slf4j.Logger) HeartbeatRecordAction(org.apache.beam.sdk.io.gcp.spanner.changestreams.action.HeartbeatRecordAction) Scope(io.opencensus.common.Scope) UnboundedPerElement(org.apache.beam.sdk.transforms.DoFn.UnboundedPerElement) ManualWatermarkEstimator(org.apache.beam.sdk.transforms.splittabledofn.ManualWatermarkEstimator) Serializable(java.io.Serializable) ChildPartitionsRecordAction(org.apache.beam.sdk.io.gcp.spanner.changestreams.action.ChildPartitionsRecordAction) PartitionMetadata(org.apache.beam.sdk.io.gcp.spanner.changestreams.model.PartitionMetadata) Instant(org.joda.time.Instant) 
ReadChangeStreamPartitionRangeTracker(org.apache.beam.sdk.io.gcp.spanner.changestreams.restriction.ReadChangeStreamPartitionRangeTracker) Optional(java.util.Optional) PartitionMetadataMapper(org.apache.beam.sdk.io.gcp.spanner.changestreams.mapper.PartitionMetadataMapper) ActionFactory(org.apache.beam.sdk.io.gcp.spanner.changestreams.action.ActionFactory) MapperFactory(org.apache.beam.sdk.io.gcp.spanner.changestreams.mapper.MapperFactory) OffsetRange(org.apache.beam.sdk.io.range.OffsetRange) ChangeStreamRecordMapper(org.apache.beam.sdk.io.gcp.spanner.changestreams.mapper.ChangeStreamRecordMapper)

Example 19 with PartitionMetadata

use of org.apache.beam.sdk.io.gcp.spanner.changestreams.model.PartitionMetadata in project beam by apache.

the class InitializeDoFn method createFakeParentPartition.

/**
 * Inserts a synthetic parent partition into the partition metadata table.
 *
 * <p>This partition acts as the parent of all the partitions in the change stream query and is
 * the one used to dispatch the first change streams query of the job. It is created in the
 * {@code CREATED} state, spans from {@code startTimestamp} to {@code endTimestamp}, has its
 * watermark set to {@code startTimestamp}, and uses the heartbeat interval specified in {@link
 * InitializeDoFn#DEFAULT_HEARTBEAT_MILLIS}.
 */
private void createFakeParentPartition() {
    final PartitionMetadata initialPartition =
        PartitionMetadata.newBuilder()
            .setPartitionToken(InitialPartition.PARTITION_TOKEN)
            .setParentTokens(InitialPartition.PARENT_TOKENS)
            .setStartTimestamp(startTimestamp)
            .setEndTimestamp(endTimestamp)
            .setHeartbeatMillis(DEFAULT_HEARTBEAT_MILLIS)
            .setState(State.CREATED)
            .setWatermark(startTimestamp)
            .build();
    daoFactory.getPartitionMetadataDao().insert(initialPartition);
}
Also used : PartitionMetadata(org.apache.beam.sdk.io.gcp.spanner.changestreams.model.PartitionMetadata)

Aggregations

PartitionMetadata (org.apache.beam.sdk.io.gcp.spanner.changestreams.model.PartitionMetadata)19 Timestamp (com.google.cloud.Timestamp)11 Test (org.junit.Test)10 ChildPartitionsRecord (org.apache.beam.sdk.io.gcp.spanner.changestreams.model.ChildPartitionsRecord)8 Instant (org.joda.time.Instant)8 ChildPartition (org.apache.beam.sdk.io.gcp.spanner.changestreams.model.ChildPartition)7 ProcessContinuation (org.apache.beam.sdk.transforms.DoFn.ProcessContinuation)7 Struct (com.google.cloud.spanner.Struct)5 OffsetRange (org.apache.beam.sdk.io.range.OffsetRange)4 Scope (io.opencensus.common.Scope)3 ArrayList (java.util.ArrayList)3 PartitionMetadataDao (org.apache.beam.sdk.io.gcp.spanner.changestreams.dao.PartitionMetadataDao)3 AttributeValue (io.opencensus.trace.AttributeValue)2 Tracer (io.opencensus.trace.Tracer)2 Tracing (io.opencensus.trace.Tracing)2 List (java.util.List)2 Optional (java.util.Optional)2 ChangeStreamMetrics (org.apache.beam.sdk.io.gcp.spanner.changestreams.ChangeStreamMetrics)2 PARTITION_ID_ATTRIBUTE_LABEL (org.apache.beam.sdk.io.gcp.spanner.changestreams.ChangeStreamMetrics.PARTITION_ID_ATTRIBUTE_LABEL)2 TimestampConverter (org.apache.beam.sdk.io.gcp.spanner.changestreams.TimestampConverter)2