Search in sources :

Example 6 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class QueryChangeStreamAction method run.

/**
 * This method will dispatch a change stream query for the given partition, it delegates the
 * processing of the records to one of the corresponding action classes registered and it will
 * keep the state of the partition up to date in the Connector's metadata table.
 *
 * <p>The algorithm is as follows:
 *
 * <ol>
 *   <li>A change stream query for the partition is performed.
 *   <li>For each record, we check the type of the record and dispatch the processing to one of
 *       the actions registered.
 *   <li>If an {@link Optional} with a {@link ProcessContinuation#stop()} is returned from the
 *       actions, we stop processing and return.
 *   <li>Before returning we register a bundle finalizer callback to update the watermark of the
 *       partition in the metadata tables to the latest processed timestamp.
 *   <li>When a change stream query finishes successfully (no more records) we update the
 *       partition state to FINISHED.
 * </ol>
 *
 * There might be cases where due to a split at the exact end timestamp of a partition's change
 * stream query, this function could process a residual with an invalid timestamp. In this case,
 * the error is ignored and no work is done for the residual.
 *
 * @param partition the current partition being processed
 * @param tracker the restriction tracker of the {@link
 *     org.apache.beam.sdk.io.gcp.spanner.changestreams.dofn.ReadChangeStreamPartitionDoFn} SDF
 * @param receiver the output receiver of the {@link
 *     org.apache.beam.sdk.io.gcp.spanner.changestreams.dofn.ReadChangeStreamPartitionDoFn} SDF
 * @param watermarkEstimator the watermark estimator of the {@link
 *     org.apache.beam.sdk.io.gcp.spanner.changestreams.dofn.ReadChangeStreamPartitionDoFn} SDF
 * @param bundleFinalizer the bundle finalizer for {@link
 *     org.apache.beam.sdk.io.gcp.spanner.changestreams.dofn.ReadChangeStreamPartitionDoFn} SDF
 *     bundles
 * @return a {@link ProcessContinuation#stop()} if a record timestamp could not be claimed or if
 *     the partition processing has finished
 * @throws IllegalStateException if the partition token is not found in the metadata table
 * @throws IllegalArgumentException if a record of an unrecognised type is read from the stream
 * @throws SpannerException if the change stream query fails for a reason other than the
 *     timestamp being out of range
 */
@SuppressWarnings("nullness")
@VisibleForTesting
public ProcessContinuation run(PartitionMetadata partition, RestrictionTracker<OffsetRange, Long> tracker, OutputReceiver<DataChangeRecord> receiver, ManualWatermarkEstimator<Instant> watermarkEstimator, BundleFinalizer bundleFinalizer) {
    final String token = partition.getPartitionToken();
    final Timestamp endTimestamp = partition.getEndTimestamp();
    /*
     * FIXME(b/202802422): Workaround until the backend is fixed.
     * The change stream API returns invalid argument if we try to use a child partition start
     * timestamp for a previously returned query. If we split at that exact time, we won't be able
     * to obtain the child partition on the residual restriction, since it will start at the child
     * partition start time.
     * To circumvent this, we always start querying one microsecond before the restriction start
     * time, and ignore any records that are before the restriction start time. This way the child
     * partition should be returned within the query.
     */
    // The restriction's lower bound is interpreted as microseconds.
    final Timestamp restrictionStartTimestamp = Timestamp.ofTimeMicroseconds(tracker.currentRestriction().getFrom());
    final Timestamp previousStartTimestamp = Timestamp.ofTimeMicroseconds(TimestampConverter.timestampToMicros(restrictionStartTimestamp) - 1);
    // "First run" means the restriction still starts exactly at the partition's start timestamp;
    // only in that case is the query issued from the restriction start itself rather than one
    // microsecond earlier.
    final boolean isFirstRun = restrictionStartTimestamp.compareTo(partition.getStartTimestamp()) == 0;
    final Timestamp startTimestamp = isFirstRun ? restrictionStartTimestamp : previousStartTimestamp;
    try (Scope scope = TRACER.spanBuilder("QueryChangeStreamAction").setRecordEvents(true).startScopedSpan()) {
        TRACER.getCurrentSpan().putAttribute(PARTITION_ID_ATTRIBUTE_LABEL, AttributeValue.stringAttributeValue(token));
        // TODO: Potentially we can avoid this fetch, by enriching the runningAt timestamp when the
        // ReadChangeStreamPartitionDoFn#processElement is called
        // Re-read the partition from the metadata table; the refreshed copy (not the argument) is
        // what gets passed to the record mapper and to every record action below.
        final PartitionMetadata updatedPartition = Optional.ofNullable(partitionMetadataDao.getPartition(token)).map(partitionMetadataMapper::from).orElseThrow(() -> new IllegalStateException("Partition " + token + " not found in metadata table"));
        try (ChangeStreamResultSet resultSet = changeStreamDao.changeStreamQuery(token, startTimestamp, endTimestamp, partition.getHeartbeatMillis())) {
            while (resultSet.next()) {
                // One result-set row may map to several change stream records.
                final List<ChangeStreamRecord> records = changeStreamRecordMapper.toChangeStreamRecords(updatedPartition, resultSet.getCurrentRowAsStruct(), resultSet.getMetadata());
                Optional<ProcessContinuation> maybeContinuation;
                for (final ChangeStreamRecord record : records) {
                    // Drop records that precede the restriction start; these can appear because the
                    // query may begin one microsecond early (see the FIXME above).
                    if (record.getRecordTimestamp().compareTo(restrictionStartTimestamp) < 0) {
                        continue;
                    }
                    // Dispatch on the concrete record type; only data change records are given the
                    // output receiver.
                    if (record instanceof DataChangeRecord) {
                        maybeContinuation = dataChangeRecordAction.run(updatedPartition, (DataChangeRecord) record, tracker, receiver, watermarkEstimator);
                    } else if (record instanceof HeartbeatRecord) {
                        maybeContinuation = heartbeatRecordAction.run(updatedPartition, (HeartbeatRecord) record, tracker, watermarkEstimator);
                    } else if (record instanceof ChildPartitionsRecord) {
                        maybeContinuation = childPartitionsRecordAction.run(updatedPartition, (ChildPartitionsRecord) record, tracker, watermarkEstimator);
                    } else {
                        LOG.error("[" + token + "] Unknown record type " + record.getClass());
                        throw new IllegalArgumentException("Unknown record type " + record.getClass());
                    }
                    // A present continuation ends processing right away: register the watermark
                    // update callback and hand the continuation back to the caller.
                    if (maybeContinuation.isPresent()) {
                        LOG.debug("[" + token + "] Continuation present, returning " + maybeContinuation);
                        // NOTE(review): the first argument (now + BUNDLE_FINALIZER_TIMEOUT) is
                        // presumably the callback expiry - confirm against the BundleFinalizer docs.
                        bundleFinalizer.afterBundleCommit(Instant.now().plus(BUNDLE_FINALIZER_TIMEOUT), updateWatermarkCallback(token, watermarkEstimator));
                        return maybeContinuation.get();
                    }
                }
            }
            // The result set is exhausted and no action returned a continuation: still register the
            // watermark update callback before falling through to the finishing logic.
            bundleFinalizer.afterBundleCommit(Instant.now().plus(BUNDLE_FINALIZER_TIMEOUT), updateWatermarkCallback(token, watermarkEstimator));
        } catch (SpannerException e) {
            // An out-of-range timestamp is tolerated (logged at debug level) and execution falls
            // through to the finishing logic below without registering a finalizer callback in
            // this handler; any other Spanner failure is rethrown unchanged.
            if (isTimestampOutOfRange(e)) {
                LOG.debug("[" + token + "] query change stream is out of range for " + startTimestamp + " to " + endTimestamp + ", finishing stream");
            } else {
                throw e;
            }
        }
    }
    // Reached after the query drained without a continuation, or after an out-of-range error was
    // swallowed above (the "completed successfully" message is logged in both cases).
    final long endMicros = TimestampConverter.timestampToMicros(endTimestamp);
    LOG.debug("[" + token + "] change stream completed successfully");
    // The partition is marked as finished only if the end timestamp can be claimed; stop() is
    // returned whether or not the claim succeeds.
    if (tracker.tryClaim(endMicros)) {
        LOG.debug("[" + token + "] Finishing partition");
        partitionMetadataDao.updateToFinished(token);
        LOG.info("[" + token + "] Partition finished");
    }
    return ProcessContinuation.stop();
}
Also used : DataChangeRecord(org.apache.beam.sdk.io.gcp.spanner.changestreams.model.DataChangeRecord) HeartbeatRecord(org.apache.beam.sdk.io.gcp.spanner.changestreams.model.HeartbeatRecord) Timestamp(com.google.cloud.Timestamp) ProcessContinuation(org.apache.beam.sdk.transforms.DoFn.ProcessContinuation) ChangeStreamResultSet(org.apache.beam.sdk.io.gcp.spanner.changestreams.dao.ChangeStreamResultSet) Scope(io.opencensus.common.Scope) PartitionMetadata(org.apache.beam.sdk.io.gcp.spanner.changestreams.model.PartitionMetadata) ChildPartitionsRecord(org.apache.beam.sdk.io.gcp.spanner.changestreams.model.ChildPartitionsRecord) SpannerException(com.google.cloud.spanner.SpannerException) ChangeStreamRecord(org.apache.beam.sdk.io.gcp.spanner.changestreams.model.ChangeStreamRecord) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 7 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class ChildPartitionsRecordAction method run.

/**
 * Handles one {@link ChildPartitionsRecord} read from a partition's change stream.
 *
 * <p>The returned {@link Optional} tells the caller whether it may keep consuming records: an
 * empty value means "carry on", while a value holding {@link ProcessContinuation#stop()} means
 * the record's timestamp could not be claimed and the caller should stop.
 *
 * <p>The steps performed, in order, are:
 *
 * <ol>
 *   <li>The record's start timestamp is converted to microseconds and a claim is attempted on
 *       the restriction tracker. When the claim fails nothing else happens and a stop
 *       continuation is returned.
 *   <li>The watermark is set to the record's start timestamp (as a millisecond-based {@link
 *       Instant}).
 *   <li>Every child partition carried by the record is passed to {@code processChildPartition}.
 * </ol>
 *
 * <p>{@code processChildPartition} is not part of this method. According to the documentation
 * this method has historically carried, it is expected to insert each child partition into the
 * metadata tables if it does not exist yet and to increment a split or merge metric, with the
 * following handling:
 *
 * <ul>
 *   <li>Partition splits: child partition tokens should not yet exist in the partition metadata
 *       table, so new rows are simply added. On a bundle retry, duplicate entries are silently
 *       ignored.
 *   <li>Partition merges: the first parent partition that receives the child token should
 *       succeed in inserting it; the remaining parents silently skip the insertion.
 * </ul>
 *
 * @param partition the current partition being processed
 * @param record the change stream child partition record received
 * @param tracker the restriction tracker of the {@link
 *     org.apache.beam.sdk.io.gcp.spanner.changestreams.dofn.ReadChangeStreamPartitionDoFn} SDF
 * @param watermarkEstimator the watermark estimator of the {@link
 *     org.apache.beam.sdk.io.gcp.spanner.changestreams.dofn.ReadChangeStreamPartitionDoFn} SDF
 * @return {@link Optional#empty()} when the caller can continue with further records; an {@link
 *     Optional} containing {@link ProcessContinuation#stop()} when the {@link
 *     ChildPartitionsRecord} timestamp could not be claimed
 */
@VisibleForTesting
public Optional<ProcessContinuation> run(PartitionMetadata partition, ChildPartitionsRecord record, RestrictionTracker<OffsetRange, Long> tracker, ManualWatermarkEstimator<Instant> watermarkEstimator) {
    final String partitionToken = partition.getPartitionToken();
    try (Scope scope = TRACER.spanBuilder("ChildPartitionsRecordAction").setRecordEvents(true).startScopedSpan()) {
        TRACER.getCurrentSpan().putAttribute(PARTITION_ID_ATTRIBUTE_LABEL, AttributeValue.stringAttributeValue(partitionToken));
        LOG.debug("[" + partitionToken + "] Processing child partition record " + record);

        final Timestamp childStart = record.getStartTimestamp();
        // The watermark works with millisecond instants, the tracker with microsecond offsets.
        final Instant childStartInstant = new Instant(childStart.toSqlTimestamp().getTime());
        final boolean claimed = tracker.tryClaim(TimestampConverter.timestampToMicros(childStart));

        if (claimed) {
            watermarkEstimator.setWatermark(childStartInstant);
            for (final ChildPartition child : record.getChildPartitions()) {
                processChildPartition(partition, record, child);
            }
            LOG.debug("[" + partitionToken + "] Child partitions action completed successfully");
            return Optional.empty();
        }

        // The claim was rejected: do no work for this record and tell the caller to stop.
        LOG.debug("[" + partitionToken + "] Could not claim queryChangeStream(" + childStart + "), stopping");
        return Optional.of(ProcessContinuation.stop());
    }
}
Also used : Scope(io.opencensus.common.Scope) ChildPartition(org.apache.beam.sdk.io.gcp.spanner.changestreams.model.ChildPartition) Instant(org.joda.time.Instant) Timestamp(com.google.cloud.Timestamp) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 8 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class DataChangeRecordAction method run.

/**
 * Handles one {@link DataChangeRecord} read from a partition's change stream.
 *
 * <p>The returned {@link Optional} tells the caller whether it may keep consuming records: an
 * empty value means "carry on", while a value holding {@link ProcessContinuation#stop()} means
 * the record's commit timestamp could not be claimed and the caller should stop.
 *
 * <p>The steps performed, in order, are:
 *
 * <ol>
 *   <li>The record's commit timestamp is converted to microseconds and a claim is attempted on
 *       the restriction tracker. When the claim fails nothing is emitted and a stop
 *       continuation is returned.
 *   <li>The record is emitted through the {@link OutputReceiver}, timestamped with its commit
 *       timestamp.
 *   <li>The watermark is set to the record's commit timestamp.
 * </ol>
 *
 * @param partition the current partition being processed
 * @param record the change stream data record received
 * @param tracker the restriction tracker of the {@link
 *     org.apache.beam.sdk.io.gcp.spanner.changestreams.dofn.ReadChangeStreamPartitionDoFn} SDF
 * @param outputReceiver the output receiver of the {@link
 *     org.apache.beam.sdk.io.gcp.spanner.changestreams.dofn.ReadChangeStreamPartitionDoFn} SDF
 * @param watermarkEstimator the watermark estimator of the {@link
 *     org.apache.beam.sdk.io.gcp.spanner.changestreams.dofn.ReadChangeStreamPartitionDoFn} SDF
 * @return {@link Optional#empty()} when the caller can continue with further records; an {@link
 *     Optional} containing {@link ProcessContinuation#stop()} when the {@link DataChangeRecord}
 *     commit timestamp could not be claimed
 */
@VisibleForTesting
public Optional<ProcessContinuation> run(PartitionMetadata partition, DataChangeRecord record, RestrictionTracker<OffsetRange, Long> tracker, OutputReceiver<DataChangeRecord> outputReceiver, ManualWatermarkEstimator<Instant> watermarkEstimator) {
    try (Scope scope = TRACER.spanBuilder("DataChangeRecordAction").setRecordEvents(true).startScopedSpan()) {
        TRACER.getCurrentSpan().putAttribute(PARTITION_ID_ATTRIBUTE_LABEL, AttributeValue.stringAttributeValue(partition.getPartitionToken()));

        final String partitionToken = partition.getPartitionToken();
        LOG.debug("[" + partitionToken + "] Processing data record " + record.getCommitTimestamp());

        final Timestamp committedAt = record.getCommitTimestamp();
        // Output and watermark work with millisecond instants, the tracker with microsecond offsets.
        final Instant committedInstant = new Instant(committedAt.toSqlTimestamp().getTime());
        final boolean claimed = tracker.tryClaim(TimestampConverter.timestampToMicros(committedAt));

        if (claimed) {
            outputReceiver.outputWithTimestamp(record, committedInstant);
            watermarkEstimator.setWatermark(committedInstant);
            LOG.debug("[" + partitionToken + "] Data record action completed successfully");
            return Optional.empty();
        }

        // The claim was rejected: emit nothing for this record and tell the caller to stop.
        LOG.debug("[" + partitionToken + "] Could not claim queryChangeStream(" + committedAt + "), stopping");
        return Optional.of(ProcessContinuation.stop());
    }
}
Also used : Scope(io.opencensus.common.Scope) Instant(org.joda.time.Instant) Timestamp(com.google.cloud.Timestamp) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 9 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class HeartbeatRecordAction method run.

/**
 * Handles one {@link HeartbeatRecord} read from a partition's change stream.
 *
 * <p>The returned {@link Optional} tells the caller whether it may keep consuming records: an
 * empty value means "carry on", while a value holding {@link ProcessContinuation#stop()} means
 * the heartbeat's timestamp could not be claimed and the caller should stop.
 *
 * <p>The steps performed, in order, are:
 *
 * <ol>
 *   <li>The heartbeat timestamp is converted to microseconds and a claim is attempted on the
 *       restriction tracker. When the claim fails nothing else happens and a stop continuation
 *       is returned.
 *   <li>The heartbeat record count metric is incremented.
 *   <li>The watermark is set to the heartbeat timestamp.
 * </ol>
 *
 * @param partition the current partition being processed
 * @param record the change stream heartbeat record received
 * @param tracker the restriction tracker on which the heartbeat timestamp is claimed
 * @param watermarkEstimator the watermark estimator that is advanced to the heartbeat timestamp
 * @return {@link Optional#empty()} when the caller can continue with further records; an {@link
 *     Optional} containing {@link ProcessContinuation#stop()} when the {@link HeartbeatRecord}
 *     timestamp could not be claimed
 */
@VisibleForTesting
public Optional<ProcessContinuation> run(PartitionMetadata partition, HeartbeatRecord record, RestrictionTracker<OffsetRange, Long> tracker, ManualWatermarkEstimator<Instant> watermarkEstimator) {
    try (Scope scope = TRACER.spanBuilder("HeartbeatRecordAction").setRecordEvents(true).startScopedSpan()) {
        TRACER.getCurrentSpan().putAttribute(PARTITION_ID_ATTRIBUTE_LABEL, AttributeValue.stringAttributeValue(partition.getPartitionToken()));

        final String partitionToken = partition.getPartitionToken();
        LOG.debug("[" + partitionToken + "] Processing heartbeat record " + record);

        final Timestamp heartbeatAt = record.getTimestamp();
        // The watermark works with millisecond instants, the tracker with microsecond offsets.
        final Instant heartbeatInstant = new Instant(heartbeatAt.toSqlTimestamp().getTime());
        final boolean claimed = tracker.tryClaim(TimestampConverter.timestampToMicros(heartbeatAt));

        if (claimed) {
            metrics.incHeartbeatRecordCount();
            watermarkEstimator.setWatermark(heartbeatInstant);
            LOG.debug("[" + partitionToken + "] Heartbeat record action completed successfully");
            return Optional.empty();
        }

        // The claim was rejected: leave metrics and watermark untouched and tell the caller to stop.
        LOG.debug("[" + partitionToken + "] Could not claim queryChangeStream(" + heartbeatAt + "), stopping");
        return Optional.of(ProcessContinuation.stop());
    }
}
Also used : Scope(io.opencensus.common.Scope) Instant(org.joda.time.Instant) Timestamp(com.google.cloud.Timestamp) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 10 with VisibleForTesting

use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting in project beam by apache.

the class DoFnSignatures method analyzeSetupMethod.

/**
 * Analyzes a reflected setup method and builds its {@link DoFnSignature.LifecycleMethod}
 * description.
 *
 * <p>The analysis proceeds as follows:
 *
 * <ol>
 *   <li>A "Must return void" check on the method's return type is reported through {@code
 *       errors}.
 *   <li>Each declared parameter is resolved against {@code fnT}, described together with its
 *       annotations, analyzed via {@code analyzeExtraParameter} and recorded in a fresh method
 *       analysis context.
 *   <li>Every recorded parameter is checked against {@code ALLOWED_SETUP_PARAMETERS}.
 * </ol>
 *
 * @param errors the reporter through which validation checks are made
 * @param fnT the type descriptor used to resolve the method's generic parameter types
 * @param m the reflected method being analyzed
 * @param inputT the input type descriptor, forwarded to the per-parameter analysis
 * @param outputT the output type descriptor, forwarded to the per-parameter analysis
 * @param fnContext the function-level analysis context, forwarded to the per-parameter analysis
 * @return the lifecycle method description built from {@code m} and its analyzed parameters
 */
@VisibleForTesting
static DoFnSignature.LifecycleMethod analyzeSetupMethod(ErrorReporter errors, TypeDescriptor<? extends DoFn<?, ?>> fnT, Method m, TypeDescriptor<?> inputT, TypeDescriptor<?> outputT, FnAnalysisContext fnContext) {
    errors.checkArgument(void.class.equals(m.getReturnType()), "Must return void");

    final Type[] genericParamTypes = m.getGenericParameterTypes();
    final MethodAnalysisContext analysis = MethodAnalysisContext.create();

    // Walk the parameters in declaration order, tracking the position needed to look up the
    // matching annotations.
    int position = 0;
    for (final Type genericParamType : genericParamTypes) {
        final ParameterDescription description = ParameterDescription.of(m, position, fnT.resolveType(genericParamType), Arrays.asList(m.getParameterAnnotations()[position]));
        analysis.addParameter(analyzeExtraParameter(errors, fnContext, analysis, description, inputT, outputT));
        position++;
    }

    // Validation runs only after every parameter has been analyzed and recorded.
    analysis.getExtraParameters().forEach(extra -> checkParameterOneOf(errors, extra, ALLOWED_SETUP_PARAMETERS));

    return DoFnSignature.LifecycleMethod.create(m, analysis.extraParameters);
}
Also used : Type(java.lang.reflect.Type) ParameterizedType(java.lang.reflect.ParameterizedType) Parameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter) WatermarkEstimatorParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorParameter) BundleFinalizerParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.BundleFinalizerParameter) PipelineOptionsParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.PipelineOptionsParameter) WindowParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WindowParameter) TimerFamilyParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerFamilyParameter) TypeParameter(org.apache.beam.sdk.values.TypeParameter) RestrictionTrackerParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionTrackerParameter) TimerParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerParameter) WatermarkEstimatorStateParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorStateParameter) SchemaElementParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter) RestrictionParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionParameter) StateParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.StateParameter) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Aggregations

VisibleForTesting (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)81 ArrayList (java.util.ArrayList)18 IOException (java.io.IOException)17 ParameterizedType (java.lang.reflect.ParameterizedType)15 Type (java.lang.reflect.Type)15 Parameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter)14 BundleFinalizerParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.BundleFinalizerParameter)14 PipelineOptionsParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.PipelineOptionsParameter)14 RestrictionParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionParameter)14 RestrictionTrackerParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionTrackerParameter)14 SchemaElementParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter)14 StateParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.StateParameter)14 TimerFamilyParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerFamilyParameter)14 TimerParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerParameter)14 WatermarkEstimatorParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorParameter)14 WatermarkEstimatorStateParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorStateParameter)14 WindowParameter (org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WindowParameter)14 TypeParameter (org.apache.beam.sdk.values.TypeParameter)14 DoFn (org.apache.beam.sdk.transforms.DoFn)10 Map (java.util.Map)7