Search in sources :

Example 11 with TypeDescriptor

use of org.apache.beam.sdk.values.TypeDescriptor in project beam by apache.

the class ParDoTest method testTimerReceivedInOriginalWindow.

/**
 * Checks that an event-time timer set while processing an element fires once per window the
 * element was assigned to, and that the firing observes that same window.
 *
 * <p>The single input element has timestamp 0 and is windowed into 3-minute sliding windows that
 * start every minute, so the expected output is the three windows starting at 0, -1 and -2
 * minutes.
 */
@Test
@Category({ ValidatesRunner.class, UsesTimersInParDo.class })
public void testTimerReceivedInOriginalWindow() throws Exception {
    final String timerId = "foo";
    DoFn<KV<String, Integer>, BoundedWindow> fn = new DoFn<KV<String, Integer>, BoundedWindow>() {

        @TimerId(timerId)
        private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        // Each processed element arms the timer one second out; nothing is output here.
        @ProcessElement
        public void processElement(ProcessContext context, @TimerId(timerId) Timer timer) {
            timer.offset(Duration.standardSeconds(1)).setRelative();
        }

        // The only output of this DoFn is the window seen by the timer callback.
        @OnTimer(timerId)
        public void onTimer(OnTimerContext context, BoundedWindow window) {
            context.output(context.window());
        }

        // @Override makes the compiler verify this really replaces DoFn#getOutputTypeDescriptor;
        // without it a signature drift would silently fall back to the inherited descriptor.
        // The raw cast is deliberate: the declared output type is BoundedWindow but the
        // descriptor handed out is the one for IntervalWindow.
        @Override
        @SuppressWarnings({ "unchecked", "rawtypes" })
        public TypeDescriptor<BoundedWindow> getOutputTypeDescriptor() {
            return (TypeDescriptor) TypeDescriptor.of(IntervalWindow.class);
        }
    };
    SlidingWindows windowing = SlidingWindows.of(Duration.standardMinutes(3)).every(Duration.standardMinutes(1));
    PCollection<BoundedWindow> output = pipeline
        .apply(Create.timestamped(TimestampedValue.of(KV.of("hello", 24), new Instant(0L))))
        .apply(Window.<KV<String, Integer>>into(windowing))
        .apply(ParDo.of(fn));
    PAssert.that(output).containsInAnyOrder(
        new IntervalWindow(new Instant(0), Duration.standardMinutes(3)),
        new IntervalWindow(new Instant(0).minus(Duration.standardMinutes(1)), Duration.standardMinutes(3)),
        new IntervalWindow(new Instant(0).minus(Duration.standardMinutes(2)), Duration.standardMinutes(3)));
    pipeline.run();
}
Also used : Instant(org.joda.time.Instant) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) OnTimer(org.apache.beam.sdk.transforms.DoFn.OnTimer) Timer(org.apache.beam.sdk.state.Timer) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) TimerSpec(org.apache.beam.sdk.state.TimerSpec) SlidingWindows(org.apache.beam.sdk.transforms.windowing.SlidingWindows) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 12 with TypeDescriptor

use of org.apache.beam.sdk.values.TypeDescriptor in project beam by apache.

the class SchemaCoderCloudObjectTranslator method fromCloudObject.

/**
 * Rebuilds a {@code SchemaCoder} from its cloud object form.
 *
 * <p>The type descriptor and the two row-conversion functions are read as JSON-string-encoded
 * byte arrays and deserialized; the schema is parsed from its JSON proto text. When the schema
 * reports overridden encoding positions and carries a UUID, those positions are registered with
 * {@code SchemaCoder} before the coder is created.
 *
 * @param cloudObject the cloud object holding the serialized coder components
 * @return the reconstructed coder
 * @throws RuntimeException wrapping any {@link IOException} raised while decoding
 */
@Override
public SchemaCoder fromCloudObject(CloudObject cloudObject) {
    try {
        byte[] typeDescriptorBytes = StringUtils.jsonStringToByteArray(Structs.getString(cloudObject, TYPE_DESCRIPTOR));
        TypeDescriptor typeDescriptor = (TypeDescriptor) SerializableUtils.deserializeFromByteArray(typeDescriptorBytes, "typeDescriptor");

        byte[] toRowBytes = StringUtils.jsonStringToByteArray(Structs.getString(cloudObject, TO_ROW_FUNCTION));
        SerializableFunction toRowFunction = (SerializableFunction) SerializableUtils.deserializeFromByteArray(toRowBytes, "toRowFunction");

        byte[] fromRowBytes = StringUtils.jsonStringToByteArray(Structs.getString(cloudObject, FROM_ROW_FUNCTION));
        SerializableFunction fromRowFunction = (SerializableFunction) SerializableUtils.deserializeFromByteArray(fromRowBytes, "fromRowFunction");

        // The schema travels as JSON-formatted proto text rather than as serialized Java bytes.
        String schemaJson = Structs.getString(cloudObject, SCHEMA);
        SchemaApi.Schema.Builder protoBuilder = SchemaApi.Schema.newBuilder();
        JsonFormat.parser().merge(schemaJson, protoBuilder);
        Schema schema = SchemaTranslation.schemaFromProto(protoBuilder.build());

        @Nullable UUID schemaId = schema.getUUID();
        if (schema.isEncodingPositionsOverridden()) {
            if (schemaId != null) {
                SchemaCoder.overrideEncodingPositions(schemaId, schema.getEncodingPositions());
            }
        }
        return SchemaCoder.of(schema, typeDescriptor, toRowFunction, fromRowFunction);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used : TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) Schema(org.apache.beam.sdk.schemas.Schema) IOException(java.io.IOException) UUID(java.util.UUID) Nullable(javax.annotation.Nullable)

Example 13 with TypeDescriptor

use of org.apache.beam.sdk.values.TypeDescriptor in project beam by apache.

the class DirectRunnerTest method byteArrayCountShouldSucceed.

/**
 * Encodes integers to byte arrays, flattens two such collections, counts occurrences per byte
 * array, decodes the keys back to integers and checks the resulting counts.
 */
@Test
public void byteArrayCountShouldSucceed() {
    Pipeline p = getPipeline();
    SerializableFunction<Integer, byte[]> getBytes = input -> {
        try {
            return CoderUtils.encodeToByteArray(VarIntCoder.of(), input);
        } catch (CoderException e) {
            // Same failure type and message as before, but the original exception stays attached.
            throw new AssertionError("Unexpected Coder Exception " + e, e);
        }
    };
    TypeDescriptor<byte[]> td = new TypeDescriptor<byte[]>() {
    };
    PCollection<byte[]> foos = p.apply(Create.of(1, 1, 1, 2, 2, 3)).apply(MapElements.into(td).via(getBytes));
    PCollection<byte[]> msync = p.apply(Create.of(1, -2, -8, -16)).apply(MapElements.into(td).via(getBytes));
    PCollection<byte[]> bytes = PCollectionList.of(foos).and(msync).apply(Flatten.pCollections());
    PCollection<KV<byte[], Long>> counts = bytes.apply(Count.perElement());
    PCollection<KV<Integer, Long>> countsByValue = counts.apply(MapElements.via(new SimpleFunction<KV<byte[], Long>, KV<Integer, Long>>() {

        @Override
        public KV<Integer, Long> apply(KV<byte[], Long> input) {
            try {
                return KV.of(CoderUtils.decodeFromByteArray(VarIntCoder.of(), input.getKey()), input.getValue());
            } catch (CoderException e) {
                throw new AssertionError("Unexpected Coder Exception " + e, e);
            }
        }
    }));
    // The value 1 appears three times in the first input and once in the second, hence 4.
    Map<Integer, Long> expected = ImmutableMap.<Integer, Long>builder().put(1, 4L).put(2, 2L).put(3, 1L).put(-2, 1L).put(-8, 1L).put(-16, 1L).build();
    PAssert.thatMap(countsByValue).isEqualTo(expected);
    // PAssert only adds a checking transform to the pipeline; it is evaluated when the pipeline
    // executes. Without this call the test would pass without verifying anything.
    p.run().waitUntilFinish();
}
Also used : Count(org.apache.beam.sdk.transforms.Count) Arrays(java.util.Arrays) SerializableCoder(org.apache.beam.sdk.coders.SerializableCoder) PBegin(org.apache.beam.sdk.values.PBegin) Matchers.isA(org.hamcrest.Matchers.isA) CoderUtils(org.apache.beam.sdk.util.CoderUtils) PipelineResult(org.apache.beam.sdk.PipelineResult) UnboundedSource(org.apache.beam.sdk.io.UnboundedSource) ListCoder(org.apache.beam.sdk.coders.ListCoder) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) SimpleFunction(org.apache.beam.sdk.transforms.SimpleFunction) ThrowableMessageMatcher(org.junit.internal.matchers.ThrowableMessageMatcher) Future(java.util.concurrent.Future) DirectPipelineResult(org.apache.beam.runners.direct.DirectRunner.DirectPipelineResult) PCollectionList(org.apache.beam.sdk.values.PCollectionList) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Create(org.apache.beam.sdk.transforms.Create) Map(java.util.Map) Window(org.apache.beam.sdk.transforms.windowing.Window) Assert.fail(org.junit.Assert.fail) Flatten(org.apache.beam.sdk.transforms.Flatten) MapElements(org.apache.beam.sdk.transforms.MapElements) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Sum(org.apache.beam.sdk.transforms.Sum) BlockingQueue(java.util.concurrent.BlockingQueue) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) Executors(java.util.concurrent.Executors) Serializable(java.io.Serializable) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CoderException(org.apache.beam.sdk.coders.CoderException) List(java.util.List) ParDo(org.apache.beam.sdk.transforms.ParDo) Matchers.equalTo(org.hamcrest.Matchers.equalTo) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) Optional(java.util.Optional) State(org.apache.beam.sdk.PipelineResult.State) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) 
Matchers.is(org.hamcrest.Matchers.is) GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) KV(org.apache.beam.sdk.values.KV) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) AfterWatermark(org.apache.beam.sdk.transforms.windowing.AfterWatermark) Default(org.apache.beam.sdk.options.Default) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) Coder(org.apache.beam.sdk.coders.Coder) Callable(java.util.concurrent.Callable) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) PTransform(org.apache.beam.sdk.transforms.PTransform) Read(org.apache.beam.sdk.io.Read) PipelineRunner(org.apache.beam.sdk.PipelineRunner) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) JsonIgnore(com.fasterxml.jackson.annotation.JsonIgnore) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Pipeline(org.apache.beam.sdk.Pipeline) NoSuchElementException(java.util.NoSuchElementException) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ExpectedException(org.junit.rules.ExpectedException) ExecutorService(java.util.concurrent.ExecutorService) Nullable(org.checkerframework.checker.nullness.qual.Nullable) OutputStream(java.io.OutputStream) DoFn(org.apache.beam.sdk.transforms.DoFn) DisplayData(org.apache.beam.sdk.transforms.display.DisplayData) CountingSource(org.apache.beam.sdk.io.CountingSource) PDone(org.apache.beam.sdk.values.PDone) PAssert(org.apache.beam.sdk.testing.PAssert) IllegalMutationException(org.apache.beam.sdk.util.IllegalMutationException) Matchers(org.hamcrest.Matchers) IOException(java.io.IOException) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) AtomicLong(java.util.concurrent.atomic.AtomicLong) BoundedSource(org.apache.beam.sdk.io.BoundedSource) Rule(org.junit.Rule) Preconditions.checkState(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState) 
BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Preconditions(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions) Instant(org.joda.time.Instant) AtomicCoder(org.apache.beam.sdk.coders.AtomicCoder) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) Assert.assertEquals(org.junit.Assert.assertEquals) InputStream(java.io.InputStream) KV(org.apache.beam.sdk.values.KV) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) SimpleFunction(org.apache.beam.sdk.transforms.SimpleFunction) AtomicLong(java.util.concurrent.atomic.AtomicLong) CoderException(org.apache.beam.sdk.coders.CoderException) Test(org.junit.Test)

Example 14 with TypeDescriptor

use of org.apache.beam.sdk.values.TypeDescriptor in project beam by apache.

the class FlinkStreamingPortablePipelineTranslator method getSideInputIdToPCollectionViewMap.

/**
 * Creates one {@code PCollectionView} per side input listed in the stage payload, keyed by the
 * side input id and kept in the payload's iteration order.
 *
 * <p>For each side input the consuming transform's input PCollection is looked up in
 * {@code components}, its windowing strategy is rehydrated from proto, and its coder is rewrapped
 * so that the value coder is an iterable of the original value coder.
 *
 * @param stagePayload the executable stage whose side inputs are translated
 * @param components the pipeline components used to resolve transforms, PCollections and
 *     windowing strategies
 * @return the views, keyed by side input id
 * @throws IllegalStateException if a windowing strategy proto cannot be rehydrated
 */
private static LinkedHashMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> getSideInputIdToPCollectionViewMap(RunnerApi.ExecutableStagePayload stagePayload, RunnerApi.Components components) {
    LinkedHashMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> viewsById = new LinkedHashMap<>();
    RehydratedComponents rehydrated = RehydratedComponents.forComponents(components);
    // for PCollectionView compatibility, not used to transform materialization
    ViewFn<Iterable<WindowedValue<?>>, ?> viewFn = (ViewFn) new PCollectionViews.MultimapViewFn<>(
        (PCollectionViews.TypeDescriptorSupplier<Iterable<WindowedValue<Void>>>) () -> TypeDescriptors.iterables(new TypeDescriptor<WindowedValue<Void>>() {
        }),
        (PCollectionViews.TypeDescriptorSupplier<Void>) TypeDescriptors::voids);
    for (RunnerApi.ExecutableStagePayload.SideInputId sideInputId : stagePayload.getSideInputsList()) {
        // TODO: local name is unique as long as only one transform with side input can be within a
        // stage
        String localName = sideInputId.getLocalName();
        RunnerApi.PTransform consumer = components.getTransformsOrThrow(sideInputId.getTransformId());
        String pcollectionId = consumer.getInputsOrThrow(localName);
        String strategyId = components.getPcollectionsOrThrow(pcollectionId).getWindowingStrategyId();
        RunnerApi.WindowingStrategy strategyProto = components.getWindowingStrategiesOrThrow(strategyId);
        final WindowingStrategy<?, ?> windowingStrategy;
        try {
            windowingStrategy = WindowingStrategyTranslation.fromProto(strategyProto, rehydrated);
        } catch (InvalidProtocolBufferException e) {
            throw new IllegalStateException(String.format("Unable to hydrate side input windowing strategy %s.", strategyProto), e);
        }
        Coder<WindowedValue<Object>> coder = instantiateCoder(pcollectionId, components);
        // side input materialization via GBK (T -> Iterable<T>)
        WindowedValueCoder windowedCoder = (WindowedValueCoder) coder;
        coder = windowedCoder.withValueCoder(IterableCoder.of(windowedCoder.getValueCoder()));
        viewsById.put(sideInputId, new RunnerPCollectionView<>(null, new TupleTag<>(localName), viewFn, // TODO: support custom mapping fn
        windowingStrategy.getWindowFn().getDefaultWindowMappingFn(), windowingStrategy, coder));
    }
    return viewsById;
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) LinkedHashMap(java.util.LinkedHashMap) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) ViewFn(org.apache.beam.sdk.transforms.ViewFn) WindowedValue(org.apache.beam.sdk.util.WindowedValue) PCollectionViews(org.apache.beam.sdk.values.PCollectionViews) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) RunnerPCollectionView(org.apache.beam.runners.core.construction.RunnerPCollectionView) PCollectionView(org.apache.beam.sdk.values.PCollectionView) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents)

Example 15 with TypeDescriptor

use of org.apache.beam.sdk.values.TypeDescriptor in project beam by apache.

the class FieldValueTypeInformation method forSetter.

/**
 * Builds the {@code FieldValueTypeInformation} for the field written by a setter method.
 *
 * <p>The field name is obtained by passing the method name and {@code setterPrefix} to
 * {@code ReflectUtils.stripPrefix}; the field type is taken from the method's first generic
 * parameter type.
 *
 * @param method the setter method to describe
 * @param setterPrefix the prefix the method name must start with
 * @return the type information for the setter's field
 * @throws RuntimeException if the method name does not start with {@code setterPrefix}, or the
 *     method declares no parameters
 */
public static FieldValueTypeInformation forSetter(Method method, String setterPrefix) {
    String methodName = method.getName();
    if (!methodName.startsWith(setterPrefix)) {
        throw new RuntimeException("Setter has wrong prefix " + methodName);
    }
    // Reading parameter [0] of a zero-argument method would otherwise fail with a bare
    // ArrayIndexOutOfBoundsException that does not say which method was at fault.
    if (method.getParameterCount() == 0) {
        throw new RuntimeException("Setter has no parameters " + methodName);
    }
    String name = ReflectUtils.stripPrefix(methodName, setterPrefix);
    TypeDescriptor type = TypeDescriptor.of(method.getGenericParameterTypes()[0]);
    boolean nullable = hasSingleNullableParameter(method);
    return new AutoValue_FieldValueTypeInformation.Builder()
        .setName(name)
        .setNullable(nullable)
        .setType(type)
        .setRawType(type.getRawType())
        .setMethod(method)
        .setElementType(getIterableComponentType(type))
        .setMapKeyType(getMapKeyType(type))
        .setMapValueType(getMapValueType(type))
        .setOneOfTypes(Collections.emptyMap())
        .build();
}
Also used : TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor)

Aggregations

TypeDescriptor (org.apache.beam.sdk.values.TypeDescriptor)20 ParameterizedType (java.lang.reflect.ParameterizedType)8 Type (java.lang.reflect.Type)6 DoFn (org.apache.beam.sdk.transforms.DoFn)6 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)5 LinkedHashMap (java.util.LinkedHashMap)4 Timer (org.apache.beam.sdk.state.Timer)4 KV (org.apache.beam.sdk.values.KV)4 Test (org.junit.Test)4 FormatString (com.google.errorprone.annotations.FormatString)3 List (java.util.List)3 Map (java.util.Map)3 BagState (org.apache.beam.sdk.state.BagState)3 MapState (org.apache.beam.sdk.state.MapState)3 OrderedListState (org.apache.beam.sdk.state.OrderedListState)3 ReadableState (org.apache.beam.sdk.state.ReadableState)3 SetState (org.apache.beam.sdk.state.SetState)3 State (org.apache.beam.sdk.state.State)3 ValueState (org.apache.beam.sdk.state.ValueState)3 WatermarkHoldState (org.apache.beam.sdk.state.WatermarkHoldState)3