
Example 26 with SerializableFunction

Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.

The class SamzaTestStreamTranslator, method translate.

@Override
public void translate(TestStream<T> testStream, TransformHierarchy.Node node, TranslationContext ctx) {
    final PCollection<T> output = ctx.getOutput(testStream);
    final String outputId = ctx.getIdForPValue(output);
    final Coder<T> valueCoder = testStream.getValueCoder();
    final TestStream.TestStreamCoder<T> testStreamCoder = TestStream.TestStreamCoder.of(valueCoder);
    // encode testStream as a string
    final String encodedTestStream;
    try {
        encodedTestStream = CoderUtils.encodeToBase64(testStreamCoder, testStream);
    } catch (CoderException e) {
        throw new RuntimeException("Could not encode TestStream.", e);
    }
    // the decoder for encodedTestStream
    SerializableFunction<String, TestStream<T>> testStreamDecoder = string -> {
        try {
            return CoderUtils.decodeFromBase64(TestStream.TestStreamCoder.of(valueCoder), string);
        } catch (CoderException e) {
            throw new RuntimeException("Could not decode TestStream.", e);
        }
    };
    ctx.registerInputMessageStream(output, createInputDescriptor(outputId, encodedTestStream, testStreamDecoder));
}
Also used : SamzaPipelineTranslatorUtils(org.apache.beam.runners.samza.util.SamzaPipelineTranslatorUtils) CoderUtils(org.apache.beam.sdk.util.CoderUtils) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) Coder(org.apache.beam.sdk.coders.Coder) Base64Serializer(org.apache.beam.runners.core.serialization.Base64Serializer) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Serde(org.apache.samza.serializers.Serde) OpMessage(org.apache.beam.runners.samza.runtime.OpMessage) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents) Map(java.util.Map) TestStreamTranslation(org.apache.beam.runners.core.construction.TestStreamTranslation) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) QueryablePipeline(org.apache.beam.runners.core.construction.graph.QueryablePipeline) IOException(java.io.IOException) PCollection(org.apache.beam.sdk.values.PCollection) TransformHierarchy(org.apache.beam.sdk.runners.TransformHierarchy) CoderException(org.apache.beam.sdk.coders.CoderException) PipelineNode(org.apache.beam.runners.core.construction.graph.PipelineNode) TestStream(org.apache.beam.sdk.testing.TestStream) KVSerde(org.apache.samza.serializers.KVSerde)
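To make the encode/decode round trip above easier to see in isolation, here is a minimal, self-contained sketch of the same idea: a TestStream is serialized to a base64 string with TestStream.TestStreamCoder and later rebuilt by a SerializableFunction decoder. The class name and the use of StringUtf8Coder below are illustrative assumptions, not part of the Samza translator.

import org.apache.beam.sdk.coders.CoderException;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.testing.TestStream;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.util.CoderUtils;

public class TestStreamRoundTripSketch {
    public static void main(String[] args) throws CoderException {
        // Build a small TestStream of strings.
        TestStream<String> original =
            TestStream.create(StringUtf8Coder.of())
                .addElements("a", "b")
                .advanceWatermarkToInfinity();

        // Encode it as a base64 string, as the translator does before handing it to Samza.
        TestStream.TestStreamCoder<String> coder = TestStream.TestStreamCoder.of(StringUtf8Coder.of());
        String encodedTestStream = CoderUtils.encodeToBase64(coder, original);

        // A SerializableFunction that lazily decodes the string back into a TestStream.
        SerializableFunction<String, TestStream<String>> testStreamDecoder = string -> {
            try {
                return CoderUtils.decodeFromBase64(TestStream.TestStreamCoder.of(StringUtf8Coder.of()), string);
            } catch (CoderException e) {
                throw new RuntimeException("Could not decode TestStream.", e);
            }
        };

        // The decoded stream carries the same events as the original.
        System.out.println(testStreamDecoder.apply(encodedTestStream).getEvents().size());
    }
}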

Example 27 with SerializableFunction

Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.

The class RequiresStableInputIT, method testParDoRequiresStableInput.

/**
 * Test for the support of {@link org.apache.beam.sdk.transforms.DoFn.RequiresStableInput} in both
 * {@link ParDo.SingleOutput} and {@link ParDo.MultiOutput}.
 *
 * <p>In each test, a singleton string value is paired with a random key. In the following
 * transform, the value is written to a file, whose path is specified by the random key, and then
 * the transform fails. When the pipeline retries, the latter transform should receive the same
 * input from the former transform, because its {@link DoFn} is annotated with {@link
 * org.apache.beam.sdk.transforms.DoFn.RequiresStableInput}, and it will not fail due to the
 * presence of the file. Therefore, only one file for each transform is expected.
 */
@Test
public void testParDoRequiresStableInput() {
    TestPipelineOptions options = TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);
    ResourceId outputDir =
        FileSystems.matchNewResource(options.getTempRoot(), true)
            .resolve(
                String.format("requires-stable-input-%tF-%<tH-%<tM-%<tS-%<tL", new Date()),
                StandardResolveOptions.RESOLVE_DIRECTORY);
    String singleOutputPrefix =
        outputDir
            .resolve("pardo-single-output", StandardResolveOptions.RESOLVE_DIRECTORY)
            .resolve("key-", StandardResolveOptions.RESOLVE_FILE)
            .toString();
    String multiOutputPrefix =
        outputDir
            .resolve("pardo-multi-output", StandardResolveOptions.RESOLVE_DIRECTORY)
            .resolve("key-", StandardResolveOptions.RESOLVE_FILE)
            .toString();
    Pipeline p = Pipeline.create(options);
    SerializableFunction<Void, Void> firstTime =
        (SerializableFunction<Void, Void>) value -> {
            throw new RuntimeException(
                "Deliberate failure: should happen only once for each application of the DoFn "
                    + "within the transform graph.");
        };
    PCollection<String> singleton = p.apply("CreatePCollectionOfOneValue", Create.of(VALUE));
    singleton.apply("Single-PairWithRandomKey", MapElements.via(new PairWithRandomKeyFn())).apply("Single-MakeSideEffectAndThenFail", ParDo.of(new MakeSideEffectAndThenFailFn(singleOutputPrefix, firstTime)));
    singleton.apply("Multi-PairWithRandomKey", MapElements.via(new PairWithRandomKeyFn())).apply("Multi-MakeSideEffectAndThenFail", ParDo.of(new MakeSideEffectAndThenFailFn(multiOutputPrefix, firstTime)).withOutputTags(new TupleTag<>(), TupleTagList.empty()));
    p.run().waitUntilFinish();
    assertThat(new FilePatternMatchingShardedFile(singleOutputPrefix + "*"), fileContentsHaveChecksum(VALUE_CHECKSUM));
    assertThat(new FilePatternMatchingShardedFile(multiOutputPrefix + "*"), fileContentsHaveChecksum(VALUE_CHECKSUM));
}
Also used : SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) TupleTag(org.apache.beam.sdk.values.TupleTag) TestPipelineOptions(org.apache.beam.sdk.testing.TestPipelineOptions) Date(java.util.Date) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) FilePatternMatchingShardedFile(org.apache.beam.sdk.util.FilePatternMatchingShardedFile) ResourceId(org.apache.beam.sdk.io.fs.ResourceId) Test(org.junit.Test)
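The transform under test above relies on the DoFn.RequiresStableInput annotation. As a hedged sketch of that pattern (the class below is illustrative, not the test's actual MakeSideEffectAndThenFailFn), a DoFn opts in by annotating its @ProcessElement method, which obliges the runner to checkpoint each input element before the method runs so that a retry replays exactly the same element.

import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.values.KV;

// Illustrative only: a DoFn whose side effect must see stable (replayable) input.
class StableSideEffectFn extends DoFn<KV<String, String>, String> {

    @RequiresStableInput
    @ProcessElement
    public void processElement(ProcessContext c) {
        // Perform the non-idempotent side effect here (e.g. write a file whose name is
        // derived from the key), then emit the value downstream.
        c.output(c.element().getValue());
    }
}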

Example 28 with SerializableFunction

Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.

The class GenerateSequenceTest, method testUnboundedDisplayData.

@Test
public void testUnboundedDisplayData() {
    Duration maxReadTime = Duration.standardHours(5);
    SerializableFunction<Long, Instant> timestampFn = input -> Instant.now();
    PTransform<?, ?> input = GenerateSequence.from(0).to(1234).withMaxReadTime(maxReadTime).withTimestampFn(timestampFn);
    DisplayData displayData = DisplayData.from(input);
    assertThat(displayData, hasDisplayItem("maxReadTime", maxReadTime));
    assertThat(displayData, hasDisplayItem("timestampFn", timestampFn.getClass()));
}
Also used : Count(org.apache.beam.sdk.transforms.Count) DoFn(org.apache.beam.sdk.transforms.DoFn) Min(org.apache.beam.sdk.transforms.Min) DisplayData(org.apache.beam.sdk.transforms.display.DisplayData) PAssert(org.apache.beam.sdk.testing.PAssert) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) NeedsRunner(org.apache.beam.sdk.testing.NeedsRunner) PCollection(org.apache.beam.sdk.values.PCollection) Category(org.junit.experimental.categories.Category) PTransform(org.apache.beam.sdk.transforms.PTransform) Max(org.apache.beam.sdk.transforms.Max) Rule(org.junit.Rule) ParDo(org.apache.beam.sdk.transforms.ParDo) Distinct(org.apache.beam.sdk.transforms.Distinct) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Instant(org.joda.time.Instant) Matchers.is(org.hamcrest.Matchers.is) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) DisplayDataMatchers.hasDisplayItem(org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem)
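Beyond display data, the same kind of SerializableFunction<Long, Instant> drives element timestamps when the source actually runs. A minimal sketch, assuming a runner such as the DirectRunner is on the classpath (the class name is illustrative):

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.GenerateSequence;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Instant;

public class GenerateSequenceTimestampSketch {
    public static void main(String[] args) {
        Pipeline p = Pipeline.create(PipelineOptionsFactory.create());

        // Assign each generated element a timestamp derived from its value, one second apart.
        SerializableFunction<Long, Instant> timestampFn = i -> new Instant(i * 1000L);

        PCollection<Long> sequence =
            p.apply(GenerateSequence.from(0).to(10).withTimestampFn(timestampFn));

        p.run().waitUntilFinish();
    }
}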

Example 29 with SerializableFunction

Use of org.apache.beam.sdk.transforms.SerializableFunction in project component-runtime by Talend.

The class ProcessorTest, method processor.

@Test
public void processor() {
    final Processor processor = COMPONENT_FACTORY.createProcessor(SampleProcessor.class, new Object());
    final JoinInputFactory joinInputFactory =
        new JoinInputFactory()
            .withInput(
                "__default__",
                asList(new SampleProcessor.Sample(1), Json.createObjectBuilder().add("data", 2).build()));
    final PCollection<JsonObject> inputs =
        pipeline.apply(Data.of(processor.plugin(), joinInputFactory.asInputRecords()));
    final PCollection<Map<String, JsonObject>> outputs =
        inputs.apply(TalendFn.asFn(processor)).apply(Data.map(processor.plugin(), JsonObject.class));
    PAssert.that(outputs).satisfies((SerializableFunction<Iterable<Map<String, JsonObject>>, Void>) input -> {
        final List<Map<String, JsonObject>> result = StreamSupport.stream(input.spliterator(), false).collect(toList());
        assertEquals(2, result.size());
        result.forEach(e -> assertTrue(e.containsKey("__default__") && e.containsKey("reject")));
        assertEquals(new HashSet<>(asList(1, 2)), result.stream().map(e -> e.get("__default__").getInt("data")).collect(toSet()));
        return null;
    });
    assertEquals(PipelineResult.State.DONE, pipeline.run().waitUntilFinish());
}
Also used : JsonObject(javax.json.JsonObject) SampleProcessor(org.talend.sdk.component.junit.beam.test.SampleProcessor) PAssert(org.apache.beam.sdk.testing.PAssert) PipelineResult(org.apache.beam.sdk.PipelineResult) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) Test(org.junit.Test) JoinInputFactory(org.talend.sdk.component.junit.JoinInputFactory) PCollection(org.apache.beam.sdk.values.PCollection) Processor(org.talend.sdk.component.runtime.output.Processor) HashSet(java.util.HashSet) Collectors.toList(java.util.stream.Collectors.toList) List(java.util.List) Rule(org.junit.Rule) Arrays.asList(java.util.Arrays.asList) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) Map(java.util.Map) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Json(javax.json.Json) StreamSupport(java.util.stream.StreamSupport) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) ClassRule(org.junit.ClassRule) SimpleComponentRule(org.talend.sdk.component.junit.SimpleComponentRule) Collectors.toSet(java.util.stream.Collectors.toSet) TalendFn(org.talend.sdk.component.runtime.beam.TalendFn)
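The cast in PAssert.that(outputs).satisfies(...) above is the usual Beam idiom for asserting over a whole PCollection: satisfies takes a SerializableFunction<Iterable<T>, Void> that returns null on success and throws on failure. A stripped-down sketch of the same pattern with plain integers (names are illustrative; assumes the DirectRunner is on the classpath):

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.values.PCollection;

public class SatisfiesSketch {
    public static void main(String[] args) {
        Pipeline p = Pipeline.create(PipelineOptionsFactory.create());
        PCollection<Integer> numbers = p.apply(Create.of(1, 2, 3));

        // satisfies(...) expects a SerializableFunction<Iterable<Integer>, Void>;
        // return null on success, throw (e.g. an AssertionError) on failure.
        PAssert.that(numbers).satisfies((SerializableFunction<Iterable<Integer>, Void>) input -> {
            int count = 0;
            for (Integer ignored : input) {
                count++;
            }
            if (count != 3) {
                throw new AssertionError("Expected 3 elements, got " + count);
            }
            return null;
        });

        p.run().waitUntilFinish();
    }
}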

Example 30 with SerializableFunction

Use of org.apache.beam.sdk.transforms.SerializableFunction in project component-runtime by Talend.

The class TInProcessorBeamTest, method processor.

@Test
@Ignore("You need to complete this test with your own data and assertions")
public void processor() {
    // Processor configuration
    // Set up your component configuration for the test here
    final TInProcessorConfiguration configuration = new TInProcessorConfiguration();
    // We create the component processor instance using the configuration filled above
    final Processor processor = COMPONENT_FACTORY.createProcessor(TInProcessor.class, configuration);
    // The join input factory constructs test data for every input branch you have defined for this component
    // Make sure to fill in some test data for the branches you want to test
    // You can also remove the branches that you don't need from the factory below
    final JoinInputFactory joinInputFactory = new JoinInputFactory().withInput("__default__", asList());
    // Convert it to a Beam "source"
    final PCollection<JsonObject> inputs =
        pipeline.apply(Data.of(processor.plugin(), joinInputFactory.asInputRecords()));
    // Add our processor right after it, so each record flows through the configuration above
    final PCollection<Map<String, JsonObject>> outputs =
        inputs.apply(TalendFn.asFn(processor)).apply(Data.map(processor.plugin(), JsonObject.class));
    PAssert.that(outputs).satisfies((SerializableFunction<Iterable<Map<String, JsonObject>>, Void>) input -> {
        final List<Map<String, JsonObject>> result = StreamSupport.stream(input.spliterator(), false).collect(toList());
        return null;
    });
    // run the pipeline and ensure the execution was successful
    assertEquals(PipelineResult.State.DONE, pipeline.run().waitUntilFinish());
}
Also used : JsonObject(javax.json.JsonObject) PAssert(org.apache.beam.sdk.testing.PAssert) PipelineResult(org.apache.beam.sdk.PipelineResult) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) Test(org.junit.Test) JoinInputFactory(org.talend.sdk.component.junit.JoinInputFactory) PCollection(org.apache.beam.sdk.values.PCollection) Processor(org.talend.sdk.component.runtime.output.Processor) Data(org.talend.sdk.component.junit.beam.Data) Collectors.toList(java.util.stream.Collectors.toList) List(java.util.List) Rule(org.junit.Rule) Ignore(org.junit.Ignore) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) StreamSupport(java.util.stream.StreamSupport) ClassRule(org.junit.ClassRule) SimpleComponentRule(org.talend.sdk.component.junit.SimpleComponentRule) Assert.assertEquals(org.junit.Assert.assertEquals) TalendFn(org.talend.sdk.component.runtime.beam.TalendFn)
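Both Talend examples above reference a pipeline field supplied by the surrounding test class. In a plain Beam JUnit test that field is typically a TestPipeline rule; a minimal sketch of such a declaration follows (the class name is illustrative, and this is an assumption about the harness rather than Talend's actual rule setup).

import org.apache.beam.sdk.testing.TestPipeline;
import org.junit.Rule;

public class MyProcessorTest {

    // TestPipeline doubles as the Pipeline used in the test body and as a JUnit rule
    // that checks the pipeline is actually run; "transient" keeps it out of serialization.
    @Rule
    public final transient TestPipeline pipeline = TestPipeline.create();
}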

Aggregations

SerializableFunction (org.apache.beam.sdk.transforms.SerializableFunction): 37
Test (org.junit.Test): 27
TestPipeline (org.apache.beam.sdk.testing.TestPipeline): 23
PCollection (org.apache.beam.sdk.values.PCollection): 22
PAssert (org.apache.beam.sdk.testing.PAssert): 20
Instant (org.joda.time.Instant): 17
Rule (org.junit.Rule): 17
List (java.util.List): 16
MatcherAssert.assertThat (org.hamcrest.MatcherAssert.assertThat): 16
RunWith (org.junit.runner.RunWith): 16
Map (java.util.Map): 15
Duration (org.joda.time.Duration): 14
JUnit4 (org.junit.runners.JUnit4): 13
ArrayList (java.util.ArrayList): 12
Collections (java.util.Collections): 12
Create (org.apache.beam.sdk.transforms.Create): 12
Arrays (java.util.Arrays): 11
ParDo (org.apache.beam.sdk.transforms.ParDo): 11
KV (org.apache.beam.sdk.values.KV): 11
Assert.assertEquals (org.junit.Assert.assertEquals): 10