Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache: class SamzaTestStreamTranslator, method translate.
@Override
public void translate(TestStream<T> testStream, TransformHierarchy.Node node, TranslationContext ctx) {
  final PCollection<T> output = ctx.getOutput(testStream);
  final String outputId = ctx.getIdForPValue(output);
  final Coder<T> valueCoder = testStream.getValueCoder();
  final TestStream.TestStreamCoder<T> testStreamCoder = TestStream.TestStreamCoder.of(valueCoder);

  // Encode the TestStream as a base64 string.
  final String encodedTestStream;
  try {
    encodedTestStream = CoderUtils.encodeToBase64(testStreamCoder, testStream);
  } catch (CoderException e) {
    throw new RuntimeException("Could not encode TestStream.", e);
  }

  // The decoder for encodedTestStream.
  SerializableFunction<String, TestStream<T>> testStreamDecoder =
      string -> {
        try {
          return CoderUtils.decodeFromBase64(TestStream.TestStreamCoder.of(valueCoder), string);
        } catch (CoderException e) {
          throw new RuntimeException("Could not decode TestStream.", e);
        }
      };

  ctx.registerInputMessageStream(output, createInputDescriptor(outputId, encodedTestStream, testStreamDecoder));
}
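For reference, here is a minimal standalone sketch of the same base64 round trip via CoderUtils. The stream contents, the method name, and the use of StringUtf8Coder are illustrative assumptions, not taken from the translator above.

import org.apache.beam.sdk.coders.CoderException;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.testing.TestStream;
import org.apache.beam.sdk.util.CoderUtils;

static void roundTripExample() throws CoderException {
  // Build a tiny TestStream of strings.
  TestStream<String> stream =
      TestStream.create(StringUtf8Coder.of())
          .addElements("a", "b")
          .advanceWatermarkToInfinity();
  TestStream.TestStreamCoder<String> coder = TestStream.TestStreamCoder.of(StringUtf8Coder.of());

  // Serialize to a base64 string, then rebuild an equivalent TestStream from it,
  // mirroring what the translator ships to the decoder function above.
  String encoded = CoderUtils.encodeToBase64(coder, stream);
  TestStream<String> decoded = CoderUtils.decodeFromBase64(coder, encoded);
}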
Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache: class RequiresStableInputIT, method testParDoRequiresStableInput.
/**
 * Test for the support of {@link org.apache.beam.sdk.transforms.DoFn.RequiresStableInput} in both
 * {@link ParDo.SingleOutput} and {@link ParDo.MultiOutput}.
 *
 * <p>In each test, a singleton string value is paired with a random key. In the following
 * transform, the value is written to a file whose path is derived from the random key, and then
 * the transform fails. When the pipeline retries, the latter transform receives the same input
 * from the former transform, because its {@link DoFn} is annotated with {@link
 * org.apache.beam.sdk.transforms.DoFn.RequiresStableInput}, and it does not fail again because the
 * file is already present. Therefore, exactly one file per transform is expected.
 */
@Test
public void testParDoRequiresStableInput() {
  TestPipelineOptions options = TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);
  ResourceId outputDir =
      FileSystems.matchNewResource(options.getTempRoot(), true)
          .resolve(
              String.format("requires-stable-input-%tF-%<tH-%<tM-%<tS-%<tL", new Date()),
              StandardResolveOptions.RESOLVE_DIRECTORY);
  String singleOutputPrefix =
      outputDir
          .resolve("pardo-single-output", StandardResolveOptions.RESOLVE_DIRECTORY)
          .resolve("key-", StandardResolveOptions.RESOLVE_FILE)
          .toString();
  String multiOutputPrefix =
      outputDir
          .resolve("pardo-multi-output", StandardResolveOptions.RESOLVE_DIRECTORY)
          .resolve("key-", StandardResolveOptions.RESOLVE_FILE)
          .toString();
  Pipeline p = Pipeline.create(options);

  // Fails deliberately on the first call, forcing the runner to retry with stable input.
  SerializableFunction<Void, Void> firstTime =
      (SerializableFunction<Void, Void>)
          value -> {
            throw new RuntimeException(
                "Deliberate failure: should happen only once for each application of the DoFn"
                    + " within the transform graph.");
          };

  PCollection<String> singleton = p.apply("CreatePCollectionOfOneValue", Create.of(VALUE));
  singleton
      .apply("Single-PairWithRandomKey", MapElements.via(new PairWithRandomKeyFn()))
      .apply(
          "Single-MakeSideEffectAndThenFail",
          ParDo.of(new MakeSideEffectAndThenFailFn(singleOutputPrefix, firstTime)));
  singleton
      .apply("Multi-PairWithRandomKey", MapElements.via(new PairWithRandomKeyFn()))
      .apply(
          "Multi-MakeSideEffectAndThenFail",
          ParDo.of(new MakeSideEffectAndThenFailFn(multiOutputPrefix, firstTime))
              .withOutputTags(new TupleTag<>(), TupleTagList.empty()));
  p.run().waitUntilFinish();

  assertThat(
      new FilePatternMatchingShardedFile(singleOutputPrefix + "*"),
      fileContentsHaveChecksum(VALUE_CHECKSUM));
  assertThat(
      new FilePatternMatchingShardedFile(multiOutputPrefix + "*"),
      fileContentsHaveChecksum(VALUE_CHECKSUM));
}
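PairWithRandomKeyFn and MakeSideEffectAndThenFailFn are not shown in this excerpt. As a hypothetical, simplified sketch of what the side-effecting DoFn could look like (the class name is invented, and local java.nio.file I/O stands in for whatever file API the real test uses):

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.values.KV;

// Writes a marker file named after the element's random key, then fails once.
// On retry, @RequiresStableInput guarantees the DoFn sees the same key, so the
// file already exists and the element passes through without a second failure.
static class SideEffectThenFailFn extends DoFn<KV<String, String>, String> {
  private final String prefix;
  private final SerializableFunction<Void, Void> firstTime;

  SideEffectThenFailFn(String prefix, SerializableFunction<Void, Void> firstTime) {
    this.prefix = prefix;
    this.firstTime = firstTime;
  }

  @RequiresStableInput
  @ProcessElement
  public void processElement(ProcessContext c) throws IOException {
    Path marker = Paths.get(prefix + c.element().getKey());
    if (!Files.exists(marker)) {
      Files.createDirectories(marker.getParent());
      Files.write(marker, c.element().getValue().getBytes(StandardCharsets.UTF_8));
      firstTime.apply(null); // throws, forcing a retry with the same (stable) input
    }
    c.output(c.element().getValue());
  }
}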
Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache: class GenerateSequenceTest, method testUnboundedDisplayData.
@Test
public void testUnboundedDisplayData() {
  Duration maxReadTime = Duration.standardHours(5);
  SerializableFunction<Long, Instant> timestampFn = input -> Instant.now();
  PTransform<?, ?> input =
      GenerateSequence.from(0).to(1234).withMaxReadTime(maxReadTime).withTimestampFn(timestampFn);
  DisplayData displayData = DisplayData.from(input);
  assertThat(displayData, hasDisplayItem("maxReadTime", maxReadTime));
  assertThat(displayData, hasDisplayItem("timestampFn", timestampFn.getClass()));
}
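For context, a minimal sketch (not part of the test above) of the same transform actually producing elements in a pipeline; the pipeline setup and variable names here are illustrative:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.GenerateSequence;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Instant;

Pipeline p = Pipeline.create();
// Each generated Long is stamped with the wall-clock time at which it is produced.
PCollection<Long> nums =
    p.apply(GenerateSequence.from(0).to(10).withTimestampFn(n -> Instant.now()));
p.run().waitUntilFinish();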
Use of org.apache.beam.sdk.transforms.SerializableFunction in project component-runtime by Talend: class ProcessorTest, method processor.
@Test
public void processor() {
  final Processor processor = COMPONENT_FACTORY.createProcessor(SampleProcessor.class, new Object());
  final JoinInputFactory joinInputFactory =
      new JoinInputFactory()
          .withInput(
              "__default__",
              asList(new SampleProcessor.Sample(1), Json.createObjectBuilder().add("data", 2).build()));
  final PCollection<JsonObject> inputs =
      pipeline.apply(Data.of(processor.plugin(), joinInputFactory.asInputRecords()));
  final PCollection<Map<String, JsonObject>> outputs =
      inputs.apply(TalendFn.asFn(processor)).apply(Data.map(processor.plugin(), JsonObject.class));
  PAssert.that(outputs)
      .satisfies(
          (SerializableFunction<Iterable<Map<String, JsonObject>>, Void>)
              input -> {
                final List<Map<String, JsonObject>> result =
                    StreamSupport.stream(input.spliterator(), false).collect(toList());
                assertEquals(2, result.size());
                result.forEach(e -> assertTrue(e.containsKey("__default__") && e.containsKey("reject")));
                assertEquals(
                    new HashSet<>(asList(1, 2)),
                    result.stream().map(e -> e.get("__default__").getInt("data")).collect(toSet()));
                return null;
              });
  assertEquals(PipelineResult.State.DONE, pipeline.run().waitUntilFinish());
}
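The cast to SerializableFunction is what lets the lambda match PAssert's satisfies overload, which requires a serializable checker returning Void. A minimal self-contained sketch of the same pattern (the pipeline and values are illustrative, not from the Talend test):

import static org.junit.Assert.assertEquals;
import java.util.stream.StreamSupport;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.values.PCollection;

Pipeline p = Pipeline.create();
PCollection<Integer> nums = p.apply(Create.of(1, 2, 3));
PAssert.that(nums)
    .satisfies(
        (SerializableFunction<Iterable<Integer>, Void>)
            xs -> {
              // The checker runs inside the pipeline, so it must be serializable.
              assertEquals(3, StreamSupport.stream(xs.spliterator(), false).count());
              return null; // satisfies requires a Void return
            });
p.run().waitUntilFinish();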
Use of org.apache.beam.sdk.transforms.SerializableFunction in project component-runtime by Talend: class TInProcessorBeamTest, method processor.
@Test
@Ignore("You need to complete this test with your own data and assertions")
public void processor() {
  // Processor configuration.
  // Set up your component configuration for the test here.
  final TInProcessorConfiguration configuration = new TInProcessorConfiguration();

  // Create the component processor instance using the configuration filled above.
  final Processor processor = COMPONENT_FACTORY.createProcessor(TInProcessor.class, configuration);

  // The join input factory constructs test data for every input branch defined for this component.
  // Make sure to fill in some test data for the branches you want to test.
  // You can also remove the branches you don't need from the factory below.
  final JoinInputFactory joinInputFactory = new JoinInputFactory().withInput("__default__", asList());

  // Convert it to a Beam "source".
  final PCollection<JsonObject> inputs =
      pipeline.apply(Data.of(processor.plugin(), joinInputFactory.asInputRecords()));

  // Add our processor right after, to see each record as configured previously.
  final PCollection<Map<String, JsonObject>> outputs =
      inputs.apply(TalendFn.asFn(processor)).apply(Data.map(processor.plugin(), JsonObject.class));

  PAssert.that(outputs)
      .satisfies(
          (SerializableFunction<Iterable<Map<String, JsonObject>>, Void>)
              input -> {
                final List<Map<String, JsonObject>> result =
                    StreamSupport.stream(input.spliterator(), false).collect(toList());
                // TODO: assert on `result` once test data is filled in.
                return null;
              });

  // Run the pipeline and ensure the execution was successful.
  assertEquals(PipelineResult.State.DONE, pipeline.run().waitUntilFinish());
}
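As a hypothetical example of the test data this skeleton asks for, records for the "__default__" branch could be built with javax.json; the field names here are invented for illustration:

final JoinInputFactory joinInputFactory =
    new JoinInputFactory()
        .withInput(
            "__default__",
            asList(
                Json.createObjectBuilder().add("id", 1).add("name", "first").build(),
                Json.createObjectBuilder().add("id", 2).add("name", "second").build()));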