Search in sources :

Example 36 with PTransform

use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.

the class JavaClassLookupTransformProvider method getMethod.

/**
 * Finds the one public method on the runtime class of {@code transform} that matches the given
 * builder method description.
 *
 * <p>A method is a candidate when {@code isBuilderMethodForName} accepts it for the builder
 * method's name and the allow-list entry, its parameters are compatible with the row decoded from
 * the builder method's schema and payload, and its return type is assignable to
 * {@link PTransform}.
 *
 * @param transform the transform whose class is searched via {@code getClass().getMethods()}
 * @param builderMethod the builder method description (name, schema and payload)
 * @param allowListClass the allow-list entry passed through to {@code isBuilderMethodForName}
 * @return the single matching method
 * @throws RuntimeException if no method matches, or if more than one method matches
 */
private Method getMethod(PTransform<PInput, POutput> transform, BuilderMethod builderMethod, AllowedClass allowListClass) {
    Row builderArguments = decodeRow(builderMethod.getSchema(), builderMethod.getPayload());
    Method[] publicMethods = transform.getClass().getMethods();
    // All three conditions are checked per method, in the same order as separate filters would.
    List<Method> candidates = Arrays.stream(publicMethods)
            .filter(candidate -> isBuilderMethodForName(candidate, builderMethod.getName(), allowListClass)
                    && parametersCompatible(candidate.getParameters(), builderArguments)
                    && PTransform.class.isAssignableFrom(candidate.getReturnType()))
            .collect(Collectors.toList());
    int matchCount = candidates.size();
    if (matchCount == 1) {
        return candidates.get(0);
    }
    if (matchCount == 0) {
        throw new RuntimeException("Could not find a matching method in transform " + transform + " for BuilderMethod" + builderMethod + ". When using field names, make sure they are available in the compiled" + " Java class.");
    }
    throw new RuntimeException("Expected to find exactly one matching method in transform " + transform + " for BuilderMethod" + builderMethod + " but found " + matchCount);
}
Also used : JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) BuilderMethod(org.apache.beam.model.pipeline.v1.ExternalTransforms.BuilderMethod) Arrays(java.util.Arrays) Array(java.lang.reflect.Array) NoSuchSchemaException(org.apache.beam.sdk.schemas.NoSuchSchemaException) SchemaApi(org.apache.beam.model.pipeline.v1.SchemaApi) RowCoder(org.apache.beam.sdk.coders.RowCoder) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) Constructor(java.lang.reflect.Constructor) ArrayList(java.util.ArrayList) PTransform(org.apache.beam.sdk.transforms.PTransform) FunctionSpec(org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec) SchemaRegistry(org.apache.beam.sdk.schemas.SchemaRegistry) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) JavaFieldSchema(org.apache.beam.sdk.schemas.JavaFieldSchema) PInput(org.apache.beam.sdk.values.PInput) Row(org.apache.beam.sdk.values.Row) Method(java.lang.reflect.Method) Nullable(org.checkerframework.checker.nullness.qual.Nullable) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) Field(org.apache.beam.sdk.schemas.Schema.Field) TransformProvider(org.apache.beam.sdk.expansion.service.ExpansionService.TransformProvider) Collection(java.util.Collection) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Schema(org.apache.beam.sdk.schemas.Schema) TypeName(org.apache.beam.sdk.schemas.Schema.TypeName) JavaClassLookupPayload(org.apache.beam.model.pipeline.v1.ExternalTransforms.JavaClassLookupPayload) InvocationTargetException(java.lang.reflect.InvocationTargetException) ExpansionMethods(org.apache.beam.model.pipeline.v1.ExternalTransforms.ExpansionMethods) ClassUtils(org.apache.beam.repackaged.core.org.apache.commons.lang3.ClassUtils) POutput(org.apache.beam.sdk.values.POutput) List(java.util.List) ParameterizedType(java.lang.reflect.ParameterizedType) Type(java.lang.reflect.Type) 
ReflectHelpers(org.apache.beam.sdk.util.common.ReflectHelpers) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) AutoValue(com.google.auto.value.AutoValue) Annotation(java.lang.annotation.Annotation) Pattern(java.util.regex.Pattern) SchemaTranslation(org.apache.beam.sdk.schemas.SchemaTranslation) Collections(java.util.Collections) BeamUrns.getUrn(org.apache.beam.runners.core.construction.BeamUrns.getUrn) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings) Row(org.apache.beam.sdk.values.Row) BuilderMethod(org.apache.beam.model.pipeline.v1.ExternalTransforms.BuilderMethod) Method(java.lang.reflect.Method)

Example 37 with PTransform

use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.

the class JavaClassLookupTransformProvider method getTransform.

/**
 * Builds a {@link PTransform} from a {@code JavaClassLookupPayload} carried by {@code spec}.
 *
 * <p>The payload is parsed, the named class is looked up in the allow list and loaded through
 * {@code ReflectHelpers.findClassLoader()}, and the transform is created either through a
 * matching public constructor (when the payload names no constructor method) or through a
 * matching method invoked with a {@code null} receiver. Builder methods from the payload are then
 * applied via {@code applyBuilderMethods}.
 *
 * @param spec the function spec whose payload is parsed as a {@code JavaClassLookupPayload}
 * @return the transform returned by {@code applyBuilderMethods}
 * @throws IllegalArgumentException if the payload cannot be parsed, the class cannot be found, or
 *     creating the instance fails
 */
@Override
public PTransform<PInput, POutput> getTransform(FunctionSpec spec) {
    JavaClassLookupPayload lookupPayload;
    try {
        lookupPayload = JavaClassLookupPayload.parseFrom(spec.getPayload());
    } catch (InvalidProtocolBufferException parseFailure) {
        throw new IllegalArgumentException("Invalid payload type for URN " + getUrn(ExpansionMethods.Enum.JAVA_CLASS_LOOKUP), parseFailure);
    }
    String className = lookupPayload.getClassName();
    try {
        AllowedClass allowedEntry = allowList.getAllowedClass(className);
        ClassLoader loader = ReflectHelpers.findClassLoader();
        Class<PTransform<InputT, OutputT>> transformClass = (Class<PTransform<InputT, OutputT>>) loader.loadClass(className);
        Row constructorArguments = decodeRow(lookupPayload.getConstructorSchema(), lookupPayload.getConstructorPayload());
        boolean usesFactoryMethod = !lookupPayload.getConstructorMethod().isEmpty();
        PTransform<PInput, POutput> created;
        if (usesFactoryMethod) {
            Method factory = findMappingConstructorMethod(transformClass.getMethods(), lookupPayload, allowedEntry);
            Object[] arguments = getParameterValues(factory.getParameters(), constructorArguments, factory.getGenericParameterTypes());
            // A null receiver is used, i.e. the factory is invoked as a static method.
            created = (PTransform<PInput, POutput>) factory.invoke(null, arguments);
        } else {
            Constructor<?>[] publicConstructors = transformClass.getConstructors();
            Constructor<PTransform<InputT, OutputT>> chosen = findMappingConstructor(publicConstructors, lookupPayload);
            Object[] arguments = getParameterValues(chosen.getParameters(), constructorArguments, chosen.getGenericParameterTypes());
            created = (PTransform<PInput, POutput>) chosen.newInstance(arguments);
        }
        return applyBuilderMethods(created, lookupPayload, allowedEntry);
    } catch (ClassNotFoundException missingClass) {
        throw new IllegalArgumentException("Could not find class " + className, missingClass);
    } catch (InstantiationException | IllegalArgumentException | IllegalAccessException | InvocationTargetException creationFailure) {
        throw new IllegalArgumentException("Could not instantiate class " + className, creationFailure);
    }
}
Also used : JavaClassLookupPayload(org.apache.beam.model.pipeline.v1.ExternalTransforms.JavaClassLookupPayload) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) POutput(org.apache.beam.sdk.values.POutput) PTransform(org.apache.beam.sdk.transforms.PTransform) PInput(org.apache.beam.sdk.values.PInput) Constructor(java.lang.reflect.Constructor) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) BuilderMethod(org.apache.beam.model.pipeline.v1.ExternalTransforms.BuilderMethod) Method(java.lang.reflect.Method) InvocationTargetException(java.lang.reflect.InvocationTargetException) Row(org.apache.beam.sdk.values.Row)

Example 38 with PTransform

use of org.apache.beam.sdk.transforms.PTransform in project component-runtime by Talend.

the class BeamExecutor method run.

/**
 * Translates the delegate job into a Beam pipeline, runs it, and waits for it to leave the
 * {@code RUNNING} state.
 *
 * <p>Steps:
 * <ol>
 *   <li>Resolve a {@code Mapper} for every source component and a {@code Processor} for every
 *       other component; a missing one fails with {@link IllegalStateException}.</li>
 *   <li>For each component, register its {@code PCollection}: sources are read through
 *       {@code TalendIO.read}; other components filter/map/unwrap each incoming edge, key the
 *       records, co-group all branches, then either write ({@code TalendIO.write}, when the
 *       component has no outgoing edge) or process ({@code TalendFn.asFn}).</li>
 *   <li>Run the pipeline and poll until its state is no longer {@code RUNNING}.</li>
 * </ol>
 *
 * <p>Whatever the outcome, {@code JobImpl.LocalSequenceHolder.clean} is called for every
 * component id.
 *
 * @throws IllegalStateException if a mapper/processor cannot be found, or if the thread is
 *     interrupted while polling (the interrupt flag is restored before throwing)
 */
@Override
public void run() {
    try {
        final Map<String, Mapper> mappers = delegate.getLevels().values().stream()
                .flatMap(Collection::stream)
                .filter(Job.Component::isSource)
                .collect(toMap(Job.Component::getId, e -> delegate.getManager()
                        .findMapper(e.getNode().getFamily(), e.getNode().getComponent(), e.getNode().getVersion(), e.getNode().getConfiguration())
                        .orElseThrow(() -> new IllegalStateException("No mapper found for: " + e.getNode()))));
        final Map<String, Processor> processors = delegate.getLevels().values().stream()
                .flatMap(Collection::stream)
                .filter(component -> !component.isSource())
                .collect(toMap(Job.Component::getId, e -> delegate.getManager()
                        .findProcessor(e.getNode().getFamily(), e.getNode().getComponent(), e.getNode().getVersion(), e.getNode().getConfiguration())
                        .orElseThrow(() -> new IllegalStateException("No processor found for:" + e.getNode()))));
        final Pipeline pipeline = Pipeline.create(createPipelineOptions());
        // output collection of each component, keyed by component id
        final Map<String, PCollection<JsonObject>> pCollections = new HashMap<>();
        delegate.getLevels().values().stream().flatMap(Collection::stream).forEach(component -> {
            if (component.isSource()) {
                final Mapper mapper = mappers.get(component.getId());
                pCollections.put(component.getId(), pipeline
                        .apply(toName("TalendIO", component), TalendIO.read(mapper))
                        .apply(toName("RecordNormalizer", component), RecordNormalizer.of(mapper.plugin())));
            } else {
                final Processor processor = processors.get(component.getId());
                // edges whose target node is this component
                final List<Job.Edge> joins = getEdges(delegate.getEdges(), component, e -> e.getTo().getNode());
                // one keyed input per target branch
                final Map<String, PCollection<KV<String, JsonObject>>> inputs = joins.stream().collect(toMap(e -> e.getTo().getBranch(), e -> {
                    final PCollection<JsonObject> pc = pCollections.get(e.getFrom().getNode().getId());
                    final PCollection<JsonObject> filteredInput = pc.apply(toName("RecordBranchFilter", component, e), RecordBranchFilter.of(processor.plugin(), e.getFrom().getBranch()));
                    final PCollection<JsonObject> mappedInput;
                    if (e.getFrom().getBranch().equals(e.getTo().getBranch())) {
                        mappedInput = filteredInput;
                    } else {
                        // source and target branch names differ: rename the branch
                        mappedInput = filteredInput.apply(toName("RecordBranchMapper", component, e), RecordBranchMapper.of(processor.plugin(), e.getFrom().getBranch(), e.getTo().getBranch()));
                    }
                    return mappedInput
                            .apply(toName("RecordBranchUnwrapper", component, e), RecordBranchUnwrapper.of(processor.plugin(), e.getTo().getBranch()))
                            .apply(toName("AutoKVWrapper", component, e), AutoKVWrapper.of(processor.plugin(), delegate.getKeyProvider(component.getId()), component.getId(), e.getFrom().getBranch()));
                }));
                // NOTE(review): join stays null when the component has no incoming edge, which
                // makes the apply() below fail with a NullPointerException - confirm such a
                // component cannot reach this point.
                KeyedPCollectionTuple<String> join = null;
                for (final Map.Entry<String, PCollection<KV<String, JsonObject>>> entry : inputs.entrySet()) {
                    final TupleTag<JsonObject> branch = new TupleTag<>(entry.getKey());
                    join = join == null ? KeyedPCollectionTuple.of(branch, entry.getValue()) : join.and(branch, entry.getValue());
                }
                final PCollection<JsonObject> preparedInput = join
                        .apply(toName("CoGroupByKey", component), CoGroupByKey.create())
                        .apply(toName("CoGroupByKeyResultMappingTransform", component), new CoGroupByKeyResultMappingTransform<>(processor.plugin(), true));
                if (getEdges(delegate.getEdges(), component, e -> e.getFrom().getNode()).isEmpty()) {
                    // no outgoing edge: the component is applied as a write
                    final PTransform<PCollection<JsonObject>, PDone> write = TalendIO.write(processor);
                    preparedInput.apply(toName("Output", component), write);
                } else {
                    final PTransform<PCollection<JsonObject>, PCollection<JsonObject>> process = TalendFn.asFn(processor);
                    pCollections.put(component.getId(), preparedInput.apply(toName("Processor", component), process));
                }
            }
        });
        final PipelineResult result = pipeline.run();
        // waitUntilFinish() does not wait for the job to complete on the direct runner,
        // so the state is polled as well
        result.waitUntilFinish();
        while (PipelineResult.State.RUNNING.equals(result.getState())) {
            try {
                Thread.sleep(100L);
            } catch (final InterruptedException e) {
                // restore the interrupt flag so code further up the stack can still see it
                Thread.currentThread().interrupt();
                throw new IllegalStateException("the job was aborted", e);
            }
        }
    } finally {
        delegate.getLevels().values().stream().flatMap(Collection::stream).map(Job.Component::getId).forEach(JobImpl.LocalSequenceHolder::clean);
    }
}
Also used : TalendIO(org.talend.sdk.component.runtime.beam.TalendIO) KV(org.apache.beam.sdk.values.KV) PipelineResult(org.apache.beam.sdk.PipelineResult) RecordBranchFilter(org.talend.sdk.component.runtime.beam.transform.RecordBranchFilter) HashMap(java.util.HashMap) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) Function(java.util.function.Function) PTransform(org.apache.beam.sdk.transforms.PTransform) RecordBranchMapper(org.talend.sdk.component.runtime.beam.transform.RecordBranchMapper) Collectors.toMap(java.util.stream.Collectors.toMap) TupleTag(org.apache.beam.sdk.values.TupleTag) Map(java.util.Map) RecordNormalizer(org.talend.sdk.component.runtime.beam.transform.RecordNormalizer) Pipeline(org.apache.beam.sdk.Pipeline) KeyedPCollectionTuple(org.apache.beam.sdk.transforms.join.KeyedPCollectionTuple) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) JsonObject(javax.json.JsonObject) PDone(org.apache.beam.sdk.values.PDone) Collection(java.util.Collection) PCollection(org.apache.beam.sdk.values.PCollection) Processor(org.talend.sdk.component.runtime.output.Processor) RecordBranchUnwrapper(org.talend.sdk.component.runtime.beam.transform.RecordBranchUnwrapper) CoGroupByKey(org.apache.beam.sdk.transforms.join.CoGroupByKey) AutoKVWrapper(org.talend.sdk.component.runtime.beam.transform.AutoKVWrapper) Collectors.toList(java.util.stream.Collectors.toList) List(java.util.List) Mapper(org.talend.sdk.component.runtime.input.Mapper) CoGroupByKeyResultMappingTransform(org.talend.sdk.component.runtime.beam.transform.CoGroupByKeyResultMappingTransform) Job(org.talend.sdk.component.runtime.manager.chain.Job) JobImpl(org.talend.sdk.component.runtime.manager.chain.internal.JobImpl) AllArgsConstructor(lombok.AllArgsConstructor) TalendFn(org.talend.sdk.component.runtime.beam.TalendFn) Processor(org.talend.sdk.component.runtime.output.Processor) HashMap(java.util.HashMap) JsonObject(javax.json.JsonObject) 
TupleTag(org.apache.beam.sdk.values.TupleTag) RecordBranchMapper(org.talend.sdk.component.runtime.beam.transform.RecordBranchMapper) Mapper(org.talend.sdk.component.runtime.input.Mapper) Job(org.talend.sdk.component.runtime.manager.chain.Job) PipelineResult(org.apache.beam.sdk.PipelineResult) Pipeline(org.apache.beam.sdk.Pipeline) PCollection(org.apache.beam.sdk.values.PCollection) PDone(org.apache.beam.sdk.values.PDone) Collection(java.util.Collection) PCollection(org.apache.beam.sdk.values.PCollection) HashMap(java.util.HashMap) Collectors.toMap(java.util.stream.Collectors.toMap) Map(java.util.Map)

Example 39 with PTransform

use of org.apache.beam.sdk.transforms.PTransform in project component-runtime by Talend.

the class BeamIOWrappingTest method processor.

/**
 * Wraps the {@code beamio_output} processor component in a {@code BeamProcessorChainImpl}, feeds
 * it two records on the default branch, and checks the sequence recorded in {@code MySink.DATA}.
 */
@Test
public void processor() {
    MySink.DATA.clear();
    final Object component = newComponent("beamio_output", ComponentManager.ComponentType.PROCESSOR);
    final Processor chain = new BeamProcessorChainImpl((PTransform<PCollection<?>, ?>) component, null, getPlugin(), "test", "beamio_output");
    chain.start();
    chain.beforeGroup();
    for (final String record : asList("tsrif", "dnoces")) {
        chain.onNext(branch -> {
            assertEquals(Branches.DEFAULT_BRANCH, branch);
            return record;
        }, null);
    }
    chain.afterGroup(branch -> {
        assertEquals(Branches.DEFAULT_BRANCH, branch);
        return emitted -> MySink.DATA.add(emitted.toString());
    });
    chain.stop();
    assertEquals(asList("setup", "start-bundle", "first", "second", "finish-out", "finish-bundle", "teardown"), MySink.DATA);
    // leave the shared sink empty once the assertions passed
    MySink.DATA.clear();
}
Also used : PartitionMapper(org.talend.sdk.component.api.input.PartitionMapper) PBegin(org.apache.beam.sdk.values.PBegin) PipelineResult(org.apache.beam.sdk.PipelineResult) RequiredArgsConstructor(lombok.RequiredArgsConstructor) CoreMatchers.instanceOf(org.hamcrest.CoreMatchers.instanceOf) Assert.assertThat(org.junit.Assert.assertThat) Sample(org.talend.sdk.component.runtime.beam.data.Sample) Create(org.apache.beam.sdk.transforms.Create) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) Assert.fail(org.junit.Assert.fail) ClassRule(org.junit.ClassRule) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) Processor(org.talend.sdk.component.runtime.output.Processor) Collectors.joining(java.util.stream.Collectors.joining) StandardCharsets(java.nio.charset.StandardCharsets) Serializable(java.io.Serializable) List(java.util.List) Branches(org.talend.sdk.component.runtime.output.Branches) Stream(java.util.stream.Stream) ParDo(org.apache.beam.sdk.transforms.ParDo) Optional(java.util.Optional) BeamMapperImpl(org.talend.sdk.component.runtime.beam.impl.BeamMapperImpl) BeamProcessorChainImpl(org.talend.sdk.component.runtime.beam.impl.BeamProcessorChainImpl) Getter(lombok.Getter) CapturingPipeline(org.talend.sdk.component.runtime.beam.impl.CapturingPipeline) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) Option(org.talend.sdk.component.api.configuration.Option) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) PTransform(org.apache.beam.sdk.transforms.PTransform) FileBasedSink(org.apache.beam.sdk.io.FileBasedSink) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) DelegatingTransform(org.talend.sdk.component.runtime.beam.transform.DelegatingTransform) Input(org.talend.sdk.component.runtime.input.Input) OutputStream(java.io.OutputStream) DoFn(org.apache.beam.sdk.transforms.DoFn) 
PDone(org.apache.beam.sdk.values.PDone) Files(java.nio.file.Files) PAssert(org.apache.beam.sdk.testing.PAssert) Assert.assertNotNull(org.junit.Assert.assertNotNull) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Test(org.junit.Test) PCollection(org.apache.beam.sdk.values.PCollection) InputStreamReader(java.io.InputStreamReader) File(java.io.File) Collectors.toList(java.util.stream.Collectors.toList) Mapper(org.talend.sdk.component.runtime.input.Mapper) Assert.assertNull(org.junit.Assert.assertNull) Rule(org.junit.Rule) Instant(org.joda.time.Instant) JarLocation.jarLocation(org.apache.ziplock.JarLocation.jarLocation) BufferedReader(java.io.BufferedReader) ComponentManager(org.talend.sdk.component.runtime.manager.ComponentManager) Assert.assertEquals(org.junit.Assert.assertEquals) TextIO(org.apache.beam.sdk.io.TextIO) TemporaryFolder(org.junit.rules.TemporaryFolder) InputStream(java.io.InputStream) PCollection(org.apache.beam.sdk.values.PCollection) Processor(org.talend.sdk.component.runtime.output.Processor) BeamProcessorChainImpl(org.talend.sdk.component.runtime.beam.impl.BeamProcessorChainImpl) Test(org.junit.Test)

Example 40 with PTransform

use of org.apache.beam.sdk.transforms.PTransform in project component-runtime by Talend.

the class BeamIOWrappingTest method outputChain.

/**
 * Wraps the {@code beamio_output_chain} processor component in a {@code BeamProcessorChainImpl},
 * feeds it two {@code Sample} records on the default branch with an output emitter that records
 * into {@code MySink.DATA}, and checks the recorded sequence.
 */
@Test
public void outputChain() {
    MySink.DATA.clear();
    final Object component = newComponent("beamio_output_chain", ComponentManager.ComponentType.PROCESSOR);
    final Processor chain = new BeamProcessorChainImpl((PTransform<PCollection<?>, PDone>) component, null, getPlugin(), "test", "beamio_output");
    chain.start();
    chain.beforeGroup();
    for (final String record : asList("tsrif", "dnoces")) {
        chain.onNext(branch -> {
            assertEquals(Branches.DEFAULT_BRANCH, branch);
            return new Sample(record);
        }, branch -> emitted -> MySink.DATA.add(emitted.toString()));
    }
    chain.afterGroup(branch -> {
        assertEquals(Branches.DEFAULT_BRANCH, branch);
        return emitted -> MySink.DATA.add(emitted.toString());
    });
    chain.stop();
    assertEquals(asList("setup", "start-bundle", "first", "second", "finish-out", "finish-bundle", "teardown"), MySink.DATA);
    // leave the shared sink empty once the assertions passed
    MySink.DATA.clear();
}
Also used : PartitionMapper(org.talend.sdk.component.api.input.PartitionMapper) PBegin(org.apache.beam.sdk.values.PBegin) PipelineResult(org.apache.beam.sdk.PipelineResult) RequiredArgsConstructor(lombok.RequiredArgsConstructor) CoreMatchers.instanceOf(org.hamcrest.CoreMatchers.instanceOf) Assert.assertThat(org.junit.Assert.assertThat) Sample(org.talend.sdk.component.runtime.beam.data.Sample) Create(org.apache.beam.sdk.transforms.Create) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) Assert.fail(org.junit.Assert.fail) ClassRule(org.junit.ClassRule) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) Processor(org.talend.sdk.component.runtime.output.Processor) Collectors.joining(java.util.stream.Collectors.joining) StandardCharsets(java.nio.charset.StandardCharsets) Serializable(java.io.Serializable) List(java.util.List) Branches(org.talend.sdk.component.runtime.output.Branches) Stream(java.util.stream.Stream) ParDo(org.apache.beam.sdk.transforms.ParDo) Optional(java.util.Optional) BeamMapperImpl(org.talend.sdk.component.runtime.beam.impl.BeamMapperImpl) BeamProcessorChainImpl(org.talend.sdk.component.runtime.beam.impl.BeamProcessorChainImpl) Getter(lombok.Getter) CapturingPipeline(org.talend.sdk.component.runtime.beam.impl.CapturingPipeline) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) Option(org.talend.sdk.component.api.configuration.Option) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) PTransform(org.apache.beam.sdk.transforms.PTransform) FileBasedSink(org.apache.beam.sdk.io.FileBasedSink) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) DelegatingTransform(org.talend.sdk.component.runtime.beam.transform.DelegatingTransform) Input(org.talend.sdk.component.runtime.input.Input) OutputStream(java.io.OutputStream) DoFn(org.apache.beam.sdk.transforms.DoFn) 
PDone(org.apache.beam.sdk.values.PDone) Files(java.nio.file.Files) PAssert(org.apache.beam.sdk.testing.PAssert) Assert.assertNotNull(org.junit.Assert.assertNotNull) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Test(org.junit.Test) PCollection(org.apache.beam.sdk.values.PCollection) InputStreamReader(java.io.InputStreamReader) File(java.io.File) Collectors.toList(java.util.stream.Collectors.toList) Mapper(org.talend.sdk.component.runtime.input.Mapper) Assert.assertNull(org.junit.Assert.assertNull) Rule(org.junit.Rule) Instant(org.joda.time.Instant) JarLocation.jarLocation(org.apache.ziplock.JarLocation.jarLocation) BufferedReader(java.io.BufferedReader) ComponentManager(org.talend.sdk.component.runtime.manager.ComponentManager) Assert.assertEquals(org.junit.Assert.assertEquals) TextIO(org.apache.beam.sdk.io.TextIO) TemporaryFolder(org.junit.rules.TemporaryFolder) InputStream(java.io.InputStream) PCollection(org.apache.beam.sdk.values.PCollection) Processor(org.talend.sdk.component.runtime.output.Processor) BeamProcessorChainImpl(org.talend.sdk.component.runtime.beam.impl.BeamProcessorChainImpl) PDone(org.apache.beam.sdk.values.PDone) Sample(org.talend.sdk.component.runtime.beam.data.Sample) Test(org.junit.Test)

Aggregations

PTransform (org.apache.beam.sdk.transforms.PTransform)41 PCollection (org.apache.beam.sdk.values.PCollection)29 Test (org.junit.Test)18 AppliedPTransform (org.apache.beam.sdk.runners.AppliedPTransform)11 PBegin (org.apache.beam.sdk.values.PBegin)11 IOException (java.io.IOException)10 ArrayList (java.util.ArrayList)10 List (java.util.List)10 Map (java.util.Map)10 TupleTag (org.apache.beam.sdk.values.TupleTag)10 DoFn (org.apache.beam.sdk.transforms.DoFn)9 Coder (org.apache.beam.sdk.coders.Coder)8 Create (org.apache.beam.sdk.transforms.Create)8 ParDo (org.apache.beam.sdk.transforms.ParDo)7 PDone (org.apache.beam.sdk.values.PDone)7 PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple)6 Collection (java.util.Collection)5 HashMap (java.util.HashMap)5 Collectors.toList (java.util.stream.Collectors.toList)5 Schema (org.apache.beam.sdk.schemas.Schema)5