use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.
the class JavaClassLookupTransformProvider method getMethod.
/**
 * Resolves the single method on the transform's runtime class that corresponds to the given
 * builder method description.
 *
 * <p>A method qualifies when it is accepted by {@code isBuilderMethodForName} for the requested
 * name and allow-list entry, its parameters are compatible with the row decoded from the builder
 * method's schema and payload, and its return type is assignable to {@link PTransform}.
 *
 * @param transform the transform instance whose class is searched
 * @param builderMethod description (name, schema, payload) of the builder method to locate
 * @param allowListClass allow-list entry forwarded to the name check
 * @return the only qualifying method
 * @throws RuntimeException if no method or more than one method qualifies
 */
private Method getMethod(PTransform<PInput, POutput> transform, BuilderMethod builderMethod, AllowedClass allowListClass) {
  final Row argumentRow = decodeRow(builderMethod.getSchema(), builderMethod.getPayload());

  // The three checks run in the same order as before; && short-circuits exactly like chained
  // filters do for each element.
  final List<Method> candidates =
      Arrays.stream(transform.getClass().getMethods())
          .filter(
              candidate ->
                  isBuilderMethodForName(candidate, builderMethod.getName(), allowListClass)
                      && parametersCompatible(candidate.getParameters(), argumentRow)
                      && PTransform.class.isAssignableFrom(candidate.getReturnType()))
          .collect(Collectors.toList());

  if (candidates.size() == 1) {
    return candidates.get(0);
  }
  if (candidates.isEmpty()) {
    throw new RuntimeException(
        "Could not find a matching method in transform "
            + transform
            + " for BuilderMethod"
            + builderMethod
            + ". When using field names, make sure they are available in the compiled Java class.");
  }
  throw new RuntimeException(
      "Expected to find exactly one matching method in transform "
          + transform
          + " for BuilderMethod"
          + builderMethod
          + " but found "
          + candidates.size());
}
use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.
the class JavaClassLookupTransformProvider method getTransform.
/**
 * Instantiates the transform described by a Java class lookup payload and applies its builder
 * methods.
 *
 * <p>The class named in the payload is looked up in the allow list and loaded through the class
 * loader returned by {@code ReflectHelpers.findClassLoader()}. When the payload names no
 * constructor method, a matching constructor is invoked; otherwise the matching method is invoked
 * statically (with a {@code null} receiver). Arguments come from the decoded constructor row.
 *
 * @param spec function spec whose payload is a serialized {@code JavaClassLookupPayload}
 * @return the constructed transform after {@code applyBuilderMethods} has been applied
 * @throws IllegalArgumentException if the payload cannot be parsed, the class cannot be found, or
 *     the transform cannot be instantiated
 */
@Override
public PTransform<PInput, POutput> getTransform(FunctionSpec spec) {
  final JavaClassLookupPayload payload;
  try {
    payload = JavaClassLookupPayload.parseFrom(spec.getPayload());
  } catch (InvalidProtocolBufferException parseFailure) {
    throw new IllegalArgumentException(
        "Invalid payload type for URN " + getUrn(ExpansionMethods.Enum.JAVA_CLASS_LOOKUP),
        parseFailure);
  }

  final String className = payload.getClassName();
  try {
    final AllowedClass allowlistClass = allowList.getAllowedClass(className);
    final Class<PTransform<InputT, OutputT>> transformClass =
        (Class<PTransform<InputT, OutputT>>) ReflectHelpers.findClassLoader().loadClass(className);
    final Row constructorRow =
        decodeRow(payload.getConstructorSchema(), payload.getConstructorPayload());

    final PTransform<PInput, POutput> transform;
    if (!payload.getConstructorMethod().isEmpty()) {
      // A constructor method is named: resolve it and call it without a receiver (static).
      final Method factory =
          findMappingConstructorMethod(transformClass.getMethods(), payload, allowlistClass);
      final Object[] arguments =
          getParameterValues(
              factory.getParameters(), constructorRow, factory.getGenericParameterTypes());
      transform = (PTransform<PInput, POutput>) factory.invoke(null, arguments);
    } else {
      // No constructor method named: fall back to a matching constructor.
      final Constructor<PTransform<InputT, OutputT>> constructor =
          findMappingConstructor(transformClass.getConstructors(), payload);
      final Object[] arguments =
          getParameterValues(
              constructor.getParameters(),
              constructorRow,
              constructor.getGenericParameterTypes());
      transform = (PTransform<PInput, POutput>) constructor.newInstance(arguments);
    }
    return applyBuilderMethods(transform, payload, allowlistClass);
  } catch (ClassNotFoundException missingClass) {
    throw new IllegalArgumentException("Could not find class " + className, missingClass);
  } catch (InstantiationException
      | IllegalArgumentException
      | IllegalAccessException
      | InvocationTargetException instantiationFailure) {
    throw new IllegalArgumentException(
        "Could not instantiate class " + className, instantiationFailure);
  }
}
use of org.apache.beam.sdk.transforms.PTransform in project component-runtime by Talend.
the class BeamExecutor method run.
/**
 * Translates the delegate job into a Beam pipeline, runs it, and blocks until the pipeline is no
 * longer in the {@code RUNNING} state.
 *
 * <p>Source components are read through {@code TalendIO.read} and normalized. Every other
 * component has its incoming edges filtered/mapped per branch, keyed, co-grouped, and then either
 * written with {@code TalendIO.write} (when the component has no outgoing edge) or processed with
 * {@code TalendFn.asFn}. Whatever happens, the local sequence holder of every component id is
 * cleaned at the end.
 *
 * @throws IllegalStateException if a mapper or processor cannot be found, if a non-source
 *     component has no incoming edge, or if the thread is interrupted while waiting for the
 *     pipeline (the interrupt status is restored before throwing)
 */
@Override
public void run() {
    try {
        final Map<String, Mapper> mappers = delegate.getLevels().values().stream()
                .flatMap(Collection::stream)
                .filter(Job.Component::isSource)
                .collect(toMap(Job.Component::getId, e -> delegate.getManager()
                        .findMapper(e.getNode().getFamily(), e.getNode().getComponent(),
                                e.getNode().getVersion(), e.getNode().getConfiguration())
                        .orElseThrow(() -> new IllegalStateException("No mapper found for: " + e.getNode()))));
        final Map<String, Processor> processors = delegate.getLevels().values().stream()
                .flatMap(Collection::stream)
                .filter(component -> !component.isSource())
                .collect(toMap(Job.Component::getId, e -> delegate.getManager()
                        .findProcessor(e.getNode().getFamily(), e.getNode().getComponent(),
                                e.getNode().getVersion(), e.getNode().getConfiguration())
                        .orElseThrow(() -> new IllegalStateException("No processor found for:" + e.getNode()))));

        final Pipeline pipeline = Pipeline.create(createPipelineOptions());
        // Output collection of each already-translated component, keyed by component id.
        final Map<String, PCollection<JsonObject>> pCollections = new HashMap<>();
        delegate.getLevels().values().stream().flatMap(Collection::stream).forEach(component -> {
            if (component.isSource()) {
                final Mapper mapper = mappers.get(component.getId());
                pCollections.put(component.getId(),
                        pipeline.apply(toName("TalendIO", component), TalendIO.read(mapper))
                                .apply(toName("RecordNormalizer", component), RecordNormalizer.of(mapper.plugin())));
            } else {
                final Processor processor = processors.get(component.getId());
                // Edges whose target is this component.
                final List<Job.Edge> joins = getEdges(delegate.getEdges(), component, e -> e.getTo().getNode());
                // One keyed input collection per target branch.
                final Map<String, PCollection<KV<String, JsonObject>>> inputs = joins.stream()
                        .collect(toMap(e -> e.getTo().getBranch(), e -> {
                            final PCollection<JsonObject> pc = pCollections.get(e.getFrom().getNode().getId());
                            final PCollection<JsonObject> filteredInput = pc.apply(
                                    toName("RecordBranchFilter", component, e),
                                    RecordBranchFilter.of(processor.plugin(), e.getFrom().getBranch()));
                            final PCollection<JsonObject> mappedInput;
                            if (e.getFrom().getBranch().equals(e.getTo().getBranch())) {
                                // Same branch name on both sides: no renaming step needed.
                                mappedInput = filteredInput;
                            } else {
                                mappedInput = filteredInput.apply(
                                        toName("RecordBranchMapper", component, e),
                                        RecordBranchMapper.of(processor.plugin(), e.getFrom().getBranch(),
                                                e.getTo().getBranch()));
                            }
                            return mappedInput
                                    .apply(toName("RecordBranchUnwrapper", component, e),
                                            RecordBranchUnwrapper.of(processor.plugin(), e.getTo().getBranch()))
                                    .apply(toName("AutoKVWrapper", component, e),
                                            AutoKVWrapper.of(processor.plugin(),
                                                    delegate.getKeyProvider(component.getId()), component.getId(),
                                                    e.getFrom().getBranch()));
                        }));

                KeyedPCollectionTuple<String> join = null;
                for (final Map.Entry<String, PCollection<KV<String, JsonObject>>> entry : inputs.entrySet()) {
                    final TupleTag<JsonObject> branch = new TupleTag<>(entry.getKey());
                    join = join == null ? KeyedPCollectionTuple.of(branch, entry.getValue())
                            : join.and(branch, entry.getValue());
                }
                if (join == null) {
                    // Without any incoming edge there is nothing to co-group; fail with a clear
                    // message instead of a NullPointerException on join.apply(...).
                    throw new IllegalStateException("No input found for: " + component.getId());
                }
                final PCollection<JsonObject> preparedInput = join
                        .apply(toName("CoGroupByKey", component), CoGroupByKey.create())
                        .apply(toName("CoGroupByKeyResultMappingTransform", component),
                                new CoGroupByKeyResultMappingTransform<>(processor.plugin(), true));

                if (getEdges(delegate.getEdges(), component, e -> e.getFrom().getNode()).isEmpty()) {
                    // No outgoing edge: the component is a terminal output.
                    final PTransform<PCollection<JsonObject>, PDone> write = TalendIO.write(processor);
                    preparedInput.apply(toName("Output", component), write);
                } else {
                    final PTransform<PCollection<JsonObject>, PCollection<JsonObject>> process =
                            TalendFn.asFn(processor);
                    pCollections.put(component.getId(), preparedInput.apply(toName("Processor", component), process));
                }
            }
        });

        final PipelineResult result = pipeline.run();
        // waitUntilFinish() does not wait for the job to complete on the direct runner,
        // so additionally poll the state until it is no longer RUNNING.
        result.waitUntilFinish();
        while (PipelineResult.State.RUNNING.equals(result.getState())) {
            try {
                Thread.sleep(100L);
            } catch (final InterruptedException e) {
                // Restore the interrupt status so callers further up the stack can observe it.
                Thread.currentThread().interrupt();
                throw new IllegalStateException("the job was aborted", e);
            }
        }
    } finally {
        delegate.getLevels().values().stream()
                .flatMap(Collection::stream)
                .map(Job.Component::getId)
                .forEach(JobImpl.LocalSequenceHolder::clean);
    }
}
use of org.apache.beam.sdk.transforms.PTransform in project component-runtime by Talend.
the class BeamIOWrappingTest method processor.
/**
 * Drives a {@code BeamProcessorChainImpl} wrapping the "beamio_output" component through a full
 * lifecycle (start, one group with two records, stop) and checks the sequence of events recorded
 * in {@code MySink.DATA}.
 *
 * <p>The inputs are "tsrif"/"dnoces" while the expected records are "first"/"second" — presumably
 * the component under test reverses each record; confirm against the component definition.
 */
@Test
public void processor() {
    MySink.DATA.clear();

    final Object component = newComponent("beamio_output", ComponentManager.ComponentType.PROCESSOR);
    final Processor chain = new BeamProcessorChainImpl((PTransform<PCollection<?>, ?>) component, null,
            getPlugin(), "test", "beamio_output");

    chain.start();
    chain.beforeGroup();
    for (final String record : asList("tsrif", "dnoces")) {
        chain.onNext(branch -> {
            assertEquals(Branches.DEFAULT_BRANCH, branch);
            return record;
        }, null);
    }
    chain.afterGroup(branch -> {
        assertEquals(Branches.DEFAULT_BRANCH, branch);
        return emitted -> MySink.DATA.add(emitted.toString());
    });
    chain.stop();

    assertEquals(
            asList("setup", "start-bundle", "first", "second", "finish-out", "finish-bundle", "teardown"),
            MySink.DATA);

    // Leave the shared sink empty for other tests.
    MySink.DATA.clear();
}
use of org.apache.beam.sdk.transforms.PTransform in project component-runtime by Talend.
the class BeamIOWrappingTest method outputChain.
/**
 * Drives a {@code BeamProcessorChainImpl} wrapping the "beamio_output_chain" component through a
 * full lifecycle, feeding two {@code Sample} records with an output factory that appends emitted
 * values to {@code MySink.DATA}, and checks the recorded event sequence.
 *
 * <p>The inputs are "tsrif"/"dnoces" while the expected records are "first"/"second" — presumably
 * the component under test reverses each record; confirm against the component definition.
 */
@Test
public void outputChain() {
    MySink.DATA.clear();

    final Object component = newComponent("beamio_output_chain", ComponentManager.ComponentType.PROCESSOR);
    final Processor chain = new BeamProcessorChainImpl((PTransform<PCollection<?>, PDone>) component, null,
            getPlugin(), "test", "beamio_output");

    chain.start();
    chain.beforeGroup();
    for (final String record : asList("tsrif", "dnoces")) {
        chain.onNext(branch -> {
            assertEquals(Branches.DEFAULT_BRANCH, branch);
            return new Sample(record);
        }, branch -> emitted -> MySink.DATA.add(emitted.toString()));
    }
    chain.afterGroup(branch -> {
        assertEquals(Branches.DEFAULT_BRANCH, branch);
        return emitted -> MySink.DATA.add(emitted.toString());
    });
    chain.stop();

    assertEquals(
            asList("setup", "start-bundle", "first", "second", "finish-out", "finish-bundle", "teardown"),
            MySink.DATA);

    // Leave the shared sink empty for other tests.
    MySink.DATA.clear();
}
Aggregations