use of org.apache.beam.sdk.io.BoundedSource in project beam by apache.
the class WorkerCustomSources method serializeSplitToCloudSource.
/**
 * Split-oriented counterpart of {@link CustomSources#serializeToCloudSource(Source,
 * PipelineOptions)}: converts one {@link BoundedSource} split into its Dataflow cloud form.
 *
 * <p>The split is serialized to bytes, base64-encoded, and stored under {@code SERIALIZED_SOURCE}
 * in a spec created for {@link CustomSources}. An estimated size is recorded in the metadata only
 * when estimation succeeds and yields a non-negative value.
 *
 * @param source the split to convert
 * @return a cloud source carrying the serialized split and its (possibly empty) metadata
 * @throws Exception if building the spec fails; failures during size estimation are logged and
 *     not propagated
 */
private static com.google.api.services.dataflow.model.Source serializeSplitToCloudSource(BoundedSource<?> source) throws Exception {
    final com.google.api.services.dataflow.model.Source result =
            new com.google.api.services.dataflow.model.Source();
    result.setSpec(CloudObject.forClass(CustomSources.class));

    final byte[] serialized = serializeToByteArray(source);
    final String encoded = encodeBase64String(serialized);
    addString(result.getSpec(), SERIALIZED_SOURCE, encoded);

    final SourceMetadata sourceMetadata = new SourceMetadata();
    // Estimating the size is best effort: any failure is logged and the split is still returned.
    try {
        final long sizeBytes = source.getEstimatedSizeBytes(PipelineOptionsFactory.create());
        if (sizeBytes < 0) {
            LOG.warn("Ignoring negative estimated size {} produced by source {}", sizeBytes, source);
        } else {
            sourceMetadata.setEstimatedSizeBytes(sizeBytes);
        }
    } catch (Exception e) {
        LOG.warn("Size estimation of the source failed: " + source, e);
    }

    result.setMetadata(sourceMetadata);
    return result;
}
use of org.apache.beam.sdk.io.BoundedSource in project beam by apache.
the class ReadTranslationTest method testToFromProtoBounded.
/**
 * Round-trips a bounded source through {@link ReadTranslation}: the payload produced by
 * {@code toProto} must be flagged as bounded, and the source rebuilt from it must equal the
 * original.
 */
@Test
public void testToFromProtoBounded() throws Exception {
    // TODO: Split into two tests.
    // Only meaningful when the fixture's source is bounded; otherwise the test is skipped.
    assumeThat(source, instanceOf(BoundedSource.class));
    BoundedSource<?> expected = (BoundedSource<?>) this.source;

    SplittableParDo.PrimitiveBoundedRead<?> primitiveRead =
            new SplittableParDo.PrimitiveBoundedRead<>(Read.from(expected));
    ReadPayload proto = ReadTranslation.toProto(primitiveRead);

    // The payload must advertise itself as bounded ...
    assertThat(proto.getIsBounded(), equalTo(RunnerApi.IsBounded.Enum.BOUNDED));

    // ... and decoding it must give back a source equal to the one we started with.
    BoundedSource<?> roundTripped = ReadTranslation.boundedSourceFromProto(proto);
    assertThat(roundTripped, equalTo(source));
}
use of org.apache.beam.sdk.io.BoundedSource in project beam by apache.
the class ReadTranslator method translate.
/**
 * Translates a primitive read transform into an input stream registered on the translation
 * context.
 *
 * <p>The source is taken from the transform (bounded or unbounded primitive read), a system
 * descriptor is created with the matching source-system factory, and the serialized source,
 * coder and step name are passed to it as system configs. The resulting input descriptor is
 * registered for the transform's output collection.
 *
 * @param transform the read transform; cast to a primitive bounded or unbounded read
 * @param node hierarchy node whose full name is used as the {@code stepName} config
 * @param ctx translation context supplying the output, its id, and the stream registry
 */
@Override
public void translate(PTransform<PBegin, PCollection<T>> transform, TransformHierarchy.Node node, TranslationContext ctx) {
    final PCollection<T> output = ctx.getOutput(transform);
    final Coder<WindowedValue<T>> coder = SamzaCoders.of(output);

    // Anything that is not a primitive bounded read is treated as a primitive unbounded read.
    final Source<?> source;
    if (transform instanceof SplittableParDo.PrimitiveBoundedRead) {
        source = ((SplittableParDo.PrimitiveBoundedRead) transform).getSource();
    } else {
        source = ((SplittableParDo.PrimitiveUnboundedRead) transform).getSource();
    }
    final String id = ctx.getIdForPValue(output);
    final boolean bounded = source instanceof BoundedSource;

    // Create system descriptor
    final String factoryClassName =
            bounded
                    ? BoundedSourceSystem.Factory.class.getName()
                    : UnboundedSourceSystem.Factory.class.getName();
    final GenericSystemDescriptor systemDescriptor =
            new GenericSystemDescriptor(id, factoryClassName);
    final Map<String, String> configs =
            ImmutableMap.of(
                    "source", Base64Serializer.serializeUnchecked(source),
                    "coder", Base64Serializer.serializeUnchecked(coder),
                    "stepName", node.getFullName());
    systemDescriptor.withSystemConfigs(configs);

    // Create stream descriptor
    @SuppressWarnings("unchecked")
    final Serde<KV<?, OpMessage<T>>> kvSerde =
            (Serde) KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
    final GenericInputDescriptor<KV<?, OpMessage<T>>> inputDescriptor =
            systemDescriptor.getInputDescriptor(id, kvSerde);
    if (bounded) {
        // NOTE(review): the return value is ignored; presumably this call flags the
        // descriptor as bounded rather than querying it - confirm against the Samza API.
        inputDescriptor.isBounded();
    }

    ctx.registerInputMessageStream(output, inputDescriptor);
}
use of org.apache.beam.sdk.io.BoundedSource in project component-runtime by Talend.
the class DIPipeline method wrapTransformIfNeeded.
/**
 * Replaces the source of a root read transform with a delegating wrapper.
 *
 * <p>A bounded read is rebuilt around a {@code DelegatingBoundedSource}; an unbounded read is
 * rebuilt around a {@code DelegatingUnBoundedSource}, except when its source is an
 * {@code InMemoryQueueIO.UnboundedQueuedInput}, which is left as is. Each wrapper is passed to
 * {@code setState} before the new read is created. Any other transform is returned unchanged.
 *
 * @param root the root transform to inspect
 * @return a read over the wrapping source, or {@code root} itself when no wrapping applies
 */
private <PT extends POutput> PTransform<? super PBegin, PT> wrapTransformIfNeeded(final PTransform<? super PBegin, PT> root) {
    if (Read.Bounded.class.isInstance(root)) {
        final BoundedSource delegate = Read.Bounded.class.cast(root).getSource();
        final DelegatingBoundedSource wrapper = new DelegatingBoundedSource(delegate, null);
        setState(wrapper);
        return Read.from(wrapper);
    }

    if (!Read.Unbounded.class.isInstance(root)) {
        // Not a read transform: nothing to wrap.
        return root;
    }

    final UnboundedSource delegate = Read.Unbounded.class.cast(root).getSource();
    if (InMemoryQueueIO.UnboundedQueuedInput.class.isInstance(delegate)) {
        // Queue-backed inputs are passed through without a delegating wrapper.
        return root;
    }

    final DelegatingUnBoundedSource wrapper = new DelegatingUnBoundedSource(delegate, null);
    setState(wrapper);
    return Read.from(wrapper);
}
use of org.apache.beam.sdk.io.BoundedSource in project beam by apache.
the class XmlSourceTest method testReadXMLInvalidRecordClassWithCustomEventHandler.
/**
 * Reads the sample XML with a record class that does not match it and a validation event
 * handler that always throws, and expects that handler's {@link RuntimeException} to surface
 * while the reader is being drained.
 */
@Test
public void testReadXMLInvalidRecordClassWithCustomEventHandler() throws IOException {
    File xmlFile = tempFolder.newFile("trainXMLSmall");
    Files.write(xmlFile.toPath(), trainXML.getBytes(StandardCharsets.UTF_8));

    // Handler that fails on every validation event it receives.
    ValidationEventHandler failingHandler =
            event -> {
                throw new RuntimeException("MyCustomValidationEventHandler failure mesage");
            };

    BoundedSource<WrongTrainType> source =
            XmlIO.<WrongTrainType>read()
                    .from(xmlFile.toPath().toString())
                    .withRootElement("trains")
                    .withRecordElement("train")
                    .withRecordClass(WrongTrainType.class)
                    .withValidationEventHandler(failingHandler)
                    .createSource();

    exception.expect(RuntimeException.class);
    // JAXB internationalizes the error message. So this is all we can match for.
    exception.expectMessage("MyCustomValidationEventHandler failure mesage");

    try (Reader<WrongTrainType> reader = source.createReader(null)) {
        // Walk through every record; the expected exception is raised somewhere along the way.
        boolean hasRecord = reader.start();
        while (hasRecord) {
            reader.getCurrent();
            hasRecord = reader.advance();
        }
    }
}
Aggregations