Usage example of org.apache.beam.sdk.coders.Coder from the Apache Beam project.
From the class BoundedDataset, method cache.
@Override
@SuppressWarnings("unchecked")
public void cache(String storageLevel, Coder<?> coder) {
// Parse the requested Spark storage level from its string form.
StorageLevel level = StorageLevel.fromString(storageLevel);
if (!TranslationUtils.canAvoidRddSerialization(level)) {
// Persisting at this level can trigger serialization, so encode the
// elements to bytes up front and lazily decode on read.
// See https://issues.apache.org/jira/browse/BEAM-2669 for details.
Coder<WindowedValue<T>> wvCoder = (Coder<WindowedValue<T>>) coder;
this.rdd =
    getRDD()
        .map(value -> ValueAndCoderLazySerializable.of(value, wvCoder))
        .persist(level)
        .map(lazy -> lazy.getOrDecode(wvCoder));
} else {
// Memory-only storage: persist directly and skip the byte conversion overhead.
this.rdd = getRDD().persist(level);
}
}
Usage example of org.apache.beam.sdk.coders.Coder from the Apache Beam project.
From the class DoFnFunction, method prepareSerialization.
/**
 * Prepares the DoFnFunction class so it can be serialized properly. This involves using various
 * protobuf's and byte arrays which are later converted back into the proper classes during
 * deserialization.
 *
 * @throws RuntimeException if any component fails to serialize; the function would otherwise
 *     be shipped in a partially-serialized state and fail obscurely on deserialization.
 */
private void prepareSerialization() {
SdkComponents components = SdkComponents.create();
components.registerEnvironment(Environments.createOrGetDefaultEnvironment(pipelineOptions.as(PortablePipelineOptions.class)));
this.serializedOptions = new SerializablePipelineOptions(pipelineOptions).toString();
doFnwithEx = ParDoTranslation.translateDoFn(this.doFn, mainOutput, sideInputMapping, doFnSchemaInformation, components);
doFnwithExBytes = doFnwithEx.getPayload().toByteArray();
outputCodersBytes = new HashMap<>();
try {
// Coders are serialized with Java serialization; protos go straight to bytes.
coderBytes = SerializableUtils.serializeToByteArray(inputCoder);
windowStrategyProto = WindowingStrategyTranslation.toMessageProto(windowingStrategy, components);
windowBytes = windowStrategyProto.toByteArray();
for (Map.Entry<TupleTag<?>, Coder<?>> entry : outputCoders.entrySet()) {
outputCodersBytes.put(entry.getKey().getId(), SerializableUtils.serializeToByteArray(entry.getValue()));
}
sideInputBytes = new HashMap<>();
for (Map.Entry<TupleTag<?>, WindowingStrategy<?, ?>> entry : sideInputs.entrySet()) {
windowStrategyProto = WindowingStrategyTranslation.toMessageProto(entry.getValue(), components);
sideInputBytes.put(entry.getKey().getId(), windowStrategyProto.toByteArray());
}
// TupleTags and the output map are reduced to their string ids / plain values.
serializedSideOutputs = new ArrayList<>();
for (TupleTag<?> sideOutput : sideOutputs) {
serializedSideOutputs.add(sideOutput.getId());
}
serializedOutputMap = new HashMap<>();
for (Map.Entry<TupleTag<?>, Integer> entry : outputMap.entrySet()) {
serializedOutputMap.put(entry.getKey().getId(), entry.getValue());
}
} catch (IOException e) {
// Previously this only logged e.getMessage() at INFO and continued, leaving the
// function half-serialized and dropping the stack trace. Fail fast with the cause.
throw new RuntimeException("Failed to prepare DoFnFunction for serialization", e);
}
}
Usage example of org.apache.beam.sdk.coders.Coder from the Apache Beam project.
From the class Twister2SideInputReader, method getMultimapSideInput.
/**
 * Materializes the multimap side input for the given window.
 *
 * <p>Only the requested window's elements are decoded and passed through the view's {@link
 * ViewFn}; previously every window in the partitioned map was materialized even though only one
 * result was ever used.
 *
 * @param view the side-input view whose materialization is requested
 * @param window the window for which to produce the side-input value
 * @return the side-input value for {@code window}, or the view applied to an empty multimap if
 *     no elements exist for that window
 */
@SuppressWarnings({"unchecked", "rawtypes"})
private <T> T getMultimapSideInput(PCollectionView<T> view, BoundedWindow window) {
Map<BoundedWindow, List<WindowedValue<?>>> partitionedElements = getPartitionedElements(view);
ViewFn<MultimapView, T> viewFn = (ViewFn<MultimapView, T>) view.getViewFn();
List<WindowedValue<?>> elements = partitionedElements.get(window);
if (elements == null) {
// No data was emitted for this window; fall back to the empty multimap view.
return viewFn.apply(InMemoryMultimapSideInputView.empty());
}
// The view's coder is a KvCoder; its key coder groups the multimap entries.
Coder keyCoder = ((KvCoder<?, ?>) view.getCoderInternal()).getKeyCoder();
return viewFn.apply(InMemoryMultimapSideInputView.fromIterable(keyCoder, (Iterable) elements.stream().map(WindowedValue::getValue).collect(Collectors.toList())));
}
Usage example of org.apache.beam.sdk.coders.Coder from the Apache Beam project.
From the class WriteFiles, method expand.
@Override
public WriteFilesResult<DestinationT> expand(PCollection<UserT> input) {
// Unbounded inputs require windowed writes, and with merging windows an explicit
// shard count, because sharding cannot be inferred for a merging strategy.
if (input.isBounded() == IsBounded.UNBOUNDED) {
checkArgument(getWindowedWrites(), "Must use windowed writes when applying %s to an unbounded PCollection", WriteFiles.class.getSimpleName());
// Check merging window here due to https://issues.apache.org/jira/browse/BEAM-12040.
if (input.getWindowingStrategy().needsMerge()) {
checkArgument(getComputeNumShards() != null || getNumShardsProvider() != null, "When applying %s to an unbounded PCollection with merging windows," + " must specify number of output shards explicitly", WriteFiles.class.getSimpleName());
}
}
this.writeOperation = getSink().createWriteOperation();
if (getWindowedWrites()) {
this.writeOperation.setWindowedWrites();
} else {
// Re-window the data into the global window and remove any existing triggers.
input = input.apply("RewindowIntoGlobal", Window.<UserT>into(new GlobalWindows()).triggering(DefaultTrigger.of()).discardingFiredPanes());
}
// The destination coder must be deterministic: destinations are used as grouping keys.
Coder<DestinationT> destinationCoder;
try {
destinationCoder = getDynamicDestinations().getDestinationCoderWithDefault(input.getPipeline().getCoderRegistry());
destinationCoder.verifyDeterministic();
} catch (CannotProvideCoderException | NonDeterministicException e) {
throw new RuntimeException(e);
}
// FileResultCoder pairs each temp-file result with the window it was written in.
@SuppressWarnings("unchecked") Coder<BoundedWindow> windowCoder = (Coder<BoundedWindow>) input.getWindowingStrategy().getWindowFn().windowCoder();
FileResultCoder<DestinationT> fileResultCoder = FileResultCoder.of(windowCoder, destinationCoder);
// numShardsView is only non-null when sharding is computed from the data itself.
PCollectionView<Integer> numShardsView = (getComputeNumShards() == null) ? null : input.apply(getComputeNumShards());
boolean fixedSharding = getComputeNumShards() != null || getNumShardsProvider() != null;
PCollection<List<FileResult<DestinationT>>> tempFileResults;
// Choose the temp-file writing strategy: sharded (fixed shard count), unsharded
// (bounded input, runner-determined bundling), or auto-sharded (unbounded input).
if (fixedSharding) {
tempFileResults = input.apply("WriteShardedBundlesToTempFiles", new WriteShardedBundlesToTempFiles(destinationCoder, fileResultCoder, numShardsView)).apply("GatherTempFileResults", new GatherResults<>(fileResultCoder));
} else {
if (input.isBounded() == IsBounded.BOUNDED) {
tempFileResults = input.apply("WriteUnshardedBundlesToTempFiles", new WriteUnshardedBundlesToTempFiles(destinationCoder, fileResultCoder)).apply("GatherTempFileResults", new GatherResults<>(fileResultCoder));
} else {
tempFileResults = input.apply("WriteAutoShardedBundlesToTempFiles", new WriteAutoShardedBundlesToTempFiles(destinationCoder, fileResultCoder));
}
}
// Finalization renames/moves temp files to their final destinations.
return tempFileResults.apply("FinalizeTempFileBundles", new FinalizeTempFileBundles(numShardsView, destinationCoder));
}
Usage example of org.apache.beam.sdk.coders.Coder from the Apache Beam project.
From the class ParDo, method codersForStateSpecTypes.
/**
 * Try to provide coders for as many of the type arguments of given {@link
 * DoFnSignature.StateDeclaration} as possible.
 *
 * <p>Entries for type arguments that no coder can be inferred for are left {@code null}.
 */
private static <InputT> Coder[] codersForStateSpecTypes(DoFnSignature.StateDeclaration stateDeclaration, CoderRegistry coderRegistry, Coder<InputT> inputCoder) {
Type stateType = stateDeclaration.stateType().getType();
// A non-parameterized state type carries no type arguments, hence no coders to infer.
if (!(stateType instanceof ParameterizedType)) {
return new Coder[0];
}
Type[] typeArgs = ((ParameterizedType) stateType).getActualTypeArguments();
Coder[] result = new Coder[typeArgs.length];
int index = 0;
for (Type typeArg : typeArgs) {
TypeDescriptor<?> descriptor = TypeDescriptor.of(typeArg);
try {
// First attempt: look the coder up directly in the registry.
result[index] = coderRegistry.getCoder(descriptor);
} catch (CannotProvideCoderException e) {
try {
// Second attempt: infer the coder from the input coder's encoded type.
result[index] = coderRegistry.getCoder(descriptor, inputCoder.getEncodedTypeDescriptor(), inputCoder);
} catch (CannotProvideCoderException ignored) {
// Since not all type arguments will have a registered coder we ignore this exception.
}
}
index++;
}
return result;
}
Aggregations