Use of org.apache.beam.sdk.io.FileBasedSink.FileResultCoder in the Apache Beam project.
From the class WriteFiles, method expand.
/**
 * Expands this transform into the steps that write {@code input} to temporary files and then
 * finalize those files.
 *
 * <p>The steps, in order:
 *
 * <ol>
 *   <li>For an unbounded input, require windowed writes; if its windowing strategy additionally
 *       needs merging, require that either a compute-num-shards transform or a num-shards
 *       provider has been configured.
 *   <li>Create the write operation from the sink. With windowed writes the operation is flagged
 *       as windowed; otherwise the input is re-windowed into the global window with the default
 *       trigger and discarding fired panes.
 *   <li>Resolve the destination coder and verify that it is deterministic.
 *   <li>Write temp files using the sharded transform when a shard count is configured, the
 *       unsharded transform for a bounded input, and the auto-sharded transform otherwise.
 *   <li>Apply the "FinalizeTempFileBundles" step to the temp-file results.
 * </ol>
 *
 * @param input the collection of elements to write
 * @return the output of the "FinalizeTempFileBundles" step
 * @throws RuntimeException wrapping the underlying {@code CannotProvideCoderException} or
 *     {@code NonDeterministicException} when the destination coder cannot be provided or fails
 *     the determinism check
 */
@Override
public WriteFilesResult<DestinationT> expand(PCollection<UserT> input) {
  if (input.isBounded() == IsBounded.UNBOUNDED) {
    final String transformName = WriteFiles.class.getSimpleName();
    checkArgument(
        getWindowedWrites(),
        "Must use windowed writes when applying %s to an unbounded PCollection",
        transformName);
    // Check merging window here due to https://issues.apache.org/jira/browse/BEAM-12040.
    if (input.getWindowingStrategy().needsMerge()) {
      final boolean shardCountConfigured =
          getComputeNumShards() != null || getNumShardsProvider() != null;
      checkArgument(
          shardCountConfigured,
          "When applying %s to an unbounded PCollection with merging windows,"
              + " must specify number of output shards explicitly",
          transformName);
    }
  }

  this.writeOperation = getSink().createWriteOperation();
  if (!getWindowedWrites()) {
    // Non-windowed writes: collapse everything into the global window and drop any existing
    // triggers before writing.
    input =
        input.apply(
            "RewindowIntoGlobal",
            Window.<UserT>into(new GlobalWindows())
                .triggering(DefaultTrigger.of())
                .discardingFiredPanes());
  } else {
    this.writeOperation.setWindowedWrites();
  }

  // Resolve the destination coder up front; it must pass the determinism check.
  final Coder<DestinationT> destCoder;
  try {
    destCoder =
        getDynamicDestinations()
            .getDestinationCoderWithDefault(input.getPipeline().getCoderRegistry());
    destCoder.verifyDeterministic();
  } catch (CannotProvideCoderException | NonDeterministicException e) {
    throw new RuntimeException(e);
  }

  @SuppressWarnings("unchecked")
  Coder<BoundedWindow> boundedWindowCoder =
      (Coder<BoundedWindow>) input.getWindowingStrategy().getWindowFn().windowCoder();
  FileResultCoder<DestinationT> resultCoder =
      FileResultCoder.of(boundedWindowCoder, destCoder);

  // The shard-count view exists only when a compute-num-shards transform was configured.
  PCollectionView<Integer> shardCountView = null;
  if (getComputeNumShards() != null) {
    shardCountView = input.apply(getComputeNumShards());
  }

  final boolean hasFixedSharding =
      getComputeNumShards() != null || getNumShardsProvider() != null;

  final PCollection<List<FileResult<DestinationT>>> gatheredResults;
  if (hasFixedSharding) {
    gatheredResults =
        input
            .apply(
                "WriteShardedBundlesToTempFiles",
                new WriteShardedBundlesToTempFiles(destCoder, resultCoder, shardCountView))
            .apply("GatherTempFileResults", new GatherResults<>(resultCoder));
  } else if (input.isBounded() == IsBounded.BOUNDED) {
    gatheredResults =
        input
            .apply(
                "WriteUnshardedBundlesToTempFiles",
                new WriteUnshardedBundlesToTempFiles(destCoder, resultCoder))
            .apply("GatherTempFileResults", new GatherResults<>(resultCoder));
  } else {
    // Unbounded input without a configured shard count: no separate gather step is applied.
    gatheredResults =
        input.apply(
            "WriteAutoShardedBundlesToTempFiles",
            new WriteAutoShardedBundlesToTempFiles(destCoder, resultCoder));
  }

  return gatheredResults.apply(
      "FinalizeTempFileBundles", new FinalizeTempFileBundles(shardCountView, destCoder));
}
Aggregations