Use of org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecordCoder in project beam by apache.
The class IsmSinkFactory, method create().
@Override
public Sink<?> create(CloudObject spec, @Nullable Coder<?> coder, @Nullable PipelineOptions options,
    @Nullable DataflowExecutionContext executionContext, DataflowOperationContext operationContext)
    throws Exception {
  options = checkArgumentNotNull(options);
  coder = checkArgumentNotNull(coder);
  // The validity of this coder is checked in detail by the typed create, below.
  @SuppressWarnings("unchecked")
  Coder<WindowedValue<IsmRecord<Object>>> typedCoder = (Coder<WindowedValue<IsmRecord<Object>>>) coder;
  String filename = getString(spec, WorkerPropertyNames.FILENAME);
  checkArgument(typedCoder instanceof WindowedValueCoder,
      "%s only supports using %s but got %s.", IsmSink.class, WindowedValueCoder.class, typedCoder);
  WindowedValueCoder<IsmRecord<Object>> windowedCoder = (WindowedValueCoder<IsmRecord<Object>>) typedCoder;
  checkArgument(windowedCoder.getValueCoder() instanceof IsmRecordCoder,
      "%s only supports using %s but got %s.", IsmSink.class, IsmRecordCoder.class, windowedCoder.getValueCoder());
  @SuppressWarnings("unchecked")
  IsmRecordCoder<Object> ismCoder = (IsmRecordCoder<Object>) windowedCoder.getValueCoder();
  long bloomFilterSizeLimitBytes =
      Math.max(MIN_BLOOM_FILTER_SIZE_BYTES,
          DoubleMath.roundToLong(
              BLOOM_FILTER_SIZE_LIMIT_MULTIPLIER
                  * options.as(DataflowWorkerHarnessOptions.class).getWorkerCacheMb()
                  * 1024 * 1024, // Note the conversion from MiB to bytes.
              RoundingMode.DOWN));
  return new IsmSink<>(FileSystems.matchNewResource(filename, false), ismCoder, bloomFilterSizeLimitBytes);
}
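For this factory to succeed, the supplied coder must be a WindowedValueCoder whose value coder is an IsmRecordCoder, or the two checkArgument calls above fail. A minimal sketch of constructing such a coder, assuming illustrative byte-array key and value components (the real components depend on the pipeline being translated):

IsmRecordCoder<byte[]> ismCoder =
    IsmRecordCoder.of(
        1, // shard key coders used for value records
        0, // additional shard key coders used for metadata records
        ImmutableList.of(ByteArrayCoder.of(), GlobalWindow.Coder.INSTANCE), // key component coders
        ByteArrayCoder.of()); // value coder
Coder<WindowedValue<IsmRecord<byte[]>>> sinkCoder =
    WindowedValue.getFullCoder(ismCoder, GlobalWindow.Coder.INSTANCE);

The Bloom filter budget scales with the worker cache: taking illustrative values of a 100 MiB cache and a 0.1 multiplier (the actual constants are defined on IsmSinkFactory), the limit would be max(MIN_BLOOM_FILTER_SIZE_BYTES, floor(0.1 * 100 * 1024 * 1024)) = 10,485,760 bytes.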
Use of org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecordCoder in project beam by apache.
The class IsmReaderFactory, method createImpl().
<V> NativeReader<?> createImpl(CloudObject spec, Coder<?> coder, PipelineOptions options,
    DataflowExecutionContext executionContext, DataflowOperationContext operationContext) throws Exception {
  final ResourceId resourceId =
      FileSystems.matchNewResource(getString(spec, WorkerPropertyNames.FILENAME), false);
  checkArgument(coder instanceof WindowedValueCoder,
      "%s only supports using %s but got %s.", IsmReader.class, WindowedValueCoder.class, coder);
  @SuppressWarnings("unchecked")
  WindowedValueCoder<IsmRecord<V>> windowedCoder = (WindowedValueCoder<IsmRecord<V>>) coder;
  checkArgument(windowedCoder.getValueCoder() instanceof IsmRecordCoder,
      "%s only supports using %s but got %s.", IsmReader.class, IsmRecordCoder.class, windowedCoder.getValueCoder());
  @SuppressWarnings("unchecked")
  final IsmRecordCoder<V> ismCoder = (IsmRecordCoder<V>) windowedCoder.getValueCoder();
  checkArgument(executionContext instanceof BatchModeExecutionContext,
      "%s only supports using %s but got %s.", IsmReader.class, BatchModeExecutionContext.class, executionContext);
  final BatchModeExecutionContext execContext = (BatchModeExecutionContext) executionContext;
  // Use the logical reference cache so that readers share a single IsmReaderImpl for
  // the same file.
  return execContext.<IsmReaderKey, NativeReader<?>>getLogicalReferenceCache()
      .get(new IsmReaderKey(resourceId.toString()),
          () -> new IsmReaderImpl<V>(resourceId, ismCoder,
              execContext.<IsmReaderImpl.IsmShardKey,
                  WeightedValue<NavigableMap<RandomAccessData, WindowedValue<IsmRecord<V>>>>>getDataCache()));
}
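The final statement is a get-or-create lookup: the logical reference cache guarantees at most one live IsmReaderImpl per file. A stripped-down sketch of the same pattern using a Guava Cache directly, with a hypothetical Reader stand-in (not Beam code):

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;

class SharedReaders {
  // Values are held weakly, mirroring a logical reference cache: an entry stays alive
  // only while some caller still holds the reader.
  private static final Cache<String, Reader> CACHE =
      CacheBuilder.newBuilder().weakValues().build();

  static Reader forFile(String filename) throws Exception {
    // get(key, loader) runs the loader at most once per key; concurrent callers for the
    // same key block on that single load and then share the resulting instance.
    return CACHE.get(filename, () -> new Reader(filename));
  }

  static class Reader { // hypothetical stand-in for IsmReaderImpl
    final String filename;
    Reader(String filename) { this.filename = filename; }
  }
}

Keying the cache by the file's string path (wrapped in IsmReaderKey) means all readers of the same file collapse to one cache entry.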
Use of org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecordCoder in project beam by apache.
The class CreateIsmShardKeyAndSortKeyDoFnFactoryTest, method testConversionOfRecord().
@Test
public void testConversionOfRecord() throws Exception {
  ParDoFn parDoFn =
      new CreateIsmShardKeyAndSortKeyDoFnFactory()
          .create(
              null /* pipeline options */,
              CloudObject.fromSpec(
                  ImmutableMap.of(
                      PropertyNames.OBJECT_TYPE_NAME, "CreateIsmShardKeyAndSortKeyDoFn",
                      PropertyNames.ENCODING, createIsmRecordEncoding())),
              null /* side input infos */,
              null /* main output tag */,
              null /* output tag to receiver index */,
              null /* execution context */,
              null);
  List<Object> outputReceiver = new ArrayList<>();
  parDoFn.startBundle(outputReceiver::add);
  parDoFn.processElement(valueInGlobalWindow(KV.of(42, 43)));
  IsmRecordCoder<?> coder =
      (IsmRecordCoder) CloudObjects.coderFromCloudObject(CloudObject.fromSpec(createIsmRecordEncoding()));
  assertThat(
      outputReceiver,
      contains(
          valueInGlobalWindow(
              KV.of(
                  coder.hash(ImmutableList.of(42)) /* hash key */,
                  KV.of(KV.of(42, GlobalWindow.INSTANCE) /* sort key */, 43)))));
}
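The assertion spells out the reshaping under test: an input KV<key, value> in a window becomes KV<shardHash, KV<KV<key, window>, value>>, with the shard hash computed by IsmRecordCoder.hash over the key components. As a sketch, the mapping could be written as a plain helper (hypothetical, not the DoFn's actual code):

// Hypothetical helper mirroring the output shape asserted above.
static <K, V> KV<Integer, KV<KV<K, BoundedWindow>, V>> toShardAndSortKey(
    IsmRecordCoder<?> coder, K key, V value, BoundedWindow window) {
  int shardHash = coder.hash(ImmutableList.of(key)); // shard assignment derived from the key
  return KV.of(shardHash, KV.of(KV.of(key, window), value)); // (hash key, (sort key, value))
}

Hashing only the key component keeps all values for a given key in one shard, while the (key, window) pair serves as the sort key, consistent with the factory's name.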