use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class CommonCoderTest method convertValue.
/**
 * Converts a JSON-auto-deserialized value into the proper Java type for the known coder
 * identified by the URN of {@code coderSpec}.
 *
 * <p>Composite coders (KV, timer, iterable, windowed value, sharded key, custom window) are
 * handled by recursing into their components, pairing each component spec from
 * {@code coderSpec.getComponents()} with the matching component coder obtained from
 * {@code coder}.
 *
 * @param value the deserialized value; its expected runtime type (String, Number, List or Map)
 *     depends on the URN and is enforced by casts
 * @param coderSpec the coder specification providing the URN, the component specs and, for
 *     rows, the schema payload
 * @param coder the coder instance corresponding to {@code coderSpec}; cast to the concrete
 *     coder class to reach its component coders
 * @return the converted value
 * @throws IllegalStateException if the URN is not one of the coders handled here
 * @throws RuntimeException if the row coder's schema payload cannot be parsed
 */
private static Object convertValue(Object value, CommonCoder coderSpec, Coder coder) {
  String s = coderSpec.getUrn();
  if (s.equals(getUrn(StandardCoders.Enum.BYTES))) {
    // ISO-8859-1 maps every char up to 0xFF onto exactly one byte.
    return ((String) value).getBytes(StandardCharsets.ISO_8859_1);
  } else if (s.equals(getUrn(StandardCoders.Enum.BOOL))) {
    return value;
  } else if (s.equals(getUrn(StandardCoders.Enum.STRING_UTF8))) {
    return value;
  } else if (s.equals(getUrn(StandardCoders.Enum.KV))) {
    Coder keyCoder = ((KvCoder) coder).getKeyCoder();
    Coder valueCoder = ((KvCoder) coder).getValueCoder();
    Map<String, Object> kvMap = (Map<String, Object>) value;
    Object k = convertValue(kvMap.get("key"), coderSpec.getComponents().get(0), keyCoder);
    Object v = convertValue(kvMap.get("value"), coderSpec.getComponents().get(1), valueCoder);
    return KV.of(k, v);
  } else if (s.equals(getUrn(StandardCoders.Enum.VARINT))) {
    // Normalize whatever Number subtype was deserialized to a long.
    return ((Number) value).longValue();
  } else if (s.equals(getUrn(StandardCoders.Enum.TIMER))) {
    Map<String, Object> kvMap = (Map<String, Object>) value;
    Coder<?> keyCoder = ((Timer.Coder) coder).getValueCoder();
    Coder<? extends BoundedWindow> windowCoder = ((Timer.Coder) coder).getWindowCoder();
    List<BoundedWindow> windows = convertWindows(kvMap.get("windows"), coderSpec, windowCoder);
    if ((boolean) kvMap.get("clearBit")) {
      // A cleared timer carries only the key, the tag and the windows.
      return Timer.cleared(
          convertValue(kvMap.get("userKey"), coderSpec.getComponents().get(0), keyCoder),
          (String) kvMap.get("dynamicTimerTag"),
          windows);
    }
    PaneInfo paneInfo = convertPaneInfo((Map<String, Object>) kvMap.get("pane"));
    return Timer.of(
        convertValue(kvMap.get("userKey"), coderSpec.getComponents().get(0), keyCoder),
        (String) kvMap.get("dynamicTimerTag"),
        windows,
        new Instant(((Number) kvMap.get("fireTimestamp")).longValue()),
        new Instant(((Number) kvMap.get("holdTimestamp")).longValue()),
        paneInfo);
  } else if (s.equals(getUrn(StandardCoders.Enum.INTERVAL_WINDOW))) {
    Map<String, Object> kvMap = (Map<String, Object>) value;
    Instant end = new Instant(((Number) kvMap.get("end")).longValue());
    Duration span = Duration.millis(((Number) kvMap.get("span")).longValue());
    // The input gives the window's end and span; the start is derived as end - span.
    return new IntervalWindow(end.minus(span), span);
  } else if (s.equals(getUrn(StandardCoders.Enum.ITERABLE))
      || s.equals(getUrn(StandardCoders.Enum.STATE_BACKED_ITERABLE))) {
    Coder elementCoder = ((IterableLikeCoder) coder).getElemCoder();
    List<Object> elements = (List<Object>) value;
    List<Object> convertedElements = new ArrayList<>();
    for (Object element : elements) {
      convertedElements.add(
          convertValue(element, coderSpec.getComponents().get(0), elementCoder));
    }
    return convertedElements;
  } else if (s.equals(getUrn(StandardCoders.Enum.GLOBAL_WINDOW))) {
    return GlobalWindow.INSTANCE;
  } else if (s.equals(getUrn(StandardCoders.Enum.WINDOWED_VALUE))
      || s.equals(getUrn(StandardCoders.Enum.PARAM_WINDOWED_VALUE))) {
    Map<String, Object> kvMap = (Map<String, Object>) value;
    Coder valueCoder = ((WindowedValue.FullWindowedValueCoder) coder).getValueCoder();
    Coder windowCoder = ((WindowedValue.FullWindowedValueCoder) coder).getWindowCoder();
    Object windowValue =
        convertValue(kvMap.get("value"), coderSpec.getComponents().get(0), valueCoder);
    Instant timestamp = new Instant(((Number) kvMap.get("timestamp")).longValue());
    List<BoundedWindow> windows = convertWindows(kvMap.get("windows"), coderSpec, windowCoder);
    PaneInfo paneInfo = convertPaneInfo((Map<String, Object>) kvMap.get("pane"));
    return WindowedValue.of(windowValue, timestamp, windows, paneInfo);
  } else if (s.equals(getUrn(StandardCoders.Enum.DOUBLE))) {
    // Doubles are expected as strings here and parsed with Double.parseDouble.
    return Double.parseDouble((String) value);
  } else if (s.equals(getUrn(StandardCoders.Enum.ROW))) {
    Schema schema;
    try {
      schema =
          SchemaTranslation.schemaFromProto(SchemaApi.Schema.parseFrom(coderSpec.getPayload()));
    } catch (InvalidProtocolBufferException e) {
      throw new RuntimeException("Failed to parse schema payload for row coder", e);
    }
    return parseField(value, Schema.FieldType.row(schema));
  } else if (s.equals(getUrn(StandardCoders.Enum.SHARDED_KEY))) {
    Map<String, Object> kvMap = (Map<String, Object>) value;
    Coder<?> keyCoder = ((ShardedKey.Coder) coder).getKeyCoder();
    byte[] shardId = ((String) kvMap.get("shardId")).getBytes(StandardCharsets.ISO_8859_1);
    return ShardedKey.of(
        convertValue(kvMap.get("key"), coderSpec.getComponents().get(0), keyCoder), shardId);
  } else if (s.equals(getUrn(StandardCoders.Enum.CUSTOM_WINDOW))) {
    Map<String, Object> kvMap = (Map<String, Object>) value;
    Coder windowCoder = ((TimestampPrefixingWindowCoder) coder).getWindowCoder();
    return convertValue(kvMap.get("window"), coderSpec.getComponents().get(0), windowCoder);
  } else {
    throw new IllegalStateException("Unknown coder URN: " + coderSpec.getUrn());
  }
}

/**
 * Converts a deserialized list of windows, using component 1 of {@code coderSpec} as the
 * window coder spec for every element.
 *
 * <p>The component spec is looked up per element, so it is never touched for an empty list.
 */
private static List<BoundedWindow> convertWindows(
    Object windowsJson, CommonCoder coderSpec, Coder<?> windowCoder) {
  List<BoundedWindow> windows = new ArrayList<>();
  for (Object window : (List<Object>) windowsJson) {
    windows.add(
        (BoundedWindow) convertValue(window, coderSpec.getComponents().get(1), windowCoder));
  }
  return windows;
}

/**
 * Builds a {@link PaneInfo} from its deserialized map form, reading the keys {@code is_first},
 * {@code is_last}, {@code timing}, {@code index} and {@code on_time_index}.
 */
private static PaneInfo convertPaneInfo(Map<String, Object> paneInfoMap) {
  return PaneInfo.createPane(
      (boolean) paneInfoMap.get("is_first"),
      (boolean) paneInfoMap.get("is_last"),
      PaneInfo.Timing.valueOf((String) paneInfoMap.get("timing")),
      (int) paneInfoMap.get("index"),
      (int) paneInfoMap.get("on_time_index"));
}
use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class FlattenP method tryProcess.
/**
 * Decodes the incoming item with the coder registered for {@code ordinal}, re-encodes it with
 * {@code outputCoder} and tries to emit the result.
 *
 * @param ordinal the ordinal used to look up the input coder
 * @param item the encoded item; must be a {@code byte[]}
 * @return whatever {@code tryEmit} returns for the re-encoded value
 */
@Override
protected boolean tryProcess(int ordinal, @Nonnull Object item) {
  Coder coderForOrdinal = inputOrdinalCoders.get(ordinal);
  byte[] encodedInput = (byte[]) item;
  WindowedValue<Object> decoded = Utils.decodeWindowedValue(encodedInput, coderForOrdinal);
  return tryEmit(Utils.encode(decoded, outputCoder));
}
use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class ParDoTranslatorBatch method createBroadcastSideInputs.
/**
 * Builds a {@code SideInputBroadcast} holding, for every side input, its elements encoded as
 * byte arrays and broadcast through the Spark context, together with the coder used.
 *
 * @param sideInputs the side input views to broadcast
 * @param context the translation context supplying the Spark session and side input datasets
 * @return the populated broadcast holder, keyed by each view's internal tag id
 */
private static SideInputBroadcast createBroadcastSideInputs(
    List<PCollectionView<?>> sideInputs, AbstractTranslationContext context) {
  JavaSparkContext sparkContext =
      JavaSparkContext.fromSparkContext(context.getSparkSession().sparkContext());
  SideInputBroadcast result = new SideInputBroadcast();
  for (PCollectionView<?> view : sideInputs) {
    Coder<? extends BoundedWindow> windowCoder =
        view.getPCollection().getWindowingStrategy().getWindowFn().windowCoder();
    // Cast through Coder<?> to get a coder typed for wildcard windowed values.
    Coder<WindowedValue<?>> fullCoder =
        (Coder<WindowedValue<?>>)
            (Coder<?>) WindowedValue.getFullCoder(view.getPCollection().getCoder(), windowCoder);
    Dataset<WindowedValue<?>> dataset = context.getSideInputDataSet(view);
    List<byte[]> encoded = new ArrayList<>();
    for (WindowedValue<?> element : dataset.collectAsList()) {
      encoded.add(CoderHelpers.toByteArray(element, fullCoder));
    }
    result.add(view.getTagInternal().getId(), sparkContext.broadcast(encoded), fullCoder);
  }
  return result;
}
use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class SparkSideInputReader method initializeBroadcastVariable.
/**
 * Groups the given windowed values by window and applies the view's {@code ViewFn} to each
 * group, producing one materialized view value per window.
 *
 * @param inputValues the windowed values backing the side input
 * @param view the view whose {@code ViewFn} and materialization URN drive the conversion
 * @return a map from window to the view value computed from that window's elements
 * @throws IllegalStateException if the materialization URN is neither iterable nor multimap
 */
private <T> Map<BoundedWindow, T> initializeBroadcastVariable(
    Iterable<WindowedValue<?>> inputValues, PCollectionView<T> view) {
  // Bucket every element under each window it belongs to.
  Map<BoundedWindow, List<WindowedValue<?>>> byWindow = new HashMap<>();
  for (WindowedValue<?> element : inputValues) {
    for (BoundedWindow window : element.getWindows()) {
      byWindow.computeIfAbsent(window, k -> new ArrayList<>()).add(element);
    }
  }

  // Materialize the view once per window according to the requested format.
  Map<BoundedWindow, T> viewPerWindow = new HashMap<>();
  for (Map.Entry<BoundedWindow, List<WindowedValue<?>>> entry : byWindow.entrySet()) {
    switch (view.getViewFn().getMaterialization().getUrn()) {
      case Materializations.ITERABLE_MATERIALIZATION_URN:
        {
          ViewFn<IterableView, T> iterableFn = (ViewFn<IterableView, T>) view.getViewFn();
          // The lambda re-extracts the values each time the view is read.
          T materialized =
              iterableFn.apply(
                  () ->
                      entry.getValue().stream()
                          .map(WindowedValue::getValue)
                          .collect(Collectors.toList()));
          viewPerWindow.put(entry.getKey(), materialized);
        }
        break;
      case Materializations.MULTIMAP_MATERIALIZATION_URN:
        {
          ViewFn<MultimapView, T> multimapFn = (ViewFn<MultimapView, T>) view.getViewFn();
          Coder<?> keyCoder = ((KvCoder<?, ?>) view.getCoderInternal()).getKeyCoder();
          T materialized =
              multimapFn.apply(
                  InMemoryMultimapSideInputView.fromIterable(
                      keyCoder,
                      (Iterable)
                          entry.getValue().stream()
                              .map(WindowedValue::getValue)
                              .collect(Collectors.toList())));
          viewPerWindow.put(entry.getKey(), materialized);
        }
        break;
      default:
        throw new IllegalStateException(
            String.format(
                "Unknown side input materialization format requested '%s'",
                view.getViewFn().getMaterialization().getUrn()));
    }
  }
  return viewPerWindow;
}
use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class SparkExecutableStageFunction method getStateRequestHandler.
/**
 * Assembles the state request handler for the given stage: side input requests are served from
 * the broadcast {@code sideInputs}, and bag user state requests from
 * {@code bagUserStateHandlerFactory} when the stage declares user states.
 *
 * @param executableStage the stage whose side inputs and user states are consulted
 * @param processBundleDescriptor descriptor passed to the bag user state handler factory
 * @return a handler that dispatches on the state key type
 * @throws RuntimeException if creating the side input handler fails with an IOException
 */
private StateRequestHandler getStateRequestHandler(
    ExecutableStage executableStage,
    ProcessBundleDescriptors.ExecutableProcessBundleDescriptor processBundleDescriptor) {
  StateRequestHandlers.SideInputHandlerFactory sideInputFactory =
      BatchSideInputHandlerFactory.forStage(
          executableStage,
          new BatchSideInputHandlerFactory.SideInputGetter() {
            @Override
            public <T> List<T> getSideInput(String pCollectionId) {
              Tuple2<Broadcast<List<byte[]>>, WindowedValueCoder<SideInputT>> broadcastAndCoder =
                  sideInputs.get(pCollectionId);
              Broadcast<List<byte[]>> encodedBroadcast = broadcastAndCoder._1;
              WindowedValueCoder<SideInputT> valueCoder = broadcastAndCoder._2;
              // Decode every broadcast byte array back into a windowed value.
              return (List<T>)
                  encodedBroadcast.value().stream()
                      .map(bytes -> CoderHelpers.fromByteArray(bytes, valueCoder))
                      .collect(Collectors.toList());
            }
          });

  final StateRequestHandler sideInputHandler;
  try {
    sideInputHandler =
        StateRequestHandlers.forSideInputHandlerFactory(
            ProcessBundleDescriptors.getSideInputs(executableStage), sideInputFactory);
  } catch (IOException e) {
    throw new RuntimeException("Failed to setup state handler", e);
  }

  // Create the user state factory on first use.
  if (bagUserStateHandlerFactory == null) {
    bagUserStateHandlerFactory = new InMemoryBagUserStateFactory();
  }

  final StateRequestHandler userStateHandler;
  if (executableStage.getUserStates().isEmpty()) {
    userStateHandler = StateRequestHandler.unsupported();
  } else {
    // Need to discard the old key's state
    bagUserStateHandlerFactory.resetForNewKey();
    userStateHandler =
        StateRequestHandlers.forBagUserStateHandlerFactory(
            processBundleDescriptor, bagUserStateHandlerFactory);
  }

  // All three side input key types share one handler; bag user state gets its own.
  EnumMap<TypeCase, StateRequestHandler> handlersByType = new EnumMap<>(StateKey.TypeCase.class);
  handlersByType.put(StateKey.TypeCase.ITERABLE_SIDE_INPUT, sideInputHandler);
  handlersByType.put(StateKey.TypeCase.MULTIMAP_SIDE_INPUT, sideInputHandler);
  handlersByType.put(StateKey.TypeCase.MULTIMAP_KEYS_SIDE_INPUT, sideInputHandler);
  handlersByType.put(StateKey.TypeCase.BAG_USER_STATE, userStateHandler);
  return StateRequestHandlers.delegateBasedUponType(handlersByType);
}
Aggregations