Use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
From the class ConfigGeneratorTest, the method testUserStoreConfig:
@Test
public void testUserStoreConfig() {
  SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
  options.setJobName("TestStoreConfig");
  options.setRunner(SamzaRunner.class);
  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply(Create.empty(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.strings())))
      .apply(ParDo.of(new DoFn<KV<String, String>, Void>() {
        private static final String testState = "testState";

        @StateId(testState)
        private final StateSpec<ValueState<Integer>> state = StateSpecs.value();

        @ProcessElement
        public void processElement(
            ProcessContext context, @StateId(testState) ValueState<Integer> state) {
        }
      }));
  final Map<PValue, String> idMap = PViewToIdMapper.buildIdMap(pipeline);
  final ConfigBuilder configBuilder = new ConfigBuilder(options);
  SamzaPipelineTranslator.createConfig(pipeline, options, idMap, configBuilder);
  final Config config = configBuilder.build();
  assertEquals(RocksDbKeyValueStorageEngineFactory.class.getName(), config.get("stores.testState.factory"));
  assertEquals("byteArraySerde", config.get("stores.testState.key.serde"));
  assertEquals("stateValueSerde", config.get("stores.testState.msg.serde"));
  assertNull(config.get("stores.testState.changelog"));
  options.setStateDurable(true);
  SamzaPipelineTranslator.createConfig(pipeline, options, idMap, configBuilder);
  final Config config2 = configBuilder.build();
  assertEquals("TestStoreConfig-1-testState-changelog", config2.get("stores.testState.changelog"));
}
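For context, a minimal sketch of the stateful-DoFn pattern the test exercises: the @StateId name ("testState") is what surfaces as the store name in the generated Samza config (the stores.testState.* keys asserted above). The counting logic below is illustrative only, not part of the test.

DoFn<KV<String, String>, Void> statefulFn = new DoFn<KV<String, String>, Void>() {
  @StateId("testState")
  private final StateSpec<ValueState<Integer>> countSpec = StateSpecs.value();

  @ProcessElement
  public void processElement(
      ProcessContext context, @StateId("testState") ValueState<Integer> count) {
    // ValueState starts out null for each key; here we count elements per key.
    Integer current = count.read();
    count.write(current == null ? 1 : current + 1);
  }
};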
Use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
From the class WindowTest, the method testTimestampCombinerDefault:
/**
 * Tests that when two elements are combined via a GroupByKey, their output timestamp agrees with
 * the windowing function's default: the end of the window.
 */
@Test
@Category(ValidatesRunner.class)
public void testTimestampCombinerDefault() {
  pipeline.enableAbandonedNodeEnforcement(true);
  pipeline
      .apply(
          Create.timestamped(
              TimestampedValue.of(KV.of(0, "hello"), new Instant(0)),
              TimestampedValue.of(KV.of(0, "goodbye"), new Instant(10))))
      .apply(Window.into(FixedWindows.of(Duration.standardMinutes(10))))
      .apply(GroupByKey.create())
      .apply(ParDo.of(new DoFn<KV<Integer, Iterable<String>>, Void>() {
        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
          assertThat(
              c.timestamp(),
              equalTo(
                  new IntervalWindow(
                          new Instant(0),
                          new Instant(0).plus(Duration.standardMinutes(10)))
                      .maxTimestamp()));
        }
      }));
  pipeline.run();
}
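A short sketch of why the assertion holds, assuming Beam's default TimestampCombiner.END_OF_WINDOW: GroupByKey stamps each combined output with the window's maxTimestamp(), which for an IntervalWindow is its end minus one millisecond.

// Both elements (timestamps 0 and 10) fall in the first ten-minute window.
IntervalWindow window =
    new IntervalWindow(new Instant(0), new Instant(0).plus(Duration.standardMinutes(10)));
// maxTimestamp() is the window end minus 1 ms; this is the timestamp the test expects.
Instant expected = window.maxTimestamp();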
Use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
From the class BeamWindowRel, the method aggField:
private static DoFn<List<Row>, Row> aggField(
    final Schema expectedSchema, final FieldAggregation fieldAgg) {
  return new DoFn<List<Row>, Row>() {

    @ProcessElement
    public void processElement(
        @Element List<Row> inputPartition, OutputReceiver<Row> out, ProcessContext c) {
      List<Row> sortedRowsAsList = inputPartition;
      NavigableMap<BigDecimal, List<Row>> indexRange = null;
      if (!fieldAgg.rows) {
        indexRange = indexRows(sortedRowsAsList);
      }
      for (int idx = 0; idx < sortedRowsAsList.size(); idx++) {
        List<Row> aggRange = null;
        if (fieldAgg.rows) {
          aggRange = getRows(sortedRowsAsList, idx);
        } else {
          aggRange = getRange(indexRange, sortedRowsAsList.get(idx));
        }
        Object accumulator = fieldAgg.combineFn.createAccumulator();
        // If no input fields are needed, use a placeholder field index.
        final int aggFieldIndex =
            fieldAgg.inputFields.isEmpty() ? -1 : fieldAgg.inputFields.get(0);
        long count = 0;
        for (Row aggRow : aggRange) {
          if (fieldAgg.combineFn
              instanceof BeamBuiltinAnalyticFunctions.PositionAwareCombineFn) {
            BeamBuiltinAnalyticFunctions.PositionAwareCombineFn fn =
                (BeamBuiltinAnalyticFunctions.PositionAwareCombineFn) fieldAgg.combineFn;
            accumulator =
                fn.addInput(
                    accumulator,
                    getOrderByValue(aggRow),
                    count,
                    (long) idx,
                    (long) sortedRowsAsList.size());
          } else {
            accumulator =
                fieldAgg.combineFn.addInput(accumulator, aggRow.getBaseValue(aggFieldIndex));
          }
          count++;
        }
        Object result = fieldAgg.combineFn.extractOutput(accumulator);
        Row processingRow = sortedRowsAsList.get(idx);
        List<Object> fieldValues = Lists.newArrayListWithCapacity(processingRow.getFieldCount());
        fieldValues.addAll(processingRow.getValues());
        fieldValues.add(result);
        Row build = Row.withSchema(expectedSchema).addValues(fieldValues).build();
        out.output(build);
      }
    }

    private NavigableMap<BigDecimal, List<Row>> indexRows(List<Row> input) {
      NavigableMap<BigDecimal, List<Row>> map = new TreeMap<BigDecimal, List<Row>>();
      for (Row r : input) {
        BigDecimal orderByValue = getOrderByValue(r);
        if (orderByValue == null) {
          // Special case: agg(X) OVER () has no order key, so set a dummy value.
          orderByValue = BigDecimal.ZERO;
        }
        if (!map.containsKey(orderByValue)) {
          map.put(orderByValue, Lists.newArrayList());
        }
        map.get(orderByValue).add(r);
      }
      return map;
    }

    private List<Row> getRange(NavigableMap<BigDecimal, List<Row>> indexRanges, Row aRow) {
      NavigableMap<BigDecimal, List<Row>> subMap;
      BigDecimal currentRowValue = getOrderByValue(aRow);
      if (currentRowValue != null && fieldAgg.lowerLimit != null && fieldAgg.upperLimit != null) {
        BigDecimal ll = currentRowValue.subtract(fieldAgg.lowerLimit);
        BigDecimal ul = currentRowValue.add(fieldAgg.upperLimit);
        subMap = indexRanges.subMap(ll, true, ul, true);
      } else if (currentRowValue != null
          && fieldAgg.lowerLimit != null
          && fieldAgg.upperLimit == null) {
        BigDecimal ll = currentRowValue.subtract(fieldAgg.lowerLimit);
        subMap = indexRanges.tailMap(ll, true);
      } else if (currentRowValue != null
          && fieldAgg.lowerLimit == null
          && fieldAgg.upperLimit != null) {
        BigDecimal ul = currentRowValue.add(fieldAgg.upperLimit);
        subMap = indexRanges.headMap(ul, true);
      } else {
        subMap = indexRanges;
      }
      List<Row> result = Lists.newArrayList();
      for (List<Row> partialList : subMap.values()) {
        result.addAll(partialList);
      }
      return result;
    }

    private BigDecimal getOrderByValue(Row r) {
      // Special case: Calcite rewrites agg(X) OVER () as agg(X) OVER
      // (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING).
      // That form has no order keys, so return null.
      if (fieldAgg.orderKeys.size() == 0) {
        return null;
      } else {
        return new BigDecimal(((Number) r.getBaseValue(fieldAgg.orderKeys.get(0))).toString());
      }
    }

    private List<Row> getRows(List<Row> input, int index) {
      Integer ll = fieldAgg.lowerLimit != null ? fieldAgg.lowerLimit.intValue() : Integer.MAX_VALUE;
      Integer ul = fieldAgg.upperLimit != null ? fieldAgg.upperLimit.intValue() : Integer.MAX_VALUE;
      int lowerIndex = ll == Integer.MAX_VALUE ? Integer.MIN_VALUE : index - ll;
      int upperIndex = ul == Integer.MAX_VALUE ? Integer.MAX_VALUE : index + ul + 1;
      lowerIndex = lowerIndex < 0 ? 0 : lowerIndex;
      upperIndex = upperIndex > input.size() ? input.size() : upperIndex;
      List<Row> out = input.subList(lowerIndex, upperIndex);
      return out;
    }
  };
}
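To make the getRows arithmetic concrete, a hedged sketch of the ROWS frame it computes, using illustrative values for a frame like ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING over a five-row partition (the variable names and data here are made up for illustration):

// At index 2 with lowerLimit = 1 and upperLimit = 1:
//   lowerIndex = 2 - 1 = 1, upperIndex = 2 + 1 + 1 = 4  (subList is end-exclusive)
List<Integer> partition = Arrays.asList(10, 20, 30, 40, 50);
int index = 2;
int lowerIndex = Math.max(0, index - 1);
int upperIndex = Math.min(partition.size(), index + 1 + 1);
List<Integer> frame = partition.subList(lowerIndex, upperIndex);  // [20, 30, 40]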
Use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
From the class DataflowPipelineTranslatorTest, the method testStepDisplayData:
@Test
public void testStepDisplayData() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  Pipeline pipeline = Pipeline.create(options);
  DoFn<Integer, Integer> fn1 = new DoFn<Integer, Integer>() {
    @ProcessElement
    public void processElement(ProcessContext c) throws Exception {
      c.output(c.element());
    }

    @Override
    public void populateDisplayData(DisplayData.Builder builder) {
      builder
          .add(DisplayData.item("foo", "bar"))
          .add(
              DisplayData.item("foo2", DataflowPipelineTranslatorTest.class)
                  .withLabel("Test Class")
                  .withLinkUrl("http://www.google.com"));
    }
  };
  DoFn<Integer, Integer> fn2 = new DoFn<Integer, Integer>() {
    @ProcessElement
    public void processElement(ProcessContext c) throws Exception {
      c.output(c.element());
    }

    @Override
    public void populateDisplayData(DisplayData.Builder builder) {
      builder.add(DisplayData.item("foo3", 1234));
    }
  };
  ParDo.SingleOutput<Integer, Integer> parDo1 = ParDo.of(fn1);
  ParDo.SingleOutput<Integer, Integer> parDo2 = ParDo.of(fn2);
  pipeline.apply(Create.of(1, 2, 3)).apply(parDo1).apply(parDo2);
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceV1Transforms(pipeline);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();
  assertAllStepOutputsHaveUniqueIds(job);
  List<Step> steps = job.getSteps();
  assertEquals(3, steps.size());
  Map<String, Object> parDo1Properties = steps.get(1).getProperties();
  Map<String, Object> parDo2Properties = steps.get(2).getProperties();
  assertThat(parDo1Properties, hasKey("display_data"));
  @SuppressWarnings("unchecked")
  Collection<Map<String, String>> fn1displayData =
      (Collection<Map<String, String>>) parDo1Properties.get("display_data");
  @SuppressWarnings("unchecked")
  Collection<Map<String, String>> fn2displayData =
      (Collection<Map<String, String>>) parDo2Properties.get("display_data");
  ImmutableSet<ImmutableMap<String, Object>> expectedFn1DisplayData =
      ImmutableSet.of(
          ImmutableMap.<String, Object>builder()
              .put("key", "foo")
              .put("type", "STRING")
              .put("value", "bar")
              .put("namespace", fn1.getClass().getName())
              .build(),
          ImmutableMap.<String, Object>builder()
              .put("key", "fn")
              .put("label", "Transform Function")
              .put("type", "JAVA_CLASS")
              .put("value", fn1.getClass().getName())
              .put("shortValue", fn1.getClass().getSimpleName())
              .put("namespace", parDo1.getClass().getName())
              .build(),
          ImmutableMap.<String, Object>builder()
              .put("key", "foo2")
              .put("type", "JAVA_CLASS")
              .put("value", DataflowPipelineTranslatorTest.class.getName())
              .put("shortValue", DataflowPipelineTranslatorTest.class.getSimpleName())
              .put("namespace", fn1.getClass().getName())
              .put("label", "Test Class")
              .put("linkUrl", "http://www.google.com")
              .build());
  ImmutableSet<ImmutableMap<String, Object>> expectedFn2DisplayData =
      ImmutableSet.of(
          ImmutableMap.<String, Object>builder()
              .put("key", "fn")
              .put("label", "Transform Function")
              .put("type", "JAVA_CLASS")
              .put("value", fn2.getClass().getName())
              .put("shortValue", fn2.getClass().getSimpleName())
              .put("namespace", parDo2.getClass().getName())
              .build(),
          ImmutableMap.<String, Object>builder()
              .put("key", "foo3")
              .put("type", "INTEGER")
              .put("value", 1234L)
              .put("namespace", fn2.getClass().getName())
              .build());
  assertEquals(expectedFn1DisplayData, ImmutableSet.copyOf(fn1displayData));
  assertEquals(expectedFn2DisplayData, ImmutableSet.copyOf(fn2displayData));
}
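As a reference for the display_data entries asserted above, a minimal sketch of the populateDisplayData hook with hypothetical keys and values: each item is rendered with its key, type, value, and the declaring class as the namespace, while withLabel and withLinkUrl supply the optional fields the test checks.

@Override
public void populateDisplayData(DisplayData.Builder builder) {
  builder
      .add(DisplayData.item("stringKey", "someValue"))   // rendered with type STRING
      .add(DisplayData.item("intKey", 42))               // rendered with type INTEGER
      .add(
          DisplayData.item("classKey", String.class)     // rendered with type JAVA_CLASS
              .withLabel("Example Class")
              .withLinkUrl("http://example.com"));
}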
Use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
From the class BatchStatefulParDoOverridesTest, the method testFnApiSingleOutputOverrideNonCrashing:
@Test
public void testFnApiSingleOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(options);
  DummyStatefulDoFn fn = new DummyStatefulDoFn();
  pipeline.apply(Create.of(KV.of(1, 2))).apply(ParDo.of(fn));
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceV1Transforms(pipeline);
  assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) fn));
}
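DummyStatefulDoFn is defined elsewhere in the test class and not shown here. A plausible minimal shape (hypothetical, for illustration only) is a keyed DoFn that declares state, which is what triggers the batch stateful ParDo override, plus an equals override so the equalTo assertion above can match the wrapped instance:

// Hypothetical sketch; the real DummyStatefulDoFn lives in BatchStatefulParDoOverridesTest.
static class DummyStatefulDoFn extends DoFn<KV<Integer, Integer>, Integer> {
  @StateId("dummy")
  private final StateSpec<ValueState<Integer>> spec = StateSpecs.value();

  @ProcessElement
  public void processElement(
      ProcessContext c, @StateId("dummy") ValueState<Integer> state) {}

  @Override
  public boolean equals(Object other) {
    return other instanceof DummyStatefulDoFn;
  }

  @Override
  public int hashCode() {
    return getClass().hashCode();
  }
}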