Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
Class BigQueryIOTest, method testTransformingSourceUnsplittable.
/**
 * Checks a {@code TransformingSource} layered over a counting source that was passed through
 * {@link SourceTestUtils#toUnsplittableSource}: the elements read back must equal the expected
 * strings, and the split-at-fraction and split-vs-reference helper checks must pass.
 */
@Test
public void testTransformingSourceUnsplittable() throws Exception {
  final int count = 10000;

  @SuppressWarnings("deprecation")
  BoundedSource<Long> unsplittableLongs =
      SourceTestUtils.toUnsplittableSource(CountingSource.upTo(count));

  // Maps each Long produced by the wrapped source to its decimal string form.
  SerializableFunction<Long, String> longToString =
      new SerializableFunction<Long, String>() {
        @Override
        public String apply(Long value) {
          return value.toString();
        }
      };
  BoundedSource<String> transformed =
      new TransformingSource<>(unsplittableLongs, longToString, StringUtf8Coder.of());

  // Expected output: "0", "1", ..., "9999" in order.
  List<String> wanted = Lists.newArrayList();
  int next = 0;
  while (next < count) {
    wanted.add(String.valueOf(next));
    next++;
  }

  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

  List<String> actual = SourceTestUtils.readFromSource(transformed, pipelineOptions);
  Assert.assertThat(actual, CoreMatchers.is(wanted));

  SourceTestUtils.assertSplitAtFractionBehavior(
      transformed,
      100,
      0.3,
      ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS,
      pipelineOptions);

  SourceTestUtils.assertSourcesEqualReferenceSource(
      transformed, transformed.split(100, pipelineOptions), pipelineOptions);
}
Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
Class BigQuerySourceBase, method createSources.
/**
 * Builds one {@link BoundedSource} of {@link TableRow}s per input file. Each source reads Avro
 * {@link GenericRecord}s from the file and converts them to {@link TableRow}s using the given
 * table schema.
 *
 * @param files the files to read; each is opened via {@code AvroSource.from(file.toString())}
 * @param tableSchema the schema used when converting records to rows
 * @return an immutable list with one source per entry of {@code files}, in the same order
 */
private List<BoundedSource<TableRow>> createSources(List<ResourceId> files, TableSchema tableSchema) throws IOException, InterruptedException {
  // The function captures the schema as a JSON string rather than the TableSchema object
  // (presumably because TableSchema is not Serializable -- confirm).
  final String jsonSchema = BigQueryIO.JSON_FACTORY.toString(tableSchema);
  SerializableFunction<GenericRecord, TableRow> function =
      new SerializableFunction<GenericRecord, TableRow>() {
        // Parsed lazily and cached so the JSON is not re-parsed for every record. Transient, so
        // a deserialized copy of this function simply re-parses on first use. Volatile so a
        // schema parsed by one thread is safely visible to others; a race can at worst parse
        // the same JSON twice.
        private transient volatile TableSchema parsedSchema;

        @Override
        public TableRow apply(GenericRecord input) {
          TableSchema schema = parsedSchema;
          if (schema == null) {
            schema = BigQueryHelpers.fromJsonString(jsonSchema, TableSchema.class);
            parsedSchema = schema;
          }
          return BigQueryAvroUtils.convertGenericRecordToTableRow(input, schema);
        }
      };
  List<BoundedSource<TableRow>> avroSources = Lists.newArrayList();
  for (ResourceId file : files) {
    avroSources.add(
        new TransformingSource<>(
            AvroSource.from(file.toString()), function, getDefaultOutputCoder()));
  }
  return ImmutableList.copyOf(avroSources);
}
Use of org.apache.beam.sdk.transforms.SerializableFunction in project DataflowJavaSDK-examples by GoogleCloudPlatform.
Class LeaderBoardTest, method testTeamScoresObservablyLate.
/**
 * Exercises late data: some elements arrive behind the watermark once it has already passed the
 * end of the window, yet still within the maximum allowed lateness. Those elements are expected
 * to show up in a late pane.
 */
@Test
public void testTeamScoresObservablyLate() {
  Instant firstWindowCloses = baseTime.plus(ALLOWED_LATENESS).plus(TEAM_WINDOW_DURATION);

  TestStream<GameActionInfo> lateEventStream =
      TestStream.create(AvroCoder.of(GameActionInfo.class))
          .advanceWatermarkTo(baseTime)
          .addElements(
              event(TestUser.BLUE_ONE, 3, Duration.standardSeconds(3)),
              event(TestUser.BLUE_TWO, 5, Duration.standardMinutes(8)))
          .advanceProcessingTime(Duration.standardMinutes(10))
          .advanceWatermarkTo(baseTime.plus(Duration.standardMinutes(3)))
          .addElements(
              event(TestUser.RED_ONE, 3, Duration.standardMinutes(1)),
              event(TestUser.RED_ONE, 4, Duration.standardMinutes(2)),
              event(TestUser.BLUE_ONE, 3, Duration.standardMinutes(5)))
          // Move the watermark to one minute before firstWindowCloses.
          .advanceWatermarkTo(firstWindowCloses.minus(Duration.standardMinutes(1)))
          .addElements(
              event(TestUser.RED_TWO, 2, Duration.ZERO),
              event(TestUser.RED_TWO, 5, Duration.standardMinutes(1)),
              event(TestUser.RED_TWO, 3, Duration.standardMinutes(3)))
          .advanceProcessingTime(Duration.standardMinutes(12))
          .addElements(
              event(TestUser.RED_TWO, 9, Duration.standardMinutes(1)),
              event(TestUser.RED_TWO, 1, Duration.standardMinutes(3)))
          .advanceWatermarkToInfinity();

  BoundedWindow window = new IntervalWindow(baseTime, TEAM_WINDOW_DURATION);
  String blueTeam = TestUser.BLUE_ONE.getTeam();
  String redTeam = TestUser.RED_ONE.getTeam();

  PCollection<KV<String, Integer>> teamScores =
      p.apply(lateEventStream)
          .apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS));

  // Across all panes of the window, both team totals must appear.
  SerializableFunction<Iterable<KV<String, Integer>>, Void> hasBothTeamTotals =
      scores -> {
        assertThat(scores, hasItem(KV.of(blueTeam, 11)));
        assertThat(scores, hasItem(KV.of(redTeam, 27)));
        return null;
      };
  PAssert.that(teamScores).inWindow(window).satisfies(hasBothTeamTotals);

  PAssert.thatMap(teamScores)
      .inOnTimePane(window)
      .isEqualTo(
          ImmutableMap.<String, Integer>builder().put(redTeam, 7).put(blueTeam, 11).build());

  // The blue team gets no final pane: every one of its updates was already reflected in an
  // earlier pane.
  PAssert.that(teamScores).inFinalPane(window).containsInAnyOrder(KV.of(redTeam, 27));

  p.run().waitUntilFinish();
}
Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
Class SchemaCoderCloudObjectTranslator, method fromCloudObject.
/**
 * Reconstructs a {@link SchemaCoder} from its {@link CloudObject} form.
 *
 * <p>The type descriptor and the two row-conversion functions are read as strings, converted to
 * byte arrays and deserialized; the schema is parsed from its JSON proto string. If the schema
 * reports overridden encoding positions and has a UUID, those positions are registered with
 * {@link SchemaCoder} before the coder is built.
 *
 * @param cloudObject the cloud object to read the coder components from
 * @return the reconstructed coder
 * @throws RuntimeException wrapping any {@link IOException} raised while rebuilding the coder
 */
@Override
public SchemaCoder fromCloudObject(CloudObject cloudObject) {
  try {
    byte[] typeDescriptorBytes =
        StringUtils.jsonStringToByteArray(Structs.getString(cloudObject, TYPE_DESCRIPTOR));
    TypeDescriptor typeDescriptor =
        (TypeDescriptor)
            SerializableUtils.deserializeFromByteArray(typeDescriptorBytes, "typeDescriptor");

    byte[] toRowBytes =
        StringUtils.jsonStringToByteArray(Structs.getString(cloudObject, TO_ROW_FUNCTION));
    SerializableFunction toRowFunction =
        (SerializableFunction)
            SerializableUtils.deserializeFromByteArray(toRowBytes, "toRowFunction");

    byte[] fromRowBytes =
        StringUtils.jsonStringToByteArray(Structs.getString(cloudObject, FROM_ROW_FUNCTION));
    SerializableFunction fromRowFunction =
        (SerializableFunction)
            SerializableUtils.deserializeFromByteArray(fromRowBytes, "fromRowFunction");

    // Parse the JSON-encoded schema proto, then translate it into a Schema.
    SchemaApi.Schema.Builder protoBuilder = SchemaApi.Schema.newBuilder();
    String schemaJson = Structs.getString(cloudObject, SCHEMA);
    JsonFormat.parser().merge(schemaJson, protoBuilder);
    Schema schema = SchemaTranslation.schemaFromProto(protoBuilder.build());

    @Nullable UUID schemaId = schema.getUUID();
    if (schema.isEncodingPositionsOverridden()) {
      if (schemaId != null) {
        SchemaCoder.overrideEncodingPositions(schemaId, schema.getEncodingPositions());
      }
    }

    return SchemaCoder.of(schema, typeDescriptor, toRowFunction, fromRowFunction);
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  }
}
Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
Class DirectRunnerTest, method byteArrayCountShouldSucceed.
/**
 * Counts occurrences of {@code byte[]} elements (VarInt-encoded integers) drawn from two
 * flattened collections and checks the per-value counts after decoding the keys back to
 * integers.
 */
@Test
public void byteArrayCountShouldSucceed() {
  Pipeline p = getPipeline();

  // Encodes each integer to its VarIntCoder byte representation.
  SerializableFunction<Integer, byte[]> getBytes = input -> {
    try {
      return CoderUtils.encodeToByteArray(VarIntCoder.of(), input);
    } catch (CoderException e) {
      fail("Unexpected Coder Exception " + e);
      throw new AssertionError("Unreachable");
    }
  };
  TypeDescriptor<byte[]> td = new TypeDescriptor<byte[]>() {
  };

  PCollection<byte[]> foos =
      p.apply(Create.of(1, 1, 1, 2, 2, 3)).apply(MapElements.into(td).via(getBytes));
  PCollection<byte[]> msync =
      p.apply(Create.of(1, -2, -8, -16)).apply(MapElements.into(td).via(getBytes));
  PCollection<byte[]> bytes = PCollectionList.of(foos).and(msync).apply(Flatten.pCollections());
  PCollection<KV<byte[], Long>> counts = bytes.apply(Count.perElement());

  // Decode the byte[] keys back to integers so the counts can be compared as a map.
  PCollection<KV<Integer, Long>> countsByInteger =
      counts.apply(
          MapElements.via(
              new SimpleFunction<KV<byte[], Long>, KV<Integer, Long>>() {
                @Override
                public KV<Integer, Long> apply(KV<byte[], Long> input) {
                  try {
                    return KV.of(
                        CoderUtils.decodeFromByteArray(VarIntCoder.of(), input.getKey()),
                        input.getValue());
                  } catch (CoderException e) {
                    fail("Unexpected Coder Exception " + e);
                    throw new AssertionError("Unreachable");
                  }
                }
              }));

  // 1 appears three times in the first input and once in the second; every other value is
  // counted from a single input.
  Map<Integer, Long> expected =
      ImmutableMap.<Integer, Long>builder()
          .put(1, 4L)
          .put(2, 2L)
          .put(3, 1L)
          .put(-2, 1L)
          .put(-8, 1L)
          .put(-16, 1L)
          .build();
  PAssert.thatMap(countsByInteger).isEqualTo(expected);

  // The PAssert above is only evaluated when the pipeline executes; without running it the
  // test would pass without checking anything.
  p.run().waitUntilFinish();
}
Aggregations