Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
From the class Neo4jIOIT, the method testWriteUnwind:
@Test
public void testWriteUnwind() throws Exception {
  PCollection<String> stringsCollections =
      writeUnwindPipeline.apply(Create.of(Arrays.asList("one", "two", "three")));
  // Every row is represented by a Map<String, Object> in the parameters map.
  // We accumulate the rows and 'unwind' those to Neo4j for performance reasons.
  //
  SerializableFunction<String, Map<String, Object>> parametersMapper =
      name -> Collections.singletonMap("name", name);
  Neo4jIO.WriteUnwind<String> read =
      Neo4jIO.<String>writeUnwind()
          .withDriverConfiguration(
              Neo4jTestUtil.getDriverConfiguration(containerHostname, containerPort))
          .withSessionConfig(SessionConfig.forDatabase(Neo4jTestUtil.NEO4J_DATABASE))
          .withBatchSize(5000)
          .withUnwindMapName("rows")
          .withCypher("UNWIND $rows AS row MERGE(n:Num { name : row.name })")
          .withParametersFunction(parametersMapper)
          .withCypherLogging();
  stringsCollections.apply(read);
  // Now run this pipeline
  //
  PipelineResult pipelineResult = writeUnwindPipeline.run();
  Assert.assertEquals(PipelineResult.State.DONE, pipelineResult.getState());
  //
  try (Driver driver = Neo4jTestUtil.getDriver(containerHostname, containerPort)) {
    try (Session session = Neo4jTestUtil.getSession(driver, true)) {
      List<String> names =
          session.readTransaction(
              tx -> {
                List<String> list = new ArrayList<>();
                Result result = tx.run("MATCH(n:Num) RETURN n.name");
                while (result.hasNext()) {
                  Record record = result.next();
                  list.add(record.get(0).asString());
                }
                return list;
              });
      assertThat(names, containsInAnyOrder("one", "two", "three"));
    }
  }
}
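The parametersMapper is where SerializableFunction comes in: for each input element it builds the Map<String, Object> that becomes one entry in the $rows list iterated by the UNWIND statement. As a sketch of the same idea with a richer element type (the Person class and its getters are hypothetical, not part of the Beam test), the mapper could emit several parameters per row:
// Hypothetical element type; only the shape of the mapper matters here.
SerializableFunction<Person, Map<String, Object>> personMapper =
    person -> {
      Map<String, Object> row = new HashMap<>();
      row.put("name", person.getName());
      row.put("age", person.getAge());
      return row;
    };
// Wired up the same way as parametersMapper above:
//   Neo4jIO.<Person>writeUnwind()
//       .withUnwindMapName("rows")
//       .withCypher("UNWIND $rows AS row MERGE (n:Person { name: row.name, age: row.age })")
//       .withParametersFunction(personMapper)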
Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
From the class Neo4jIOIT, the method testParameterizedRead:
@Test
public void testParameterizedRead() throws Exception {
  PCollection<String> stringsCollections =
      parameterizedReadPipeline.apply(Create.of(Arrays.asList("one", "two", "three")));
  final Schema outputSchema =
      Schema.of(
          Schema.Field.of("One", Schema.FieldType.INT32),
          Schema.Field.of("Str", Schema.FieldType.STRING));
  SerializableFunction<String, Map<String, Object>> parametersFunction =
      string -> Collections.singletonMap("par1", string);
  Neo4jIO.RowMapper<Row> rowMapper =
      record -> {
        int one = record.get(0).asInt();
        String string = record.get(1).asString();
        return Row.withSchema(outputSchema).attachValues(one, string);
      };
  Neo4jIO.ReadAll<String, Row> read =
      Neo4jIO.<String, Row>readAll()
          .withCypher("RETURN 1, $par1")
          .withDriverConfiguration(
              Neo4jTestUtil.getDriverConfiguration(containerHostname, containerPort))
          .withSessionConfig(SessionConfig.forDatabase(Neo4jTestUtil.NEO4J_DATABASE))
          .withRowMapper(rowMapper)
          .withParametersFunction(parametersFunction)
          .withCoder(SerializableCoder.of(Row.class))
          .withCypherLogging();
  PCollection<Row> outputRows = stringsCollections.apply(read);
  PCollection<String> outputLines =
      outputRows.apply(ParDo.of(new ParameterizedReadRowToLineFn()));
  PAssert.that(outputLines).containsInAnyOrder("1,one", "1,two", "1,three");
  // Now run this pipeline
  //
  PipelineResult pipelineResult = parameterizedReadPipeline.run();
  Assert.assertEquals(PipelineResult.State.DONE, pipelineResult.getState());
}
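Here the per-element parameters come from parametersFunction, which binds each input string to the $par1 Cypher parameter, while rowMapper (a Neo4jIO.RowMapper) converts each returned Neo4j Record into a Beam Row. The ParameterizedReadRowToLineFn used for the assertion is defined elsewhere in the test class; a minimal DoFn with the same observable behaviour (a sketch, not the Beam implementation) could look like this:
// Sketch only; the real ParameterizedReadRowToLineFn lives in Neo4jIOIT.
static class RowToLineFn extends DoFn<Row, String> {
  @ProcessElement
  public void processElement(@Element Row row, OutputReceiver<String> out) {
    // Field 0 holds the literal 1, field 1 the echoed $par1 value.
    out.output(row.getInt32(0) + "," + row.getString(1));
  }
}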
Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
From the class GroupByWithNullValuesTest, the method testGroupByWithNullValues:
@Test
public void testGroupByWithNullValues() {
  FlinkPipelineOptions options = FlinkPipelineOptions.defaults();
  options.setRunner(TestFlinkRunner.class);
  options.setStreaming(true);
  Pipeline pipeline = Pipeline.create(options);
  PCollection<Integer> result =
      pipeline
          .apply(
              GenerateSequence.from(0)
                  .to(100)
                  .withTimestampFn(
                      new SerializableFunction<Long, Instant>() {
                        @Override
                        public Instant apply(Long input) {
                          return new Instant(input);
                        }
                      }))
          .apply(Window.into(FixedWindows.of(Duration.millis(10))))
          .apply(
              ParDo.of(
                  new DoFn<Long, KV<String, Void>>() {
                    @ProcessElement
                    public void processElement(ProcessContext pc) {
                      pc.output(KV.of("hello", null));
                    }
                  }))
          .apply(GroupByKey.create())
          .apply(
              ParDo.of(
                  new DoFn<KV<String, Iterable<Void>>, Integer>() {
                    @ProcessElement
                    public void processElement(ProcessContext pc) {
                      int count = 0;
                      for (Void aVoid : pc.element().getValue()) {
                        assertNull("Element should be null", aVoid);
                        count++;
                      }
                      pc.output(count);
                    }
                  }));
  PAssert.that(result).containsInAnyOrder(10, 10, 10, 10, 10, 10, 10, 10, 10, 10);
  pipeline.run();
}
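GenerateSequence.withTimestampFn takes a SerializableFunction<Long, Instant>; here element n gets timestamp n, so each 10 ms FixedWindows window holds exactly 10 elements, which is what the final PAssert checks. Because SerializableFunction is a functional interface that extends Serializable, the anonymous class can also be written as a lambda (a sketch of the equivalent configuration):
// Equivalent configuration using a lambda; the target type keeps it serializable.
GenerateSequence.from(0)
    .to(100)
    .withTimestampFn(input -> new Instant(input));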
Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
From the class FlinkRequiresStableInputTest, the method createPipeline:
private static Pipeline createPipeline(
    PipelineOptions options, String singleOutputPrefix, String multiOutputPrefix) {
  Pipeline p = Pipeline.create(options);
  SerializableFunction<Void, Void> firstTime =
      (SerializableFunction<Void, Void>) value -> {
        latch.countDown();
        return null;
      };
  PCollection<String> impulse = p.apply("CreatePCollectionOfOneValue", Create.of(VALUE));
  impulse
      .apply("Single-PairWithRandomKey", MapElements.via(new RequiresStableInputIT.PairWithRandomKeyFn()))
      .apply("Single-MakeSideEffectAndThenFail",
          ParDo.of(new RequiresStableInputIT.MakeSideEffectAndThenFailFn(singleOutputPrefix, firstTime)));
  impulse
      .apply("Multi-PairWithRandomKey", MapElements.via(new RequiresStableInputIT.PairWithRandomKeyFn()))
      .apply("Multi-MakeSideEffectAndThenFail",
          ParDo.of(new RequiresStableInputIT.MakeSideEffectAndThenFailFn(multiOutputPrefix, firstTime))
              .withOutputTags(new TupleTag<>(), TupleTagList.empty()));
  return p;
}
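The firstTime callback is a SerializableFunction<Void, Void> used purely for its side effect: when MakeSideEffectAndThenFailFn first runs, the latch is counted down and the DoFn then fails (per its name), forcing the runner to replay its stable input. The real side-effect logic lives in RequiresStableInputIT; a minimal sketch of how such a callback can be invoked from a DoFn (names are illustrative) is:
// Illustrative sketch: invoking a SerializableFunction<Void, Void> callback from a DoFn.
static class CallbackFn extends DoFn<String, String> {
  private final SerializableFunction<Void, Void> onFirstCall;

  CallbackFn(SerializableFunction<Void, Void> onFirstCall) {
    this.onFirstCall = onFirstCall;
  }

  @ProcessElement
  public void processElement(@Element String element, OutputReceiver<String> out) {
    // Fire the callback purely for its side effect, then pass the element through.
    onFirstCall.apply(null);
    out.output(element);
  }
}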
Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
From the class BigQueryIOTest, the method writeDynamicDestinations:
public void writeDynamicDestinations(boolean streaming) throws Exception {
  BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
  bqOptions.setProject("project-id");
  bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
  FakeDatasetService datasetService = new FakeDatasetService();
  FakeBigQueryServices fakeBqServices =
      new FakeBigQueryServices()
          .withJobService(new FakeJobService())
          .withDatasetService(datasetService);
  datasetService.createDataset("project-id", "dataset-id", "", "");
  final Pattern userPattern = Pattern.compile("([a-z]+)([0-9]+)");
  Pipeline p = TestPipeline.create(bqOptions);
  final PCollectionView<List<String>> sideInput1 =
      p.apply("Create SideInput 1", Create.of("a", "b", "c").withCoder(StringUtf8Coder.of()))
          .apply("asList", View.<String>asList());
  final PCollectionView<Map<String, String>> sideInput2 =
      p.apply("Create SideInput2", Create.of(KV.of("a", "a"), KV.of("b", "b"), KV.of("c", "c")))
          .apply("AsMap", View.<String, String>asMap());
  final List<String> allUsernames = ImmutableList.of("bill", "bob", "randolph");
  List<String> userList = Lists.newArrayList();
  // Generate enough records to exercise WriteGroupedRecordsToFiles.
  for (int i = 0; i < BatchLoads.DEFAULT_MAX_NUM_WRITERS_PER_BUNDLE * 10; ++i) {
    // Assign each user id a single randomly chosen nickname.
    for (int j = 0; j < 1; ++j) {
      String nickname = allUsernames.get(ThreadLocalRandom.current().nextInt(allUsernames.size()));
      userList.add(nickname + i);
    }
  }
  PCollection<String> users =
      p.apply("CreateUsers", Create.of(userList))
          .apply(
              Window.into(
                  new PartitionedGlobalWindows<>(
                      new SerializableFunction<String, String>() {
                        @Override
                        public String apply(String arg) {
                          return arg;
                        }
                      })));
  if (streaming) {
    users = users.setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);
  }
  users.apply(
      "WriteBigQuery",
      BigQueryIO.<String>write()
          .withTestServices(fakeBqServices)
          .withMaxFilesPerBundle(5)
          .withMaxFileSize(10)
          .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
          .withFormatFunction(
              new SerializableFunction<String, TableRow>() {
                @Override
                public TableRow apply(String user) {
                  Matcher matcher = userPattern.matcher(user);
                  if (matcher.matches()) {
                    return new TableRow()
                        .set("name", matcher.group(1))
                        .set("id", Integer.valueOf(matcher.group(2)));
                  }
                  throw new RuntimeException("Unmatching element " + user);
                }
              })
          .to(
              new StringIntegerDestinations() {
                @Override
                public Integer getDestination(ValueInSingleWindow<String> element) {
                  assertThat(element.getWindow(), Matchers.instanceOf(PartitionedGlobalWindow.class));
                  Matcher matcher = userPattern.matcher(element.getValue());
                  if (matcher.matches()) {
                    // Tables are keyed by user id, so the numeric suffix is enough to identify
                    // a table.
                    return Integer.valueOf(matcher.group(2));
                  }
                  throw new RuntimeException("Unmatching destination " + element.getValue());
                }

                @Override
                public TableDestination getTable(Integer userId) {
                  verifySideInputs();
                  // Each user gets its own table.
                  return new TableDestination(
                      "dataset-id.userid-" + userId, "table for userid " + userId);
                }

                @Override
                public TableSchema getSchema(Integer userId) {
                  verifySideInputs();
                  return new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("name").setType("STRING"),
                              new TableFieldSchema().setName("id").setType("INTEGER")));
                }

                @Override
                public List<PCollectionView<?>> getSideInputs() {
                  return ImmutableList.of(sideInput1, sideInput2);
                }

                private void verifySideInputs() {
                  assertThat(sideInput(sideInput1), containsInAnyOrder("a", "b", "c"));
                  Map<String, String> mapSideInput = sideInput(sideInput2);
                  assertEquals(3, mapSideInput.size());
                  assertThat(
                      mapSideInput,
                      allOf(hasEntry("a", "a"), hasEntry("b", "b"), hasEntry("c", "c")));
                }
              })
          .withoutValidation());
  p.run();
  File tempDir = new File(bqOptions.getTempLocation());
  testNumFiles(tempDir, 0);
  Map<Integer, List<TableRow>> expectedTableRows = Maps.newHashMap();
  for (int i = 0; i < userList.size(); ++i) {
    Matcher matcher = userPattern.matcher(userList.get(i));
    checkState(matcher.matches());
    String nickname = matcher.group(1);
    int userid = Integer.valueOf(matcher.group(2));
    List<TableRow> expected = expectedTableRows.get(userid);
    if (expected == null) {
      expected = Lists.newArrayList();
      expectedTableRows.put(userid, expected);
    }
    expected.add(new TableRow().set("name", nickname).set("id", userid));
  }
  for (Map.Entry<Integer, List<TableRow>> entry : expectedTableRows.entrySet()) {
    assertThat(
        datasetService.getAllRows("project-id", "dataset-id", "userid-" + entry.getKey()),
        containsInAnyOrder(Iterables.toArray(entry.getValue(), TableRow.class)));
  }
}
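Both the format function and the dynamic destinations rely on the same userPattern: the format function (a SerializableFunction<String, TableRow>) splits each element into a name and a numeric id, and getDestination routes the element to a per-user table based on that id. The anonymous format function could equally be written as a lambda (a sketch; the pattern and field names match the test above):
// Sketch: the anonymous withFormatFunction argument rewritten as a lambda.
SerializableFunction<String, TableRow> formatFn =
    user -> {
      Matcher matcher = userPattern.matcher(user);
      if (!matcher.matches()) {
        throw new RuntimeException("Unmatching element " + user);
      }
      return new TableRow()
          .set("name", matcher.group(1))
          .set("id", Integer.valueOf(matcher.group(2)));
    };
// Passed to the sink with .withFormatFunction(formatFn).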