Search in sources :

Example 16 with SerializableFunction

Use of org.apache.beam.sdk.transforms.SerializableFunction in the Apache Beam project.

From the class Neo4jIOIT, method testWriteUnwind.

/**
 * Writes three names to Neo4j through {@code Neo4jIO.writeUnwind()} and then queries the
 * {@code Num} nodes back to check that exactly those names are stored.
 */
@Test
public void testWriteUnwind() throws Exception {
    PCollection<String> inputNames = writeUnwindPipeline.apply(Create.of(Arrays.asList("one", "two", "three")));

    // Each input element is turned into one Map<String, Object> of parameters. The rows are
    // accumulated and handed to Neo4j through a single UNWIND statement for performance reasons.
    SerializableFunction<String, Map<String, Object>> toRowParameters = name -> Collections.singletonMap("name", name);

    Neo4jIO.WriteUnwind<String> writeUnwind = Neo4jIO.<String>writeUnwind()
            .withDriverConfiguration(Neo4jTestUtil.getDriverConfiguration(containerHostname, containerPort))
            .withSessionConfig(SessionConfig.forDatabase(Neo4jTestUtil.NEO4J_DATABASE))
            .withBatchSize(5000)
            .withUnwindMapName("rows")
            .withCypher("UNWIND $rows AS row MERGE(n:Num { name : row.name })")
            .withParametersFunction(toRowParameters)
            .withCypherLogging();
    inputNames.apply(writeUnwind);

    // Execute the pipeline and make sure it finished.
    PipelineResult outcome = writeUnwindPipeline.run();
    Assert.assertEquals(PipelineResult.State.DONE, outcome.getState());

    // Read the nodes back with a plain driver session and compare against the input.
    try (Driver driver = Neo4jTestUtil.getDriver(containerHostname, containerPort);
            Session session = Neo4jTestUtil.getSession(driver, true)) {
        List<String> storedNames = session.readTransaction(tx -> {
            List<String> found = new ArrayList<>();
            Result rows = tx.run("MATCH(n:Num) RETURN n.name");
            while (rows.hasNext()) {
                found.add(rows.next().get(0).asString());
            }
            return found;
        });
        assertThat(storedNames, containsInAnyOrder("one", "two", "three"));
    }
}
Also used : Session(org.neo4j.driver.Session) Arrays(java.util.Arrays) SerializableCoder(org.apache.beam.sdk.coders.SerializableCoder) BeforeClass(org.junit.BeforeClass) IsIterableContainingInOrder.contains(org.hamcrest.collection.IsIterableContainingInOrder.contains) DockerImageName(org.testcontainers.utility.DockerImageName) IsIterableContainingInAnyOrder.containsInAnyOrder(org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder) PipelineResult(org.apache.beam.sdk.PipelineResult) RunWith(org.junit.runner.RunWith) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) ArrayList(java.util.ArrayList) Create(org.apache.beam.sdk.transforms.Create) Map(java.util.Map) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Row(org.apache.beam.sdk.values.Row) DoFn(org.apache.beam.sdk.transforms.DoFn) AfterClass(org.junit.AfterClass) Driver(org.neo4j.driver.Driver) Neo4jContainer(org.testcontainers.containers.Neo4jContainer) PAssert(org.apache.beam.sdk.testing.PAssert) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Schema(org.apache.beam.sdk.schemas.Schema) Result(org.neo4j.driver.Result) List(java.util.List) Rule(org.junit.Rule) SessionConfig(org.neo4j.driver.SessionConfig) ParDo(org.apache.beam.sdk.transforms.ParDo) Assert(org.junit.Assert) Collections(java.util.Collections) Record(org.neo4j.driver.Record) ArrayList(java.util.ArrayList) PipelineResult(org.apache.beam.sdk.PipelineResult) Driver(org.neo4j.driver.Driver) PipelineResult(org.apache.beam.sdk.PipelineResult) Result(org.neo4j.driver.Result) Record(org.neo4j.driver.Record) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) Session(org.neo4j.driver.Session) Test(org.junit.Test)

Example 17 with SerializableFunction

Use of org.apache.beam.sdk.transforms.SerializableFunction in the Apache Beam project.

From the class Neo4jIOIT, method testParameterizedRead.

/**
 * Runs {@code Neo4jIO.readAll()} once per input string, passing the string as the {@code par1}
 * query parameter, and asserts on the resulting rows rendered as text lines.
 */
@Test
public void testParameterizedRead() throws Exception {
    PCollection<String> parameterValues = parameterizedReadPipeline.apply(Create.of(Arrays.asList("one", "two", "three")));

    final Schema outputSchema = Schema.of(
            Schema.Field.of("One", Schema.FieldType.INT32),
            Schema.Field.of("Str", Schema.FieldType.STRING));

    // Binds each input element to the $par1 parameter of the Cypher statement below.
    SerializableFunction<String, Map<String, Object>> toQueryParameters = value -> Collections.singletonMap("par1", value);

    // Column 0 is read as an int and column 1 as a string, matching outputSchema's field order.
    Neo4jIO.RowMapper<Row> toRow = record -> Row.withSchema(outputSchema).attachValues(record.get(0).asInt(), record.get(1).asString());

    Neo4jIO.ReadAll<String, Row> readAll = Neo4jIO.<String, Row>readAll()
            .withCypher("RETURN 1, $par1")
            .withDriverConfiguration(Neo4jTestUtil.getDriverConfiguration(containerHostname, containerPort))
            .withSessionConfig(SessionConfig.forDatabase(Neo4jTestUtil.NEO4J_DATABASE))
            .withRowMapper(toRow)
            .withParametersFunction(toQueryParameters)
            .withCoder(SerializableCoder.of(Row.class))
            .withCypherLogging();

    PCollection<Row> rows = parameterValues.apply(readAll);
    PCollection<String> lines = rows.apply(ParDo.of(new ParameterizedReadRowToLineFn()));
    PAssert.that(lines).containsInAnyOrder("1,one", "1,two", "1,three");

    // Execute the pipeline and make sure it finished.
    PipelineResult outcome = parameterizedReadPipeline.run();
    Assert.assertEquals(PipelineResult.State.DONE, outcome.getState());
}
Also used : Session(org.neo4j.driver.Session) Arrays(java.util.Arrays) SerializableCoder(org.apache.beam.sdk.coders.SerializableCoder) BeforeClass(org.junit.BeforeClass) IsIterableContainingInOrder.contains(org.hamcrest.collection.IsIterableContainingInOrder.contains) DockerImageName(org.testcontainers.utility.DockerImageName) IsIterableContainingInAnyOrder.containsInAnyOrder(org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder) PipelineResult(org.apache.beam.sdk.PipelineResult) RunWith(org.junit.runner.RunWith) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) ArrayList(java.util.ArrayList) Create(org.apache.beam.sdk.transforms.Create) Map(java.util.Map) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Row(org.apache.beam.sdk.values.Row) DoFn(org.apache.beam.sdk.transforms.DoFn) AfterClass(org.junit.AfterClass) Driver(org.neo4j.driver.Driver) Neo4jContainer(org.testcontainers.containers.Neo4jContainer) PAssert(org.apache.beam.sdk.testing.PAssert) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Schema(org.apache.beam.sdk.schemas.Schema) Result(org.neo4j.driver.Result) List(java.util.List) Rule(org.junit.Rule) SessionConfig(org.neo4j.driver.SessionConfig) ParDo(org.apache.beam.sdk.transforms.ParDo) Assert(org.junit.Assert) Collections(java.util.Collections) Record(org.neo4j.driver.Record) Schema(org.apache.beam.sdk.schemas.Schema) PipelineResult(org.apache.beam.sdk.PipelineResult) Row(org.apache.beam.sdk.values.Row) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) Test(org.junit.Test)

Example 18 with SerializableFunction

Use of org.apache.beam.sdk.transforms.SerializableFunction in the Apache Beam project.

From the class GroupByWithNullValuesTest, method testGroupByWithNullValues.

/**
 * Groups null values under a single key in 10 ms fixed windows on the streaming Flink test
 * runner and asserts that ten groups of ten null elements come out.
 */
@Test
public void testGroupByWithNullValues() {
    FlinkPipelineOptions flinkOptions = FlinkPipelineOptions.defaults();
    flinkOptions.setRunner(TestFlinkRunner.class);
    flinkOptions.setStreaming(true);
    Pipeline pipeline = Pipeline.create(flinkOptions);

    // Uses each generated number as its own event timestamp.
    SerializableFunction<Long, Instant> valueAsTimestamp = new SerializableFunction<Long, Instant>() {

        @Override
        public Instant apply(Long input) {
            return new Instant(input);
        }
    };

    // Replaces every element with the key "hello" paired with a null value.
    DoFn<Long, KV<String, Void>> toNullValuedPair = new DoFn<Long, KV<String, Void>>() {

        @ProcessElement
        public void processElement(ProcessContext pc) {
            pc.output(KV.of("hello", null));
        }
    };

    // Checks that every grouped value is null and emits how many values the group held.
    DoFn<KV<String, Iterable<Void>>, Integer> countNullValues = new DoFn<KV<String, Iterable<Void>>, Integer>() {

        @ProcessElement
        public void processElement(ProcessContext pc) {
            int seen = 0;
            for (Void value : pc.element().getValue()) {
                assertNull("Element should be null", value);
                seen++;
            }
            pc.output(seen);
        }
    };

    PCollection<Integer> groupSizes = pipeline
            .apply(GenerateSequence.from(0).to(100).withTimestampFn(valueAsTimestamp))
            .apply(Window.into(FixedWindows.of(Duration.millis(10))))
            .apply(ParDo.of(toNullValuedPair))
            .apply(GroupByKey.create())
            .apply(ParDo.of(countNullValues));

    PAssert.that(groupSizes).containsInAnyOrder(10, 10, 10, 10, 10, 10, 10, 10, 10, 10);
    pipeline.run();
}
Also used : SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) Instant(org.joda.time.Instant) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) Pipeline(org.apache.beam.sdk.Pipeline) DoFn(org.apache.beam.sdk.transforms.DoFn) Test(org.junit.Test)

Example 19 with SerializableFunction

Use of org.apache.beam.sdk.transforms.SerializableFunction in the Apache Beam project.

From the class FlinkRequiresStableInputTest, method createPipeline.

/**
 * Builds a pipeline with two branches off a single-value source: one applies the
 * side-effect-then-fail fn as a single-output ParDo, the other as a multi-output ParDo.
 *
 * @param options pipeline options used to create the pipeline
 * @param singleOutputPrefix prefix handed to the fn in the single-output branch
 * @param multiOutputPrefix prefix handed to the fn in the multi-output branch
 * @return the assembled, not yet executed pipeline
 */
private static Pipeline createPipeline(PipelineOptions options, String singleOutputPrefix, String multiOutputPrefix) {
    Pipeline pipeline = Pipeline.create(options);

    // Counts down the shared latch whenever it is called and always returns null.
    // NOTE(review): presumably invoked by MakeSideEffectAndThenFailFn on its first attempt only -
    // confirm against RequiresStableInputIT.
    SerializableFunction<Void, Void> firstTime = ignored -> {
        latch.countDown();
        return null;
    };

    PCollection<String> source = pipeline.apply("CreatePCollectionOfOneValue", Create.of(VALUE));

    // Single-output branch.
    source
            .apply("Single-PairWithRandomKey", MapElements.via(new RequiresStableInputIT.PairWithRandomKeyFn()))
            .apply("Single-MakeSideEffectAndThenFail", ParDo.of(new RequiresStableInputIT.MakeSideEffectAndThenFailFn(singleOutputPrefix, firstTime)));

    // Multi-output branch: same fn, but with an explicit main output tag and no additional tags.
    source
            .apply("Multi-PairWithRandomKey", MapElements.via(new RequiresStableInputIT.PairWithRandomKeyFn()))
            .apply("Multi-MakeSideEffectAndThenFail", ParDo.of(new RequiresStableInputIT.MakeSideEffectAndThenFailFn(multiOutputPrefix, firstTime)).withOutputTags(new TupleTag<>(), TupleTagList.empty()));

    return pipeline;
}
Also used : RequiresStableInputIT(org.apache.beam.sdk.RequiresStableInputIT) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) TupleTag(org.apache.beam.sdk.values.TupleTag) Pipeline(org.apache.beam.sdk.Pipeline)

Example 20 with SerializableFunction

Use of org.apache.beam.sdk.transforms.SerializableFunction in the Apache Beam project.

From the class BigQueryIOTest, method writeDynamicDestinations.

/**
 * Writes generated users of the form {@code <nickname><id>} to BigQuery (backed by fake
 * services) using dynamic destinations keyed by the numeric id, then checks that every
 * per-id table holds exactly the rows that were generated for it.
 *
 * @param streaming when {@code true}, the users collection is marked as unbounded before the
 *     write is applied
 * @throws Exception if creating the temp folder, the dataset, or reading the rows back fails
 */
public void writeDynamicDestinations(boolean streaming) throws Exception {
    BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
    bqOptions.setProject("project-id");
    bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
    FakeDatasetService datasetService = new FakeDatasetService();
    FakeBigQueryServices fakeBqServices = new FakeBigQueryServices().withJobService(new FakeJobService()).withDatasetService(datasetService);
    datasetService.createDataset("project-id", "dataset-id", "", "");
    // Group 1 is the alphabetic nickname, group 2 the numeric user id.
    final Pattern userPattern = Pattern.compile("([a-z]+)([0-9]+)");
    Pipeline p = TestPipeline.create(bqOptions);
    final PCollectionView<List<String>> sideInput1 = p.apply("Create SideInput 1", Create.of("a", "b", "c").withCoder(StringUtf8Coder.of())).apply("asList", View.<String>asList());
    final PCollectionView<Map<String, String>> sideInput2 = p.apply("Create SideInput2", Create.of(KV.of("a", "a"), KV.of("b", "b"), KV.of("c", "c"))).apply("AsMap", View.<String, String>asMap());
    final List<String> allUsernames = ImmutableList.of("bill", "bob", "randolph");
    List<String> userList = Lists.newArrayList();
    // Generate 10x DEFAULT_MAX_NUM_WRITERS_PER_BUNDLE users.
    // NOTE(review): presumably enough to force the write to spill to WriteGroupedRecordsToFiles
    // (the original comment was truncated) - confirm against BatchLoads.
    for (int i = 0; i < BatchLoads.DEFAULT_MAX_NUM_WRITERS_PER_BUNDLE * 10; ++i) {
        // Every user id gets exactly one randomly chosen nickname. (The previous comment claimed
        // ten nicknames, but the inner loop it described only ever ran once.)
        String nickname = allUsernames.get(ThreadLocalRandom.current().nextInt(allUsernames.size()));
        userList.add(nickname + i);
    }
    PCollection<String> users = p.apply("CreateUsers", Create.of(userList)).apply(Window.into(new PartitionedGlobalWindows<>(new SerializableFunction<String, String>() {

        @Override
        public String apply(String arg) {
            return arg;
        }
    })));
    if (streaming) {
        users = users.setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);
    }
    users.apply("WriteBigQuery", BigQueryIO.<String>write().withTestServices(fakeBqServices).withMaxFilesPerBundle(5).withMaxFileSize(10).withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED).withFormatFunction(new SerializableFunction<String, TableRow>() {

        @Override
        public TableRow apply(String user) {
            Matcher matcher = userPattern.matcher(user);
            if (matcher.matches()) {
                return new TableRow().set("name", matcher.group(1)).set("id", Integer.valueOf(matcher.group(2)));
            }
            throw new RuntimeException("Unmatching element " + user);
        }
    }).to(new StringIntegerDestinations() {

        @Override
        public Integer getDestination(ValueInSingleWindow<String> element) {
            assertThat(element.getWindow(), Matchers.instanceOf(PartitionedGlobalWindow.class));
            Matcher matcher = userPattern.matcher(element.getValue());
            if (matcher.matches()) {
                // Tables are named by user id (see getTable), so the id alone identifies
                // the destination.
                return Integer.valueOf(matcher.group(2));
            }
            throw new RuntimeException("Unmatching destination " + element.getValue());
        }

        @Override
        public TableDestination getTable(Integer userId) {
            verifySideInputs();
            // Each user in its own table.
            return new TableDestination("dataset-id.userid-" + userId, "table for userid " + userId);
        }

        @Override
        public TableSchema getSchema(Integer userId) {
            verifySideInputs();
            return new TableSchema().setFields(ImmutableList.of(new TableFieldSchema().setName("name").setType("STRING"), new TableFieldSchema().setName("id").setType("INTEGER")));
        }

        @Override
        public List<PCollectionView<?>> getSideInputs() {
            return ImmutableList.of(sideInput1, sideInput2);
        }

        // Asserts that both side inputs are readable here and carry the values created above.
        private void verifySideInputs() {
            assertThat(sideInput(sideInput1), containsInAnyOrder("a", "b", "c"));
            Map<String, String> mapSideInput = sideInput(sideInput2);
            assertEquals(3, mapSideInput.size());
            assertThat(mapSideInput, allOf(hasEntry("a", "a"), hasEntry("b", "b"), hasEntry("c", "c")));
        }
    }).withoutValidation());
    p.run();
    File tempDir = new File(bqOptions.getTempLocation());
    // NOTE(review): presumably asserts that no temporary files are left behind - confirm
    // against testNumFiles.
    testNumFiles(tempDir, 0);
    // Rebuild the expected table contents from the generated users, keyed by user id.
    Map<Integer, List<TableRow>> expectedTableRows = Maps.newHashMap();
    for (String user : userList) {
        Matcher matcher = userPattern.matcher(user);
        checkState(matcher.matches());
        String nickname = matcher.group(1);
        // parseInt yields the primitive directly instead of boxing and immediately unboxing.
        int userid = Integer.parseInt(matcher.group(2));
        List<TableRow> expected = expectedTableRows.get(userid);
        if (expected == null) {
            expected = Lists.newArrayList();
            expectedTableRows.put(userid, expected);
        }
        expected.add(new TableRow().set("name", nickname).set("id", userid));
    }
    for (Map.Entry<Integer, List<TableRow>> entry : expectedTableRows.entrySet()) {
        assertThat(datasetService.getAllRows("project-id", "dataset-id", "userid-" + entry.getKey()), containsInAnyOrder(Iterables.toArray(entry.getValue(), TableRow.class)));
    }
}
Also used : SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) TableSchema(com.google.api.services.bigquery.model.TableSchema) JsonSchemaToTableSchema(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema) Matcher(java.util.regex.Matcher) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Pattern(java.util.regex.Pattern) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) TableRow(com.google.api.services.bigquery.model.TableRow) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) File(java.io.File)

Aggregations

SerializableFunction (org.apache.beam.sdk.transforms.SerializableFunction)37 Test (org.junit.Test)27 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)23 PCollection (org.apache.beam.sdk.values.PCollection)22 PAssert (org.apache.beam.sdk.testing.PAssert)20 Instant (org.joda.time.Instant)17 Rule (org.junit.Rule)17 List (java.util.List)16 MatcherAssert.assertThat (org.hamcrest.MatcherAssert.assertThat)16 RunWith (org.junit.runner.RunWith)16 Map (java.util.Map)15 Duration (org.joda.time.Duration)14 JUnit4 (org.junit.runners.JUnit4)13 ArrayList (java.util.ArrayList)12 Collections (java.util.Collections)12 Create (org.apache.beam.sdk.transforms.Create)12 Arrays (java.util.Arrays)11 ParDo (org.apache.beam.sdk.transforms.ParDo)11 KV (org.apache.beam.sdk.values.KV)11 Assert.assertEquals (org.junit.Assert.assertEquals)10