Search in sources :

Example 41 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class SpannerWriteIT method testFailFast.

@Test
public void testFailFast() throws Exception {
    thrown.expect(new StackTraceContainsString("SpannerException"));
    thrown.expect(new StackTraceContainsString("Value must not be NULL in table users"));
    int numRecords = 100;
    p.apply(GenerateSequence.from(0).to(2 * numRecords)).apply(ParDo.of(new GenerateMutations(options.getTable(), new DivBy2()))).apply(SpannerIO.write().withProjectId(project).withInstanceId(options.getInstanceId()).withDatabaseId(databaseName));
    PipelineResult result = p.run();
    result.waitUntilFinish();
}
Also used : PipelineResult(org.apache.beam.sdk.PipelineResult) Test(org.junit.Test)

Example 42 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class Neo4jIOIT method testLargeWriteUnwind.

@Test
public void testLargeWriteUnwind() throws Exception {
    final int startId = 5000;
    final int endId = 6000;
    // Create 1000 IDs
    List<Integer> idList = new ArrayList<>();
    for (int id = startId; id < endId; id++) {
        idList.add(id);
    }
    PCollection<Integer> idCollection = largeWriteUnwindPipeline.apply(Create.of(idList));
    // Every row is represented by a Map<String, Object> in the parameters map.
    // We accumulate the rows and 'unwind' those to Neo4j for performance reasons.
    // 
    SerializableFunction<Integer, Map<String, Object>> parametersFunction = id -> ImmutableMap.of("id", id, "name", "Casters", "firstName", "Matt");
    // 1000 rows with a batch size of 123 should trigger most scenarios we can think of
    // We've put a unique constraint on Something.id
    // 
    Neo4jIO.WriteUnwind<Integer> read = Neo4jIO.<Integer>writeUnwind().withDriverConfiguration(Neo4jTestUtil.getDriverConfiguration(containerHostname, containerPort)).withSessionConfig(SessionConfig.forDatabase(Neo4jTestUtil.NEO4J_DATABASE)).withBatchSize(123).withUnwindMapName("rows").withCypher("UNWIND $rows AS row CREATE(n:Something { id : row.id })").withParametersFunction(parametersFunction).withCypherLogging();
    idCollection.apply(read);
    // Now run this pipeline
    // 
    PipelineResult pipelineResult = largeWriteUnwindPipeline.run();
    Assert.assertEquals(PipelineResult.State.DONE, pipelineResult.getState());
    // 
    try (Driver driver = Neo4jTestUtil.getDriver(containerHostname, containerPort)) {
        try (Session session = Neo4jTestUtil.getSession(driver, true)) {
            List<Integer> values = session.readTransaction(tx -> {
                List<Integer> v = null;
                int nrRows = 0;
                Result result = tx.run("MATCH(n:Something) RETURN count(n), min(n.id), max(n.id)");
                while (result.hasNext()) {
                    Record record = result.next();
                    v = Arrays.asList(record.get(0).asInt(), record.get(1).asInt(), record.get(2).asInt(), ++nrRows);
                }
                return v;
            });
            Assert.assertNotNull(values);
            assertThat(values, contains(endId - startId, startId, endId - 1, 1));
        }
    }
}
Also used : Session(org.neo4j.driver.Session) Arrays(java.util.Arrays) SerializableCoder(org.apache.beam.sdk.coders.SerializableCoder) BeforeClass(org.junit.BeforeClass) IsIterableContainingInOrder.contains(org.hamcrest.collection.IsIterableContainingInOrder.contains) DockerImageName(org.testcontainers.utility.DockerImageName) IsIterableContainingInAnyOrder.containsInAnyOrder(org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder) PipelineResult(org.apache.beam.sdk.PipelineResult) RunWith(org.junit.runner.RunWith) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) ArrayList(java.util.ArrayList) Create(org.apache.beam.sdk.transforms.Create) Map(java.util.Map) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Row(org.apache.beam.sdk.values.Row) DoFn(org.apache.beam.sdk.transforms.DoFn) AfterClass(org.junit.AfterClass) Driver(org.neo4j.driver.Driver) Neo4jContainer(org.testcontainers.containers.Neo4jContainer) PAssert(org.apache.beam.sdk.testing.PAssert) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Schema(org.apache.beam.sdk.schemas.Schema) Result(org.neo4j.driver.Result) List(java.util.List) Rule(org.junit.Rule) SessionConfig(org.neo4j.driver.SessionConfig) ParDo(org.apache.beam.sdk.transforms.ParDo) Assert(org.junit.Assert) Collections(java.util.Collections) Record(org.neo4j.driver.Record) ArrayList(java.util.ArrayList) PipelineResult(org.apache.beam.sdk.PipelineResult) Driver(org.neo4j.driver.Driver) PipelineResult(org.apache.beam.sdk.PipelineResult) Result(org.neo4j.driver.Result) Record(org.neo4j.driver.Record) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) Session(org.neo4j.driver.Session) Test(org.junit.Test)

Example 43 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class Neo4jIOIT method testWriteUnwind.

@Test
public void testWriteUnwind() throws Exception {
    PCollection<String> stringsCollections = writeUnwindPipeline.apply(Create.of(Arrays.asList("one", "two", "three")));
    // Every row is represented by a Map<String, Object> in the parameters map.
    // We accumulate the rows and 'unwind' those to Neo4j for performance reasons.
    // 
    SerializableFunction<String, Map<String, Object>> parametersMapper = name -> Collections.singletonMap("name", name);
    Neo4jIO.WriteUnwind<String> read = Neo4jIO.<String>writeUnwind().withDriverConfiguration(Neo4jTestUtil.getDriverConfiguration(containerHostname, containerPort)).withSessionConfig(SessionConfig.forDatabase(Neo4jTestUtil.NEO4J_DATABASE)).withBatchSize(5000).withUnwindMapName("rows").withCypher("UNWIND $rows AS row MERGE(n:Num { name : row.name })").withParametersFunction(parametersMapper).withCypherLogging();
    stringsCollections.apply(read);
    // Now run this pipeline
    // 
    PipelineResult pipelineResult = writeUnwindPipeline.run();
    Assert.assertEquals(PipelineResult.State.DONE, pipelineResult.getState());
    // 
    try (Driver driver = Neo4jTestUtil.getDriver(containerHostname, containerPort)) {
        try (Session session = Neo4jTestUtil.getSession(driver, true)) {
            List<String> names = session.readTransaction(tx -> {
                List<String> list = new ArrayList<>();
                Result result = tx.run("MATCH(n:Num) RETURN n.name");
                while (result.hasNext()) {
                    Record record = result.next();
                    list.add(record.get(0).asString());
                }
                return list;
            });
            assertThat(names, containsInAnyOrder("one", "two", "three"));
        }
    }
}
Also used : Session(org.neo4j.driver.Session) Arrays(java.util.Arrays) SerializableCoder(org.apache.beam.sdk.coders.SerializableCoder) BeforeClass(org.junit.BeforeClass) IsIterableContainingInOrder.contains(org.hamcrest.collection.IsIterableContainingInOrder.contains) DockerImageName(org.testcontainers.utility.DockerImageName) IsIterableContainingInAnyOrder.containsInAnyOrder(org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder) PipelineResult(org.apache.beam.sdk.PipelineResult) RunWith(org.junit.runner.RunWith) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) ArrayList(java.util.ArrayList) Create(org.apache.beam.sdk.transforms.Create) Map(java.util.Map) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Row(org.apache.beam.sdk.values.Row) DoFn(org.apache.beam.sdk.transforms.DoFn) AfterClass(org.junit.AfterClass) Driver(org.neo4j.driver.Driver) Neo4jContainer(org.testcontainers.containers.Neo4jContainer) PAssert(org.apache.beam.sdk.testing.PAssert) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Schema(org.apache.beam.sdk.schemas.Schema) Result(org.neo4j.driver.Result) List(java.util.List) Rule(org.junit.Rule) SessionConfig(org.neo4j.driver.SessionConfig) ParDo(org.apache.beam.sdk.transforms.ParDo) Assert(org.junit.Assert) Collections(java.util.Collections) Record(org.neo4j.driver.Record) ArrayList(java.util.ArrayList) PipelineResult(org.apache.beam.sdk.PipelineResult) Driver(org.neo4j.driver.Driver) PipelineResult(org.apache.beam.sdk.PipelineResult) Result(org.neo4j.driver.Result) Record(org.neo4j.driver.Record) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) Session(org.neo4j.driver.Session) Test(org.junit.Test)

Example 44 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class Neo4jIOIT method testParameterizedRead.

@Test
public void testParameterizedRead() throws Exception {
    PCollection<String> stringsCollections = parameterizedReadPipeline.apply(Create.of(Arrays.asList("one", "two", "three")));
    final Schema outputSchema = Schema.of(Schema.Field.of("One", Schema.FieldType.INT32), Schema.Field.of("Str", Schema.FieldType.STRING));
    SerializableFunction<String, Map<String, Object>> parametersFunction = string -> Collections.singletonMap("par1", string);
    Neo4jIO.RowMapper<Row> rowMapper = record -> {
        int one = record.get(0).asInt();
        String string = record.get(1).asString();
        return Row.withSchema(outputSchema).attachValues(one, string);
    };
    Neo4jIO.ReadAll<String, Row> read = Neo4jIO.<String, Row>readAll().withCypher("RETURN 1, $par1").withDriverConfiguration(Neo4jTestUtil.getDriverConfiguration(containerHostname, containerPort)).withSessionConfig(SessionConfig.forDatabase(Neo4jTestUtil.NEO4J_DATABASE)).withRowMapper(rowMapper).withParametersFunction(parametersFunction).withCoder(SerializableCoder.of(Row.class)).withCypherLogging();
    PCollection<Row> outputRows = stringsCollections.apply(read);
    PCollection<String> outputLines = outputRows.apply(ParDo.of(new ParameterizedReadRowToLineFn()));
    PAssert.that(outputLines).containsInAnyOrder("1,one", "1,two", "1,three");
    // Now run this pipeline
    // 
    PipelineResult pipelineResult = parameterizedReadPipeline.run();
    Assert.assertEquals(PipelineResult.State.DONE, pipelineResult.getState());
}
Also used : Session(org.neo4j.driver.Session) Arrays(java.util.Arrays) SerializableCoder(org.apache.beam.sdk.coders.SerializableCoder) BeforeClass(org.junit.BeforeClass) IsIterableContainingInOrder.contains(org.hamcrest.collection.IsIterableContainingInOrder.contains) DockerImageName(org.testcontainers.utility.DockerImageName) IsIterableContainingInAnyOrder.containsInAnyOrder(org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder) PipelineResult(org.apache.beam.sdk.PipelineResult) RunWith(org.junit.runner.RunWith) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) ArrayList(java.util.ArrayList) Create(org.apache.beam.sdk.transforms.Create) Map(java.util.Map) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Row(org.apache.beam.sdk.values.Row) DoFn(org.apache.beam.sdk.transforms.DoFn) AfterClass(org.junit.AfterClass) Driver(org.neo4j.driver.Driver) Neo4jContainer(org.testcontainers.containers.Neo4jContainer) PAssert(org.apache.beam.sdk.testing.PAssert) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Schema(org.apache.beam.sdk.schemas.Schema) Result(org.neo4j.driver.Result) List(java.util.List) Rule(org.junit.Rule) SessionConfig(org.neo4j.driver.SessionConfig) ParDo(org.apache.beam.sdk.transforms.ParDo) Assert(org.junit.Assert) Collections(java.util.Collections) Record(org.neo4j.driver.Record) Schema(org.apache.beam.sdk.schemas.Schema) PipelineResult(org.apache.beam.sdk.PipelineResult) Row(org.apache.beam.sdk.values.Row) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) Test(org.junit.Test)

Example 45 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class TestUniversalRunner method run.

@Override
public PipelineResult run(Pipeline pipeline) {
    Options testOptions = options.as(Options.class);
    if (testOptions.getLocalJobServicePortFile() != null) {
        String localServicePortFilePath = testOptions.getLocalJobServicePortFile();
        try {
            testOptions.setJobEndpoint("localhost:" + new String(Files.readAllBytes(Paths.get(localServicePortFilePath)), Charsets.UTF_8).trim());
        } catch (IOException e) {
            throw new RuntimeException(String.format("Error reading local job service port file %s", localServicePortFilePath), e);
        }
    }
    PortablePipelineOptions portableOptions = options.as(PortablePipelineOptions.class);
    portableOptions.setRunner(PortableRunner.class);
    PortableRunner runner = PortableRunner.fromOptions(portableOptions);
    PipelineResult result = runner.run(pipeline);
    assertThat("Pipeline did not succeed.", result.waitUntilFinish(), Matchers.is(PipelineResult.State.DONE));
    return result;
}
Also used : TestPipelineOptions(org.apache.beam.sdk.testing.TestPipelineOptions) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) PipelineResult(org.apache.beam.sdk.PipelineResult) IOException(java.io.IOException) PortableRunner(org.apache.beam.runners.portability.PortableRunner)

Aggregations

PipelineResult (org.apache.beam.sdk.PipelineResult)105 Test (org.junit.Test)66 Pipeline (org.apache.beam.sdk.Pipeline)29 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)18 PCollection (org.apache.beam.sdk.values.PCollection)18 TimeMonitor (org.apache.beam.sdk.testutils.metrics.TimeMonitor)14 ArrayList (java.util.ArrayList)12 Category (org.junit.experimental.categories.Category)12 KV (org.apache.beam.sdk.values.KV)11 Rule (org.junit.Rule)11 IOException (java.io.IOException)10 ExampleUtils (org.apache.beam.examples.common.ExampleUtils)10 DoFn (org.apache.beam.sdk.transforms.DoFn)10 HashingFn (org.apache.beam.sdk.io.common.HashingFn)9 RunWith (org.junit.runner.RunWith)9 MetricQueryResults (org.apache.beam.sdk.metrics.MetricQueryResults)8 ParDo (org.apache.beam.sdk.transforms.ParDo)8 Duration (org.joda.time.Duration)8 Map (java.util.Map)7 TableReference (com.google.api.services.bigquery.model.TableReference)6