Search in sources :

Example 21 with SerializableFunction

use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.

the class BigQueryIOTest method testTransformingSourceUnsplittable.

/**
 * Wraps an unsplittable counting source in a {@code TransformingSource} that stringifies each
 * element, then checks the values read back and the source's split behavior.
 */
@Test
public void testTransformingSourceUnsplittable() throws Exception {
    final int elementCount = 10000;
    @SuppressWarnings("deprecation") BoundedSource<Long> unsplittableLongs = SourceTestUtils.toUnsplittableSource(CountingSource.upTo(elementCount));

    // Element-wise mapping applied by the transforming source.
    SerializableFunction<Long, String> stringify = new SerializableFunction<Long, String>() {

        @Override
        public String apply(Long value) {
            return value.toString();
        }
    };
    BoundedSource<String> stringSource = new TransformingSource<>(unsplittableLongs, stringify, StringUtf8Coder.of());

    // The expected output is the decimal form of 0 .. elementCount - 1, in order.
    List<String> expected = Lists.newArrayList();
    int next = 0;
    while (next < elementCount) {
        expected.add(Integer.toString(next));
        next++;
    }

    PipelineOptions options = PipelineOptionsFactory.create();
    Assert.assertThat(SourceTestUtils.readFromSource(stringSource, options), CoreMatchers.is(expected));
    SourceTestUtils.assertSplitAtFractionBehavior(stringSource, 100, 0.3, ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS, options);
    SourceTestUtils.assertSourcesEqualReferenceSource(stringSource, stringSource.split(100, options), options);
}
Also used : SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) Test(org.junit.Test)

Example 22 with SerializableFunction

use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.

the class BigQuerySourceBase method createSources.

/**
 * Builds one bounded source per input file. Each source reads Avro records from the file and
 * maps every record to a {@code TableRow} using the given table schema.
 *
 * @param files the files to read, one resulting source per entry
 * @param tableSchema the schema handed to the record-to-row conversion
 * @return an immutable list of the created sources, in the order of {@code files}
 */
private List<BoundedSource<TableRow>> createSources(List<ResourceId> files, TableSchema tableSchema) throws IOException, InterruptedException {
    // The schema is captured by the function as a JSON string and parsed back on every call.
    final String schemaJson = BigQueryIO.JSON_FACTORY.toString(tableSchema);
    SerializableFunction<GenericRecord, TableRow> recordToRow = new SerializableFunction<GenericRecord, TableRow>() {

        @Override
        public TableRow apply(GenericRecord record) {
            TableSchema parsedSchema = BigQueryHelpers.fromJsonString(schemaJson, TableSchema.class);
            return BigQueryAvroUtils.convertGenericRecordToTableRow(record, parsedSchema);
        }
    };

    ImmutableList.Builder<BoundedSource<TableRow>> sources = ImmutableList.builder();
    for (ResourceId file : files) {
        BoundedSource<TableRow> source = new TransformingSource<>(AvroSource.from(file.toString()), recordToRow, getDefaultOutputCoder());
        sources.add(source);
    }
    return sources.build();
}
Also used : BoundedSource(org.apache.beam.sdk.io.BoundedSource) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) TableSchema(com.google.api.services.bigquery.model.TableSchema) ResourceId(org.apache.beam.sdk.io.fs.ResourceId) TableRow(com.google.api.services.bigquery.model.TableRow) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 23 with SerializableFunction

use of org.apache.beam.sdk.transforms.SerializableFunction in project DataflowJavaSDK-examples by GoogleCloudPlatform.

the class LeaderBoardTest method testTeamScoresObservablyLate.

/**
 * Exercises late data: some elements arrive behind the watermark after it has passed the end of
 * the window, yet still within the maximum allowed lateness, so they are emitted in a late pane.
 */
@Test
public void testTeamScoresObservablyLate() {
    Instant firstWindowCloses = baseTime.plus(ALLOWED_LATENESS).plus(TEAM_WINDOW_DURATION);
    TestStream<GameActionInfo> createEvents = TestStream.create(AvroCoder.of(GameActionInfo.class))
        .advanceWatermarkTo(baseTime)
        .addElements(
            event(TestUser.BLUE_ONE, 3, Duration.standardSeconds(3)),
            event(TestUser.BLUE_TWO, 5, Duration.standardMinutes(8)))
        .advanceProcessingTime(Duration.standardMinutes(10))
        .advanceWatermarkTo(baseTime.plus(Duration.standardMinutes(3)))
        .addElements(
            event(TestUser.RED_ONE, 3, Duration.standardMinutes(1)),
            event(TestUser.RED_ONE, 4, Duration.standardMinutes(2)),
            event(TestUser.BLUE_ONE, 3, Duration.standardMinutes(5)))
        // Stop the watermark one minute short of firstWindowCloses before adding more elements.
        .advanceWatermarkTo(firstWindowCloses.minus(Duration.standardMinutes(1)))
        .addElements(
            event(TestUser.RED_TWO, 2, Duration.ZERO),
            event(TestUser.RED_TWO, 5, Duration.standardMinutes(1)),
            event(TestUser.RED_TWO, 3, Duration.standardMinutes(3)))
        .advanceProcessingTime(Duration.standardMinutes(12))
        .addElements(
            event(TestUser.RED_TWO, 9, Duration.standardMinutes(1)),
            event(TestUser.RED_TWO, 1, Duration.standardMinutes(3)))
        .advanceWatermarkToInfinity();

    PCollection<KV<String, Integer>> teamScores = p.apply(createEvents).apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS));

    BoundedWindow window = new IntervalWindow(baseTime, TEAM_WINDOW_DURATION);
    String blueTeam = TestUser.BLUE_ONE.getTeam();
    String redTeam = TestUser.RED_ONE.getTeam();

    // Across all panes of the window, both teams' totals must show up.
    PAssert.that(teamScores).inWindow(window).satisfies((SerializableFunction<Iterable<KV<String, Integer>>, Void>) scores -> {
        assertThat(scores, hasItem(KV.of(blueTeam, 11)));
        assertThat(scores, hasItem(KV.of(redTeam, 27)));
        return null;
    });

    ImmutableMap<String, Integer> onTimeScores = ImmutableMap.<String, Integer>builder()
        .put(redTeam, 7)
        .put(blueTeam, 11)
        .build();
    PAssert.thatMap(teamScores).inOnTimePane(window).isEqualTo(onTimeScores);

    // The blue team gets no final pane: every one of its updates was already reflected in an
    // earlier pane.
    PAssert.that(teamScores).inFinalPane(window).containsInAnyOrder(KV.of(redTeam, 27));

    p.run().waitUntilFinish();
}
Also used : GameActionInfo(com.google.cloud.dataflow.examples.complete.game.UserScore.GameActionInfo) KV(org.apache.beam.sdk.values.KV) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) CalculateUserScores(com.google.cloud.dataflow.examples.complete.game.LeaderBoard.CalculateUserScores) PTransform(org.apache.beam.sdk.transforms.PTransform) Assert.assertThat(org.junit.Assert.assertThat) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) ImmutableMap(com.google.common.collect.ImmutableMap) PAssert(org.apache.beam.sdk.testing.PAssert) CalculateTeamScores(com.google.cloud.dataflow.examples.complete.game.LeaderBoard.CalculateTeamScores) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) AvroCoder(org.apache.beam.sdk.coders.AvroCoder) Serializable(java.io.Serializable) Matchers.hasItem(org.hamcrest.Matchers.hasItem) Rule(org.junit.Rule) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) TestStream(org.apache.beam.sdk.testing.TestStream) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) GameActionInfo(com.google.cloud.dataflow.examples.complete.game.UserScore.GameActionInfo) Instant(org.joda.time.Instant) CalculateTeamScores(com.google.cloud.dataflow.examples.complete.game.LeaderBoard.CalculateTeamScores) KV(org.apache.beam.sdk.values.KV) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 24 with SerializableFunction

use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.

the class SchemaCoderCloudObjectTranslator method fromCloudObject.

/**
 * Rebuilds a {@link SchemaCoder} from the string fields stored on the given cloud object: the
 * serialized type descriptor, the to-row and from-row functions, and the JSON-encoded schema.
 *
 * @param cloudObject the cloud object holding the encoded coder components
 * @return the coder assembled from the decoded components
 * @throws RuntimeException wrapping any {@link IOException} raised while decoding
 */
@Override
public SchemaCoder fromCloudObject(CloudObject cloudObject) {
    try {
        // Each serialized component is stored as a string; decode it to bytes, then deserialize.
        byte[] typeDescriptorBytes = StringUtils.jsonStringToByteArray(Structs.getString(cloudObject, TYPE_DESCRIPTOR));
        TypeDescriptor typeDescriptor = (TypeDescriptor) SerializableUtils.deserializeFromByteArray(typeDescriptorBytes, "typeDescriptor");

        byte[] toRowBytes = StringUtils.jsonStringToByteArray(Structs.getString(cloudObject, TO_ROW_FUNCTION));
        SerializableFunction toRowFunction = (SerializableFunction) SerializableUtils.deserializeFromByteArray(toRowBytes, "toRowFunction");

        byte[] fromRowBytes = StringUtils.jsonStringToByteArray(Structs.getString(cloudObject, FROM_ROW_FUNCTION));
        SerializableFunction fromRowFunction = (SerializableFunction) SerializableUtils.deserializeFromByteArray(fromRowBytes, "fromRowFunction");

        // The schema is stored as proto JSON; merge it into a builder and translate it.
        String schemaJson = Structs.getString(cloudObject, SCHEMA);
        SchemaApi.Schema.Builder schemaProto = SchemaApi.Schema.newBuilder();
        JsonFormat.parser().merge(schemaJson, schemaProto);
        Schema schema = SchemaTranslation.schemaFromProto(schemaProto.build());

        // Register overridden encoding positions only when the schema carries a UUID to key them by.
        @Nullable UUID uuid = schema.getUUID();
        boolean positionsOverridden = schema.isEncodingPositionsOverridden();
        if (positionsOverridden && uuid != null) {
            SchemaCoder.overrideEncodingPositions(uuid, schema.getEncodingPositions());
        }
        return SchemaCoder.of(schema, typeDescriptor, toRowFunction, fromRowFunction);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used : TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) Schema(org.apache.beam.sdk.schemas.Schema) IOException(java.io.IOException) UUID(java.util.UUID) Nullable(javax.annotation.Nullable)

Example 25 with SerializableFunction

use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.

the class DirectRunnerTest method byteArrayCountShouldSucceed.

/**
 * Counts occurrences of {@code byte[]} elements produced by encoding integers with
 * {@code VarIntCoder}, across two flattened inputs, then decodes the keys back to integers and
 * asserts the per-value counts.
 *
 * <p>The pipeline is explicitly run at the end; a {@code PAssert} is only evaluated when the
 * pipeline it is attached to executes, so without the run the assertion would never be checked.
 */
@Test
public void byteArrayCountShouldSucceed() {
    Pipeline p = getPipeline();
    SerializableFunction<Integer, byte[]> getBytes = input -> {
        try {
            return CoderUtils.encodeToByteArray(VarIntCoder.of(), input);
        } catch (CoderException e) {
            fail("Unexpected Coder Exception " + e);
            // fail() always throws; this statement only satisfies the compiler's return analysis.
            throw new AssertionError("Unreachable");
        }
    };
    TypeDescriptor<byte[]> td = new TypeDescriptor<byte[]>() {
    };
    PCollection<byte[]> foos = p.apply(Create.of(1, 1, 1, 2, 2, 3)).apply(MapElements.into(td).via(getBytes));
    PCollection<byte[]> msync = p.apply(Create.of(1, -2, -8, -16)).apply(MapElements.into(td).via(getBytes));
    PCollection<byte[]> bytes = PCollectionList.of(foos).and(msync).apply(Flatten.pCollections());
    PCollection<KV<byte[], Long>> counts = bytes.apply(Count.perElement());
    // Decode the byte[] keys back to the integers they were encoded from.
    PCollection<KV<Integer, Long>> decodedCounts = counts.apply(MapElements.via(new SimpleFunction<KV<byte[], Long>, KV<Integer, Long>>() {

        @Override
        public KV<Integer, Long> apply(KV<byte[], Long> input) {
            try {
                return KV.of(CoderUtils.decodeFromByteArray(VarIntCoder.of(), input.getKey()), input.getValue());
            } catch (CoderException e) {
                fail("Unexpected Coder Exception " + e);
                // fail() always throws; this statement only satisfies the compiler's return analysis.
                throw new AssertionError("Unreachable");
            }
        }
    }));
    // The value 1 appears three times in the first input and once in the second.
    Map<Integer, Long> expected = ImmutableMap.<Integer, Long>builder().put(1, 4L).put(2, 2L).put(3, 1L).put(-2, 1L).put(-8, 1L).put(-16, 1L).build();
    PAssert.thatMap(decodedCounts).isEqualTo(expected);
    // Execute the pipeline so the PAssert above is actually evaluated.
    p.run().waitUntilFinish();
}
Also used : Count(org.apache.beam.sdk.transforms.Count) Arrays(java.util.Arrays) SerializableCoder(org.apache.beam.sdk.coders.SerializableCoder) PBegin(org.apache.beam.sdk.values.PBegin) Matchers.isA(org.hamcrest.Matchers.isA) CoderUtils(org.apache.beam.sdk.util.CoderUtils) PipelineResult(org.apache.beam.sdk.PipelineResult) UnboundedSource(org.apache.beam.sdk.io.UnboundedSource) ListCoder(org.apache.beam.sdk.coders.ListCoder) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) SimpleFunction(org.apache.beam.sdk.transforms.SimpleFunction) ThrowableMessageMatcher(org.junit.internal.matchers.ThrowableMessageMatcher) Future(java.util.concurrent.Future) DirectPipelineResult(org.apache.beam.runners.direct.DirectRunner.DirectPipelineResult) PCollectionList(org.apache.beam.sdk.values.PCollectionList) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Create(org.apache.beam.sdk.transforms.Create) Map(java.util.Map) Window(org.apache.beam.sdk.transforms.windowing.Window) Assert.fail(org.junit.Assert.fail) Flatten(org.apache.beam.sdk.transforms.Flatten) MapElements(org.apache.beam.sdk.transforms.MapElements) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Sum(org.apache.beam.sdk.transforms.Sum) BlockingQueue(java.util.concurrent.BlockingQueue) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) Executors(java.util.concurrent.Executors) Serializable(java.io.Serializable) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CoderException(org.apache.beam.sdk.coders.CoderException) List(java.util.List) ParDo(org.apache.beam.sdk.transforms.ParDo) Matchers.equalTo(org.hamcrest.Matchers.equalTo) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) Optional(java.util.Optional) State(org.apache.beam.sdk.PipelineResult.State) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) 
Matchers.is(org.hamcrest.Matchers.is) GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) KV(org.apache.beam.sdk.values.KV) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) AfterWatermark(org.apache.beam.sdk.transforms.windowing.AfterWatermark) Default(org.apache.beam.sdk.options.Default) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) Coder(org.apache.beam.sdk.coders.Coder) Callable(java.util.concurrent.Callable) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) PTransform(org.apache.beam.sdk.transforms.PTransform) Read(org.apache.beam.sdk.io.Read) PipelineRunner(org.apache.beam.sdk.PipelineRunner) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) JsonIgnore(com.fasterxml.jackson.annotation.JsonIgnore) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Pipeline(org.apache.beam.sdk.Pipeline) NoSuchElementException(java.util.NoSuchElementException) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ExpectedException(org.junit.rules.ExpectedException) ExecutorService(java.util.concurrent.ExecutorService) Nullable(org.checkerframework.checker.nullness.qual.Nullable) OutputStream(java.io.OutputStream) DoFn(org.apache.beam.sdk.transforms.DoFn) DisplayData(org.apache.beam.sdk.transforms.display.DisplayData) CountingSource(org.apache.beam.sdk.io.CountingSource) PDone(org.apache.beam.sdk.values.PDone) PAssert(org.apache.beam.sdk.testing.PAssert) IllegalMutationException(org.apache.beam.sdk.util.IllegalMutationException) Matchers(org.hamcrest.Matchers) IOException(java.io.IOException) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) AtomicLong(java.util.concurrent.atomic.AtomicLong) BoundedSource(org.apache.beam.sdk.io.BoundedSource) Rule(org.junit.Rule) Preconditions.checkState(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState) 
BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Preconditions(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions) Instant(org.joda.time.Instant) AtomicCoder(org.apache.beam.sdk.coders.AtomicCoder) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) Assert.assertEquals(org.junit.Assert.assertEquals) InputStream(java.io.InputStream) KV(org.apache.beam.sdk.values.KV) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) SimpleFunction(org.apache.beam.sdk.transforms.SimpleFunction) AtomicLong(java.util.concurrent.atomic.AtomicLong) CoderException(org.apache.beam.sdk.coders.CoderException) Test(org.junit.Test)

Aggregations

SerializableFunction (org.apache.beam.sdk.transforms.SerializableFunction): 37; Test (org.junit.Test): 27; TestPipeline (org.apache.beam.sdk.testing.TestPipeline): 23; PCollection (org.apache.beam.sdk.values.PCollection): 22; PAssert (org.apache.beam.sdk.testing.PAssert): 20; Instant (org.joda.time.Instant): 17; Rule (org.junit.Rule): 17; List (java.util.List): 16; MatcherAssert.assertThat (org.hamcrest.MatcherAssert.assertThat): 16; RunWith (org.junit.runner.RunWith): 16; Map (java.util.Map): 15; Duration (org.joda.time.Duration): 14; JUnit4 (org.junit.runners.JUnit4): 13; ArrayList (java.util.ArrayList): 12; Collections (java.util.Collections): 12; Create (org.apache.beam.sdk.transforms.Create): 12; Arrays (java.util.Arrays): 11; ParDo (org.apache.beam.sdk.transforms.ParDo): 11; KV (org.apache.beam.sdk.values.KV): 11; Assert.assertEquals (org.junit.Assert.assertEquals): 10