
Example 46 with KV

Use of org.apache.beam.sdk.values.KV in project beam by apache.

From the class ParDoTest, method testCombiningState.

@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testCombiningState() {
    final String stateId = "foo";
    DoFn<KV<String, Double>, String> fn = new DoFn<KV<String, Double>, String>() {

        private static final double EPSILON = 0.0001;

        @StateId(stateId)
        private final StateSpec<CombiningState<Double, CountSum<Double>, Double>> combiningState =
            StateSpecs.combining(new Mean.CountSumCoder<Double>(), Mean.<Double>of());

        @ProcessElement
        public void processElement(
                ProcessContext c,
                @StateId(stateId) CombiningState<Double, CountSum<Double>, Double> state) {
            state.add(c.element().getValue());
            Double currentValue = state.read();
            if (Math.abs(currentValue - 0.5) < EPSILON) {
                c.output("right on");
            }
        }
    };
    PCollection<String> output =
        pipeline
            .apply(Create.of(KV.of("hello", 0.3), KV.of("hello", 0.6), KV.of("hello", 0.6)))
            .apply(ParDo.of(fn));
    // There should be exactly one moment at which the running average is 0.5: after the third
    // element, when (0.3 + 0.6 + 0.6) / 3 = 0.5 (matched within EPSILON).
    PAssert.that(output).containsInAnyOrder("right on");
    pipeline.run();
}
Also used: StateSpec (org.apache.beam.sdk.state.StateSpec), CountSum (org.apache.beam.sdk.transforms.Mean.CountSum), CombiningState (org.apache.beam.sdk.state.CombiningState), StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString), Matchers.containsString (org.hamcrest.Matchers.containsString), KV (org.apache.beam.sdk.values.KV), Category (org.junit.experimental.categories.Category), Test (org.junit.Test)
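
The same pattern works with other inputs and combiners. Below is a minimal sketch, not taken from the Beam sources indexed here, that keeps a per-key running mean over Integer inputs with the same Mean combiner and emits the current average after every element; the state id and DoFn name are illustrative, and the imports are the same as in the example above.

DoFn<KV<String, Integer>, Double> runningMeanFn = new DoFn<KV<String, Integer>, Double>() {

    // The accumulator is Mean.CountSum (a running count and sum); reading the
    // state yields sum / count, i.e. the mean of everything added so far.
    @StateId("runningMean")
    private final StateSpec<CombiningState<Integer, CountSum<Integer>, Double>> meanSpec =
        StateSpecs.combining(new Mean.CountSumCoder<Integer>(), Mean.<Integer>of());

    @ProcessElement
    public void processElement(
            ProcessContext c,
            @StateId("runningMean") CombiningState<Integer, CountSum<Integer>, Double> mean) {
        mean.add(c.element().getValue()); // fold the new value into the (count, sum) accumulator
        c.output(mean.read());            // emit the running mean for this key so far
    }
};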

Example 47 with KV

Use of org.apache.beam.sdk.values.KV in project beam by apache.

From the class DoFnTesterTest, method testSupportsWindowParameter.

@Test
public void testSupportsWindowParameter() throws Exception {
    Instant now = Instant.now();
    try (DoFnTester<Integer, KV<Integer, BoundedWindow>> tester =
        DoFnTester.of(new DoFnWithWindowParameter())) {
        BoundedWindow firstWindow = new IntervalWindow(now, now.plus(Duration.standardMinutes(1)));
        tester.processWindowedElement(1, now, firstWindow);
        tester.processWindowedElement(2, now, firstWindow);
        BoundedWindow secondWindow = new IntervalWindow(now, now.plus(Duration.standardMinutes(4)));
        tester.processWindowedElement(3, now, secondWindow);
        tester.finishBundle();
        assertThat(
            tester.peekOutputElementsInWindow(firstWindow),
            containsInAnyOrder(
                TimestampedValue.of(KV.of(1, firstWindow), now),
                TimestampedValue.of(KV.of(2, firstWindow), now)));
        assertThat(
            tester.peekOutputElementsInWindow(secondWindow),
            containsInAnyOrder(TimestampedValue.of(KV.of(3, secondWindow), now)));
    }
}
Also used: AtomicInteger (java.util.concurrent.atomic.AtomicInteger), Instant (org.joda.time.Instant), BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow), KV (org.apache.beam.sdk.values.KV), IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow), Test (org.junit.Test)
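
The test above relies on a DoFnWithWindowParameter class that is not shown in this snippet. A minimal sketch of what such a DoFn could look like, assuming it simply pairs each element with the window it arrived in (the class body below is an assumption for illustration, not the actual Beam source):

static class DoFnWithWindowParameter extends DoFn<Integer, KV<Integer, BoundedWindow>> {

    @ProcessElement
    public void processElement(ProcessContext c, BoundedWindow window) {
        // The runner injects the window the current element belongs to; emitting it
        // alongside the element lets the test assert on per-window output.
        c.output(KV.of(c.element(), window));
    }
}

With such a DoFn, the two elements processed in firstWindow and the one processed in secondWindow come back as KV pairs carrying their respective windows, which is exactly what the peekOutputElementsInWindow assertions check.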

Example 48 with KV

Use of org.apache.beam.sdk.values.KV in project beam by apache.

From the class HadoopInputFormatIOTest, method testGetCurrentSourceFunction.

/**
   * This test verifies that the method
   * {@link HadoopInputFormatBoundedSource.HadoopInputFormatReader#getCurrentSource()
   * getCurrentSource()} returns the correct source object.
   */
@Test
public void testGetCurrentSourceFunction() throws Exception {
    SerializableSplit split = new SerializableSplit();
    BoundedSource<KV<Text, Employee>> source =
        new HadoopInputFormatBoundedSource<Text, Employee>(
            serConf,
            WritableCoder.of(Text.class),
            AvroCoder.of(Employee.class),
            null, // No key translation required.
            null, // No value translation required.
            split);
    BoundedReader<KV<Text, Employee>> hifReader = source.createReader(p.getOptions());
    BoundedSource<KV<Text, Employee>> hifSource = hifReader.getCurrentSource();
    assertEquals(hifSource, source);
}
Also used: HadoopInputFormatBoundedSource (org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIO.HadoopInputFormatBoundedSource), SerializableSplit (org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIO.SerializableSplit), Text (org.apache.hadoop.io.Text), KV (org.apache.beam.sdk.values.KV), Test (org.junit.Test)
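
The Employee value type referenced above is not shown either. Because the test passes AvroCoder.of(Employee.class) explicitly, Employee only needs to be a POJO that AvroCoder can encode reflectively, which in particular requires a no-argument constructor. A hypothetical sketch (the field names are illustrative, not the actual Beam test class):

static class Employee {

    private String name;
    private String address;

    // AvroCoder instantiates the class reflectively, so a no-argument constructor is required.
    public Employee() {
    }

    public Employee(String name, String address) {
        this.name = name;
        this.address = address;
    }

    public String getName() {
        return name;
    }

    public String getAddress() {
        return address;
    }
}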

Example 49 with KV

Use of org.apache.beam.sdk.values.KV in project beam by apache.

From the class HIFIOWithEmbeddedCassandraTest, method testHIFReadForCassandra.

/**
   * Test to read data from an embedded Cassandra instance and verify that the data is read
   * successfully.
   * @throws Exception
   */
@Test
public void testHIFReadForCassandra() throws Exception {
    // The expected hash code was computed once at insertion time and is hardcoded here.
    String expectedHashCode = "1b9780833cce000138b9afa25ba63486";
    Configuration conf = getConfiguration();
    PCollection<KV<Long, String>> cassandraData =
        p.apply(
            HadoopInputFormatIO.<Long, String>read()
                .withConfiguration(conf)
                .withValueTranslation(myValueTranslate));
    // Verify that the number of rows retrieved from Cassandra matches the expected count.
    PAssert.thatSingleton(cassandraData.apply("Count", Count.<KV<Long, String>>globally()))
        .isEqualTo(TEST_DATA_ROW_COUNT);
    PCollection<String> textValues = cassandraData.apply(Values.<String>create());
    // Verify the output values using checksum comparison.
    PCollection<String> consolidatedHashcode = textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
    PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
    p.run().waitUntilFinish();
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), KV (org.apache.beam.sdk.values.KV), HashingFn (org.apache.beam.sdk.io.common.HashingFn), Test (org.junit.Test)

Example 50 with KV

Use of org.apache.beam.sdk.values.KV in project beam by apache.

From the class HIFIOCassandraIT, method testHIFReadForCassandraQuery.

/**
   * This test reads data from the Cassandra instance based on a query and verifies that the data
   * is read successfully.
   */
@Test
public void testHIFReadForCassandraQuery() {
    String expectedHashCode = "7bead6d6385c5f4dd0524720cd320b49";
    Long expectedNumRows = 1L;
    Configuration conf = getConfiguration(options);
    conf.set("cassandra.input.cql", "select * from " + CASSANDRA_KEYSPACE + "." + CASSANDRA_TABLE + " where token(y_id) > ? and token(y_id) <= ? " + "and field0 = 'user48:field0:431531'");
    PCollection<KV<Long, String>> cassandraData =
        pipeline.apply(
            HadoopInputFormatIO.<Long, String>read()
                .withConfiguration(conf)
                .withValueTranslation(myValueTranslate));
    PAssert.thatSingleton(cassandraData.apply("Count", Count.<KV<Long, String>>globally()))
        .isEqualTo(expectedNumRows);
    PCollection<String> textValues = cassandraData.apply(Values.<String>create());
    // Verify the output values using checksum comparison.
    PCollection<String> consolidatedHashcode = textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
    PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
    pipeline.run().waitUntilFinish();
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), KV (org.apache.beam.sdk.values.KV), HashingFn (org.apache.beam.sdk.io.common.HashingFn), Test (org.junit.Test)
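
Both Cassandra examples above pass a myValueTranslate function that is not shown. HadoopInputFormatIO's withValueTranslation expects a SimpleFunction from the input format's value type to the desired PCollection value type. A minimal sketch under the assumption that the configured input format yields Cassandra driver Row values (com.datastax.driver.core.Row) and that the relevant text column is named field0, as suggested by the query above; both assumptions are illustrative, not taken from the Beam sources:

SimpleFunction<Row, String> myValueTranslate = new SimpleFunction<Row, String>() {

    @Override
    public String apply(Row row) {
        // Reduce the row to a single String so downstream transforms can
        // count the values and hash them for the checksum comparison.
        return row.getString("field0");
    }
};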

Aggregations

KV (org.apache.beam.sdk.values.KV): 192
Test (org.junit.Test): 143
Instant (org.joda.time.Instant): 66
Category (org.junit.experimental.categories.Category): 62
Pipeline (org.apache.beam.sdk.Pipeline): 35
IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow): 34
StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString): 33
Matchers.containsString (org.hamcrest.Matchers.containsString): 33
StateSpec (org.apache.beam.sdk.state.StateSpec): 25
BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow): 22
ArrayList (java.util.ArrayList): 19
WindowedValue (org.apache.beam.sdk.util.WindowedValue): 19
TupleTag (org.apache.beam.sdk.values.TupleTag): 16
TableRow (com.google.api.services.bigquery.model.TableRow): 15
Map (java.util.Map): 15
ValueState (org.apache.beam.sdk.state.ValueState): 15
List (java.util.List): 14
ImmutableList (com.google.common.collect.ImmutableList): 12
HashMap (java.util.HashMap): 12
Timer (org.apache.beam.sdk.state.Timer): 12