use of org.apache.beam.sdk.values.KV in project beam by apache.
the class ParDoTest method testCombiningState.
/**
 * Feeds three values for a single key through a stateful {@link DoFn} that adds each value to a
 * {@link Mean} combining state, and emits {@code "right on"} whenever the state's current reading
 * is within a small tolerance of 0.5. Asserts that exactly one such element is produced.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testCombiningState() {
  final String meanStateId = "foo";

  DoFn<KV<String, Double>, String> meanWatcher =
      new DoFn<KV<String, Double>, String>() {
        // Tolerance used when comparing the running mean against the 0.5 target.
        private static final double TOLERANCE = 0.0001;

        @StateId(meanStateId)
        private final StateSpec<CombiningState<Double, CountSum<Double>, Double>> meanSpec =
            StateSpecs.combining(new Mean.CountSumCoder<Double>(), Mean.<Double>of());

        @ProcessElement
        public void processElement(
            ProcessContext context,
            @StateId(meanStateId) CombiningState<Double, CountSum<Double>, Double> meanState) {
          // Fold the incoming value in first, then inspect the updated mean.
          meanState.add(context.element().getValue());
          double runningMean = meanState.read();
          boolean onTarget = Math.abs(runningMean - 0.5) < TOLERANCE;
          if (onTarget) {
            context.output("right on");
          }
        }
      };

  PCollection<KV<String, Double>> input =
      pipeline.apply(Create.of(KV.of("hello", 0.3), KV.of("hello", 0.6), KV.of("hello", 0.6)));
  PCollection<String> output = input.apply(ParDo.of(meanWatcher));

  // 0.3, 0.6 and 0.6 only average to 0.5 once all three have been added, so the mean should be
  // within tolerance of 0.5 at exactly one moment.
  PAssert.that(output).containsInAnyOrder("right on");
  pipeline.run();
}
use of org.apache.beam.sdk.values.KV in project beam by apache.
the class DoFnTesterTest method testSupportsWindowParameter.
/**
 * Processes elements in two different windows through a {@link DoFnTester} wrapping
 * {@code DoFnWithWindowParameter} and checks that the outputs peeked for each window pair every
 * input element with the window it was processed in.
 */
@Test
public void testSupportsWindowParameter() throws Exception {
  Instant timestamp = Instant.now();
  // Two windows sharing a start instant but with different lengths.
  BoundedWindow oneMinuteWindow =
      new IntervalWindow(timestamp, timestamp.plus(Duration.standardMinutes(1)));
  BoundedWindow fourMinuteWindow =
      new IntervalWindow(timestamp, timestamp.plus(Duration.standardMinutes(4)));

  try (DoFnTester<Integer, KV<Integer, BoundedWindow>> fnTester =
      DoFnTester.of(new DoFnWithWindowParameter())) {
    fnTester.processWindowedElement(1, timestamp, oneMinuteWindow);
    fnTester.processWindowedElement(2, timestamp, oneMinuteWindow);
    fnTester.processWindowedElement(3, timestamp, fourMinuteWindow);
    fnTester.finishBundle();

    // Elements 1 and 2 were processed in the one-minute window.
    assertThat(
        fnTester.peekOutputElementsInWindow(oneMinuteWindow),
        containsInAnyOrder(
            TimestampedValue.of(KV.of(1, oneMinuteWindow), timestamp),
            TimestampedValue.of(KV.of(2, oneMinuteWindow), timestamp)));
    // Element 3 was processed in the four-minute window.
    assertThat(
        fnTester.peekOutputElementsInWindow(fourMinuteWindow),
        containsInAnyOrder(TimestampedValue.of(KV.of(3, fourMinuteWindow), timestamp)));
  }
}
use of org.apache.beam.sdk.values.KV in project beam by apache.
the class HadoopInputFormatIOTest method testGetCurrentSourceFunction.
/**
 * Verifies that
 * {@link HadoopInputFormatBoundedSource.HadoopInputFormatReader#getCurrentSource()
 * getCurrentSource()} returns a source equal to the one the reader was created from.
 *
 * @throws Exception if creating the reader fails
 */
@Test
public void testGetCurrentSourceFunction() throws Exception {
  SerializableSplit split = new SerializableSplit();
  BoundedSource<KV<Text, Employee>> source =
      new HadoopInputFormatBoundedSource<Text, Employee>(
          serConf,
          WritableCoder.of(Text.class),
          AvroCoder.of(Employee.class),
          null, // No key translation required.
          null, // No value translation required.
          split);
  BoundedReader<KV<Text, Employee>> hifReader = source.createReader(p.getOptions());
  BoundedSource<KV<Text, Employee>> hifSource = hifReader.getCurrentSource();
  // JUnit's assertEquals takes (expected, actual): the source that created the reader is the
  // expected value, and what the reader reports is the actual one.
  assertEquals(source, hifSource);
}
use of org.apache.beam.sdk.values.KV in project beam by apache.
the class HIFIOWithEmbeddedCassandraTest method testHIFReadForCassandra.
/**
 * Reads data from the embedded Cassandra instance through {@link HadoopInputFormatIO} and checks
 * both the number of rows read and a combined hash of the translated values.
 *
 * @throws Exception if the test fails unexpectedly
 */
@Test
public void testHIFReadForCassandra() throws Exception {
  // The expected hash was computed once when the test data was inserted and is hardcoded here.
  String expectedHashCode = "1b9780833cce000138b9afa25ba63486";
  Configuration cassandraConf = getConfiguration();

  PCollection<KV<Long, String>> cassandraData =
      p.apply(
          HadoopInputFormatIO.<Long, String>read()
              .withConfiguration(cassandraConf)
              .withValueTranslation(myValueTranslate));

  // The number of rows read from Cassandra must match the expected row count.
  PCollection<Long> rowCount = cassandraData.apply("Count", Count.<KV<Long, String>>globally());
  PAssert.thatSingleton(rowCount).isEqualTo(TEST_DATA_ROW_COUNT);

  // Compare the values by checksum: drop the keys and fold all values into a single hash.
  PCollection<String> consolidatedHashcode =
      cassandraData
          .apply(Values.<String>create())
          .apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);

  p.run().waitUntilFinish();
}
use of org.apache.beam.sdk.values.KV in project beam by apache.
the class HIFIOCassandraIT method testHIFReadForCassandraQuery.
/**
 * Reads data from the Cassandra instance using a CQL query set on the configuration, then checks
 * both the number of rows read and a combined hash of the translated values.
 */
@Test
public void testHIFReadForCassandraQuery() {
  String expectedHashCode = "7bead6d6385c5f4dd0524720cd320b49";
  Long expectedNumRows = 1L;

  Configuration queryConf = getConfiguration(options);
  // Token-range query additionally filtered on one specific field0 value.
  String cql =
      "select * from "
          + CASSANDRA_KEYSPACE
          + "."
          + CASSANDRA_TABLE
          + " where token(y_id) > ? and token(y_id) <= ? "
          + "and field0 = 'user48:field0:431531'";
  queryConf.set("cassandra.input.cql", cql);

  PCollection<KV<Long, String>> cassandraData =
      pipeline.apply(
          HadoopInputFormatIO.<Long, String>read()
              .withConfiguration(queryConf)
              .withValueTranslation(myValueTranslate));

  // The number of rows read must match the expected row count.
  PCollection<Long> rowCount = cassandraData.apply("Count", Count.<KV<Long, String>>globally());
  PAssert.thatSingleton(rowCount).isEqualTo(expectedNumRows);

  // Compare the values by checksum: drop the keys and fold all values into a single hash.
  PCollection<String> consolidatedHashcode =
      cassandraData
          .apply(Values.<String>create())
          .apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);

  pipeline.run().waitUntilFinish();
}
Aggregations