Use of org.apache.beam.sdk.io.common.HashingFn in the Apache Beam project.
Class HIFIOWithElasticTest, method testHifIOWithElastic.
/**
 * Reads the test data from the embedded Elasticsearch instance through
 * {@code HadoopInputFormatIO} and checks both the number of records read and a checksum of
 * their contents.
 */
@Test
public void testHifIOWithElastic() {
  // Checksum of the test data; it was evaluated once at insertion time and is hardcoded here.
  final String checksumOfInsertedData = "a62a85f5f081e3840baf1028d4d6c6bc";

  Configuration hadoopConf = getConfiguration();
  PCollection<KV<Text, LinkedMapWritable>> documents =
      pipeline.apply(
          HadoopInputFormatIO.<Text, LinkedMapWritable>read().withConfiguration(hadoopConf));

  // The number of records fetched must equal the number of rows in the test data.
  PAssert.thatSingleton(documents.apply(Count.<KV<Text, LinkedMapWritable>>globally()))
      .isEqualTo((long) TEST_DATA_ROW_COUNT);

  // Drop the keys and run each value through transformFunc to obtain its String form.
  PCollection<String> renderedValues =
      documents.apply(Values.<LinkedMapWritable>create()).apply(transformFunc);

  // Combine all values with HashingFn and compare the result against the expected checksum.
  PCollection<String> actualChecksum =
      renderedValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(actualChecksum).containsInAnyOrder(checksumOfInsertedData);

  pipeline.run().waitUntilFinish();
}
Use of org.apache.beam.sdk.io.common.HashingFn in the Apache Beam project.
Class HIFIOWithEmbeddedCassandraTest, method testHIFReadForCassandraQuery.
/**
 * Runs a CQL query against the embedded Cassandra instance through {@code HadoopInputFormatIO}
 * and checks both the number of rows returned and a checksum of their values.
 */
@Test
public void testHIFReadForCassandraQuery() throws Exception {
  final Long expectedRowCount = 1L;
  final String checksumOfMatchingRows = "f11caabc7a9fc170e22b41218749166c";

  Configuration queryConf = getConfiguration();
  // Restrict the read to rows whose scientist column is 'Faraday1'.
  String cql =
      "select * from "
          + CASSANDRA_KEYSPACE
          + "."
          + CASSANDRA_TABLE
          + " where token(id) > ? and token(id) <= ? and scientist='Faraday1' allow filtering";
  queryConf.set("cassandra.input.cql", cql);

  PCollection<KV<Long, String>> rows =
      p.apply(
          HadoopInputFormatIO.<Long, String>read()
              .withConfiguration(queryConf)
              .withValueTranslation(myValueTranslate));

  // The number of rows retrieved from Cassandra must match the expected count.
  PCollection<Long> rowCount = rows.apply("Count", Count.<KV<Long, String>>globally());
  PAssert.thatSingleton(rowCount).isEqualTo(expectedRowCount);

  // Combine all values with HashingFn and compare the result against the expected checksum.
  PCollection<String> actualChecksum =
      rows.apply(Values.<String>create())
          .apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(actualChecksum).containsInAnyOrder(checksumOfMatchingRows);

  p.run().waitUntilFinish();
}
Use of org.apache.beam.sdk.io.common.HashingFn in the Apache Beam project.
Class HIFIOCassandraIT, method testHIFReadForCassandra.
/**
 * Reads data from the Cassandra instance through {@code HadoopInputFormatIO} and checks both the
 * number of records read and a checksum of their values.
 */
@Test
public void testHIFReadForCassandra() {
  // Checksum of the test data; it was evaluated once at insertion time and is hardcoded here.
  final String checksumOfInsertedData = "1a30ad400afe4ebf5fde75f5d2d95408";
  final Long expectedRowCount = 1000L;

  Configuration hadoopConf = getConfiguration(options);
  PCollection<KV<Long, String>> rows =
      pipeline.apply(
          HadoopInputFormatIO.<Long, String>read()
              .withConfiguration(hadoopConf)
              .withValueTranslation(myValueTranslate));

  // The number of records read must match the expected record count.
  PCollection<Long> rowCount = rows.apply("Count", Count.<KV<Long, String>>globally());
  PAssert.thatSingleton(rowCount).isEqualTo(expectedRowCount);

  // Combine all values with HashingFn and compare the result against the expected checksum.
  PCollection<String> actualChecksum =
      rows.apply(Values.<String>create())
          .apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(actualChecksum).containsInAnyOrder(checksumOfInsertedData);

  pipeline.run().waitUntilFinish();
}
Aggregations