Use of org.apache.beam.sdk.io.hadoop.SerializableConfiguration in project beam by apache.
The class HadoopInputFormatIOTest, method loadTestConfiguration.
private static SerializableConfiguration loadTestConfiguration(
    Class<?> inputFormatClassName, Class<?> keyClass, Class<?> valueClass) {
  Configuration conf = new Configuration();
  conf.setClass("mapreduce.job.inputformat.class", inputFormatClassName, InputFormat.class);
  conf.setClass("key.class", keyClass, Object.class);
  conf.setClass("value.class", valueClass, Object.class);
  return new SerializableConfiguration(conf);
}
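Hadoop's Configuration is not java.io.Serializable, which is why the helper wraps it before handing it to Beam. A minimal sketch, independent of the test suite above, showing that the wrapped settings survive a plain Java serialization round trip (SerializableConfiguration implements Externalizable, and get() unwraps the Configuration):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import org.apache.beam.sdk.io.hadoop.SerializableConfiguration;
import org.apache.hadoop.conf.Configuration;

public class SerializableConfigurationRoundTrip {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("key.class", "org.apache.hadoop.io.Text");

    // Wrap the Configuration, then serialize and deserialize the wrapper.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    try (ObjectOutputStream out = new ObjectOutputStream(bytes)) {
      out.writeObject(new SerializableConfiguration(conf));
    }
    SerializableConfiguration restored;
    try (ObjectInputStream in =
        new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray()))) {
      restored = (SerializableConfiguration) in.readObject();
    }

    // get() recovers the wrapped Configuration with its settings intact.
    System.out.println(restored.get().get("key.class")); // org.apache.hadoop.io.Text
  }
}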
Use of org.apache.beam.sdk.io.hadoop.SerializableConfiguration in project beam by apache.
The class HadoopFormatIOIT, method setupHadoopConfiguration.
private static void setupHadoopConfiguration(PostgresIOTestPipelineOptions options) {
  Configuration conf = new Configuration();
  DBConfiguration.configureDB(
      conf,
      "org.postgresql.Driver",
      DatabaseTestHelper.getPostgresDBUrl(options),
      options.getPostgresUsername(),
      options.getPostgresPassword());
  // Read side: table, columns, ordering, and the classes DBInputFormat should produce.
  conf.set(DBConfiguration.INPUT_TABLE_NAME_PROPERTY, tableName);
  conf.setStrings(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY, "id", "name");
  conf.set(DBConfiguration.INPUT_ORDER_BY_PROPERTY, "id ASC");
  conf.setClass(DBConfiguration.INPUT_CLASS_PROPERTY, TestRowDBWritable.class, DBWritable.class);
  conf.setClass("key.class", LongWritable.class, Object.class);
  conf.setClass("value.class", TestRowDBWritable.class, Object.class);
  conf.setClass("mapreduce.job.inputformat.class", DBInputFormat.class, InputFormat.class);
  // Write side: target table and the classes DBOutputFormat should consume.
  conf.set(DBConfiguration.OUTPUT_TABLE_NAME_PROPERTY, tableName);
  conf.set(DBConfiguration.OUTPUT_FIELD_COUNT_PROPERTY, "2");
  conf.setStrings(DBConfiguration.OUTPUT_FIELD_NAMES_PROPERTY, "id", "name");
  conf.setClass(HadoopFormatIO.OUTPUT_KEY_CLASS, TestRowDBWritable.class, Object.class);
  conf.setClass(HadoopFormatIO.OUTPUT_VALUE_CLASS, NullWritable.class, Object.class);
  conf.setClass(HadoopFormatIO.OUTPUT_FORMAT_CLASS_ATTR, DBOutputFormat.class, OutputFormat.class);
  conf.set(HadoopFormatIO.JOB_ID, String.valueOf(1));
  hadoopConfiguration = new SerializableConfiguration(conf);
}
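For context, a hedged sketch of how the stored hadoopConfiguration might then drive the read side of the integration test; pipeline is assumed to be in scope, TestRowDBWritable is the test helper configured above, and the withConfiguration(...) call follows the read pattern shown elsewhere on this page:

// Sketch only: read rows back through DBInputFormat using the configuration built above.
PCollection<KV<LongWritable, TestRowDBWritable>> rows =
    pipeline.apply(
        "ReadViaDBInputFormat",
        HadoopFormatIO.<LongWritable, TestRowDBWritable>read()
            .withConfiguration(hadoopConfiguration.get()));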
Use of org.apache.beam.sdk.io.hadoop.SerializableConfiguration in project beam by apache.
The class HadoopFormatIOReadTest, method testValidateConfigurationWithDBInputFormat.
@Test
public void testValidateConfigurationWithDBInputFormat() {
  Configuration conf = new Configuration();
  conf.setClass("key.class", LongWritable.class, Object.class);
  conf.setClass("value.class", Text.class, Object.class);
  conf.setClass("mapreduce.job.inputformat.class", DBInputFormat.class, InputFormat.class);
  thrown.expect(IllegalArgumentException.class);
  HadoopFormatIO.<String, String>read()
      .withConfiguration(new SerializableConfiguration(conf).get())
      .withKeyTranslation(myKeyTranslate)
      .withValueTranslation(myValueTranslate);
}
Use of org.apache.beam.sdk.io.hadoop.SerializableConfiguration in project beam by apache.
The class HadoopFormatIOReadTest, method testSkipKeyValueClone.
/**
 * This test validates that when the reader is instructed not to clone key/value records, the
 * returned records are exactly the same instances as those produced by the source, no matter
 * whether they are mutable or immutable. Turning this override on is useful when key/value
 * translation functions are used, as it avoids a possibly unnecessary copy.
 */
@Test
public void testSkipKeyValueClone() throws Exception {
  SerializableConfiguration serConf =
      loadTestConfiguration(EmployeeInputFormat.class, Text.class, Employee.class);
  // with skip clone 'true' it should produce the same instances of key/value
  List<BoundedSource<KV<Text, Employee>>> sources =
      new HadoopInputFormatBoundedSource<>(
              serConf,
              WritableCoder.of(Text.class),
              AvroCoder.of(Employee.class),
              new SingletonTextFn(),
              new SingletonEmployeeFn(),
              true,
              true)
          .split(0, p.getOptions());
  for (BoundedSource<KV<Text, Employee>> source : sources) {
    List<KV<Text, Employee>> elems = SourceTestUtils.readFromSource(source, p.getOptions());
    for (KV<Text, Employee> elem : elems) {
      Assert.assertSame(SingletonTextFn.TEXT, elem.getKey());
      Assert.assertEquals(SingletonTextFn.TEXT, elem.getKey());
      Assert.assertSame(SingletonEmployeeFn.EMPLOYEE, elem.getValue());
      Assert.assertEquals(SingletonEmployeeFn.EMPLOYEE, elem.getValue());
    }
  }
  // with skip clone 'false' it should produce different instances of key/value
  sources =
      new HadoopInputFormatBoundedSource<>(
              serConf,
              WritableCoder.of(Text.class),
              AvroCoder.of(Employee.class),
              new SingletonTextFn(),
              new SingletonEmployeeFn(),
              false,
              false)
          .split(0, p.getOptions());
  for (BoundedSource<KV<Text, Employee>> source : sources) {
    List<KV<Text, Employee>> elems = SourceTestUtils.readFromSource(source, p.getOptions());
    for (KV<Text, Employee> elem : elems) {
      Assert.assertNotSame(SingletonTextFn.TEXT, elem.getKey());
      Assert.assertEquals(SingletonTextFn.TEXT, elem.getKey());
      Assert.assertNotSame(SingletonEmployeeFn.EMPLOYEE, elem.getValue());
      Assert.assertEquals(SingletonEmployeeFn.EMPLOYEE, elem.getValue());
    }
  }
}
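Skipping the clone pays off mainly when a translation function maps each record to a fresh object anyway, so the intermediate copy would be wasted work. A hedged sketch of such a translation (serConf, EmployeeInputFormat, and Employee are the test classes used above; the SimpleFunction-based withKeyTranslation API is the one HadoopFormatIO documents):

// Sketch: convert Text keys to String right after reading; cloning the
// short-lived Text instance first would only add an extra copy.
SimpleFunction<Text, String> keyToString =
    new SimpleFunction<Text, String>() {
      @Override
      public String apply(Text input) {
        return input.toString();
      }
    };
HadoopFormatIO.Read<String, Employee> read =
    HadoopFormatIO.<String, Employee>read()
        .withConfiguration(serConf.get())
        .withKeyTranslation(keyToString);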
Use of org.apache.beam.sdk.io.hadoop.SerializableConfiguration in project beam by apache.
The class HadoopFormatIOReadTest, method testReadBuildsCorrectlyIfWithConfigurationIsCalledMoreThanOneTime.
/**
* This test validates {@link HadoopFormatIO.Read Read} object creation if {@link
* HadoopFormatIO.Read#withConfiguration(Configuration) withConfiguration(Configuration)} is
* called more than once.
*/
@Test
public void testReadBuildsCorrectlyIfWithConfigurationIsCalledMoreThanOneTime() {
  SerializableConfiguration diffConf =
      loadTestConfiguration(EmployeeInputFormat.class, Employee.class, Text.class);
  HadoopFormatIO.Read<String, String> read =
      HadoopFormatIO.<String, String>read()
          .withConfiguration(serConf.get())
          .withKeyTranslation(myKeyTranslate)
          .withConfiguration(diffConf.get());
  assertEquals(diffConf.get(), read.getConfiguration().get());
  assertEquals(myKeyTranslate, read.getKeyTranslationFunction());
  assertEquals(null, read.getValueTranslationFunction());
  assertEquals(myKeyTranslate.getOutputTypeDescriptor(), read.getKeyTypeDescriptor());
  assertEquals(
      diffConf.get().getClass("value.class", Object.class),
      read.getValueTypeDescriptor().getRawType());
}
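A short follow-up sketch of the inference the last assertion relies on: with no value translation supplied, Read appears to derive the value type descriptor from the configuration's "value.class" entry (this is an assumption drawn from the assertion above; loadTestConfiguration, EmployeeInputFormat, Employee, and Text are the test classes from this page):

SerializableConfiguration conf =
    loadTestConfiguration(EmployeeInputFormat.class, Text.class, Employee.class);
HadoopFormatIO.Read<Text, Employee> read =
    HadoopFormatIO.<Text, Employee>read().withConfiguration(conf.get());
// Assumed per the assertion above: the raw value type mirrors "value.class".
Assert.assertEquals(Employee.class, read.getValueTypeDescriptor().getRawType());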