use of org.apache.beam.sdk.io.hadoop.format.HadoopFormatIO.HadoopInputFormatBoundedSource in project beam by apache.
the class HadoopFormatIOReadTest method testSkipKeyValueClone.
/**
 * This test validates that when the reader is instructed not to clone key-value records, the
 * records it emits are exactly the same instances as those produced by the source, no matter
 * whether they are mutable or immutable. Turning this override on is useful when key-value
 * translation functions are used, as it avoids a possibly unnecessary copy.
*/
@Test
public void testSkipKeyValueClone() throws Exception {
SerializableConfiguration serConf =
    loadTestConfiguration(EmployeeInputFormat.class, Text.class, Employee.class);
// with skip clone 'true' it should produce the exact same instances of key/value
List<BoundedSource<KV<Text, Employee>>> sources =
    new HadoopInputFormatBoundedSource<>(
            serConf,
            WritableCoder.of(Text.class),
            AvroCoder.of(Employee.class),
            new SingletonTextFn(),
            new SingletonEmployeeFn(),
            true,
            true)
        .split(0, p.getOptions());
for (BoundedSource<KV<Text, Employee>> source : sources) {
List<KV<Text, Employee>> elems = SourceTestUtils.readFromSource(source, p.getOptions());
for (KV<Text, Employee> elem : elems) {
Assert.assertSame(SingletonTextFn.TEXT, elem.getKey());
Assert.assertEquals(SingletonTextFn.TEXT, elem.getKey());
Assert.assertSame(SingletonEmployeeFn.EMPLOYEE, elem.getValue());
Assert.assertEquals(SingletonEmployeeFn.EMPLOYEE, elem.getValue());
}
}
// with skip clone 'false' it should produce different (cloned) instances of key/value
sources =
    new HadoopInputFormatBoundedSource<>(
            serConf,
            WritableCoder.of(Text.class),
            AvroCoder.of(Employee.class),
            new SingletonTextFn(),
            new SingletonEmployeeFn(),
            false,
            false)
        .split(0, p.getOptions());
for (BoundedSource<KV<Text, Employee>> source : sources) {
List<KV<Text, Employee>> elems = SourceTestUtils.readFromSource(source, p.getOptions());
for (KV<Text, Employee> elem : elems) {
Assert.assertNotSame(SingletonTextFn.TEXT, elem.getKey());
Assert.assertEquals(SingletonTextFn.TEXT, elem.getKey());
Assert.assertNotSame(SingletonEmployeeFn.EMPLOYEE, elem.getValue());
Assert.assertEquals(SingletonEmployeeFn.EMPLOYEE, elem.getValue());
}
}
}
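SingletonTextFn and SingletonEmployeeFn are test helpers that are not shown on this page. A minimal sketch of what such a key-translation function might look like, assuming it extends Beam's SimpleFunction; the literal string is invented, and only the shared TEXT constant and singleton behavior are implied by the assertions above:

import org.apache.beam.sdk.transforms.SimpleFunction;
import org.apache.hadoop.io.Text;

// Hypothetical sketch: always returns the same shared Text instance, which is
// what makes assertSame(SingletonTextFn.TEXT, elem.getKey()) hold when the
// reader skips cloning.
class SingletonTextFn extends SimpleFunction<Text, Text> {
  static final Text TEXT = new Text("some static text");

  @Override
  public Text apply(Text input) {
    // Ignore the input and hand back the singleton, making instance identity
    // observable to the test.
    return TEXT;
  }
}

With cloning enabled, the reader copies the translated record via its coder, so the test sees an equal but distinct instance; with cloning skipped, the singleton itself flows through.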
use of org.apache.beam.sdk.io.hadoop.format.HadoopFormatIO.HadoopInputFormatBoundedSource in project beam by apache.
the class HadoopFormatIOReadTest method testReadDisplayData.
/**
 * This test validates the functionality of {@link
 * HadoopInputFormatBoundedSource#populateDisplayData(DisplayData.Builder)}.
 */
@Test
public void testReadDisplayData() {
HadoopInputFormatBoundedSource<Text, Employee> boundedSource =
    new HadoopInputFormatBoundedSource<>(
        serConf,
        WritableCoder.of(Text.class),
        AvroCoder.of(Employee.class),
        null, // No key translation required.
        null, // No value translation required.
        new SerializableSplit(),
        false,
        false);
DisplayData displayData = DisplayData.from(boundedSource);
assertThat(
    displayData,
    hasDisplayItem(
        "mapreduce.job.inputformat.class",
        serConf.get().get("mapreduce.job.inputformat.class")));
assertThat(displayData, hasDisplayItem("key.class", serConf.get().get("key.class")));
assertThat(displayData, hasDisplayItem("value.class", serConf.get().get("value.class")));
}
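The test only asserts on the resulting DisplayData; for context, a populateDisplayData override could look roughly like the following sketch. This is an illustrative assumption, not the actual Beam implementation:

import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.hadoop.conf.Configuration;

// Sketch: copy selected Hadoop Configuration entries into Beam display data so
// runners and monitoring UIs can surface them.
@Override
public void populateDisplayData(DisplayData.Builder builder) {
  super.populateDisplayData(builder);
  Configuration hadoopConf = serConf.get();
  for (String key :
      new String[] {"mapreduce.job.inputformat.class", "key.class", "value.class"}) {
    String value = hadoopConf.get(key);
    if (value != null) {
      builder.add(DisplayData.item(key, value));
    }
  }
}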
use of org.apache.beam.sdk.io.hadoop.format.HadoopFormatIO.HadoopInputFormatBoundedSource in project beam by apache.
the class HadoopFormatIOReadTest method testReadIfCreateRecordReaderFails.
/**
 * This test validates the behavior of {@link HadoopInputFormatBoundedSource} when RecordReader
 * creation fails.
*/
@Test
public void testReadIfCreateRecordReaderFails() throws Exception {
thrown.expect(Exception.class);
thrown.expectMessage("Exception in creating RecordReader");
InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
Mockito.when(
        mockInputFormat.createRecordReader(
            Mockito.any(InputSplit.class), Mockito.any(TaskAttemptContext.class)))
    .thenThrow(new IOException("Exception in creating RecordReader"));
HadoopInputFormatBoundedSource<Text, Employee> boundedSource =
    new HadoopInputFormatBoundedSource<>(
        serConf,
        WritableCoder.of(Text.class),
        AvroCoder.of(Employee.class),
        null, // No key translation required.
        null, // No value translation required.
        new SerializableSplit(),
        false,
        false);
boundedSource.setInputFormatObj(mockInputFormat);
SourceTestUtils.readFromSource(boundedSource, p.getOptions());
}
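The thrown and p fields used above are fixtures of the test class that this page does not show. They are presumably declared along these lines (standard JUnit 4 and Beam testing idioms, assumed rather than copied from the source):

import org.junit.Rule;
import org.junit.rules.ExpectedException;
import org.apache.beam.sdk.testing.TestPipeline;

// Presumed fixtures: the rule backing thrown.expect(...) / expectMessage(...)
// and the pipeline whose options are passed to SourceTestUtils.readFromSource.
@Rule public ExpectedException thrown = ExpectedException.none();
@Rule public final transient TestPipeline p = TestPipeline.create();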
use of org.apache.beam.sdk.io.hadoop.format.HadoopFormatIO.HadoopInputFormatBoundedSource in project beam by apache.
the class HadoopFormatIOReadTest method testGetFractionConsumedForBadProgressValue.
/**
 * This test validates the method getFractionConsumed() when a bad progress value is returned by
 * the InputFormat's RecordReader.
*/
@Test
public void testGetFractionConsumedForBadProgressValue() throws Exception {
InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
EmployeeRecordReader mockReader = Mockito.mock(EmployeeRecordReader.class);
Mockito.when(mockInputFormat.createRecordReader(Mockito.any(), Mockito.any()))
    .thenReturn(mockReader);
Mockito.when(mockReader.nextKeyValue()).thenReturn(true);
// Set the progress to a bad value outside the valid range of 0 to 1.
Mockito.when(mockReader.getProgress()).thenReturn(2.0F);
InputSplit mockInputSplit = Mockito.mock(NewObjectsEmployeeInputSplit.class);
HadoopInputFormatBoundedSource<Text, Employee> boundedSource =
    new HadoopInputFormatBoundedSource<>(
        serConf,
        WritableCoder.of(Text.class),
        AvroCoder.of(Employee.class),
        null, // No key translation required.
        null, // No value translation required.
        new SerializableSplit(mockInputSplit),
        false,
        false);
boundedSource.setInputFormatObj(mockInputFormat);
BoundedReader<KV<Text, Employee>> reader = boundedSource.createReader(p.getOptions());
assertEquals(Double.valueOf(0), reader.getFractionConsumed());
boolean start = reader.start();
assertTrue(start);
if (start) {
boolean advance = reader.advance();
assertEquals(null, reader.getFractionConsumed());
assertTrue(advance);
if (advance) {
advance = reader.advance();
assertEquals(null, reader.getFractionConsumed());
}
}
// Validate that getFractionConsumed() returns null after a few reads, since getProgress()
// returns the invalid value 2.0, which is outside the range of 0 to 1.
assertEquals(null, reader.getFractionConsumed());
reader.close();
}
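For context, the defensive logic this test exercises could look roughly like the sketch below. It is an illustrative assumption, not the actual Beam implementation, and currentReader is a hypothetical field holding the wrapped Hadoop RecordReader:

// Sketch of a getFractionConsumed() that tolerates a misbehaving RecordReader.
@Override
public Double getFractionConsumed() {
  if (currentReader == null) {
    return 0.0; // Reading has not started yet.
  }
  try {
    float progress = currentReader.getProgress();
    // A RecordReader must report progress in [0, 1]; treat anything else as
    // unknown rather than returning a misleading fraction.
    if (progress < 0.0f || progress > 1.0f) {
      return null;
    }
    return (double) progress;
  } catch (IOException | InterruptedException e) {
    return null;
  }
}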
use of org.apache.beam.sdk.io.hadoop.format.HadoopFormatIO.HadoopInputFormatBoundedSource in project beam by apache.
the class HadoopFormatIOReadTest method testReadWithNullCreateRecordReader.
/**
 * This test validates the behavior of {@link HadoopInputFormatBoundedSource} if {@link
 * InputFormat#createRecordReader(InputSplit, TaskAttemptContext)} returns null.
*/
@Test
public void testReadWithNullCreateRecordReader() throws Exception {
InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
thrown.expect(IOException.class);
thrown.expectMessage(
    String.format("Null RecordReader object returned by %s", mockInputFormat.getClass()));
Mockito.when(
        mockInputFormat.createRecordReader(
            Mockito.any(InputSplit.class), Mockito.any(TaskAttemptContext.class)))
    .thenReturn(null);
HadoopInputFormatBoundedSource<Text, Employee> boundedSource =
    new HadoopInputFormatBoundedSource<>(
        serConf,
        WritableCoder.of(Text.class),
        AvroCoder.of(Employee.class),
        null, // No key translation required.
        null, // No value translation required.
        new SerializableSplit(),
        false,
        false);
boundedSource.setInputFormatObj(mockInputFormat);
SourceTestUtils.readFromSource(boundedSource, p.getOptions());
}
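The expected message implies a null guard inside the source roughly like the following sketch; inputFormatObj, split, and context are hypothetical names, and this is not the actual Beam implementation:

// Sketch: fail fast with a descriptive message when the InputFormat hands
// back a null RecordReader instead of failing later with an opaque NPE.
RecordReader<K, V> recordReader = inputFormatObj.createRecordReader(split, context);
if (recordReader == null) {
  throw new IOException(
      String.format("Null RecordReader object returned by %s", inputFormatObj.getClass()));
}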