Use of org.apache.hadoop.mapreduce.RecordReader in project druid by druid-io.
In class BaseParquetInputTest, method getFirstRow:

static Object getFirstRow(Job job, String parserType, String parquetPath) throws IOException, InterruptedException {
    File testFile = new File(parquetPath);
    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);
    InputFormat inputFormat = ReflectionUtils.newInstance(INPUT_FORMAT_CLASSES.get(parserType), job.getConfiguration());
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    try (RecordReader reader = inputFormat.createRecordReader(split, context)) {
        reader.initialize(split, context);
        reader.nextKeyValue();
        return reader.getCurrentValue();
    }
}
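The helper above reads only the first record of the split. For comparison, the full RecordReader lifecycle (create, initialize, iterate, close) follows a fixed pattern; the sketch below is illustrative, with the hypothetical name readAll, and is not part of the druid test:

// Drain an entire split through a RecordReader; try-with-resources closes the reader.
static <K, V> List<V> readAll(InputFormat<K, V> inputFormat, InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    List<V> values = new ArrayList<>();
    try (RecordReader<K, V> reader = inputFormat.createRecordReader(split, context)) {
        reader.initialize(split, context);        // required before the first nextKeyValue()
        while (reader.nextKeyValue()) {           // returns false once the split is exhausted
            values.add(reader.getCurrentValue()); // getCurrentKey() is also valid at this point
        }
    }
    return values;
}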
Use of org.apache.hadoop.mapreduce.RecordReader in project mongo-hadoop by mongodb.
In class GridFSInputFormatTest, method testRecordReader:

@Test
public void testRecordReader() throws IOException, InterruptedException {
    List<InputSplit> splits = getSplits();
    Configuration conf = getConfiguration();
    // Split the README into sections at Markdown headings.
    MongoConfigUtil.setGridFSDelimiterPattern(conf, "#+");
    TaskAttemptContext context = mockTaskAttemptContext(conf);
    List<String> sections = new ArrayList<String>();
    for (InputSplit split : splits) {
        RecordReader reader = new GridFSInputFormat.GridFSTextRecordReader();
        reader.initialize(split, context);
        while (reader.nextKeyValue()) {
            sections.add(reader.getCurrentValue().toString());
        }
    }
    assertEquals(Arrays.asList(readmeSections), sections);
}
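The mockTaskAttemptContext helper is not shown in the snippet. A minimal sketch of what such a helper could look like with EasyMock, assuming the reader only ever asks the context for its Configuration (the real helper in mongo-hadoop may be implemented differently):

private static TaskAttemptContext mockTaskAttemptContext(final Configuration conf) {
    // Nice mock: unexpected calls return defaults instead of failing the test.
    TaskAttemptContext context = EasyMock.createNiceMock(TaskAttemptContext.class);
    // Assumption: getConfiguration() is the only call the record reader makes.
    EasyMock.expect(context.getConfiguration()).andStubReturn(conf);
    EasyMock.replay(context);
    return context;
}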
Use of org.apache.hadoop.mapreduce.RecordReader in project elephant-bird by twitter.
In class TestLocationAsTuple, method testSimpleLoad:

@Test
public void testSimpleLoad() throws IOException {
    Configuration conf = new Configuration();
    Job job = EasyMock.createMock(Job.class);
    EasyMock.expect(HadoopCompat.getConfiguration(job)).andStubReturn(conf);
    EasyMock.replay(job);
    LoadFunc loader = new LocationAsTuple();
    loader.setUDFContextSignature("foo");
    loader.setLocation("a\tb", job);
    RecordReader reader = EasyMock.createMock(RecordReader.class);
    PigSplit split = EasyMock.createMock(PigSplit.class);
    EasyMock.expect(split.getConf()).andStubReturn(conf);
    loader.prepareToRead(reader, split);
    Tuple next = loader.getNext();
    assertEquals("a", next.get(0));
    assertEquals("b", next.get(1));
}
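Note that the RecordReader mock is given no expectations: as the passing test demonstrates, LocationAsTuple builds its tuple from the location string passed to setLocation (split here on the default tab delimiter), so the reader handed to prepareToRead is never consulted.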
Use of org.apache.hadoop.mapreduce.RecordReader in project elephant-bird by twitter.
In class TestLocationAsTuple, method testTokenizedLoad:

@Test
public void testTokenizedLoad() throws IOException {
    Configuration conf = new Configuration();
    Job job = EasyMock.createMock(Job.class);
    EasyMock.expect(HadoopCompat.getConfiguration(job)).andStubReturn(conf);
    EasyMock.replay(job);
    LoadFunc loader = new LocationAsTuple(",");
    loader.setUDFContextSignature("foo");
    loader.setLocation("a,b\tc", job);
    RecordReader reader = EasyMock.createMock(RecordReader.class);
    PigSplit split = EasyMock.createMock(PigSplit.class);
    EasyMock.expect(split.getConf()).andStubReturn(conf);
    loader.prepareToRead(reader, split);
    Tuple next = loader.getNext();
    assertEquals("a", next.get(0));
    assertEquals("b\tc", next.get(1));
}
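Taken together, the two tests show the delimiter handling: the no-argument constructor splits the location on the default tab delimiter ("a\tb" becomes ("a", "b")), while passing "," to the constructor splits only on commas, leaving the embedded tab intact ("a,b\tc" becomes ("a", "b\tc")).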