Use of org.apache.hadoop.mapreduce.lib.input.FileSplit in project hadoop by apache.
The class StreamInputFormat, method createRecordReader.
@Override
public RecordReader<Text, Text> createRecordReader(InputSplit genericSplit,
    TaskAttemptContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  String c = conf.get("stream.recordreader.class");
  if (c == null || c.indexOf("LineRecordReader") >= 0) {
    return super.createRecordReader(genericSplit, context);
  }
  // handling non-standard record reader (likely StreamXmlRecordReader)
  FileSplit split = (FileSplit) genericSplit;
  // LOG.info("getRecordReader start.....split=" + split);
  context.setStatus(split.toString());
  context.progress();
  // Open the file and seek to the start of the split
  FileSystem fs = split.getPath().getFileSystem(conf);
  FSDataInputStream in = fs.open(split.getPath());
  // Factory dispatch based on available params..
  Class readerClass;
  {
    readerClass = StreamUtil.goodClassOrNull(conf, c, null);
    if (readerClass == null) {
      throw new RuntimeException("Class not found: " + c);
    }
  }
  Constructor ctor;
  try {
    ctor = readerClass.getConstructor(new Class[] { FSDataInputStream.class,
        FileSplit.class, TaskAttemptContext.class, Configuration.class, FileSystem.class });
  } catch (NoSuchMethodException nsm) {
    throw new RuntimeException(nsm);
  }
  RecordReader<Text, Text> reader;
  try {
    reader = (RecordReader<Text, Text>) ctor.newInstance(new Object[] { in, split, context, conf, fs });
  } catch (Exception nsm) {
    throw new RuntimeException(nsm);
  }
  return reader;
}
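The reflective dispatch above only succeeds if the configured stream.recordreader.class exposes a constructor taking exactly (FSDataInputStream, FileSplit, TaskAttemptContext, Configuration, FileSystem). Below is a minimal sketch of a reader that satisfies that contract; the class name MinimalStreamRecordReader and its single-record logic are hypothetical illustrations, not the actual StreamXmlRecordReader.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

// Hypothetical reader: the constructor signature is what matters for the
// reflective lookup in StreamInputFormat.createRecordReader().
public class MinimalStreamRecordReader extends RecordReader<Text, Text> {

  private final FSDataInputStream in;
  private final long start;
  private final long end;
  private boolean done = false;
  private final Text key = new Text();
  private final Text value = new Text();

  public MinimalStreamRecordReader(FSDataInputStream in, FileSplit split,
      TaskAttemptContext context, Configuration conf, FileSystem fs) throws IOException {
    this.in = in;
    this.start = split.getStart();
    this.end = start + split.getLength();
    in.seek(start);   // position the stream at the start of the split
  }

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context) {
    // already initialized in the constructor
  }

  @Override
  public boolean nextKeyValue() throws IOException {
    if (done) {
      return false;
    }
    // Hypothetical record logic: emit a single record describing the split.
    key.set("offset:" + start);
    value.set("length:" + (end - start));
    done = true;
    return true;
  }

  @Override
  public Text getCurrentKey() { return key; }

  @Override
  public Text getCurrentValue() { return value; }

  @Override
  public float getProgress() { return done ? 1.0f : 0.0f; }

  @Override
  public void close() throws IOException {
    in.close();
  }
}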
Use of org.apache.hadoop.mapreduce.lib.input.FileSplit in project hadoop by apache.
The class TestUniformSizeInputFormat, method checkSplits.
private void checkSplits(Path listFile, List<InputSplit> splits) throws IOException {
  long lastEnd = 0;
  // we are not missing anything
  for (InputSplit split : splits) {
    FileSplit fileSplit = (FileSplit) split;
    long start = fileSplit.getStart();
    Assert.assertEquals(lastEnd, start);
    lastEnd = start + fileSplit.getLength();
  }
  // Verify there is nothing more to read from the input file
  SequenceFile.Reader reader = new SequenceFile.Reader(cluster.getFileSystem().getConf(),
      SequenceFile.Reader.file(listFile));
  try {
    reader.seek(lastEnd);
    CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
    Text srcRelPath = new Text();
    Assert.assertFalse(reader.next(srcRelPath, srcFileStatus));
  } finally {
    IOUtils.closeStream(reader);
  }
}
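The loop above asserts that the splits are contiguous: each split must start exactly where the previous one ended, and the listing file must contain nothing past the last split. A minimal sketch of building such back-to-back FileSplits over one file follows; the helper class ContiguousSplits and the chunkSize parameter are assumptions for illustration, not what UniformSizeInputFormat actually computes.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

// Sketch: carve a file of fileLength bytes into back-to-back FileSplits.
public class ContiguousSplits {
  public static List<InputSplit> splitFile(Path file, long fileLength, long chunkSize) {
    List<InputSplit> splits = new ArrayList<>();
    long offset = 0;
    while (offset < fileLength) {
      long length = Math.min(chunkSize, fileLength - offset);
      // Each split starts where the previous one ended, so checkSplits()
      // sees lastEnd == getStart() for every split.
      splits.add(new FileSplit(file, offset, length, null));
      offset += length;
    }
    return splits;
  }
}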
Use of org.apache.hadoop.mapreduce.lib.input.FileSplit in project hadoop by apache.
The class TestJobSplitWriter, method testMaxBlockLocationsNewSplits.
@Test
public void testMaxBlockLocationsNewSplits() throws Exception {
  TEST_DIR.mkdirs();
  try {
    Configuration conf = new Configuration();
    conf.setInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY, 4);
    Path submitDir = new Path(TEST_DIR.getAbsolutePath());
    FileSystem fs = FileSystem.getLocal(conf);
    FileSplit split = new FileSplit(new Path("/some/path"), 0, 1,
        new String[] { "loc1", "loc2", "loc3", "loc4", "loc5" });
    JobSplitWriter.createSplitFiles(submitDir, conf, fs, new FileSplit[] { split });
    JobSplit.TaskSplitMetaInfo[] infos = SplitMetaInfoReader.readSplitMetaInfo(new JobID(), fs,
        conf, submitDir);
    assertEquals("unexpected number of splits", 1, infos.length);
    assertEquals("unexpected number of split locations", 4, infos[0].getLocations().length);
  } finally {
    FileUtil.fullyDelete(TEST_DIR);
  }
}
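The second assertion relies on JobSplitWriter capping the persisted locations at MRConfig.MAX_BLOCK_LOCATIONS_KEY (4 here) even though the split was built with five hosts. A small sketch, assuming a standalone main method, showing that the in-memory FileSplit itself still reports all five locations:

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class SplitLocations {
  // Sketch: the FileSplit keeps every host it was built with; only the
  // serialized split meta info written by JobSplitWriter is capped at
  // MRConfig.MAX_BLOCK_LOCATIONS_KEY (4 in the test above).
  public static void main(String[] args) throws IOException {
    FileSplit split = new FileSplit(new Path("/some/path"), 0, 1,
        new String[] { "loc1", "loc2", "loc3", "loc4", "loc5" });
    System.out.println(split.getLocations().length);   // prints 5
  }
}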
Use of org.apache.hadoop.mapreduce.lib.input.FileSplit in project druid by druid-io.
The class DruidOrcInputFormatTest, method setUp.
@Before
public void setUp() throws IOException {
  Configuration conf = new Configuration();
  job = Job.getInstance(conf);
  config = HadoopDruidIndexerConfig.fromFile(new File("example/hadoop_orc_job.json"));
  config.intoConfiguration(job);
  testFile = makeOrcFile();
  path = new Path(testFile.getAbsoluteFile().toURI());
  split = new FileSplit(path, 0, testFile.length(), null);
}
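The split built in setUp covers the whole test file (offset 0, length testFile.length(), no preferred hosts). As a generic illustration of how such a whole-file split is consumed, the sketch below drives it through TextInputFormat and its LineRecordReader rather than the ORC reader this test actually exercises; the wrapper class WholeFileSplitRead is made up for the example.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class WholeFileSplitRead {
  public static void readAll(Configuration conf, FileSplit split) throws Exception {
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    TextInputFormat format = new TextInputFormat();
    RecordReader<LongWritable, Text> reader = format.createRecordReader(split, context);
    reader.initialize(split, context);
    try {
      while (reader.nextKeyValue()) {
        // key = byte offset within the file, value = line contents
        System.out.println(reader.getCurrentKey() + "\t" + reader.getCurrentValue());
      }
    } finally {
      reader.close();
    }
  }
}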
Use of org.apache.hadoop.mapreduce.lib.input.FileSplit in project pinot by linkedin.
The class DelegatingAvroKeyInputFormat, method createRecordReader.
public org.apache.hadoop.mapreduce.RecordReader<org.apache.avro.mapred.AvroKey<T>, NullWritable> createRecordReader(
    InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  LOGGER.info("DelegatingAvroKeyInputFormat.createRecordReader() for split:{}", split);
  FileSplit fileSplit = (FileSplit) split;
  Configuration configuration = context.getConfiguration();
  String sourceName = getSourceNameFromPath(fileSplit, configuration);
  LOGGER.info("Source Name for path {} : {}", fileSplit.getPath(), sourceName);
  Map<String, String> schemaJSONMapping = new ObjectMapper().readValue(
      configuration.get("schema.json.mapping"), MAP_STRING_STRING_TYPE);
  LOGGER.info("Schema JSON Mapping: {}", schemaJSONMapping);
  String sourceSchemaJSON = schemaJSONMapping.get(sourceName);
  Schema schema = new Schema.Parser().parse(sourceSchemaJSON);
  return new AvroKeyRecordReader<T>(schema);
}
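getSourceNameFromPath() is not part of this excerpt. One plausible implementation, deriving the source name from the parent directory of the split's path, is sketched below as an assumption; the real pinot/thirdeye helper may resolve the name differently (for example, by consulting configured source roots).

// Hypothetical helper for illustration only: uses the immediate parent
// directory name of the split's file as the source name. The configuration
// argument is kept to match the call site above, even though this sketch
// does not use it.
static String getSourceNameFromPath(FileSplit fileSplit, Configuration configuration) {
  Path parent = fileSplit.getPath().getParent();
  return parent == null ? "" : parent.getName();
}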