use of org.apache.hadoop.mapred.FileSplit in project hadoop by apache.
the class DumpTypedBytes method dumpTypedBytes.
/**
* Dump given list of files to standard output as typed bytes.
*/
@SuppressWarnings("unchecked")
private int dumpTypedBytes(List<FileStatus> files) throws IOException {
  JobConf job = new JobConf(getConf());
  DataOutputStream dout = new DataOutputStream(System.out);
  AutoInputFormat autoInputFormat = new AutoInputFormat();
  for (FileStatus fileStatus : files) {
    FileSplit split = new FileSplit(fileStatus.getPath(), 0,
        fileStatus.getLen() * fileStatus.getBlockSize(), (String[]) null);
    RecordReader recReader = null;
    try {
      recReader = autoInputFormat.getRecordReader(split, job, Reporter.NULL);
      Object key = recReader.createKey();
      Object value = recReader.createValue();
      while (recReader.next(key, value)) {
        if (key instanceof Writable) {
          TypedBytesWritableOutput.get(dout).write((Writable) key);
        } else {
          TypedBytesOutput.get(dout).write(key);
        }
        if (value instanceof Writable) {
          TypedBytesWritableOutput.get(dout).write((Writable) value);
        } else {
          TypedBytesOutput.get(dout).write(value);
        }
      }
    } finally {
      if (recReader != null) {
        recReader.close();
      }
    }
  }
  dout.flush();
  return 0;
}
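For context, a minimal sketch of building a whole-file FileSplit from a FileStatus with the old mapred API; the configuration and file name below are illustrative assumptions, not part of the Hadoop source above.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;

public class WholeFileSplitSketch {
  // Build a split covering one file from offset 0 to its full length, with no locality hints.
  public static FileSplit wholeFileSplit(Configuration conf, String file) throws IOException {
    FileSystem fs = FileSystem.get(conf);                  // file system named by the configuration
    FileStatus status = fs.getFileStatus(new Path(file));  // metadata for the input file
    return new FileSplit(status.getPath(), 0, status.getLen(), (String[]) null);
  }
}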
use of org.apache.hadoop.mapred.FileSplit in project flink by apache.
the class HadoopInputFormatTest method testCreateInputSplits.
@Test
public void testCreateInputSplits() throws Exception {
  FileSplit[] result = new FileSplit[1];
  result[0] = getFileSplit();
  DummyInputFormat inputFormat = mock(DummyInputFormat.class);
  when(inputFormat.getSplits(any(JobConf.class), anyInt())).thenReturn(result);
  HadoopInputFormat<String, Long> hadoopInputFormat =
      new HadoopInputFormat<>(inputFormat, String.class, Long.class, new JobConf());
  hadoopInputFormat.createInputSplits(2);
  verify(inputFormat, times(1)).getSplits(any(JobConf.class), anyInt());
}
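The getFileSplit() helper is not part of this excerpt; a hypothetical version could simply return a fixed split so the mocked getSplits() has something to hand back (the path and length are made up for illustration, and org.apache.hadoop.fs.Path is assumed to be imported).

private FileSplit getFileSplit() {
  // Hypothetical helper: a single split over an arbitrary path; the values carry no meaning here.
  return new FileSplit(new Path("path"), 0, 64, (String[]) null);
}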
use of org.apache.hadoop.mapred.FileSplit in project trevni by cutting.
the class AvroTrevniInputFormat method getRecordReader.
@Override
public RecordReader<AvroWrapper<T>, NullWritable> getRecordReader(InputSplit split, final JobConf job, Reporter reporter) throws IOException {
  final FileSplit file = (FileSplit) split;
  reporter.setStatus(file.toString());
  final AvroColumnReader.Params params =
      new AvroColumnReader.Params(new HadoopInput(file.getPath(), job));
  params.setModel(ReflectData.get());
  if (job.get(AvroJob.INPUT_SCHEMA) != null)
    params.setSchema(AvroJob.getInputSchema(job));
  return new RecordReader<AvroWrapper<T>, NullWritable>() {
    private AvroColumnReader<T> reader = new AvroColumnReader<T>(params);
    private float rows = reader.getRowCount();
    private long row;
    public AvroWrapper<T> createKey() {
      return new AvroWrapper<T>(null);
    }
    public NullWritable createValue() {
      return NullWritable.get();
    }
    public boolean next(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      if (!reader.hasNext())
        return false;
      wrapper.datum(reader.next());
      row++;
      return true;
    }
    public float getProgress() throws IOException {
      return row / rows;
    }
    public long getPos() throws IOException {
      return row;
    }
    public void close() throws IOException {
      reader.close();
    }
  };
}
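A minimal sketch of wiring this input format into a job with the old mapred API; the input path is a placeholder, and the schema line is optional, matching the AvroJob.INPUT_SCHEMA check in getRecordReader above.

JobConf job = new JobConf();
job.setInputFormat(AvroTrevniInputFormat.class);              // read Trevni files as Avro records
org.apache.hadoop.mapred.FileInputFormat.addInputPath(job, new Path("/data/trevni-in"));  // placeholder path
// Optionally pin a reader schema; getRecordReader above honors AvroJob.INPUT_SCHEMA when set:
// AvroJob.setInputSchema(job, readerSchema);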
use of org.apache.hadoop.mapred.FileSplit in project hadoop-pcap by RIPE-NCC.
the class PcapInputFormat method getRecordReader.
@Override
public RecordReader<LongWritable, ObjectWritable> getRecordReader(InputSplit split, JobConf config, Reporter reporter) throws IOException {
  FileSplit fileSplit = (FileSplit) split;
  Path path = fileSplit.getPath();
  LOG.info("Reading PCAP: " + path.toString());
  long start = 0L;
  long length = fileSplit.getLength();
  return initPcapRecordReader(path, start, length, reporter, config);
}
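A sketch of the standard old-API loop for driving the returned reader; the inputFormat, split, and config below are placeholders that the framework supplies in a real job.

RecordReader<LongWritable, ObjectWritable> reader =
    inputFormat.getRecordReader(split, config, Reporter.NULL);  // placeholder arguments
try {
  LongWritable key = reader.createKey();
  ObjectWritable value = reader.createValue();
  while (reader.next(key, value)) {
    // each ObjectWritable value wraps one record produced by the PCAP reader
  }
} finally {
  reader.close();
}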
use of org.apache.hadoop.mapred.FileSplit in project asterixdb by apache.
the class HDFSInputStream method nextInputSplit.
private boolean nextInputSplit() throws IOException {
  for (; currentSplitIndex < inputSplits.length; currentSplitIndex++) {
    /**
     * read all the partitions scheduled to the current node
     */
    if (readSchedule[currentSplitIndex].equals(nodeName)) {
      /**
       * pick an unread split to read synchronize among
       * simultaneous partitions in the same machine
       */
      synchronized (read) {
        if (read[currentSplitIndex] == false) {
          read[currentSplitIndex] = true;
        } else {
          continue;
        }
      }
      if (snapshot != null) {
        String fileName = ((FileSplit) (inputSplits[currentSplitIndex])).getPath().toUri().getPath();
        FileStatus fileStatus = hdfs.getFileStatus(new Path(fileName));
        // Skip if not the same file stored in the files snapshot
        if (fileStatus.getModificationTime() != snapshot.get(currentSplitIndex).getLastModefiedTime().getTime()) {
          continue;
        }
      }
      reader.close();
      reader = getRecordReader(currentSplitIndex);
      return true;
    }
  }
  return false;
}
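The heart of nextInputSplit is claiming each split exactly once across concurrent readers on the same node; below is a stripped-down sketch of that claim-once pattern (the names are illustrative, not the AsterixDB fields).

// Claim split i exactly once among readers sharing the same 'claimed' array.
private static boolean claim(boolean[] claimed, int i) {
  synchronized (claimed) {
    if (claimed[i]) {
      return false;    // another reader already took this split
    }
    claimed[i] = true; // mark it taken while still holding the lock
    return true;
  }
}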