Use of org.apache.hadoop.mapred.RecordReader in project hadoop by apache.
The class DumpTypedBytes, method dumpTypedBytes.
/**
 * Dump given list of files to standard output as typed bytes.
 */
@SuppressWarnings("unchecked")
private int dumpTypedBytes(List<FileStatus> files) throws IOException {
  JobConf job = new JobConf(getConf());
  DataOutputStream dout = new DataOutputStream(System.out);
  AutoInputFormat autoInputFormat = new AutoInputFormat();
  for (FileStatus fileStatus : files) {
    FileSplit split = new FileSplit(fileStatus.getPath(), 0,
        fileStatus.getLen() * fileStatus.getBlockSize(), (String[]) null);
    RecordReader recReader = null;
    try {
      recReader = autoInputFormat.getRecordReader(split, job, Reporter.NULL);
      Object key = recReader.createKey();
      Object value = recReader.createValue();
      while (recReader.next(key, value)) {
        if (key instanceof Writable) {
          TypedBytesWritableOutput.get(dout).write((Writable) key);
        } else {
          TypedBytesOutput.get(dout).write(key);
        }
        if (value instanceof Writable) {
          TypedBytesWritableOutput.get(dout).write((Writable) value);
        } else {
          TypedBytesOutput.get(dout).write(value);
        }
      }
    } finally {
      if (recReader != null) {
        recReader.close();
      }
    }
  }
  dout.flush();
  return 0;
}
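The method above relies on the classic mapred read loop: getRecordReader, createKey/createValue, then next() until the reader is exhausted. Below is a minimal, self-contained sketch of that same loop using TextInputFormat instead of AutoInputFormat; the input path and configuration are illustrative, not part of the original class.

// Hypothetical stand-alone example of the mapred RecordReader read loop.
JobConf job = new JobConf();
FileInputFormat.setInputPaths(job, new Path("/tmp/example.txt")); // illustrative path
TextInputFormat format = new TextInputFormat();
format.configure(job);
for (InputSplit split : format.getSplits(job, 1)) {
  RecordReader<LongWritable, Text> reader = format.getRecordReader(split, job, Reporter.NULL);
  try {
    LongWritable key = reader.createKey();
    Text value = reader.createValue();
    while (reader.next(key, value)) {
      System.out.println(key.get() + "\t" + value); // byte offset and line text
    }
  } finally {
    reader.close();
  }
}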
Use of org.apache.hadoop.mapred.RecordReader in project hadoop by apache.
The class TestAutoInputFormat, method testFormat.
@SuppressWarnings({ "unchecked", "deprecation" })
@Test
public void testFormat() throws IOException {
  JobConf job = new JobConf(conf);
  FileSystem fs = FileSystem.getLocal(conf);
  Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
  Path txtFile = new Path(dir, "auto.txt");
  Path seqFile = new Path(dir, "auto.seq");
  fs.delete(dir, true);
  FileInputFormat.setInputPaths(job, dir);
  Writer txtWriter = new OutputStreamWriter(fs.create(txtFile));
  try {
    for (int i = 0; i < LINES_COUNT; i++) {
      txtWriter.write("" + (10 * i));
      txtWriter.write("\n");
    }
  } finally {
    txtWriter.close();
  }
  SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, seqFile,
      IntWritable.class, LongWritable.class);
  try {
    for (int i = 0; i < RECORDS_COUNT; i++) {
      IntWritable key = new IntWritable(11 * i);
      LongWritable value = new LongWritable(12 * i);
      seqWriter.append(key, value);
    }
  } finally {
    seqWriter.close();
  }
  AutoInputFormat format = new AutoInputFormat();
  InputSplit[] splits = format.getSplits(job, SPLITS_COUNT);
  for (InputSplit split : splits) {
    RecordReader reader = format.getRecordReader(split, job, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    try {
      while (reader.next(key, value)) {
        if (key instanceof LongWritable) {
          assertEquals("Wrong value class.", Text.class, value.getClass());
          assertTrue("Invalid value", Integer.parseInt(((Text) value).toString()) % 10 == 0);
        } else {
          assertEquals("Wrong key class.", IntWritable.class, key.getClass());
          assertEquals("Wrong value class.", LongWritable.class, value.getClass());
          assertTrue("Invalid key.", ((IntWritable) key).get() % 11 == 0);
          assertTrue("Invalid value.", ((LongWritable) value).get() % 12 == 0);
        }
      }
    } finally {
      reader.close();
    }
  }
}
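For comparison, the SequenceFile written by the test can also be read back directly with SequenceFile.Reader, bypassing AutoInputFormat entirely. A minimal sketch follows; the file path is illustrative.

// Hypothetical direct read of an IntWritable/LongWritable SequenceFile.
Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
Path seqFile = new Path("/tmp/mapred/auto.seq"); // illustrative path
SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqFile, conf);
try {
  IntWritable key = new IntWritable();
  LongWritable value = new LongWritable();
  while (reader.next(key, value)) {
    System.out.println(key.get() + " -> " + value.get());
  }
} finally {
  reader.close();
}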
Use of org.apache.hadoop.mapred.RecordReader in project hive by apache.
The class HiveHBaseTableInputFormat, method getRecordReader.
@Override
public RecordReader<ImmutableBytesWritable, ResultWritable> getRecordReader(
    InputSplit split, JobConf jobConf, final Reporter reporter) throws IOException {
  HBaseSplit hbaseSplit = (HBaseSplit) split;
  TableSplit tableSplit = hbaseSplit.getTableSplit();
  if (conn == null) {
    conn = ConnectionFactory.createConnection(HBaseConfiguration.create(jobConf));
  }
  initializeTable(conn, tableSplit.getTable());
  setScan(HiveHBaseInputFormatUtil.getScan(jobConf));
  Job job = new Job(jobConf);
  TaskAttemptContext tac =
      ShimLoader.getHadoopShims().newTaskAttemptContext(job.getConfiguration(), reporter);
  final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader =
      createRecordReader(tableSplit, tac);
  try {
    recordReader.initialize(tableSplit, tac);
  } catch (InterruptedException e) {
    // Free up the HTable connections
    closeTable();
    if (conn != null) {
      conn.close();
      conn = null;
    }
    throw new IOException("Failed to initialize RecordReader", e);
  }
  return new RecordReader<ImmutableBytesWritable, ResultWritable>() {

    @Override
    public void close() throws IOException {
      recordReader.close();
      closeTable();
      if (conn != null) {
        conn.close();
        conn = null;
      }
    }

    @Override
    public ImmutableBytesWritable createKey() {
      return new ImmutableBytesWritable();
    }

    @Override
    public ResultWritable createValue() {
      return new ResultWritable(new Result());
    }

    @Override
    public long getPos() throws IOException {
      return 0;
    }

    @Override
    public float getProgress() throws IOException {
      float progress = 0.0F;
      try {
        progress = recordReader.getProgress();
      } catch (InterruptedException e) {
        throw new IOException(e);
      }
      return progress;
    }

    @Override
    public boolean next(ImmutableBytesWritable rowKey, ResultWritable value) throws IOException {
      boolean next = false;
      try {
        next = recordReader.nextKeyValue();
        if (next) {
          rowKey.set(recordReader.getCurrentValue().getRow());
          value.setResult(recordReader.getCurrentValue());
        }
      } catch (InterruptedException e) {
        throw new IOException(e);
      }
      return next;
    }
  };
}
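The anonymous class above is an instance of a common bridging pattern: wrapping a new-API (org.apache.hadoop.mapreduce) RecordReader so it can be consumed through the old (org.apache.hadoop.mapred) interface. Below is a stripped-down sketch of that pattern for a LongWritable/Text reader; the class name and copy logic are illustrative, not Hive's actual code.

// Hypothetical adapter: expose a mapreduce RecordReader through the mapred interface.
class NewToOldLineReader implements RecordReader<LongWritable, Text> {
  private final org.apache.hadoop.mapreduce.RecordReader<LongWritable, Text> delegate;

  NewToOldLineReader(org.apache.hadoop.mapreduce.RecordReader<LongWritable, Text> delegate) {
    this.delegate = delegate;
  }

  @Override public LongWritable createKey() { return new LongWritable(); }
  @Override public Text createValue() { return new Text(); }
  @Override public long getPos() { return 0; }
  @Override public void close() throws IOException { delegate.close(); }

  @Override
  public float getProgress() throws IOException {
    try {
      return delegate.getProgress();
    } catch (InterruptedException e) {
      throw new IOException(e);
    }
  }

  @Override
  public boolean next(LongWritable key, Text value) throws IOException {
    try {
      boolean hasNext = delegate.nextKeyValue();
      if (hasNext) {
        // Copy the delegate's current record into the caller-supplied objects,
        // mirroring how the Hive reader copies the HBase Result above.
        key.set(delegate.getCurrentKey().get());
        value.set(delegate.getCurrentValue());
      }
      return hasNext;
    } catch (InterruptedException e) {
      throw new IOException(e);
    }
  }
}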
Use of org.apache.hadoop.mapred.RecordReader in project hive by apache.
The class HiveContextAwareRecordReader, method initIOContext.
public void initIOContext(FileSplit split, JobConf job, Class inputFormatClass,
    RecordReader recordReader) throws IOException {
  boolean blockPointer = false;
  long blockStart = -1;
  FileSplit fileSplit = split;
  Path path = fileSplit.getPath();
  FileSystem fs = path.getFileSystem(job);
  if (inputFormatClass.getName().contains("SequenceFile")) {
    SequenceFile.Reader in = new SequenceFile.Reader(fs, path, job);
    blockPointer = in.isBlockCompressed();
    in.sync(fileSplit.getStart());
    blockStart = in.getPosition();
    in.close();
  } else if (recordReader instanceof RCFileRecordReader) {
    blockPointer = true;
    blockStart = ((RCFileRecordReader) recordReader).getStart();
  } else if (inputFormatClass.getName().contains("RCFile")) {
    blockPointer = true;
    RCFile.Reader in = new RCFile.Reader(fs, path, job);
    in.sync(fileSplit.getStart());
    blockStart = in.getPosition();
    in.close();
  }
  this.jobConf = job;
  this.initIOContext(blockStart, blockPointer, path.makeQualified(fs));
  this.initIOContextSortedProps(split, recordReader, job);
}
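The SequenceFile branch above locates the first record boundary at or after the split's start offset by calling sync() and then getPosition(). A small sketch of that probe in isolation; the path and offset are illustrative.

// Hypothetical: find the first sync point at or after a given split offset.
Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
Path path = new Path("/tmp/data.seq"); // illustrative path
long splitStart = 4096L;               // illustrative split start offset
SequenceFile.Reader in = new SequenceFile.Reader(fs, path, conf);
try {
  boolean blockCompressed = in.isBlockCompressed();
  in.sync(splitStart);                 // skip forward to the next sync marker
  long blockStart = in.getPosition();  // boundary that would seed the IOContext
  System.out.println(blockCompressed + " " + blockStart);
} finally {
  in.close();
}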
Use of org.apache.hadoop.mapred.RecordReader in project hive by apache.
The class SymlinkTextInputFormat, method getRecordReader.
@Override
public RecordReader<LongWritable, Text> getRecordReader(InputSplit split, JobConf job,
    Reporter reporter) throws IOException {
  InputSplit targetSplit = ((SymlinkTextInputSplit) split).getTargetSplit();
  // The target data is in TextInputFormat.
  TextInputFormat inputFormat = new TextInputFormat();
  inputFormat.configure(job);
  RecordReader innerReader = null;
  try {
    innerReader = inputFormat.getRecordReader(targetSplit, job, reporter);
  } catch (Exception e) {
    innerReader = HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(e, job);
  }
  HiveRecordReader rr = new HiveRecordReader(innerReader, job);
  rr.initIOContext((FileSplit) targetSplit, job, TextInputFormat.class, innerReader);
  return rr;
}
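A symlink split ultimately points at plain text data: the job's input paths contain small text files whose lines are the paths of the real data files, and the reader above delegates to TextInputFormat for those targets. A minimal sketch of producing such a symlink file (all paths are illustrative):

// Hypothetical: write a "symlink" file whose lines point at the real data files.
Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
Path symlinkDir = new Path("/tmp/symlink_dir");   // illustrative job input path
Path symlinkFile = new Path(symlinkDir, "links.txt");
FSDataOutputStream out = fs.create(symlinkFile);
try {
  // Each line is the path of a target text file to be read by TextInputFormat.
  out.writeBytes("/tmp/data/part-00000.txt\n");
  out.writeBytes("/tmp/data/part-00001.txt\n");
} finally {
  out.close();
}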