Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache.
In the class TestFileOutputFormat, the method testCheckOutputSpecsException.
public void testCheckOutputSpecsException() throws Exception {
  Job job = Job.getInstance();
  Path outDir = new Path(System.getProperty("test.build.data", "/tmp"), "output");
  FileSystem fs = outDir.getFileSystem(new Configuration());
  // Create the output dir so it already exists and set it for the job
  fs.mkdirs(outDir);
  FileOutputFormat.setOutputPath(job, outDir);
  // We don't need a "full" implementation of FileOutputFormat for this test
  FileOutputFormat fof = new FileOutputFormat() {

    @Override
    public RecordWriter getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
      return null;
    }
  };
  try {
    try {
      // This should throw a FileAlreadyExistsException because the outputDir
      // already exists
      fof.checkOutputSpecs(job);
      fail("Should have thrown a FileAlreadyExistsException");
    } catch (FileAlreadyExistsException re) {
      // correct behavior
    }
  } finally {
    // Cleanup
    if (fs.exists(outDir)) {
      fs.delete(outDir, true);
    }
  }
}
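For contrast, here is a minimal sketch (not part of the original test) of the non-failing path: when the output directory is absent, checkOutputSpecs of a concrete FileOutputFormat such as TextOutputFormat returns without throwing. The method name and the "output-new" directory are illustrative only.

public void checkOutputSpecsOnFreshDir() throws Exception {
  Job job = Job.getInstance();
  Path freshDir = new Path(System.getProperty("test.build.data", "/tmp"), "output-new");
  FileSystem fs = freshDir.getFileSystem(new Configuration());
  // Make sure the directory is absent so validation has nothing to object to
  fs.delete(freshDir, true);
  FileOutputFormat.setOutputPath(job, freshDir);
  // Any concrete FileOutputFormat will do; no FileAlreadyExistsException is expected here
  new TextOutputFormat<>().checkOutputSpecs(job);
}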
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hbase by apache.
In the class MultiHFileOutputFormat, the method createMultiHFileRecordWriter.
static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createMultiHFileRecordWriter(final TaskAttemptContext context) throws IOException {
  // Get the path of the output directory
  final Path outputPath = FileOutputFormat.getOutputPath(context);
  final Path outputDir = new FileOutputCommitter(outputPath, context).getWorkPath();
  final Configuration conf = context.getConfiguration();
  final FileSystem fs = outputDir.getFileSystem(conf);
  // Map of tables to writers
  final Map<ImmutableBytesWritable, RecordWriter<ImmutableBytesWritable, V>> tableWriters = new HashMap<>();
  return new RecordWriter<ImmutableBytesWritable, V>() {

    @Override
    public void write(ImmutableBytesWritable tableName, V cell) throws IOException, InterruptedException {
      RecordWriter<ImmutableBytesWritable, V> tableWriter = tableWriters.get(tableName);
      // If this is a new table, create its output directory
      if (tableWriter == null) {
        // using the table name as the directory name
        final Path tableOutputDir = new Path(outputDir, Bytes.toString(tableName.copyBytes()));
        fs.mkdirs(tableOutputDir);
        LOG.info("Writing Table '" + tableName.toString() + "' data into following directory" + tableOutputDir.toString());
        // Create a writer for this specific table
        tableWriter = new HFileOutputFormat2.HFileRecordWriter<>(context, tableOutputDir);
        // Cache the writer so it is reused for subsequent cells of the same table
        tableWriters.put(tableName, tableWriter);
      }
      // Write <Row, Cell> into tableWriter; the row key is not used here, so null is passed
      tableWriter.write(null, cell);
    }

    @Override
    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
      for (RecordWriter<ImmutableBytesWritable, V> writer : tableWriters.values()) {
        writer.close(c);
      }
    }
  };
}
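A hedged usage sketch of the writer returned above (called from within the same class, since the factory method is package-private): the map key selects the destination table, and per-table HFile writers are created lazily under the committer's work path. The table names, row, and column values below are placeholders.

RecordWriter<ImmutableBytesWritable, Cell> writer = createMultiHFileRecordWriter(context);
ImmutableBytesWritable table1 = new ImmutableBytesWritable(Bytes.toBytes("table1"));
ImmutableBytesWritable table2 = new ImmutableBytesWritable(Bytes.toBytes("table2"));
Cell cell = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes("v"));
writer.write(table1, cell);   // lazily creates <workPath>/table1 and its HFile writer
writer.write(table2, cell);   // a second writer is created under <workPath>/table2
writer.close(context);        // closes every per-table writer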
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hbase by apache.
In the class MultiTableInputFormatBase, the method createRecordReader.
/**
* Builds a TableRecordReader. If no TableRecordReader was provided, uses the
* default.
*
* @param split The split to work with.
* @param context The current context.
* @return The newly created record reader.
* @throws IOException When creating the reader fails.
* @throws InterruptedException when record reader initialization fails
* @see org.apache.hadoop.mapreduce.InputFormat#createRecordReader(
* org.apache.hadoop.mapreduce.InputSplit,
* org.apache.hadoop.mapreduce.TaskAttemptContext)
*/
@Override
public RecordReader<ImmutableBytesWritable, Result> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  TableSplit tSplit = (TableSplit) split;
  LOG.info(MessageFormat.format("Input split length: {0} bytes.", tSplit.getLength()));
  if (tSplit.getTable() == null) {
    throw new IOException("Cannot create a record reader because of a" + " previous error. Please look at the previous logs lines from" + " the task's full log for more details.");
  }
  final Connection connection = ConnectionFactory.createConnection(context.getConfiguration());
  Table table = connection.getTable(tSplit.getTable());
  if (this.tableRecordReader == null) {
    this.tableRecordReader = new TableRecordReader();
  }
  final TableRecordReader trr = this.tableRecordReader;
  try {
    Scan sc = tSplit.getScan();
    sc.setStartRow(tSplit.getStartRow());
    sc.setStopRow(tSplit.getEndRow());
    trr.setScan(sc);
    trr.setTable(table);
    return new RecordReader<ImmutableBytesWritable, Result>() {

      @Override
      public void close() throws IOException {
        trr.close();
        connection.close();
      }

      @Override
      public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
        return trr.getCurrentKey();
      }

      @Override
      public Result getCurrentValue() throws IOException, InterruptedException {
        return trr.getCurrentValue();
      }

      @Override
      public float getProgress() throws IOException, InterruptedException {
        return trr.getProgress();
      }

      @Override
      public void initialize(InputSplit inputsplit, TaskAttemptContext context) throws IOException, InterruptedException {
        trr.initialize(inputsplit, context);
      }

      @Override
      public boolean nextKeyValue() throws IOException, InterruptedException {
        return trr.nextKeyValue();
      }
    };
  } catch (IOException ioe) {
    // If there is an exception make sure that all
    // resources are closed and released.
    trr.close();
    connection.close();
    throw ioe;
  }
}
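Driver-side, a subclass such as MultiTableInputFormat is typically wired up through TableMapReduceUtil with one Scan per source table; each map task then receives the delegating record reader built above. A hedged sketch, where the table names and MyTableMapper are placeholders:

List<Scan> scans = new ArrayList<>();
Scan scan1 = new Scan();
scan1.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes("table1"));
scans.add(scan1);
Scan scan2 = new Scan();
scan2.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes("table2"));
scans.add(scan2);
Job job = Job.getInstance(HBaseConfiguration.create(), "multi-table-scan");
// One Scan per table; the input format turns each region/scan pair into a TableSplit
TableMapReduceUtil.initTableMapperJob(scans, MyTableMapper.class, ImmutableBytesWritable.class, Result.class, job);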
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hive by apache.
In the class HiveHBaseTableInputFormat, the method getRecordReader.
@Override
public RecordReader<ImmutableBytesWritable, ResultWritable> getRecordReader(InputSplit split, JobConf jobConf, final Reporter reporter) throws IOException {
  HBaseSplit hbaseSplit = (HBaseSplit) split;
  TableSplit tableSplit = hbaseSplit.getTableSplit();
  if (conn == null) {
    conn = ConnectionFactory.createConnection(HBaseConfiguration.create(jobConf));
  }
  initializeTable(conn, tableSplit.getTable());
  setScan(HiveHBaseInputFormatUtil.getScan(jobConf));
  Job job = new Job(jobConf);
  TaskAttemptContext tac = ShimLoader.getHadoopShims().newTaskAttemptContext(job.getConfiguration(), reporter);
  final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader = createRecordReader(tableSplit, tac);
  try {
    recordReader.initialize(tableSplit, tac);
  } catch (InterruptedException e) {
    // Free up the HTable connections
    closeTable();
    if (conn != null) {
      conn.close();
      conn = null;
    }
    throw new IOException("Failed to initialize RecordReader", e);
  }
  return new RecordReader<ImmutableBytesWritable, ResultWritable>() {

    @Override
    public void close() throws IOException {
      recordReader.close();
      closeTable();
      if (conn != null) {
        conn.close();
        conn = null;
      }
    }

    @Override
    public ImmutableBytesWritable createKey() {
      return new ImmutableBytesWritable();
    }

    @Override
    public ResultWritable createValue() {
      return new ResultWritable(new Result());
    }

    @Override
    public long getPos() throws IOException {
      return 0;
    }

    @Override
    public float getProgress() throws IOException {
      float progress = 0.0F;
      try {
        progress = recordReader.getProgress();
      } catch (InterruptedException e) {
        throw new IOException(e);
      }
      return progress;
    }

    @Override
    public boolean next(ImmutableBytesWritable rowKey, ResultWritable value) throws IOException {
      boolean next = false;
      try {
        next = recordReader.nextKeyValue();
        if (next) {
          rowKey.set(recordReader.getCurrentValue().getRow());
          value.setResult(recordReader.getCurrentValue());
        }
      } catch (InterruptedException e) {
        throw new IOException(e);
      }
      return next;
    }
  };
}
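The method above adapts the new-API (mapreduce) HBase record reader to Hive's old-API (mapred) RecordReader contract. An illustrative consumption loop, assuming a caller that already has the input format, split, and JobConf in hand; the process call is a placeholder:

RecordReader<ImmutableBytesWritable, ResultWritable> reader = inputFormat.getRecordReader(split, jobConf, Reporter.NULL);
ImmutableBytesWritable key = reader.createKey();
ResultWritable value = reader.createValue();
try {
  while (reader.next(key, value)) {
    // key holds the HBase row key; value wraps the full Result for that row
    process(key, value.getResult());
  }
} finally {
  // also closes the underlying table and connection, as shown in close() above
  reader.close();
}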
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hbase by apache.
In the class TestTableSnapshotInputFormat, the method verifyWithMockedMapReduce.
private void verifyWithMockedMapReduce(Job job, int numRegions, int expectedNumSplits, byte[] startRow, byte[] stopRow) throws IOException, InterruptedException {
  TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
  List<InputSplit> splits = tsif.getSplits(job);
  Assert.assertEquals(expectedNumSplits, splits.size());
  HBaseTestingUtility.SeenRowTracker rowTracker = new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);
  for (int i = 0; i < splits.size(); i++) {
    // validate input split
    InputSplit split = splits.get(i);
    Assert.assertTrue(split instanceof TableSnapshotRegionSplit);
    // validate record reader
    TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
    when(taskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
    RecordReader<ImmutableBytesWritable, Result> rr = tsif.createRecordReader(split, taskAttemptContext);
    rr.initialize(split, taskAttemptContext);
    // validate we can read all the data back
    while (rr.nextKeyValue()) {
      byte[] row = rr.getCurrentKey().get();
      verifyRowFromMap(rr.getCurrentKey(), rr.getCurrentValue());
      rowTracker.addRow(row);
    }
    rr.close();
  }
  // validate all rows are seen
  rowTracker.validate();
}
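For reference, a hedged sketch of how a snapshot-backed job is normally configured before getSplits and createRecordReader are exercised as in the test above; the snapshot name, restore directory, and MySnapshotMapper are placeholders:

Job job = Job.getInstance(HBaseConfiguration.create(), "snapshot-scan");
Scan scan = new Scan();  // may be bounded to a row range, as the test does with startRow/stopRow
TableMapReduceUtil.initTableSnapshotMapperJob(
    "my_snapshot",                       // existing snapshot to read from
    scan,
    MySnapshotMapper.class,              // hypothetical TableMapper implementation
    ImmutableBytesWritable.class,
    Result.class,
    job,
    true,                                // ship HBase dependency jars with the job
    new Path("/tmp/snapshot-restore"));  // scratch directory where the snapshot is restored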