Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project eiger by wlloyd.
The class ColumnFamilyInputFormat, method getRecordReader.
public org.apache.hadoop.mapred.RecordReader<ByteBuffer, SortedMap<ByteBuffer, IColumn>> getRecordReader(org.apache.hadoop.mapred.InputSplit split, JobConf jobConf, final Reporter reporter) throws IOException {
    TaskAttemptContext tac = new TaskAttemptContext(jobConf, TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };
    ColumnFamilyRecordReader recordReader = new ColumnFamilyRecordReader(jobConf.getInt(CASSANDRA_HADOOP_MAX_KEY_SIZE, CASSANDRA_HADOOP_MAX_KEY_SIZE_DEFAULT));
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}
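The snippet above bridges the old org.apache.hadoop.mapred API to the new mapreduce API by subclassing TaskAttemptContext so that progress() is forwarded to the old-style Reporter; subclassing like this only works on Hadoop versions where TaskAttemptContext is still a class. On Hadoop 2.x, where TaskAttemptContext is an interface, the usual way to obtain a context outside a running job is TaskAttemptContextImpl. A minimal sketch, not taken from the eiger sources:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // A default TaskAttemptID is enough when the context is only used to
        // drive a RecordReader or OutputFormat outside a real MapReduce task.
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
        System.out.println("task attempt id: " + context.getTaskAttemptID());
    }
}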
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project cdap by caskdata.
The class StreamInputFormatTest, method testStreamRecordReader.
@Test
public void testStreamRecordReader() throws Exception {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());
    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile), Files.newOutputStreamSupplier(indexFile), 100L);
    writer.append(StreamFileTestUtils.createEvent(1000, "test"));
    writer.flush();
    // get splits from the input format. Expect to get 2 splits,
    // one from 0 - some offset and one from offset - Long.MAX_VALUE.
    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    AbstractStreamInputFormat.setStreamId(conf, DUMMY_ID);
    AbstractStreamInputFormat.setStreamPath(conf, inputDir.toURI());
    AbstractStreamInputFormat format = new AbstractStreamInputFormat() {
        @Override
        public AuthorizationEnforcer getAuthorizationEnforcer(TaskAttemptContext context) {
            return new NoOpAuthorizer();
        }

        @Override
        public AuthenticationContext getAuthenticationContext(TaskAttemptContext context) {
            return new AuthenticationTestContext();
        }
    };
    List<InputSplit> splits = format.getSplits(new JobContextImpl(new JobConf(conf), new JobID()));
    Assert.assertEquals(2, splits.size());
    // write another event so that the 2nd split has something to read
    writer.append(StreamFileTestUtils.createEvent(1001, "test"));
    writer.close();
    // create a record reader for the 2nd split
    StreamRecordReader<LongWritable, StreamEvent> recordReader = new StreamRecordReader<>(new IdentityStreamEventDecoder(), new NoOpAuthorizer(), new AuthenticationTestContext(), DUMMY_ID);
    recordReader.initialize(splits.get(1), context);
    // check that we read the 2nd stream event
    Assert.assertTrue(recordReader.nextKeyValue());
    StreamEvent output = recordReader.getCurrentValue();
    Assert.assertEquals(1001, output.getTimestamp());
    Assert.assertEquals("test", Bytes.toString(output.getBody()));
    // check that there is nothing more to read
    Assert.assertFalse(recordReader.nextKeyValue());
}
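The test drives AbstractStreamInputFormat by hand: it builds a TaskAttemptContextImpl from a bare Configuration and TaskAttemptID, asks the format for splits, and feeds one split plus the context into a RecordReader. The same pattern works for any mapreduce InputFormat. The sketch below uses TextInputFormat purely as a stand-in for the CDAP stream format; the format class and the input path argument are assumptions, not part of the CDAP test:

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class ManualSplitReadExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        TextInputFormat format = new TextInputFormat();
        // Ask the format for splits, then read one split directly with a
        // hand-built TaskAttemptContext, just as the test above does.
        List<InputSplit> splits = format.getSplits(job);
        TaskAttemptContext context =
            new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
        try (RecordReader<LongWritable, Text> reader =
                 format.createRecordReader(splits.get(0), context)) {
            reader.initialize(splits.get(0), context);
            while (reader.nextKeyValue()) {
                System.out.println(reader.getCurrentKey() + "\t" + reader.getCurrentValue());
            }
        }
    }
}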
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project cdap by caskdata.
The class MultiWriter, method write.
public void write(K key, V value) throws IOException, InterruptedException {
    PartitionKey partitionKey = dynamicPartitioner.getPartitionKey(key, value);
    RecordWriter<K, V> rw = this.recordWriters.get(partitionKey);
    if (rw == null) {
        // if we don't have the record writer yet for the final path, create one and add it to the cache
        TaskAttemptContext taskAttemptContext = getKeySpecificContext(partitionKey);
        rw = getBaseRecordWriter(taskAttemptContext);
        this.recordWriters.put(partitionKey, rw);
        this.contexts.put(partitionKey, taskAttemptContext);
    }
    rw.write(key, value);
}
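MultiWriter lazily opens one RecordWriter per partition key: the first record seen for a partition triggers creation of a partition-specific TaskAttemptContext (via getKeySpecificContext) and of a writer bound to it, and both are cached for subsequent records. Below is a simplified, self-contained sketch of that idiom; partitionFor, contextFor and writerFor are hypothetical stand-ins for the CDAP helpers, not their actual names:

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public abstract class PerPartitionWriter<K, V> {
    // One cached RecordWriter per partition key, created on first use.
    private final Map<String, RecordWriter<K, V>> writers = new HashMap<>();

    protected abstract String partitionFor(K key, V value);
    protected abstract TaskAttemptContext contextFor(String partition) throws IOException;
    protected abstract RecordWriter<K, V> writerFor(TaskAttemptContext context)
        throws IOException, InterruptedException;

    public void write(K key, V value) throws IOException, InterruptedException {
        String partition = partitionFor(key, value);
        RecordWriter<K, V> writer = writers.get(partition);
        if (writer == null) {
            // First record for this partition: build its context and writer, then cache them.
            writer = writerFor(contextFor(partition));
            writers.put(partition, writer);
        }
        writer.write(key, value);
    }

    public void closeAll(TaskAttemptContext context) throws IOException, InterruptedException {
        for (RecordWriter<K, V> writer : writers.values()) {
            writer.close(context);
        }
    }
}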
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project cdap by caskdata.
The class MultipleOutputs, method getContext.
// Create a taskAttemptContext for the named output with
// output format and output key/value types put in the context
private synchronized TaskAttemptContext getContext(String nameOutput) throws IOException {
    TaskAttemptContext taskContext = taskContexts.get(nameOutput);
    if (taskContext != null) {
        return taskContext;
    }
    taskContext = getNamedTaskContext(context, nameOutput);
    taskContexts.put(nameOutput, taskContext);
    return taskContext;
}
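getContext memoizes one TaskAttemptContext per named output, creating it on first request through getNamedTaskContext. Such a named-output context is typically derived by copying the task's configuration, setting the output format and key/value classes for that output, and wrapping the result in a fresh context, much as Hadoop's own MultipleOutputs does. The sketch below shows that derivation with fixed TextOutputFormat/Text types and an illustrative use of the mapreduce.output.basename key; the real CDAP helper reads these from per-output configuration entries:

import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public final class NamedOutputContexts {
    private NamedOutputContexts() {
    }

    static TaskAttemptContext namedContext(TaskAttemptContext base, String namedOutput)
        throws IOException {
        // Job.getInstance copies the configuration, so settings applied for this
        // named output do not leak back into the parent task context.
        Job job = Job.getInstance(base.getConfiguration());
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        // One way to keep files from different named outputs apart.
        job.getConfiguration().set("mapreduce.output.basename", namedOutput);
        return new TaskAttemptContextImpl(job.getConfiguration(), base.getTaskAttemptID());
    }
}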
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project cdap by caskdata.
The class MultipleOutputsCommitter, method abortTask.
@Override
public void abortTask(TaskAttemptContext taskContext) throws IOException {
    for (Map.Entry<String, OutputCommitter> committer : committers.entrySet()) {
        TaskAttemptContext namedTaskContext = MultipleOutputs.getNamedTaskContext(taskContext, committer.getKey());
        committer.getValue().abortTask(namedTaskContext);
    }
}
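abortTask fans the call out to one delegate OutputCommitter per named output, handing each delegate a context rewritten for its output. The commit path is symmetric; the hedged sketch below illustrates that delegation pattern rather than reproducing the CDAP class, reusing the hypothetical NamedOutputContexts helper from the previous example and an assumed map of per-output committers:

import java.io.IOException;
import java.util.Map;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class CompositeTaskCommit {
    private final Map<String, OutputCommitter> committers;

    public CompositeTaskCommit(Map<String, OutputCommitter> committers) {
        this.committers = committers;
    }

    public void commitTask(TaskAttemptContext taskContext) throws IOException {
        for (Map.Entry<String, OutputCommitter> entry : committers.entrySet()) {
            // Rewrite the context for this named output before delegating.
            TaskAttemptContext namedContext =
                NamedOutputContexts.namedContext(taskContext, entry.getKey());
            if (entry.getValue().needsTaskCommit(namedContext)) {
                entry.getValue().commitTask(namedContext);
            }
        }
    }
}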