
Example 51 with TaskAttemptContext

Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project eiger by wlloyd.

In class ColumnFamilyInputFormat, method getRecordReader:

public org.apache.hadoop.mapred.RecordReader<ByteBuffer, SortedMap<ByteBuffer, IColumn>> getRecordReader(org.apache.hadoop.mapred.InputSplit split, JobConf jobConf, final Reporter reporter) throws IOException {
    // Bridge the old mapred API to the new mapreduce API: wrap the JobConf in a
    // TaskAttemptContext whose progress() delegates to the old-API Reporter, so the
    // task attempt keeps reporting liveness while the new-API reader does the work.
    // (On Hadoop 1.x, mapreduce.TaskAttemptContext is a concrete class, so it can
    // be subclassed inline like this.)
    TaskAttemptContext tac = new TaskAttemptContext(jobConf, TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {

        @Override
        public void progress() {
            reporter.progress();
        }
    };
    ColumnFamilyRecordReader recordReader = new ColumnFamilyRecordReader(jobConf.getInt(CASSANDRA_HADOOP_MAX_KEY_SIZE, CASSANDRA_HADOOP_MAX_KEY_SIZE_DEFAULT));
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}
Also used: org.apache.hadoop.mapreduce (org.apache.hadoop.mapreduce), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)
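A minimal sketch of a hypothetical old-API consumer (not from the eiger sources; inputFormat, split, jobConf, and reporter are assumed to be in scope). The returned reader satisfies the mapred RecordReader contract, so createKey/createValue/next drive the underlying mapreduce reader:

// Hypothetical consumer: iterate the adapter through the old mapred API.
org.apache.hadoop.mapred.RecordReader<ByteBuffer, SortedMap<ByteBuffer, IColumn>> reader = inputFormat.getRecordReader(split, jobConf, reporter);
ByteBuffer key = reader.createKey();
SortedMap<ByteBuffer, IColumn> row = reader.createValue();
while (reader.next(key, row)) {
    // key is the Cassandra row key; row maps column names to IColumn values
}
reader.close();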

Example 52 with TaskAttemptContext

Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project cdap by caskdata.

In class StreamInputFormatTest, method testStreamRecordReader:

@Test
public void testStreamRecordReader() throws Exception {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());
    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile), Files.newOutputStreamSupplier(indexFile), 100L);
    writer.append(StreamFileTestUtils.createEvent(1000, "test"));
    writer.flush();
    // get splits from the input format; expect 2 splits:
    // one covering [0, some offset) and one covering [offset, Long.MAX_VALUE)
    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    AbstractStreamInputFormat.setStreamId(conf, DUMMY_ID);
    AbstractStreamInputFormat.setStreamPath(conf, inputDir.toURI());
    AbstractStreamInputFormat format = new AbstractStreamInputFormat() {

        @Override
        public AuthorizationEnforcer getAuthorizationEnforcer(TaskAttemptContext context) {
            return new NoOpAuthorizer();
        }

        @Override
        public AuthenticationContext getAuthenticationContext(TaskAttemptContext context) {
            return new AuthenticationTestContext();
        }
    };
    List<InputSplit> splits = format.getSplits(new JobContextImpl(new JobConf(conf), new JobID()));
    Assert.assertEquals(2, splits.size());
    // write another event so that the 2nd split has something to read
    writer.append(StreamFileTestUtils.createEvent(1001, "test"));
    writer.close();
    // create a record reader for the 2nd split
    StreamRecordReader<LongWritable, StreamEvent> recordReader = new StreamRecordReader<>(new IdentityStreamEventDecoder(), new NoOpAuthorizer(), new AuthenticationTestContext(), DUMMY_ID);
    recordReader.initialize(splits.get(1), context);
    // check that we read the 2nd stream event
    Assert.assertTrue(recordReader.nextKeyValue());
    StreamEvent output = recordReader.getCurrentValue();
    Assert.assertEquals(1001, output.getTimestamp());
    Assert.assertEquals("test", Bytes.toString(output.getBody()));
    // check that there is nothing more to read
    Assert.assertFalse(recordReader.nextKeyValue());
}
Also used: JobContextImpl (org.apache.hadoop.mapred.JobContextImpl), Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID), StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent), AuthenticationTestContext (co.cask.cdap.security.auth.context.AuthenticationTestContext), NoOpAuthorizer (co.cask.cdap.security.spi.authorization.NoOpAuthorizer), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), IdentityStreamEventDecoder (co.cask.cdap.data.stream.decoder.IdentityStreamEventDecoder), LongWritable (org.apache.hadoop.io.LongWritable), File (java.io.File), JobConf (org.apache.hadoop.mapred.JobConf), InputSplit (org.apache.hadoop.mapreduce.InputSplit), JobID (org.apache.hadoop.mapred.JobID), Test (org.junit.Test)
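The same hand-driven pattern works against stock Hadoop with no CDAP classes involved. A self-contained sketch (the file name and contents are made up for illustration; it uses only TextInputFormat, TaskAttemptContextImpl, and a bare TaskAttemptID from vanilla Hadoop, and runs against the local filesystem):

import java.io.File;
import java.nio.file.Files;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class RecordReaderSmokeTest {

    public static void main(String[] args) throws Exception {
        // write a two-line input file on the local filesystem
        File input = File.createTempFile("lines", ".txt");
        Files.write(input.toPath(), Arrays.asList("first line", "second line"));

        Job job = Job.getInstance(new Configuration());
        FileInputFormat.addInputPath(job, new Path(input.toURI()));

        // a bare TaskAttemptID is enough to satisfy the context outside a real MR run
        TaskAttemptContext ctx = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
        TextInputFormat format = new TextInputFormat();
        for (InputSplit split : format.getSplits(job)) {
            RecordReader<LongWritable, Text> reader = format.createRecordReader(split, ctx);
            reader.initialize(split, ctx);
            // drive the reader by hand, exactly as the framework would
            while (reader.nextKeyValue()) {
                System.out.println(reader.getCurrentKey() + "\t" + reader.getCurrentValue());
            }
            reader.close();
        }
    }
}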

Example 53 with TaskAttemptContext

Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project cdap by caskdata.

In class MultiWriter, method write:

public void write(K key, V value) throws IOException, InterruptedException {
    PartitionKey partitionKey = dynamicPartitioner.getPartitionKey(key, value);
    RecordWriter<K, V> rw = this.recordWriters.get(partitionKey);
    if (rw == null) {
        // if we don't have the record writer yet for the final path, create one and add it to the cache
        TaskAttemptContext taskAttemptContext = getKeySpecificContext(partitionKey);
        rw = getBaseRecordWriter(taskAttemptContext);
        this.recordWriters.put(partitionKey, rw);
        this.contexts.put(partitionKey, taskAttemptContext);
    }
    rw.write(key, value);
}
Also used: PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)
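The snippet caches one RecordWriter and one TaskAttemptContext per partition key but does not show the cleanup. A plausible sketch of the counterpart (hypothetical, assuming the recordWriters and contexts maps above; the actual CDAP implementation may differ):

public void close() throws IOException, InterruptedException {
    // drain the cache: close every per-partition writer with the context it was created under
    for (Map.Entry<PartitionKey, RecordWriter<K, V>> entry : recordWriters.entrySet()) {
        TaskAttemptContext context = contexts.get(entry.getKey());
        entry.getValue().close(context);
    }
    recordWriters.clear();
    contexts.clear();
}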

Example 54 with TaskAttemptContext

Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project cdap by caskdata.

In class MultipleOutputs, method getContext:

// Create a taskAttemptContext for the named output with
// output format and output key/value types put in the context
private synchronized TaskAttemptContext getContext(String nameOutput) throws IOException {
    TaskAttemptContext taskContext = taskContexts.get(nameOutput);
    if (taskContext != null) {
        return taskContext;
    }
    taskContext = getNamedTaskContext(context, nameOutput);
    taskContexts.put(nameOutput, taskContext);
    return taskContext;
}
Also used: TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)
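getNamedTaskContext is not shown above. A plausible sketch, modeled on Hadoop's own MultipleOutputs rather than the CDAP code (the getNamedOutput* lookups are hypothetical helpers that read the per-name registration out of the configuration):

private static TaskAttemptContext getNamedTaskContext(TaskAttemptContext context, String namedOutput) throws IOException {
    // clone the configuration, then point it at the named output's format and types
    Job job = Job.getInstance(context.getConfiguration());
    job.setOutputFormatClass(getNamedOutputFormatClass(context, namedOutput)); // hypothetical lookup
    job.setOutputKeyClass(getNamedOutputKeyClass(context, namedOutput));       // hypothetical lookup
    job.setOutputValueClass(getNamedOutputValueClass(context, namedOutput));   // hypothetical lookup
    return new TaskAttemptContextImpl(job.getConfiguration(), context.getTaskAttemptID());
}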

Example 55 with TaskAttemptContext

Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project cdap by caskdata.

In class MultipleOutputsCommitter, method abortTask:

@Override
public void abortTask(TaskAttemptContext taskContext) throws IOException {
    // Fan the abort out to every named output's delegate committer, each under a
    // context rewritten for that output's name.
    for (Map.Entry<String, OutputCommitter> committer : committers.entrySet()) {
        TaskAttemptContext namedTaskContext = MultipleOutputs.getNamedTaskContext(taskContext, committer.getKey());
        committer.getValue().abortTask(namedTaskContext);
    }
}
Also used: OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), Map (java.util.Map)
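A matching commit path would follow the same shape. A hypothetical sketch (not the actual CDAP class; it relies only on OutputCommitter's standard needsTaskCommit/commitTask contract):

@Override
public void commitTask(TaskAttemptContext taskContext) throws IOException {
    for (Map.Entry<String, OutputCommitter> committer : committers.entrySet()) {
        TaskAttemptContext namedTaskContext = MultipleOutputs.getNamedTaskContext(taskContext, committer.getKey());
        // only commit outputs whose delegate actually produced something to promote
        if (committer.getValue().needsTaskCommit(namedTaskContext)) {
            committer.getValue().commitTask(namedTaskContext);
        }
    }
}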

Aggregations

TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 110 usages
Configuration (org.apache.hadoop.conf.Configuration): 58 usages
Job (org.apache.hadoop.mapreduce.Job): 44 usages
Path (org.apache.hadoop.fs.Path): 39 usages
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 38 usages
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 36 usages
Test (org.junit.Test): 35 usages
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 33 usages
JobContext (org.apache.hadoop.mapreduce.JobContext): 28 usages
IOException (java.io.IOException): 27 usages
File (java.io.File): 22 usages
LongWritable (org.apache.hadoop.io.LongWritable): 22 usages
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 21 usages
RecordWriter (org.apache.hadoop.mapreduce.RecordWriter): 19 usages
MapContextImpl (org.apache.hadoop.mapreduce.task.MapContextImpl): 17 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 16 usages
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter): 12 usages
ArrayList (java.util.ArrayList): 11 usages
BytesWritable (org.apache.hadoop.io.BytesWritable): 10 usages
MapFile (org.apache.hadoop.io.MapFile): 10 usages