
Example 46 with TaskAttemptContextImpl

Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project jena by apache.

From class AbstractNodeTupleInputFormatTests, method testMultipleInputs.

/**
     * Runs a multiple input test
     * 
     * @param inputs
     *            Inputs
     * @param expectedSplits
     *            Number of splits expected
     * @param expectedTuples
     *            Number of tuples expected
     * @throws IOException
     * @throws InterruptedException
     */
protected final void testMultipleInputs(File[] inputs, int expectedSplits, int expectedTuples) throws IOException, InterruptedException {
    // Prepare configuration and inputs
    Configuration config = this.prepareConfiguration();
    // Set up fake job
    InputFormat<LongWritable, T> inputFormat = this.getInputFormat();
    Job job = Job.getInstance(config);
    job.setInputFormatClass(inputFormat.getClass());
    for (File input : inputs) {
        this.addInputPath(input, job.getConfiguration(), job);
    }
    JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
    Assert.assertEquals(inputs.length, FileInputFormat.getInputPaths(context).length);
    NLineInputFormat.setNumLinesPerSplit(job, expectedTuples);
    // Check splits
    List<InputSplit> splits = inputFormat.getSplits(context);
    Assert.assertEquals(expectedSplits, splits.size());
    // Check tuples
    int count = 0;
    for (InputSplit split : splits) {
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
        RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext);
        reader.initialize(split, taskContext);
        count += this.countTuples(reader);
    }
    Assert.assertEquals(expectedTuples, count);
}
Also used: JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), LongWritable (org.apache.hadoop.io.LongWritable), JobContext (org.apache.hadoop.mapreduce.JobContext), Job (org.apache.hadoop.mapreduce.Job), File (java.io.File), InputSplit (org.apache.hadoop.mapreduce.InputSplit)
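
The countTuples helper invoked above is not shown in this snippet. A minimal sketch of such a helper, assuming it only needs to drain and close the RecordReader to count the tuples in a split (the real Jena test class may differ), could look like this:

protected final int countTuples(RecordReader<LongWritable, T> reader) throws IOException, InterruptedException {
    int count = 0;
    // Advance the reader until the split is exhausted, counting each tuple
    while (reader.nextKeyValue()) {
        count++;
    }
    // Release any resources held by the reader once the split is consumed
    reader.close();
    return count;
}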

Example 47 with TaskAttemptContextImpl

Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project cdap by caskdata.

From class StreamInputFormatTest, method testFormatStreamRecordReader.

@Test
public void testFormatStreamRecordReader() throws IOException, InterruptedException {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());
    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile), Files.newOutputStreamSupplier(indexFile), 100L);
    StreamEvent streamEvent = new StreamEvent(ImmutableMap.of("header1", "value1", "header2", "value2"), Charsets.UTF_8.encode("hello world"), 1000);
    writer.append(streamEvent);
    writer.close();
    FormatSpecification formatSpec = new FormatSpecification(TextRecordFormat.class.getName(), Schema.recordOf("event", Schema.Field.of("body", Schema.of(Schema.Type.STRING))), Collections.<String, String>emptyMap());
    Configuration conf = new Configuration();
    AbstractStreamInputFormat.setStreamId(conf, DUMMY_ID);
    AbstractStreamInputFormat.setBodyFormatSpecification(conf, formatSpec);
    AbstractStreamInputFormat.setStreamPath(conf, inputDir.toURI());
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    AbstractStreamInputFormat format = new AbstractStreamInputFormat() {

        @Override
        public AuthorizationEnforcer getAuthorizationEnforcer(TaskAttemptContext context) {
            return new NoOpAuthorizer();
        }

        @Override
        public AuthenticationContext getAuthenticationContext(TaskAttemptContext context) {
            return new AuthenticationTestContext();
        }
    };
    // read all splits and store the results in the list
    List<GenericStreamEventData<StructuredRecord>> recordsRead = Lists.newArrayList();
    List<InputSplit> inputSplits = format.getSplits(context);
    for (InputSplit split : inputSplits) {
        RecordReader<LongWritable, GenericStreamEventData<StructuredRecord>> recordReader = format.createRecordReader(split, context);
        recordReader.initialize(split, context);
        while (recordReader.nextKeyValue()) {
            recordsRead.add(recordReader.getCurrentValue());
        }
    }
    // should only have read 1 record
    Assert.assertEquals(1, recordsRead.size());
    GenericStreamEventData<StructuredRecord> eventData = recordsRead.get(0);
    Assert.assertEquals(streamEvent.getHeaders(), eventData.getHeaders());
    Assert.assertEquals("hello world", eventData.getBody().get("body"));
}
Also used: TextRecordFormat (co.cask.cdap.format.TextRecordFormat), Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID), StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent), FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification), AuthenticationTestContext (co.cask.cdap.security.auth.context.AuthenticationTestContext), NoOpAuthorizer (co.cask.cdap.security.spi.authorization.NoOpAuthorizer), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), GenericStreamEventData (co.cask.cdap.api.stream.GenericStreamEventData), StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), LongWritable (org.apache.hadoop.io.LongWritable), File (java.io.File), InputSplit (org.apache.hadoop.mapreduce.InputSplit), Test (org.junit.Test)
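
Both examples follow the same pattern: a TaskAttemptContextImpl is built from a plain Configuration and a fresh TaskAttemptID so that an InputFormat's splits and record readers can be exercised without a running MapReduce job. A minimal, self-contained sketch of that pattern (the class name here is illustrative, not taken from either project):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextSketch {

    public static void main(String[] args) {
        // A Configuration plus a fresh TaskAttemptID is all that is needed to
        // construct a standalone task attempt context for unit tests.
        Configuration conf = new Configuration();
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

        // The context exposes the configuration and attempt id that an
        // InputFormat's createRecordReader/initialize calls will consume.
        System.out.println("Attempt: " + context.getTaskAttemptID());
        System.out.println("Conf entries: " + context.getConfiguration().size());
    }
}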

Aggregations

TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 47 uses
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 38 uses
Configuration (org.apache.hadoop.conf.Configuration): 35 uses
File (java.io.File): 25 uses
Job (org.apache.hadoop.mapreduce.Job): 23 uses
Path (org.apache.hadoop.fs.Path): 22 uses
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 22 uses
JobContext (org.apache.hadoop.mapreduce.JobContext): 21 uses
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 21 uses
Test (org.junit.Test): 17 uses
RecordWriter (org.apache.hadoop.mapreduce.RecordWriter): 12 uses
LongWritable (org.apache.hadoop.io.LongWritable): 11 uses
IOException (java.io.IOException): 10 uses
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 10 uses
MapFile (org.apache.hadoop.io.MapFile): 9 uses
NullWritable (org.apache.hadoop.io.NullWritable): 6 uses
ArrayList (java.util.ArrayList): 5 uses
HashSet (java.util.HashSet): 5 uses
FileAttribute (java.nio.file.attribute.FileAttribute): 4 uses
TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID): 4 uses