
Example 11 with MapContextImpl

Use of org.apache.hadoop.mapreduce.task.MapContextImpl in project hadoop by apache.

The class TestJoinProperties, method testFormat.

@SuppressWarnings("unchecked")
public int testFormat(Configuration conf, int tupleSize, boolean firstTuple, boolean secondTuple, TestType ttype) throws Exception {
    Job job = Job.getInstance(conf);
    CompositeInputFormat format = new CompositeInputFormat();
    int count = 0;
    for (InputSplit split : (List<InputSplit>) format.getSplits(job)) {
        TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(conf);
        RecordReader reader = format.createRecordReader(split, context);
        MapContext mcontext = new MapContextImpl(conf, context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
        reader.initialize(split, mcontext);
        WritableComparable key = null;
        Writable value = null;
        while (reader.nextKeyValue()) {
            key = (WritableComparable) reader.getCurrentKey();
            value = (Writable) reader.getCurrentValue();
            validateKeyValue(key, value, tupleSize, firstTuple, secondTuple, ttype);
            count++;
        }
    }
    return count;
}
Also used: MapContextImpl (org.apache.hadoop.mapreduce.task.MapContextImpl), WritableComparable (org.apache.hadoop.io.WritableComparable), Writable (org.apache.hadoop.io.Writable), LongWritable (org.apache.hadoop.io.LongWritable), IntWritable (org.apache.hadoop.io.IntWritable), List (java.util.List)
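
Example 11 above and the FixedLengthInputFormat tests below share the same read-only harness: build a dummy TaskAttemptContext, create the RecordReader from the InputFormat, and drive it through a MapContextImpl whose RecordWriter and OutputCommitter are null, since nothing is written. A minimal, generalized sketch of that pattern is shown here; TextInputFormat is used purely for illustration, and MapReduceTestUtil is the test-only helper seen in these examples (assumed to be on the test classpath), so the class and method names are illustrative rather than part of any example on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MapContext;
import org.apache.hadoop.mapreduce.MapReduceTestUtil;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.MapContextImpl;

public class ReadOnlyHarnessSketch {

    /** Counts the records a TextInputFormat produces for the given input path. */
    static long countRecords(Configuration conf, Path input) throws Exception {
        Job job = Job.getInstance(conf);
        FileInputFormat.setInputPaths(job, input);
        TextInputFormat format = new TextInputFormat();
        long count = 0;
        for (InputSplit split : format.getSplits(job)) {
            // Dummy attempt context and reporter stand in for the framework
            // (MapReduceTestUtil is the test helper used in the examples above).
            TaskAttemptContext context =
                MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
            RecordReader<LongWritable, Text> reader = format.createRecordReader(split, context);
            // No output is written, so RecordWriter and OutputCommitter can be null.
            MapContext<LongWritable, Text, LongWritable, Text> mcontext =
                new MapContextImpl<LongWritable, Text, LongWritable, Text>(
                    job.getConfiguration(), context.getTaskAttemptID(), reader,
                    null, null, MapReduceTestUtil.createDummyReporter(), split);
            reader.initialize(split, mcontext);
            while (reader.nextKeyValue()) {
                count++;
            }
            reader.close();
        }
        return count;
    }
}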

Example 12 with MapContextImpl

Use of org.apache.hadoop.mapreduce.task.MapContextImpl in project hadoop by apache.

The class TestGridMixClasses, method testLoadMapper.

/*
   * LoadMapper should write one record to the writer for each reduce.
   */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test(timeout = 10000)
public void testLoadMapper() throws Exception {
    Configuration conf = new Configuration();
    conf.setInt(JobContext.NUM_REDUCES, 2);
    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
    TaskAttemptID taskId = new TaskAttemptID();
    RecordReader<NullWritable, GridmixRecord> reader = new FakeRecordReader();
    LoadRecordGkGrWriter writer = new LoadRecordGkGrWriter();
    OutputCommitter committer = new CustomOutputCommitter();
    StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
    LoadSplit split = getLoadSplit();
    MapContext<NullWritable, GridmixRecord, GridmixKey, GridmixRecord> mapContext = new MapContextImpl<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>(conf, taskId, reader, writer, committer, reporter, split);
    // wrap the MapContextImpl in the Mapper.Context that mapper.run() expects
    Context ctx = new WrappedMapper<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>().getMapContext(mapContext);
    reader.initialize(split, ctx);
    ctx.getConfiguration().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
    CompressionEmulationUtil.setCompressionEmulationEnabled(ctx.getConfiguration(), true);
    LoadJob.LoadMapper mapper = new LoadJob.LoadMapper();
    // run() invokes setup(), map() for each record, then cleanup()
    mapper.run(ctx);
    Map<GridmixKey, GridmixRecord> data = writer.getData();
    // check result
    assertEquals(2, data.size());
}
Also used: Context (org.apache.hadoop.mapreduce.Mapper.Context), ReduceContext (org.apache.hadoop.mapreduce.ReduceContext), MapContext (org.apache.hadoop.mapreduce.MapContext), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), JobContext (org.apache.hadoop.mapred.JobContext), CustomOutputCommitter (org.apache.hadoop.CustomOutputCommitter), OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter), Configuration (org.apache.hadoop.conf.Configuration), MapContextImpl (org.apache.hadoop.mapreduce.task.MapContextImpl), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), DummyReporter (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl.DummyReporter), NullWritable (org.apache.hadoop.io.NullWritable), StatusReporter (org.apache.hadoop.mapreduce.StatusReporter), Test (org.junit.Test)
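
Example 12 above and MapTask.runNewMapper in Example 13 below follow the same two-step pattern: construct a MapContextImpl, then adapt it with WrappedMapper.getMapContext() into the Mapper.Context that Mapper.run() expects. A minimal sketch of just that step, with the reader, writer, committer, and reporter supplied by the caller (test doubles in the GridMix test, real implementations in MapTask); the class and method names are illustrative only.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.MapContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.StatusReporter;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.map.WrappedMapper;
import org.apache.hadoop.mapreduce.task.MapContextImpl;

public class RunMapperSketch {

    /** Drives setup(), map() for every record, then cleanup() on the given mapper. */
    static <KIN, VIN, KOUT, VOUT> void runMapper(
            Mapper<KIN, VIN, KOUT, VOUT> mapper, Configuration conf,
            RecordReader<KIN, VIN> reader, RecordWriter<KOUT, VOUT> writer,
            OutputCommitter committer, StatusReporter reporter, InputSplit split)
            throws IOException, InterruptedException {
        // The raw MapContextImpl carries the reader, writer, committer and reporter.
        MapContext<KIN, VIN, KOUT, VOUT> mapContext =
            new MapContextImpl<KIN, VIN, KOUT, VOUT>(
                conf, new TaskAttemptID(), reader, writer, committer, reporter, split);
        // WrappedMapper adapts it into the Mapper.Context subtype that run() expects.
        Mapper<KIN, VIN, KOUT, VOUT>.Context ctx =
            new WrappedMapper<KIN, VIN, KOUT, VOUT>().getMapContext(mapContext);
        reader.initialize(split, ctx);
        // Closing the reader and writer and committing output are left to the
        // caller, as in the examples on this page.
        mapper.run(ctx);
    }
}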

Example 13 with MapContextImpl

Use of org.apache.hadoop.mapreduce.task.MapContextImpl in project hadoop by apache.

The class MapTask, method runNewMapper.

@SuppressWarnings("unchecked")
private <INKEY, INVALUE, OUTKEY, OUTVALUE> void runNewMapper(final JobConf job, final TaskSplitIndex splitIndex, final TaskUmbilicalProtocol umbilical, TaskReporter reporter) throws IOException, ClassNotFoundException, InterruptedException {
    // make a task context so we can get the classes
    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext = new org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl(job, getTaskID(), reporter);
    // make a mapper
    org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper = (org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils.newInstance(taskContext.getMapperClass(), job);
    // make the input format
    org.apache.hadoop.mapreduce.InputFormat<INKEY, INVALUE> inputFormat = (org.apache.hadoop.mapreduce.InputFormat<INKEY, INVALUE>) ReflectionUtils.newInstance(taskContext.getInputFormatClass(), job);
    // rebuild the input split
    org.apache.hadoop.mapreduce.InputSplit split = null;
    split = getSplitDetails(new Path(splitIndex.getSplitLocation()), splitIndex.getStartOffset());
    LOG.info("Processing split: " + split);
    org.apache.hadoop.mapreduce.RecordReader<INKEY, INVALUE> input = new NewTrackingRecordReader<INKEY, INVALUE>(split, inputFormat, reporter, taskContext);
    job.setBoolean(JobContext.SKIP_RECORDS, isSkipping());
    org.apache.hadoop.mapreduce.RecordWriter output = null;
    // get an output object
    if (job.getNumReduceTasks() == 0) {
        output = new NewDirectOutputCollector(taskContext, job, umbilical, reporter);
    } else {
        output = new NewOutputCollector(taskContext, job, umbilical, reporter);
    }
    org.apache.hadoop.mapreduce.MapContext<INKEY, INVALUE, OUTKEY, OUTVALUE> mapContext = new MapContextImpl<INKEY, INVALUE, OUTKEY, OUTVALUE>(job, getTaskID(), input, output, committer, reporter, split);
    org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>.Context mapperContext = new WrappedMapper<INKEY, INVALUE, OUTKEY, OUTVALUE>().getMapContext(mapContext);
    try {
        input.initialize(split, mapperContext);
        mapper.run(mapperContext);
        mapPhase.complete();
        setPhase(TaskStatus.Phase.SORT);
        statusUpdate(umbilical);
        input.close();
        input = null;
        output.close(mapperContext);
        output = null;
    } finally {
        closeQuietly(input);
        closeQuietly(output, mapperContext);
    }
}
Also used: MapContextImpl (org.apache.hadoop.mapreduce.task.MapContextImpl), WrappedMapper (org.apache.hadoop.mapreduce.lib.map.WrappedMapper), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), Path (org.apache.hadoop.fs.Path)

Example 14 with MapContextImpl

Use of org.apache.hadoop.mapreduce.task.MapContextImpl in project hadoop by apache.

The class TestFixedLengthInputFormat, method runRandomTests.

private void runRandomTests(CompressionCodec codec) throws Exception {
    StringBuilder fileName = new StringBuilder("testFormat.txt");
    if (codec != null) {
        fileName.append(".gz");
    }
    localFs.delete(workDir, true);
    Path file = new Path(workDir, fileName.toString());
    int seed = new Random().nextInt();
    LOG.info("Seed = " + seed);
    Random random = new Random(seed);
    int MAX_TESTS = 20;
    LongWritable key;
    BytesWritable value;
    for (int i = 0; i < MAX_TESTS; i++) {
        LOG.info("----------------------------------------------------------");
        // Maximum total records of 999
        int totalRecords = random.nextInt(999) + 1;
        // Test an empty file
        if (i == 8) {
            totalRecords = 0;
        }
        // Maximum bytes in a record of 100K
        int recordLength = random.nextInt(1024 * 100) + 1;
        // For the 11th test, force a record length of 1
        if (i == 10) {
            recordLength = 1;
        }
        // The total bytes in the test file
        int fileSize = (totalRecords * recordLength);
        LOG.info("totalRecords=" + totalRecords + " recordLength=" + recordLength);
        // Create the job 
        Job job = Job.getInstance(defaultConf);
        if (codec != null) {
            ReflectionUtils.setConf(codec, job.getConfiguration());
        }
        // Create the test file
        ArrayList<String> recordList = createFile(file, codec, recordLength, totalRecords);
        assertTrue(localFs.exists(file));
        //set the fixed length record length config property for the job
        FixedLengthInputFormat.setRecordLength(job.getConfiguration(), recordLength);
        int numSplits = 1;
        // Arbitrarily set number of splits.
        if (i > 0) {
            if (i == (MAX_TESTS - 1)) {
                // Test a split size that is less than record len
                numSplits = (int) (fileSize / Math.floor(recordLength / 2));
            } else {
                if (MAX_TESTS % i == 0) {
                    // Let us create a split size that is forced to be 
                    // smaller than the end file itself, (ensures 1+ splits)
                    numSplits = fileSize / (fileSize - random.nextInt(fileSize));
                } else {
                    // Just pick a random split size with no upper bound 
                    numSplits = Math.max(1, fileSize / random.nextInt(Integer.MAX_VALUE));
                }
            }
            LOG.info("Number of splits set to: " + numSplits);
        }
        job.getConfiguration().setLong("mapreduce.input.fileinputformat.split.maxsize", (long) (fileSize / numSplits));
        // setup the input path
        FileInputFormat.setInputPaths(job, workDir);
        // Try splitting the file in a variety of sizes
        FixedLengthInputFormat format = new FixedLengthInputFormat();
        List<InputSplit> splits = format.getSplits(job);
        LOG.info("Actual number of splits = " + splits.size());
        // Test combined split lengths = total file size
        long recordOffset = 0;
        int recordNumber = 0;
        for (InputSplit split : splits) {
            TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
            RecordReader<LongWritable, BytesWritable> reader = format.createRecordReader(split, context);
            MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable> mcontext = new MapContextImpl<LongWritable, BytesWritable, LongWritable, BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
            reader.initialize(split, mcontext);
            Class<?> clazz = reader.getClass();
            assertEquals("RecordReader class should be FixedLengthRecordReader:", FixedLengthRecordReader.class, clazz);
            // Plow through the records in this split
            while (reader.nextKeyValue()) {
                key = reader.getCurrentKey();
                value = reader.getCurrentValue();
                assertEquals("Checking key", (long) (recordNumber * recordLength), key.get());
                String valueString = new String(value.getBytes(), 0, value.getLength());
                assertEquals("Checking record length:", recordLength, value.getLength());
                assertTrue("Checking for more records than expected:", recordNumber < totalRecords);
                String origRecord = recordList.get(recordNumber);
                assertEquals("Checking record content:", origRecord, valueString);
                recordNumber++;
            }
            reader.close();
        }
        assertEquals("Total original records should be total read records:", recordList.size(), recordNumber);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), MapContextImpl (org.apache.hadoop.mapreduce.task.MapContextImpl), BytesWritable (org.apache.hadoop.io.BytesWritable), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), Random (java.util.Random), LongWritable (org.apache.hadoop.io.LongWritable), Job (org.apache.hadoop.mapreduce.Job), InputSplit (org.apache.hadoop.mapreduce.InputSplit)
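
The only configuration the random tests above really depend on is the fixed record length, set via FixedLengthInputFormat.setRecordLength, plus an optional cap on the split size. A minimal setup sketch follows, assuming the new-API FixedLengthInputFormat and FileInputFormat from org.apache.hadoop.mapreduce.lib.input; the class and method names here are illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FixedLengthInputFormat;

public class FixedLengthSetupSketch {

    /** Configures a job to read fixed-length records of recordLength bytes. */
    static Job configure(Configuration conf, Path input, int recordLength,
            long maxSplitSize) throws Exception {
        Job job = Job.getInstance(conf);
        // Every record is exactly recordLength bytes; without this setting the
        // reader's initialize() fails, as Example 15 below demonstrates.
        FixedLengthInputFormat.setRecordLength(job.getConfiguration(), recordLength);
        // Optional: cap the split size (mapreduce.input.fileinputformat.split.maxsize)
        // to force more than one split, as the random tests above do.
        FileInputFormat.setMaxInputSplitSize(job, maxSplitSize);
        FileInputFormat.setInputPaths(job, input);
        job.setInputFormatClass(FixedLengthInputFormat.class);
        return job;
    }
}

Example 15 below shows the complementary failure case: when the record length is never set, the reader's initialize() throws an IOException.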

Example 15 with MapContextImpl

Use of org.apache.hadoop.mapreduce.task.MapContextImpl in project hadoop by apache.

The class TestFixedLengthInputFormat, method testNoRecordLength.

/**
   * Test with no record length set.
   */
@Test(timeout = 5000)
public void testNoRecordLength() throws Exception {
    localFs.delete(workDir, true);
    Path file = new Path(workDir, "testFormat.txt");
    createFile(file, null, 10, 10);
    // Create the job and do not set fixed record length
    Job job = Job.getInstance(defaultConf);
    FileInputFormat.setInputPaths(job, workDir);
    FixedLengthInputFormat format = new FixedLengthInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    boolean exceptionThrown = false;
    for (InputSplit split : splits) {
        try {
            TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
            RecordReader<LongWritable, BytesWritable> reader = format.createRecordReader(split, context);
            MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable> mcontext = new MapContextImpl<LongWritable, BytesWritable, LongWritable, BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
            reader.initialize(split, mcontext);
        } catch (IOException ioe) {
            exceptionThrown = true;
            LOG.info("Exception message:" + ioe.getMessage());
        }
    }
    assertTrue("Exception for not setting record length:", exceptionThrown);
}
Also used: Path (org.apache.hadoop.fs.Path), MapContextImpl (org.apache.hadoop.mapreduce.task.MapContextImpl), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), BytesWritable (org.apache.hadoop.io.BytesWritable), IOException (java.io.IOException), LongWritable (org.apache.hadoop.io.LongWritable), Job (org.apache.hadoop.mapreduce.Job), InputSplit (org.apache.hadoop.mapreduce.InputSplit), Test (org.junit.Test)

Aggregations

MapContextImpl (org.apache.hadoop.mapreduce.task.MapContextImpl): 22
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 20
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 14
Test (org.junit.Test): 13
LongWritable (org.apache.hadoop.io.LongWritable): 12
Job (org.apache.hadoop.mapreduce.Job): 11
BytesWritable (org.apache.hadoop.io.BytesWritable): 10
Configuration (org.apache.hadoop.conf.Configuration): 9
Path (org.apache.hadoop.fs.Path): 9
Random (java.util.Random): 8
Text (org.apache.hadoop.io.Text): 6
BitSet (java.util.BitSet): 5
IOException (java.io.IOException): 4
IntWritable (org.apache.hadoop.io.IntWritable): 4
ArrayList (java.util.ArrayList): 3
FileSystem (org.apache.hadoop.fs.FileSystem): 3
NullWritable (org.apache.hadoop.io.NullWritable): 3
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter): 3
StatusReporter (org.apache.hadoop.mapreduce.StatusReporter): 3
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 3