Search in sources :

Example 1 with TaskAttemptContext

use of org.apache.hadoop.mapreduce.TaskAttemptContext in project druid by druid-io.

the class DatasourceRecordReaderTest method testSanity.

/**
 * Reads the bundled test segment through a {@link DatasourceRecordReader} and checks both the
 * rows produced and the progress reported before and after the read.
 *
 * <p>The segment descriptor is loaded from {@code test-segment/descriptor.json} and given a
 * {@code local} load spec that points at {@code test-segment/index.zip} on the classpath.
 *
 * @throws Exception if the segment cannot be loaded or read
 */
@Test
public void testSanity() throws Exception {
    DataSegment segment = new DefaultObjectMapper().readValue(this.getClass().getClassLoader().getResource("test-segment/descriptor.json"), DataSegment.class).withLoadSpec(ImmutableMap.<String, Object>of("type", "local", "path", this.getClass().getClassLoader().getResource("test-segment/index.zip").getPath()));
    InputSplit split = new DatasourceInputSplit(Lists.newArrayList(WindowedDataSegment.of(segment)), null);
    Configuration config = new Configuration();
    // The reader is handed its ingestion spec as JSON through the Hadoop configuration.
    config.set(DatasourceInputFormat.CONF_DRUID_SCHEMA, HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(new DatasourceIngestionSpec(segment.getDataSource(), segment.getInterval(), null, null, null, null, segment.getDimensions(), segment.getMetrics(), false)));
    // A nice mock is sufficient: only getConfiguration() is stubbed here.
    TaskAttemptContext context = EasyMock.createNiceMock(TaskAttemptContext.class);
    EasyMock.expect(context.getConfiguration()).andReturn(config).anyTimes();
    EasyMock.replay(context);
    DatasourceRecordReader rr = new DatasourceRecordReader();
    rr.initialize(split, context);
    // Close the reader even when an assertion or verifyRows fails, so a failing run does not
    // leave whatever the reader holds open behind it.
    try {
        // Nothing has been consumed yet.
        Assert.assertEquals(0, rr.getProgress(), 0.0001);
        List<InputRow> rows = Lists.newArrayList();
        while (rr.nextKeyValue()) {
            rows.add(rr.getCurrentValue());
        }
        verifyRows(rows);
        // The reader has been drained completely.
        Assert.assertEquals(1, rr.getProgress(), 0.0001);
    } finally {
        rr.close();
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) InputRow(io.druid.data.input.InputRow) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) DataSegment(io.druid.timeline.DataSegment) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Test(org.junit.Test)

Example 2 with TaskAttemptContext

use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache.

the class TestCombineSequenceFileInputFormat method testFormat.

/**
 * Creates a set of files via {@code createFiles}, asks {@link CombineSequenceFileInputFormat}
 * for splits three times, and on each round checks that a single {@link CombineFileSplit} is
 * returned and that reading it yields {@code length} distinct keys with no key seen twice.
 *
 * @throws IOException if file creation, split computation or reading fails
 * @throws InterruptedException if split computation or reading is interrupted
 */
@Test(timeout = 10000)
public void testFormat() throws IOException, InterruptedException {
    Job job = Job.getInstance(conf);
    Random random = new Random();
    long seed = random.nextLong();
    // Log the seed so that a failing randomized run can be reproduced.
    LOG.info("seed = " + seed);
    random.setSeed(seed);
    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, workDir);
    final int length = 10000;
    final int numFiles = 10;
    // create files with a variety of lengths
    createFiles(length, numFiles, random, job);
    TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    // create a combine split for the files
    InputFormat<IntWritable, BytesWritable> format = new CombineSequenceFileInputFormat<IntWritable, BytesWritable>();
    for (int i = 0; i < 3; i++) {
        // numSplits is only logged; getSplits(job) takes no split-count hint.
        int numSplits = random.nextInt(length / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
        LOG.info("splitting: requesting = " + numSplits);
        List<InputSplit> splits = format.getSplits(job);
        LOG.info("splitting: got =        " + splits.size());
        // we should have a single split as the length is comfortably smaller than
        // the block size
        assertEquals("We got more than one splits!", 1, splits.size());
        InputSplit split = splits.get(0);
        assertEquals("It should be CombineFileSplit", CombineFileSplit.class, split.getClass());
        // check the split
        BitSet bits = new BitSet(length);
        RecordReader<IntWritable, BytesWritable> reader = format.createRecordReader(split, context);
        MapContext<IntWritable, BytesWritable, IntWritable, BytesWritable> mcontext = new MapContextImpl<IntWritable, BytesWritable, IntWritable, BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
        reader.initialize(split, mcontext);
        try {
            // Checked inside the try so that a failure here still closes the reader.
            assertEquals("reader class is CombineFileRecordReader.", CombineFileRecordReader.class, reader.getClass());
            while (reader.nextKeyValue()) {
                IntWritable key = reader.getCurrentKey();
                BytesWritable value = reader.getCurrentValue();
                assertNotNull("Value should not be null.", value);
                final int k = key.get();
                LOG.debug("read " + k);
                // Each key may be delivered at most once.
                assertFalse("Key in multiple partitions.", bits.get(k));
                bits.set(k);
            }
        } finally {
            reader.close();
        }
        // Exactly `length` distinct keys must have been read.
        assertEquals("Some keys in no partition.", length, bits.cardinality());
    }
}
Also used : MapContextImpl(org.apache.hadoop.mapreduce.task.MapContextImpl) BitSet(java.util.BitSet) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) BytesWritable(org.apache.hadoop.io.BytesWritable) Random(java.util.Random) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Example 3 with TaskAttemptContext

use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache.

the class TestCombineTextInputFormat method readSplit.

/**
 * Reads every record of {@code split} with a reader obtained from {@code format} and returns
 * copies of the values in the order they were read.
 *
 * @param format input format used to create the record reader
 * @param split the split to read
 * @param job job whose configuration is used for the dummy task and map contexts
 * @return a new list holding a copy of each value read from the split
 * @throws IOException if creating, initializing, reading or closing the reader fails
 * @throws InterruptedException if creating, initializing or reading is interrupted
 */
private static List<Text> readSplit(InputFormat<LongWritable, Text> format, InputSplit split, Job job) throws IOException, InterruptedException {
    List<Text> result = new ArrayList<Text>();
    Configuration conf = job.getConfiguration();
    TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(conf);
    // Reuse the one dummy context for both the reader and the map context, as testFormat does.
    RecordReader<LongWritable, Text> reader = format.createRecordReader(split, context);
    MapContext<LongWritable, Text, LongWritable, Text> mcontext = new MapContextImpl<LongWritable, Text, LongWritable, Text>(conf, context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mcontext);
    try {
        while (reader.nextKeyValue()) {
            // Copy the value instead of storing the reference handed out by the reader.
            result.add(new Text(reader.getCurrentValue()));
        }
    } finally {
        // The reader was previously never closed; release it on every path.
        reader.close();
    }
    return result;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) MapContextImpl(org.apache.hadoop.mapreduce.task.MapContextImpl) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) LongWritable(org.apache.hadoop.io.LongWritable)

Example 4 with TaskAttemptContext

use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache.

the class TestCombineTextInputFormat method testFormat.

/**
 * Creates text files via {@code createFiles}, then three times over requests splits from
 * {@link CombineTextInputFormat}, expects exactly one {@link CombineFileSplit}, and reads it
 * back, checking that {@code length} distinct integer values are seen and none is seen twice.
 *
 * @throws Exception if file creation, split computation or reading fails
 */
@Test(timeout = 10000)
public void testFormat() throws Exception {
    final Job job = Job.getInstance(new Configuration(defaultConf));

    // Re-seed with a logged value so a failing run can be replayed.
    final Random rng = new Random();
    final long seed = rng.nextLong();
    LOG.info("seed = " + seed);
    rng.setSeed(seed);

    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, workDir);

    final int length = 10000;
    final int numFiles = 10;
    // populate the work directory with files of differing lengths
    createFiles(length, numFiles, rng);

    // the format under test, which combines the files into one split
    final CombineTextInputFormat format = new CombineTextInputFormat();
    int round = 0;
    while (round < 3) {
        final int numSplits = 1 + rng.nextInt(length / 20);
        LOG.info("splitting: requesting = " + numSplits);
        final List<InputSplit> splits = format.getSplits(job);
        LOG.info("splitting: got =        " + splits.size());

        // the data is far smaller than a block, so one split is expected
        assertEquals("We got more than one splits!", 1, splits.size());
        final InputSplit split = splits.get(0);
        assertEquals("It should be CombineFileSplit", CombineFileSplit.class, split.getClass());

        // one bit per value read from the split
        final BitSet seen = new BitSet(length);
        LOG.debug("split= " + split);
        final TaskAttemptContext attemptContext =
            MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
        final RecordReader<LongWritable, Text> recordReader =
            format.createRecordReader(split, attemptContext);
        assertEquals("reader class is CombineFileRecordReader.", CombineFileRecordReader.class, recordReader.getClass());
        final MapContext<LongWritable, Text, LongWritable, Text> mapContext =
            new MapContextImpl<LongWritable, Text, LongWritable, Text>(
                job.getConfiguration(),
                attemptContext.getTaskAttemptID(),
                recordReader,
                null,
                null,
                MapReduceTestUtil.createDummyReporter(),
                split);
        recordReader.initialize(split, mapContext);
        try {
            int recordsRead = 0;
            for (; recordReader.nextKeyValue(); recordsRead++) {
                assertNotNull("Key should not be null.", recordReader.getCurrentKey());
                final int v = Integer.parseInt(recordReader.getCurrentValue().toString());
                LOG.debug("read " + v);
                // a value that is already marked was delivered twice
                assertFalse("Key in multiple partitions.", seen.get(v));
                seen.set(v);
            }
            LOG.debug("split=" + split + " count=" + recordsRead);
        } finally {
            recordReader.close();
        }
        assertEquals("Some keys in no partition.", length, seen.cardinality());

        round++;
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) MapContextImpl(org.apache.hadoop.mapreduce.task.MapContextImpl) BitSet(java.util.BitSet) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) Text(org.apache.hadoop.io.Text) Random(java.util.Random) LongWritable(org.apache.hadoop.io.LongWritable) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Test(org.junit.Test)

Example 5 with TaskAttemptContext

use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache.

the class TestMRKeyValueTextInputFormat method testFormat.

/**
 * Writes {@code test.txt} at a series of randomly increasing entry counts, each line being
 * {@code 2*i<TAB>i}, and for every count reads the file back three times through
 * {@link KeyValueTextInputFormat}. Each pass checks the reader class, the key/value classes,
 * the key/value relationship, and that every value is read exactly once across all splits.
 *
 * @throws Exception if writing the file, computing splits or reading fails
 */
@Test
public void testFormat() throws Exception {
    final Job job = Job.getInstance(new Configuration(defaultConf));
    final Path file = new Path(workDir, "test.txt");

    // Log the seed so a failing run can be replayed.
    final int seed = new Random().nextInt();
    LOG.info("seed = " + seed);
    final Random rng = new Random(seed);

    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, workDir);

    final int maxLength = 10000;
    // step through a range of entry counts, growing by a random stride each time
    int length = 0;
    while (length < maxLength) {
        LOG.debug("creating; entries = " + length);

        // (re)write the file with `length` lines of "<2*i>\t<i>\n"
        final Writer out = new OutputStreamWriter(localFs.create(file));
        try {
            int row = 0;
            while (row < length) {
                out.write((row * 2) + "\t" + row + "\n");
                row++;
            }
        } finally {
            out.close();
        }

        // read the file back several times
        final KeyValueTextInputFormat format = new KeyValueTextInputFormat();
        for (int pass = 0; pass < 3; pass++) {
            final int numSplits = 1 + rng.nextInt(maxLength / 20);
            LOG.debug("splitting: requesting = " + numSplits);
            final List<InputSplit> splits = format.getSplits(job);
            LOG.debug("splitting: got =        " + splits.size());

            // one bit per value seen, shared across all splits of this pass
            final BitSet seen = new BitSet(length);
            for (int j = 0; j < splits.size(); j++) {
                final InputSplit current = splits.get(j);
                LOG.debug("split[" + j + "]= " + current);

                final TaskAttemptContext attemptContext =
                    MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
                final RecordReader<Text, Text> recordReader =
                    format.createRecordReader(current, attemptContext);
                assertEquals("reader class is KeyValueLineRecordReader.", KeyValueLineRecordReader.class, recordReader.getClass());

                final MapContext<Text, Text, Text, Text> mapContext =
                    new MapContextImpl<Text, Text, Text, Text>(
                        job.getConfiguration(),
                        attemptContext.getTaskAttemptID(),
                        recordReader,
                        null,
                        null,
                        MapReduceTestUtil.createDummyReporter(),
                        current);
                recordReader.initialize(current, mapContext);
                try {
                    int recordsRead = 0;
                    for (; recordReader.nextKeyValue(); recordsRead++) {
                        final Text key = recordReader.getCurrentKey();
                        assertEquals("Key class is Text.", Text.class, key.getClass());
                        final Text value = recordReader.getCurrentValue();
                        assertEquals("Value class is Text.", Text.class, value.getClass());

                        final int k = Integer.parseInt(key.toString());
                        final int v = Integer.parseInt(value.toString());
                        // keys were written as twice the value
                        assertEquals("Bad key", 0, k % 2);
                        assertEquals("Mismatched key/value", k / 2, v);
                        LOG.debug("read " + v);
                        // a value that is already marked was delivered twice
                        assertFalse("Key in multiple partitions.", seen.get(v));
                        seen.set(v);
                    }
                    LOG.debug("splits[" + j + "]=" + current + " count=" + recordsRead);
                } finally {
                    recordReader.close();
                }
            }
            assertEquals("Some keys in no partition.", length, seen.cardinality());
        }

        length += rng.nextInt(maxLength / 10) + 1;
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) MapContextImpl(org.apache.hadoop.mapreduce.task.MapContextImpl) BitSet(java.util.BitSet) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) Random(java.util.Random) OutputStreamWriter(java.io.OutputStreamWriter) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Writer(java.io.Writer) Test(org.junit.Test)

Aggregations

- TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 151
- Configuration (org.apache.hadoop.conf.Configuration): 79
- Path (org.apache.hadoop.fs.Path): 57
- Job (org.apache.hadoop.mapreduce.Job): 55
- InputSplit (org.apache.hadoop.mapreduce.InputSplit): 53
- TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 53
- TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 51
- Test (org.junit.Test): 46
- IOException (java.io.IOException): 34
- JobContext (org.apache.hadoop.mapreduce.JobContext): 33
- File (java.io.File): 29
- RecordWriter (org.apache.hadoop.mapreduce.RecordWriter): 27
- JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 25
- ArrayList (java.util.ArrayList): 23
- LongWritable (org.apache.hadoop.io.LongWritable): 23
- FileSystem (org.apache.hadoop.fs.FileSystem): 22
- Text (org.apache.hadoop.io.Text): 20
- RecordReader (org.apache.hadoop.mapreduce.RecordReader): 18
- MapContextImpl (org.apache.hadoop.mapreduce.task.MapContextImpl): 17
- FileSplit (org.apache.hadoop.mapreduce.lib.input.FileSplit): 13