
Example 41 with TaskAttemptID

Use of org.apache.hadoop.mapreduce.TaskAttemptID in project druid by druid-io.

The class DruidOrcInputFormatTest, method testRead.

@Test
public void testRead() throws IOException, InterruptedException {
    InputFormat inputFormat = ReflectionUtils.newInstance(OrcNewInputFormat.class, job.getConfiguration());
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    RecordReader reader = inputFormat.createRecordReader(split, context);
    OrcHadoopInputRowParser parser = (OrcHadoopInputRowParser) config.getParser();
    reader.initialize(split, context);
    reader.nextKeyValue();
    OrcStruct data = (OrcStruct) reader.getCurrentValue();
    MapBasedInputRow row = (MapBasedInputRow) parser.parse(data);
    Assert.assertTrue(row.getEvent().keySet().size() == 4);
    Assert.assertEquals(new DateTime(timestamp), row.getTimestamp());
    Assert.assertEquals(parser.getParseSpec().getDimensionsSpec().getDimensionNames(), row.getDimensions());
    Assert.assertEquals(col1, row.getEvent().get("col1"));
    Assert.assertEquals(Arrays.asList(col2), row.getDimension("col2"));
    reader.close();
}
Also used: OrcStruct (org.apache.hadoop.hive.ql.io.orc.OrcStruct), OrcNewInputFormat (org.apache.hadoop.hive.ql.io.orc.OrcNewInputFormat), InputFormat (org.apache.hadoop.mapreduce.InputFormat), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), RecordReader (org.apache.hadoop.mapreduce.RecordReader), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), MapBasedInputRow (io.druid.data.input.MapBasedInputRow), DateTime (org.joda.time.DateTime), Test (org.junit.Test)
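
The pattern above (wrapping a freshly constructed, dummy TaskAttemptID in a TaskAttemptContextImpl to drive an InputFormat outside of a running MapReduce job) is not ORC-specific. Below is a minimal, self-contained sketch of the same idea using a stock TextInputFormat; the input path is a placeholder and the class name is made up for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class StandaloneReadSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        // Placeholder input file; point this at any local text file.
        FileInputFormat.addInputPath(job, new Path("file:///tmp/example.txt"));
        TextInputFormat inputFormat = new TextInputFormat();
        // A default-constructed TaskAttemptID is enough to satisfy the InputFormat API here.
        TaskAttemptContext context =
                new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
        for (InputSplit split : inputFormat.getSplits(job)) {
            RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(split, context);
            reader.initialize(split, context);
            while (reader.nextKeyValue()) {
                System.out.println(reader.getCurrentValue());
            }
            reader.close();
        }
    }
}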

Example 42 with TaskAttemptID

Use of org.apache.hadoop.mapreduce.TaskAttemptID in project flink by apache.

The class HCatInputFormatBase, method open.

@Override
public void open(HadoopInputSplit split) throws IOException {
    TaskAttemptContext context = null;
    try {
        context = HadoopUtils.instantiateTaskAttemptContext(configuration, new TaskAttemptID());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    try {
        this.recordReader = this.hCatInputFormat.createRecordReader(split.getHadoopInputSplit(), context);
        this.recordReader.initialize(split.getHadoopInputSplit(), context);
    } catch (InterruptedException e) {
        throw new IOException("Could not create RecordReader.", e);
    } finally {
        this.fetched = false;
    }
}
Also used: TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), IOException (java.io.IOException), HCatException (org.apache.hive.hcatalog.common.HCatException)

Example 43 with TaskAttemptID

Use of org.apache.hadoop.mapreduce.TaskAttemptID in project flink by apache.

The class HadoopInputFormatBase, method open.

@Override
public void open(HadoopInputSplit split) throws IOException {
    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        TaskAttemptContext context;
        try {
            context = HadoopUtils.instantiateTaskAttemptContext(configuration, new TaskAttemptID());
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        try {
            this.recordReader = this.mapreduceInputFormat.createRecordReader(split.getHadoopInputSplit(), context);
            this.recordReader.initialize(split.getHadoopInputSplit(), context);
        } catch (InterruptedException e) {
            throw new IOException("Could not create RecordReader.", e);
        } finally {
            this.fetched = false;
        }
    }
}
Also used: TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), IOException (java.io.IOException)
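
Both Flink examples above delegate context creation to HadoopUtils.instantiateTaskAttemptContext, since org.apache.hadoop.mapreduce.TaskAttemptContext changed from a concrete class in Hadoop 1.x to an interface (implemented by TaskAttemptContextImpl) in Hadoop 2.x. The sketch below shows one way such a version-agnostic helper could be written via reflection; it is an illustration under that assumption, not Flink's actual HadoopUtils implementation, and the class and method names are made up.

import java.lang.reflect.Constructor;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;

public class TaskAttemptContextFactory {

    // Hypothetical helper; not Flink's HadoopUtils code.
    public static TaskAttemptContext create(Configuration conf, TaskAttemptID id) throws Exception {
        Class<?> contextClass;
        try {
            // Hadoop 2.x: TaskAttemptContext is an interface, so instantiate the Impl class.
            contextClass = Class.forName("org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl");
        } catch (ClassNotFoundException e) {
            // Hadoop 1.x: TaskAttemptContext itself is a concrete class.
            contextClass = Class.forName("org.apache.hadoop.mapreduce.TaskAttemptContext");
        }
        Constructor<?> ctor = contextClass.getConstructor(Configuration.class, TaskAttemptID.class);
        return (TaskAttemptContext) ctor.newInstance(conf, id);
    }
}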

Example 44 with TaskAttemptID

Use of org.apache.hadoop.mapreduce.TaskAttemptID in project flink by apache.

The class HadoopOutputFormatBase, method open.

/**
 * Create the temporary output file for the Hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }
        this.taskNumber = taskNumber + 1;
        // for hadoop 2.2
        this.configuration.set("mapreduce.output.basename", "tmp");
        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_" + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0") + Integer.toString(taskNumber + 1) + "_0");
        this.configuration.set("mapred.task.id", taskAttemptID.toString());
        this.configuration.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.configuration.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.configuration.setInt("mapreduce.task.partition", taskNumber + 1);
        try {
            this.context = HadoopUtils.instantiateTaskAttemptContext(this.configuration, taskAttemptID);
            this.outputCommitter = this.mapreduceOutputFormat.getOutputCommitter(this.context);
            this.outputCommitter.setupJob(HadoopUtils.instantiateJobContext(this.configuration, new JobID()));
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        this.context.getCredentials().addAll(this.credentials);
        Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
        if (currentUserCreds != null) {
            this.context.getCredentials().addAll(currentUserCreds);
        }
        // compatible for hadoop 2.2.0, the temporary output directory is different from hadoop 1.2.1
        if (outputCommitter instanceof FileOutputCommitter) {
            this.configuration.set("mapreduce.task.output.dir", ((FileOutputCommitter) this.outputCommitter).getWorkPath().toString());
        }
        try {
            this.recordWriter = this.mapreduceOutputFormat.getRecordWriter(this.context);
        } catch (InterruptedException e) {
            throw new IOException("Could not create RecordWriter.", e);
        }
    }
}
Also used: TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), FileOutputCommitter (org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter), IOException (java.io.IOException), JobID (org.apache.hadoop.mapreduce.JobID), Credentials (org.apache.hadoop.security.Credentials)
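
The padded string assembled above is just the canonical attempt-ID format with an empty jtIdentifier and job number 0. As a standalone illustration (independent of the Flink code, class name invented), the same ID can be produced with a zero-padded format string or built directly from its components:

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class AttemptIdSketch {
    public static void main(String[] args) {
        int taskNumber = 0; // first parallel subtask
        // Equivalent to the manual padding above: zero-pad the 1-based task number to six digits.
        TaskAttemptID parsed =
                TaskAttemptID.forName(String.format("attempt__0000_r_%06d_0", taskNumber + 1));
        // The same ID built from its components (empty jtIdentifier, job 0, reduce task, attempt 0).
        TaskAttemptID built = new TaskAttemptID("", 0, TaskType.REDUCE, taskNumber + 1, 0);
        System.out.println(parsed); // attempt__0000_r_000001_0
        System.out.println(built);  // attempt__0000_r_000001_0
    }
}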

Example 45 with TaskAttemptID

Use of org.apache.hadoop.mapreduce.TaskAttemptID in project druid by druid-io.

The class HadoopDruidIndexerConfigTest, method shouldMakeDefaultSegmentOutputPathIfNotHDFS.

@Test
public void shouldMakeDefaultSegmentOutputPathIfNotHDFS() {
    final HadoopIngestionSpec schema;
    try {
        schema = jsonReadWriteRead("{\n" + "    \"dataSchema\": {\n" + "        \"dataSource\": \"the:data:source\",\n" + "        \"metricsSpec\": [],\n" + "        \"granularitySpec\": {\n" + "            \"type\": \"uniform\",\n" + "            \"segmentGranularity\": \"hour\",\n" + "            \"intervals\": [\"2012-07-10/P1D\"]\n" + "        }\n" + "    },\n" + "    \"ioConfig\": {\n" + "        \"type\": \"hadoop\",\n" + "        \"segmentOutputPath\": \"/tmp/dru:id/data:test\"\n" + "    }\n" + "}", HadoopIngestionSpec.class);
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
    HadoopDruidIndexerConfig cfg = new HadoopDruidIndexerConfig(schema.withTuningConfig(schema.getTuningConfig().withVersion("some:brand:new:version")));
    Bucket bucket = new Bucket(4711, new DateTime(2012, 07, 10, 5, 30), 4712);
    Path path = JobHelper.makeFileNamePath(new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath()), new LocalFileSystem(), new DataSegment(cfg.getSchema().getDataSchema().getDataSource(), cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(), cfg.getSchema().getTuningConfig().getVersion(), null, null, null, new NumberedShardSpec(bucket.partitionNum, 5000), -1, -1), JobHelper.INDEX_ZIP);
    Assert.assertEquals("file:/tmp/dru:id/data:test/the:data:source/2012-07-10T05:00:00.000Z_2012-07-10T06:00:00.000Z/some:brand:new:" + "version/4712/index.zip", path.toString());
    path = JobHelper.makeFileNamePath(new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath()), new LocalFileSystem(), new DataSegment(cfg.getSchema().getDataSchema().getDataSource(), cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(), cfg.getSchema().getTuningConfig().getVersion(), null, null, null, new NumberedShardSpec(bucket.partitionNum, 5000), -1, -1), JobHelper.DESCRIPTOR_JSON);
    Assert.assertEquals("file:/tmp/dru:id/data:test/the:data:source/2012-07-10T05:00:00.000Z_2012-07-10T06:00:00.000Z/some:brand:new:" + "version/4712/descriptor.json", path.toString());
    path = JobHelper.makeTmpPath(new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath()), new LocalFileSystem(), new DataSegment(cfg.getSchema().getDataSchema().getDataSource(), cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(), cfg.getSchema().getTuningConfig().getVersion(), null, null, null, new NumberedShardSpec(bucket.partitionNum, 5000), -1, -1), new TaskAttemptID("abc", 123, TaskType.REDUCE, 1, 0));
    Assert.assertEquals("file:/tmp/dru:id/data:test/the:data:source/2012-07-10T05:00:00.000Z_2012-07-10T06:00:00.000Z/some:brand:new:" + "version/4712/4712_index.zip.0", path.toString());
}
Also used: Path (org.apache.hadoop.fs.Path), LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), DataSegment (io.druid.timeline.DataSegment), DateTime (org.joda.time.DateTime), NumberedShardSpec (io.druid.timeline.partition.NumberedShardSpec), HashBasedNumberedShardSpec (io.druid.timeline.partition.HashBasedNumberedShardSpec), Test (org.junit.Test)
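
For reference, the TaskAttemptID passed to JobHelper.makeTmpPath in the last assertion renders as shown below (a small sketch, independent of Druid's JobHelper; class name invented). The trailing ".0" on the asserted tmp path appears to correspond to the attempt number returned by getId(), though that is an inference from the expected output rather than a statement about JobHelper's internals.

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class DruidAttemptIdSketch {
    public static void main(String[] args) {
        // Same arguments as in the test above: jtIdentifier "abc", job 123, reduce task 1, attempt 0.
        TaskAttemptID attempt = new TaskAttemptID("abc", 123, TaskType.REDUCE, 1, 0);
        System.out.println(attempt);         // attempt_abc_0123_r_000001_0
        System.out.println(attempt.getId()); // 0
    }
}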

Aggregations

TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 78
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 35
Test (org.junit.Test): 34
Configuration (org.apache.hadoop.conf.Configuration): 28
Path (org.apache.hadoop.fs.Path): 25
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 22
IOException (java.io.IOException): 19
JobID (org.apache.hadoop.mapreduce.JobID): 16
TaskID (org.apache.hadoop.mapreduce.TaskID): 15
File (java.io.File): 14
Job (org.apache.hadoop.mapreduce.Job): 14
ArrayList (java.util.ArrayList): 13
JobContext (org.apache.hadoop.mapreduce.JobContext): 12
LongWritable (org.apache.hadoop.io.LongWritable): 11
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 10
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter): 10
FileSystem (org.apache.hadoop.fs.FileSystem): 9
TaskAttemptInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo): 8
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 8
HashMap (java.util.HashMap): 7