
Example 36 with TaskAttemptContext

Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hive by apache.

The class TestHCatOutputFormat, method publishTest.

public void publishTest(Job job) throws Exception {
    HCatOutputFormat hcof = new HCatOutputFormat();
    TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim()
        .createTaskAttemptContext(job.getConfiguration(),
            ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID());
    // Drive the full OutputCommitter lifecycle by hand: job setup,
    // task setup, task commit, then job commit.
    OutputCommitter committer = hcof.getOutputCommitter(tac);
    committer.setupJob(job);
    committer.setupTask(tac);
    committer.commitTask(tac);
    committer.commitJob(job);
    // The committed output should now be visible as a metastore partition.
    Partition part = client.getPartition(dbName, tblName, Arrays.asList("p1"));
    assertNotNull(part);
    StorerInfo storer = InternalUtil.extractStorerInfo(part.getSd(), part.getParameters());
    assertEquals("testArgValue", storer.getProperties().get("hcat.testarg"));
    assertTrue(part.getSd().getLocation().contains("p1"));
}
Also used: OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter), Partition (org.apache.hadoop.hive.metastore.api.Partition), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)
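
The Hive shim above exists to paper over Hadoop version differences in context construction. For comparison, a minimal sketch of the direct construction, assuming Hadoop 2.x and the TaskAttemptContextImpl class from the Aggregations list below; the helper name is hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

// Hypothetical helper: build a TaskAttemptContext without the Hive shim layer.
public static TaskAttemptContext newTaskAttemptContext(Configuration conf) {
    return new TaskAttemptContextImpl(conf, new TaskAttemptID());
}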

Example 37 with TaskAttemptContext

Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project crunch by cloudera.

The class CrunchMultipleOutputs, method write.

/**
 * Writes a key/value pair to a named output file.
 *
 * The record writer is obtained from the job's output format,
 * which should be a FileOutputFormat.
 *
 * @param key the key
 * @param value the value
 * @param baseOutputPath base output path to write the record to.
 * Note: the framework generates a unique filename for the baseOutputPath.
 */
@SuppressWarnings("unchecked")
public void write(KEYOUT key, VALUEOUT value, String baseOutputPath) throws IOException, InterruptedException {
    checkBaseOutputPath(baseOutputPath);
    TaskAttemptContext taskContext = TaskAttemptContextFactory.create(
        context.getConfiguration(), context.getTaskAttemptID());
    getRecordWriter(taskContext, baseOutputPath).write(key, value);
}
Also used: TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)
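
A hypothetical call site for the method above, assuming a CrunchMultipleOutputs<Text, LongWritable> field named mos; the reducer shape and names are illustrative, not from the Crunch source:

// Illustrative sketch: route per-category totals to separate output files.
// "mos" and "bycategory" are invented names; the framework appends a unique
// suffix to each base output path, as the javadoc above notes.
protected void reduce(Text category, Iterable<LongWritable> counts, Context ctx)
        throws IOException, InterruptedException {
    long total = 0;
    for (LongWritable count : counts) {
        total += count.get();
    }
    mos.write(category, new LongWritable(total), "bycategory/" + category);
}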

Example 38 with TaskAttemptContext

Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project crunch by cloudera.

The class AvroOutputFormat, method getRecordWriter.

@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    // Prefer a per-named-output schema if one was registered; otherwise
    // fall back to the job-wide Avro output schema.
    Schema schema;
    String outputName = conf.get("crunch.namedoutput");
    if (outputName != null && !outputName.isEmpty()) {
        schema = new Schema.Parser().parse(conf.get("avro.output.schema." + outputName));
    } else {
        schema = AvroJob.getOutputSchema(conf);
    }
    ReflectDataFactory factory = Avros.getReflectDataFactory(conf);
    final DataFileWriter<T> writer = new DataFileWriter<T>(factory.<T>getWriter());
    Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(conf).create(path));
    return new RecordWriter<AvroWrapper<T>, NullWritable>() {

        @Override
        public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            writer.append(wrapper.datum());
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            writer.close();
        }
    };
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), Schema (org.apache.avro.Schema), DataFileWriter (org.apache.avro.file.DataFileWriter), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), NullWritable (org.apache.hadoop.io.NullWritable), RecordWriter (org.apache.hadoop.mapreduce.RecordWriter), AvroWrapper (org.apache.avro.mapred.AvroWrapper)
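
getRecordWriter above resolves its schema from two configuration keys. A minimal driver-side sketch that populates those same keys; the output name "events" and the string schema are invented for illustration:

import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;

// Illustrative sketch: register a schema under the keys read above.
public static void registerNamedOutputSchema(Configuration conf) {
    Schema schema = Schema.create(Schema.Type.STRING);
    conf.set("crunch.namedoutput", "events");
    conf.set("avro.output.schema.events", schema.toString());
}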

Example 39 with TaskAttemptContext

Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project druid by druid-io.

The class DatasourceRecordReaderTest, method testSanity.

@Test
public void testSanity() throws Exception {
    // Load the test segment descriptor and point its load spec at the
    // local index.zip bundled with the test resources.
    DataSegment segment = new DefaultObjectMapper()
        .readValue(this.getClass().getClassLoader().getResource("test-segment/descriptor.json"), DataSegment.class)
        .withLoadSpec(ImmutableMap.<String, Object>of(
            "type", "local",
            "path", this.getClass().getClassLoader().getResource("test-segment/index.zip").getPath()));
    InputSplit split = new DatasourceInputSplit(Lists.newArrayList(WindowedDataSegment.of(segment)), null);
    Configuration config = new Configuration();
    config.set(
        DatasourceInputFormat.CONF_DRUID_SCHEMA,
        HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(
            new DatasourceIngestionSpec(segment.getDataSource(), segment.getInterval(), null, null, null, null,
                segment.getDimensions(), segment.getMetrics(), false)));
    // The mocked TaskAttemptContext only needs to hand back the Configuration.
    TaskAttemptContext context = EasyMock.createNiceMock(TaskAttemptContext.class);
    EasyMock.expect(context.getConfiguration()).andReturn(config).anyTimes();
    EasyMock.replay(context);
    DatasourceRecordReader rr = new DatasourceRecordReader();
    rr.initialize(split, context);
    Assert.assertEquals(0, rr.getProgress(), 0.0001);
    List<InputRow> rows = Lists.newArrayList();
    while (rr.nextKeyValue()) {
        rows.add(rr.getCurrentValue());
    }
    verifyRows(rows);
    Assert.assertEquals(1, rr.getProgress(), 0.0001);
    rr.close();
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), InputRow (io.druid.data.input.InputRow), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper), DataSegment (io.druid.timeline.DataSegment), InputSplit (org.apache.hadoop.mapreduce.InputSplit), Test (org.junit.Test)
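
The EasyMock pattern in this test generalizes to any reader that only touches getConfiguration() during initialize(); a minimal sketch, with a hypothetical helper name:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.easymock.EasyMock;

// Hypothetical helper: a nice mock that answers getConfiguration() and
// returns harmless defaults for everything else.
public static TaskAttemptContext mockContext(Configuration conf) {
    TaskAttemptContext context = EasyMock.createNiceMock(TaskAttemptContext.class);
    EasyMock.expect(context.getConfiguration()).andReturn(conf).anyTimes();
    EasyMock.replay(context);
    return context;
}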

Example 40 with TaskAttemptContext

Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project mongo-hadoop by mongodb.

The class GridFSInputFormatTest, method testReadWholeFile.

@Test
public void testReadWholeFile() throws IOException, InterruptedException {
    Configuration conf = getConfiguration();
    MongoConfigUtil.setGridFSWholeFileSplit(conf, true);
    JobContext jobContext = mockJobContext(conf);
    List<InputSplit> splits = inputFormat.getSplits(jobContext);
    // With whole-file splitting on, the entire file is a single split;
    // records within it are delimited by runs of '#'.
    MongoConfigUtil.setGridFSDelimiterPattern(conf, "#+");
    TaskAttemptContext context = mockTaskAttemptContext(conf);
    assertEquals(1, splits.size());
    List<String> sections = new ArrayList<String>();
    for (InputSplit split : splits) {
        GridFSInputFormat.GridFSTextRecordReader reader = new GridFSInputFormat.GridFSTextRecordReader();
        reader.initialize(split, context);
        while (reader.nextKeyValue()) {
            sections.add(reader.getCurrentValue().toString());
        }
    }
    assertEquals(Arrays.asList(readmeSections), sections);
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), ArrayList (java.util.ArrayList), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), JobContext (org.apache.hadoop.mapreduce.JobContext), InputSplit (org.apache.hadoop.mapreduce.InputSplit), Test (org.junit.Test), BaseHadoopTest (com.mongodb.hadoop.testutils.BaseHadoopTest)
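
Driver-side, the two MongoConfigUtil settings exercised by this test would be applied when configuring the job; a minimal sketch, assuming the same MongoConfigUtil helpers, with an invented input URI:

import com.mongodb.hadoop.util.MongoConfigUtil;
import org.apache.hadoop.conf.Configuration;

// Illustrative sketch: read each GridFS file as one split, breaking records
// on runs of '#'. The URI is invented.
public static Configuration gridFsConf() {
    Configuration conf = new Configuration();
    MongoConfigUtil.setInputURI(conf, "mongodb://localhost:27017/db.fs");
    MongoConfigUtil.setGridFSWholeFileSplit(conf, true);
    MongoConfigUtil.setGridFSDelimiterPattern(conf, "#+");
    return conf;
}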

Aggregations

TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 110
Configuration (org.apache.hadoop.conf.Configuration): 58
Job (org.apache.hadoop.mapreduce.Job): 44
Path (org.apache.hadoop.fs.Path): 39
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 38
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 36
Test (org.junit.Test): 35
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 33
JobContext (org.apache.hadoop.mapreduce.JobContext): 28
IOException (java.io.IOException): 27
File (java.io.File): 22
LongWritable (org.apache.hadoop.io.LongWritable): 22
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 21
RecordWriter (org.apache.hadoop.mapreduce.RecordWriter): 19
MapContextImpl (org.apache.hadoop.mapreduce.task.MapContextImpl): 17
FileSystem (org.apache.hadoop.fs.FileSystem): 16
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter): 12
ArrayList (java.util.ArrayList): 11
BytesWritable (org.apache.hadoop.io.BytesWritable): 10
MapFile (org.apache.hadoop.io.MapFile): 10
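
The list also names JobContextImpl, the job-level counterpart used the same way in test fixtures; a minimal sketch, assuming Hadoop 2.x, with a hypothetical helper name:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.task.JobContextImpl;

// Hypothetical helper: a job-level context, the counterpart of the
// TaskAttemptContextImpl sketch shown after Example 36.
public static JobContext newJobContext(Configuration conf) {
    return new JobContextImpl(conf, new JobID());
}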