Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hive by apache.
The class TestHCatOutputFormat, method publishTest.
public void publishTest(Job job) throws Exception {
  HCatOutputFormat hcof = new HCatOutputFormat();
  TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
      job.getConfiguration(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID());
  OutputCommitter committer = hcof.getOutputCommitter(tac);
  committer.setupJob(job);
  committer.setupTask(tac);
  committer.commitTask(tac);
  committer.commitJob(job);
  Partition part = client.getPartition(dbName, tblName, Arrays.asList("p1"));
  assertNotNull(part);
  StorerInfo storer = InternalUtil.extractStorerInfo(part.getSd(), part.getParameters());
  assertEquals(storer.getProperties().get("hcat.testarg"), "testArgValue");
  assertTrue(part.getSd().getLocation().indexOf("p1") != -1);
}
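The shim call above hides the Hadoop-version-specific constructor. For comparison, here is a minimal sketch of building a TaskAttemptContext directly against the Hadoop 2.x mapreduce API; this is not the shim path the Hive test uses, and the attempt-id fields are illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextSketch {

  // Wraps a job configuration and a synthetic attempt id in a TaskAttemptContext,
  // roughly what the HCat shim resolves to on Hadoop 2.x.
  public static TaskAttemptContext create(Configuration conf) {
    TaskAttemptID attemptId = new TaskAttemptID("jt", 1, TaskType.MAP, 0, 0);
    return new TaskAttemptContextImpl(conf, attemptId);
  }
}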
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project crunch by cloudera.
The class CrunchMultipleOutputs, method write.
/**
 * Write key value to an output file name.
 *
 * Gets the record writer from job's output format.
 * Job's output format should be a FileOutputFormat.
 *
 * @param key the key
 * @param value the value
 * @param baseOutputPath base-output path to write the record to.
 * Note: Framework will generate unique filename for the baseOutputPath
 */
@SuppressWarnings("unchecked")
public void write(KEYOUT key, VALUEOUT value, String baseOutputPath) throws IOException, InterruptedException {
  checkBaseOutputPath(baseOutputPath);
  TaskAttemptContext taskContext = TaskAttemptContextFactory.create(
      context.getConfiguration(), context.getTaskAttemptID());
  getRecordWriter(taskContext, baseOutputPath).write(key, value);
}
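CrunchMultipleOutputs follows the pattern of Hadoop's own MultipleOutputs, which exposes the same three-argument write(key, value, baseOutputPath) call. A hedged sketch of that call from a reducer, written against the stock org.apache.hadoop.mapreduce.lib.output.MultipleOutputs rather than the Crunch class; the reducer, key/value types, and the "byKey" base path are illustrative.

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class NamedOutputReducerSketch extends Reducer<Text, IntWritable, Text, IntWritable> {

  private MultipleOutputs<Text, IntWritable> multipleOutputs;

  @Override
  protected void setup(Context context) {
    // MultipleOutputs wraps the task context, just as CrunchMultipleOutputs does.
    multipleOutputs = new MultipleOutputs<Text, IntWritable>(context);
  }

  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable value : values) {
      sum += value.get();
    }
    // Routes the record through the job's FileOutputFormat; the framework appends a
    // unique part suffix to the "byKey" base output path.
    multipleOutputs.write(key, new IntWritable(sum), "byKey");
  }

  @Override
  protected void cleanup(Context context) throws IOException, InterruptedException {
    multipleOutputs.close();
  }
}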
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project crunch by cloudera.
The class AvroOutputFormat, method getRecordWriter.
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  Schema schema = null;
  String outputName = conf.get("crunch.namedoutput");
  if (outputName != null && !outputName.isEmpty()) {
    schema = (new Schema.Parser()).parse(conf.get("avro.output.schema." + outputName));
  } else {
    schema = AvroJob.getOutputSchema(context.getConfiguration());
  }
  ReflectDataFactory factory = Avros.getReflectDataFactory(conf);
  final DataFileWriter<T> WRITER = new DataFileWriter<T>(factory.<T>getWriter());
  Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
  WRITER.create(schema, path.getFileSystem(context.getConfiguration()).create(path));
  return new RecordWriter<AvroWrapper<T>, NullWritable>() {

    @Override
    public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      WRITER.append(wrapper.datum());
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
      WRITER.close();
    }
  };
}
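The named-output branch above reads two configuration keys: crunch.namedoutput selects the output, and avro.output.schema.<name> carries that output's schema as JSON. A minimal sketch of populating those keys on the job side; the helper class and the parameter names are illustrative, not part of Crunch.

import org.apache.avro.Schema;
import org.apache.hadoop.mapreduce.Job;

public class NamedOutputSchemaSketch {

  // Registers a per-output Avro schema under the keys the record writer above looks up.
  static void configureNamedOutput(Job job, String outputName, Schema schema) {
    job.getConfiguration().set("crunch.namedoutput", outputName);
    // Schema.toString() emits the JSON form that Schema.Parser.parse() reads back.
    job.getConfiguration().set("avro.output.schema." + outputName, schema.toString());
  }
}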
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project druid by druid-io.
The class DatasourceRecordReaderTest, method testSanity.
@Test
public void testSanity() throws Exception {
  DataSegment segment = new DefaultObjectMapper()
      .readValue(this.getClass().getClassLoader().getResource("test-segment/descriptor.json"), DataSegment.class)
      .withLoadSpec(ImmutableMap.<String, Object>of(
          "type", "local",
          "path", this.getClass().getClassLoader().getResource("test-segment/index.zip").getPath()));
  InputSplit split = new DatasourceInputSplit(Lists.newArrayList(WindowedDataSegment.of(segment)), null);
  Configuration config = new Configuration();
  config.set(
      DatasourceInputFormat.CONF_DRUID_SCHEMA,
      HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(
          new DatasourceIngestionSpec(segment.getDataSource(), segment.getInterval(), null, null, null, null,
              segment.getDimensions(), segment.getMetrics(), false)));
  TaskAttemptContext context = EasyMock.createNiceMock(TaskAttemptContext.class);
  EasyMock.expect(context.getConfiguration()).andReturn(config).anyTimes();
  EasyMock.replay(context);
  DatasourceRecordReader rr = new DatasourceRecordReader();
  rr.initialize(split, context);
  Assert.assertEquals(0, rr.getProgress(), 0.0001);
  List<InputRow> rows = Lists.newArrayList();
  while (rr.nextKeyValue()) {
    rows.add(rr.getCurrentValue());
  }
  verifyRows(rows);
  Assert.assertEquals(1, rr.getProgress(), 0.0001);
  rr.close();
}
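The test drives the record reader by hand: initialize it with a split and a mocked TaskAttemptContext, loop on nextKeyValue, collect getCurrentValue, then close. A generic sketch of that pattern, assuming the caller already has an InputSplit and a (possibly mocked) context; the helper class is illustrative.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class RecordReaderDriverSketch {

  // Reads every value a RecordReader produces for one split, outside a real MapReduce task.
  // Some RecordReader implementations reuse the value object between calls, so callers may
  // need to copy values instead of storing the references directly.
  static <K, V> List<V> readAll(RecordReader<K, V> reader, InputSplit split, TaskAttemptContext context)
      throws Exception {
    List<V> values = new ArrayList<V>();
    reader.initialize(split, context);
    try {
      // nextKeyValue() advances the reader; getCurrentValue() returns the value just read.
      while (reader.nextKeyValue()) {
        values.add(reader.getCurrentValue());
      }
    } finally {
      reader.close();
    }
    return values;
  }
}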
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project mongo-hadoop by mongodb.
The class GridFSInputFormatTest, method testReadWholeFile.
@Test
public void testReadWholeFile() throws IOException, InterruptedException {
  Configuration conf = getConfiguration();
  MongoConfigUtil.setGridFSWholeFileSplit(conf, true);
  JobContext jobContext = mockJobContext(conf);
  List<InputSplit> splits = inputFormat.getSplits(jobContext);
  // Empty delimiter == no delimiter.
  MongoConfigUtil.setGridFSDelimiterPattern(conf, "#+");
  TaskAttemptContext context = mockTaskAttemptContext(conf);
  assertEquals(1, splits.size());
  List<String> sections = new ArrayList<String>();
  for (InputSplit split : splits) {
    GridFSInputFormat.GridFSTextRecordReader reader = new GridFSInputFormat.GridFSTextRecordReader();
    reader.initialize(split, context);
    int i;
    for (i = 0; reader.nextKeyValue(); ++i) {
      sections.add(reader.getCurrentValue().toString());
    }
  }
  assertEquals(Arrays.asList(readmeSections), sections);
}
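The split and delimiter behavior in this test is driven entirely by the job configuration. A minimal sketch of that setup, using only the MongoConfigUtil calls that appear in the snippet; the import path is assumed to be mongo-hadoop's com.mongodb.hadoop.util, and the helper class is illustrative.

import com.mongodb.hadoop.util.MongoConfigUtil;
import org.apache.hadoop.conf.Configuration;

public class GridFSConfigSketch {

  static Configuration gridFsReadConf() {
    Configuration conf = new Configuration();
    // Expose each GridFS file as a single split rather than one split per chunk.
    MongoConfigUtil.setGridFSWholeFileSplit(conf, true);
    // Break each file's text into sections on runs of '#'; an empty pattern means
    // no delimiter, so the whole file comes back as one value.
    MongoConfigUtil.setGridFSDelimiterPattern(conf, "#+");
    return conf;
  }
}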