Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project crunch by cloudera.
Class CrunchMultipleOutputs, method write.
/**
 * Write key and value to baseOutputPath using the namedOutput.
 *
 * @param namedOutput the named output name
 * @param key the key
 * @param value the value
 * @param baseOutputPath base output path to write the record to.
 * Note: the framework will generate a unique filename for the baseOutputPath.
 */
@SuppressWarnings("unchecked")
public <K, V> void write(String namedOutput, K key, V value, String baseOutputPath)
    throws IOException, InterruptedException {
  checkNamedOutputName(context, namedOutput, false);
  checkBaseOutputPath(baseOutputPath);
  if (!namedOutputs.contains(namedOutput)) {
    throw new IllegalArgumentException("Undefined named output '" + namedOutput + "'");
  }
  TaskAttemptContext taskContext = getContext(namedOutput);
  getRecordWriter(taskContext, baseOutputPath).write(key, value);
}
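The write method above has the same signature as Hadoop's standard MultipleOutputs.write, so the call pattern from a reducer looks the same in both cases. Below is a minimal sketch using the stock org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; the reducer class, the named output "stats", and the base path "stats/part" are assumptions for illustration, not code from Crunch.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

// Hypothetical reducer; the named output "stats" must have been registered on the Job
// beforehand with MultipleOutputs.addNamedOutput(job, "stats", ...).
public class StatsReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
  private MultipleOutputs<Text, LongWritable> mos;

  @Override
  protected void setup(Context ctx) {
    mos = new MultipleOutputs<Text, LongWritable>(ctx);
  }

  @Override
  protected void reduce(Text key, Iterable<LongWritable> values, Context ctx)
      throws IOException, InterruptedException {
    long sum = 0;
    for (LongWritable v : values) {
      sum += v.get();
    }
    // Routes the record to the "stats" named output; the framework appends a unique
    // suffix to the base output path "stats/part".
    mos.write("stats", key, new LongWritable(sum), "stats/part");
  }

  @Override
  protected void cleanup(Context ctx) throws IOException, InterruptedException {
    mos.close();
  }
}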
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project druid by druid-io.
Class DruidParquetInputTest, method getFirstRecord.
private GenericRecord getFirstRecord(Job job, String parquetPath) throws IOException, InterruptedException {
  File testFile = new File(parquetPath);
  Path path = new Path(testFile.getAbsoluteFile().toURI());
  FileSplit split = new FileSplit(path, 0, testFile.length(), null);
  DruidParquetInputFormat inputFormat =
      ReflectionUtils.newInstance(DruidParquetInputFormat.class, job.getConfiguration());
  TaskAttemptContext context =
      new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
  RecordReader reader = inputFormat.createRecordReader(split, context);
  reader.initialize(split, context);
  reader.nextKeyValue();
  GenericRecord data = (GenericRecord) reader.getCurrentValue();
  reader.close();
  return data;
}
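The same pattern extends from the first record to the whole split by looping on nextKeyValue(). A sketch of such a helper, assuming the same test class and imports as getFirstRecord above; the method name readAllRecords and the use of java.util.List/ArrayList are additions for illustration, not Druid code.

private List<GenericRecord> readAllRecords(Job job, String parquetPath)
    throws IOException, InterruptedException {
  File testFile = new File(parquetPath);
  Path path = new Path(testFile.getAbsoluteFile().toURI());
  FileSplit split = new FileSplit(path, 0, testFile.length(), null);
  DruidParquetInputFormat inputFormat =
      ReflectionUtils.newInstance(DruidParquetInputFormat.class, job.getConfiguration());
  TaskAttemptContext context =
      new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
  List<GenericRecord> records = new ArrayList<GenericRecord>();
  RecordReader reader = inputFormat.createRecordReader(split, context);
  try {
    reader.initialize(split, context);
    // Iterate until the reader is exhausted instead of stopping after one record.
    while (reader.nextKeyValue()) {
      records.add((GenericRecord) reader.getCurrentValue());
    }
  } finally {
    reader.close();
  }
  return records;
}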
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project druid by druid-io.
Class DruidOrcInputFormatTest, method testRead.
@Test
public void testRead() throws IOException, InterruptedException {
  InputFormat inputFormat = ReflectionUtils.newInstance(OrcNewInputFormat.class, job.getConfiguration());
  TaskAttemptContext context =
      new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
  RecordReader reader = inputFormat.createRecordReader(split, context);
  OrcHadoopInputRowParser parser = (OrcHadoopInputRowParser) config.getParser();
  reader.initialize(split, context);
  reader.nextKeyValue();
  OrcStruct data = (OrcStruct) reader.getCurrentValue();
  MapBasedInputRow row = (MapBasedInputRow) parser.parse(data);
  Assert.assertTrue(row.getEvent().keySet().size() == 4);
  Assert.assertEquals(new DateTime(timestamp), row.getTimestamp());
  Assert.assertEquals(parser.getParseSpec().getDimensionsSpec().getDimensionNames(), row.getDimensions());
  Assert.assertEquals(col1, row.getEvent().get("col1"));
  Assert.assertEquals(Arrays.asList(col2), row.getDimension("col2"));
  reader.close();
}
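Both Druid tests build the context with the no-argument TaskAttemptID constructor, which is enough for a standalone reader. When a predictable task identity matters (for example, because output file names are derived from the task id), a fully specified id can be passed instead. A sketch; the identifier values here are arbitrary and the class name is an assumption for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class ExplicitAttemptIdSketch {
  static TaskAttemptContext newContext(Configuration conf) {
    // jtIdentifier "jt", job 0, map task 0, attempt 0: arbitrary but deterministic values.
    TaskAttemptID attemptId = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    return new TaskAttemptContextImpl(conf, attemptId);
  }
}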
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project goldenorb by jzachr.
Class OrbPartition, method dumpData.
private void dumpData() {
  Configuration conf = new Configuration();
  Job job = null;
  JobContext jobContext = null;
  TaskAttemptContext tao = null;
  RecordWriter rw;
  VertexWriter vw;
  FileOutputFormat outputFormat;
  boolean tryAgain = true;
  int count = 0;
  // Retry the dump up to 15 times; any failure below sets tryAgain and repeats the loop.
  while (tryAgain && count < 15) {
    try {
      count++;
      tryAgain = false;
      if (job == null) {
        job = new Job(conf);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(job,
            new Path(new String(getOrbConf().getNameNode() + getOrbConf().getFileOutputPath())));
      }
      if (jobContext == null) {
        jobContext = new JobContext(job.getConfiguration(), new JobID());
      }
      System.out.println(jobContext.getConfiguration().get("mapred.output.dir"));
      tao = new TaskAttemptContext(jobContext.getConfiguration(),
          new TaskAttemptID(new TaskID(jobContext.getJobID(), true, getPartitionID()), 0));
      outputFormat = (FileOutputFormat) tao.getOutputFormatClass().newInstance();
      rw = outputFormat.getRecordWriter(tao);
      vw = (VertexWriter) getOrbConf().getVertexOutputFormatClass().newInstance();
      // Write every vertex of this partition through the configured VertexWriter.
      for (Vertex v : vertices.values()) {
        OrbContext oc = vw.vertexWrite(v);
        rw.write(oc.getKey(), oc.getValue());
        // orbLogger.info("Partition: " + Integer.toString(partitionId) + "writing: " +
        // oc.getKey().toString() + ", " + oc.getValue().toString());
      }
      rw.close(tao);
      FileOutputCommitter cm = (FileOutputCommitter) outputFormat.getOutputCommitter(tao);
      if (cm.needsTaskCommit(tao)) {
        cm.commitTask(tao);
        cm.cleanupJob(jobContext);
      } else {
        cm.cleanupJob(jobContext);
        tryAgain = true;
      }
    } catch (IOException e) {
      tryAgain = true;
      e.printStackTrace();
    } catch (InstantiationException e) {
      tryAgain = true;
      e.printStackTrace();
    } catch (IllegalAccessException e) {
      tryAgain = true;
      e.printStackTrace();
    } catch (ClassNotFoundException e) {
      tryAgain = true;
      e.printStackTrace();
    } catch (InterruptedException e) {
      tryAgain = true;
      e.printStackTrace();
    }
  }
  if (tryAgain) {
    synchronized (this) {
      try {
        wait(1000);
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
    }
  }
}
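The direct new JobContext(...) and new TaskAttemptContext(...) calls above only compile against older Hadoop releases in which those types are concrete classes; in Hadoop 2.x they are interfaces. Below is a sketch of the equivalent construction against the Hadoop 2.x API; it is a hypothetical migration, not GoldenOrb code, and partitionId stands in for getPartitionID().

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class Hadoop2ContextSketch {
  static TaskAttemptContext contextForPartition(Configuration conf, int partitionId) {
    // JobContextImpl and TaskAttemptContextImpl are the Hadoop 2.x implementations of
    // the JobContext and TaskAttemptContext interfaces.
    JobContext jobContext = new JobContextImpl(conf, new JobID());
    TaskAttemptID attemptId =
        new TaskAttemptID(new TaskID(jobContext.getJobID(), TaskType.MAP, partitionId), 0);
    return new TaskAttemptContextImpl(conf, attemptId);
  }
}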
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project goldenorb by jzachr.
Class VertexInput, method initialize.
/**
 * Deserializes the raw input split for this partition and initializes its record reader.
 */
@SuppressWarnings("unchecked")
public void initialize() {
  // rebuild the input split
  org.apache.hadoop.mapreduce.InputSplit split = null;
  DataInputBuffer splitBuffer = new DataInputBuffer();
  splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
  SerializationFactory factory = new SerializationFactory(orbConf);
  Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer;
  try {
    deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>)
        factory.getDeserializer(orbConf.getClassByName(splitClass));
    deserializer.open(splitBuffer);
    split = deserializer.deserialize(null);
    JobConf job = new JobConf(orbConf);
    JobContext jobContext = new JobContext(job, new JobID(getOrbConf().getJobNumber(), 0));
    InputFormat<INPUT_KEY, INPUT_VALUE> inputFormat;
    inputFormat = (InputFormat<INPUT_KEY, INPUT_VALUE>)
        ReflectionUtils.newInstance(jobContext.getInputFormatClass(), orbConf);
    TaskAttemptContext tao = new TaskAttemptContext(job,
        new TaskAttemptID(new TaskID(jobContext.getJobID(), true, partitionID), 0));
    recordReader = inputFormat.createRecordReader(split, tao);
    recordReader.initialize(split, tao);
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e);
  } catch (IOException e) {
    throw new RuntimeException(e);
  } catch (InterruptedException e) {
    throw new RuntimeException(e);
  }
}
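initialize() reconstructs the split from the bytes carried in rawSplit. For context, here is a sketch of the producing side of that round trip: serializing an InputSplit into a byte array with the matching Serializer from the same SerializationFactory. The class and method names are assumptions for illustration, not GoldenOrb code.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;
import org.apache.hadoop.mapreduce.InputSplit;

public class SplitSerializerSketch {
  @SuppressWarnings("unchecked")
  static byte[] serializeSplit(Configuration conf, InputSplit split) throws IOException {
    SerializationFactory factory = new SerializationFactory(conf);
    Serializer<InputSplit> serializer =
        (Serializer<InputSplit>) factory.getSerializer((Class<InputSplit>) split.getClass());
    DataOutputBuffer buffer = new DataOutputBuffer();
    serializer.open(buffer);
    serializer.serialize(split);
    serializer.close();
    // Only the first getLength() bytes of the buffer's backing array are valid.
    byte[] bytes = new byte[buffer.getLength()];
    System.arraycopy(buffer.getData(), 0, bytes, 0, buffer.getLength());
    return bytes;
  }
}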