Use of org.apache.hadoop.mapreduce.TaskAttemptID in project goldenorb by jzachr.
The class OrbPartition, method dumpData:
private void dumpData() {
  Configuration conf = new Configuration();
  Job job = null;
  JobContext jobContext = null;
  TaskAttemptContext tao = null;
  RecordWriter rw;
  VertexWriter vw;
  FileOutputFormat outputFormat;
  boolean tryAgain = true;
  int count = 0;
  // Retry the dump up to 15 times; tryAgain is set back to true whenever the write or commit fails.
  while (tryAgain && count < 15)
    try {
      count++;
      tryAgain = false;
      if (job == null) {
        job = new Job(conf);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(job,
            new Path(new String(getOrbConf().getNameNode() + getOrbConf().getFileOutputPath())));
      }
      if (jobContext == null) {
        jobContext = new JobContext(job.getConfiguration(), new JobID());
      }
      System.out.println(jobContext.getConfiguration().get("mapred.output.dir"));
      // One task attempt per partition: the TaskID carries the partition id, attempt number 0,
      // so the output committer writes into a per-task attempt directory.
      tao = new TaskAttemptContext(jobContext.getConfiguration(),
          new TaskAttemptID(new TaskID(jobContext.getJobID(), true, getPartitionID()), 0));
      outputFormat = (FileOutputFormat) tao.getOutputFormatClass().newInstance();
      rw = outputFormat.getRecordWriter(tao);
      vw = (VertexWriter) getOrbConf().getVertexOutputFormatClass().newInstance();
      for (Vertex v : vertices.values()) {
        OrbContext oc = vw.vertexWrite(v);
        rw.write(oc.getKey(), oc.getValue());
        // orbLogger.info("Partition: " + Integer.toString(partitionId) + "writing: " +
        // oc.getKey().toString() + ", " + oc.getValue().toString());
      }
      rw.close(tao);
      FileOutputCommitter cm = (FileOutputCommitter) outputFormat.getOutputCommitter(tao);
      if (cm.needsTaskCommit(tao)) {
        cm.commitTask(tao);
        cm.cleanupJob(jobContext);
      } else {
        cm.cleanupJob(jobContext);
        tryAgain = true;
      }
    } catch (IOException e) {
      tryAgain = true;
      e.printStackTrace();
    } catch (InstantiationException e) {
      tryAgain = true;
      e.printStackTrace();
    } catch (IllegalAccessException e) {
      tryAgain = true;
      e.printStackTrace();
    } catch (ClassNotFoundException e) {
      tryAgain = true;
      e.printStackTrace();
    } catch (InterruptedException e) {
      tryAgain = true;
      e.printStackTrace();
    }
  // If the dump still has not succeeded after the loop, wait briefly before returning.
  if (tryAgain) {
    synchronized (this) {
      try {
        wait(1000);
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
    }
  }
}
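dumpData builds its TaskAttemptID and TaskAttemptContext against the old mapreduce API, where JobContext and TaskAttemptContext are concrete classes. On Hadoop 2.x and later they are interfaces, so the same wiring goes through the Impl classes instead; a minimal sketch assuming the plain mapreduce API (the class name, job identifier, and partitionId parameter are illustrative, not part of GoldenOrb):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class PartitionAttemptContextSketch {
  // partitionId stands in for getPartitionID(); the "orb" job identifier is a placeholder.
  public static TaskAttemptContext forPartition(Configuration conf, int partitionId) {
    TaskID taskId = new TaskID(new JobID("orb", 1), TaskType.MAP, partitionId);
    TaskAttemptID attemptId = new TaskAttemptID(taskId, 0);
    return new TaskAttemptContextImpl(conf, attemptId);
  }
}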
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project goldenorb by jzachr.
The class VertexInput, method initialize:
/**
 * Rebuilds the input split from its serialized bytes, then creates and initializes the
 * record reader for this partition's slice of the input.
 */
@SuppressWarnings("unchecked")
public void initialize() {
  // rebuild the input split
  org.apache.hadoop.mapreduce.InputSplit split = null;
  DataInputBuffer splitBuffer = new DataInputBuffer();
  splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
  SerializationFactory factory = new SerializationFactory(orbConf);
  Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer;
  try {
    deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>)
        factory.getDeserializer(orbConf.getClassByName(splitClass));
    deserializer.open(splitBuffer);
    split = deserializer.deserialize(null);
    JobConf job = new JobConf(orbConf);
    JobContext jobContext = new JobContext(job, new JobID(getOrbConf().getJobNumber(), 0));
    InputFormat<INPUT_KEY, INPUT_VALUE> inputFormat;
    inputFormat = (InputFormat<INPUT_KEY, INPUT_VALUE>)
        ReflectionUtils.newInstance(jobContext.getInputFormatClass(), orbConf);
    // One task attempt per partition: the TaskID carries the partition id, attempt number 0.
    TaskAttemptContext tao = new TaskAttemptContext(job,
        new TaskAttemptID(new TaskID(jobContext.getJobID(), true, partitionID), 0));
    recordReader = inputFormat.createRecordReader(split, tao);
    recordReader.initialize(split, tao);
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e);
  } catch (IOException e) {
    throw new RuntimeException(e);
  } catch (InterruptedException e) {
    throw new RuntimeException(e);
  }
}
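initialize only shows the receiving side: it assumes rawSplit already holds bytes produced by the matching Hadoop serializer for the split class. A minimal sketch of that sending side, assuming the same SerializationFactory machinery (the class and method names are illustrative, not GoldenOrb's own):

import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;
import org.apache.hadoop.mapreduce.InputSplit;

public class SplitSerializerSketch {
  @SuppressWarnings("unchecked")
  public static byte[] serializeSplit(InputSplit split, Configuration conf) throws IOException {
    SerializationFactory factory = new SerializationFactory(conf);
    // Concrete splits such as FileSplit implement Writable, so WritableSerialization applies.
    Serializer<InputSplit> serializer = factory.getSerializer((Class<InputSplit>) split.getClass());
    DataOutputBuffer out = new DataOutputBuffer();
    serializer.open(out);
    serializer.serialize(split);
    serializer.close();
    // Only the first getLength() bytes of the buffer are valid.
    return Arrays.copyOf(out.getData(), out.getLength());
  }
}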
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hive by apache.
The class HCatOutputFormatWriter, method write:
@Override
public void write(Iterator<HCatRecord> recordItr) throws HCatException {
  int id = sp.getId();
  setVarsInConf(id);
  HCatOutputFormat outFormat = new HCatOutputFormat();
  TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
      conf, new TaskAttemptID(ShimLoader.getHadoopShims().getHCatShim().createTaskID(), id));
  OutputCommitter committer = null;
  RecordWriter<WritableComparable<?>, HCatRecord> writer;
  try {
    committer = outFormat.getOutputCommitter(cntxt);
    committer.setupTask(cntxt);
    writer = outFormat.getRecordWriter(cntxt);
    while (recordItr.hasNext()) {
      HCatRecord rec = recordItr.next();
      writer.write(null, rec);
    }
    writer.close(cntxt);
    if (committer.needsTaskCommit(cntxt)) {
      committer.commitTask(cntxt);
    }
  } catch (IOException e) {
    if (null != committer) {
      try {
        committer.abortTask(cntxt);
      } catch (IOException e1) {
        throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
      }
    }
    throw new HCatException("Failed while writing", e);
  } catch (InterruptedException e) {
    if (null != committer) {
      try {
        committer.abortTask(cntxt);
      } catch (IOException e1) {
        throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
      }
    }
    throw new HCatException("Failed while writing", e);
  }
}
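HCatOutputFormatWriter is the slave-side HCatWriter behind HCatalog's data transfer API, so write is normally driven through DataTransferFactory rather than called directly. A rough usage sketch, assuming the master/slave flow described in the HCatalog reader/writer documentation (table name and config map are placeholders):

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.transfer.DataTransferFactory;
import org.apache.hive.hcatalog.data.transfer.HCatWriter;
import org.apache.hive.hcatalog.data.transfer.WriteEntity;
import org.apache.hive.hcatalog.data.transfer.WriterContext;

public class HCatWriteSketch {
  // Master side: describe the target table and obtain a serializable WriterContext.
  public static WriterContext prepare() throws Exception {
    WriteEntity entity = new WriteEntity.Builder().withTable("mytable").build();
    Map<String, String> config = new HashMap<String, String>();
    HCatWriter master = DataTransferFactory.getHCatWriter(entity, config);
    return master.prepareWrite();
    // After all slaves finish, the master calls master.commit(context) or master.abort(context).
  }

  // Slave side: this is where an HCatOutputFormatWriter ends up running write() as shown above.
  public static void writeOnSlave(WriterContext context, Iterator<HCatRecord> records) throws Exception {
    HCatWriter slave = DataTransferFactory.getHCatWriter(context);
    slave.write(records);
  }
}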
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hive by apache.
The class HCatInputFormatReader, method read:
@Override
public Iterator<HCatRecord> read() throws HCatException {
  HCatInputFormat inpFmt = new HCatInputFormat();
  RecordReader<WritableComparable, HCatRecord> rr;
  try {
    TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim()
        .createTaskAttemptContext(conf, new TaskAttemptID());
    rr = inpFmt.createRecordReader(split, cntxt);
    rr.initialize(split, cntxt);
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
  return new HCatRecordItr(rr);
}
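The returned HCatRecordItr is consumed like any other iterator; a minimal sketch, where reader stands for whatever HCatReader instance read() is called on and the column index is illustrative:

Iterator<HCatRecord> records = reader.read();
while (records.hasNext()) {
  HCatRecord record = records.next();
  // Positional access; get(fieldName, schema) is also available on HCatRecord.
  Object firstColumn = record.get(0);
  // process the record ...
}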
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project druid by druid-io.
The class DruidParquetInputTest, method getFirstRecord:
private GenericRecord getFirstRecord(Job job, String parquetPath) throws IOException, InterruptedException {
  File testFile = new File(parquetPath);
  Path path = new Path(testFile.getAbsoluteFile().toURI());
  FileSplit split = new FileSplit(path, 0, testFile.length(), null);
  DruidParquetInputFormat inputFormat = ReflectionUtils.newInstance(DruidParquetInputFormat.class, job.getConfiguration());
  TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
  RecordReader reader = inputFormat.createRecordReader(split, context);
  reader.initialize(split, context);
  reader.nextKeyValue();
  GenericRecord data = (GenericRecord) reader.getCurrentValue();
  reader.close();
  return data;
}
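A hypothetical caller, sketching how a helper like getFirstRecord is typically driven from a test; the Job setup, file path, and field name below are assumptions, not taken from the Druid test itself:

// Inside a test method, with access to the helper above.
Job job = Job.getInstance(new Configuration());
GenericRecord record = getFirstRecord(job, "path/to/example.parquet");
// Avro field access; "someField" is a placeholder column name.
System.out.println(record.get("someField"));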