use of org.apache.hadoop.mapreduce.JobContext in project flink by apache.
the class HadoopInputFormatBase method createInputSplits.
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
  configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);
  JobContext jobContext = new JobContextImpl(configuration, new JobID());
  jobContext.getCredentials().addAll(this.credentials);
  Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
  if (currentUserCreds != null) {
    jobContext.getCredentials().addAll(currentUserCreds);
  }
  List<org.apache.hadoop.mapreduce.InputSplit> splits;
  try {
    splits = this.mapreduceInputFormat.getSplits(jobContext);
  } catch (InterruptedException e) {
    throw new IOException("Could not get Splits.", e);
  }
  HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];
  for (int i = 0; i < hadoopInputSplits.length; i++) {
    hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
  }
  return hadoopInputSplits;
}
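Flink calls this method itself when it plans a data source, so user code normally only wraps the Hadoop InputFormat. A minimal sketch of driving it through Flink's Hadoop compatibility layer (the input path is a placeholder, and the batch DataSet API is assumed):

import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.hadoop.mapreduce.HadoopInputFormat;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class FlinkHadoopInputSketch {
  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    Job job = Job.getInstance();
    FileInputFormat.addInputPath(job, new Path("hdfs:///tmp/input")); // placeholder path
    // HadoopInputFormat wraps the mapreduce InputFormat; Flink invokes
    // createInputSplits(...) on it when the source is scheduled.
    HadoopInputFormat<LongWritable, Text> input =
      new HadoopInputFormat<>(new TextInputFormat(), LongWritable.class, Text.class, job);
    env.createInput(input).print();
  }
}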
use of org.apache.hadoop.mapreduce.JobContext in project goldenorb by jzachr.
the class OrbPartition method dumpData.
private void dumpData() {
  Configuration conf = new Configuration();
  Job job = null;
  JobContext jobContext = null;
  TaskAttemptContext tao = null;
  RecordWriter rw;
  VertexWriter vw;
  FileOutputFormat outputFormat;
  boolean tryAgain = true;
  int count = 0;
  while (tryAgain && count < 15) try {
    count++;
    tryAgain = false;
    if (job == null) {
      job = new Job(conf);
      job.setOutputFormatClass(TextOutputFormat.class);
      FileOutputFormat.setOutputPath(job,
        new Path(new String(getOrbConf().getNameNode() + getOrbConf().getFileOutputPath())));
    }
    if (jobContext == null) {
      jobContext = new JobContext(job.getConfiguration(), new JobID());
    }
    System.out.println(jobContext.getConfiguration().get("mapred.output.dir"));
    tao = new TaskAttemptContext(jobContext.getConfiguration(),
      new TaskAttemptID(new TaskID(jobContext.getJobID(), true, getPartitionID()), 0));
    outputFormat = (FileOutputFormat) tao.getOutputFormatClass().newInstance();
    rw = outputFormat.getRecordWriter(tao);
    vw = (VertexWriter) getOrbConf().getVertexOutputFormatClass().newInstance();
    for (Vertex v : vertices.values()) {
      OrbContext oc = vw.vertexWrite(v);
      rw.write(oc.getKey(), oc.getValue());
      // orbLogger.info("Partition: " + Integer.toString(partitionId) + "writing: " +
      // oc.getKey().toString() + ", " + oc.getValue().toString());
    }
    rw.close(tao);
    FileOutputCommitter cm = (FileOutputCommitter) outputFormat.getOutputCommitter(tao);
    if (cm.needsTaskCommit(tao)) {
      cm.commitTask(tao);
      cm.cleanupJob(jobContext);
    } else {
      cm.cleanupJob(jobContext);
      tryAgain = true;
    }
  } catch (IOException e) {
    tryAgain = true;
    e.printStackTrace();
  } catch (InstantiationException e) {
    tryAgain = true;
    e.printStackTrace();
  } catch (IllegalAccessException e) {
    tryAgain = true;
    e.printStackTrace();
  } catch (ClassNotFoundException e) {
    tryAgain = true;
    e.printStackTrace();
  } catch (InterruptedException e) {
    tryAgain = true;
    e.printStackTrace();
  }
  if (tryAgain) {
    synchronized (this) {
      try {
        wait(1000);
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
    }
  }
}
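This snippet targets the older Hadoop API in which JobContext and TaskAttemptContext are concrete classes. Against Hadoop 2.x and later they are interfaces, so the equivalent construction goes through the *Impl classes instead. A minimal sketch of just that part, assuming a port to the newer API (the job identifier and partition id are placeholders):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class ContextFactory {
  // Builds the job-level context that dumpData needs.
  public static JobContext newJobContext(Configuration conf) {
    return new JobContextImpl(conf, new JobID("orb", 0)); // "orb" is a placeholder identifier
  }

  // Builds the per-attempt context; the old boolean isMap flag maps to TaskType.MAP.
  public static TaskAttemptContext newTaskContext(JobContext jobContext, int partitionId) {
    return new TaskAttemptContextImpl(
      jobContext.getConfiguration(),
      new TaskAttemptID(new TaskID(jobContext.getJobID(), TaskType.MAP, partitionId), 0));
  }
}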
use of org.apache.hadoop.mapreduce.JobContext in project goldenorb by jzachr.
the class VertexInput method initialize.
/**
 * Rebuilds the serialized input split and creates the record reader for this partition.
 */
@SuppressWarnings("unchecked")
public void initialize() {
  // rebuild the input split
  org.apache.hadoop.mapreduce.InputSplit split = null;
  DataInputBuffer splitBuffer = new DataInputBuffer();
  splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
  SerializationFactory factory = new SerializationFactory(orbConf);
  Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer;
  try {
    deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory
      .getDeserializer(orbConf.getClassByName(splitClass));
    deserializer.open(splitBuffer);
    split = deserializer.deserialize(null);
    JobConf job = new JobConf(orbConf);
    JobContext jobContext = new JobContext(job, new JobID(getOrbConf().getJobNumber(), 0));
    InputFormat<INPUT_KEY, INPUT_VALUE> inputFormat;
    inputFormat = (InputFormat<INPUT_KEY, INPUT_VALUE>) ReflectionUtils.newInstance(
      jobContext.getInputFormatClass(), orbConf);
    TaskAttemptContext tao = new TaskAttemptContext(job,
      new TaskAttemptID(new TaskID(jobContext.getJobID(), true, partitionID), 0));
    recordReader = inputFormat.createRecordReader(split, tao);
    recordReader.initialize(split, tao);
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e);
  } catch (IOException e) {
    throw new RuntimeException(e);
  } catch (InterruptedException e) {
    throw new RuntimeException(e);
  }
}
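The bytes deserialized here are produced by the serializer shown in the next snippet (InputSplitAllocator). A minimal, self-contained sketch of that round trip, using a FileSplit as a stand-in for whatever split class the job actually uses:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class SplitRoundTrip {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    SerializationFactory factory = new SerializationFactory(conf);

    // Serialize, as InputSplitAllocator does on the master side.
    FileSplit original = new FileSplit(new Path("/tmp/data.txt"), 0, 1024, new String[0]);
    DataOutputBuffer out = new DataOutputBuffer();
    Serializer<FileSplit> serializer = factory.getSerializer(FileSplit.class);
    serializer.open(out);
    serializer.serialize(original);

    // Deserialize, as VertexInput.initialize does on the partition side.
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), 0, out.getLength());
    Deserializer<FileSplit> deserializer = factory.getDeserializer(FileSplit.class);
    deserializer.open(in);
    FileSplit rebuilt = deserializer.deserialize(null);
    System.out.println(rebuilt.getPath() + " length=" + rebuilt.getLength());
  }
}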
use of org.apache.hadoop.mapreduce.JobContext in project goldenorb by jzachr.
the class InputSplitAllocator method assignInputSplits.
/**
 * This method gets the raw splits and calls another method to assign them.
 *
 * @return a Map from each OrbPartitionMember to the list of RawSplits assigned to it
 */
@SuppressWarnings({ "deprecation", "rawtypes", "unchecked" })
public Map<OrbPartitionMember, List<RawSplit>> assignInputSplits() {
  List<RawSplit> rawSplits = null;
  JobConf job = new JobConf(orbConf);
  LOG.debug(orbConf.getJobNumber().toString());
  JobContext jobContext = new JobContext(job, new JobID(orbConf.getJobNumber(), 0));
  org.apache.hadoop.mapreduce.InputFormat<?, ?> input;
  try {
    input = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), orbConf);
    List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(jobContext);
    rawSplits = new ArrayList<RawSplit>(splits.size());
    DataOutputBuffer buffer = new DataOutputBuffer();
    SerializationFactory factory = new SerializationFactory(orbConf);
    Serializer serializer = factory.getSerializer(splits.get(0).getClass());
    serializer.open(buffer);
    for (int i = 0; i < splits.size(); i++) {
      buffer.reset();
      serializer.serialize(splits.get(i));
      RawSplit rawSplit = new RawSplit();
      rawSplit.setClassName(splits.get(i).getClass().getName());
      rawSplit.setDataLength(splits.get(i).getLength());
      rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
      rawSplit.setLocations(splits.get(i).getLocations());
      rawSplits.add(rawSplit);
    }
  } catch (ClassNotFoundException e) {
    e.printStackTrace();
    throw new RuntimeException(e);
  } catch (IOException e) {
    e.printStackTrace();
    throw new RuntimeException(e);
  } catch (InterruptedException e) {
    e.printStackTrace();
    throw new RuntimeException(e);
  }
  return assignInputSplits(rawSplits);
}
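The deprecated JobContext constructor used above is gone on Hadoop 2.x and later, but Job itself implements the interface, so the same split listing can be written without constructing a context directly. A minimal sketch under that assumption (the input path is a placeholder):

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.ReflectionUtils;

public class SplitListing {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf); // Job implements JobContext on Hadoop 2.x+
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, new Path("/tmp/input")); // placeholder path
    // Same pattern as assignInputSplits: instantiate the configured InputFormat
    // via reflection, then ask it for the splits.
    InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = input.getSplits(job);
    System.out.println("splits: " + splits.size());
  }
}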
use of org.apache.hadoop.mapreduce.JobContext in project hbase by apache.
the class TestTableInputFormatBase method testNonSuccessiveSplitsAreNotMerged.
@Test
public void testNonSuccessiveSplitsAreNotMerged() throws IOException {
  JobContext context = mock(JobContext.class);
  Configuration conf = HBaseConfiguration.create();
  conf.set(ConnectionUtils.HBASE_CLIENT_CONNECTION_IMPL, ConnectionForMergeTesting.class.getName());
  conf.set(TableInputFormat.INPUT_TABLE, "testTable");
  conf.setBoolean(TableInputFormatBase.MAPREDUCE_INPUT_AUTOBALANCE, true);
  when(context.getConfiguration()).thenReturn(conf);
  TableInputFormat tifExclude = new TableInputFormatForMergeTesting();
  tifExclude.setConf(conf);
  // split["b", "c"] is excluded, split["o", "p"] and split["p", "q"] are merged,
  // but split["a", "b"] and split["c", "d"] are not merged.
  assertEquals(ConnectionForMergeTesting.START_KEYS.length - 1 - 1,
    tifExclude.getSplits(context).size());
}
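For context, the two configuration keys the test sets are the same ones an ordinary job would use to turn on split auto-balancing with TableInputFormat. A minimal sketch (table and job names are placeholders; mapper and output setup are omitted):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableInputFormatBase;
import org.apache.hadoop.mapreduce.Job;

public class AutoBalanceJobSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    conf.set(TableInputFormat.INPUT_TABLE, "testTable"); // placeholder table name
    conf.setBoolean(TableInputFormatBase.MAPREDUCE_INPUT_AUTOBALANCE, true);
    Job job = Job.getInstance(conf, "scan-testTable");
    job.setInputFormatClass(TableInputFormat.class);
    // Mapper, reducer, and output configuration would follow here.
  }
}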