Use of org.apache.hadoop.mapreduce.JobContext in project hadoop by apache.
From the class TestCommitterEventHandler, method testBasic.
@Test
public void testBasic() throws Exception {
    AppContext mockContext = mock(AppContext.class);
    OutputCommitter mockCommitter = mock(OutputCommitter.class);
    Clock mockClock = mock(Clock.class);
    CommitterEventHandler handler = new CommitterEventHandler(mockContext, mockCommitter, new TestingRMHeartbeatHandler());
    YarnConfiguration conf = new YarnConfiguration();
    conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
    JobContext mockJobContext = mock(JobContext.class);
    ApplicationAttemptId attemptid = ApplicationAttemptId.fromString("appattempt_1234567890000_0001_0");
    JobId jobId = TypeConverter.toYarn(TypeConverter.fromYarn(attemptid.getApplicationId()));
    WaitForItHandler waitForItHandler = new WaitForItHandler();
    when(mockContext.getApplicationID()).thenReturn(attemptid.getApplicationId());
    when(mockContext.getApplicationAttemptId()).thenReturn(attemptid);
    when(mockContext.getEventHandler()).thenReturn(waitForItHandler);
    when(mockContext.getClock()).thenReturn(mockClock);
    handler.init(conf);
    handler.start();
    try {
        handler.handle(new CommitterJobCommitEvent(jobId, mockJobContext));
        String user = UserGroupInformation.getCurrentUser().getShortUserName();
        Path startCommitFile = MRApps.getStartJobCommitFile(conf, user, jobId);
        Path endCommitSuccessFile = MRApps.getEndJobCommitSuccessFile(conf, user, jobId);
        Path endCommitFailureFile = MRApps.getEndJobCommitFailureFile(conf, user, jobId);
        Event e = waitForItHandler.getAndClearEvent();
        assertNotNull(e);
        assertTrue(e instanceof JobCommitCompletedEvent);
        FileSystem fs = FileSystem.get(conf);
        assertTrue(startCommitFile.toString(), fs.exists(startCommitFile));
        assertTrue(endCommitSuccessFile.toString(), fs.exists(endCommitSuccessFile));
        assertFalse(endCommitFailureFile.toString(), fs.exists(endCommitFailureFile));
        verify(mockCommitter).commitJob(any(JobContext.class));
    } finally {
        handler.stop();
    }
}
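The test relies on helper classes that are defined elsewhere in TestCommitterEventHandler and not shown on this page (TestingRMHeartbeatHandler, WaitForItHandler, stagingDir). As an illustration only, a minimal sketch of what an event-latching helper like WaitForItHandler could look like is given below; the waiting logic and timeout are assumptions, not the actual Hadoop implementation.

import org.apache.hadoop.yarn.event.Event;
import org.apache.hadoop.yarn.event.EventHandler;

// Hypothetical sketch: an EventHandler that latches the first event it receives
// so the test thread can wait for it. Not the actual Hadoop helper.
class WaitForItHandler implements EventHandler<Event> {

    private Event event = null;

    @Override
    public synchronized void handle(Event event) {
        this.event = event;
        notifyAll();
    }

    // Blocks (up to an assumed 10 second timeout) until an event arrives,
    // then returns it and clears the latch.
    public synchronized Event getAndClearEvent() throws InterruptedException {
        long deadline = System.currentTimeMillis() + 10000;
        while (event == null && System.currentTimeMillis() < deadline) {
            wait(1000);
        }
        Event result = event;
        event = null;
        return result;
    }
}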
Use of org.apache.hadoop.mapreduce.JobContext in project hadoop by apache.
From the class TestJobImpl, method testKilledDuringKillAbort.
@Test(timeout = 20000)
public void testKilledDuringKillAbort() throws Exception {
    Configuration conf = new Configuration();
    conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
    // not initializing dispatcher to avoid potential race condition between
    // the dispatcher thread & test thread - see MAPREDUCE-6831
    AsyncDispatcher dispatcher = new AsyncDispatcher();
    OutputCommitter committer = new StubbedOutputCommitter() {

        @Override
        public synchronized void abortJob(JobContext jobContext, State state) throws IOException {
            while (!Thread.interrupted()) {
                try {
                    wait();
                } catch (InterruptedException e) {
                }
            }
        }
    };
    CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer);
    commitHandler.init(conf);
    commitHandler.start();
    JobImpl job = createStubbedJob(conf, dispatcher, 2, null);
    JobId jobId = job.getID();
    job.handle(new JobEvent(jobId, JobEventType.JOB_INIT));
    assertJobState(job, JobStateInternal.INITED);
    job.handle(new JobStartEvent(jobId));
    assertJobState(job, JobStateInternal.SETUP);
    job.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
    assertJobState(job, JobStateInternal.KILL_ABORT);
    job.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
    assertJobState(job, JobStateInternal.KILLED);
    dispatcher.stop();
    commitHandler.stop();
}
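StubbedOutputCommitter is another helper from TestJobImpl that is not reproduced here; the anonymous subclass above only overrides abortJob so that the abort blocks until the thread is interrupted, which is what keeps the job in KILL_ABORT until the second kill event arrives. Assuming the helper is simply a do-nothing committer, a minimal sketch could look like this (the real class may differ):

import java.io.IOException;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical sketch of a do-nothing OutputCommitter for tests.
class StubbedOutputCommitter extends OutputCommitter {

    @Override
    public void setupJob(JobContext jobContext) throws IOException {
        // no-op
    }

    @Override
    public void setupTask(TaskAttemptContext taskContext) throws IOException {
        // no-op
    }

    @Override
    public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException {
        return false;
    }

    @Override
    public void commitTask(TaskAttemptContext taskContext) throws IOException {
        // no-op
    }

    @Override
    public void abortTask(TaskAttemptContext taskContext) throws IOException {
        // no-op
    }
}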
Use of org.apache.hadoop.mapreduce.JobContext in project beam by apache.
From the class HadoopInputFormatIOTest, method testComputeSplitsIfGetSplitsReturnsEmptyList.
/**
 * This test validates behavior of
 * {@link HadoopInputFormatBoundedSource#computeSplitsIfNecessary() computeSplits()} when Hadoop
 * InputFormat's {@link InputFormat#getSplits(JobContext)} returns an empty list.
 */
@Test
public void testComputeSplitsIfGetSplitsReturnsEmptyList() throws Exception {
    InputFormat<?, ?> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
    SerializableSplit mockInputSplit = Mockito.mock(SerializableSplit.class);
    Mockito.when(mockInputFormat.getSplits(Mockito.any(JobContext.class))).thenReturn(new ArrayList<InputSplit>());
    HadoopInputFormatBoundedSource<Text, Employee> hifSource = new HadoopInputFormatBoundedSource<Text, Employee>(
            serConf,
            WritableCoder.of(Text.class),
            AvroCoder.of(Employee.class),
            // No key translation required.
            null,
            // No value translation required.
            null,
            mockInputSplit);
    thrown.expect(IOException.class);
    thrown.expectMessage("Error in computing splits, getSplits() returns a empty list");
    hifSource.setInputFormatObj(mockInputFormat);
    hifSource.computeSplitsIfNecessary();
}
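The expected IOException is raised by a guard inside HadoopInputFormatBoundedSource when the mocked getSplits(JobContext) call returns an empty list. The sketch below only illustrates the kind of check the test exercises; the class and method names are assumptions, not the actual Beam source, and the message simply mirrors the one asserted above.

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.mapreduce.InputSplit;

// Illustrative guard, assumed to resemble the validation performed by
// computeSplitsIfNecessary(); not the actual Beam implementation.
final class SplitValidation {

    static void verifySplits(List<InputSplit> splits) throws IOException {
        if (splits == null || splits.isEmpty()) {
            // Message mirrors the one the test expects.
            throw new IOException("Error in computing splits, getSplits() returns a empty list");
        }
    }
}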
Use of org.apache.hadoop.mapreduce.JobContext in project jena by apache.
From the class AbstractBlankNodeTests, method blank_node_divergence_02.
/**
 * Test that starts with two mentions of the same blank node in a single
 * file, splits them over two files, and shows that they diverge in the
 * subsequent job when the JENA-820 workaround is not enabled.
 *
 * @throws IOException
 * @throws InterruptedException
 */
@Test
public void blank_node_divergence_02() throws IOException, InterruptedException {
    Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
    Assume.assumeFalse("Requires that Blank Node identity not be preserved", this.preservesBlankNodeIdentity());
    // Temporary files
    File a = File.createTempFile("bnode_divergence", getInitialInputExtension());
    File intermediateOutputDir = Files.createTempDirectory("bnode_divergence", new FileAttribute[0]).toFile();
    try {
        // Prepare the input data
        // Two mentions of the same blank node in the same file
        List<T> tuples = new ArrayList<>();
        Node bnode = NodeFactory.createBlankNode();
        Node pred = NodeFactory.createURI("http://example.org/predicate");
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("first")));
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("second")));
        writeTuples(a, tuples);
        // Set up fake job which will process the file as a single split
        Configuration config = new Configuration(true);
        InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath()));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath()));
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
        // Get the splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(1, splits.size());
        for (InputSplit split : splits) {
            // Initialize the input reading
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), createAttemptID(1, 1, 1));
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);
            // Copy the input to the output - each triple goes to a separate
            // output file
            // This is how we force multiple files to be produced
            int taskID = 1;
            while (reader.nextKeyValue()) {
                // Prepare the output writing
                OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat();
                TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), createAttemptID(1, ++taskID, 1));
                RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext);
                writer.write(reader.getCurrentKey(), reader.getCurrentValue());
                writer.close(outputTaskContext);
            }
        }
        // Promote outputs from temporary status
        promoteInputs(intermediateOutputDir);
        // Now we need to create a subsequent job that reads the
        // intermediate outputs
        // As described in JENA-820 at this point the blank nodes are
        // consistent, however when we read them from different files they
        // by default get treated as different nodes and so the blank nodes
        // diverge which is incorrect and undesirable behaviour in
        // multi-stage pipelines. However it is the default behaviour
        // because when we start from external inputs we want them to be
        // file scoped.
        LOGGER.debug("Intermediate output directory is {}", intermediateOutputDir.getAbsolutePath());
        job = Job.getInstance(config);
        inputFormat = createIntermediateInputFormat();
        job.setInputFormatClass(inputFormat.getClass());
        FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));
        // Make sure JENA-820 flag is disabled
        job.getConfiguration().setBoolean(RdfIOConstants.GLOBAL_BNODE_IDENTITY, false);
        context = new JobContextImpl(job.getConfiguration(), job.getJobID());
        // Get the splits
        splits = inputFormat.getSplits(context);
        Assert.assertEquals(2, splits.size());
        // With the workaround disabled, expect the single blank node to diverge into two distinct nodes
        Set<Node> nodes = new HashSet<Node>();
        for (InputSplit split : splits) {
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);
            while (reader.nextKeyValue()) {
                nodes.add(getSubject(reader.getCurrentValue().get()));
            }
        }
        // Nodes should have diverged
        Assert.assertEquals(2, nodes.size());
    } finally {
        a.delete();
        deleteDirectory(intermediateOutputDir);
    }
}
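The JobContext usage that matters in this test is the construction of a JobContextImpl directly from a Job's configuration and job ID, which lets InputFormat.getSplits(JobContext) be driven without a running cluster. A condensed, standalone sketch of that pattern follows; the input path and the choice of NLineInputFormat are placeholders, not part of the Jena test.

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
import org.apache.hadoop.mapreduce.task.JobContextImpl;

// Standalone sketch: compute input splits locally by handing an InputFormat
// a JobContext built from the job's configuration and ID.
public class SplitInspection {

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(true));
        job.setInputFormatClass(NLineInputFormat.class);
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        // Placeholder input path for illustration.
        FileInputFormat.setInputPaths(job, new Path("/tmp/example-input"));
        // A JobContext is essentially the configuration plus the job ID.
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
        List<InputSplit> splits = new NLineInputFormat().getSplits(context);
        System.out.println("Number of splits: " + splits.size());
    }
}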
Use of org.apache.hadoop.mapreduce.JobContext in project jena by apache.
From the class AbstractBlankNodeTests, method blank_node_identity_01.
/**
 * Test that starts with two blank nodes in two different files and checks
 * that writing them to a single file does not conflate them.
 *
 * @throws IOException
 * @throws InterruptedException
 */
@Test
public void blank_node_identity_01() throws IOException, InterruptedException {
    Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
    Assume.assumeFalse("Requires that Blank Node identity not be preserved", this.preservesBlankNodeIdentity());
    // Temporary files
    File a = File.createTempFile("bnode_identity", getInitialInputExtension());
    File b = File.createTempFile("bnode_identity", getInitialInputExtension());
    File intermediateOutputDir = Files.createTempDirectory("bnode_identity", new FileAttribute[0]).toFile();
    try {
        // Prepare the input data
        // Different blank nodes in different files
        List<T> tuples = new ArrayList<>();
        Node bnode1 = NodeFactory.createBlankNode();
        Node bnode2 = NodeFactory.createBlankNode();
        Node pred = NodeFactory.createURI("http://example.org/predicate");
        tuples.add(createTuple(bnode1, pred, NodeFactory.createLiteral("first")));
        writeTuples(a, tuples);
        tuples.clear();
        tuples.add(createTuple(bnode2, pred, NodeFactory.createLiteral("second")));
        writeTuples(b, tuples);
        // Set up fake job which will process the two files
        Configuration config = new Configuration(true);
        InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath()), new Path(b.getAbsolutePath()));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath()));
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
        // Get the splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(2, splits.size());
        // Prepare the output writing - putting all output to a single file
        OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat();
        TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), createAttemptID(1, 2, 1));
        RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext);
        for (InputSplit split : splits) {
            // Initialize the input reading
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), createAttemptID(1, 1, 1));
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);
            // Copy the input to the single output file
            while (reader.nextKeyValue()) {
                writer.write(reader.getCurrentKey(), reader.getCurrentValue());
            }
        }
        writer.close(outputTaskContext);
        // Promote outputs from temporary status
        promoteInputs(intermediateOutputDir);
        // Now we need to create a subsequent job that reads the
        // intermediate outputs
        // The Blank nodes should have been given separate identities so we
        // should not be conflating them, this is the opposite problem to
        // that described in JENA-820
        LOGGER.debug("Intermediate output directory is {}", intermediateOutputDir.getAbsolutePath());
        job = Job.getInstance(config);
        inputFormat = createIntermediateInputFormat();
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));
        context = new JobContextImpl(job.getConfiguration(), job.getJobID());
        // Get the splits
        splits = inputFormat.getSplits(context);
        Assert.assertEquals(1, splits.size());
        // Expect the two blank nodes to remain distinct
        Set<Node> nodes = new HashSet<Node>();
        for (InputSplit split : splits) {
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);
            while (reader.nextKeyValue()) {
                nodes.add(getSubject(reader.getCurrentValue().get()));
            }
        }
        // Nodes must not have converged
        Assert.assertEquals(2, nodes.size());
    } finally {
        a.delete();
        b.delete();
        deleteDirectory(intermediateOutputDir);
    }
}
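Both Jena tests build their TaskAttemptContextImpl instances from a createAttemptID(jobId, taskId, attemptId) helper defined in the abstract test class and not shown on this page. Assuming it simply assembles a TaskAttemptID from the three numbers, a plausible sketch is shown below; the jtIdentifier string and the task type are illustrative guesses.

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

// Hypothetical sketch of the createAttemptID() helper used above; the actual
// Jena implementation may use a different identifier string or task type.
final class AttemptIds {

    static TaskAttemptID createAttemptID(int jobId, int taskId, int attemptId) {
        return new TaskAttemptID("test-job", jobId, TaskType.MAP, taskId, attemptId);
    }
}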