use of org.apache.tez.dag.api.PreWarmVertex in project tez by apache.
the class TestOrderedWordCount method run.
/**
 * Command-line entry point: runs one OrderedWordCount DAG for every
 * (input path, output path) pair that remains after generic and split options are parsed.
 *
 * <p>The following keys are read from the configuration returned by {@code getConf()}:
 * {@code USE_TEZ_SESSION}, {@code INTER_JOB_SLEEP_INTERVAL} (seconds), {@code RETAIN_STAGING_DIR},
 * {@code USE_MR_CONFIGS}, {@code IREDUCE_NUM_TASKS}, {@code PRE_WARM_SESSION},
 * {@code PRE_WARM_NUM_CONTAINERS}, plus the {@code MAX_IPC_DATA_LENGTH} and
 * {@code EXCEED_IPC_DATA_LIMIT} constants.
 *
 * @param args raw command-line arguments; after option parsing they must form one or more
 *             input/output path pairs (exactly one pair when session mode is disabled)
 * @return {@code 2} when the arguments are invalid, {@code -1} when polling the DAG status fails
 *         with a {@code TezException}, {@code 0} when the last DAG that ran finished in state
 *         {@code SUCCEEDED}, and {@code 1} otherwise
 * @throws Exception any failure while preparing the staging directory, creating the client, or
 *                   submitting/running a DAG (including an already existing output directory)
 */
@Override
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  boolean generateSplitsInClient;
  SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
  try {
    generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
    otherArgs = splitCmdLineParser.getRemainingArgs();
  } catch (ParseException e1) {
    System.err.println("Invalid options");
    printUsage();
    return 2;
  }
  boolean useTezSession = conf.getBoolean("USE_TEZ_SESSION", true);
  // Multiply as long (1000L) so a large interval in seconds cannot overflow int arithmetic
  // before being widened.
  long interJobSleepTimeout = conf.getInt("INTER_JOB_SLEEP_INTERVAL", 0) * 1000L;
  boolean retainStagingDir = conf.getBoolean("RETAIN_STAGING_DIR", false);
  boolean useMRSettings = conf.getBoolean("USE_MR_CONFIGS", true);
  // TODO needs to use auto reduce parallelism
  int intermediateNumReduceTasks = conf.getInt("IREDUCE_NUM_TASKS", 2);
  int maxDataLengthThroughIPC = conf.getInt(MAX_IPC_DATA_LENGTH, -1);
  int exceedDataLimit = conf.getInt(EXCEED_IPC_DATA_LIMIT, 3);
  if (maxDataLengthThroughIPC > 0) {
    // The configured value is multiplied by 1024 * 1024 before being handed to the IPC layer.
    conf.setInt(CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH, maxDataLengthThroughIPC * 1024 * 1024);
  }
  // Arguments must come in (input, output) pairs. At least one pair is required: with none,
  // no DAG would run and there would be no final status to derive the exit code from.
  // Without a session only a single pair is accepted.
  boolean noPairs = otherArgs.length == 0;
  boolean oddArgCount = (otherArgs.length % 2) != 0;
  boolean wrongCountForNonSession = !useTezSession && otherArgs.length != 2;
  if (noPairs || oddArgCount || wrongCountForNonSession) {
    printUsage();
    return 2;
  }
  List<String> inputPaths = new ArrayList<String>();
  List<String> outputPaths = new ArrayList<String>();
  TezConfiguration tezConf = new TezConfiguration(conf);
  for (int i = 0; i < otherArgs.length; i += 2) {
    FileSystem inputPathFs = new Path(otherArgs[i]).getFileSystem(tezConf);
    inputPaths.add(inputPathFs.makeQualified(new Path(otherArgs[i])).toString());
    FileSystem outputPathFs = new Path(otherArgs[i + 1]).getFileSystem(tezConf);
    outputPaths.add(outputPathFs.makeQualified(new Path(otherArgs[i + 1])).toString());
  }
  UserGroupInformation.setConfiguration(conf);
  HadoopShim hadoopShim = new HadoopShimsLoader(tezConf).getHadoopShim();
  TestOrderedWordCount instance = new TestOrderedWordCount();
  FileSystem fs = FileSystem.get(conf);
  // Each invocation gets its own staging directory, suffixed with the current time in millis.
  String stagingDirStr = conf.get(TezConfiguration.TEZ_AM_STAGING_DIR, TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT) + Path.SEPARATOR + Long.toString(System.currentTimeMillis());
  Path stagingDir = new Path(stagingDirStr);
  FileSystem pathFs = stagingDir.getFileSystem(tezConf);
  pathFs.mkdirs(new Path(stagingDirStr));
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr);
  stagingDir = pathFs.makeQualified(new Path(stagingDirStr));
  TokenCache.obtainTokensForNamenodes(instance.credentials, new Path[] { stagingDir }, conf);
  TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);
  if (useTezSession) {
    LOG.info("Creating Tez Session");
    tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, true);
  } else {
    tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, false);
  }
  TezClient tezSession = TezClient.create("OrderedWordCountSession", tezConf, null, instance.credentials);
  tezSession.start();
  if (tezSession.getAppMasterApplicationId() != null) {
    TezUtilsInternal.setHadoopCallerContext(hadoopShim, tezSession.getAppMasterApplicationId());
  }
  DAGStatus dagStatus = null;
  DAGClient dagClient = null;
  String[] vNames = { "initialmap", "intermediate_reducer", "finalreduce" };
  Set<StatusGetOpts> statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
  try {
    // dagIndex is 1-based; it is used for logging, caller ids and as an argument to createDAG.
    for (int dagIndex = 1; dagIndex <= inputPaths.size(); ++dagIndex) {
      if (dagIndex != 1 && interJobSleepTimeout > 0) {
        try {
          LOG.info("Sleeping between jobs, sleepInterval=" + (interJobSleepTimeout / 1000));
          Thread.sleep(interJobSleepTimeout);
        } catch (InterruptedException e) {
          LOG.info("Main thread interrupted. Breaking out of job loop");
          break;
        }
      }
      String inputPath = inputPaths.get(dagIndex - 1);
      String outputPath = outputPaths.get(dagIndex - 1);
      if (fs.exists(new Path(outputPath))) {
        throw new FileAlreadyExistsException("Output directory " + outputPath + " already exists");
      }
      LOG.info("Running OrderedWordCount DAG" + ", dagIndex=" + dagIndex + ", inputPath=" + inputPath + ", outputPath=" + outputPath);
      Map<String, LocalResource> localResources = new TreeMap<String, LocalResource>();
      DAG dag = instance.createDAG(fs, tezConf, localResources, stagingDir, dagIndex, inputPath, outputPath, generateSplitsInClient, useMRSettings, intermediateNumReduceTasks, maxDataLengthThroughIPC, exceedDataLimit);
      String callerType = "TestOrderedWordCount";
      String callerId = tezSession.getAppMasterApplicationId() == null ? ("UnknownApp_" + System.currentTimeMillis() + dagIndex) : (tezSession.getAppMasterApplicationId().toString() + "_" + dagIndex);
      dag.setCallerContext(CallerContext.create("Tez", callerId, callerType, "TestOrderedWordCount Job"));
      // Pre-warming is only attempted for the first DAG of a session and only when a
      // positive container count is configured.
      boolean doPreWarm = dagIndex == 1 && useTezSession && conf.getBoolean("PRE_WARM_SESSION", true);
      int preWarmNumContainers = 0;
      if (doPreWarm) {
        preWarmNumContainers = conf.getInt("PRE_WARM_NUM_CONTAINERS", 0);
        if (preWarmNumContainers <= 0) {
          doPreWarm = false;
        }
      }
      if (doPreWarm) {
        LOG.info("Pre-warming Session");
        // The pre-warm vertex copies resource, local files, environment and launch options
        // from the DAG's "initialmap" vertex.
        PreWarmVertex preWarmVertex = PreWarmVertex.create("PreWarm", preWarmNumContainers, dag.getVertex("initialmap").getTaskResource());
        preWarmVertex.addTaskLocalFiles(dag.getVertex("initialmap").getTaskLocalFiles());
        preWarmVertex.setTaskEnvironment(dag.getVertex("initialmap").getTaskEnvironment());
        preWarmVertex.setTaskLaunchCmdOpts(dag.getVertex("initialmap").getTaskLaunchCmdOpts());
        tezSession.preWarm(preWarmVertex);
      }
      if (useTezSession) {
        LOG.info("Waiting for TezSession to get into ready state");
        waitForTezSessionReady(tezSession);
        LOG.info("Submitting DAG to Tez Session, dagIndex=" + dagIndex);
        dagClient = tezSession.submitDAG(dag);
        LOG.info("Submitted DAG to Tez Session, dagIndex=" + dagIndex);
      } else {
        LOG.info("Submitting DAG as a new Tez Application");
        dagClient = tezSession.submitDAG(dag);
      }
      // Phase 1: poll every 500 ms until the DAG is RUNNING or already in a terminal state.
      while (true) {
        dagStatus = dagClient.getDAGStatus(statusGetOpts);
        if (dagStatus.getState() == DAGStatus.State.RUNNING || dagStatus.getState() == DAGStatus.State.SUCCEEDED || dagStatus.getState() == DAGStatus.State.FAILED || dagStatus.getState() == DAGStatus.State.KILLED || dagStatus.getState() == DAGStatus.State.ERROR) {
          break;
        }
        try {
          Thread.sleep(500);
        } catch (InterruptedException e) {
          // continue;
        }
      }
      // Phase 2: poll every second, printing progress while RUNNING, until a terminal state.
      while (dagStatus.getState() != DAGStatus.State.SUCCEEDED && dagStatus.getState() != DAGStatus.State.FAILED && dagStatus.getState() != DAGStatus.State.KILLED && dagStatus.getState() != DAGStatus.State.ERROR) {
        if (dagStatus.getState() == DAGStatus.State.RUNNING) {
          ExampleDriver.printDAGStatus(dagClient, vNames);
        }
        try {
          try {
            Thread.sleep(1000);
          } catch (InterruptedException e) {
            // continue;
          }
          dagStatus = dagClient.getDAGStatus(statusGetOpts);
        } catch (TezException e) {
          LOG.error("Failed to get application progress. Exiting");
          return -1;
        }
      }
      ExampleDriver.printDAGStatus(dagClient, vNames, true, true);
      LOG.info("DAG " + dagIndex + " completed. " + "FinalState=" + dagStatus.getState());
      if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
        LOG.info("DAG " + dagIndex + " diagnostics: " + dagStatus.getDiagnostics());
      }
    }
  } catch (Exception e) {
    LOG.error("Error occurred when submitting/running DAGs", e);
    throw e;
  } finally {
    // Runs on every exit path from the try block, including the early "return -1" above.
    if (!retainStagingDir) {
      pathFs.delete(stagingDir, true);
    }
    LOG.info("Shutting down session");
    tezSession.stop();
  }
  if (!useTezSession) {
    ExampleDriver.printDAGStatus(dagClient, vNames);
    LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
  }
  // The exit code reflects the state of the last DAG whose status was fetched.
  return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}
use of org.apache.tez.dag.api.PreWarmVertex in project tez by apache.
the class TestTezClient method testPreWarm.
/**
 * Verifies that pre-warming a client whose AM reports READY submits exactly one DAG
 * and that the submitted DAG's name starts with the pre-warm prefix.
 */
@Test(timeout = 5000)
public void testPreWarm() throws Exception {
  TezClientForTest tezClient = configureAndCreateTezClient();
  tezClient.start();

  // Stub YARN to report the application as RUNNING and the AM to report READY.
  when(tezClient.mockYarnClient.getApplicationReport(tezClient.mockAppId).getYarnApplicationState())
      .thenReturn(YarnApplicationState.RUNNING);
  GetAMStatusResponseProto readyStatus =
      GetAMStatusResponseProto.newBuilder().setStatus(TezAppMasterStatusProto.READY).build();
  when(tezClient.sessionAmProxy.getAMStatus((RpcController) any(), (GetAMStatusRequestProto) any()))
      .thenReturn(readyStatus);

  tezClient.preWarm(PreWarmVertex.create("PreWarm", 1, Resource.newInstance(1, 1)));

  // Capture the single submit request and inspect the name of the DAG plan it carries.
  ArgumentCaptor<SubmitDAGRequestProto> submitCaptor = ArgumentCaptor.forClass(SubmitDAGRequestProto.class);
  verify(tezClient.sessionAmProxy, times(1)).submitDAG((RpcController) any(), submitCaptor.capture());
  String submittedDagName = submitCaptor.getValue().getDAGPlan().getName();
  assertTrue(submittedDagName.startsWith(TezConstants.TEZ_PREWARM_DAG_NAME_PREFIX));

  tezClient.stop();
}
use of org.apache.tez.dag.api.PreWarmVertex in project hive by apache.
the class TezSessionState method startSessionAndContainers.
/**
 * Starts the given Tez session, optionally pre-warms containers, and waits for the session
 * to become ready.
 *
 * <p>On any failure the scratch directory and DAG resources are cleaned up; when running on a
 * background thread ({@code isOnThread}) the session is additionally closed.
 *
 * @param session the Tez client to start
 * @param conf Hive configuration; consulted for the pre-warm settings, and
 *             {@code TezConfiguration.TEZ_QUEUE_NAME} is unset on it after a successful start
 * @param commonLocalResources local resources handed to the pre-warm vertex
 * @param tezConfig Tez configuration used to build the pre-warm vertex
 * @param isOnThread whether this call runs on a background thread; this changes how
 *                   interruptions are treated (see the inline comments)
 * @return the same {@code session} instance, once started and ready
 * @throws TezException propagated from the Tez client calls
 * @throws IOException propagated from the Tez client calls, or wrapping an
 *                     {@code InterruptedException} when {@code isOnThread} is true
 */
private TezClient startSessionAndContainers(TezClient session, HiveConf conf, Map<String, LocalResource> commonLocalResources, TezConfiguration tezConfig, boolean isOnThread) throws TezException, IOException {
  boolean isSuccessful = false;
  try {
    session.start();
    if (HiveConf.getBoolVar(conf, ConfVars.HIVE_PREWARM_ENABLED)) {
      int n = HiveConf.getIntVar(conf, ConfVars.HIVE_PREWARM_NUM_CONTAINERS);
      LOG.info("Prewarming " + n + " containers (id: " + sessionId + ", scratch dir: " + tezScratchDir + ")");
      PreWarmVertex prewarmVertex = utils.createPreWarmVertex(tezConfig, n, commonLocalResources);
      try {
        session.preWarm(prewarmVertex);
      } catch (IOException ie) {
        // An IOException may carry no message; guard against null so that the original
        // exception is rethrown instead of being masked by a NullPointerException.
        String message = ie.getMessage();
        boolean interruptedWhileWaiting = message != null && message.contains("Interrupted while waiting");
        if (!isOnThread && interruptedWhileWaiting) {
          // Off-thread, an interrupted pre-warm is logged and startup continues.
          LOG.warn("Hive Prewarm threw an exception ", ie);
        } else {
          throw ie;
        }
      }
    }
    try {
      session.waitTillReady();
    } catch (InterruptedException ie) {
      if (isOnThread) {
        throw new IOException(ie);
      }
      // Not on a background thread: the interruption is ignored.
    }
    isSuccessful = true;
    // sessionState.getQueueName() comes from cluster wide configured queue names.
    // sessionState.getConf().get("tez.queue.name") is explicitly set by user in a session.
    // TezSessionPoolManager sets tez.queue.name if user has specified one or use the one from
    // cluster wide queue names.
    // There is no way to differentiate how this was set (user vs system).
    // Unset this after opening the session so that reopening of session uses the correct queue
    // names i.e, if client has not died and if the user has explicitly set a queue name
    // then reopened session will use user specified queue name else default cluster queue names.
    conf.unset(TezConfiguration.TEZ_QUEUE_NAME);
    return session;
  } finally {
    if (!isSuccessful) {
      // Close first (background-thread case only), then release scratch dir and DAG resources.
      if (isOnThread) {
        closeAndIgnoreExceptions(session);
      }
      cleanupScratchDir();
      cleanupDagResources();
    }
  }
}
use of org.apache.tez.dag.api.PreWarmVertex in project tez by apache.
the class TestTezClient method testPreWarmWithTimeout.
/**
 * Exercises the timed variant of preWarm against three AM readiness scenarios:
 * the AM never leaves INITIALIZING, the AM is READY from the start, and the AM
 * switches from INITIALIZING to READY while preWarm is waiting.
 */
@Test(timeout = 30000)
public void testPreWarmWithTimeout() throws Exception {
long startTime = 0, endTime = 0;
TezClientForTest client = configureAndCreateTezClient();
// Spy on the client so submitDAG invocations can be counted; start/stop use the real methods.
final TezClientForTest spyClient = spy(client);
doCallRealMethod().when(spyClient).start();
doCallRealMethod().when(spyClient).stop();
spyClient.start();
when(spyClient.mockYarnClient.getApplicationReport(spyClient.mockAppId).getYarnApplicationState()).thenReturn(YarnApplicationState.RUNNING);
// Scenario 1: the AM keeps reporting INITIALIZING.
when(spyClient.sessionAmProxy.getAMStatus((RpcController) any(), (GetAMStatusRequestProto) any())).thenReturn(GetAMStatusResponseProto.newBuilder().setStatus(TezAppMasterStatusProto.INITIALIZING).build());
PreWarmVertex vertex = PreWarmVertex.create("PreWarm", 1, Resource.newInstance(1, 1));
// Timeout in milliseconds passed to every preWarm call below.
int timeout = 5000;
try {
startTime = Time.monotonicNow();
spyClient.preWarm(vertex, timeout, TimeUnit.MILLISECONDS);
fail("PreWarm should have encountered an Exception!");
} catch (SessionNotReady te) {
// Expected: more than the timeout elapsed and no DAG was submitted.
endTime = Time.monotonicNow();
assertTrue("Time taken is not as expected", (endTime - startTime) > timeout);
verify(spyClient, times(0)).submitDAG(any(DAG.class));
Assert.assertTrue("Unexpected Exception message", te.getMessage().contains("Tez AM not ready"));
}
// Scenario 2: the AM reports READY; preWarm must finish within the timeout and submit one DAG.
when(spyClient.sessionAmProxy.getAMStatus((RpcController) any(), (GetAMStatusRequestProto) any())).thenReturn(GetAMStatusResponseProto.newBuilder().setStatus(TezAppMasterStatusProto.READY).build());
try {
startTime = Time.monotonicNow();
spyClient.preWarm(vertex, timeout, TimeUnit.MILLISECONDS);
endTime = Time.monotonicNow();
assertTrue("Time taken is not as expected", (endTime - startTime) <= timeout);
verify(spyClient, times(1)).submitDAG(any(DAG.class));
} catch (TezException te) {
fail("PreWarm should have succeeded!");
}
// Scenario 3: a background thread stubs INITIALIZING, waits, then stubs READY.
Thread amStateThread = new Thread() {
@Override
public void run() {
// Nothing visible here counts this latch down, so await() below acts as a delay of up to 1000 ms.
CountDownLatch latch = new CountDownLatch(1);
try {
when(spyClient.sessionAmProxy.getAMStatus((RpcController) any(), (GetAMStatusRequestProto) any())).thenReturn(GetAMStatusResponseProto.newBuilder().setStatus(TezAppMasterStatusProto.INITIALIZING).build());
latch.await(1000, TimeUnit.MILLISECONDS);
when(spyClient.sessionAmProxy.getAMStatus((RpcController) any(), (GetAMStatusRequestProto) any())).thenReturn(GetAMStatusResponseProto.newBuilder().setStatus(TezAppMasterStatusProto.READY).build());
} catch (InterruptedException e) {
e.printStackTrace();
} catch (ServiceException e) {
e.printStackTrace();
}
}
};
amStateThread.start();
startTime = Time.monotonicNow();
spyClient.preWarm(vertex, timeout, TimeUnit.MILLISECONDS);
endTime = Time.monotonicNow();
assertTrue("Time taken is not as expected", (endTime - startTime) <= timeout);
// Cumulative count: one submission from scenario 2 plus one from scenario 3.
verify(spyClient, times(2)).submitDAG(any(DAG.class));
spyClient.stop();
client.stop();
}
use of org.apache.tez.dag.api.PreWarmVertex in project hive by apache.
the class DagUtils method createPreWarmVertex.
/**
 * Builds the vertex used to pre-warm containers with {@link HivePreWarmProcessor}.
 *
 * @param conf Tez configuration; serialized into the processor's user payload and passed to
 *             the helpers that supply the container resource, Java options and environment
 * @param numContainers number of containers to pre-warm
 * @param localResources additional resources to pre-warm with; may be {@code null}, in which
 *                       case no extra local files are added
 * @return prewarm vertex to run
 * @throws IOException declared by the original signature; see the helpers called below
 * @throws TezException declared by the original signature; see the helpers called below
 */
public PreWarmVertex createPreWarmVertex(TezConfiguration conf, int numContainers, Map<String, LocalResource> localResources) throws IOException, TezException {
  ProcessorDescriptor prewarmProcDescriptor = ProcessorDescriptor.create(HivePreWarmProcessor.class.getName());
  prewarmProcDescriptor.setUserPayload(TezUtils.createUserPayloadFromConf(conf));
  PreWarmVertex prewarmVertex = PreWarmVertex.create("prewarm", prewarmProcDescriptor, numContainers, getContainerResource(conf));

  // Copy into a fresh map so a null argument becomes an empty map and the caller's map is
  // not handed to the vertex directly.
  Map<String, LocalResource> combinedResources = new HashMap<String, LocalResource>();
  if (localResources != null) {
    combinedResources.putAll(localResources);
  }
  // Pass the null-safe copy; previously it was built but the raw argument was passed instead.
  prewarmVertex.addTaskLocalFiles(combinedResources);

  prewarmVertex.setTaskLaunchCmdOpts(getContainerJavaOpts(conf));
  prewarmVertex.setTaskEnvironment(getContainerEnvironment(conf, false));
  return prewarmVertex;
}
Aggregations