use of org.apache.tez.client.TezClient in project hive by apache.
the class TezTask method updateSession.
/**
 * Makes sure the given Tez session can be used for the upcoming work.
 *
 * <p>When the session has no client yet it is opened with the supplied jars. When a client
 * already exists, {@code extraResources} are pushed to the AM if the session reports that it
 * lacks {@code inputOutputJars}, and the session's local resources are then refreshed from
 * the task configuration. Note that this body reads the enclosing task's {@code conf};
 * {@code jobConf} and {@code scratchDir} are not used here.
 *
 * @param session the session state to open or top up
 * @param jobConf not used by this method body
 * @param scratchDir not used by this method body
 * @param inputOutputJars jars the session is checked against and, if needed, opened with
 * @param extraResources local resources added to a running AM when jars are missing
 * @throws Exception if opening or updating the session fails
 */
void updateSession(TezSessionState session, JobConf jobConf, Path scratchDir, String[] inputOutputJars, Map<String, LocalResource> extraResources) throws Exception {
  // The resource check runs before the client lookup, in the same order as before.
  final boolean needsResources = !session.hasResources(inputOutputJars);
  final TezClient tezClient = session.getSession();

  // TODO null can also mean that this operation was interrupted. Should we really try to re-create the session in that case ?
  if (tezClient == null) {
    // can happen if the user sets the tez flag after the session was established
    LOG.info("Tez session hasn't been created yet. Opening session");
    session.open(conf, inputOutputJars);
    return;
  }

  LOG.info("Session is already open");
  // Ensure the open session has the necessary resources (StorageHandler)
  if (needsResources) {
    LOG.info("Tez session missing resources, adding additional necessary resources");
    tezClient.addAppMasterLocalFiles(extraResources);
  }
  session.refreshLocalResourcesFromConf(conf);
}
use of org.apache.tez.client.TezClient in project tez by apache.
the class MRRSleepJob method run.
/**
 * Parses the sleep-job options, builds the MRR sleep DAG once and submits it {@code numDags}
 * times through a single {@link TezClient}.
 *
 * <p>Every recognised option takes its value from the following argument; arguments that do
 * not match a known option are ignored. The client is created in session mode only when
 * {@code -numDags} is greater than one.
 *
 * @param args command-line arguments
 * @return 2 when no arguments are given, 1 as soon as a submitted DAG does not finish in
 *         {@code SUCCEEDED}, 0 when every DAG succeeded
 * @throws IllegalArgumentException if an option is the last argument and has no value
 * @throws RuntimeException if the option combination is invalid
 * @throws Exception if staging-directory setup, DAG creation or submission fails
 */
public int run(String[] args) throws Exception {
  if (args.length < 1) {
    System.err.println("MRRSleepJob [-m numMapper] [-r numReducer]" + " [-ir numIntermediateReducer]" + " [-irs numIntermediateReducerStages]" + " [-mt mapSleepTime (msec)] [-rt reduceSleepTime (msec)]" + " [-irt intermediateReduceSleepTime]" + " [-recordt recordSleepTime (msec)]" + " [-generateSplitsInAM (false)/true]" + " [-writeSplitsToDfs (false)/true]" + " [-numDags numDagsToSubmit]");
    ToolRunner.printGenericCommandUsage(System.err);
    return 2;
  }
  int numMapper = 1, numReducer = 1, numIReducer = 1;
  long mapSleepTime = 100, reduceSleepTime = 100, recSleepTime = 100, iReduceSleepTime = 1;
  int iReduceStagesCount = 1;
  boolean writeSplitsToDfs = false;
  boolean generateSplitsInAM = false;
  boolean splitsOptionFound = false;
  boolean isSession = false;
  int numDags = 1;
  for (int i = 0; i < args.length; i++) {
    if (args[i].equals("-m")) {
      numMapper = Integer.parseInt(requireOptionValue(args, ++i));
    } else if (args[i].equals("-r")) {
      numReducer = Integer.parseInt(requireOptionValue(args, ++i));
    } else if (args[i].equals("-ir")) {
      numIReducer = Integer.parseInt(requireOptionValue(args, ++i));
    } else if (args[i].equals("-mt")) {
      mapSleepTime = Long.parseLong(requireOptionValue(args, ++i));
    } else if (args[i].equals("-rt")) {
      reduceSleepTime = Long.parseLong(requireOptionValue(args, ++i));
    } else if (args[i].equals("-irt")) {
      iReduceSleepTime = Long.parseLong(requireOptionValue(args, ++i));
    } else if (args[i].equals("-irs")) {
      iReduceStagesCount = Integer.parseInt(requireOptionValue(args, ++i));
    } else if (args[i].equals("-recordt")) {
      recSleepTime = Long.parseLong(requireOptionValue(args, ++i));
    } else if (args[i].equals("-generateSplitsInAM")) {
      if (splitsOptionFound) {
        throw new RuntimeException("Cannot use both -generateSplitsInAm and -writeSplitsToDfs together");
      }
      splitsOptionFound = true;
      generateSplitsInAM = Boolean.parseBoolean(requireOptionValue(args, ++i));
    } else if (args[i].equals("-writeSplitsToDfs")) {
      if (splitsOptionFound) {
        throw new RuntimeException("Cannot use both -generateSplitsInAm and -writeSplitsToDfs together");
      }
      splitsOptionFound = true;
      writeSplitsToDfs = Boolean.parseBoolean(requireOptionValue(args, ++i));
    } else if (args[i].equals("-numDags")) {
      numDags = Integer.parseInt(requireOptionValue(args, ++i));
      if (numDags < 1) {
        throw new RuntimeException("numDags should be positive");
      }
      // A session is only requested when more than one DAG will be submitted.
      isSession = numDags > 1;
    }
  }
  if (numIReducer > 0 && numReducer <= 0) {
    throw new RuntimeException("Cannot have intermediate reduces without" + " a final reduce");
  }
  // sleep for *SleepTime duration in Task by recSleepTime per record
  int mapSleepCount = (int) Math.ceil(mapSleepTime / ((double) recSleepTime));
  int reduceSleepCount = (int) Math.ceil(reduceSleepTime / ((double) recSleepTime));
  int iReduceSleepCount = (int) Math.ceil(iReduceSleepTime / ((double) recSleepTime));
  TezConfiguration conf = new TezConfiguration(getConf());
  // Pin the staging dir key explicitly so later readers of conf see the effective value.
  conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, conf.get(TezConfiguration.TEZ_AM_STAGING_DIR, TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT));
  String stagingBaseDir = conf.get(TezConfiguration.TEZ_AM_STAGING_DIR, TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT);
  // Each run gets its own timestamp-named sub-directory under the staging base.
  Path stagingDir = new Path(stagingBaseDir, Long.toString(System.currentTimeMillis()));
  stagingDir = stagingDir.getFileSystem(conf).makeQualified(stagingDir);
  TezClientUtils.ensureStagingDirExists(conf, stagingDir);
  DAG dag = createDAG(conf, stagingDir, numMapper, numReducer, iReduceStagesCount, numIReducer, mapSleepTime, mapSleepCount, reduceSleepTime, reduceSleepCount, iReduceSleepTime, iReduceSleepCount, writeSplitsToDfs, generateSplitsInAM);
  TezClient tezSession = TezClient.create("MRRSleep", conf, isSession, null, credentials);
  tezSession.start();
  try {
    for (; numDags > 0; --numDags) {
      DAGClient dagClient = tezSession.submitDAG(dag);
      dagClient.waitForCompletion();
      if (!dagClient.getDAGStatus(null).getState().equals(DAGStatus.State.SUCCEEDED)) {
        return 1;
      }
    }
  } finally {
    tezSession.stop();
  }
  return 0;
}

/**
 * Returns the value argument of an option, failing with a readable message instead of an
 * {@link ArrayIndexOutOfBoundsException} when the option is the last argument.
 */
private static String requireOptionValue(String[] args, int valueIndex) {
  if (valueIndex >= args.length) {
    throw new IllegalArgumentException("Missing value for option " + args[valueIndex - 1]);
  }
  return args[valueIndex];
}
use of org.apache.tez.client.TezClient in project tez by apache.
the class TestOrderedWordCount method run.
/**
 * Runs one OrderedWordCount DAG per (input path, output path) pair given on the command line.
 *
 * <p>Behaviour is tuned through configuration keys read below (for example
 * {@code USE_TEZ_SESSION}, {@code INTER_JOB_SLEEP_INTERVAL}, {@code RETAIN_STAGING_DIR}).
 * All DAGs are submitted through a single {@link TezClient}; the staging directory is removed
 * afterwards unless {@code RETAIN_STAGING_DIR} is set, and the client is always stopped.
 *
 * @param args command-line arguments: generic options, split options, then path pairs
 * @return 2 for invalid options or an invalid number of paths (including none at all),
 *         -1 when polling the DAG status fails with a {@code TezException},
 *         0 when the last DAG that ran ended in {@code SUCCEEDED}, otherwise 1
 * @throws Exception if setup, submission or monitoring fails; errors raised while running
 *         DAGs are logged and rethrown
 */
@Override
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  boolean generateSplitsInClient;
  SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
  try {
    generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
    otherArgs = splitCmdLineParser.getRemainingArgs();
  } catch (ParseException e1) {
    System.err.println("Invalid options");
    printUsage();
    return 2;
  }
  boolean useTezSession = conf.getBoolean("USE_TEZ_SESSION", true);
  // Multiply as long so a large interval cannot overflow int before widening.
  long interJobSleepTimeout = conf.getInt("INTER_JOB_SLEEP_INTERVAL", 0) * 1000L;
  boolean retainStagingDir = conf.getBoolean("RETAIN_STAGING_DIR", false);
  boolean useMRSettings = conf.getBoolean("USE_MR_CONFIGS", true);
  // TODO needs to use auto reduce parallelism
  int intermediateNumReduceTasks = conf.getInt("IREDUCE_NUM_TASKS", 2);
  int maxDataLengthThroughIPC = conf.getInt(MAX_IPC_DATA_LENGTH, -1);
  int exceedDataLimit = conf.getInt(EXCEED_IPC_DATA_LIMIT, 3);
  if (maxDataLengthThroughIPC > 0) {
    conf.setInt(CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH, maxDataLengthThroughIPC * 1024 * 1024);
  }
  // Paths come in (input, output) pairs. An empty list is rejected as well: with no pair
  // no DAG would run and there would be no final status to report.
  if (otherArgs.length == 0 || ((otherArgs.length % 2) != 0) || (!useTezSession && otherArgs.length != 2)) {
    printUsage();
    return 2;
  }
  List<String> inputPaths = new ArrayList<String>();
  List<String> outputPaths = new ArrayList<String>();
  TezConfiguration tezConf = new TezConfiguration(conf);
  for (int i = 0; i < otherArgs.length; i += 2) {
    FileSystem inputPathFs = new Path(otherArgs[i]).getFileSystem(tezConf);
    inputPaths.add(inputPathFs.makeQualified(new Path(otherArgs[i])).toString());
    FileSystem outputPathFs = new Path(otherArgs[i + 1]).getFileSystem(tezConf);
    outputPaths.add(outputPathFs.makeQualified(new Path(otherArgs[i + 1])).toString());
  }
  UserGroupInformation.setConfiguration(conf);
  HadoopShim hadoopShim = new HadoopShimsLoader(tezConf).getHadoopShim();
  TestOrderedWordCount instance = new TestOrderedWordCount();
  FileSystem fs = FileSystem.get(conf);
  // Each run gets its own timestamp-named staging directory.
  String stagingDirStr = conf.get(TezConfiguration.TEZ_AM_STAGING_DIR, TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT) + Path.SEPARATOR + Long.toString(System.currentTimeMillis());
  Path stagingDir = new Path(stagingDirStr);
  FileSystem pathFs = stagingDir.getFileSystem(tezConf);
  pathFs.mkdirs(new Path(stagingDirStr));
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr);
  stagingDir = pathFs.makeQualified(new Path(stagingDirStr));
  TokenCache.obtainTokensForNamenodes(instance.credentials, new Path[] { stagingDir }, conf);
  TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);
  if (useTezSession) {
    LOG.info("Creating Tez Session");
  }
  tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, useTezSession);
  TezClient tezSession = TezClient.create("OrderedWordCountSession", tezConf, null, instance.credentials);
  tezSession.start();
  if (tezSession.getAppMasterApplicationId() != null) {
    TezUtilsInternal.setHadoopCallerContext(hadoopShim, tezSession.getAppMasterApplicationId());
  }
  DAGStatus dagStatus = null;
  DAGClient dagClient = null;
  String[] vNames = { "initialmap", "intermediate_reducer", "finalreduce" };
  Set<StatusGetOpts> statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
  try {
    for (int dagIndex = 1; dagIndex <= inputPaths.size(); ++dagIndex) {
      if (dagIndex != 1 && interJobSleepTimeout > 0) {
        try {
          LOG.info("Sleeping between jobs, sleepInterval=" + (interJobSleepTimeout / 1000));
          Thread.sleep(interJobSleepTimeout);
        } catch (InterruptedException e) {
          LOG.info("Main thread interrupted. Breaking out of job loop");
          break;
        }
      }
      String inputPath = inputPaths.get(dagIndex - 1);
      String outputPath = outputPaths.get(dagIndex - 1);
      if (fs.exists(new Path(outputPath))) {
        throw new FileAlreadyExistsException("Output directory " + outputPath + " already exists");
      }
      LOG.info("Running OrderedWordCount DAG" + ", dagIndex=" + dagIndex + ", inputPath=" + inputPath + ", outputPath=" + outputPath);
      Map<String, LocalResource> localResources = new TreeMap<String, LocalResource>();
      DAG dag = instance.createDAG(fs, tezConf, localResources, stagingDir, dagIndex, inputPath, outputPath, generateSplitsInClient, useMRSettings, intermediateNumReduceTasks, maxDataLengthThroughIPC, exceedDataLimit);
      String callerType = "TestOrderedWordCount";
      String callerId = tezSession.getAppMasterApplicationId() == null ? ("UnknownApp_" + System.currentTimeMillis() + dagIndex) : (tezSession.getAppMasterApplicationId().toString() + "_" + dagIndex);
      dag.setCallerContext(CallerContext.create("Tez", callerId, callerType, "TestOrderedWordCount Job"));
      // Pre-warming is only attempted for the first DAG of a session, and only when a
      // positive container count is configured.
      boolean doPreWarm = dagIndex == 1 && useTezSession && conf.getBoolean("PRE_WARM_SESSION", true);
      int preWarmNumContainers = 0;
      if (doPreWarm) {
        preWarmNumContainers = conf.getInt("PRE_WARM_NUM_CONTAINERS", 0);
        if (preWarmNumContainers <= 0) {
          doPreWarm = false;
        }
      }
      if (doPreWarm) {
        LOG.info("Pre-warming Session");
        PreWarmVertex preWarmVertex = PreWarmVertex.create("PreWarm", preWarmNumContainers, dag.getVertex("initialmap").getTaskResource());
        preWarmVertex.addTaskLocalFiles(dag.getVertex("initialmap").getTaskLocalFiles());
        preWarmVertex.setTaskEnvironment(dag.getVertex("initialmap").getTaskEnvironment());
        preWarmVertex.setTaskLaunchCmdOpts(dag.getVertex("initialmap").getTaskLaunchCmdOpts());
        tezSession.preWarm(preWarmVertex);
      }
      if (useTezSession) {
        LOG.info("Waiting for TezSession to get into ready state");
        waitForTezSessionReady(tezSession);
        LOG.info("Submitting DAG to Tez Session, dagIndex=" + dagIndex);
        dagClient = tezSession.submitDAG(dag);
        LOG.info("Submitted DAG to Tez Session, dagIndex=" + dagIndex);
      } else {
        LOG.info("Submitting DAG as a new Tez Application");
        dagClient = tezSession.submitDAG(dag);
      }
      // Phase 1: poll until the DAG is running or has already reached a final state.
      while (true) {
        dagStatus = dagClient.getDAGStatus(statusGetOpts);
        if (dagStatus.getState() == DAGStatus.State.RUNNING || dagStatus.getState() == DAGStatus.State.SUCCEEDED || dagStatus.getState() == DAGStatus.State.FAILED || dagStatus.getState() == DAGStatus.State.KILLED || dagStatus.getState() == DAGStatus.State.ERROR) {
          break;
        }
        try {
          Thread.sleep(500);
        } catch (InterruptedException e) {
          // continue;
        }
      }
      // Phase 2: report progress until the DAG reaches a final state.
      while (dagStatus.getState() != DAGStatus.State.SUCCEEDED && dagStatus.getState() != DAGStatus.State.FAILED && dagStatus.getState() != DAGStatus.State.KILLED && dagStatus.getState() != DAGStatus.State.ERROR) {
        if (dagStatus.getState() == DAGStatus.State.RUNNING) {
          ExampleDriver.printDAGStatus(dagClient, vNames);
        }
        try {
          try {
            Thread.sleep(1000);
          } catch (InterruptedException e) {
            // continue;
          }
          dagStatus = dagClient.getDAGStatus(statusGetOpts);
        } catch (TezException e) {
          LOG.error("Failed to get application progress. Exiting", e);
          return -1;
        }
      }
      ExampleDriver.printDAGStatus(dagClient, vNames, true, true);
      LOG.info("DAG " + dagIndex + " completed. " + "FinalState=" + dagStatus.getState());
      if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
        LOG.info("DAG " + dagIndex + " diagnostics: " + dagStatus.getDiagnostics());
      }
    }
  } catch (Exception e) {
    LOG.error("Error occurred when submitting/running DAGs", e);
    throw e;
  } finally {
    // Stop the session even when removing the staging directory fails.
    try {
      if (!retainStagingDir) {
        pathFs.delete(stagingDir, true);
      }
    } finally {
      LOG.info("Shutting down session");
      tezSession.stop();
    }
  }
  if (!useTezSession) {
    ExampleDriver.printDAGStatus(dagClient, vNames);
    LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
  }
  return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}
use of org.apache.tez.client.TezClient in project tez by apache.
the class BroadcastAndOneToOneExample method run.
/**
 * Builds and runs the broadcast / one-to-one example DAG and reports whether it succeeded.
 *
 * <p>Container reuse is switched on in the Tez configuration, a per-run staging directory
 * path is derived from the configured staging base, and the DAG is submitted once the client
 * reports ready. The staging directory is deleted and the client stopped in all cases.
 *
 * @param conf base configuration, or {@code null} to start from a default
 *        {@code TezConfiguration}
 * @param doLocalityCheck passed through to {@code createDAG}
 * @return {@code true} when the DAG ends in {@code SUCCEEDED}, {@code false} otherwise
 * @throws Exception if DAG creation, submission, monitoring or cleanup fails
 */
public boolean run(Configuration conf, boolean doLocalityCheck) throws Exception {
  System.out.println("Running BroadcastAndOneToOneExample");
  // conf and UGI
  TezConfiguration tezConf = (conf != null) ? new TezConfiguration(conf) : new TezConfiguration();
  tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true);
  UserGroupInformation.setConfiguration(tezConf);
  // staging dir
  FileSystem fs = FileSystem.get(tezConf);
  String stagingDirStr = tezConf.get(TezConfiguration.TEZ_AM_STAGING_DIR, TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT) + Path.SEPARATOR + "BroadcastAndOneToOneExample" + Path.SEPARATOR + Long.toString(System.currentTimeMillis());
  Path stagingDir = new Path(stagingDirStr);
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr);
  stagingDir = fs.makeQualified(stagingDir);
  // No need to add jar containing this class as assumed to be part of
  // the tez jars.
  // TEZ-674 Obtain tokens based on the Input / Output paths. For now assuming staging dir
  // is the same filesystem as the one used for Input/Output.
  // needs session or else TaskScheduler does not hold onto containers
  TezClient tezSession = TezClient.create("broadcastAndOneToOneExample", tezConf);
  tezSession.start();
  try {
    DAG dag = createDAG(fs, tezConf, stagingDir, doLocalityCheck);
    tezSession.waitTillReady();
    DAGClient dagClient = tezSession.submitDAG(dag);
    // monitoring
    DAGStatus dagStatus = dagClient.waitForCompletionWithStatusUpdates(null);
    if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
      System.out.println("DAG diagnostics: " + dagStatus.getDiagnostics());
      return false;
    }
    return true;
  } finally {
    // Stop the client even when removing the staging directory fails.
    try {
      fs.delete(stagingDir, true);
    } finally {
      tezSession.stop();
    }
  }
}
use of org.apache.tez.client.TezClient in project tez by apache.
the class FilterLinesByWordOneToOne method run.
/**
 * Runs the two-stage FilterLinesByWord DAG (stage1 to stage2 over a one-to-one edge) and
 * waits for it to finish.
 *
 * <p>Expects exactly three arguments after split-option parsing: input path, output path and
 * the filter word. The jar containing this class is copied into a freshly created staging
 * directory and registered as a local resource for the client and both vertices. Once the
 * client has been started, the staging directory is deleted and the client stopped no matter
 * how DAG construction, submission or monitoring ends.
 *
 * @param otherArgs command-line arguments
 * @return 2 for invalid options, a wrong argument count or an existing output directory,
 *         -1 when polling the DAG status fails with a {@code TezException},
 *         0 when the DAG ends in {@code SUCCEEDED}, otherwise 1
 * @throws TezUncheckedException if no jar containing this class is found on the classpath
 * @throws Exception if setup, submission or monitoring fails
 */
@Override
public int run(String[] otherArgs) throws Exception {
  boolean generateSplitsInClient = false;
  SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
  try {
    generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
    otherArgs = splitCmdLineParser.getRemainingArgs();
  } catch (ParseException e1) {
    System.err.println("Invalid options");
    printUsage();
    return 2;
  }
  if (otherArgs.length != 3) {
    printUsage();
    return 2;
  }
  String inputPath = otherArgs[0];
  String outputPath = otherArgs[1];
  String filterWord = otherArgs[2];
  Configuration conf = getConf();
  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(new Path(outputPath))) {
    System.err.println("Output directory : " + outputPath + " already exists");
    return 2;
  }
  TezConfiguration tezConf = new TezConfiguration(conf);
  // A random UUID keeps the staging directories of separate runs apart.
  Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
  TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);
  String jarPath = ClassUtil.findContainingJar(FilterLinesByWordOneToOne.class);
  if (jarPath == null) {
    throw new TezUncheckedException("Could not find any jar containing " + FilterLinesByWordOneToOne.class.getName() + " in the classpath");
  }
  Path remoteJarPath = fs.makeQualified(new Path(stagingDir, "dag_job.jar"));
  fs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
  FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath);
  Map<String, LocalResource> commonLocalResources = new TreeMap<String, LocalResource>();
  LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath), LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, remoteJarStatus.getLen(), remoteJarStatus.getModificationTime());
  commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);
  TezClient tezSession = TezClient.create("FilterLinesByWordSession", tezConf, commonLocalResources, null);
  // Why do I need to start the TezSession.
  tezSession.start();
  DAGClient dagClient = null;
  DAGStatus dagStatus = null;
  String[] vNames = { "stage1", "stage2" };
  // Everything after start() is guarded so a failure while building or submitting the DAG
  // still stops the client and removes the staging directory.
  try {
    Configuration stage1Conf = new JobConf(conf);
    stage1Conf.set(FILTER_PARAM_NAME, filterWord);
    Configuration stage2Conf = new JobConf(conf);
    stage2Conf.set(FileOutputFormat.OUTDIR, outputPath);
    stage2Conf.setBoolean("mapred.mapper.new-api", false);
    UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf);
    // Setup stage1 Vertex
    Vertex stage1Vertex = Vertex.create("stage1", ProcessorDescriptor.create(FilterByWordInputProcessor.class.getName()).setUserPayload(stage1Payload)).addTaskLocalFiles(commonLocalResources);
    DataSourceDescriptor dsd;
    if (generateSplitsInClient) {
      // TODO TEZ-1406. Dont' use MRInputLegacy
      stage1Conf.set(FileInputFormat.INPUT_DIR, inputPath);
      stage1Conf.setBoolean("mapred.mapper.new-api", false);
      dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, stagingDir, true);
    } else {
      dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath).groupSplits(false).build();
    }
    stage1Vertex.addDataSource("MRInput", dsd);
    // Setup stage2 Vertex
    Vertex stage2Vertex = Vertex.create("stage2", ProcessorDescriptor.create(FilterByWordOutputProcessor.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)), dsd.getNumberOfShards());
    stage2Vertex.addTaskLocalFiles(commonLocalResources);
    // Configure the Output for stage2
    stage2Vertex.addDataSink("MROutput", DataSinkDescriptor.create(OutputDescriptor.create(MROutput.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)), OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null));
    UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig.newBuilder(Text.class.getName(), TextLongPair.class.getName()).setFromConfiguration(tezConf).build();
    DAG dag = DAG.create("FilterLinesByWord");
    Edge edge = Edge.create(stage1Vertex, stage2Vertex, edgeConf.createDefaultOneToOneEdgeProperty());
    dag.addVertex(stage1Vertex).addVertex(stage2Vertex).addEdge(edge);
    LOG.info("Submitting DAG to Tez Session");
    dagClient = tezSession.submitDAG(dag);
    LOG.info("Submitted DAG to Tez Session");
    // Phase 1: poll until the DAG is running or has already reached a final state.
    while (true) {
      dagStatus = dagClient.getDAGStatus(null);
      if (dagStatus.getState() == DAGStatus.State.RUNNING || dagStatus.getState() == DAGStatus.State.SUCCEEDED || dagStatus.getState() == DAGStatus.State.FAILED || dagStatus.getState() == DAGStatus.State.KILLED || dagStatus.getState() == DAGStatus.State.ERROR) {
        break;
      }
      try {
        Thread.sleep(500);
      } catch (InterruptedException e) {
        // continue;
      }
    }
    // Phase 2: report progress for as long as the DAG stays in RUNNING.
    while (dagStatus.getState() == DAGStatus.State.RUNNING) {
      try {
        ExampleDriver.printDAGStatus(dagClient, vNames);
        try {
          Thread.sleep(1000);
        } catch (InterruptedException e) {
          // continue;
        }
        dagStatus = dagClient.getDAGStatus(null);
      } catch (TezException e) {
        LOG.error("Failed to get application progress. Exiting", e);
        return -1;
      }
    }
  } finally {
    // Stop the client even when removing the staging directory fails.
    try {
      fs.delete(stagingDir, true);
    } finally {
      tezSession.stop();
    }
  }
  ExampleDriver.printDAGStatus(dagClient, vNames);
  LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
  return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}
Aggregations