use of org.apache.hadoop.classification.InterfaceAudience.Private in project tez by apache.
the class MRInput method initializeInternal.
/**
 * Creates the {@code mrReader} for this input while holding {@code rrLock}.
 *
 * <p>When {@code splitInfoViaEvents} is set, the reader is constructed without a split.
 * Otherwise the split meta information is read via {@code MRInputUtils.readSplits}, the
 * entry for this task index is loaded from disk, the reader is constructed around that
 * split, and the {@code INPUT_SPLIT_LENGTH_BYTES} counter is incremented by the split
 * length when it is known.
 *
 * @throws IOException if reading the split information or creating the reader fails
 */
@Private
void initializeInternal() throws IOException {
  // Primarily for visibility
  rrLock.lock();
  try {
    if (splitInfoViaEvents) {
      // No split is passed to the reader on this path.
      // NOTE(review): presumably the split is delivered later through an event - confirm.
      if (useNewApi) {
        mrReader = new MRReaderMapReduce(jobConf, getContext().getCounters(), inputRecordCounter, getContext().getApplicationId().getClusterTimestamp(), getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(), getContext().getTaskIndex(), getContext().getTaskAttemptNumber(), getContext());
      } else {
        mrReader = new MRReaderMapred(jobConf, getContext().getCounters(), inputRecordCounter, getContext());
      }
    } else {
      // Locate this task's split among the split meta information for the job.
      TaskSplitMetaInfo[] allMetaInfo = MRInputUtils.readSplits(jobConf);
      TaskSplitMetaInfo thisTaskMetaInfo = allMetaInfo[getContext().getTaskIndex()];
      TaskSplitIndex splitMetaInfo = new TaskSplitIndex(thisTaskMetaInfo.getSplitLocation(), thisTaskMetaInfo.getStartOffset());
      // -1 marks "length unknown"; the counter below is only updated for a known length.
      long splitLength = -1;
      if (useNewApi) {
        org.apache.hadoop.mapreduce.InputSplit newInputSplit = MRInputUtils.getNewSplitDetailsFromDisk(splitMetaInfo, jobConf, getContext().getCounters().findCounter(TaskCounter.SPLIT_RAW_BYTES));
        try {
          splitLength = newInputSplit.getLength();
        } catch (InterruptedException e) {
          LOG.warn("Got interrupted while reading split length: ", e);
          // Catching InterruptedException clears the interrupt status; restore it so that
          // code further up the stack can still observe the interruption.
          Thread.currentThread().interrupt();
        }
        mrReader = new MRReaderMapReduce(jobConf, newInputSplit, getContext().getCounters(), inputRecordCounter, getContext().getApplicationId().getClusterTimestamp(), getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(), getContext().getTaskIndex(), getContext().getTaskAttemptNumber(), getContext());
      } else {
        org.apache.hadoop.mapred.InputSplit oldInputSplit = MRInputUtils.getOldSplitDetailsFromDisk(splitMetaInfo, jobConf, getContext().getCounters().findCounter(TaskCounter.SPLIT_RAW_BYTES));
        splitLength = oldInputSplit.getLength();
        mrReader = new MRReaderMapred(jobConf, oldInputSplit, getContext().getCounters(), inputRecordCounter, getContext());
      }
      if (splitLength != -1) {
        getContext().getCounters().findCounter(TaskCounter.INPUT_SPLIT_LENGTH_BYTES).increment(splitLength);
      }
    }
  } finally {
    rrLock.unlock();
  }
  LOG.info("Initialized MRInput: " + getContext().getSourceVertexName());
}
use of org.apache.hadoop.classification.InterfaceAudience.Private in project tez by apache.
the class TezClientUtils method getAMProxy.
/**
 * Creates an RPC proxy to the Tez AM listening at the given host and port.
 *
 * <p>The proxy is created inside a {@code doAs} for a remote-user UGI named after the
 * current user. When a client-to-AM token is supplied, it is converted for the AM's
 * socket address and added to that UGI first.
 *
 * @param conf configuration used to set the protocol engine and create the proxy
 * @param amHost host the AM is running on
 * @param amRpcPort RPC port of the AM
 * @param clientToAMToken YARN client-to-AM token; may be {@code null}, in which case no
 *        token is added to the UGI
 * @return the proxy to the AM
 * @throws IOException if the proxy cannot be created, or if the calling thread is
 *         interrupted while creating it (the interrupt status is preserved)
 */
@Private
public static DAGClientAMProtocolBlockingPB getAMProxy(final Configuration conf, String amHost, int amRpcPort, org.apache.hadoop.yarn.api.records.Token clientToAMToken) throws IOException {
  final InetSocketAddress serviceAddr = NetUtils.createSocketAddrForHost(amHost, amRpcPort);
  UserGroupInformation userUgi = UserGroupInformation.createRemoteUser(UserGroupInformation.getCurrentUser().getUserName());
  if (clientToAMToken != null) {
    Token<ClientToAMTokenIdentifier> token = ConverterUtils.convertFromYarn(clientToAMToken, serviceAddr);
    userUgi.addToken(token);
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Connecting to Tez AM at " + serviceAddr);
  }
  try {
    return userUgi.doAs(new PrivilegedExceptionAction<DAGClientAMProtocolBlockingPB>() {
      @Override
      public DAGClientAMProtocolBlockingPB run() throws IOException {
        RPC.setProtocolEngine(conf, DAGClientAMProtocolBlockingPB.class, ProtobufRpcEngine.class);
        return (DAGClientAMProtocolBlockingPB) RPC.getProxy(DAGClientAMProtocolBlockingPB.class, 0, serviceAddr, conf);
      }
    });
  } catch (InterruptedException e) {
    // The interruption is reported to the caller as an IOException; restore the interrupt
    // status so it is not lost once the checked exception type changes.
    Thread.currentThread().interrupt();
    throw new IOException("Failed to connect to AM", e);
  }
}
use of org.apache.hadoop.classification.InterfaceAudience.Private in project tez by apache.
the class TezClientUtils method setupDAGCredentials.
/**
 * Collects the credentials required to run a DAG and returns them.
 *
 * <p>The result starts as a copy of the session credentials and is then extended with
 * file system tokens for: the URIs registered on the DAG, the URIs registered on every
 * data source and data sink of each vertex, and the paths of all task local resources
 * configured on the vertices and on the DAG itself.
 *
 * @param dag
 *          the dag for which credentials need to be setup
 * @param sessionCredentials
 *          session credentials which have already been obtained; must not be null
 * @param conf
 *          configuration passed to the token-fetching helpers
 * @return a new credentials object holding the session credentials plus the tokens
 *         obtained for the DAG
 * @throws IOException
 *           if obtaining tokens fails, or if a local resource URL cannot be converted
 *           into a path
 */
@Private
static Credentials setupDAGCredentials(DAG dag, Credentials sessionCredentials, Configuration conf) throws IOException {
  Preconditions.checkNotNull(sessionCredentials);
  TezCommonUtils.logCredentials(LOG, sessionCredentials, "session");

  // Every session credential is needed by the DAG, so start from those.
  Credentials merged = new Credentials();
  merged.mergeAll(sessionCredentials);

  // Tokens for the URIs the user registered directly on the DAG.
  addFileSystemCredentialsFromURIs(dag.getURIsForCredentials(), merged, conf);

  // Paths of all local resources; tokens for them are fetched in one call at the end.
  Set<Path> localResourcePaths = new HashSet<Path>();
  try {
    for (Vertex vertex : dag.getVertices()) {
      for (LocalResource resource : vertex.getTaskLocalFiles().values()) {
        localResourcePaths.add(ConverterUtils.getPathFromYarnURL(resource.getResource()));
      }
      for (DataSourceDescriptor source : vertex.getDataSources()) {
        addFileSystemCredentialsFromURIs(source.getURIsForCredentials(), merged, conf);
      }
      for (DataSinkDescriptor sink : vertex.getDataSinks()) {
        addFileSystemCredentialsFromURIs(sink.getURIsForCredentials(), merged, conf);
      }
    }
    for (LocalResource resource : dag.getTaskLocalFiles().values()) {
      localResourcePaths.add(ConverterUtils.getPathFromYarnURL(resource.getResource()));
    }
    Path[] pathArray = localResourcePaths.toArray(new Path[localResourcePaths.size()]);
    TokenCache.obtainTokensForFileSystems(merged, pathArray, conf);
  } catch (URISyntaxException e) {
    throw new IOException(e);
  }
  return merged;
}
use of org.apache.hadoop.classification.InterfaceAudience.Private in project tez by apache.
the class TezClientUtils method createApplicationSubmissionContext.
/**
* Create an ApplicationSubmissionContext to launch a Tez AM.
*
* Besides building the YARN records, this method writes files into the Tez system staging
* directory for the application: the binary AM configuration (only if it does not exist
* yet), the list of AM local resources, and - when a DAG is supplied - the binary DAG plan
* (plus a text version when the AM log level is DEBUG or finer).
*
* @param appId Application Id
* @param dag DAG to be submitted; may be null, in which case the session mode option is
*        added to the AM command line and no DAG plan is written
* @param amName Name for the application
* @param amConfig AM Configuration; its binary conf local resource is set by this method
* @param tezJarResources Resources to be used by the AM
* @param sessionCreds the credential object which will be populated with session specific
*        credentials (tokens for the staging directory are added to it here); must not be
*        null
* @param tezLrsAsArchive passed through to the environment setup and to DAG plan creation
*        (NOTE(review): presumably indicates that the Tez local resources are an archive -
*        confirm against the helpers)
* @param apiVersionInfo version information that is added to the AM environment
* @param servicePluginsDescriptor descriptor for services which may be running in the AM
* @param javaOptsChecker passed through to DAG plan creation
* @return an ApplicationSubmissionContext to launch a Tez AM
* @throws IOException
* @throws YarnException
*/
@Private
@VisibleForTesting
public static ApplicationSubmissionContext createApplicationSubmissionContext(ApplicationId appId, DAG dag, String amName, AMConfiguration amConfig, Map<String, LocalResource> tezJarResources, Credentials sessionCreds, boolean tezLrsAsArchive, TezApiVersionInfo apiVersionInfo, ServicePluginsDescriptor servicePluginsDescriptor, JavaOptsChecker javaOptsChecker) throws IOException, YarnException {
Preconditions.checkNotNull(sessionCreds);
TezConfiguration conf = amConfig.getTezConfiguration();
// Make sure the base staging directory exists and derive the per-application paths from it.
FileSystem fs = TezClientUtils.ensureStagingDirExists(conf, TezCommonUtils.getTezBaseStagingPath(conf));
String strAppId = appId.toString();
Path tezSysStagingPath = TezCommonUtils.createTezSystemStagingPath(conf, strAppId);
Path binaryConfPath = TezCommonUtils.getTezConfStagingPath(tezSysStagingPath);
binaryConfPath = fs.makeQualified(binaryConfPath);
// Setup resource requirements
Resource capability = Records.newRecord(Resource.class);
capability.setMemory(amConfig.getTezConfiguration().getInt(TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB, TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB_DEFAULT));
capability.setVirtualCores(amConfig.getTezConfiguration().getInt(TezConfiguration.TEZ_AM_RESOURCE_CPU_VCORES, TezConfiguration.TEZ_AM_RESOURCE_CPU_VCORES_DEFAULT));
if (LOG.isDebugEnabled()) {
LOG.debug("AppMaster capability = " + capability);
}
// Setup required Credentials for the AM launch. DAG specific credentials
// are handled separately.
ByteBuffer securityTokens = null;
// Setup security tokens
Credentials amLaunchCredentials = new Credentials();
if (amConfig.getCredentials() != null) {
amLaunchCredentials.addAll(amConfig.getCredentials());
}
// Add Staging dir creds to the list of session credentials.
TokenCache.obtainTokensForFileSystems(sessionCreds, new Path[] { binaryConfPath }, conf);
// Add session specific credentials to the AM credentials.
amLaunchCredentials.mergeAll(sessionCreds);
// Serialize the merged credentials; the buffer is handed to the container launch context below.
DataOutputBuffer dob = new DataOutputBuffer();
amLaunchCredentials.writeTokenStorageToStream(dob);
securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
// Setup the command to run the AM
List<String> vargs = new ArrayList<String>(8);
vargs.add(Environment.JAVA_HOME.$() + "/bin/java");
String amOpts = constructAMLaunchOpts(amConfig.getTezConfiguration(), capability);
vargs.add(amOpts);
String amLogLevelString = amConfig.getTezConfiguration().get(TezConfiguration.TEZ_AM_LOG_LEVEL, TezConfiguration.TEZ_AM_LOG_LEVEL_DEFAULT);
String[] amLogParams = parseLogParams(amLogLevelString);
// The first parsed element is used as the AM log level.
String amLogLevel = amLogParams[0];
maybeAddDefaultLoggingJavaOpts(amLogLevel, vargs);
// FIX sun bug mentioned in TEZ-327
vargs.add("-Dsun.nio.ch.bugLevel=''");
vargs.add(TezConstants.TEZ_APPLICATION_MASTER_CLASS);
// Without a DAG the AM is started with the session mode option.
if (dag == null) {
vargs.add("--" + TezConstants.TEZ_SESSION_MODE_CLI_OPTION);
}
// Redirect the AM's stdout and stderr into the container log directory.
vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + File.separator + ApplicationConstants.STDOUT);
vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + File.separator + ApplicationConstants.STDERR);
Vector<String> vargsFinal = new Vector<String>(8);
// Final command: all arguments joined with spaces into a single command string.
StringBuilder mergedCommand = new StringBuilder();
for (CharSequence str : vargs) {
mergedCommand.append(str).append(" ");
}
vargsFinal.add(mergedCommand.toString());
if (LOG.isDebugEnabled()) {
LOG.debug("Command to launch container for ApplicationMaster is : " + mergedCommand);
}
// Environment for the AM container: defaults from configuration plus version and log parameters.
Map<String, String> environment = new TreeMap<String, String>();
TezYARNUtils.setupDefaultEnv(environment, conf, TezConfiguration.TEZ_AM_LAUNCH_ENV, TezConfiguration.TEZ_AM_LAUNCH_ENV_DEFAULT, TezConfiguration.TEZ_AM_LAUNCH_CLUSTER_DEFAULT_ENV, TezConfiguration.TEZ_AM_LAUNCH_CLUSTER_DEFAULT_ENV_DEFAULT, tezLrsAsArchive);
addVersionInfoToEnv(environment, apiVersionInfo);
addLogParamsToEnv(environment, amLogParams);
Map<String, LocalResource> amLocalResources = new TreeMap<String, LocalResource>();
// Not fetching credentials for AMLocalResources. Expect this to be provided via AMCredentials.
if (amConfig.getAMLocalResources() != null) {
amLocalResources.putAll(amConfig.getAMLocalResources());
}
amLocalResources.putAll(tezJarResources);
TezConfiguration tezConf = amConfig.getTezConfiguration();
// Merge the dag access controls into tez am config.
if (dag != null && dag.getDagAccessControls() != null) {
// Merge updates the conf object passed. In non session mode, same client object can be used
// to submit multiple dags, copying this prevents ACL of one DAG from being used in another.
tezConf = new TezConfiguration(amConfig.getTezConfiguration());
dag.getDagAccessControls().mergeIntoAmAcls(tezConf);
}
// don't overwrite existing conf, needed for TezClient.getClient() so existing containers have stable resource fingerprints
if (!binaryConfPath.getFileSystem(tezConf).exists(binaryConfPath)) {
ConfigurationProto finalConfProto = createFinalConfProtoForApp(tezConf, servicePluginsDescriptor);
FSDataOutputStream amConfPBOutBinaryStream = null;
try {
amConfPBOutBinaryStream = TezCommonUtils.createFileForAM(fs, binaryConfPath);
finalConfProto.writeTo(amConfPBOutBinaryStream);
} finally {
if (amConfPBOutBinaryStream != null) {
amConfPBOutBinaryStream.close();
}
}
}
// Register the binary configuration file as an AM local resource and record it on amConfig.
LocalResource binaryConfLRsrc = TezClientUtils.createLocalResource(fs, binaryConfPath, LocalResourceType.FILE, LocalResourceVisibility.APPLICATION);
amConfig.setBinaryConfLR(binaryConfLRsrc);
amLocalResources.put(TezConstants.TEZ_PB_BINARY_CONF_NAME, binaryConfLRsrc);
// Create Session Jars definition to be sent to AM as a local resource
Path sessionJarsPath = TezCommonUtils.getTezAMJarStagingPath(tezSysStagingPath);
FSDataOutputStream sessionJarsPBOutStream = null;
try {
sessionJarsPBOutStream = TezCommonUtils.createFileForAM(fs, sessionJarsPath);
// Write out the initial list of resources which will be available in the AM
DAGProtos.PlanLocalResourcesProto amResourceProto;
// amLocalResources is never null here (it was assigned a new TreeMap above) and at this
// point already contains the binary conf entry.
if (amLocalResources != null && !amLocalResources.isEmpty()) {
amResourceProto = DagTypeConverters.convertFromLocalResources(amLocalResources);
} else {
amResourceProto = DAGProtos.PlanLocalResourcesProto.getDefaultInstance();
}
amResourceProto.writeDelimitedTo(sessionJarsPBOutStream);
} finally {
if (sessionJarsPBOutStream != null) {
sessionJarsPBOutStream.close();
}
}
// The resource list file itself is added after it has been written, so it is not listed inside it.
LocalResource sessionJarsPBLRsrc = TezClientUtils.createLocalResource(fs, sessionJarsPath, LocalResourceType.FILE, LocalResourceVisibility.APPLICATION);
amLocalResources.put(TezConstants.TEZ_AM_LOCAL_RESOURCES_PB_FILE_NAME, sessionJarsPBLRsrc);
// Build the YARN application ACLs for the current user.
// NOTE(review): this uses amConfig.getTezConfiguration() rather than tezConf, which carries
// the merged DAG access controls - confirm that this is intended.
String user = UserGroupInformation.getCurrentUser().getShortUserName();
ACLManager aclManager = new ACLManager(user, amConfig.getTezConfiguration());
Map<ApplicationAccessType, String> acls = aclManager.toYARNACls();
if (dag != null) {
DAGPlan dagPB = prepareAndCreateDAGPlan(dag, amConfig, tezJarResources, tezLrsAsArchive, sessionCreds, servicePluginsDescriptor, javaOptsChecker);
// emit protobuf DAG file style
Path binaryPath = TezCommonUtils.getTezBinPlanStagingPath(tezSysStagingPath);
if (LOG.isDebugEnabled()) {
LOG.debug("Stage directory information for AppId :" + appId + " tezSysStagingPath :" + tezSysStagingPath + " binaryConfPath :" + binaryConfPath + " sessionJarsPath :" + sessionJarsPath + " binaryPlanPath :" + binaryPath);
}
FSDataOutputStream dagPBOutBinaryStream = null;
try {
// binary output
dagPBOutBinaryStream = TezCommonUtils.createFileForAM(fs, binaryPath);
dagPB.writeTo(dagPBOutBinaryStream);
} finally {
if (dagPBOutBinaryStream != null) {
dagPBOutBinaryStream.close();
}
}
amLocalResources.put(TezConstants.TEZ_PB_PLAN_BINARY_NAME, TezClientUtils.createLocalResource(fs, binaryPath, LocalResourceType.FILE, LocalResourceVisibility.APPLICATION));
// Additionally localize a text version of the plan when the AM log level is DEBUG or finer.
if (Level.DEBUG.isGreaterOrEqual(Level.toLevel(amLogLevel))) {
Path textPath = localizeDagPlanAsText(dagPB, fs, amConfig, strAppId, tezSysStagingPath);
amLocalResources.put(TezConstants.TEZ_PB_PLAN_TEXT_NAME, TezClientUtils.createLocalResource(fs, textPath, LocalResourceType.FILE, LocalResourceVisibility.APPLICATION));
}
}
// Send the shuffle token as part of the AM launch context, so that the NM running the AM can
// provide this to AuxServices running on the AM node - in case tasks run within the AM,
// and no other task runs on this node.
Map<String, ByteBuffer> serviceData = new HashMap<String, ByteBuffer>();
String auxiliaryService = conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
serviceData.put(auxiliaryService, TezCommonUtils.serializeServiceData(TokenCache.getSessionToken(amLaunchCredentials)));
// Setup ContainerLaunchContext for AM container
ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(amLocalResources, environment, vargsFinal, serviceData, securityTokens, acls);
// Set up the ApplicationSubmissionContext
ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);
Collection<String> tagsFromConf = amConfig.getTezConfiguration().getTrimmedStringCollection(TezConfiguration.TEZ_APPLICATION_TAGS);
appContext.setApplicationType(TezConstants.TEZ_APPLICATION_TYPE);
// Tags are only set when the configuration provides at least one.
if (tagsFromConf != null && !tagsFromConf.isEmpty()) {
appContext.setApplicationTags(new HashSet<String>(tagsFromConf));
}
appContext.setApplicationId(appId);
appContext.setResource(capability);
// The queue is only set when a non-empty queue name is configured.
String queueName = amConfig.getQueueName();
if (queueName != null && !queueName.isEmpty()) {
appContext.setQueue(amConfig.getQueueName());
}
// set the application priority
setApplicationPriority(appContext, amConfig);
appContext.setApplicationName(amName);
appContext.setCancelTokensWhenComplete(amConfig.getTezConfiguration().getBoolean(TezConfiguration.TEZ_CANCEL_DELEGATION_TOKENS_ON_COMPLETION, TezConfiguration.TEZ_CANCEL_DELEGATION_TOKENS_ON_COMPLETION_DEFAULT));
appContext.setAMContainerSpec(amContainer);
appContext.setMaxAppAttempts(amConfig.getTezConfiguration().getInt(TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS, TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS_DEFAULT));
return appContext;
}
use of org.apache.hadoop.classification.InterfaceAudience.Private in project tez by apache.
the class TezClientUtils method constructAMLaunchOpts.
/**
 * Assembles the JVM options string used on the AM launch command line.
 *
 * <p>The result is built, in order, from: a {@code -Djava.io.tmpdir} setting pointing at
 * the container temp directory under the working directory, the cluster default command
 * options (when non-empty), and the configured AM launch command options. The combined
 * string is then passed through {@code maybeAddDefaultMemoryJavaOpts} together with the
 * AM resource capability and the configured max Java heap fraction.
 *
 * @param tezConf configuration the option values are read from
 * @param capability resource capability of the AM container
 * @return the options string to place on the AM command line
 */
@Private
@VisibleForTesting
static String constructAMLaunchOpts(TezConfiguration tezConf, Resource capability) {
  String clusterDefaultOpts = tezConf.get(TezConfiguration.TEZ_AM_LAUNCH_CLUSTER_DEFAULT_CMD_OPTS, TezConfiguration.TEZ_AM_LAUNCH_CLUSTER_DEFAULT_CMD_OPTS_DEFAULT);
  Path containerTmpDir = new Path(Environment.PWD.$(), YarnConfiguration.DEFAULT_CONTAINER_TEMP_DIR);

  StringBuilder opts = new StringBuilder("-Djava.io.tmpdir=");
  opts.append(containerTmpDir).append(" ");

  boolean hasClusterDefaults = clusterDefaultOpts != null && !clusterDefaultOpts.isEmpty();
  if (hasClusterDefaults) {
    opts.append(clusterDefaultOpts).append(" ");
  }

  // User-specified options come last so they follow the cluster defaults on the command line.
  opts.append(tezConf.get(TezConfiguration.TEZ_AM_LAUNCH_CMD_OPTS, TezConfiguration.TEZ_AM_LAUNCH_CMD_OPTS_DEFAULT));

  double maxHeapFraction = tezConf.getDouble(TezConfiguration.TEZ_CONTAINER_MAX_JAVA_HEAP_FRACTION, TezConfiguration.TEZ_CONTAINER_MAX_JAVA_HEAP_FRACTION_DEFAULT);
  return maybeAddDefaultMemoryJavaOpts(opts.toString(), capability, maxHeapFraction);
}
Aggregations