
Example 1 with HadoopShim

Use of org.apache.tez.hadoop.shim.HadoopShim in project tez by apache.

Class TestOrderedWordCount, method run:

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    boolean generateSplitsInClient;
    SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
    try {
        generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
        otherArgs = splitCmdLineParser.getRemainingArgs();
    } catch (ParseException e1) {
        System.err.println("Invalid options");
        printUsage();
        return 2;
    }
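    // Behavior knobs for this test driver, all read from plain Configuration flags.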
    boolean useTezSession = conf.getBoolean("USE_TEZ_SESSION", true);
    long interJobSleepTimeout = conf.getInt("INTER_JOB_SLEEP_INTERVAL", 0) * 1000L;
    boolean retainStagingDir = conf.getBoolean("RETAIN_STAGING_DIR", false);
    boolean useMRSettings = conf.getBoolean("USE_MR_CONFIGS", true);
    // TODO needs to use auto reduce parallelism
    int intermediateNumReduceTasks = conf.getInt("IREDUCE_NUM_TASKS", 2);
    int maxDataLengthThroughIPC = conf.getInt(MAX_IPC_DATA_LENGTH, -1);
    int exceedDataLimit = conf.getInt(EXCEED_IPC_DATA_LIMIT, 3);
    if (maxDataLengthThroughIPC > 0) {
        conf.setInt(CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH, maxDataLengthThroughIPC * 1024 * 1024);
    }
    if (((otherArgs.length % 2) != 0) || (!useTezSession && otherArgs.length != 2)) {
        printUsage();
        return 2;
    }
    List<String> inputPaths = new ArrayList<String>();
    List<String> outputPaths = new ArrayList<String>();
    TezConfiguration tezConf = new TezConfiguration(conf);
    for (int i = 0; i < otherArgs.length; i += 2) {
        FileSystem inputPathFs = new Path(otherArgs[i]).getFileSystem(tezConf);
        inputPaths.add(inputPathFs.makeQualified(new Path(otherArgs[i])).toString());
        FileSystem outputPathFs = new Path(otherArgs[i + 1]).getFileSystem(tezConf);
        outputPaths.add(outputPathFs.makeQualified(new Path(otherArgs[i + 1])).toString());
    }
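    // Load the shim for the Hadoop version on the classpath, then set up a
    // per-run staging directory for the session.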
    UserGroupInformation.setConfiguration(conf);
    HadoopShim hadoopShim = new HadoopShimsLoader(tezConf).getHadoopShim();
    TestOrderedWordCount instance = new TestOrderedWordCount();
    FileSystem fs = FileSystem.get(conf);
    String stagingDirStr = conf.get(TezConfiguration.TEZ_AM_STAGING_DIR, TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT) + Path.SEPARATOR + Long.toString(System.currentTimeMillis());
    Path stagingDir = new Path(stagingDirStr);
    FileSystem pathFs = stagingDir.getFileSystem(tezConf);
    pathFs.mkdirs(new Path(stagingDirStr));
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr);
    stagingDir = pathFs.makeQualified(new Path(stagingDirStr));
    TokenCache.obtainTokensForNamenodes(instance.credentials, new Path[] { stagingDir }, conf);
    TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);
    if (useTezSession) {
        LOG.info("Creating Tez Session");
        tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, true);
    } else {
        tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, false);
    }
    TezClient tezSession = TezClient.create("OrderedWordCountSession", tezConf, null, instance.credentials);
    tezSession.start();
    if (tezSession.getAppMasterApplicationId() != null) {
        TezUtilsInternal.setHadoopCallerContext(hadoopShim, tezSession.getAppMasterApplicationId());
    }
    DAGStatus dagStatus = null;
    DAGClient dagClient = null;
    String[] vNames = { "initialmap", "intermediate_reducer", "finalreduce" };
    Set<StatusGetOpts> statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
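    // Run one DAG per input/output pair; in session mode the same AM is reused.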
    try {
        for (int dagIndex = 1; dagIndex <= inputPaths.size(); ++dagIndex) {
            if (dagIndex != 1 && interJobSleepTimeout > 0) {
                try {
                    LOG.info("Sleeping between jobs, sleepInterval=" + (interJobSleepTimeout / 1000));
                    Thread.sleep(interJobSleepTimeout);
                } catch (InterruptedException e) {
                    LOG.info("Main thread interrupted. Breaking out of job loop");
                    break;
                }
            }
            String inputPath = inputPaths.get(dagIndex - 1);
            String outputPath = outputPaths.get(dagIndex - 1);
            if (fs.exists(new Path(outputPath))) {
                throw new FileAlreadyExistsException("Output directory " + outputPath + " already exists");
            }
            LOG.info("Running OrderedWordCount DAG" + ", dagIndex=" + dagIndex + ", inputPath=" + inputPath + ", outputPath=" + outputPath);
            Map<String, LocalResource> localResources = new TreeMap<String, LocalResource>();
            DAG dag = instance.createDAG(fs, tezConf, localResources, stagingDir, dagIndex, inputPath, outputPath, generateSplitsInClient, useMRSettings, intermediateNumReduceTasks, maxDataLengthThroughIPC, exceedDataLimit);
            String callerType = "TestOrderedWordCount";
            String callerId = tezSession.getAppMasterApplicationId() == null ? ("UnknownApp_" + System.currentTimeMillis() + dagIndex) : (tezSession.getAppMasterApplicationId().toString() + "_" + dagIndex);
            dag.setCallerContext(CallerContext.create("Tez", callerId, callerType, "TestOrderedWordCount Job"));
            boolean doPreWarm = dagIndex == 1 && useTezSession && conf.getBoolean("PRE_WARM_SESSION", true);
            int preWarmNumContainers = 0;
            if (doPreWarm) {
                preWarmNumContainers = conf.getInt("PRE_WARM_NUM_CONTAINERS", 0);
                if (preWarmNumContainers <= 0) {
                    doPreWarm = false;
                }
            }
            if (doPreWarm) {
                LOG.info("Pre-warming Session");
                PreWarmVertex preWarmVertex = PreWarmVertex.create("PreWarm", preWarmNumContainers, dag.getVertex("initialmap").getTaskResource());
                preWarmVertex.addTaskLocalFiles(dag.getVertex("initialmap").getTaskLocalFiles());
                preWarmVertex.setTaskEnvironment(dag.getVertex("initialmap").getTaskEnvironment());
                preWarmVertex.setTaskLaunchCmdOpts(dag.getVertex("initialmap").getTaskLaunchCmdOpts());
                tezSession.preWarm(preWarmVertex);
            }
            if (useTezSession) {
                LOG.info("Waiting for TezSession to get into ready state");
                waitForTezSessionReady(tezSession);
                LOG.info("Submitting DAG to Tez Session, dagIndex=" + dagIndex);
                dagClient = tezSession.submitDAG(dag);
                LOG.info("Submitted DAG to Tez Session, dagIndex=" + dagIndex);
            } else {
                LOG.info("Submitting DAG as a new Tez Application");
                dagClient = tezSession.submitDAG(dag);
            }
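            // First wait for the DAG to leave its submission states, then poll
            // once a second, printing progress while it is RUNNING.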
            while (true) {
                dagStatus = dagClient.getDAGStatus(statusGetOpts);
                if (dagStatus.getState() == DAGStatus.State.RUNNING || dagStatus.getState() == DAGStatus.State.SUCCEEDED || dagStatus.getState() == DAGStatus.State.FAILED || dagStatus.getState() == DAGStatus.State.KILLED || dagStatus.getState() == DAGStatus.State.ERROR) {
                    break;
                }
                try {
                    Thread.sleep(500);
                } catch (InterruptedException e) {
                // continue;
                }
            }
            while (dagStatus.getState() != DAGStatus.State.SUCCEEDED && dagStatus.getState() != DAGStatus.State.FAILED && dagStatus.getState() != DAGStatus.State.KILLED && dagStatus.getState() != DAGStatus.State.ERROR) {
                if (dagStatus.getState() == DAGStatus.State.RUNNING) {
                    ExampleDriver.printDAGStatus(dagClient, vNames);
                }
                try {
                    try {
                        Thread.sleep(1000);
                    } catch (InterruptedException e) {
                    // continue;
                    }
                    dagStatus = dagClient.getDAGStatus(statusGetOpts);
                } catch (TezException e) {
                    LOG.error("Failed to get application progress. Exiting");
                    return -1;
                }
            }
            ExampleDriver.printDAGStatus(dagClient, vNames, true, true);
            LOG.info("DAG " + dagIndex + " completed. " + "FinalState=" + dagStatus.getState());
            if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
                LOG.info("DAG " + dagIndex + " diagnostics: " + dagStatus.getDiagnostics());
            }
        }
    } catch (Exception e) {
        LOG.error("Error occurred when submitting/running DAGs", e);
        throw e;
    } finally {
        if (!retainStagingDir) {
            pathFs.delete(stagingDir, true);
        }
        LOG.info("Shutting down session");
        tezSession.stop();
    }
    if (!useTezSession) {
        ExampleDriver.printDAGStatus(dagClient, vNames);
        LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
    }
    return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}
Also used:
TezException (org.apache.tez.dag.api.TezException)
FileAlreadyExistsException (org.apache.hadoop.mapred.FileAlreadyExistsException)
Configuration (org.apache.hadoop.conf.Configuration)
TezConfiguration (org.apache.tez.dag.api.TezConfiguration)
TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration)
HadoopShim (org.apache.tez.hadoop.shim.HadoopShim)
ArrayList (java.util.ArrayList)
HadoopShimsLoader (org.apache.tez.hadoop.shim.HadoopShimsLoader)
TezClient (org.apache.tez.client.TezClient)
PreWarmVertex (org.apache.tez.dag.api.PreWarmVertex)
FileSystem (org.apache.hadoop.fs.FileSystem)
DAGStatus (org.apache.tez.dag.api.client.DAGStatus)
Path (org.apache.hadoop.fs.Path)
DAG (org.apache.tez.dag.api.DAG)
TreeMap (java.util.TreeMap)
ParseException (org.apache.commons.cli.ParseException)
IOException (java.io.IOException)
LocalResource (org.apache.hadoop.yarn.api.records.LocalResource)
StatusGetOpts (org.apache.tez.dag.api.client.StatusGetOpts)
SplitsInClientOptionParser (org.apache.tez.mapreduce.examples.helpers.SplitsInClientOptionParser)
DAGClient (org.apache.tez.dag.api.client.DAGClient)
GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser)
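
The shim-related portion of the method above reduces to a few lines. A minimal sketch, assuming only the calls visible in the example (note that TezUtilsInternal is tez-internal API, so treat this as illustrative rather than a supported public entry point; the class and session names are made up for the sketch):

import org.apache.hadoop.conf.Configuration;
import org.apache.tez.client.TezClient;
import org.apache.tez.common.TezUtilsInternal;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.hadoop.shim.HadoopShim;
import org.apache.tez.hadoop.shim.HadoopShimsLoader;

public class ShimCallerContextSketch {

    public static void main(String[] args) throws Exception {
        // Pick the shim implementation matching the Hadoop version on the classpath.
        TezConfiguration tezConf = new TezConfiguration(new Configuration());
        HadoopShim hadoopShim = new HadoopShimsLoader(tezConf).getHadoopShim();

        TezClient tezSession = TezClient.create("ShimSketchSession", tezConf);
        tezSession.start();
        try {
            // Tag subsequent Hadoop RPC calls with the AM's application id; on
            // Hadoop versions without caller-context support this is a no-op.
            if (tezSession.getAppMasterApplicationId() != null) {
                TezUtilsInternal.setHadoopCallerContext(
                        hadoopShim, tezSession.getAppMasterApplicationId());
            }
        } finally {
            tezSession.stop();
        }
    }
}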

Example 2 with HadoopShim

Use of org.apache.tez.hadoop.shim.HadoopShim in project tez by apache.

Class TestHistoryEventHandler, method createHandler:

private HistoryEventHandler createHandler(HistoryLogLevel logLevel) {
    Configuration conf = new Configuration(baseConfig);
    conf.setBoolean(TezConfiguration.DAG_RECOVERY_ENABLED, false);
    conf.set(TezConfiguration.TEZ_HISTORY_LOGGING_SERVICE_CLASS, InMemoryHistoryLoggingService.class.getName());
    if (logLevel != null) {
        conf.setEnum(TezConfiguration.TEZ_HISTORY_LOGGING_LOGLEVEL, logLevel);
    }
    DAG dag = mock(DAG.class);
    when(dag.getConf()).thenReturn(conf);
    AppContext appContext = mock(AppContext.class);
    when(appContext.getApplicationID()).thenReturn(appId);
    when(appContext.getHadoopShim()).thenReturn(new HadoopShim() {
    });
    when(appContext.getAMConf()).thenReturn(conf);
    when(appContext.getCurrentDAG()).thenReturn(dag);
    HistoryEventHandler handler = new HistoryEventHandler(appContext);
    handler.init(conf);
    return handler;
}
Also used:
Configuration (org.apache.hadoop.conf.Configuration)
TezConfiguration (org.apache.tez.dag.api.TezConfiguration)
HadoopShim (org.apache.tez.hadoop.shim.HadoopShim)
AppContext (org.apache.tez.dag.app.AppContext)
DAG (org.apache.tez.dag.app.dag.DAG)
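
Since HadoopShim is an abstract class whose concrete methods provide workable defaults (the empty anonymous subclass above compiles without overriding anything), a test can hand out a do-nothing shim in one line. A minimal sketch of the same stubbing pattern in isolation (Mockito, as in the example; the helper class and method names are illustrative):

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import org.apache.tez.dag.app.AppContext;
import org.apache.tez.hadoop.shim.HadoopShim;

final class ShimTestSupport {

    // Returns a mocked AppContext whose shim does nothing, which is all
    // most unit tests need from the Hadoop compatibility layer.
    static AppContext contextWithNoOpShim() {
        AppContext appContext = mock(AppContext.class);
        when(appContext.getHadoopShim()).thenReturn(new HadoopShim() {
        });
        return appContext;
    }
}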

Example 3 with HadoopShim

Use of org.apache.tez.hadoop.shim.HadoopShim in project tez by apache.

Class TestATSV15HistoryLoggingService, method createService:

private ATSV15HistoryLoggingService createService(int numDagsPerGroup) throws IOException, YarnException {
    ATSV15HistoryLoggingService service = new ATSV15HistoryLoggingService();
    appContext = mock(AppContext.class);
    when(appContext.getApplicationID()).thenReturn(appId);
    when(appContext.getHadoopShim()).thenReturn(new HadoopShim() {
    });
    service.setAppContext(appContext);
    Configuration conf = new Configuration(false);
    if (numDagsPerGroup != -1) {
        conf.setInt(TezConfiguration.TEZ_HISTORY_LOGGING_TIMELINE_NUM_DAGS_PER_GROUP, numDagsPerGroup);
    }
    service.init(conf);
    // Set timeline service.
    timelineClient = mock(TimelineClient.class);
    entityLog = new HashMap<>();
    // timelineClient.init(conf);
    when(timelineClient.getDelegationToken(anyString())).thenReturn(null);
    when(timelineClient.renewDelegationToken(Matchers.<Token<TimelineDelegationTokenIdentifier>>any())).thenReturn(0L);
    when(timelineClient.putEntities(Matchers.<TimelineEntity>anyVararg())).thenAnswer(new Answer() {

        @Override
        public TimelinePutResponse answer(InvocationOnMock invocation) throws Throwable {
            return putEntityHelper(DEFAULT_GROUP_ID, invocation.getArguments(), 0);
        }
    });
    when(timelineClient.putEntities(any(ApplicationAttemptId.class), any(TimelineEntityGroupId.class), Matchers.<TimelineEntity>anyVararg())).thenAnswer(new Answer() {

        @Override
        public TimelinePutResponse answer(InvocationOnMock invocation) throws Throwable {
            return putEntityHelper(invocation.getArgumentAt(1, TimelineEntityGroupId.class), invocation.getArguments(), 2);
        }
    });
    service.timelineClient = timelineClient;
    return service;
}
Also used:
HadoopShim (org.apache.tez.hadoop.shim.HadoopShim)
Configuration (org.apache.hadoop.conf.Configuration)
TezConfiguration (org.apache.tez.dag.api.TezConfiguration)
AppContext (org.apache.tez.dag.app.AppContext)
TimelineDelegationTokenIdentifier (org.apache.hadoop.yarn.security.client.TimelineDelegationTokenIdentifier)
TimelinePutResponse (org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse)
ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)
TimelineEntityGroupId (org.apache.hadoop.yarn.api.records.timeline.TimelineEntityGroupId)
TimelineClient (org.apache.hadoop.yarn.client.api.TimelineClient)
Answer (org.mockito.stubbing.Answer)
InvocationOnMock (org.mockito.invocation.InvocationOnMock)
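
The same empty-subclass trick appears here: the service under test only ever reaches the shim through AppContext, so a default HadoopShim suffices, and the TimelineClient is the one collaborator that needs real stubbing. Routing both putEntities overloads into putEntityHelper lets the test record every entity write, grouped or ungrouped, in a single entityLog map.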

Example 4 with HadoopShim

Use of org.apache.tez.hadoop.shim.HadoopShim in project tez by apache.

Class TezChild, method main:

public static void main(String[] args) throws IOException, InterruptedException, TezException {
    final Configuration defaultConf = new Configuration();
    Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
    final String pid = System.getenv().get("JVM_PID");
    assert args.length == 5;
    String host = args[0];
    int port = Integer.parseInt(args[1]);
    final String containerIdentifier = args[2];
    final String tokenIdentifier = args[3];
    final int attemptNumber = Integer.parseInt(args[4]);
    final String[] localDirs = TezCommonUtils.getTrimmedStrings(System.getenv(Environment.LOCAL_DIRS.name()));
    LOG.info("TezChild starting with PID=" + pid + ", containerIdentifier=" + containerIdentifier);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Info from cmd line: AM-host: " + host + " AM-port: " + port + " containerIdentifier: " + containerIdentifier + " appAttemptNumber: " + attemptNumber + " tokenIdentifier: " + tokenIdentifier);
    }
    // Security framework already loaded the tokens into current ugi
    DAGProtos.ConfigurationProto confProto = TezUtilsInternal.readUserSpecifiedTezConfiguration(System.getenv(Environment.PWD.name()));
    TezUtilsInternal.addUserSpecifiedTezConfiguration(defaultConf, confProto.getConfKeyValuesList());
    UserGroupInformation.setConfiguration(defaultConf);
    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    HadoopShim hadoopShim = new HadoopShimsLoader(defaultConf).getHadoopShim();
    // log the system properties
    if (LOG.isInfoEnabled()) {
        String systemPropsToLog = TezCommonUtils.getSystemPropertiesToLog(defaultConf);
        if (systemPropsToLog != null) {
            LOG.info(systemPropsToLog);
        }
    }
    TezChild tezChild = newTezChild(defaultConf, host, port, containerIdentifier, tokenIdentifier, attemptNumber, localDirs, System.getenv(Environment.PWD.name()), System.getenv(), pid, new ExecutionContextImpl(System.getenv(Environment.NM_HOST.name())), credentials, Runtime.getRuntime().maxMemory(), System.getenv(ApplicationConstants.Environment.USER.toString()), null, true, hadoopShim);
    tezChild.run();
}
Also used:
YarnUncaughtExceptionHandler (org.apache.hadoop.yarn.YarnUncaughtExceptionHandler)
Configuration (org.apache.hadoop.conf.Configuration)
TezConfiguration (org.apache.tez.dag.api.TezConfiguration)
HadoopShim (org.apache.tez.hadoop.shim.HadoopShim)
ExecutionContextImpl (org.apache.tez.runtime.api.impl.ExecutionContextImpl)
DAGProtos (org.apache.tez.dag.api.records.DAGProtos)
Credentials (org.apache.hadoop.security.Credentials)
HadoopShimsLoader (org.apache.tez.hadoop.shim.HadoopShimsLoader)
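
Note where the shim is constructed in this example: TezChild is the task-container entry point, so, as these examples together suggest, each JVM in a Tez application (client, AM, and task containers) loads its own HadoopShim from local configuration via HadoopShimsLoader, keeping version-specific behavior consistent across processes.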

Example 5 with HadoopShim

Use of org.apache.tez.hadoop.shim.HadoopShim in project tez by apache.

Class TestATSHistoryV15, method testGetGroupId:

@Test
public void testGetGroupId() throws Exception {
    ApplicationId appId = ApplicationId.newInstance(1000L, 1);
    TezDAGID dagid = TezDAGID.getInstance(appId, 1);
    for (final HistoryEventType eventType : HistoryEventType.values()) {
        HistoryEvent historyEvent = new HistoryEvent() {

            @Override
            public HistoryEventType getEventType() {
                return eventType;
            }

            @Override
            public boolean isRecoveryEvent() {
                return false;
            }

            @Override
            public boolean isHistoryEvent() {
                return false;
            }

            @Override
            public void toProtoStream(OutputStream outputStream) throws IOException {
            }

            @Override
            public void fromProtoStream(InputStream inputStream) throws IOException {
            }
        };
        DAGHistoryEvent event = new DAGHistoryEvent(dagid, historyEvent);
        ATSV15HistoryLoggingService service = new ATSV15HistoryLoggingService();
        AppContext appContext = mock(AppContext.class);
        when(appContext.getApplicationID()).thenReturn(appId);
        when(appContext.getHadoopShim()).thenReturn(new HadoopShim() {
        });
        service.setAppContext(appContext);
        TimelineEntityGroupId grpId = service.getGroupId(event);
        Assert.assertNotNull(grpId);
        Assert.assertEquals(appId, grpId.getApplicationId());
        switch(eventType) {
            case AM_LAUNCHED:
            case APP_LAUNCHED:
            case AM_STARTED:
            case CONTAINER_LAUNCHED:
            case CONTAINER_STOPPED:
                Assert.assertEquals(appId.toString(), grpId.getTimelineEntityGroupId());
                break;
            default:
                Assert.assertEquals(dagid.toString(), grpId.getTimelineEntityGroupId());
        }
        service.close();
    }
}
Also used:
HadoopShim (org.apache.tez.hadoop.shim.HadoopShim)
InputStream (java.io.InputStream)
OutputStream (java.io.OutputStream)
AppContext (org.apache.tez.dag.app.AppContext)
TezDAGID (org.apache.tez.dag.records.TezDAGID)
DAGHistoryEvent (org.apache.tez.dag.history.DAGHistoryEvent)
HistoryEventType (org.apache.tez.dag.history.HistoryEventType)
TimelineEntityGroupId (org.apache.hadoop.yarn.api.records.timeline.TimelineEntityGroupId)
ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)
HistoryEvent (org.apache.tez.dag.history.HistoryEvent)
ATSV15HistoryLoggingService (org.apache.tez.dag.history.logging.ats.ATSV15HistoryLoggingService)
Test (org.junit.Test)
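
The switch in this test encodes the grouping rule directly: application-lifecycle events are filed under the application id, and all DAG-scoped events under the DAG id. A hypothetical helper expressing the same rule (timelineGroupFor is illustrative, not a tez API):

import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.tez.dag.history.HistoryEventType;
import org.apache.tez.dag.records.TezDAGID;

final class GroupIdRule {

    // Hypothetical helper mirroring the rule asserted above.
    static String timelineGroupFor(HistoryEventType type, ApplicationId appId, TezDAGID dagId) {
        switch (type) {
            case AM_LAUNCHED:
            case APP_LAUNCHED:
            case AM_STARTED:
            case CONTAINER_LAUNCHED:
            case CONTAINER_STOPPED:
                // Application-lifecycle events are not tied to any single DAG.
                return appId.toString();
            default:
                // Everything else is scoped to its DAG's entity group.
                return dagId.toString();
        }
    }
}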

Aggregations

HadoopShim (org.apache.tez.hadoop.shim.HadoopShim): 5
Configuration (org.apache.hadoop.conf.Configuration): 4
TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 4
AppContext (org.apache.tez.dag.app.AppContext): 3
TimelineEntityGroupId (org.apache.hadoop.yarn.api.records.timeline.TimelineEntityGroupId): 2
HadoopShimsLoader (org.apache.tez.hadoop.shim.HadoopShimsLoader): 2
IOException (java.io.IOException): 1
InputStream (java.io.InputStream): 1
OutputStream (java.io.OutputStream): 1
ArrayList (java.util.ArrayList): 1
TreeMap (java.util.TreeMap): 1
ParseException (org.apache.commons.cli.ParseException): 1
FileSystem (org.apache.hadoop.fs.FileSystem): 1
Path (org.apache.hadoop.fs.Path): 1
FileAlreadyExistsException (org.apache.hadoop.mapred.FileAlreadyExistsException): 1
Credentials (org.apache.hadoop.security.Credentials): 1
GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser): 1
YarnUncaughtExceptionHandler (org.apache.hadoop.yarn.YarnUncaughtExceptionHandler): 1
ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 1
ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 1