Search in sources :

Example 1 with ContainerResource

use of com.datatorrent.stram.StreamingContainerManager.ContainerResource in project apex-core by apache.

The class StreamingAppMasterService, method execute.

/**
   * Main run function for the application master.
   *
   * <p>Registers with the ResourceManager, checks that no application with the same name is
   * already started for the login user (finishing with FAILED status and returning if one is),
   * and then loops until {@code appDone} is set. Each iteration refreshes security tokens when
   * due, runs queued pending tasks, sleeps one second, turns pending container start requests
   * into RM container requests, maintains the node blacklist, sends the allocate call, launches
   * newly allocated containers, processes completed containers and finally monitors container
   * heartbeats. When the loop ends the application is finished with the resulting status.
   *
   * @throws YarnException declared for the YARN client calls made by this method
   * @throws IOException declared for the security / RPC calls made by this method
   */
@SuppressWarnings("SleepWhileInLoop")
private void execute() throws YarnException, IOException {
    LOG.info("Starting ApplicationMaster");
    final Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    LOG.info("number of tokens: {}", credentials.getAllTokens().size());
    for (Token<?> token : credentials.getAllTokens()) {
        LOG.debug("token: {}", token);
    }
    final Configuration conf = getConfig();
    // Refresh interval: the anticipatory factor applied to the shorter of the HDFS and RM token life times
    long tokenLifeTime = (long) (dag.getValue(LogicalPlan.TOKEN_REFRESH_ANTICIPATORY_FACTOR) * Math.min(dag.getValue(LogicalPlan.HDFS_TOKEN_LIFE_TIME), dag.getValue(LogicalPlan.RM_TOKEN_LIFE_TIME)));
    long expiryTime = System.currentTimeMillis() + tokenLifeTime;
    LOG.debug(" expiry token time {}", tokenLifeTime);
    String principal = dag.getValue(LogicalPlan.PRINCIPAL);
    String hdfsKeyTabFile = dag.getValue(LogicalPlan.KEY_TAB_FILE);
    // Register self with ResourceManager
    RegisterApplicationMasterResponse response = amRmClient.registerApplicationMaster(appMasterHostname, 0, appMasterTrackingUrl);
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    int maxVcores = response.getMaximumResourceCapability().getVirtualCores();
    int minMem = conf.getInt("yarn.scheduler.minimum-allocation-mb", 0);
    int minVcores = conf.getInt("yarn.scheduler.minimum-allocation-vcores", 0);
    LOG.info("Max mem {}m, Min mem {}m, Max vcores {} and Min vcores {} capabililty of resources in this cluster ", maxMem, minMem, maxVcores, minVcores);
    long blacklistRemovalTime = dag.getValue(DAGContext.BLACKLISTED_NODE_REMOVAL_TIME_MILLIS);
    int maxConsecutiveContainerFailures = dag.getValue(DAGContext.MAX_CONSECUTIVE_CONTAINER_FAILURES_FOR_BLACKLIST);
    LOG.info("Blacklist removal time in millis = {}, max consecutive node failure count = {}", blacklistRemovalTime, maxConsecutiveContainerFailures);
    // for locality relaxation fall back
    Map<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> requestedResources = Maps.newHashMap();
    // Setup heartbeat emitter
    // TODO poll RM every now and then with an empty request to let RM know that we are alive
    // The heartbeat interval after which an AM is timed out by the RM is defined by a config setting:
    // RM_AM_EXPIRY_INTERVAL_MS with default defined by DEFAULT_RM_AM_EXPIRY_INTERVAL_MS
    // The allocate calls to the RM count as heartbeat so, for now, this additional heartbeat emitter
    // is not required.
    int loopCounter = -1;
    long nodeReportUpdateTime = 0;
    // keep track of already requested containers to not request them again while waiting for allocation
    int numRequestedContainers = 0;
    int numReleasedContainers = 0;
    int nextRequestPriority = 0;
    // Use override for resource requestor in case of cloudera distribution, to handle host specific requests
    ResourceRequestHandler resourceRequestor = System.getenv().containsKey("CDH_HADOOP_BIN") ? new BlacklistBasedResourceRequestHandler() : new ResourceRequestHandler();
    // start requests for which no container request could be created yet; retried further below
    List<ContainerStartRequest> pendingContainerStartRequests = new LinkedList<>();
    YarnClient clientRMService = YarnClient.createYarnClient();
    try {
        // YARN-435
        // we need getClusterNodes to populate the initial node list,
        // subsequent updates come through the heartbeat response
        clientRMService.init(conf);
        clientRMService.start();
        ApplicationReport ar = StramClientUtils.getStartedAppInstanceByName(clientRMService, dag.getAttributes().get(DAG.APPLICATION_NAME), UserGroupInformation.getLoginUser().getUserName(), dag.getAttributes().get(DAG.APPLICATION_ID));
        if (ar != null) {
            // an application with the same name is already started for this user: fail this attempt
            appDone = true;
            dnmgr.shutdownDiagnosticsMessage = String.format("Application master failed due to application %s with duplicate application name \"%s\" by the same user \"%s\" is already started.", ar.getApplicationId().toString(), ar.getName(), ar.getUser());
            LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage);
            finishApplication(FinalApplicationStatus.FAILED);
            return;
        }
        resourceRequestor.updateNodeReports(clientRMService.getNodeReports());
        nodeReportUpdateTime = System.currentTimeMillis() + UPDATE_NODE_REPORTS_INTERVAL;
    } catch (Exception e) {
        throw new RuntimeException("Failed to retrieve cluster nodes report.", e);
    } finally {
        clientRMService.stop();
    }
    List<Container> containers = response.getContainersFromPreviousAttempts();
    // Running containers might take a while to register with the new app master and send the heartbeat signal.
    // The value is decremented once per loop iteration (each iteration sleeps one second).
    int waitForRecovery = containers.size() > 0 ? dag.getValue(LogicalPlan.HEARTBEAT_TIMEOUT_MILLIS) / 1000 : 0;
    List<ContainerId> releasedContainers = previouslyAllocatedContainers(containers);
    FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED;
    final InetSocketAddress rmAddress = conf.getSocketAddr(YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT);
    while (!appDone) {
        loopCounter++;
        final long currentTimeMillis = System.currentTimeMillis();
        // Token refresh only happens with security enabled and a key tab file configured
        if (UserGroupInformation.isSecurityEnabled() && currentTimeMillis >= expiryTime && hdfsKeyTabFile != null) {
            String applicationId = appAttemptID.getApplicationId().toString();
            expiryTime = StramUserLogin.refreshTokens(tokenLifeTime, FileUtils.getTempDirectoryPath(), applicationId, conf, principal, hdfsKeyTabFile, credentials, rmAddress, true);
        }
        if (currentTimeMillis > nodeReportUpdateTime) {
            // NOTE(review): clientRMService was already stopped in the finally block above;
            // confirm that getNodeReports() is valid on a stopped YarnClient.
            resourceRequestor.updateNodeReports(clientRMService.getNodeReports());
            nodeReportUpdateTime = currentTimeMillis + UPDATE_NODE_REPORTS_INTERVAL;
        }
        // Drain and run all tasks queued for execution in this loop
        Runnable r;
        while ((r = this.pendingTasks.poll()) != null) {
            r.run();
        }
        // need not have any available containers
        try {
            sleep(1000);
        } catch (InterruptedException e) {
            LOG.info("Sleep interrupted " + e.getMessage());
        }
        // Setup request to be sent to RM to allocate containers
        List<ContainerRequest> containerRequests = new ArrayList<>();
        List<ContainerRequest> removedContainerRequests = new ArrayList<>();
        // request containers for pending deploy requests
        if (!dnmgr.containerStartRequests.isEmpty()) {
            StreamingContainerAgent.ContainerStartRequest csr;
            while ((csr = dnmgr.containerStartRequests.poll()) != null) {
                // clamp the requested memory and vcores into the [min, max] range read above
                if (csr.container.getRequiredMemoryMB() > maxMem) {
                    LOG.warn("Container memory {}m above max threshold of cluster. Using max value {}m.", csr.container.getRequiredMemoryMB(), maxMem);
                    csr.container.setRequiredMemoryMB(maxMem);
                }
                if (csr.container.getRequiredMemoryMB() < minMem) {
                    csr.container.setRequiredMemoryMB(minMem);
                }
                if (csr.container.getRequiredVCores() > maxVcores) {
                    LOG.warn("Container vcores {} above max threshold of cluster. Using max value {}.", csr.container.getRequiredVCores(), maxVcores);
                    csr.container.setRequiredVCores(maxVcores);
                }
                if (csr.container.getRequiredVCores() < minVcores) {
                    csr.container.setRequiredVCores(minVcores);
                }
                // each start request gets its own priority; allocations are matched back by priority below
                csr.container.setResourceRequestPriority(nextRequestPriority++);
                ContainerRequest cr = resourceRequestor.createContainerRequest(csr, true);
                if (cr == null) {
                    pendingContainerStartRequests.add(csr);
                } else {
                    resourceRequestor.addContainerRequest(requestedResources, loopCounter, containerRequests, csr, cr);
                }
            }
        }
        // If all other requests are allocated, retry pending requests which need host availability
        if (containerRequests.isEmpty() && !pendingContainerStartRequests.isEmpty()) {
            List<ContainerStartRequest> removalList = new LinkedList<>();
            for (ContainerStartRequest csr : pendingContainerStartRequests) {
                ContainerRequest cr = resourceRequestor.createContainerRequest(csr, true);
                if (cr != null) {
                    resourceRequestor.addContainerRequest(requestedResources, loopCounter, containerRequests, csr, cr);
                    removalList.add(csr);
                }
            }
            pendingContainerStartRequests.removeAll(removalList);
        }
        resourceRequestor.reissueContainerRequests(amRmClient, requestedResources, loopCounter, resourceRequestor, containerRequests, removedContainerRequests);
        /* Remove nodes from blacklist after timeout */
        List<String> blacklistRemovals = new ArrayList<>();
        for (String hostname : failedBlackListedNodes) {
            long timeDiff = currentTimeMillis - failedContainerNodesMap.get(hostname).blackListAdditionTime;
            if (timeDiff >= blacklistRemovalTime) {
                blacklistRemovals.add(hostname);
                failedContainerNodesMap.remove(hostname);
            }
        }
        if (!blacklistRemovals.isEmpty()) {
            amRmClient.updateBlacklist(null, blacklistRemovals);
            LOG.info("Removing nodes {} from blacklist: time elapsed since last blacklisting due to failure is greater than specified timeout", blacklistRemovals.toString());
            failedBlackListedNodes.removeAll(blacklistRemovals);
        }
        numRequestedContainers += containerRequests.size() - removedContainerRequests.size();
        AllocateResponse amResp = sendContainerAskToRM(containerRequests, removedContainerRequests, releasedContainers);
        if (amResp.getAMCommand() != null) {
            // every AM command received from the RM aborts the master with a runtime exception
            LOG.info(" statement executed:{}", amResp.getAMCommand());
            switch(amResp.getAMCommand()) {
                case AM_RESYNC:
                case AM_SHUTDOWN:
                    throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM");
                default:
                    throw new YarnRuntimeException("Received the " + amResp.getAMCommand() + " command from RM");
            }
        }
        releasedContainers.clear();
        // Retrieve list of allocated containers from the response
        List<Container> newAllocatedContainers = amResp.getAllocatedContainers();
        // LOG.info("Got response from RM for container ask, allocatedCnt=" + newAllocatedContainers.size());
        numRequestedContainers -= newAllocatedContainers.size();
        long timestamp = System.currentTimeMillis();
        for (Container allocatedContainer : newAllocatedContainers) {
            LOG.info("Got new container." + ", containerId=" + allocatedContainer.getId() + ", containerNode=" + allocatedContainer.getNodeId() + ", containerNodeURI=" + allocatedContainer.getNodeHttpAddress() + ", containerResourceMemory" + allocatedContainer.getResource().getMemory() + ", priority" + allocatedContainer.getPriority());
            // + ", containerToken" + allocatedContainer.getContainerToken().getIdentifier().toString());
            // Find the outstanding start request with the same priority as the allocated container;
            // if there is none, the container is treated as surplus and released.
            boolean alreadyAllocated = true;
            StreamingContainerAgent.ContainerStartRequest csr = null;
            for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources.entrySet()) {
                if (entry.getKey().container.getResourceRequestPriority() == allocatedContainer.getPriority().getPriority()) {
                    alreadyAllocated = false;
                    csr = entry.getKey();
                    break;
                }
            }
            if (alreadyAllocated) {
                LOG.info("Releasing {} as resource with priority {} was already assigned", allocatedContainer.getId(), allocatedContainer.getPriority());
                releasedContainers.add(allocatedContainer.getId());
                numReleasedContainers++;
                numRequestedContainers--;
                continue;
            }
            if (csr != null) {
                requestedResources.remove(csr);
            }
            // allocate resource to container
            ContainerResource resource = new ContainerResource(allocatedContainer.getPriority().getPriority(), allocatedContainer.getId().toString(), allocatedContainer.getNodeId().toString(), allocatedContainer.getResource().getMemory(), allocatedContainer.getResource().getVirtualCores(), allocatedContainer.getNodeHttpAddress());
            StreamingContainerAgent sca = dnmgr.assignContainer(resource, null);
            if (sca == null) {
                // allocated container no longer needed, add release request
                LOG.warn("Container {} allocated but nothing to deploy, going to release this container.", allocatedContainer.getId());
                releasedContainers.add(allocatedContainer.getId());
            } else {
                AllocatedContainer allocatedContainerHolder = new AllocatedContainer(allocatedContainer);
                this.allocatedContainers.put(allocatedContainer.getId().toString(), allocatedContainerHolder);
                ByteBuffer tokens = null;
                if (UserGroupInformation.isSecurityEnabled()) {
                    // the delegation token is kept on the holder so it can be cancelled when the container completes
                    UserGroupInformation ugi = UserGroupInformation.getLoginUser();
                    Token<StramDelegationTokenIdentifier> delegationToken = allocateDelegationToken(ugi.getUserName(), heartbeatListener.getAddress());
                    allocatedContainerHolder.delegationToken = delegationToken;
                    //ByteBuffer tokens = LaunchContainerRunnable.getTokens(delegationTokenManager, heartbeatListener.getAddress());
                    tokens = LaunchContainerRunnable.getTokens(ugi, delegationToken);
                }
                LaunchContainerRunnable launchContainer = new LaunchContainerRunnable(allocatedContainer, nmClient, sca, tokens);
                // Thread launchThread = new Thread(runnableLaunchContainer);
                // launchThreads.add(launchThread);
                // launchThread.start();
                // communication with NMs is now async
                launchContainer.run();
                // record container start event
                StramEvent ev = new StramEvent.StartContainerEvent(allocatedContainer.getId().toString(), allocatedContainer.getNodeId().toString());
                ev.setTimestamp(timestamp);
                dnmgr.recordEventAsync(ev);
            }
        }
        // track node updates for future locality constraint allocations
        // TODO: it seems 2.0.4-alpha doesn't give us any updates
        resourceRequestor.updateNodeReports(amResp.getUpdatedNodes());
        // Check the completed containers
        List<ContainerStatus> completedContainers = amResp.getCompletedContainersStatuses();
        // LOG.debug("Got response from RM for container ask, completedCnt=" + completedContainers.size());
        List<String> blacklistAdditions = new ArrayList<>();
        for (ContainerStatus containerStatus : completedContainers) {
            LOG.info("Completed containerId=" + containerStatus.getContainerId() + ", state=" + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus() + ", diagnostics=" + containerStatus.getDiagnostics());
            // non complete containers should not be here
            assert (containerStatus.getState() == ContainerState.COMPLETE);
            // null when the completed container is not tracked in allocatedContainers
            AllocatedContainer allocatedContainer = allocatedContainers.remove(containerStatus.getContainerId().toString());
            if (allocatedContainer != null && allocatedContainer.delegationToken != null) {
                UserGroupInformation ugi = UserGroupInformation.getLoginUser();
                delegationTokenManager.cancelToken(allocatedContainer.delegationToken, ugi.getUserName());
            }
            int exitStatus = containerStatus.getExitStatus();
            if (0 != exitStatus) {
                if (allocatedContainer != null) {
                    numFailedContainers.incrementAndGet();
                    // Integer.MAX_VALUE as max consecutive failures skips the blacklisting logic entirely
                    if (exitStatus != 1 && maxConsecutiveContainerFailures != Integer.MAX_VALUE) {
                        // If container failure due to framework
                        String hostname = allocatedContainer.container.getNodeId().getHost();
                        if (!failedBlackListedNodes.contains(hostname)) {
                            // Blacklist the node if not already blacklisted
                            if (failedContainerNodesMap.containsKey(hostname)) {
                                NodeFailureStats stats = failedContainerNodesMap.get(hostname);
                                long timeStamp = System.currentTimeMillis();
                                if (timeStamp - stats.lastFailureTimeStamp >= blacklistRemovalTime) {
                                    // Reset failure count if last failure was before Blacklist removal time
                                    stats.failureCount = 1;
                                    stats.lastFailureTimeStamp = timeStamp;
                                } else {
                                    stats.lastFailureTimeStamp = timeStamp;
                                    stats.failureCount++;
                                    if (stats.failureCount >= maxConsecutiveContainerFailures) {
                                        LOG.info("Node {} failed {} times consecutively within {} minutes, marking the node blacklisted", hostname, stats.failureCount, blacklistRemovalTime / (60 * 1000));
                                        blacklistAdditions.add(hostname);
                                        failedBlackListedNodes.add(hostname);
                                    }
                                }
                            } else {
                                failedContainerNodesMap.put(hostname, new NodeFailureStats(System.currentTimeMillis(), 1));
                            }
                        }
                    }
                }
                //          if (exitStatus == 1) {
                //            // non-recoverable StreamingContainer failure
                //            appDone = true;
                //            finalStatus = FinalApplicationStatus.FAILED;
                //            dnmgr.shutdownDiagnosticsMessage = "Unrecoverable failure " + containerStatus.getContainerId();
                //            LOG.info("Exiting due to: {}", dnmgr.shutdownDiagnosticsMessage);
                //          }
                //          else {
                // Recoverable failure or process killed (externally or via stop request by AM)
                // also occurs when a container was released by the application but never assigned/launched
                LOG.debug("Container {} failed or killed.", containerStatus.getContainerId());
                dnmgr.scheduleContainerRestart(containerStatus.getContainerId().toString());
            //          }
            } else {
                // container completed successfully
                numCompletedContainers.incrementAndGet();
                LOG.info("Container completed successfully." + ", containerId=" + containerStatus.getContainerId());
                // Reset counter for node failure, if exists.
                // The host is only known for tracked containers; guard against an untracked one
                // (allocatedContainer == null) instead of failing with a NullPointerException.
                if (allocatedContainer != null) {
                    String hostname = allocatedContainer.container.getNodeId().getHost();
                    NodeFailureStats stats = failedContainerNodesMap.get(hostname);
                    if (stats != null) {
                        stats.failureCount = 0;
                    }
                }
            }
            String containerIdStr = containerStatus.getContainerId().toString();
            dnmgr.removeContainerAgent(containerIdStr);
            // record container stop event
            StramEvent ev = new StramEvent.StopContainerEvent(containerIdStr, containerStatus.getExitStatus());
            ev.setReason(containerStatus.getDiagnostics());
            dnmgr.recordEventAsync(ev);
        }
        if (!blacklistAdditions.isEmpty()) {
            amRmClient.updateBlacklist(blacklistAdditions, null);
            // remember when the nodes were blacklisted; used by the removal check at the top of the loop
            long timeStamp = System.currentTimeMillis();
            for (String hostname : blacklistAdditions) {
                NodeFailureStats stats = failedContainerNodesMap.get(hostname);
                stats.blackListAdditionTime = timeStamp;
            }
        }
        if (dnmgr.forcedShutdown) {
            LOG.info("Forced shutdown due to {}", dnmgr.shutdownDiagnosticsMessage);
            finalStatus = FinalApplicationStatus.FAILED;
            appDone = true;
        } else if (allocatedContainers.isEmpty() && numRequestedContainers == 0 && dnmgr.containerStartRequests.isEmpty()) {
            LOG.debug("Exiting as no more containers are allocated or requested");
            finalStatus = FinalApplicationStatus.SUCCEEDED;
            appDone = true;
        }
        LOG.debug("Current application state: loop={}, appDone={}, requested={}, released={}, completed={}, failed={}, currentAllocated={}, dnmgr.containerStartRequests={}", loopCounter, appDone, numRequestedContainers, numReleasedContainers, numCompletedContainers, numFailedContainers, allocatedContainers.size(), dnmgr.containerStartRequests);
        // monitor child containers
        dnmgr.monitorHeartbeat(waitForRecovery > 0);
        waitForRecovery = Math.max(waitForRecovery - 1, 0);
    }
    finishApplication(finalStatus);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) FinalApplicationStatus(org.apache.hadoop.yarn.api.records.FinalApplicationStatus) InetSocketAddress(java.net.InetSocketAddress) ArrayList(java.util.ArrayList) Token(org.apache.hadoop.security.token.Token) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) PTContainer(com.datatorrent.stram.plan.physical.PTContainer) Container(org.apache.hadoop.yarn.api.records.Container) StreamingContainer(com.datatorrent.stram.engine.StreamingContainer) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) ContainerStartRequest(com.datatorrent.stram.StreamingContainerAgent.ContainerStartRequest) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) ContainerStartRequest(com.datatorrent.stram.StreamingContainerAgent.ContainerStartRequest) LinkedList(java.util.LinkedList) ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) ContainerResource(com.datatorrent.stram.StreamingContainerManager.ContainerResource) RegisterApplicationMasterResponse(org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse) Map(java.util.Map) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) StramDelegationTokenIdentifier(com.datatorrent.stram.security.StramDelegationTokenIdentifier) StramEvent(com.datatorrent.stram.api.StramEvent) MutablePair(org.apache.commons.lang3.tuple.MutablePair) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) ByteBuffer(java.nio.ByteBuffer) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) 
YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) Credentials(org.apache.hadoop.security.Credentials)

Example 2 with ContainerResource

use of com.datatorrent.stram.StreamingContainerManager.ContainerResource in project apex-core by apache.

The class StreamingContainerManagerTest, method testProcessHeartbeat.

/**
 * Drives a single-operator plan through container assignment and three container heartbeats,
 * checking the container/operator state transitions and the statistics recorded on the
 * physical operator after each step.
 */
@Test
public void testProcessHeartbeat() throws Exception {
    // one input operator with a stats listener attached and a MemoryStorageAgent as storage agent
    TestGeneratorInputOperator generator = dag.addOperator("o1", TestGeneratorInputOperator.class);
    StatsListener[] listeners = new StatsListener[] { new PartitioningTest.PartitionLoadWatch() };
    dag.setOperatorAttribute(generator, OperatorContext.STATS_LISTENERS, Arrays.asList(listeners));
    dag.setAttribute(OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());

    StreamingContainerManager manager = new StreamingContainerManager(dag);
    PhysicalPlan physicalPlan = manager.getPhysicalPlan();
    Assert.assertEquals("number required containers", 1, physicalPlan.getContainers().size());
    PTOperator partition = physicalPlan.getOperators(dag.getMeta(generator)).get(0);

    // hand the only planned container a resource
    String externalId = "container1";
    ContainerResource resource = new ContainerResource(0, externalId, "localhost", 512, 0, null);
    StreamingContainerAgent agent = manager.assignContainer(resource, InetSocketAddress.createUnresolved("localhost", 0));
    Assert.assertNotNull(agent);
    Assert.assertEquals(PTContainer.State.ALLOCATED, partition.getContainer().getState());
    Assert.assertEquals(PTOperator.State.PENDING_DEPLOY, partition.getState());

    // heartbeat #1 carries no operator stats: the response holds the deploy request
    ContainerStats containerStats = new ContainerStats(externalId);
    ContainerHeartbeat heartbeat = new ContainerHeartbeat();
    heartbeat.setContainerStats(containerStats);
    ContainerHeartbeatResponse deployResponse = manager.processHeartbeat(heartbeat);
    Assert.assertNotNull(deployResponse.deployRequest);
    Assert.assertEquals("" + deployResponse.deployRequest, 1, deployResponse.deployRequest.size());
    Assert.assertEquals(PTContainer.State.ACTIVE, partition.getContainer().getState());
    Assert.assertEquals("state " + partition, PTOperator.State.PENDING_DEPLOY, partition.getState());

    // heartbeat #2 reports the operator as ACTIVE together with stats for window 3
    OperatorHeartbeat operatorHeartbeat = new OperatorHeartbeat();
    operatorHeartbeat.setNodeId(partition.getId());
    operatorHeartbeat.setState(OperatorHeartbeat.DeployState.ACTIVE);
    OperatorStats firstWindow = new OperatorStats();
    firstWindow.checkpoint = new Checkpoint(2, 0, 0);
    firstWindow.windowId = 3;
    firstWindow.outputPorts = Lists.newArrayList();
    PortStats firstPort = new PortStats(TestGeneratorInputOperator.OUTPUT_PORT);
    firstPort.bufferServerBytes = 101;
    firstPort.tupleCount = 1;
    firstWindow.outputPorts.add(firstPort);
    operatorHeartbeat.windowStats = Lists.newArrayList(firstWindow);
    containerStats.operators.add(operatorHeartbeat);
    manager.processHeartbeat(heartbeat);
    Assert.assertEquals(PTContainer.State.ACTIVE, partition.getContainer().getState());
    Assert.assertEquals("state " + partition, PTOperator.State.ACTIVE, partition.getState());
    Assert.assertEquals("tuples " + partition, 1, partition.stats.totalTuplesEmitted.get());
    Assert.assertEquals("tuples " + partition, 0, partition.stats.totalTuplesProcessed.get());
    Assert.assertEquals("window " + partition, 3, partition.stats.currentWindowId.get());
    Assert.assertEquals("port stats", 1, partition.stats.outputPortStatusList.size());
    PortStatus portStatus = partition.stats.outputPortStatusList.get(TestGeneratorInputOperator.OUTPUT_PORT);
    Assert.assertNotNull("port stats", portStatus);
    Assert.assertEquals("port stats", 1, portStatus.totalTuples);

    // heartbeat #3 reuses the operator heartbeat object with fresh stats for window 4
    OperatorStats secondWindow = new OperatorStats();
    secondWindow.checkpoint = new Checkpoint(2, 0, 0);
    secondWindow.windowId = 4;
    secondWindow.outputPorts = Lists.newArrayList();
    PortStats secondPort = new PortStats(TestGeneratorInputOperator.OUTPUT_PORT);
    secondPort.bufferServerBytes = 1;
    secondPort.tupleCount = 1;
    secondWindow.outputPorts.add(secondPort);
    operatorHeartbeat.windowStats = Lists.newArrayList(secondWindow);
    containerStats.operators.clear();
    containerStats.operators.add(operatorHeartbeat);
    manager.processHeartbeat(heartbeat);
    Assert.assertEquals("tuples " + partition, 2, partition.stats.totalTuplesEmitted.get());
    Assert.assertEquals("window " + partition, 4, partition.stats.currentWindowId.get());
    Assert.assertEquals("statsQueue " + partition, 2, partition.stats.listenerStats.size());

    // processing events empties the listener stats queue
    manager.processEvents();
    Assert.assertEquals("statsQueue " + partition, 0, partition.stats.listenerStats.size());
    Assert.assertEquals("lastStats " + partition, 2, partition.stats.lastWindowedStats.size());
}
Also used : PhysicalPlan(com.datatorrent.stram.plan.physical.PhysicalPlan) ContainerStats(com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.ContainerStats) PTOperator(com.datatorrent.stram.plan.physical.PTOperator) OperatorHeartbeat(com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.OperatorHeartbeat) ContainerHeartbeatResponse(com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.ContainerHeartbeatResponse) TestGeneratorInputOperator(com.datatorrent.stram.engine.TestGeneratorInputOperator) MockOperatorStats(com.datatorrent.stram.MockContainer.MockOperatorStats) OperatorStats(com.datatorrent.api.Stats.OperatorStats) StatsListener(com.datatorrent.api.StatsListener) Checkpoint(com.datatorrent.stram.api.Checkpoint) ContainerResource(com.datatorrent.stram.StreamingContainerManager.ContainerResource) PortStatus(com.datatorrent.stram.plan.physical.OperatorStatus.PortStatus) MemoryStorageAgent(com.datatorrent.stram.support.StramTestSupport.MemoryStorageAgent) PortStats(com.datatorrent.api.Stats.OperatorStats.PortStats) ContainerHeartbeat(com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.ContainerHeartbeat) Test(org.junit.Test) PhysicalPlanTest(com.datatorrent.stram.plan.physical.PhysicalPlanTest)

Example 3 with ContainerResource

use of com.datatorrent.stram.StreamingContainerManager.ContainerResource in project apex-core by apache.

the class StreamingContainerManagerTest method testGenerateDeployInfo.

/**
 * Verifies the deploy info generated for a four-operator DAG
 * (o1 -> o2 -> o3 -> o4) that is limited to two containers.
 *
 * The assertions below expect o1 to be deployed alone in the first container and
 * o2, o3 and o4 together in the second one, with:
 * <ul>
 *   <li>the o1 -> o2 stream going through the buffer server of the first container,</li>
 *   <li>the o2 -> o3 stream deployed as CONTAINER_LOCAL (no buffer server address),</li>
 *   <li>the o3 -> o4 stream deployed as THREAD_LOCAL (no buffer server address).</li>
 * </ul>
 */
@Test
public void testGenerateDeployInfo() {
    // --- build the logical plan ---
    TestGeneratorInputOperator o1 = dag.addOperator("o1", TestGeneratorInputOperator.class);
    GenericTestOperator o2 = dag.addOperator("o2", GenericTestOperator.class);
    GenericTestOperator o3 = dag.addOperator("o3", GenericTestOperator.class);
    GenericTestOperator o4 = dag.addOperator("o4", GenericTestOperator.class);
    dag.setOutputPortAttribute(o1.outport, PortContext.BUFFER_MEMORY_MB, 256);
    dag.addStream("o1.outport", o1.outport, o2.inport1);
    dag.setOutputPortAttribute(o1.outport, PortContext.SPIN_MILLIS, 99);
    dag.addStream("o2.outport1", o2.outport1, o3.inport1).setLocality(Locality.CONTAINER_LOCAL);
    dag.addStream("o3.outport1", o3.outport1, o4.inport1).setLocality(Locality.THREAD_LOCAL);
    dag.getAttributes().put(LogicalPlan.CONTAINERS_MAX_COUNT, 2);
    dag.setAttribute(OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());
    Assert.assertEquals("number operators", 4, dag.getAllOperators().size());
    Assert.assertEquals("number root operators", 1, dag.getRootOperators().size());

    // --- create the physical plan and assign both containers ---
    StreamingContainerManager dnm = new StreamingContainerManager(dag);
    Assert.assertEquals("number containers", 2, dnm.getPhysicalPlan().getContainers().size());
    dnm.assignContainer(new ContainerResource(0, "container1Id", "host1", 1024, 0, null), InetSocketAddress.createUnresolved("host1", 9001));
    dnm.assignContainer(new ContainerResource(0, "container2Id", "host2", 1024, 0, null), InetSocketAddress.createUnresolved("host2", 9002));
    StreamingContainerAgent sca1 = dnm.getContainerAgent(dnm.getPhysicalPlan().getContainers().get(0).getExternalId());
    StreamingContainerAgent sca2 = dnm.getContainerAgent(dnm.getPhysicalPlan().getContainers().get(1).getExternalId());
    Assert.assertEquals("container of agent 1", dnm.getPhysicalPlan().getContainers().get(0), sca1.container);
    Assert.assertEquals("state of container 1", PTContainer.State.ALLOCATED, sca1.container.getState());

    // --- container 1: expected to hold only the input operator o1 ---
    List<OperatorDeployInfo> c1 = sca1.getDeployInfoList(sca1.container.getOperators());
    Assert.assertEquals("number operators assigned to c1", 1, c1.size());
    OperatorDeployInfo o1DI = getNodeDeployInfo(c1, dag.getMeta(o1));
    Assert.assertNotNull(o1 + " assigned to " + sca1.container.getExternalId(), o1DI);
    Assert.assertEquals("type " + o1DI, OperatorDeployInfo.OperatorType.INPUT, o1DI.type);
    Assert.assertEquals("inputs " + o1DI.name, 0, o1DI.inputs.size());
    Assert.assertEquals("outputs " + o1DI.name, 1, o1DI.outputs.size());
    Assert.assertNotNull("contextAttributes " + o1DI.name, o1DI.contextAttributes);
    OutputDeployInfo c1o1outport = o1DI.outputs.get(0);
    Assert.assertNotNull("stream connection for container1", c1o1outport);
    Assert.assertEquals("stream connection for container1", "o1.outport", c1o1outport.declaredStreamId);
    Assert.assertEquals("stream connects to upstream host", sca1.container.host, c1o1outport.bufferServerHost);
    Assert.assertEquals("stream connects to upstream port", sca1.container.bufferServerAddress.getPort(), c1o1outport.bufferServerPort);
    Assert.assertNotNull("contextAttributes " + c1o1outport, c1o1outport.contextAttributes);
    Assert.assertEquals("contextAttributes " + c1o1outport, Integer.valueOf(99), c1o1outport.contextAttributes.get(PortContext.SPIN_MILLIS));

    // --- container 2: expected to hold o2, o3 and o4 ---
    List<OperatorDeployInfo> c2 = sca2.getDeployInfoList(sca2.container.getOperators());
    Assert.assertEquals("number operators assigned to container", 3, c2.size());
    OperatorDeployInfo o2DI = getNodeDeployInfo(c2, dag.getMeta(o2));
    OperatorDeployInfo o3DI = getNodeDeployInfo(c2, dag.getMeta(o3));
    Assert.assertNotNull(dag.getMeta(o2) + " assigned to " + sca2.container.getExternalId(), o2DI);
    Assert.assertNotNull(dag.getMeta(o3) + " assigned to " + sca2.container.getExternalId(), o3DI);
    // assertEquals (rather than assertTrue on ==) so that a failure reports the actual value;
    // intValue() keeps the call on the primitive overload and avoids int/Integer ambiguity.
    Assert.assertEquals("The buffer server memory for container 1", 256, sca1.getInitContext().getValue(ContainerContext.BUFFER_SERVER_MB).intValue());
    Assert.assertEquals("The buffer server memory for container 2", 0, sca2.getInitContext().getValue(ContainerContext.BUFFER_SERVER_MB).intValue());

    // buffer server input o2 from o1
    InputDeployInfo c2o2i1 = getInputDeployInfo(o2DI, "o1.outport");
    Assert.assertNotNull("stream connection for container2", c2o2i1);
    Assert.assertEquals("stream connects to upstream host", sca1.container.host, c2o2i1.bufferServerHost);
    Assert.assertEquals("stream connects to upstream port", sca1.container.bufferServerAddress.getPort(), c2o2i1.bufferServerPort);
    Assert.assertEquals("portName " + c2o2i1, dag.getMeta(o2).getMeta(o2.inport1).getPortName(), c2o2i1.portName);
    Assert.assertNull("partitionKeys " + c2o2i1, c2o2i1.partitionKeys);
    Assert.assertEquals("sourceNodeId " + c2o2i1, o1DI.id, c2o2i1.sourceNodeId);
    Assert.assertEquals("sourcePortName " + c2o2i1, TestGeneratorInputOperator.OUTPUT_PORT, c2o2i1.sourcePortName);
    Assert.assertNotNull("contextAttributes " + c2o2i1, c2o2i1.contextAttributes);

    // inline input o3 from o2
    InputDeployInfo c2o3i1 = getInputDeployInfo(o3DI, "o2.outport1");
    Assert.assertNotNull("input from o2.outport1", c2o3i1);
    Assert.assertEquals("portName " + c2o3i1, GenericTestOperator.IPORT1, c2o3i1.portName);
    Assert.assertNull("bufferServerHost " + c2o3i1, c2o3i1.bufferServerHost);
    Assert.assertEquals("bufferServerPort " + c2o3i1, 0, c2o3i1.bufferServerPort);
    Assert.assertNull("partitionKeys " + c2o3i1, c2o3i1.partitionKeys);
    Assert.assertEquals("sourceNodeId " + c2o3i1, o2DI.id, c2o3i1.sourceNodeId);
    Assert.assertEquals("sourcePortName " + c2o3i1, GenericTestOperator.OPORT1, c2o3i1.sourcePortName);
    Assert.assertEquals("locality " + c2o3i1, Locality.CONTAINER_LOCAL, c2o3i1.locality);

    // THREAD_LOCAL o4.inport1
    OperatorDeployInfo o4DI = getNodeDeployInfo(c2, dag.getMeta(o4));
    Assert.assertNotNull(dag.getMeta(o4) + " assigned to " + sca2.container.getExternalId(), o4DI);
    InputDeployInfo c2o4i1 = getInputDeployInfo(o4DI, "o3.outport1");
    Assert.assertNotNull("input from o3.outport1", c2o4i1);
    Assert.assertEquals("portName " + c2o4i1, GenericTestOperator.IPORT1, c2o4i1.portName);
    Assert.assertNull("bufferServerHost " + c2o4i1, c2o4i1.bufferServerHost);
    Assert.assertEquals("bufferServerPort " + c2o4i1, 0, c2o4i1.bufferServerPort);
    Assert.assertNull("partitionKeys " + c2o4i1, c2o4i1.partitionKeys);
    Assert.assertEquals("sourceNodeId " + c2o4i1, o3DI.id, c2o4i1.sourceNodeId);
    Assert.assertEquals("sourcePortName " + c2o4i1, GenericTestOperator.OPORT1, c2o4i1.sourcePortName);
    Assert.assertEquals("locality " + c2o4i1, Locality.THREAD_LOCAL, c2o4i1.locality);
}
Also used : InputDeployInfo(com.datatorrent.stram.api.OperatorDeployInfo.InputDeployInfo) OperatorDeployInfo(com.datatorrent.stram.api.OperatorDeployInfo) ContainerResource(com.datatorrent.stram.StreamingContainerManager.ContainerResource) GenericTestOperator(com.datatorrent.stram.engine.GenericTestOperator) MemoryStorageAgent(com.datatorrent.stram.support.StramTestSupport.MemoryStorageAgent) TestGeneratorInputOperator(com.datatorrent.stram.engine.TestGeneratorInputOperator) OutputDeployInfo(com.datatorrent.stram.api.OperatorDeployInfo.OutputDeployInfo) Test(org.junit.Test) PhysicalPlanTest(com.datatorrent.stram.plan.physical.PhysicalPlanTest)

Aggregations

ContainerResource (com.datatorrent.stram.StreamingContainerManager.ContainerResource): 3; TestGeneratorInputOperator (com.datatorrent.stram.engine.TestGeneratorInputOperator): 2; PhysicalPlanTest (com.datatorrent.stram.plan.physical.PhysicalPlanTest): 2; MemoryStorageAgent (com.datatorrent.stram.support.StramTestSupport.MemoryStorageAgent): 2; Test (org.junit.Test): 2; OperatorStats (com.datatorrent.api.Stats.OperatorStats): 1; PortStats (com.datatorrent.api.Stats.OperatorStats.PortStats): 1; StatsListener (com.datatorrent.api.StatsListener): 1; MockOperatorStats (com.datatorrent.stram.MockContainer.MockOperatorStats): 1; ContainerStartRequest (com.datatorrent.stram.StreamingContainerAgent.ContainerStartRequest): 1; Checkpoint (com.datatorrent.stram.api.Checkpoint): 1; OperatorDeployInfo (com.datatorrent.stram.api.OperatorDeployInfo): 1; InputDeployInfo (com.datatorrent.stram.api.OperatorDeployInfo.InputDeployInfo): 1; OutputDeployInfo (com.datatorrent.stram.api.OperatorDeployInfo.OutputDeployInfo): 1; StramEvent (com.datatorrent.stram.api.StramEvent): 1; ContainerHeartbeat (com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.ContainerHeartbeat): 1; ContainerHeartbeatResponse (com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.ContainerHeartbeatResponse): 1; ContainerStats (com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.ContainerStats): 1; OperatorHeartbeat (com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.OperatorHeartbeat): 1; GenericTestOperator (com.datatorrent.stram.engine.GenericTestOperator): 1