Example 1 with SCMException

Use of org.apache.hadoop.hdds.scm.exceptions.SCMException in the Apache Ozone project.

From the class TestKeyManagerImpl, method setUp:

@BeforeClass
public static void setUp() throws Exception {
    conf = new OzoneConfiguration();
    dir = GenericTestUtils.getRandomizedTestDir();
    conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, dir.toString());
    conf.set(OzoneConfigKeys.OZONE_NETWORK_TOPOLOGY_AWARE_READ_KEY, "true");
    mockScmBlockLocationProtocol = mock(ScmBlockLocationProtocol.class);
    nodeManager = new MockNodeManager(true, 10);
    NodeSchema[] schemas = new NodeSchema[] { ROOT_SCHEMA, RACK_SCHEMA, LEAF_SCHEMA };
    NodeSchemaManager schemaManager = NodeSchemaManager.getInstance();
    schemaManager.init(schemas, false);
    NetworkTopology clusterMap = new NetworkTopologyImpl(schemaManager);
    nodeManager.getAllNodes().forEach(node -> {
        node.setNetworkName(node.getUuidString());
        clusterMap.add(node);
    });
    ((MockNodeManager) nodeManager).setNetworkTopology(clusterMap);
    SCMConfigurator configurator = new SCMConfigurator();
    configurator.setScmNodeManager(nodeManager);
    configurator.setNetworkTopology(clusterMap);
    configurator.setSCMHAManager(MockSCMHAManager.getInstance(true));
    configurator.setScmContext(SCMContext.emptyContext());
    scm = HddsTestUtils.getScm(conf, configurator);
    scm.start();
    scm.exitSafeMode();
    scmBlockSize = (long) conf.getStorageSize(OZONE_SCM_BLOCK_SIZE, OZONE_SCM_BLOCK_SIZE_DEFAULT, StorageUnit.BYTES);
    conf.setLong(OZONE_KEY_PREALLOCATION_BLOCKS_MAX, 10);
    mockScmContainerClient = Mockito.mock(StorageContainerLocationProtocol.class);
    OmTestManagers omTestManagers = new OmTestManagers(conf, scm.getBlockProtocolServer(), mockScmContainerClient);
    om = omTestManagers.getOzoneManager();
    metadataManager = omTestManagers.getMetadataManager();
    keyManager = (KeyManagerImpl) omTestManagers.getKeyManager();
    prefixManager = omTestManagers.getPrefixManager();
    writeClient = omTestManagers.getWriteClient();
    mockContainerClient();
    Mockito.when(mockScmBlockLocationProtocol.allocateBlock(Mockito.anyLong(), Mockito.anyInt(), any(ReplicationConfig.class), Mockito.anyString(), any(ExcludeList.class))).thenThrow(new SCMException("SafeModePrecheck failed for allocateBlock", ResultCodes.SAFE_MODE_EXCEPTION));
    createVolume(VOLUME_NAME);
    createBucket(VOLUME_NAME, BUCKET_NAME, false);
    createBucket(VOLUME_NAME, VERSIONED_BUCKET_NAME, true);
}
Also used: ExcludeList (org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList), ReplicationConfig (org.apache.hadoop.hdds.client.ReplicationConfig), RatisReplicationConfig (org.apache.hadoop.hdds.client.RatisReplicationConfig), StandaloneReplicationConfig (org.apache.hadoop.hdds.client.StandaloneReplicationConfig), OzoneConfiguration (org.apache.hadoop.hdds.conf.OzoneConfiguration), MockNodeManager (org.apache.hadoop.hdds.scm.container.MockNodeManager), ScmBlockLocationProtocol (org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol), NetworkTopologyImpl (org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl), NodeSchemaManager (org.apache.hadoop.hdds.scm.net.NodeSchemaManager), StorageContainerLocationProtocol (org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol), NetworkTopology (org.apache.hadoop.hdds.scm.net.NetworkTopology), SCMConfigurator (org.apache.hadoop.hdds.scm.server.SCMConfigurator), NodeSchema (org.apache.hadoop.hdds.scm.net.NodeSchema), SCMException (org.apache.hadoop.hdds.scm.exceptions.SCMException), BeforeClass (org.junit.BeforeClass)
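
The interesting part of this setup is the Mockito stub: every call to allocateBlock on the mocked ScmBlockLocationProtocol throws an SCMException with SAFE_MODE_EXCEPTION. A minimal companion test, sketched here with an assumed test name and argument values (it reuses the static fields from the setup above), would exercise that stub like so:

@Test
public void testAllocateBlockFailsInSafeMode() throws Exception {
    try {
        // The stub is registered with any()/anyLong() matchers, so every
        // invocation of allocateBlock throws, regardless of arguments.
        mockScmBlockLocationProtocol.allocateBlock(scmBlockSize, 1,
            RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.THREE),
            "test-owner", new ExcludeList());
        Assert.fail("Expected SCMException because SCM is in safe mode");
    } catch (SCMException e) {
        Assert.assertEquals(ResultCodes.SAFE_MODE_EXCEPTION, e.getResult());
    }
}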

Example 2 with SCMException

Use of org.apache.hadoop.hdds.scm.exceptions.SCMException in the Apache Ozone project.

From the class PipelinePlacementPolicy, method getResultSet:

/**
 * Get result set based on the pipeline placement algorithm which considers
 * network topology and rack awareness.
 * @param nodesRequired - number of nodes required
 * @param healthyNodes - list of healthy candidate datanodes
 * @return a list of datanodes
 * @throws SCMException if a suitable set of nodes cannot be found
 */
@Override
public List<DatanodeDetails> getResultSet(int nodesRequired, List<DatanodeDetails> healthyNodes) throws SCMException {
    if (nodesRequired != HddsProtos.ReplicationFactor.THREE.getNumber()) {
        throw new SCMException("Nodes required number is not supported: " + nodesRequired, SCMException.ResultCodes.INVALID_CAPACITY);
    }
    // Assume rack awareness is not enabled.
    boolean rackAwareness = false;
    List<DatanodeDetails> results = new ArrayList<>(nodesRequired);
    // Since nodes are widely distributed, the results should be selected
    // based on distance in topology, rack awareness, and load balancing.
    List<DatanodeDetails> exclude = new ArrayList<>();
    // First choose an anchor node.
    DatanodeDetails anchor = chooseFirstNode(healthyNodes);
    if (anchor != null) {
        results.add(anchor);
        removePeers(anchor, healthyNodes);
        exclude.add(anchor);
    } else {
        LOG.warn("Unable to find healthy node for anchor(first) node.");
        throw new SCMException("Unable to find anchor node.", SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("First node chosen: {}", anchor);
    }
    // Choose the second node on different racks from anchor.
    DatanodeDetails nextNode = chooseNodeBasedOnRackAwareness(healthyNodes, exclude, nodeManager.getClusterNetworkTopologyMap(), anchor);
    if (nextNode != null) {
        // Rack awareness is detected.
        rackAwareness = true;
        results.add(nextNode);
        removePeers(nextNode, healthyNodes);
        exclude.add(nextNode);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Second node chosen: {}", nextNode);
        }
    } else {
        LOG.debug("Pipeline Placement: Unable to find 2nd node on different " + "rack based on rack awareness. anchor: {}", anchor);
    }
    // Then choose nodes close to anchor based on network topology
    int nodesToFind = nodesRequired - results.size();
    for (int x = 0; x < nodesToFind; x++) {
        // Pick remaining nodes based on the existence of rack awareness.
        DatanodeDetails pick = null;
        if (rackAwareness) {
            pick = chooseNodeBasedOnSameRack(healthyNodes, exclude, nodeManager.getClusterNetworkTopologyMap(), anchor);
        }
        // fall back protection
        if (pick == null) {
            pick = fallBackPickNodes(healthyNodes, exclude);
            if (rackAwareness) {
                LOG.debug("Failed to choose node based on topology. Fallback " + "picks node as: {}", pick);
            }
        }
        if (pick != null) {
            results.add(pick);
            removePeers(pick, healthyNodes);
            exclude.add(pick);
            LOG.debug("Remaining node chosen: {}", pick);
        } else {
            String msg = String.format("Unable to find suitable node in " + "pipeline allocation. healthyNodes size: %d, " + "excludeNodes size: %d", healthyNodes.size(), exclude.size());
            LOG.warn(msg);
            throw new SCMException(msg, SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE);
        }
    }
    if (results.size() < nodesRequired) {
        LOG.warn("Unable to find the required number of " + "healthy nodes that  meet the criteria. Required nodes: {}, " + "Found nodes: {}", nodesRequired, results.size());
        throw new SCMException("Unable to find required number of nodes.", SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE);
    }
    return results;
}
Also used: DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails), ArrayList (java.util.ArrayList), SCMException (org.apache.hadoop.hdds.scm.exceptions.SCMException)
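
From the caller's perspective the method has two failure modes: an INVALID_CAPACITY rejection when nodesRequired is not three, and FAILED_TO_FIND_SUITABLE_NODE when the anchor or a later pick cannot be found. A hedged caller sketch (the placementPolicy handle is an assumption, and it presumes NodeManager exposes getNodes(NodeStatus), as used elsewhere in SCM):

List<DatanodeDetails> healthyNodes =
    nodeManager.getNodes(NodeStatus.inServiceHealthy());
try {
    // Only a factor-THREE request is supported; any other count fails fast.
    List<DatanodeDetails> pipelineNodes = placementPolicy.getResultSet(
        HddsProtos.ReplicationFactor.THREE.getNumber(), healthyNodes);
    // pipelineNodes holds the anchor plus two topology-aware picks.
} catch (SCMException e) {
    if (e.getResult() != SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE) {
        throw e; // INVALID_CAPACITY or anything unexpected is fatal here.
    }
    // Not enough distinct candidates yet; retry once more datanodes
    // register and report healthy.
}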

Example 3 with SCMException

Use of org.apache.hadoop.hdds.scm.exceptions.SCMException in the Apache Ozone project.

From the class TestRatisPipelineCreateAndDestroy, method testPipelineCreationOnNodeRestart:

@Test(timeout = 180000)
public void testPipelineCreationOnNodeRestart() throws Exception {
    conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 5, TimeUnit.SECONDS);
    init(3);
    // make sure a pipeline is created
    waitForPipelines(1);
    List<HddsDatanodeService> dns = new ArrayList<>(cluster.getHddsDatanodes());
    List<Pipeline> pipelines = pipelineManager.getPipelines(RatisReplicationConfig.getInstance(ReplicationFactor.THREE));
    for (HddsDatanodeService dn : dns) {
        cluster.shutdownHddsDatanode(dn.getDatanodeDetails());
    }
    // try creating another pipeline now
    try {
        pipelineManager.createPipeline(RatisReplicationConfig.getInstance(ReplicationFactor.THREE));
        Assert.fail("pipeline creation should fail after shutting down pipeline");
    } catch (IOException ioe) {
        // Now that all datanodes are shut down, they move to the stale
        // state, so there are not enough datanodes to create the pipeline.
        Assert.assertTrue(ioe instanceof SCMException);
        Assert.assertEquals(SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE, ((SCMException) ioe).getResult());
    }
    // make sure all pipelines are destroyed
    waitForPipelines(0);
    for (HddsDatanodeService dn : dns) {
        cluster.restartHddsDatanode(dn.getDatanodeDetails(), false);
    }
    // destroy the existing pipelines
    for (Pipeline pipeline : pipelines) {
        pipelineManager.closePipeline(pipeline, false);
    }
    if (cluster.getStorageContainerManager().getScmNodeManager().getNodeCount(NodeStatus.inServiceHealthy()) >= HddsProtos.ReplicationFactor.THREE.getNumber()) {
        // make sure pipelines are created after the nodes restart
        cluster.getStorageContainerManager().getSCMServiceManager().notifyEventTriggered(Event.PRE_CHECK_COMPLETED);
        waitForPipelines(1);
    }
}
Also used: ArrayList (java.util.ArrayList), HddsDatanodeService (org.apache.hadoop.ozone.HddsDatanodeService), IOException (java.io.IOException), SCMException (org.apache.hadoop.hdds.scm.exceptions.SCMException), Test (org.junit.Test)
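
The try/fail/catch idiom above can be tightened if the project's JUnit 4 is 4.13 or newer (an assumption about the build): Assert.assertThrows returns the caught exception, so the result code can be checked directly:

SCMException e = Assert.assertThrows(SCMException.class,
    () -> pipelineManager.createPipeline(
        RatisReplicationConfig.getInstance(ReplicationFactor.THREE)));
// Same assertion as before, without the instanceof check or the cast.
Assert.assertEquals(SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE,
    e.getResult());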

Example 4 with SCMException

Use of org.apache.hadoop.hdds.scm.exceptions.SCMException in the Apache Ozone project.

From the class SCMCommonPlacementPolicy, method getResultSet:

/**
 * This function invokes the derived class's chooseNode function to build a
 * list of nodes. It then verifies that the invoked policy was able to return
 * the expected number of nodes.
 *
 * @param nodesRequired - number of nodes required
 * @param healthyNodes  - list of healthy candidate datanodes
 * @return list of datanodes that can be used for placement
 * @throws SCMException if a suitable set of nodes cannot be found
 */
public List<DatanodeDetails> getResultSet(int nodesRequired, List<DatanodeDetails> healthyNodes) throws SCMException {
    List<DatanodeDetails> results = new ArrayList<>();
    for (int x = 0; x < nodesRequired; x++) {
        // invoke the choose function defined in the derived classes.
        DatanodeDetails nodeId = chooseNode(healthyNodes);
        if (nodeId != null) {
            removePeers(nodeId, healthyNodes);
            results.add(nodeId);
        }
    }
    if (results.size() < nodesRequired) {
        LOG.error("Unable to find the required number of healthy nodes that " + "meet the criteria. Required nodes: {}, Found nodes: {}", nodesRequired, results.size());
        throw new SCMException("Unable to find required number of nodes.", SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE);
    }
    return results;
}
Also used: DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails), ArrayList (java.util.ArrayList), SCMException (org.apache.hadoop.hdds.scm.exceptions.SCMException)
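
Stripped of the Ozone types, getResultSet is a generic "pick until satisfied, then verify" loop. A self-contained sketch of that pattern in plain Java (the pickN name and generic signature are illustrative, not Ozone API):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;

final class PickNSketch {
    // chooser plays the role of the derived class's chooseNode; returning
    // null models "no suitable node found on this attempt".
    static <T> List<T> pickN(int required, List<T> candidates,
        Function<List<T>, T> chooser) throws IOException {
        List<T> results = new ArrayList<>();
        for (int i = 0; i < required; i++) {
            T pick = chooser.apply(candidates);
            if (pick != null) {
                candidates.remove(pick); // mirrors removePeers(...)
                results.add(pick);
            }
        }
        // Verify after the fact, exactly as getResultSet does.
        if (results.size() < required) {
            throw new IOException("Unable to find required number of nodes.");
        }
        return results;
    }
}

For instance, pickN(3, nodes, l -> l.isEmpty() ? null : l.get(0)) emulates a trivial first-fit chooseNode.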

Example 5 with SCMException

Use of org.apache.hadoop.hdds.scm.exceptions.SCMException in the Apache Ozone project.

From the class BlockManagerImpl, method allocateBlock:

/**
 * Allocates a block in a container and returns that info.
 *
 * @param size - Block Size
 * @param replicationConfig - Replication config
 * @param owner - Owner (service) of the container.
 * @param excludeList List of datanodes/containers to exclude during block
 *                    allocation.
 * @return Allocated block
 * @throws IOException on failure.
 */
@Override
public AllocatedBlock allocateBlock(final long size, ReplicationConfig replicationConfig, String owner, ExcludeList excludeList) throws IOException {
    if (LOG.isTraceEnabled()) {
        LOG.trace("Size : {} , replicationConfig: {}", size, replicationConfig);
    }
    if (scm.getScmContext().isInSafeMode()) {
        throw new SCMException("SafeModePrecheck failed for allocateBlock", SCMException.ResultCodes.SAFE_MODE_EXCEPTION);
    }
    if (size < 0 || size > containerSize) {
        LOG.warn("Invalid block size requested : {}", size);
        throw new SCMException("Unsupported block size: " + size, INVALID_BLOCK_SIZE);
    }
    ContainerInfo containerInfo = writableContainerFactory.getContainer(size, replicationConfig, owner, excludeList);
    if (containerInfo != null) {
        return newBlock(containerInfo);
    }
    // We have tried all the strategies we know, but somehow we are not able
    // to get a container for this block. Log that info and return null.
    LOG.error("Unable to allocate a block for the size: {}, replicationConfig: {}", size, replicationConfig);
    return null;
}
Also used: ContainerInfo (org.apache.hadoop.hdds.scm.container.ContainerInfo), SCMException (org.apache.hadoop.hdds.scm.exceptions.SCMException)
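
Note that allocateBlock has three distinct outcomes: an SCMException for safe mode or a bad size, and a plain null when every placement strategy is exhausted. A hypothetical caller sketch (the blockManager, blockSize, and replicationConfig names are assumptions for illustration):

try {
    AllocatedBlock block = blockManager.allocateBlock(
        blockSize, replicationConfig, "ozone", new ExcludeList());
    if (block == null) {
        // Placement strategies were exhausted; no exception is thrown for
        // this case, so the caller must handle null explicitly.
    }
} catch (SCMException e) {
    switch (e.getResult()) {
    case SAFE_MODE_EXCEPTION:
        // SCM has not left safe mode yet; retry after safe-mode exit.
        break;
    case INVALID_BLOCK_SIZE:
        // The requested size was negative or larger than the container size.
        break;
    default:
        throw e;
    }
}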

Aggregations

SCMException (org.apache.hadoop.hdds.scm.exceptions.SCMException): 36
ArrayList (java.util.ArrayList): 9
DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails): 9
Test (org.junit.Test): 9
IOException (java.io.IOException): 6
ContainerInfo (org.apache.hadoop.hdds.scm.container.ContainerInfo): 6
MockDatanodeDetails (org.apache.hadoop.hdds.protocol.MockDatanodeDetails): 4
HddsProtos (org.apache.hadoop.hdds.protocol.proto.HddsProtos): 4
MockNodeManager (org.apache.hadoop.hdds.scm.container.MockNodeManager): 4
UUID (java.util.UUID): 3
BlockID (org.apache.hadoop.hdds.client.BlockID): 3
OzoneConfiguration (org.apache.hadoop.hdds.conf.OzoneConfiguration): 3
NodeManager (org.apache.hadoop.hdds.scm.node.NodeManager): 3
Pipeline (org.apache.hadoop.hdds.scm.pipeline.Pipeline): 3
RatisReplicationConfig (org.apache.hadoop.hdds.client.RatisReplicationConfig): 2
ReplicationConfig (org.apache.hadoop.hdds.client.ReplicationConfig): 2
ReplicationFactor (org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor): 2
ContainerID (org.apache.hadoop.hdds.scm.container.ContainerID): 2
AllocatedBlock (org.apache.hadoop.hdds.scm.container.common.helpers.AllocatedBlock): 2
ContainerWithPipeline (org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline): 2