Use of org.apache.hadoop.hdds.scm.exceptions.SCMException in the Apache Ozone project.
Class TestKeyManagerImpl, method setUp.
/**
 * Class-level fixture. Starts an SCM backed by a mock node manager whose
 * nodes are registered in a root/rack/leaf network topology, builds the OM
 * test managers against that SCM, stubs the mock block-location protocol to
 * fail with a safe-mode error, and creates the volume and buckets.
 *
 * @throws Exception if any part of the fixture fails to initialise
 */
@BeforeClass
public static void setUp() throws Exception {
  // --- configuration -------------------------------------------------------
  conf = new OzoneConfiguration();
  dir = GenericTestUtils.getRandomizedTestDir();
  conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, dir.toString());
  conf.set(OzoneConfigKeys.OZONE_NETWORK_TOPOLOGY_AWARE_READ_KEY, "true");
  mockScmBlockLocationProtocol = mock(ScmBlockLocationProtocol.class);

  // --- mock nodes registered in a root/rack/leaf topology ------------------
  nodeManager = new MockNodeManager(true, 10);
  NodeSchemaManager topologySchemas = NodeSchemaManager.getInstance();
  topologySchemas.init(
      new NodeSchema[] {ROOT_SCHEMA, RACK_SCHEMA, LEAF_SCHEMA}, false);
  NetworkTopology topology = new NetworkTopologyImpl(topologySchemas);
  nodeManager.getAllNodes().forEach(datanode -> {
    // Each node's network name is set to its UUID string before it is added.
    datanode.setNetworkName(datanode.getUuidString());
    topology.add(datanode);
  });
  ((MockNodeManager) nodeManager).setNetworkTopology(topology);

  // --- SCM wired to the mock node manager and topology ---------------------
  SCMConfigurator scmConfigurator = new SCMConfigurator();
  scmConfigurator.setScmNodeManager(nodeManager);
  scmConfigurator.setNetworkTopology(topology);
  scmConfigurator.setSCMHAManager(MockSCMHAManager.getInstance(true));
  scmConfigurator.setScmContext(SCMContext.emptyContext());
  scm = HddsTestUtils.getScm(conf, scmConfigurator);
  scm.start();
  scm.exitSafeMode();

  scmBlockSize = (long) conf.getStorageSize(
      OZONE_SCM_BLOCK_SIZE, OZONE_SCM_BLOCK_SIZE_DEFAULT, StorageUnit.BYTES);
  conf.setLong(OZONE_KEY_PREALLOCATION_BLOCKS_MAX, 10);

  // --- OM managers built on top of the running SCM -------------------------
  mockScmContainerClient =
      Mockito.mock(StorageContainerLocationProtocol.class);
  OmTestManagers managers = new OmTestManagers(
      conf, scm.getBlockProtocolServer(), mockScmContainerClient);
  om = managers.getOzoneManager();
  metadataManager = managers.getMetadataManager();
  keyManager = (KeyManagerImpl) managers.getKeyManager();
  prefixManager = managers.getPrefixManager();
  writeClient = managers.getWriteClient();
  mockContainerClient();

  // Any allocateBlock call on the mock protocol reports a safe-mode failure.
  Mockito.when(
      mockScmBlockLocationProtocol.allocateBlock(
          Mockito.anyLong(),
          Mockito.anyInt(),
          any(ReplicationConfig.class),
          Mockito.anyString(),
          any(ExcludeList.class)))
      .thenThrow(new SCMException(
          "SafeModePrecheck failed for allocateBlock",
          ResultCodes.SAFE_MODE_EXCEPTION));

  // --- namespace used by the tests -----------------------------------------
  createVolume(VOLUME_NAME);
  createBucket(VOLUME_NAME, BUCKET_NAME, false);
  createBucket(VOLUME_NAME, VERSIONED_BUCKET_NAME, true);
}
Use of org.apache.hadoop.hdds.scm.exceptions.SCMException in the Apache Ozone project.
Class PipelinePlacementPolicy, method getResultSet.
/**
 * Selects the datanodes for a pipeline using the placement algorithm that
 * takes network topology and rack awareness into account.
 *
 * <p>An anchor node is chosen first. A second node is then looked for on a
 * different rack; if one is found, rack awareness is considered detected and
 * the remaining nodes are first looked for on the anchor's rack. Whenever
 * that lookup yields nothing (or rack awareness was not detected) the
 * fallback picker is used. Every chosen node is added to the exclude list
 * and handed to {@code removePeers} together with {@code healthyNodes}.
 *
 * @param nodesRequired - number of nodes required; only the numeric value of
 *                      replication factor THREE is accepted
 * @param healthyNodes - candidate nodes to choose from
 * @return a list of datanodes, anchor first
 * @throws SCMException if {@code nodesRequired} is not supported, or a
 *                      suitable node cannot be found
 */
@Override
public List<DatanodeDetails> getResultSet(int nodesRequired, List<DatanodeDetails> healthyNodes) throws SCMException {
  if (nodesRequired != HddsProtos.ReplicationFactor.THREE.getNumber()) {
    throw new SCMException(
        "Nodes required number is not supported: " + nodesRequired,
        SCMException.ResultCodes.INVALID_CAPACITY);
  }

  // Nodes are widely distributed, so selection is driven by topology
  // distance, rack awareness and load balancing.
  List<DatanodeDetails> chosen = new ArrayList<>(nodesRequired);
  List<DatanodeDetails> excluded = new ArrayList<>();

  // Step 1: the anchor node. Without one nothing else can be placed.
  DatanodeDetails anchor = chooseFirstNode(healthyNodes);
  if (anchor == null) {
    LOG.warn("Unable to find healthy node for anchor(first) node.");
    throw new SCMException("Unable to find anchor node.",
        SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE);
  }
  chosen.add(anchor);
  removePeers(anchor, healthyNodes);
  excluded.add(anchor);
  if (LOG.isDebugEnabled()) {
    LOG.debug("First node chosen: {}", anchor);
  }

  // Step 2: a node on a different rack from the anchor. Finding one is what
  // tells us rack awareness is in effect; until then assume it is not.
  DatanodeDetails secondNode = chooseNodeBasedOnRackAwareness(
      healthyNodes, excluded, nodeManager.getClusterNetworkTopologyMap(),
      anchor);
  final boolean rackAware = secondNode != null;
  if (rackAware) {
    chosen.add(secondNode);
    removePeers(secondNode, healthyNodes);
    excluded.add(secondNode);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Second node chosen: {}", secondNode);
    }
  } else {
    LOG.debug("Pipeline Placement: Unable to find 2nd node on different rack "
        + "based on rack awareness. anchor: {}", anchor);
  }

  // Step 3: fill the remaining slots with nodes close to the anchor.
  for (int remaining = nodesRequired - chosen.size(); remaining > 0;
       remaining--) {
    // Same-rack selection is only attempted when rack awareness was detected.
    DatanodeDetails candidate = rackAware
        ? chooseNodeBasedOnSameRack(healthyNodes, excluded,
            nodeManager.getClusterNetworkTopologyMap(), anchor)
        : null;

    // Fallback protection when the topology-based pick produced nothing.
    if (candidate == null) {
      candidate = fallBackPickNodes(healthyNodes, excluded);
      if (rackAware) {
        LOG.debug("Failed to choose node based on topology. "
            + "Fallback picks node as: {}", candidate);
      }
    }

    if (candidate == null) {
      String message = String.format(
          "Unable to find suitable node in pipeline allocation. "
              + "healthyNodes size: %d, excludeNodes size: %d",
          healthyNodes.size(), excluded.size());
      LOG.warn(message);
      throw new SCMException(message,
          SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE);
    }

    chosen.add(candidate);
    removePeers(candidate, healthyNodes);
    excluded.add(candidate);
    LOG.debug("Remaining node chosen: {}", candidate);
  }

  // Final safety check on the number of nodes collected.
  if (chosen.size() < nodesRequired) {
    LOG.warn("Unable to find the required number of healthy nodes that meet "
        + "the criteria. Required nodes: {}, Found nodes: {}",
        nodesRequired, chosen.size());
    throw new SCMException("Unable to find required number of nodes.",
        SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE);
  }
  return chosen;
}
Use of org.apache.hadoop.hdds.scm.exceptions.SCMException in the Apache Ozone project.
Class TestRatisPipelineCreateAndDestroy, method testPipelineCreationOnNodeRestart.
/**
 * Shuts down every datanode, checks that creating a RATIS/THREE pipeline then
 * fails with {@code FAILED_TO_FIND_SUITABLE_NODE}, restarts the datanodes,
 * closes the previously recorded pipelines and, when at least three
 * in-service healthy nodes are reported, waits for a pipeline to appear again.
 */
@Test(timeout = 180000)
public void testPipelineCreationOnNodeRestart() throws Exception {
  conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 5, TimeUnit.SECONDS);
  init(3);

  // Wait until one pipeline exists before touching the datanodes.
  waitForPipelines(1);

  List<HddsDatanodeService> datanodes =
      new ArrayList<>(cluster.getHddsDatanodes());
  List<Pipeline> existingPipelines = pipelineManager.getPipelines(
      RatisReplicationConfig.getInstance(ReplicationFactor.THREE));

  for (HddsDatanodeService datanode : datanodes) {
    cluster.shutdownHddsDatanode(datanode.getDatanodeDetails());
  }

  // With every datanode down, a new pipeline must not be creatable.
  try {
    pipelineManager.createPipeline(
        RatisReplicationConfig.getInstance(ReplicationFactor.THREE));
    Assert.fail("pipeline creation should fail after shutting down pipeline");
  } catch (IOException expected) {
    // The stopped datanodes move to the stale state, leaving too few
    // datanodes to build the pipeline.
    Assert.assertTrue(expected instanceof SCMException);
    Assert.assertEquals(
        SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE,
        ((SCMException) expected).getResult());
  }

  // The pipeline count is expected to drop to zero.
  waitForPipelines(0);

  for (HddsDatanodeService datanode : datanodes) {
    cluster.restartHddsDatanode(datanode.getDatanodeDetails(), false);
  }

  // Close the pipelines recorded before the shutdown.
  for (Pipeline stale : existingPipelines) {
    pipelineManager.closePipeline(stale, false);
  }

  boolean enoughHealthyNodes =
      cluster.getStorageContainerManager().getScmNodeManager()
          .getNodeCount(NodeStatus.inServiceHealthy())
          >= HddsProtos.ReplicationFactor.THREE.getNumber();
  if (!enoughHealthyNodes) {
    return;
  }

  // Enough nodes are back: trigger the event and wait for a pipeline.
  cluster.getStorageContainerManager().getSCMServiceManager()
      .notifyEventTriggered(Event.PRE_CHECK_COMPLETED);
  waitForPipelines(1);
}
Use of org.apache.hadoop.hdds.scm.exceptions.SCMException in the Apache Ozone project.
Class SCMCommonPlacementPolicy, method getResultSet.
/**
 * Builds the result list by calling the derived class's {@code chooseNode}
 * once per required node, then verifies that enough nodes were returned.
 *
 * <p>A {@code null} pick is skipped but still counts as one of the
 * {@code nodesRequired} attempts. Each non-null pick is passed to
 * {@code removePeers} together with {@code healthyNodes}.
 *
 * @param nodesRequired - number of nodes required
 * @param healthyNodes - candidate nodes handed to {@code chooseNode}
 * @return List of Datanodes that can be used for placement.
 * @throws SCMException if fewer than {@code nodesRequired} nodes were chosen
 */
public List<DatanodeDetails> getResultSet(int nodesRequired, List<DatanodeDetails> healthyNodes) throws SCMException {
  List<DatanodeDetails> selected = new ArrayList<>();

  for (int attempt = 0; attempt < nodesRequired; attempt++) {
    // The selection strategy itself lives in the derived class.
    DatanodeDetails candidate = chooseNode(healthyNodes);
    if (candidate == null) {
      continue;
    }
    removePeers(candidate, healthyNodes);
    selected.add(candidate);
  }

  if (selected.size() >= nodesRequired) {
    return selected;
  }

  LOG.error("Unable to find the required number of healthy nodes that meet "
      + "the criteria. Required nodes: {}, Found nodes: {}",
      nodesRequired, selected.size());
  throw new SCMException("Unable to find required number of nodes.",
      SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE);
}
Use of org.apache.hadoop.hdds.scm.exceptions.SCMException in the Apache Ozone project.
Class BlockManagerImpl, method allocateBlock.
/**
 * Allocates a block in a container and returns its information.
 *
 * <p>The request is rejected while SCM is in safe mode and when the size is
 * negative or larger than the container size. Otherwise a container is
 * requested from the writable-container factory and a new block is created
 * in it.
 *
 * @param size - Block Size
 * @param replicationConfig - Replication config
 * @param owner - Owner (service) of the container.
 * @param excludeList List of datanodes/containers to exclude during block
 *                    allocation.
 * @return the allocated block, or {@code null} when no container could be
 *         obtained for the request
 * @throws IOException on failure; in particular an {@link SCMException} for
 *                     safe mode or an unsupported block size
 */
@Override
public AllocatedBlock allocateBlock(final long size, ReplicationConfig replicationConfig, String owner, ExcludeList excludeList) throws IOException {
  if (LOG.isTraceEnabled()) {
    LOG.trace("Size : {} , replicationConfig: {}", size, replicationConfig);
  }

  // Precondition 1: no allocation while SCM is in safe mode.
  if (scm.getScmContext().isInSafeMode()) {
    throw new SCMException("SafeModePrecheck failed for allocateBlock",
        SCMException.ResultCodes.SAFE_MODE_EXCEPTION);
  }

  // Precondition 2: the block must fit into a container.
  final boolean invalidSize = size < 0 || size > containerSize;
  if (invalidSize) {
    LOG.warn("Invalid block size requested : {}", size);
    throw new SCMException("Unsupported block size: " + size,
        INVALID_BLOCK_SIZE);
  }

  ContainerInfo target = writableContainerFactory.getContainer(
      size, replicationConfig, owner, excludeList);
  if (target == null) {
    // Every known strategy has been tried and no container is available for
    // this block: record that and hand back null.
    LOG.error("Unable to allocate a block for the size: {}, replicationConfig: {}", size, replicationConfig);
    return null;
  }
  return newBlock(target);
}
Aggregations