Example 1 with StorageContainerException

Use of org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException in project ozone by apache.

The class TestContainerSmallFile, method testReadWriteWithBCSId.

@Test
public void testReadWriteWithBCSId() throws Exception {
    ContainerWithPipeline container = storageContainerLocationClient.allocateContainer(HddsProtos.ReplicationType.RATIS, HddsProtos.ReplicationFactor.ONE, OzoneConsts.OZONE);
    XceiverClientSpi client = xceiverClientManager.acquireClient(container.getPipeline());
    ContainerProtocolCalls.createContainer(client, container.getContainerInfo().getContainerID(), null);
    BlockID blockID1 = ContainerTestHelper.getTestBlockID(container.getContainerInfo().getContainerID());
    ContainerProtos.PutSmallFileResponseProto responseProto = ContainerProtocolCalls.writeSmallFile(client, blockID1, "data123".getBytes(UTF_8), null);
    long bcsId = responseProto.getCommittedBlockLength().getBlockID().getBlockCommitSequenceId();
    try {
        blockID1.setBlockCommitSequenceId(bcsId + 1);
        // read a file with higher bcsId than the container bcsId
        ContainerProtocolCalls.readSmallFile(client, blockID1, null);
        Assert.fail("Expected exception not thrown");
    } catch (StorageContainerException sce) {
        Assert.assertEquals(ContainerProtos.Result.UNKNOWN_BCSID, sce.getResult());
    }
    // write a new block again to bump up the container bcsId
    BlockID blockID2 = ContainerTestHelper.getTestBlockID(container.getContainerInfo().getContainerID());
    ContainerProtocolCalls.writeSmallFile(client, blockID2, "data123".getBytes(UTF_8), null);
    try {
        blockID1.setBlockCommitSequenceId(bcsId + 1);
        // read a file with higher bcsId than the committed bcsId for the block
        ContainerProtocolCalls.readSmallFile(client, blockID1, null);
        Assert.fail("Expected exception not thrown");
    } catch (StorageContainerException sce) {
        Assert.assertEquals(ContainerProtos.Result.BCSID_MISMATCH, sce.getResult());
    }
    blockID1.setBlockCommitSequenceId(bcsId);
    ContainerProtos.GetSmallFileResponseProto response = ContainerProtocolCalls.readSmallFile(client, blockID1, null);
    String readData = response.getData().getDataBuffers().getBuffersList().get(0).toStringUtf8();
    Assert.assertEquals("data123", readData);
    xceiverClientManager.releaseClient(client, false);
}
Also used : ContainerProtos(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos) BlockID(org.apache.hadoop.hdds.client.BlockID) StorageContainerException(org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException) XceiverClientSpi(org.apache.hadoop.hdds.scm.XceiverClientSpi) ContainerWithPipeline(org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline) Test(org.junit.Test)
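The two try/catch blocks above repeat the same assert-on-result-code pattern. A minimal JUnit 4 helper along the following lines could factor it out; the helper name assertFailsWith and the use of java.util.concurrent.Callable are illustrative and not part of the Ozone test code.

// Hypothetical helper, not part of the Ozone codebase: run an action and
// assert that it fails with a StorageContainerException carrying the
// expected result code. Requires java.util.concurrent.Callable.
private static void assertFailsWith(ContainerProtos.Result expected, Callable<?> action) throws Exception {
    try {
        action.call();
        Assert.fail("Expected StorageContainerException with result " + expected);
    } catch (StorageContainerException sce) {
        Assert.assertEquals(expected, sce.getResult());
    }
}

// Example usage with the client and blockID1 from the test above:
// assertFailsWith(ContainerProtos.Result.UNKNOWN_BCSID,
//     () -> ContainerProtocolCalls.readSmallFile(client, blockID1, null));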

Example 2 with StorageContainerException

Use of org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException in project ozone by apache.

The class TestGetCommittedBlockLengthAndPutKey, method testGetCommittedBlockLengthForInvalidBlock.

@Test
public void testGetCommittedBlockLengthForInvalidBlock() throws Exception {
    ContainerWithPipeline container = storageContainerLocationClient.allocateContainer(SCMTestUtils.getReplicationType(ozoneConfig), HddsProtos.ReplicationFactor.ONE, OzoneConsts.OZONE);
    long containerID = container.getContainerInfo().getContainerID();
    XceiverClientSpi client = xceiverClientManager.acquireClient(container.getPipeline());
    ContainerProtocolCalls.createContainer(client, containerID, null);
    BlockID blockID = ContainerTestHelper.getTestBlockID(containerID);
    // move the container to closed state
    ContainerProtocolCalls.closeContainer(client, containerID, null);
    try {
        // There is no block written inside the container. The request should
        // fail.
        ContainerProtocolCalls.getCommittedBlockLength(client, blockID, null);
        Assert.fail("Expected exception not thrown");
    } catch (StorageContainerException sce) {
        Assert.assertTrue(sce.getMessage().contains("Unable to find the block"));
    }
    xceiverClientManager.releaseClient(client, false);
}
Also used : BlockID(org.apache.hadoop.hdds.client.BlockID) StorageContainerException(org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException) XceiverClientSpi(org.apache.hadoop.hdds.scm.XceiverClientSpi) ContainerWithPipeline(org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline) Test(org.junit.Test)
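The assertion above matches on the exception message rather than the result code. On the datanode side, such failures are raised by constructing a StorageContainerException with a ContainerProtos.Result; a hedged sketch of that pattern follows, where the blockData variable and message text are illustrative stand-ins, not the actual handler code.

// Illustrative only: how datanode-side code typically surfaces a missing
// block. blockData is a stand-in for whatever lookup the handler performed.
if (blockData == null) {
    throw new StorageContainerException(
        "Unable to find the block " + blockID,
        ContainerProtos.Result.NO_SUCH_BLOCK);
}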

Example 3 with StorageContainerException

Use of org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException in project ozone by apache.

The class HddsDispatcher, method dispatchRequest.

@SuppressWarnings("methodlength")
private ContainerCommandResponseProto dispatchRequest(ContainerCommandRequestProto msg, DispatcherContext dispatcherContext) {
    Preconditions.checkNotNull(msg);
    if (LOG.isTraceEnabled()) {
        LOG.trace("Command {}, trace ID: {} ", msg.getCmdType(), msg.getTraceID());
    }
    AuditAction action = ContainerCommandRequestPBHelper.getAuditAction(msg.getCmdType());
    EventType eventType = getEventType(msg);
    Map<String, String> params = ContainerCommandRequestPBHelper.getAuditParams(msg);
    ContainerType containerType;
    ContainerCommandResponseProto responseProto = null;
    long startTime = System.currentTimeMillis();
    Type cmdType = msg.getCmdType();
    long containerID = msg.getContainerID();
    metrics.incContainerOpsMetrics(cmdType);
    Container container = getContainer(containerID);
    boolean isWriteStage = (cmdType == Type.WriteChunk && dispatcherContext != null && dispatcherContext.getStage() == DispatcherContext.WriteChunkStage.WRITE_DATA);
    boolean isWriteCommitStage = (cmdType == Type.WriteChunk && dispatcherContext != null && dispatcherContext.getStage() == DispatcherContext.WriteChunkStage.COMMIT_DATA);
    try {
        validateToken(msg);
    } catch (IOException ioe) {
        StorageContainerException sce = new StorageContainerException("Block token verification failed. " + ioe.getMessage(), ioe, ContainerProtos.Result.BLOCK_TOKEN_VERIFICATION_FAILED);
        return ContainerUtils.logAndReturnError(LOG, sce, msg);
    }
    // If the command is executed outside of Ratis, the default write stage
    // is WriteChunkStage.COMBINED.
    boolean isCombinedStage = cmdType == Type.WriteChunk && (dispatcherContext == null || dispatcherContext.getStage() == DispatcherContext.WriteChunkStage.COMBINED);
    Map<Long, Long> container2BCSIDMap = null;
    if (dispatcherContext != null) {
        container2BCSIDMap = dispatcherContext.getContainer2BCSIDMap();
    }
    if (isWriteCommitStage) {
        // Check if the container ID exists in the loaded snapshot file. If it
        // does not, this is a restart of the datanode where we are reapplying
        // a transaction that was not captured in the snapshot.
        // Just add it to the map, and remove it from the missing container set
        // as it might have been added there during "init".
        Preconditions.checkNotNull(container2BCSIDMap);
        if (container != null && container2BCSIDMap.get(containerID) == null) {
            container2BCSIDMap.put(containerID, container.getBlockCommitSequenceId());
            getMissingContainerSet().remove(containerID);
        }
    }
    if (getMissingContainerSet().contains(containerID)) {
        StorageContainerException sce = new StorageContainerException("ContainerID " + containerID + " has been lost and and cannot be recreated on this DataNode", ContainerProtos.Result.CONTAINER_MISSING);
        audit(action, eventType, params, AuditEventStatus.FAILURE, sce);
        return ContainerUtils.logAndReturnError(LOG, sce, msg);
    }
    if (cmdType != Type.CreateContainer) {
        // Create Container should happen only as part of the Write_Data phase
        // of writeChunk.
        if (container == null && ((isWriteStage || isCombinedStage) || cmdType == Type.PutSmallFile)) {
            // If container does not exist, create one for WriteChunk and
            // PutSmallFile request
            responseProto = createContainer(msg);
            if (responseProto.getResult() != Result.SUCCESS) {
                StorageContainerException sce = new StorageContainerException("ContainerID " + containerID + " creation failed", responseProto.getResult());
                audit(action, eventType, params, AuditEventStatus.FAILURE, sce);
                return ContainerUtils.logAndReturnError(LOG, sce, msg);
            }
            Preconditions.checkArgument((isWriteStage && container2BCSIDMap != null) || dispatcherContext == null);
            if (container2BCSIDMap != null) {
                // adds this container to list of containers created in the pipeline
                // with initial BCSID recorded as 0.
                container2BCSIDMap.putIfAbsent(containerID, 0L);
            }
            container = getContainer(containerID);
        }
        // if container not found return error
        if (container == null) {
            StorageContainerException sce = new StorageContainerException("ContainerID " + containerID + " does not exist", ContainerProtos.Result.CONTAINER_NOT_FOUND);
            audit(action, eventType, params, AuditEventStatus.FAILURE, sce);
            return ContainerUtils.logAndReturnError(LOG, sce, msg);
        }
        containerType = getContainerType(container);
    } else {
        if (!msg.hasCreateContainer()) {
            audit(action, eventType, params, AuditEventStatus.FAILURE, new Exception("MALFORMED_REQUEST"));
            return malformedRequest(msg);
        }
        containerType = msg.getCreateContainer().getContainerType();
    }
    // Check whether the request is a write before trying to send CloseContainerAction.
    if (!HddsUtils.isReadOnly(msg)) {
        sendCloseContainerActionIfNeeded(container);
    }
    Handler handler = getHandler(containerType);
    if (handler == null) {
        StorageContainerException ex = new StorageContainerException("Invalid " + "ContainerType " + containerType, ContainerProtos.Result.CONTAINER_INTERNAL_ERROR);
        // log failure
        audit(action, eventType, params, AuditEventStatus.FAILURE, ex);
        return ContainerUtils.logAndReturnError(LOG, ex, msg);
    }
    responseProto = handler.handle(msg, container, dispatcherContext);
    if (responseProto != null) {
        metrics.incContainerOpsLatencies(cmdType, System.currentTimeMillis() - startTime);
        // If the request is of write type and the container operation failed,
        // it implies the applyTransaction on the container failed. All
        // subsequent transactions on the container should fail, so the replica
        // is marked unhealthy here. In this case, a close container action
        // will be sent to SCM to close the container. ApplyTransaction called
        // on a closed container will fail with a closed container exception;
        // such exceptions are ignored here. If the container is already marked
        // unhealthy, there is no need to change the state.
        Result result = responseProto.getResult();
        if (cmdType == Type.CreateContainer && result == Result.SUCCESS && dispatcherContext != null) {
            Preconditions.checkNotNull(dispatcherContext.getContainer2BCSIDMap());
            container2BCSIDMap.putIfAbsent(containerID, 0L);
        }
        if (!HddsUtils.isReadOnly(msg) && !canIgnoreException(result)) {
            if (container == null) {
                throw new NullPointerException("Error on creating containers " + result + " " + responseProto.getMessage());
            }
            // For container to be moved to unhealthy state here, the container can
            // only be in open or closing state.
            State containerState = container.getContainerData().getState();
            Preconditions.checkState(containerState == State.OPEN || containerState == State.CLOSING);
            // mark and persist the container state to be unhealthy
            try {
                handler.markContainerUnhealthy(container);
                LOG.info("Marked Container UNHEALTHY, ContainerID: {}", containerID);
            } catch (IOException ioe) {
                // just log the error here in case marking the container fails,
                // Return the actual failure response to the client
                LOG.error("Failed to mark container " + containerID + " UNHEALTHY. ", ioe);
            }
            // in any case, the in memory state of the container should be unhealthy
            Preconditions.checkArgument(container.getContainerData().getState() == State.UNHEALTHY);
            sendCloseContainerActionIfNeeded(container);
        }
        if (result == Result.SUCCESS) {
            updateBCSID(container, dispatcherContext, cmdType);
            audit(action, eventType, params, AuditEventStatus.SUCCESS, null);
        } else {
            audit(action, eventType, params, AuditEventStatus.FAILURE, new Exception(responseProto.getMessage()));
        }
        return responseProto;
    } else {
        // log failure
        audit(action, eventType, params, AuditEventStatus.FAILURE, new Exception("UNSUPPORTED_REQUEST"));
        return unsupportedRequest(msg);
    }
}
Also used : ContainerType(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerType) Handler(org.apache.hadoop.ozone.container.common.interfaces.Handler) IOException(java.io.IOException) InvalidContainerStateException(org.apache.hadoop.hdds.scm.container.common.helpers.InvalidContainerStateException) ServiceException(com.google.protobuf.ServiceException) ContainerNotOpenException(org.apache.hadoop.hdds.scm.container.common.helpers.ContainerNotOpenException) StorageContainerException(org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException) ContainerCommandResponseProto(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto) Result(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result) AuditAction(org.apache.hadoop.ozone.audit.AuditAction) AuditLoggerType(org.apache.hadoop.ozone.audit.AuditLoggerType) Type(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Type) Container(org.apache.hadoop.ozone.container.common.interfaces.Container) State(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State)
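dispatchRequest funnels every failure through ContainerUtils.logAndReturnError, which maps the exception onto a response proto. Below is a sketch of what such a helper might look like, assuming the response proto exposes the cmdType, traceID, result, and message fields used elsewhere in this example; the actual Ozone implementation may differ.

// Sketch, not the actual Ozone implementation: log the failure and copy the
// exception's result code and message into the command response.
public static ContainerCommandResponseProto logAndReturnError(
        Logger log, StorageContainerException ex,
        ContainerCommandRequestProto request) {
    log.info("Operation: {} : Trace ID: {} : Message: {} : Result: {}",
        request.getCmdType(), request.getTraceID(),
        ex.getMessage(), ex.getResult());
    return ContainerCommandResponseProto.newBuilder()
        .setCmdType(request.getCmdType())
        .setTraceID(request.getTraceID())
        .setResult(ex.getResult())
        .setMessage(ex.getMessage())
        .build();
}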

Example 4 with StorageContainerException

Use of org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException in project ozone by apache.

The class ContainerStateMachine, method handleWriteChunk.

private CompletableFuture<Message> handleWriteChunk(ContainerCommandRequestProto requestProto, long entryIndex, long term, long startTime) {
    final WriteChunkRequestProto write = requestProto.getWriteChunk();
    RaftServer server = ratisServer.getServer();
    Preconditions.checkArgument(!write.getData().isEmpty());
    try {
        if (server.getDivision(gid).getInfo().isLeader()) {
            stateMachineDataCache.put(entryIndex, write.getData());
        }
    } catch (InterruptedException ie) {
        Thread.currentThread().interrupt();
        return completeExceptionally(ie);
    } catch (IOException ioe) {
        return completeExceptionally(ioe);
    }
    DispatcherContext context = new DispatcherContext.Builder().setTerm(term).setLogIndex(entryIndex).setStage(DispatcherContext.WriteChunkStage.WRITE_DATA).setContainer2BCSIDMap(container2BCSIDMap).build();
    CompletableFuture<Message> raftFuture = new CompletableFuture<>();
    // ensure the write chunk happens asynchronously in writeChunkExecutor pool
    // thread.
    CompletableFuture<ContainerCommandResponseProto> writeChunkFuture = CompletableFuture.supplyAsync(() -> {
        try {
            return runCommand(requestProto, context);
        } catch (Exception e) {
            LOG.error("{}: writeChunk writeStateMachineData failed: blockId" + "{} logIndex {} chunkName {}", gid, write.getBlockID(), entryIndex, write.getChunkData().getChunkName(), e);
            metrics.incNumWriteDataFails();
            // Write chunks go in parallel. It is possible that one write chunk
            // sees the stateMachine marked unhealthy by another parallel thread.
            stateMachineHealthy.set(false);
            raftFuture.completeExceptionally(e);
            throw e;
        }
    }, getChunkExecutor(requestProto.getWriteChunk()));
    writeChunkFutureMap.put(entryIndex, writeChunkFuture);
    if (LOG.isDebugEnabled()) {
        LOG.debug("{}: writeChunk writeStateMachineData : blockId" + "{} logIndex {} chunkName {}", gid, write.getBlockID(), entryIndex, write.getChunkData().getChunkName());
    }
    // Remove the future once it finishes execution from the
    // writeChunkFutureMap.
    writeChunkFuture.thenApply(r -> {
        if (r.getResult() != ContainerProtos.Result.SUCCESS && r.getResult() != ContainerProtos.Result.CONTAINER_NOT_OPEN && r.getResult() != ContainerProtos.Result.CLOSED_CONTAINER_IO) {
            StorageContainerException sce = new StorageContainerException(r.getMessage(), r.getResult());
            LOG.error(gid + ": writeChunk writeStateMachineData failed: blockId" + write.getBlockID() + " logIndex " + entryIndex + " chunkName " + write.getChunkData().getChunkName() + " Error message: " + r.getMessage() + " Container Result: " + r.getResult());
            metrics.incNumWriteDataFails();
            // If the write fails currently we mark the stateMachine as unhealthy.
            // This leads to pipeline close. Any change in that behavior requires
            // handling the entry for the write chunk in cache.
            stateMachineHealthy.set(false);
            raftFuture.completeExceptionally(sce);
        } else {
            metrics.incNumBytesWrittenCount(requestProto.getWriteChunk().getChunkData().getLen());
            if (LOG.isDebugEnabled()) {
                LOG.debug(gid + ": writeChunk writeStateMachineData  completed: blockId" + write.getBlockID() + " logIndex " + entryIndex + " chunkName " + write.getChunkData().getChunkName());
            }
            raftFuture.complete(r::toByteString);
            metrics.recordWriteStateMachineCompletion(Time.monotonicNowNanos() - startTime);
        }
        writeChunkFutureMap.remove(entryIndex);
        return r;
    });
    return raftFuture;
}
Also used : WriteChunkRequestProto(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.WriteChunkRequestProto) Message(org.apache.ratis.protocol.Message) ContainerCommandRequestMessage(org.apache.hadoop.hdds.ratis.ContainerCommandRequestMessage) RaftServer(org.apache.ratis.server.RaftServer) IOException(java.io.IOException) InvalidProtocolBufferException(org.apache.ratis.thirdparty.com.google.protobuf.InvalidProtocolBufferException) StateMachineException(org.apache.ratis.protocol.exceptions.StateMachineException) StorageContainerException(org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException) ContainerNotOpenException(org.apache.hadoop.hdds.scm.container.common.helpers.ContainerNotOpenException) ContainerCommandResponseProto(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto) CompletableFuture(java.util.concurrent.CompletableFuture) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ContainerController(org.apache.hadoop.ozone.container.ozoneimpl.ContainerController) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) CheckedSupplier(org.apache.ratis.util.function.CheckedSupplier) ContainerDispatcher(org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher) Logger(org.slf4j.Logger) Consumer(java.util.function.Consumer)
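handleWriteChunk short-circuits failures through a completeExceptionally helper. A minimal version consistent with its call sites above (the exact Ozone implementation may differ):

// Minimal sketch consistent with the calls above: wrap an exception in an
// already-failed future so the caller can return it directly.
private static CompletableFuture<Message> completeExceptionally(Exception e) {
    final CompletableFuture<Message> future = new CompletableFuture<>();
    future.completeExceptionally(e);
    return future;
}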

Example 5 with StorageContainerException

Use of org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException in project ozone by apache.

The class ContainerUtils, method getChecksum.

/**
 * Return the SHA-256 checksum of the containerData.
 * @param containerDataYamlStr ContainerData as a Yaml String
 * @return Checksum of the container data
 * @throws StorageContainerException if the digest algorithm is unavailable
 */
public static String getChecksum(String containerDataYamlStr) throws StorageContainerException {
    MessageDigest sha;
    try {
        sha = MessageDigest.getInstance(OzoneConsts.FILE_HASH);
        sha.update(containerDataYamlStr.getBytes(CHARSET_ENCODING));
        return DigestUtils.sha256Hex(sha.digest());
    } catch (NoSuchAlgorithmException e) {
        throw new StorageContainerException("Unable to create Message Digest, " + "usually this is a java configuration issue.", NO_SUCH_ALGORITHM);
    }
}
Also used : NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) StorageContainerException(org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException) MessageDigest(java.security.MessageDigest)
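A hedged usage sketch for getChecksum: the YAML string below is stand-in content, and NO_SUCH_ALGORITHM is assumed to be the statically imported ContainerProtos.Result seen in the method above.

// Illustrative caller: checksum a container descriptor and inspect the
// result code if the digest algorithm is missing from the JVM.
String yaml = "containerID: 1\nstate: OPEN\n"; // stand-in content
try {
    String checksum = ContainerUtils.getChecksum(yaml);
    System.out.println("Container checksum: " + checksum);
} catch (StorageContainerException sce) {
    // getResult() carries NO_SUCH_ALGORITHM when SHA-256 is unavailable
    System.err.println("Checksum failed: " + sce.getResult());
}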

Aggregations

StorageContainerException (org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException) 55
IOException (java.io.IOException) 23
BlockID (org.apache.hadoop.hdds.client.BlockID) 16
Test (org.junit.Test) 15
ContainerProtos (org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos) 14
ChunkInfo (org.apache.hadoop.ozone.container.common.helpers.ChunkInfo) 11
KeyValueContainer (org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer) 11
File (java.io.File) 10
KeyValueContainerData (org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData) 9
BlockData (org.apache.hadoop.ozone.container.common.helpers.BlockData) 8
State (org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State) 5
Container (org.apache.hadoop.ozone.container.common.interfaces.Container) 5
ByteString (org.apache.ratis.thirdparty.com.google.protobuf.ByteString) 5
ChunkInfo (org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChunkInfo) 4
ContainerCommandResponseProto (org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto) 4
WriteChunkRequestProto (org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.WriteChunkRequestProto) 4
XceiverClientSpi (org.apache.hadoop.hdds.scm.XceiverClientSpi) 4
ContainerNotOpenException (org.apache.hadoop.hdds.scm.container.common.helpers.ContainerNotOpenException) 4
HddsVolume (org.apache.hadoop.ozone.container.common.volume.HddsVolume) 4
ChunkManager (org.apache.hadoop.ozone.container.keyvalue.interfaces.ChunkManager) 4