Search in sources :

Example 1 with ContainerCommandResponseProto

use of org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto in project ozone by apache.

the class TestXceiverClientMetrics method testMetrics.

/**
 * Checks the counters published under {@code XceiverClientMetrics.SOURCE_NAME}.
 *
 * <p>First a synchronous create-container command is sent and the pending
 * counters are expected to be zero while the latency op counter is one.
 * Then a background thread keeps issuing asynchronous small-file writes
 * until the pending counters are observed to be positive; once every
 * returned future is done the pending counters must be back at zero.
 *
 * @throws Exception if any step of the test fails or times out
 */
@Test
public void testMetrics() throws Exception {
    OzoneConfiguration conf = new OzoneConfiguration();
    String metaDir = GenericTestUtils.getTempPath(TestXceiverClientManager.class.getName() + UUID.randomUUID());
    conf.set(HDDS_METADATA_DIR_NAME, metaDir);
    XceiverClientManager clientManager = new XceiverClientManager(conf);
    try {
        ContainerWithPipeline container = storageContainerLocationClient.allocateContainer(SCMTestUtils.getReplicationType(conf), SCMTestUtils.getReplicationFactor(conf), OzoneConsts.OZONE);
        XceiverClientSpi client = clientManager.acquireClient(container.getPipeline());
        ContainerCommandRequestProto request = ContainerTestHelper.getCreateContainerRequest(container.getContainerInfo().getContainerID(), container.getPipeline());
        client.sendCommand(request);
        MetricsRecordBuilder containerMetrics = getMetrics(XceiverClientMetrics.SOURCE_NAME);
        // The request above was sent synchronously, so there must be no
        // pending requests.
        assertCounter("PendingOps", 0L, containerMetrics);
        assertCounter("numPendingCreateContainer", 0L, containerMetrics);
        // the op counter of the average latency metric should have increased
        assertCounter("CreateContainerLatencyNumOps", 1L, containerMetrics);
        breakFlag = false;
        latch = new CountDownLatch(1);
        int numRequest = 10;
        // Filled by the sender thread only; the main thread reads it after
        // latch.await(), i.e. after the sender has finished.
        List<CompletableFuture<ContainerCommandResponseProto>> computeResults = new ArrayList<>();
        // start new thread to send async requests
        Thread sendThread = new Thread(() -> {
            while (!breakFlag) {
                try {
                    // use async interface for testing pending metrics
                    for (int i = 0; i < numRequest; i++) {
                        BlockID blockID = ContainerTestHelper.getTestBlockID(container.getContainerInfo().getContainerID());
                        ContainerProtos.ContainerCommandRequestProto smallFileRequest;
                        smallFileRequest = ContainerTestHelper.getWriteSmallFileRequest(client.getPipeline(), blockID, 1024);
                        CompletableFuture<ContainerProtos.ContainerCommandResponseProto> response = client.sendCommandAsync(smallFileRequest).getResponse();
                        computeResults.add(response);
                    }
                    Thread.sleep(1000);
                } catch (Exception ignored) {
                    // Best effort: a failed batch is simply retried on the next
                    // iteration until the main thread sets breakFlag.
                }
            }
            latch.countDown();
        });
        sendThread.start();
        GenericTestUtils.waitFor(() -> {
            // check if pending metric count is increased
            MetricsRecordBuilder metric = getMetrics(XceiverClientMetrics.SOURCE_NAME);
            long pendingOps = getLongCounter("PendingOps", metric);
            long pendingPutSmallFileOps = getLongCounter("numPendingPutSmallFile", metric);
            if (pendingOps > 0 && pendingPutSmallFileOps > 0) {
                // tell the sender thread to stop
                breakFlag = true;
                return true;
            } else {
                return false;
            }
        }, 100, 60000);
        // blocking until we stop sending async requests
        latch.await();
        // Wait for all futures being done.
        GenericTestUtils.waitFor(() -> computeResults.stream().allMatch(CompletableFuture::isDone), 100, 60000);
        // the counter value of pending metrics should be decreased to 0
        containerMetrics = getMetrics(XceiverClientMetrics.SOURCE_NAME);
        assertCounter("PendingOps", 0L, containerMetrics);
        assertCounter("numPendingPutSmallFile", 0L, containerMetrics);
    } finally {
        // Stop the sender loop and release the client manager even when an
        // assertion or a waitFor timeout aborts the test early.
        breakFlag = true;
        clientManager.close();
    }
}
Also used : ContainerProtos(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos) ContainerCommandRequestProto(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto) ArrayList(java.util.ArrayList) OzoneConfiguration(org.apache.hadoop.hdds.conf.OzoneConfiguration) XceiverClientManager(org.apache.hadoop.hdds.scm.XceiverClientManager) XceiverClientSpi(org.apache.hadoop.hdds.scm.XceiverClientSpi) CountDownLatch(java.util.concurrent.CountDownLatch) ContainerWithPipeline(org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline) ContainerCommandResponseProto(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto) CompletableFuture(java.util.concurrent.CompletableFuture) ContainerCommandRequestProto(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto) BlockID(org.apache.hadoop.hdds.client.BlockID) MetricsRecordBuilder(org.apache.hadoop.metrics2.MetricsRecordBuilder) Test(org.junit.Test)

Example 2 with ContainerCommandResponseProto

use of org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto in project ozone by apache.

the class DatanodeChunkValidator method readReference.

/**
 * Reads the reference chunk (step {@code 0}), which uses the same name as
 * the one written by
 * {@link org.apache.hadoop.ozone.freon.DatanodeChunkGenerator}, and stores
 * its checksum in {@code checksumReference}.
 *
 * @throws IOException if the read request cannot be sent
 */
private void readReference() throws IOException {
    ContainerCommandResponseProto referenceResponse = xceiverClient.sendCommand(createReadChunkRequest(0));
    // The checksum helper is assigned before computeChecksum is invoked.
    checksum = new Checksum(ContainerProtos.ChecksumType.CRC32, chunkSize);
    checksumReference = computeChecksum(referenceResponse);
}
Also used : Checksum(org.apache.hadoop.ozone.common.Checksum) ContainerCommandRequestProto(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto) ContainerCommandResponseProto(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto)

Example 3 with ContainerCommandResponseProto

use of org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto in project ozone by apache.

the class DatanodeChunkValidator method validateChunk.

/**
 * Reads the chunk for the given step inside the timer and compares its
 * checksum with the stored reference checksum.
 *
 * <p>An {@link IOException} raised while reading is only logged; a checksum
 * mismatch is reported with an {@link IllegalStateException}.
 *
 * @param stepNo step number passed to {@code createReadChunkRequest}
 * @throws Exception propagated from the timed call
 */
private void validateChunk(long stepNo) throws Exception {
    final ContainerCommandRequestProto readRequest = createReadChunkRequest(stepNo);
    timer.time(() -> {
        try {
            final ContainerCommandResponseProto chunkResponse = xceiverClient.sendCommand(readRequest);
            final ChecksumData actualChecksum = computeChecksum(chunkResponse);
            final boolean matchesReference = checksumReference.equals(actualChecksum);
            if (!matchesReference) {
                final String chunkName = chunkResponse.getReadChunk().getChunkData().getChunkName();
                throw new IllegalStateException("Reference (=first) message checksum doesn't match with checksum of chunk " + chunkName);
            }
        } catch (IOException ioe) {
            LOG.warn("Could not read chunk due to IOException: ", ioe);
        }
    });
}
Also used : ChecksumData(org.apache.hadoop.ozone.common.ChecksumData) ContainerCommandRequestProto(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto) IOException(java.io.IOException) ContainerCommandResponseProto(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto)

Example 4 with ContainerCommandResponseProto

use of org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto in project ozone by apache.

the class HddsDispatcher method dispatchRequest.

/**
 * Dispatches a single container command to the handler responsible for the
 * container's type and returns the handler's response.
 *
 * <p>Steps, in order: validate the token of the request; reject requests for
 * containers recorded in the missing-container set; for WriteChunk
 * (WRITE_DATA or COMBINED stage) and PutSmallFile requests create the
 * container if it does not exist yet; look up the handler for the container
 * type; invoke it; record metrics and audit entries; and, when a request
 * that is not read-only fails with a result that cannot be ignored, mark the
 * container unhealthy and trigger the close-container check.
 *
 * @param msg the request to dispatch; must not be null
 * @param dispatcherContext additional context for the request; may be null
 *        (the code then treats a WriteChunk as COMBINED stage)
 * @return the handler's response, or an error response built by
 *         {@code ContainerUtils.logAndReturnError}, {@code malformedRequest}
 *         or {@code unsupportedRequest}
 */
@SuppressWarnings("methodlength")
private ContainerCommandResponseProto dispatchRequest(ContainerCommandRequestProto msg, DispatcherContext dispatcherContext) {
    Preconditions.checkNotNull(msg);
    if (LOG.isTraceEnabled()) {
        LOG.trace("Command {}, trace ID: {} ", msg.getCmdType(), msg.getTraceID());
    }
    // Audit information is prepared up front so every exit path can log it.
    AuditAction action = ContainerCommandRequestPBHelper.getAuditAction(msg.getCmdType());
    EventType eventType = getEventType(msg);
    Map<String, String> params = ContainerCommandRequestPBHelper.getAuditParams(msg);
    ContainerType containerType;
    ContainerCommandResponseProto responseProto = null;
    // Start of the interval reported to incContainerOpsLatencies below.
    long startTime = System.currentTimeMillis();
    Type cmdType = msg.getCmdType();
    long containerID = msg.getContainerID();
    metrics.incContainerOpsMetrics(cmdType);
    // May be null when the container does not exist (yet).
    Container container = getContainer(containerID);
    boolean isWriteStage = (cmdType == Type.WriteChunk && dispatcherContext != null && dispatcherContext.getStage() == DispatcherContext.WriteChunkStage.WRITE_DATA);
    boolean isWriteCommitStage = (cmdType == Type.WriteChunk && dispatcherContext != null && dispatcherContext.getStage() == DispatcherContext.WriteChunkStage.COMMIT_DATA);
    try {
        validateToken(msg);
    } catch (IOException ioe) {
        // NOTE(review): this failure path returns without an audit entry,
        // unlike the other error paths below - confirm that is intended.
        StorageContainerException sce = new StorageContainerException("Block token verification failed. " + ioe.getMessage(), ioe, ContainerProtos.Result.BLOCK_TOKEN_VERIFICATION_FAILED);
        return ContainerUtils.logAndReturnError(LOG, sce, msg);
    }
    // if the command gets executed other than Ratis, the default write stage
    // is WriteChunkStage.COMBINED
    boolean isCombinedStage = cmdType == Type.WriteChunk && (dispatcherContext == null || dispatcherContext.getStage() == DispatcherContext.WriteChunkStage.COMBINED);
    // Stays null when no dispatcher context was supplied.
    Map<Long, Long> container2BCSIDMap = null;
    if (dispatcherContext != null) {
        container2BCSIDMap = dispatcherContext.getContainer2BCSIDMap();
    }
    if (isWriteCommitStage) {
        // Check if the container ID exists in the loaded snapshot file. If
        // it does not, this is inferred to be a datanode restart where we are
        // re-applying a transaction which was not captured in the snapshot.
        // Just add it to the map, and remove it from the missing container
        // set as it might have been added there during "init".
        Preconditions.checkNotNull(container2BCSIDMap);
        if (container != null && container2BCSIDMap.get(containerID) == null) {
            container2BCSIDMap.put(containerID, container.getBlockCommitSequenceId());
            getMissingContainerSet().remove(containerID);
        }
    }
    if (getMissingContainerSet().contains(containerID)) {
        // NOTE(review): the message below contains a duplicated "and and";
        // left untouched here because it is runtime text.
        StorageContainerException sce = new StorageContainerException("ContainerID " + containerID + " has been lost and and cannot be recreated on this DataNode", ContainerProtos.Result.CONTAINER_MISSING);
        audit(action, eventType, params, AuditEventStatus.FAILURE, sce);
        return ContainerUtils.logAndReturnError(LOG, sce, msg);
    }
    if (cmdType != Type.CreateContainer) {
        /**
         * Create Container should happen only as part of Write_Data phase of
         * writeChunk.
         */
        if (container == null && ((isWriteStage || isCombinedStage) || cmdType == Type.PutSmallFile)) {
            // If container does not exist, create one for WriteChunk and
            // PutSmallFile request
            responseProto = createContainer(msg);
            if (responseProto.getResult() != Result.SUCCESS) {
                StorageContainerException sce = new StorageContainerException("ContainerID " + containerID + " creation failed", responseProto.getResult());
                audit(action, eventType, params, AuditEventStatus.FAILURE, sce);
                return ContainerUtils.logAndReturnError(LOG, sce, msg);
            }
            // NOTE(review): with a non-null dispatcherContext this only holds
            // for the WRITE_DATA stage; a PutSmallFile or COMBINED-stage
            // request carrying a context would fail this check - confirm such
            // requests cannot reach this point.
            Preconditions.checkArgument(isWriteStage && container2BCSIDMap != null || dispatcherContext == null);
            if (container2BCSIDMap != null) {
                // adds this container to list of containers created in the pipeline
                // with initial BCSID recorded as 0.
                container2BCSIDMap.putIfAbsent(containerID, 0L);
            }
            // Re-read the container now that it has been created.
            container = getContainer(containerID);
        }
        // if container not found return error
        if (container == null) {
            StorageContainerException sce = new StorageContainerException("ContainerID " + containerID + " does not exist", ContainerProtos.Result.CONTAINER_NOT_FOUND);
            audit(action, eventType, params, AuditEventStatus.FAILURE, sce);
            return ContainerUtils.logAndReturnError(LOG, sce, msg);
        }
        containerType = getContainerType(container);
    } else {
        // CreateContainer: the container type comes from the request itself,
        // so the request must carry a CreateContainer payload.
        if (!msg.hasCreateContainer()) {
            audit(action, eventType, params, AuditEventStatus.FAILURE, new Exception("MALFORMED_REQUEST"));
            return malformedRequest(msg);
        }
        containerType = msg.getCreateContainer().getContainerType();
    }
    // Only requests that are not read-only trigger the close-container check.
    // Note that container may still be null here for CreateContainer.
    if (!HddsUtils.isReadOnly(msg)) {
        sendCloseContainerActionIfNeeded(container);
    }
    Handler handler = getHandler(containerType);
    if (handler == null) {
        StorageContainerException ex = new StorageContainerException("Invalid " + "ContainerType " + containerType, ContainerProtos.Result.CONTAINER_INTERNAL_ERROR);
        // log failure
        audit(action, eventType, params, AuditEventStatus.FAILURE, ex);
        return ContainerUtils.logAndReturnError(LOG, ex, msg);
    }
    responseProto = handler.handle(msg, container, dispatcherContext);
    if (responseProto != null) {
        metrics.incContainerOpsLatencies(cmdType, System.currentTimeMillis() - startTime);
        // If the request is of Write Type and the container operation
        // is unsuccessful, it implies the applyTransaction on the container
        // failed. All subsequent transactions on the container should fail and
        // hence replica will be marked unhealthy here. In this case, a close
        // container action will be sent to SCM to close the container.
        // ApplyTransaction called on closed Container will fail with Closed
        // container exception. In such cases, ignore the exception here
        // If the container is already marked unhealthy, no need to change the
        // state here.
        Result result = responseProto.getResult();
        if (cmdType == Type.CreateContainer && result == Result.SUCCESS && dispatcherContext != null) {
            // container2BCSIDMap was read from this same dispatcherContext
            // above, so the null check covers the map used on the next line.
            Preconditions.checkNotNull(dispatcherContext.getContainer2BCSIDMap());
            container2BCSIDMap.putIfAbsent(containerID, Long.valueOf(0));
        }
        if (!HddsUtils.isReadOnly(msg) && !canIgnoreException(result)) {
            // container is never re-read after a CreateContainer request, so
            // it is still null when such a request fails.
            if (container == null) {
                throw new NullPointerException("Error on creating containers " + result + " " + responseProto.getMessage());
            }
            // For container to be moved to unhealthy state here, the container can
            // only be in open or closing state.
            State containerState = container.getContainerData().getState();
            Preconditions.checkState(containerState == State.OPEN || containerState == State.CLOSING);
            // mark and persist the container state to be unhealthy
            try {
                handler.markContainerUnhealthy(container);
                LOG.info("Marked Container UNHEALTHY, ContainerID: {}", containerID);
            } catch (IOException ioe) {
                // just log the error here in case marking the container fails,
                // Return the actual failure response to the client
                LOG.error("Failed to mark container " + containerID + " UNHEALTHY. ", ioe);
            }
            // in any case, the in memory state of the container should be unhealthy
            Preconditions.checkArgument(container.getContainerData().getState() == State.UNHEALTHY);
            sendCloseContainerActionIfNeeded(container);
        }
        if (result == Result.SUCCESS) {
            updateBCSID(container, dispatcherContext, cmdType);
            audit(action, eventType, params, AuditEventStatus.SUCCESS, null);
        } else {
            audit(action, eventType, params, AuditEventStatus.FAILURE, new Exception(responseProto.getMessage()));
        }
        return responseProto;
    } else {
        // The handler produced no response: report the request as unsupported.
        // log failure
        audit(action, eventType, params, AuditEventStatus.FAILURE, new Exception("UNSUPPORTED_REQUEST"));
        return unsupportedRequest(msg);
    }
}
Also used : ContainerType(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerType) Handler(org.apache.hadoop.ozone.container.common.interfaces.Handler) IOException(java.io.IOException) InvalidContainerStateException(org.apache.hadoop.hdds.scm.container.common.helpers.InvalidContainerStateException) ServiceException(com.google.protobuf.ServiceException) ContainerNotOpenException(org.apache.hadoop.hdds.scm.container.common.helpers.ContainerNotOpenException) IOException(java.io.IOException) StorageContainerException(org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException) ContainerCommandResponseProto(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto) Result(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result) AuditAction(org.apache.hadoop.ozone.audit.AuditAction) AuditLoggerType(org.apache.hadoop.ozone.audit.AuditLoggerType) Type(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Type) ContainerType(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerType) Container(org.apache.hadoop.ozone.container.common.interfaces.Container) State(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State) StorageContainerException(org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException)

Example 5 with ContainerCommandResponseProto

use of org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto in project ozone by apache.

the class GrpcXceiverService method send.

/**
 * Returns the observer that receives the client's request stream. Each
 * request is handed to the dispatcher (with a null dispatcher context) and
 * the resulting response is forwarded to {@code responseObserver}.
 *
 * <p>The response stream is terminated at most once: either with
 * {@code onError} when processing a request fails, or with
 * {@code onCompleted} when the client completes its stream.
 *
 * @param responseObserver observer used to send responses back to the client
 * @return the observer handling incoming requests
 */
@Override
public StreamObserver<ContainerCommandRequestProto> send(StreamObserver<ContainerCommandResponseProto> responseObserver) {
    return new StreamObserver<ContainerCommandRequestProto>() {

        // True once a terminal call (onError or onCompleted) has been made on
        // responseObserver, so no second terminal call is ever issued.
        private final AtomicBoolean isClosed = new AtomicBoolean(false);

        @Override
        public void onNext(ContainerCommandRequestProto request) {
            try {
                ContainerCommandResponseProto resp = dispatcher.dispatch(request, null);
                responseObserver.onNext(resp);
            } catch (Throwable e) {
                LOG.error("Got exception when processing" + " ContainerCommandRequestProto {}", request, e);
                // onError is terminal: record it so that a later onCompleted()
                // does not call responseObserver.onCompleted() afterwards.
                if (isClosed.compareAndSet(false, true)) {
                    responseObserver.onError(e);
                }
            }
        }

        @Override
        public void onError(Throwable t) {
            // for now we just log a msg
            LOG.error("ContainerCommand send on error. Exception: ", t);
        }

        @Override
        public void onCompleted() {
            if (isClosed.compareAndSet(false, true)) {
                LOG.debug("ContainerCommand send completed");
                responseObserver.onCompleted();
            }
        }
    };
}
Also used : StreamObserver(org.apache.ratis.thirdparty.io.grpc.stub.StreamObserver) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ContainerCommandRequestProto(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto) ContainerCommandResponseProto(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto)

Aggregations

ContainerCommandResponseProto (org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto)33 ContainerCommandRequestProto (org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto)24 ByteString (org.apache.ratis.thirdparty.com.google.protobuf.ByteString)14 IOException (java.io.IOException)11 DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails)11 ContainerProtos (org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos)10 Test (org.junit.Test)8 File (java.io.File)7 Map (java.util.Map)7 UUID (java.util.UUID)7 CompletableFuture (java.util.concurrent.CompletableFuture)7 OzoneConfiguration (org.apache.hadoop.hdds.conf.OzoneConfiguration)7 StorageContainerException (org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException)6 List (java.util.List)5 Pipeline (org.apache.hadoop.hdds.scm.pipeline.Pipeline)5 VisibleForTesting (com.google.common.annotations.VisibleForTesting)4 ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder)4 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)4 MockPipeline (org.apache.hadoop.hdds.scm.pipeline.MockPipeline)4 OzoneConfigKeys (org.apache.hadoop.ozone.OzoneConfigKeys)4