use of com.github.ambry.messageformat.MessageFormatWriteSet in project ambry by linkedin.
the class StoreCopier method copy.
/**
 * Copies data starting from {@code startToken} until all the data is copied.
 * @param startToken the {@link FindToken} to start copying from. It is expected that start token does not cause
 *                   the copier to attempt to copy blobs that have already been copied. If that happens, the boolean
 *                   in the return value will be {@code true}.
 * @return a {@link Pair} of the {@link FindToken} until which data has been copied and a {@link Boolean} indicating
 *         whether the source had problems that were skipped over - like duplicates ({@code true} indicates that there were).
 * @throws Exception if there is any exception during processing
 */
public Pair<FindToken, Boolean> copy(FindToken startToken) throws Exception {
  boolean sourceHasProblems = false;
  FindToken lastToken;
  FindToken token = startToken;
  do {
    lastToken = token;
    FindInfo findInfo = src.findEntriesSince(lastToken, fetchSizeInBytes, null, null);
    List<MessageInfo> messageInfos = findInfo.getMessageEntries();
    for (Transformer transformer : transformers) {
      transformer.warmup(messageInfos);
    }
    for (MessageInfo messageInfo : messageInfos) {
      logger.trace("Processing {} - isDeleted: {}, isExpired {}", messageInfo.getStoreKey(), messageInfo.isDeleted(), messageInfo.isExpired());
      if (!messageInfo.isExpired() && !messageInfo.isDeleted()) {
        if (tgt.findMissingKeys(Collections.singletonList(messageInfo.getStoreKey())).size() == 1) {
          StoreInfo storeInfo = src.get(Collections.singletonList(messageInfo.getStoreKey()), EnumSet.allOf(StoreGetOptions.class));
          MessageReadSet readSet = storeInfo.getMessageReadSet();
          if (readSet.sizeInBytes(0) > Integer.MAX_VALUE) {
            throw new IllegalStateException("Cannot copy blobs whose size > Integer.MAX_VALUE");
          }
          int size = (int) readSet.sizeInBytes(0);
          byte[] buf = new byte[size];
          readSet.writeTo(0, new ByteBufferChannel(ByteBuffer.wrap(buf)), 0, size);
          Message message = new Message(storeInfo.getMessageReadSetInfo().get(0), new ByteArrayInputStream(buf));
          for (Transformer transformer : transformers) {
            TransformationOutput tfmOutput = transformer.transform(message);
            if (tfmOutput.getException() != null) {
              throw tfmOutput.getException();
            } else {
              message = tfmOutput.getMsg();
            }
            if (message == null) {
              break;
            }
          }
          if (message == null) {
            logger.trace("Dropping {} because the transformers did not return a message", messageInfo.getStoreKey());
            continue;
          }
          MessageFormatWriteSet writeSet = new MessageFormatWriteSet(message.getStream(), Collections.singletonList(message.getMessageInfo()), false);
          tgt.put(writeSet);
          MessageInfo tgtMsgInfo = message.getMessageInfo();
          if (tgtMsgInfo.isTtlUpdated()) {
            MessageInfo updateMsgInfo = new MessageInfo(tgtMsgInfo.getStoreKey(), 0, false, true, tgtMsgInfo.getExpirationTimeInMs(), tgtMsgInfo.getAccountId(), tgtMsgInfo.getContainerId(), tgtMsgInfo.getOperationTimeMs());
            tgt.updateTtl(Collections.singletonList(updateMsgInfo));
          }
          logger.trace("Copied {} as {}", messageInfo.getStoreKey(), tgtMsgInfo.getStoreKey());
        } else if (!messageInfo.isTtlUpdated()) {
          logger.warn("Found a duplicate entry for {} while copying data", messageInfo.getStoreKey());
          sourceHasProblems = true;
        }
      }
    }
    token = findInfo.getFindToken();
    double percentBytesRead = src.isEmpty() ? 100.0 : token.getBytesRead() * 100.0 / src.getSizeInBytes();
    logger.info("[{}] [{}] {}% copied", Thread.currentThread().getName(), storeId, df.format(percentBytesRead));
  } while (!token.equals(lastToken));
  return new Pair<>(token, sourceHasProblems);
}
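A minimal sketch of how this copy loop might be driven by a caller. The storeCopier instance, storeKeyFactory and logger are assumptions for illustration and are not part of the snippet above; the token factory call reflects the usual FindTokenFactory contract.

// Hypothetical driver for StoreCopier.copy(...); identifiers below are placeholders.
FindToken startToken = new StoreFindTokenFactory(storeKeyFactory).getNewFindToken();
Pair<FindToken, Boolean> result = storeCopier.copy(startToken);
FindToken tokenToPersist = result.getFirst(); // a later copy can resume from this token
if (result.getSecond()) {
  // true means the source had problems (e.g. duplicates) that were skipped over
  logger.warn("Source store had problems that were skipped during the copy");
}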
use of com.github.ambry.messageformat.MessageFormatWriteSet in project ambry by linkedin.
the class ReplicaThread method writeMessagesToLocalStoreAndAdvanceTokens.
/**
 * Writes the messages (if any) to the local stores from the remote stores for the missing keys, and advances tokens.
 * @param exchangeMetadataResponseList The list of metadata response from the remote node
 * @param getResponse The {@link GetResponse} that contains the missing messages. This may be null if there are no
 *                    missing messages to write as per the exchange metadata response. In that case this method will
 *                    simply advance the tokens for every store.
 * @param replicasToReplicatePerNode The list of remote replicas for the remote node
 * @param remoteNode The remote node from which replication needs to happen
 * @param remoteColoGetRequestForStandby boolean which indicates if we are getting missing keys for standby or
 *                                       non-leader replica pairs during leader-based replication.
 * @throws IOException
 */
private void writeMessagesToLocalStoreAndAdvanceTokens(List<ExchangeMetadataResponse> exchangeMetadataResponseList, GetResponse getResponse, List<RemoteReplicaInfo> replicasToReplicatePerNode, DataNodeId remoteNode, boolean remoteColoGetRequestForStandby) throws IOException {
  int partitionResponseInfoIndex = 0;
  long totalBytesFixed = 0;
  long totalBlobsFixed = 0;
  long startTime = time.milliseconds();
  for (int i = 0; i < exchangeMetadataResponseList.size(); i++) {
    ExchangeMetadataResponse exchangeMetadataResponse = exchangeMetadataResponseList.get(i);
    RemoteReplicaInfo remoteReplicaInfo = replicasToReplicatePerNode.get(i);
    // TODO: if remoteReplicaInfo.getLocalStore() is closed, write will fail
    if (exchangeMetadataResponse.serverErrorCode == ServerErrorCode.No_Error) {
      if (exchangeMetadataResponse.missingStoreMessages.size() > 0) {
        PartitionResponseInfo partitionResponseInfo = getResponse.getPartitionResponseInfoList().get(partitionResponseInfoIndex);
        responseHandler.onEvent(remoteReplicaInfo.getReplicaId(), partitionResponseInfo.getErrorCode());
        partitionResponseInfoIndex++;
        if (!partitionResponseInfo.getPartition().toPathString().equals(remoteReplicaInfo.getReplicaId().getPartitionId().toPathString())) {
          throw new IllegalStateException("The partition id from partitionResponseInfo " + partitionResponseInfo.getPartition() + " and from remoteReplicaInfo " + remoteReplicaInfo.getReplicaId().getPartitionId() + " are not the same");
        }
        if (partitionResponseInfo.getErrorCode() == ServerErrorCode.No_Error) {
          List<MessageInfo> messageInfoList = partitionResponseInfo.getMessageInfoList();
          try {
            logger.trace("Remote node: {} Thread name: {} Remote replica: {} Messages to fix: {} " + "Partition: {} Local mount path: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), exchangeMetadataResponse.getMissingStoreKeys(), remoteReplicaInfo.getReplicaId().getPartitionId(), remoteReplicaInfo.getLocalReplicaId().getMountPath());
            MessageFormatWriteSet writeset;
            MessageSievingInputStream validMessageDetectionInputStream = new MessageSievingInputStream(getResponse.getInputStream(), messageInfoList, Collections.singletonList(transformer), metricRegistry);
            if (validMessageDetectionInputStream.hasInvalidMessages()) {
              replicationMetrics.incrementInvalidMessageError(partitionResponseInfo.getPartition());
              logger.error("Out of {} messages, {} invalid messages were found in message stream from {}", messageInfoList.size(), messageInfoList.size() - validMessageDetectionInputStream.getValidMessageInfoList().size(), remoteReplicaInfo.getReplicaId());
            }
            messageInfoList = validMessageDetectionInputStream.getValidMessageInfoList();
            if (messageInfoList.size() == 0) {
              logger.debug("MessageInfoList is of size 0 as all messages are invalidated, deprecated, deleted or expired.");
            } else {
              writeset = new MessageFormatWriteSet(validMessageDetectionInputStream, messageInfoList, false);
              remoteReplicaInfo.getLocalStore().put(writeset);
            }
            for (MessageInfo messageInfo : messageInfoList) {
              totalBytesFixed += messageInfo.getSize();
              logger.trace("Remote node: {} Thread name: {} Remote replica: {} Message replicated: {} Partition: {} " + "Local mount path: {} Message size: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), messageInfo.getStoreKey(), remoteReplicaInfo.getReplicaId().getPartitionId(), remoteReplicaInfo.getLocalReplicaId().getMountPath(), messageInfo.getSize());
              if (notification != null) {
                notification.onBlobReplicaCreated(dataNodeId.getHostname(), dataNodeId.getPort(), messageInfo.getStoreKey().getID(), BlobReplicaSourceType.REPAIRED);
              }
              if (messageInfo.isTtlUpdated()) {
                applyTtlUpdate(messageInfo, remoteReplicaInfo);
              }
            }
            totalBlobsFixed += messageInfoList.size();
            if (leaderBasedReplicationAdmin != null) {
              // If leader based replication is enabled, we will only fetch missing blobs for local leaders from their
              // remote leaders. For non-leader replica pairs (leader <-> standby, standby <-> leader, standby <->
              // standby), we will store the missing keys and track them via leader<->leader exchanges and intra-dc
              // replication.
              // Notify all the replicas of the partition on newly written messages so that non-leader replica pairs
              // can update their missing keys and advance token if needed.
              leaderBasedReplicationAdmin.onMessageWriteForPartition(partitionResponseInfo.getPartition(), messageInfoList);
            }
            remoteReplicaInfo.setToken(exchangeMetadataResponse.remoteToken);
            remoteReplicaInfo.setLocalLagFromRemoteInBytes(exchangeMetadataResponse.localLagFromRemoteInBytes);
            // reset stored metadata response for this replica
            remoteReplicaInfo.setExchangeMetadataResponse(new ExchangeMetadataResponse(ServerErrorCode.No_Error));
            logger.trace("Remote node: {} Thread name: {} Remote replica: {} Token after speaking to remote node: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), exchangeMetadataResponse.remoteToken);
          } catch (StoreException e) {
            if (e.getErrorCode() != StoreErrorCodes.Already_Exist) {
              replicationMetrics.updateLocalStoreError(remoteReplicaInfo.getReplicaId());
              logger.error("Remote node: {} Thread name: {} Remote replica: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), e);
            }
          }
        } else if (partitionResponseInfo.getErrorCode() == ServerErrorCode.Blob_Deleted) {
          replicationMetrics.blobDeletedOnGetCount.inc();
          logger.trace("One of the blobs to GET is deleted: Remote node: {} Thread name: {} Remote replica: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId());
        } else if (partitionResponseInfo.getErrorCode() == ServerErrorCode.Blob_Authorization_Failure) {
          replicationMetrics.blobAuthorizationFailureCount.inc();
          logger.error("One of the blobs authorization failed: Remote node: {} Thread name: {} Remote replica: {} Keys are: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), exchangeMetadataResponse.getMissingStoreKeys());
        } else {
          replicationMetrics.updateGetRequestError(remoteReplicaInfo.getReplicaId());
          logger.error("Remote node: {} Thread name: {} Remote replica: {} Server error: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), partitionResponseInfo.getErrorCode());
        }
      }
    }
  }
  long batchStoreWriteTime = time.milliseconds() - startTime;
  replicationMetrics.updateBatchStoreWriteTime(batchStoreWriteTime, totalBytesFixed, totalBlobsFixed, replicatingFromRemoteColo, replicatingOverSsl, datacenterName, remoteColoGetRequestForStandby);
}
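Stripped of the replication bookkeeping, the MessageFormatWriteSet usage in the method above reduces to the sketch below. Here getResponse, messageInfoList, transformer, metricRegistry and localStore are placeholders for values the surrounding code already holds; this is a distillation, not additional API.

// Sieve the remote byte stream, then hand the surviving messages to the local store as one write set.
MessageSievingInputStream sievedStream = new MessageSievingInputStream(getResponse.getInputStream(), messageInfoList, Collections.singletonList(transformer), metricRegistry);
List<MessageInfo> validInfos = sievedStream.getValidMessageInfoList();
if (!validInfos.isEmpty()) {
  MessageFormatWriteSet writeSet = new MessageFormatWriteSet(sievedStream, validInfos, false);
  localStore.put(writeSet);
}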
use of com.github.ambry.messageformat.MessageFormatWriteSet in project ambry by linkedin.
the class InMemoryStore method delete.
@Override
public void delete(List<MessageInfo> infos) throws StoreException {
  List<MessageInfo> infosToDelete = new ArrayList<>(infos.size());
  List<InputStream> inputStreams = new ArrayList<>();
  try {
    for (MessageInfo info : infos) {
      short lifeVersion = info.getLifeVersion();
      MessageInfo latestInfo = getMergedMessageInfo(info.getStoreKey(), messageInfos);
      if (latestInfo == null) {
        throw new StoreException("Cannot delete id " + info.getStoreKey() + " since it is not present in the index.", StoreErrorCodes.ID_Not_Found);
      }
      if (lifeVersion == MessageInfo.LIFE_VERSION_FROM_FRONTEND) {
        if (latestInfo.isDeleted()) {
          throw new StoreException("Cannot delete id " + info.getStoreKey() + " since it is already deleted in the index.", StoreErrorCodes.ID_Deleted);
        }
        lifeVersion = latestInfo.getLifeVersion();
      } else {
        if ((latestInfo.isDeleted() && latestInfo.getLifeVersion() >= info.getLifeVersion()) || (latestInfo.getLifeVersion() > info.getLifeVersion())) {
          throw new StoreException("Cannot delete id " + info.getStoreKey() + " since it is already deleted in the index.", StoreErrorCodes.Life_Version_Conflict);
        }
        lifeVersion = info.getLifeVersion();
      }
      MessageFormatInputStream stream = new DeleteMessageFormatInputStream(info.getStoreKey(), info.getAccountId(), info.getContainerId(), info.getOperationTimeMs(), lifeVersion);
      infosToDelete.add(new MessageInfo(info.getStoreKey(), stream.getSize(), true, info.isTtlUpdated(), false, info.getExpirationTimeInMs(), null, info.getAccountId(), info.getContainerId(), info.getOperationTimeMs(), lifeVersion));
      inputStreams.add(stream);
    }
    MessageFormatWriteSet writeSet = new MessageFormatWriteSet(new SequenceInputStream(Collections.enumeration(inputStreams)), infosToDelete, false);
    writeSet.writeTo(log);
    messageInfos.addAll(infosToDelete);
  } catch (Exception e) {
    throw (e instanceof StoreException ? (StoreException) e : new StoreException(e, StoreErrorCodes.Unknown_Error));
  }
}
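The snippet above builds one delete record per MessageInfo and concatenates the per-record streams with a SequenceInputStream so the whole batch is written as a single MessageFormatWriteSet. A single-record sketch of the same pattern is shown below; key, accountId, containerId, deleteTimeMs, lifeVersion and log are placeholders for values the caller already has.

// Single-record version of the batch pattern above (placeholder identifiers, not from the snippet).
MessageFormatInputStream deleteStream = new DeleteMessageFormatInputStream(key, accountId, containerId, deleteTimeMs, lifeVersion);
MessageInfo deleteInfo = new MessageInfo(key, deleteStream.getSize(), true, false, false, Utils.Infinite_Time, null, accountId, containerId, deleteTimeMs, lifeVersion);
MessageFormatWriteSet writeSet = new MessageFormatWriteSet(deleteStream, Collections.singletonList(deleteInfo), false);
writeSet.writeTo(log);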
use of com.github.ambry.messageformat.MessageFormatWriteSet in project ambry by linkedin.
the class InMemoryStore method updateTtl.
@Override
public void updateTtl(List<MessageInfo> infos) throws StoreException {
  List<MessageInfo> infosToUpdate = new ArrayList<>(infos.size());
  List<InputStream> inputStreams = new ArrayList<>();
  try {
    for (MessageInfo info : infos) {
      if (info.getExpirationTimeInMs() != Utils.Infinite_Time) {
        throw new StoreException("BlobStore only supports removing the expiration time", StoreErrorCodes.Update_Not_Allowed);
      }
      MessageInfo latestInfo = getMergedMessageInfo(info.getStoreKey(), messageInfos);
      if (latestInfo == null) {
        throw new StoreException("Cannot update TTL of " + info.getStoreKey() + " since it's not in the index", StoreErrorCodes.ID_Not_Found);
      } else if (latestInfo.isDeleted()) {
        throw new StoreException("Cannot update TTL of " + info.getStoreKey() + " since it is already deleted in the index.", StoreErrorCodes.ID_Deleted);
      } else if (latestInfo.isTtlUpdated()) {
        throw new StoreException("TTL of " + info.getStoreKey() + " is already updated in the index.", StoreErrorCodes.Already_Updated);
      }
      short lifeVersion = latestInfo.getLifeVersion();
      MessageFormatInputStream stream = new TtlUpdateMessageFormatInputStream(info.getStoreKey(), info.getAccountId(), info.getContainerId(), info.getExpirationTimeInMs(), info.getOperationTimeMs(), lifeVersion);
      infosToUpdate.add(new MessageInfo(info.getStoreKey(), stream.getSize(), false, true, false, info.getExpirationTimeInMs(), null, info.getAccountId(), info.getContainerId(), info.getOperationTimeMs(), lifeVersion));
      inputStreams.add(stream);
    }
    MessageFormatWriteSet writeSet = new MessageFormatWriteSet(new SequenceInputStream(Collections.enumeration(inputStreams)), infosToUpdate, false);
    writeSet.writeTo(log);
    messageInfos.addAll(infosToUpdate);
  } catch (Exception e) {
    throw (e instanceof StoreException ? (StoreException) e : new StoreException(e, StoreErrorCodes.Unknown_Error));
  }
}
use of com.github.ambry.messageformat.MessageFormatWriteSet in project ambry by linkedin.
the class BlobStore method delete.
@Override
public void delete(List<MessageInfo> infosToDelete) throws StoreException {
  checkStarted();
  checkDuplicates(infosToDelete);
  final Timer.Context context = metrics.deleteResponse.time();
  try {
    List<IndexValue> indexValuesPriorToDelete = new ArrayList<>();
    List<IndexValue> originalPuts = new ArrayList<>();
    List<Short> lifeVersions = new ArrayList<>();
    Offset indexEndOffsetBeforeCheck = index.getCurrentEndOffset();
    for (MessageInfo info : infosToDelete) {
      IndexValue value = index.findKey(info.getStoreKey(), new FileSpan(index.getStartOffset(), indexEndOffsetBeforeCheck));
      if (value == null) {
        throw new StoreException("Cannot delete id " + info.getStoreKey() + " because it is not present in the index", StoreErrorCodes.ID_Not_Found);
      }
      if (!info.getStoreKey().isAccountContainerMatch(value.getAccountId(), value.getContainerId())) {
        if (config.storeValidateAuthorization) {
          throw new StoreException("DELETE authorization failure. Key: " + info.getStoreKey() + "Actually accountId: " + value.getAccountId() + "Actually containerId: " + value.getContainerId(), StoreErrorCodes.Authorization_Failure);
        } else {
          logger.warn("DELETE authorization failure. Key: {} Actually accountId: {} Actually containerId: {}", info.getStoreKey(), value.getAccountId(), value.getContainerId());
          metrics.deleteAuthorizationFailureCount.inc();
        }
      }
      short revisedLifeVersion = info.getLifeVersion();
      if (info.getLifeVersion() == MessageInfo.LIFE_VERSION_FROM_FRONTEND) {
        // This is a delete request from frontend
        if (value.isDelete()) {
          throw new StoreException("Cannot delete id " + info.getStoreKey() + " since it is already deleted in the index.", StoreErrorCodes.ID_Deleted);
        }
        revisedLifeVersion = value.getLifeVersion();
      } else {
        // This is a delete request from replication
        if (value.isDelete() && value.getLifeVersion() == info.getLifeVersion()) {
          throw new StoreException("Cannot delete id " + info.getStoreKey() + " since it is already deleted in the index with lifeVersion " + value.getLifeVersion() + ".", StoreErrorCodes.ID_Deleted);
        }
        if (value.getLifeVersion() > info.getLifeVersion()) {
          throw new StoreException("Cannot delete id " + info.getStoreKey() + " since it has a higher lifeVersion than the message info: " + value.getLifeVersion() + ">" + info.getLifeVersion(), StoreErrorCodes.Life_Version_Conflict);
        }
      }
      indexValuesPriorToDelete.add(value);
      lifeVersions.add(revisedLifeVersion);
      if (!value.isDelete() && !value.isUndelete()) {
        originalPuts.add(value);
      } else {
        originalPuts.add(index.findKey(info.getStoreKey(), new FileSpan(index.getStartOffset(), value.getOffset()), EnumSet.of(PersistentIndex.IndexEntryType.PUT)));
      }
    }
    synchronized (storeWriteLock) {
      Offset currentIndexEndOffset = index.getCurrentEndOffset();
      if (!currentIndexEndOffset.equals(indexEndOffsetBeforeCheck)) {
        FileSpan fileSpan = new FileSpan(indexEndOffsetBeforeCheck, currentIndexEndOffset);
        int i = 0;
        for (MessageInfo info : infosToDelete) {
          IndexValue value = index.findKey(info.getStoreKey(), fileSpan, EnumSet.allOf(PersistentIndex.IndexEntryType.class));
          if (value != null) {
            // From these cases, we can have value being DELETE, TTL_UPDATE AND UNDELETE, we have to deal with them accordingly.
            if (value.getLifeVersion() == lifeVersions.get(i)) {
              if (value.isDelete()) {
                throw new StoreException("Cannot delete id " + info.getStoreKey() + " since it is already deleted in the index.", StoreErrorCodes.ID_Deleted);
              }
              // value being ttl update is fine, we can just append DELETE to it.
            } else {
              // For the extreme case, we log it out and throw an exception.
              logger.warn("Concurrent operation for id " + info.getStoreKey() + " in store " + dataDir + ". Newly added value " + value);
              throw new StoreException("Cannot delete id " + info.getStoreKey() + " since there are concurrent operation while delete", StoreErrorCodes.Life_Version_Conflict);
            }
            indexValuesPriorToDelete.set(i, value);
          }
          i++;
        }
      }
      List<InputStream> inputStreams = new ArrayList<>(infosToDelete.size());
      List<MessageInfo> updatedInfos = new ArrayList<>(infosToDelete.size());
      int i = 0;
      for (MessageInfo info : infosToDelete) {
        MessageFormatInputStream stream = new DeleteMessageFormatInputStream(info.getStoreKey(), info.getAccountId(), info.getContainerId(), info.getOperationTimeMs(), lifeVersions.get(i));
        // Don't change the lifeVersion here, there are other logic in markAsDeleted that relies on this lifeVersion.
        updatedInfos.add(new MessageInfo(info.getStoreKey(), stream.getSize(), info.getAccountId(), info.getContainerId(), info.getOperationTimeMs(), info.getLifeVersion()));
        inputStreams.add(stream);
        i++;
      }
      Offset endOffsetOfLastMessage = log.getEndOffset();
      MessageFormatWriteSet writeSet = new MessageFormatWriteSet(new SequenceInputStream(Collections.enumeration(inputStreams)), updatedInfos, false);
      writeSet.writeTo(log);
      logger.trace("Store : {} delete mark written to log", dataDir);
      int correspondingPutIndex = 0;
      for (MessageInfo info : updatedInfos) {
        FileSpan fileSpan = log.getFileSpanForMessage(endOffsetOfLastMessage, info.getSize());
        IndexValue deleteIndexValue = index.markAsDeleted(info.getStoreKey(), fileSpan, null, info.getOperationTimeMs(), info.getLifeVersion());
        endOffsetOfLastMessage = fileSpan.getEndOffset();
        blobStoreStats.handleNewDeleteEntry(info.getStoreKey(), deleteIndexValue, originalPuts.get(correspondingPutIndex), indexValuesPriorToDelete.get(correspondingPutIndex));
        correspondingPutIndex++;
      }
      logger.trace("Store : {} delete has been marked in the index ", dataDir);
    }
    onSuccess();
  } catch (StoreException e) {
    if (e.getErrorCode() == StoreErrorCodes.IOError) {
      onError();
    }
    throw e;
  } catch (Exception e) {
    throw new StoreException("Unknown error while trying to delete blobs from store " + dataDir, e, StoreErrorCodes.Unknown_Error);
  } finally {
    context.stop();
  }
}
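A hedged sketch of how a frontend-originated delete might reach this method; blobStore, blobId, accountId, containerId and time are placeholders for values the caller would already have. Passing LIFE_VERSION_FROM_FRONTEND takes the first branch above, so the store resolves the real lifeVersion from its index.

// Hypothetical caller-side invocation; all identifiers except the MessageInfo constant are assumptions.
MessageInfo deleteRequest = new MessageInfo(blobId, 0, accountId, containerId, time.milliseconds(), MessageInfo.LIFE_VERSION_FROM_FRONTEND);
blobStore.delete(Collections.singletonList(deleteRequest));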