Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState in project ignite by apache.
The class CheckpointWorkflow, method fillCacheGroupState.
/**
* Fills the cache group state in the checkpoint record.
*
* @param cpRec Checkpoint record to fill.
* @throws IgniteCheckedException If failed.
*/
private void fillCacheGroupState(CheckpointRecord cpRec) throws IgniteCheckedException {
GridCompoundFuture grpHandleFut = checkpointCollectPagesInfoPool == null ? null : new GridCompoundFuture();
for (CacheGroupContext grp : cacheGroupsContexts.get()) {
if (grp.isLocal() || !grp.walEnabled())
continue;
Runnable r = () -> {
ArrayList<GridDhtLocalPartition> parts = new ArrayList<>(grp.topology().localPartitions().size());
for (GridDhtLocalPartition part : grp.topology().currentLocalPartitions()) parts.add(part);
CacheState state = new CacheState(parts.size());
for (GridDhtLocalPartition part : parts) {
GridDhtPartitionState partState = part.state();
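// Presumably LOST partitions are persisted as OWNING so their data survives restart, with loss re-resolved separately (assumption).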
if (partState == LOST)
partState = OWNING;
state.addPartitionState(part.id(), part.dataStore().fullSize(), part.updateCounter(), (byte) partState.ordinal());
}
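// The checkpoint record is shared across pool threads, so serialize mutation.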
synchronized (cpRec) {
cpRec.addCacheGroupState(grp.groupId(), state);
}
};
if (checkpointCollectPagesInfoPool == null)
r.run();
else
try {
GridFutureAdapter<?> res = new GridFutureAdapter<>();
checkpointCollectPagesInfoPool.execute(U.wrapIgniteFuture(r, res));
grpHandleFut.add(res);
} catch (RejectedExecutionException e) {
assert false : "Task should never be rejected by async runner";
// Guard against disabled assertions and make sure the failure handler is invoked.
throw new IgniteException(e);
}
}
if (grpHandleFut != null) {
grpHandleFut.markInitialized();
grpHandleFut.get();
}
}
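The checkpoint record persists each partition state as its enum ordinal in a single byte, and recovery restores it with GridDhtPartitionState.fromOrdinal (see readPlainRecord below). A minimal sketch of that round trip, with a hypothetical class name:

import org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState;

public class PartitionStateRoundTrip {
    public static void main(String[] args) {
        // Persist: the checkpoint record stores the state as a single byte (its ordinal).
        GridDhtPartitionState state = GridDhtPartitionState.OWNING;
        byte persisted = (byte) state.ordinal();

        // Recover: fromOrdinal(..) maps the byte back to the enum constant.
        GridDhtPartitionState restored = GridDhtPartitionState.fromOrdinal(persisted);

        assert restored == state;

        System.out.println("persisted=" + persisted + ", restored=" + restored);
    }
}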
Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState in project ignite by apache.
The class RecordDataV1Serializer, method readPlainRecord.
/**
* Reads a {@code WALRecord} of the given {@code type} from the input.
* The input is expected to be plain (not encrypted).
*
* @param type Record type.
* @param in Input to read from.
* @param encrypted Whether the record was encrypted.
* @param recordSize Record size.
* @return Deserialized record.
* @throws IOException If failed.
* @throws IgniteCheckedException If failed.
*/
WALRecord readPlainRecord(RecordType type, ByteBufferBackedDataInput in, boolean encrypted, int recordSize) throws IOException, IgniteCheckedException {
WALRecord res;
switch (type) {
case PAGE_RECORD:
byte[] arr = new byte[pageSize];
int cacheId = in.readInt();
long pageId = in.readLong();
in.readFully(arr);
res = new PageSnapshot(new FullPageId(pageId, cacheId), arr, encrypted ? realPageSize : pageSize);
break;
case CHECKPOINT_RECORD:
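// The checkpoint ID is serialized as two longs (the UUID's most/least significant bits), followed by an optional WAL pointer.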
long msb = in.readLong();
long lsb = in.readLong();
boolean hasPtr = in.readByte() != 0;
long idx = hasPtr ? in.readLong() : 0;
int off = hasPtr ? in.readInt() : 0;
int len = hasPtr ? in.readInt() : 0;
Map<Integer, CacheState> states = readPartitionStates(in);
boolean end = in.readByte() != 0;
WALPointer walPtr = hasPtr ? new WALPointer(idx, off, len) : null;
CheckpointRecord cpRec = new CheckpointRecord(new UUID(msb, lsb), walPtr, end);
cpRec.cacheGroupStates(states);
res = cpRec;
break;
case META_PAGE_INIT:
cacheId = in.readInt();
pageId = in.readLong();
int ioType = in.readUnsignedShort();
int ioVer = in.readUnsignedShort();
long treeRoot = in.readLong();
long reuseListRoot = in.readLong();
res = new MetaPageInitRecord(cacheId, pageId, ioType, ioVer, treeRoot, reuseListRoot, log);
break;
case INDEX_META_PAGE_DELTA_RECORD:
res = new MetaPageUpdateIndexDataRecord(in);
break;
case PARTITION_META_PAGE_UPDATE_COUNTERS:
res = new MetaPageUpdatePartitionDataRecord(in);
break;
case PARTITION_META_PAGE_UPDATE_COUNTERS_V2:
res = new MetaPageUpdatePartitionDataRecordV2(in);
break;
case PARTITION_META_PAGE_DELTA_RECORD_V3:
res = new MetaPageUpdatePartitionDataRecordV3(in);
break;
case MEMORY_RECOVERY:
long ts = in.readLong();
res = new MemoryRecoveryRecord(ts);
break;
case PARTITION_DESTROY:
cacheId = in.readInt();
int partId = in.readInt();
res = new PartitionDestroyRecord(cacheId, partId);
break;
case DATA_RECORD:
case DATA_RECORD_V2:
int entryCnt = in.readInt();
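// Fast path: a single-entry record avoids allocating a list.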
if (entryCnt == 1)
res = new DataRecord(readPlainDataEntry(in, type), 0L);
else {
List<DataEntry> entries = new ArrayList<>(entryCnt);
for (int i = 0; i < entryCnt; i++) entries.add(readPlainDataEntry(in, type));
res = new DataRecord(entries, 0L);
}
break;
case ENCRYPTED_DATA_RECORD:
case ENCRYPTED_DATA_RECORD_V2:
case ENCRYPTED_DATA_RECORD_V3:
entryCnt = in.readInt();
if (entryCnt == 1)
res = new DataRecord(readEncryptedDataEntry(in, type), 0L);
else {
List<DataEntry> entries = new ArrayList<>(entryCnt);
for (int i = 0; i < entryCnt; i++) entries.add(readEncryptedDataEntry(in, type));
res = new DataRecord(entries, 0L);
}
break;
case METASTORE_DATA_RECORD:
int strLen = in.readInt();
byte[] strBytes = new byte[strLen];
in.readFully(strBytes);
String key = new String(strBytes);
int valLen = in.readInt();
assert valLen >= 0;
byte[] val;
if (valLen > 0) {
val = new byte[valLen];
in.readFully(val);
} else
val = null;
return new MetastoreDataRecord(key, val);
case HEADER_RECORD:
long magic = in.readLong();
if (magic != HeaderRecord.REGULAR_MAGIC && magic != HeaderRecord.COMPACTED_MAGIC)
throw new EOFException("Magic is corrupted [actual=" + U.hexLong(magic) + ']');
int ver = in.readInt();
res = new HeaderRecord(ver);
break;
case DATA_PAGE_INSERT_RECORD:
{
cacheId = in.readInt();
pageId = in.readLong();
int size = in.readUnsignedShort();
in.ensure(size);
byte[] payload = new byte[size];
in.readFully(payload);
res = new DataPageInsertRecord(cacheId, pageId, payload);
break;
}
case DATA_PAGE_UPDATE_RECORD:
{
cacheId = in.readInt();
pageId = in.readLong();
int itemId = in.readInt();
int size = in.readUnsignedShort();
in.ensure(size);
byte[] payload = new byte[size];
in.readFully(payload);
res = new DataPageUpdateRecord(cacheId, pageId, itemId, payload);
break;
}
case DATA_PAGE_INSERT_FRAGMENT_RECORD:
{
cacheId = in.readInt();
pageId = in.readLong();
final long lastLink = in.readLong();
final int payloadSize = in.readInt();
final byte[] payload = new byte[payloadSize];
in.readFully(payload);
res = new DataPageInsertFragmentRecord(cacheId, pageId, payload, lastLink);
break;
}
case DATA_PAGE_REMOVE_RECORD:
cacheId = in.readInt();
pageId = in.readLong();
int itemId = in.readUnsignedByte();
res = new DataPageRemoveRecord(cacheId, pageId, itemId);
break;
case DATA_PAGE_SET_FREE_LIST_PAGE:
cacheId = in.readInt();
pageId = in.readLong();
long freeListPage = in.readLong();
res = new DataPageSetFreeListPageRecord(cacheId, pageId, freeListPage);
break;
case MVCC_DATA_PAGE_MARK_UPDATED_RECORD:
cacheId = in.readInt();
pageId = in.readLong();
itemId = in.readInt();
long newMvccCrd = in.readLong();
long newMvccCntr = in.readLong();
int newMvccOpCntr = in.readInt();
res = new DataPageMvccMarkUpdatedRecord(cacheId, pageId, itemId, newMvccCrd, newMvccCntr, newMvccOpCntr);
break;
case MVCC_DATA_PAGE_TX_STATE_HINT_UPDATED_RECORD:
cacheId = in.readInt();
pageId = in.readLong();
itemId = in.readInt();
byte txState = in.readByte();
res = new DataPageMvccUpdateTxStateHintRecord(cacheId, pageId, itemId, txState);
break;
case MVCC_DATA_PAGE_NEW_TX_STATE_HINT_UPDATED_RECORD:
cacheId = in.readInt();
pageId = in.readLong();
itemId = in.readInt();
byte newTxState = in.readByte();
res = new DataPageMvccUpdateNewTxStateHintRecord(cacheId, pageId, itemId, newTxState);
break;
case INIT_NEW_PAGE_RECORD:
cacheId = in.readInt();
pageId = in.readLong();
ioType = in.readUnsignedShort();
ioVer = in.readUnsignedShort();
long virtualPageId = in.readLong();
res = new InitNewPageRecord(cacheId, pageId, ioType, ioVer, virtualPageId, log);
break;
case BTREE_META_PAGE_INIT_ROOT:
cacheId = in.readInt();
pageId = in.readLong();
long rootId = in.readLong();
res = new MetaPageInitRootRecord(cacheId, pageId, rootId);
break;
case BTREE_META_PAGE_INIT_ROOT2:
cacheId = in.readInt();
pageId = in.readLong();
long rootId2 = in.readLong();
int inlineSize = in.readShort();
res = new MetaPageInitRootInlineRecord(cacheId, pageId, rootId2, inlineSize);
break;
case BTREE_META_PAGE_INIT_ROOT_V3:
cacheId = in.readInt();
pageId = in.readLong();
long rootId3 = in.readLong();
int inlineSize3 = in.readShort();
long flags = in.readLong();
byte[] revHash = new byte[IgniteProductVersion.REV_HASH_SIZE];
byte maj = in.readByte();
byte min = in.readByte();
byte maint = in.readByte();
long verTs = in.readLong();
in.readFully(revHash);
IgniteProductVersion createdVer = new IgniteProductVersion(maj, min, maint, verTs, revHash);
res = new MetaPageInitRootInlineFlagsCreatedVersionRecord(cacheId, pageId, rootId3, inlineSize3, flags, createdVer);
break;
case BTREE_META_PAGE_ADD_ROOT:
cacheId = in.readInt();
pageId = in.readLong();
rootId = in.readLong();
res = new MetaPageAddRootRecord(cacheId, pageId, rootId);
break;
case BTREE_META_PAGE_CUT_ROOT:
cacheId = in.readInt();
pageId = in.readLong();
res = new MetaPageCutRootRecord(cacheId, pageId);
break;
case BTREE_INIT_NEW_ROOT:
cacheId = in.readInt();
pageId = in.readLong();
rootId = in.readLong();
ioType = in.readUnsignedShort();
ioVer = in.readUnsignedShort();
long leftId = in.readLong();
long rightId = in.readLong();
BPlusIO<?> io = BPlusIO.getBPlusIO(ioType, ioVer);
byte[] rowBytes = new byte[io.getItemSize()];
in.readFully(rowBytes);
res = new NewRootInitRecord<>(cacheId, pageId, rootId, (BPlusInnerIO<?>) io, leftId, rowBytes, rightId);
break;
case BTREE_PAGE_RECYCLE:
cacheId = in.readInt();
pageId = in.readLong();
long newPageId = in.readLong();
res = new RecycleRecord(cacheId, pageId, newPageId);
break;
case BTREE_PAGE_INSERT:
cacheId = in.readInt();
pageId = in.readLong();
ioType = in.readUnsignedShort();
ioVer = in.readUnsignedShort();
int itemIdx = in.readUnsignedShort();
rightId = in.readLong();
io = BPlusIO.getBPlusIO(ioType, ioVer);
rowBytes = new byte[io.getItemSize()];
in.readFully(rowBytes);
res = new InsertRecord<>(cacheId, pageId, io, itemIdx, rowBytes, rightId);
break;
case BTREE_FIX_LEFTMOST_CHILD:
cacheId = in.readInt();
pageId = in.readLong();
rightId = in.readLong();
res = new FixLeftmostChildRecord(cacheId, pageId, rightId);
break;
case BTREE_FIX_COUNT:
cacheId = in.readInt();
pageId = in.readLong();
int cnt = in.readUnsignedShort();
res = new FixCountRecord(cacheId, pageId, cnt);
break;
case BTREE_PAGE_REPLACE:
cacheId = in.readInt();
pageId = in.readLong();
ioType = in.readUnsignedShort();
ioVer = in.readUnsignedShort();
itemIdx = in.readUnsignedShort();
io = BPlusIO.getBPlusIO(ioType, ioVer);
rowBytes = new byte[io.getItemSize()];
in.readFully(rowBytes);
res = new ReplaceRecord<>(cacheId, pageId, io, rowBytes, itemIdx);
break;
case BTREE_PAGE_REMOVE:
cacheId = in.readInt();
pageId = in.readLong();
itemIdx = in.readUnsignedShort();
cnt = in.readUnsignedShort();
res = new RemoveRecord(cacheId, pageId, itemIdx, cnt);
break;
case BTREE_PAGE_INNER_REPLACE:
cacheId = in.readInt();
pageId = in.readLong();
int dstIdx = in.readUnsignedShort();
long srcPageId = in.readLong();
int srcIdx = in.readUnsignedShort();
long rmvId = in.readLong();
res = new InnerReplaceRecord<>(cacheId, pageId, dstIdx, srcPageId, srcIdx, rmvId);
break;
case BTREE_FORWARD_PAGE_SPLIT:
cacheId = in.readInt();
pageId = in.readLong();
long fwdId = in.readLong();
ioType = in.readUnsignedShort();
ioVer = in.readUnsignedShort();
srcPageId = in.readLong();
int mid = in.readUnsignedShort();
cnt = in.readUnsignedShort();
res = new SplitForwardPageRecord(cacheId, pageId, fwdId, ioType, ioVer, srcPageId, mid, cnt);
break;
case BTREE_EXISTING_PAGE_SPLIT:
cacheId = in.readInt();
pageId = in.readLong();
mid = in.readUnsignedShort();
fwdId = in.readLong();
res = new SplitExistingPageRecord(cacheId, pageId, mid, fwdId);
break;
case BTREE_PAGE_MERGE:
cacheId = in.readInt();
pageId = in.readLong();
long prntId = in.readLong();
int prntIdx = in.readUnsignedShort();
rightId = in.readLong();
boolean emptyBranch = in.readBoolean();
res = new MergeRecord<>(cacheId, pageId, prntId, prntIdx, rightId, emptyBranch);
break;
case BTREE_FIX_REMOVE_ID:
cacheId = in.readInt();
pageId = in.readLong();
rmvId = in.readLong();
res = new FixRemoveId(cacheId, pageId, rmvId);
break;
case PAGES_LIST_SET_NEXT:
cacheId = in.readInt();
pageId = in.readLong();
long nextPageId = in.readLong();
res = new PagesListSetNextRecord(cacheId, pageId, nextPageId);
break;
case PAGES_LIST_SET_PREVIOUS:
cacheId = in.readInt();
pageId = in.readLong();
long prevPageId = in.readLong();
res = new PagesListSetPreviousRecord(cacheId, pageId, prevPageId);
break;
case PAGES_LIST_INIT_NEW_PAGE:
cacheId = in.readInt();
pageId = in.readLong();
ioType = in.readInt();
ioVer = in.readInt();
newPageId = in.readLong();
prevPageId = in.readLong();
long addDataPageId = in.readLong();
res = new PagesListInitNewPageRecord(cacheId, pageId, ioType, ioVer, newPageId, prevPageId, addDataPageId, log);
break;
case PAGES_LIST_ADD_PAGE:
cacheId = in.readInt();
pageId = in.readLong();
long dataPageId = in.readLong();
res = new PagesListAddPageRecord(cacheId, pageId, dataPageId);
break;
case PAGES_LIST_REMOVE_PAGE:
cacheId = in.readInt();
pageId = in.readLong();
long rmvdPageId = in.readLong();
res = new PagesListRemovePageRecord(cacheId, pageId, rmvdPageId);
break;
case TRACKING_PAGE_DELTA:
cacheId = in.readInt();
pageId = in.readLong();
long pageIdToMark = in.readLong();
long nextSnapshotId0 = in.readLong();
long lastSuccessfulSnapshotId0 = in.readLong();
res = new TrackingPageDeltaRecord(cacheId, pageId, pageIdToMark, nextSnapshotId0, lastSuccessfulSnapshotId0);
break;
case META_PAGE_UPDATE_NEXT_SNAPSHOT_ID:
cacheId = in.readInt();
pageId = in.readLong();
long nextSnapshotId = in.readLong();
res = new MetaPageUpdateNextSnapshotId(cacheId, pageId, nextSnapshotId);
break;
case META_PAGE_UPDATE_LAST_SUCCESSFUL_FULL_SNAPSHOT_ID:
cacheId = in.readInt();
pageId = in.readLong();
long lastSuccessfulFullSnapshotId = in.readLong();
res = new MetaPageUpdateLastSuccessfulFullSnapshotId(cacheId, pageId, lastSuccessfulFullSnapshotId);
break;
case META_PAGE_UPDATE_LAST_SUCCESSFUL_SNAPSHOT_ID:
cacheId = in.readInt();
pageId = in.readLong();
long lastSuccessfulSnapshotId = in.readLong();
long lastSuccessfulSnapshotTag = in.readLong();
res = new MetaPageUpdateLastSuccessfulSnapshotId(cacheId, pageId, lastSuccessfulSnapshotId, lastSuccessfulSnapshotTag);
break;
case META_PAGE_UPDATE_LAST_ALLOCATED_INDEX:
cacheId = in.readInt();
pageId = in.readLong();
int lastAllocatedIdx = in.readInt();
res = new MetaPageUpdateLastAllocatedIndex(cacheId, pageId, lastAllocatedIdx);
break;
case PART_META_UPDATE_STATE:
cacheId = in.readInt();
partId = in.readInt();
byte state = in.readByte();
long updateCntr = in.readLong();
GridDhtPartitionState partState = GridDhtPartitionState.fromOrdinal(state);
res = new PartitionMetaStateRecord(cacheId, partId, partState, updateCntr);
break;
case PAGE_LIST_META_RESET_COUNT_RECORD:
cacheId = in.readInt();
pageId = in.readLong();
res = new PageListMetaResetCountRecord(cacheId, pageId);
break;
case ROTATED_ID_PART_RECORD:
cacheId = in.readInt();
pageId = in.readLong();
byte rotatedIdPart = in.readByte();
res = new RotatedIdPartRecord(cacheId, pageId, rotatedIdPart);
break;
case SWITCH_SEGMENT_RECORD:
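// The switch-segment marker carries no payload; report it to the caller as end-of-segment.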
throw new EOFException("END OF SEGMENT");
case TX_RECORD:
res = txRecordSerializer.readTx(in);
break;
case MASTER_KEY_CHANGE_RECORD:
case MASTER_KEY_CHANGE_RECORD_V2:
int keyNameLen = in.readInt();
byte[] keyNameBytes = new byte[keyNameLen];
in.readFully(keyNameBytes);
String masterKeyName = new String(keyNameBytes);
int keysCnt = in.readInt();
List<T2<Integer, GroupKeyEncrypted>> grpKeys = new ArrayList<>(keysCnt);
boolean readKeyId = type == MASTER_KEY_CHANGE_RECORD_V2;
for (int i = 0; i < keysCnt; i++) {
int grpId = in.readInt();
int keyId = readKeyId ? in.readByte() & 0xff : 0;
int grpKeySize = in.readInt();
byte[] grpKey = new byte[grpKeySize];
in.readFully(grpKey);
grpKeys.add(new T2<>(grpId, new GroupKeyEncrypted(keyId, grpKey)));
}
res = new MasterKeyChangeRecordV2(masterKeyName, grpKeys);
break;
case REENCRYPTION_START_RECORD:
int grpsCnt = in.readInt();
Map<Integer, Byte> map = U.newHashMap(grpsCnt);
for (int i = 0; i < grpsCnt; i++) {
int grpId = in.readInt();
byte keyId = in.readByte();
map.put(grpId, keyId);
}
res = new ReencryptionStartRecord(map);
break;
case INDEX_ROOT_PAGE_RENAME_RECORD:
res = new IndexRenameRootPageRecord(in);
break;
case PARTITION_CLEARING_START_RECORD:
int partId0 = in.readInt();
int grpId = in.readInt();
long clearVer = in.readLong();
res = new PartitionClearingStartRecord(partId0, grpId, clearVer);
break;
default:
throw new UnsupportedOperationException("Type: " + type);
}
return res;
}
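For reference, the PART_META_UPDATE_STATE branch above decodes a fixed layout: cache ID (int), partition ID (int), state ordinal (byte), update counter (long). A standalone sketch of the same decoding against a plain java.io.DataInput, with a hypothetical holder class:

import java.io.DataInput;
import java.io.IOException;
import org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState;

// Hypothetical holder mirroring the PART_META_UPDATE_STATE wire format.
class PartMetaUpdateState {
    final int cacheId;
    final int partId;
    final GridDhtPartitionState state;
    final long updateCntr;

    PartMetaUpdateState(DataInput in) throws IOException {
        cacheId = in.readInt(); // cache group ID
        partId = in.readInt(); // partition ID
        state = GridDhtPartitionState.fromOrdinal(in.readByte()); // state ordinal
        updateCntr = in.readLong(); // partition update counter
    }
}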
Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState in project ignite by apache.
The class GridPartitionStateMap, method entrySet.
/**
* {@inheritDoc}
*/
@Override
public Set<Entry<Integer, GridDhtPartitionState>> entrySet() {
return new AbstractSet<Entry<Integer, GridDhtPartitionState>>() {
@Override
public Iterator<Entry<Integer, GridDhtPartitionState>> iterator() {
return new Iterator<Entry<Integer, GridDhtPartitionState>>() {
/**
* Current {@link GridPartitionStateMap#states} index.
*/
private int idx;
/**
* Current key (partition ID).
*/
private int cur;
@Override
public boolean hasNext() {
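// Advance to the next set bit; nextSetBit(..) returns -1 when the set is exhausted.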
idx = states.nextSetBit(idx);
return idx != -1;
}
@Override
public Entry<Integer, GridDhtPartitionState> next() {
if (!hasNext())
throw new NoSuchElementException();
cur = idx / BITS;
int bitN = idx % BITS;
// Read the state value from the BitSet as in GridPartitionStateMap#state, but skip the leading bits known to be zero.
int st = 1 << bitN;
// Accumulate the values of the remaining bits.
for (int i = 1; i < BITS - bitN; i++) st |= (states.get(idx + i) ? 1 : 0) << i + bitN;
final int ordinal = st - 1;
idx += (BITS - bitN);
return new Entry<Integer, GridDhtPartitionState>() {
int p = cur;
@Override
public Integer getKey() {
return p;
}
@Override
public GridDhtPartitionState getValue() {
return GridDhtPartitionState.fromOrdinal(ordinal);
}
@Override
public GridDhtPartitionState setValue(GridDhtPartitionState val) {
return setState(p, val);
}
};
}
@Override
public void remove() {
setState(cur, null);
}
};
}
@Override
public int size() {
return GridPartitionStateMap.this.size();
}
};
}
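The iterator above decodes a compact encoding: each partition occupies BITS bits in a java.util.BitSet and stores ordinal + 1, so an all-zero cell means "no mapping" and st - 1 recovers the ordinal. A standalone sketch of that packing, assuming BITS = 3 (enough for every GridDhtPartitionState value plus the shift):

import java.util.BitSet;
import java.util.NoSuchElementException;

class PackedStateMap {
    private static final int BITS = 3; // assumption: 3 bits per partition

    private final BitSet states = new BitSet();

    void set(int part, int ordinal) {
        int val = ordinal + 1; // shift by one so an absent partition is all zeros

        for (int i = 0; i < BITS; i++)
            states.set(part * BITS + i, (val & (1 << i)) != 0);
    }

    int get(int part) {
        int val = 0;

        for (int i = 0; i < BITS; i++)
            val |= (states.get(part * BITS + i) ? 1 : 0) << i;

        if (val == 0)
            throw new NoSuchElementException("No state for partition " + part);

        return val - 1; // back to the enum ordinal
    }
}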
Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState in project ignite by apache.
The class SnapshotPartitionsVerifyHandler, method invoke.
/**
* {@inheritDoc}
*/
@Override
public Map<PartitionKeyV2, PartitionHashRecordV2> invoke(SnapshotHandlerContext opCtx) throws IgniteCheckedException {
SnapshotMetadata meta = opCtx.metadata();
Set<Integer> grps = F.isEmpty(opCtx.groups()) ? new HashSet<>(meta.partitions().keySet()) : opCtx.groups().stream().map(CU::cacheId).collect(Collectors.toSet());
Set<File> partFiles = new HashSet<>();
IgniteSnapshotManager snpMgr = cctx.snapshotMgr();
for (File dir : snpMgr.snapshotCacheDirectories(meta.snapshotName(), meta.folderName())) {
int grpId = CU.cacheId(cacheGroupName(dir));
if (!grps.remove(grpId))
continue;
Set<Integer> parts = meta.partitions().get(grpId) == null ? Collections.emptySet() : new HashSet<>(meta.partitions().get(grpId));
for (File part : cachePartitionFiles(dir)) {
int partId = partId(part.getName());
if (!parts.remove(partId))
continue;
partFiles.add(part);
}
if (!parts.isEmpty()) {
throw new IgniteException("Snapshot data doesn't contain required cache group partition " + "[grpId=" + grpId + ", snpName=" + meta.snapshotName() + ", consId=" + meta.consistentId() + ", missed=" + parts + ", meta=" + meta + ']');
}
}
if (!grps.isEmpty()) {
throw new IgniteException("Snapshot data doesn't contain required cache groups " + "[grps=" + grps + ", snpName=" + meta.snapshotName() + ", consId=" + meta.consistentId() + ", meta=" + meta + ']');
}
Map<PartitionKeyV2, PartitionHashRecordV2> res = new ConcurrentHashMap<>();
ThreadLocal<ByteBuffer> buff = ThreadLocal.withInitial(() -> ByteBuffer.allocateDirect(meta.pageSize()).order(ByteOrder.nativeOrder()));
GridKernalContext snpCtx = snpMgr.createStandaloneKernalContext(meta.snapshotName(), meta.folderName());
for (GridComponent comp : snpCtx) comp.start();
try {
U.doInParallel(snpMgr.snapshotExecutorService(), partFiles, part -> {
String grpName = cacheGroupName(part.getParentFile());
int grpId = CU.cacheId(grpName);
int partId = partId(part.getName());
FilePageStoreManager storeMgr = (FilePageStoreManager) cctx.pageStore();
try (FilePageStore pageStore = (FilePageStore) storeMgr.getPageStoreFactory(grpId, false).createPageStore(getTypeByPartId(partId), part::toPath, val -> {
})) {
if (partId == INDEX_PARTITION) {
checkPartitionsPageCrcSum(() -> pageStore, INDEX_PARTITION, FLAG_IDX);
return null;
}
if (grpId == MetaStorage.METASTORAGE_CACHE_ID) {
checkPartitionsPageCrcSum(() -> pageStore, partId, FLAG_DATA);
return null;
}
ByteBuffer pageBuff = buff.get();
pageBuff.clear();
pageStore.read(0, pageBuff, true);
long pageAddr = GridUnsafe.bufferAddress(pageBuff);
PagePartitionMetaIO io = PageIO.getPageIO(pageBuff);
GridDhtPartitionState partState = fromOrdinal(io.getPartitionState(pageAddr));
if (partState != OWNING) {
throw new IgniteCheckedException("Snapshot partitions must be in the OWNING " + "state only: " + partState);
}
long updateCntr = io.getUpdateCounter(pageAddr);
long size = io.getSize(pageAddr);
if (log.isDebugEnabled()) {
log.debug("Partition [grpId=" + grpId + ", id=" + partId + ", counter=" + updateCntr + ", size=" + size + "]");
}
// Snapshot partitions must always be in OWNING state.
// There are no `primary` partitions for a snapshot.
PartitionKeyV2 key = new PartitionKeyV2(grpId, partId, grpName);
PartitionHashRecordV2 hash = calculatePartitionHash(key, updateCntr, meta.consistentId(), GridDhtPartitionState.OWNING, false, size, snpMgr.partitionRowIterator(snpCtx, grpName, partId, pageStore));
assert hash != null : "OWNING must have hash: " + key;
res.put(key, hash);
} catch (IOException e) {
throw new IgniteCheckedException(e);
}
return null;
});
} catch (Throwable t) {
log.error("Error executing handler: ", t);
throw t;
} finally {
for (GridComponent comp : snpCtx) comp.stop(true);
}
return res;
}
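One detail worth noting above is the per-thread direct buffer: partition files are verified in parallel, and each worker reuses a single direct ByteBuffer through a ThreadLocal instead of allocating a fresh buffer per page read. A minimal sketch of the same pattern (PAGE_SIZE is a hypothetical stand-in for meta.pageSize()):

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

class PageBuffers {
    static final int PAGE_SIZE = 4096; // hypothetical page size

    // One lazily allocated direct buffer per verifying thread.
    static final ThreadLocal<ByteBuffer> BUFF = ThreadLocal.withInitial(
        () -> ByteBuffer.allocateDirect(PAGE_SIZE).order(ByteOrder.nativeOrder()));

    static ByteBuffer pageBuffer() {
        ByteBuffer buf = BUFF.get();

        buf.clear(); // reset position/limit before each page read

        return buf;
    }
}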
Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState in project ignite by apache.
The class PartitionReservationManager, method reservePartitions.
/**
* @param cacheIds Cache IDs.
* @param reqTopVer Topology version from request.
* @param explicitParts Explicit partitions list.
* @param nodeId Node ID.
* @param reqId Request ID.
* @return {@code PartitionReservation} whose cause message is {@code null} on success, or describes the failure otherwise.
* @throws IgniteCheckedException If failed.
*/
public PartitionReservation reservePartitions(@Nullable List<Integer> cacheIds, AffinityTopologyVersion reqTopVer, final int[] explicitParts, UUID nodeId, long reqId) throws IgniteCheckedException {
try (TraceSurroundings ignored = MTC.support(ctx.tracing().create(SQL_PARTITIONS_RESERVE, MTC.span()))) {
assert reqTopVer != null;
AffinityTopologyVersion topVer = ctx.cache().context().exchange().lastAffinityChangedTopologyVersion(reqTopVer);
if (F.isEmpty(cacheIds))
return new PartitionReservation(Collections.emptyList());
Collection<Integer> partIds;
if (explicitParts == null)
partIds = null;
else if (explicitParts.length == 0)
partIds = Collections.emptyList();
else {
partIds = new ArrayList<>(explicitParts.length);
for (int explicitPart : explicitParts) partIds.add(explicitPart);
}
List<GridReservable> reserved = new ArrayList<>();
for (int i = 0; i < cacheIds.size(); i++) {
GridCacheContext<?, ?> cctx = ctx.cache().context().cacheContext(cacheIds.get(i));
// Cache was not found, probably was not deployed yet.
if (cctx == null) {
return new PartitionReservation(reserved, String.format("Failed to reserve partitions for query (cache is not " + "found on local node) [localNodeId=%s, rmtNodeId=%s, reqId=%s, affTopVer=%s, cacheId=%s]", ctx.localNodeId(), nodeId, reqId, topVer, cacheIds.get(i)));
}
if (cctx.isLocal() || !cctx.rebalanceEnabled())
continue;
// For a replicated cache, the topology version does not make sense.
final PartitionReservationKey grpKey = new PartitionReservationKey(cctx.name(), cctx.isReplicated() ? null : topVer);
GridReservable r = reservations.get(grpKey);
if (explicitParts == null && r != null) {
// Try to reserve the whole group if a group reservation exists and no explicit partitions were requested.
if (r != REPLICATED_RESERVABLE) {
if (!r.reserve())
return new PartitionReservation(reserved, String.format("Failed to reserve partitions for query (group " + "reservation failed) [localNodeId=%s, rmtNodeId=%s, reqId=%s, affTopVer=%s, cacheId=%s, " + "cacheName=%s]", ctx.localNodeId(), nodeId, reqId, topVer, cacheIds.get(i), cctx.name()));
reserved.add(r);
MTC.span().addLog(() -> "Cache partitions were reserved " + r);
}
} else {
// Try to reserve partitions one by one.
int partsCnt = cctx.affinity().partitions();
if (cctx.isReplicated()) {
// Check that all partitions of the replicated cache are in OWNING state.
if (r == null) {
// Check only once.
for (int p = 0; p < partsCnt; p++) {
GridDhtLocalPartition part = partition(cctx, p);
// We don't need to reserve partitions because they will not be evicted in replicated caches.
GridDhtPartitionState partState = part != null ? part.state() : null;
if (partState != OWNING)
return new PartitionReservation(reserved, String.format("Failed to reserve partitions for " + "query (partition of REPLICATED cache is not in OWNING state) [" + "localNodeId=%s, rmtNodeId=%s, reqId=%s, affTopVer=%s, cacheId=%s, " + "cacheName=%s, part=%s, partFound=%s, partState=%s]", ctx.localNodeId(), nodeId, reqId, topVer, cacheIds.get(i), cctx.name(), p, (part != null), partState));
}
// Mark that we checked this replicated cache.
reservations.putIfAbsent(grpKey, REPLICATED_RESERVABLE);
MTC.span().addLog(() -> "Cache partitions were reserved [cache=" + cctx.name() + ", partitions=[0.." + partsCnt + ']');
}
} else {
// Reserve primary partitions for the partitioned cache (if none were given explicitly).
if (explicitParts == null)
partIds = cctx.affinity().primaryPartitions(ctx.localNodeId(), topVer);
int reservedCnt = 0;
for (int partId : partIds) {
GridDhtLocalPartition part = partition(cctx, partId);
GridDhtPartitionState partState = part != null ? part.state() : null;
if (partState != OWNING) {
if (partState == LOST)
failQueryOnLostData(cctx, part);
else {
return new PartitionReservation(reserved, String.format("Failed to reserve partitions " + "for query (partition of PARTITIONED cache is not found or not in OWNING " + "state) [localNodeId=%s, rmtNodeId=%s, reqId=%s, affTopVer=%s, cacheId=%s, " + "cacheName=%s, part=%s, partFound=%s, partState=%s]", ctx.localNodeId(), nodeId, reqId, topVer, cacheIds.get(i), cctx.name(), partId, (part != null), partState));
}
}
if (!part.reserve()) {
return new PartitionReservation(reserved, String.format("Failed to reserve partitions for query " + "(partition of PARTITIONED cache cannot be reserved) [" + "localNodeId=%s, rmtNodeId=%s, reqId=%s, affTopVer=%s, cacheId=%s, " + "cacheName=%s, part=%s, partFound=%s, partState=%s]", ctx.localNodeId(), nodeId, reqId, topVer, cacheIds.get(i), cctx.name(), partId, true, partState));
}
reserved.add(part);
reservedCnt++;
// Double check that we are still in owning state and partition contents are not cleared.
partState = part.state();
if (partState != OWNING) {
if (partState == LOST)
failQueryOnLostData(cctx, part);
else {
return new PartitionReservation(reserved, String.format("Failed to reserve partitions for " + "query (partition of PARTITIONED cache is not in OWNING state after " + "reservation) [localNodeId=%s, rmtNodeId=%s, reqId=%s, affTopVer=%s, " + "cacheId=%s, cacheName=%s, part=%s, partState=%s]", ctx.localNodeId(), nodeId, reqId, topVer, cacheIds.get(i), cctx.name(), partId, partState));
}
}
}
final Collection<Integer> finalPartIds = partIds;
MTC.span().addLog(() -> "Cache partitions were reserved [cache=" + cctx.name() + ", partitions=" + finalPartIds + ", topology=" + topVer + ']');
if (explicitParts == null && reservedCnt > 0) {
// We reserved all the primary partitions for the cache; attempt to add a group reservation.
GridDhtPartitionsReservation grp = new GridDhtPartitionsReservation(topVer, cctx, "SQL");
if (grp.register(reserved.subList(reserved.size() - reservedCnt, reserved.size()))) {
if (reservations.putIfAbsent(grpKey, grp) != null)
throw new IllegalStateException("Reservation already exists.");
grp.onPublish(new CI1<GridDhtPartitionsReservation>() {
@Override
public void apply(GridDhtPartitionsReservation r) {
reservations.remove(grpKey, r);
}
});
}
}
}
}
}
return new PartitionReservation(reserved);
}
}
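The partitioned-cache branch above relies on a reserve-then-recheck pattern: checking the state alone is racy, so the code reserves the partition and then reads its state again to make sure it was not evicted or marked lost in between. A minimal sketch with a hypothetical Reservable partition type (the real one is GridDhtLocalPartition):

// Hypothetical minimal partition abstraction, for illustration only.
interface Reservable {
    boolean reserve();

    void release();

    boolean isOwning();
}

class ReserveThenRecheck {
    static boolean tryReserveOwning(Reservable part) {
        if (!part.isOwning())
            return false; // fast pre-check before paying for a reservation

        if (!part.reserve())
            return false; // reservation itself failed

        if (!part.isOwning()) {
            part.release(); // state changed after reservation: undo and fail

            return false;
        }

        return true; // reserved and still OWNING
    }
}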