use of com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata in project pinot by linkedin.
the class PinotLLCRealtimeSegmentManager method commitSegment.
/**
 * This method is invoked after the realtime segment is uploaded but before a response is sent to the server.
 * It updates the propertystore segment metadata from IN_PROGRESS to DONE, and also creates new propertystore
 * records for new segments, and puts them in idealstate in CONSUMING state.
 *
 * @param rawTableName Raw table name
 * @param committingSegmentNameStr Committing segment name
 * @param nextOffset The offset with which the next segment should start.
 * @return true if the segment was committed successfully
 */
public boolean commitSegment(String rawTableName, final String committingSegmentNameStr, long nextOffset) {
  final long now = System.currentTimeMillis();
  final String realtimeTableName = TableNameBuilder.REALTIME_TABLE_NAME_BUILDER.forTable(rawTableName);
  final LLCRealtimeSegmentZKMetadata oldSegMetadata = getRealtimeSegmentZKMetadata(realtimeTableName, committingSegmentNameStr);
  final LLCSegmentName oldSegmentName = new LLCSegmentName(committingSegmentNameStr);
  final int partitionId = oldSegmentName.getPartitionId();
  final int oldSeqNum = oldSegmentName.getSequenceNumber();
  oldSegMetadata.setEndOffset(nextOffset);
  oldSegMetadata.setStatus(CommonConstants.Segment.Realtime.Status.DONE);
  oldSegMetadata.setDownloadUrl(ControllerConf.constructDownloadUrl(rawTableName, committingSegmentNameStr, _controllerConf.generateVipUrl()));
  // Pull segment metadata from the incoming segment and set it in the zk segment metadata
  SegmentMetadataImpl segmentMetadata = extractSegmentMetadata(rawTableName, committingSegmentNameStr);
  oldSegMetadata.setCrc(Long.valueOf(segmentMetadata.getCrc()));
  oldSegMetadata.setStartTime(segmentMetadata.getTimeInterval().getStartMillis());
  oldSegMetadata.setEndTime(segmentMetadata.getTimeInterval().getEndMillis());
  oldSegMetadata.setTimeUnit(TimeUnit.MILLISECONDS);
  oldSegMetadata.setIndexVersion(segmentMetadata.getVersion());
  oldSegMetadata.setTotalRawDocs(segmentMetadata.getTotalRawDocs());
  final ZNRecord oldZnRecord = oldSegMetadata.toZNRecord();
  final String oldZnodePath = ZKMetadataProvider.constructPropertyStorePathForSegment(realtimeTableName, committingSegmentNameStr);
  final ZNRecord partitionAssignment = getKafkaPartitionAssignment(realtimeTableName);
  // We cannot create the new segment without the Kafka partition assignment
  if (partitionAssignment == null) {
    LOGGER.warn("Kafka partition assignment not found for {}", realtimeTableName);
    throw new RuntimeException("Kafka partition assignment not found. Not committing segment");
  }
  List<String> newInstances = partitionAssignment.getListField(Integer.toString(partitionId));
  // Construct segment metadata and idealstate for the new segment
  final int newSeqNum = oldSeqNum + 1;
  final long newStartOffset = nextOffset;
  LLCSegmentName newHolder = new LLCSegmentName(oldSegmentName.getTableName(), partitionId, newSeqNum, now);
  final String newSegmentNameStr = newHolder.getSegmentName();
  ZNRecord newZnRecord = makeZnRecordForNewSegment(rawTableName, newInstances.size(), newStartOffset, newSegmentNameStr);
  final LLCRealtimeSegmentZKMetadata newSegmentZKMetadata = new LLCRealtimeSegmentZKMetadata(newZnRecord);
  updateFlushThresholdForSegmentMetadata(newSegmentZKMetadata, partitionAssignment, getRealtimeTableFlushSizeForTable(rawTableName));
  newZnRecord = newSegmentZKMetadata.toZNRecord();
  final String newZnodePath = ZKMetadataProvider.constructPropertyStorePathForSegment(realtimeTableName, newSegmentNameStr);
  List<String> paths = new ArrayList<>(2);
  paths.add(oldZnodePath);
  paths.add(newZnodePath);
  List<ZNRecord> records = new ArrayList<>(2);
  records.add(oldZnRecord);
  records.add(newZnRecord);
  /*
   * Update zookeeper in two steps.
   *
   * Step 1: Update PROPERTYSTORE to change the segment metadata for the old segment and add a new one for the new segment
   * Step 2: Update IDEALSTATES to include the new segment in the idealstate for the table in CONSUMING state, and change
   * the old segment to ONLINE state.
   *
   * The controller may fail between these two steps, so when a new controller takes over as leader, it needs to
   * check whether there are any recent segments in PROPERTYSTORE that are not accounted for in idealState. If so,
   * it should create the new segments in idealState.
   *
   * If the controller fails after step-2, we are fine because the idealState has the new segments.
   * If the controller fails before step-1, the server will see this as an upload failure, and will re-try.
   */
  writeSegmentsToPropertyStore(paths, records, realtimeTableName);
  // TODO Introduce a controller failure here for integration testing
  // When multiple segments of the same table complete around the same time, it is possible that
  // the idealstate update fails due to contention. We serialize the updates to the idealstate
  // to reduce this contention. We may still contend with RetentionManager, or other updates
  // to idealstate from other controllers, but then we have the retry mechanism to get around that.
  // The hash code can be negative, so make sure we get a positive lock index
  int lockIndex = (realtimeTableName.hashCode() & Integer.MAX_VALUE) % NUM_LOCKS;
  Lock lock = _idealstateUpdateLocks[lockIndex];
  try {
    lock.lock();
    updateIdealState(realtimeTableName, newInstances, committingSegmentNameStr, newSegmentNameStr);
    LOGGER.info("Changed {} to ONLINE and created {} in CONSUMING", committingSegmentNameStr, newSegmentNameStr);
  } finally {
    lock.unlock();
  }
  return true;
}
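One detail worth calling out in the locking code above: masking the hash code with Integer.MAX_VALUE clears the sign bit, which is safer than Math.abs() because Math.abs(Integer.MIN_VALUE) is still negative. Below is a minimal, self-contained sketch of the same lock-striping idiom; the lock count, class name, and table names are illustrative, not taken from Pinot.

import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

public class LockStripingSketch {
  private static final int NUM_LOCKS = 4;  // illustrative; the real code picks its own constant
  private static final Lock[] LOCKS = new Lock[NUM_LOCKS];

  static {
    for (int i = 0; i < NUM_LOCKS; i++) {
      LOCKS[i] = new ReentrantLock();
    }
  }

  // All updates for the same table hash to the same lock and are therefore serialized,
  // while updates for different tables usually proceed in parallel.
  static void updateWithLock(String tableName, Runnable update) {
    // hashCode() can be negative (and Math.abs(Integer.MIN_VALUE) is still negative),
    // so clear the sign bit before taking the modulus.
    int lockIndex = (tableName.hashCode() & Integer.MAX_VALUE) % NUM_LOCKS;
    Lock lock = LOCKS[lockIndex];
    lock.lock();
    try {
      update.run();
    } finally {
      lock.unlock();
    }
  }

  public static void main(String[] args) {
    updateWithLock("myTable_REALTIME", () -> System.out.println("idealstate updated"));
  }
}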
use of com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata in project pinot by linkedin.
the class RealtimeTableDataManager method addSegment.
/*
 * This call comes in one of two ways:
 * For HL Segments:
 * - We are being directed by helix to own up all the segments that we committed and are still in retention. In this
 *   case we treat it exactly like how OfflineTableDataManager would -- wrap it into an OfflineSegmentDataManager,
 *   and put it in the map.
 * - We are being asked to own up a new realtime segment. In this case, we wrap the segment with a
 *   RealTimeSegmentDataManager (that kicks off Kafka consumption). When the segment is committed we get notified
 *   via the notifySegmentCommitted call, at which time we replace the segment with the OfflineSegmentDataManager.
 * For LL Segments:
 * - We are being asked to start consuming from a kafka partition.
 * - We did not know about the segment and are being asked to download and own the segment (re-balancing, or
 *   replacing a realtime server with a fresh one, maybe). We need to look at segment metadata and decide whether
 *   to start consuming or download the segment.
 */
@Override
public void addSegment(ZkHelixPropertyStore<ZNRecord> propertyStore, AbstractTableConfig tableConfig, InstanceZKMetadata instanceZKMetadata, SegmentZKMetadata inputSegmentZKMetadata) throws Exception {
  // TODO FIXME
  // Hack. We get the _helixPropertyStore here and save it, knowing that we will get this addSegment call
  // before the notifyCommitted call (that uses _helixPropertyStore)
  this._helixPropertyStore = propertyStore;
  final String segmentId = inputSegmentZKMetadata.getSegmentName();
  final String tableName = inputSegmentZKMetadata.getTableName();
  if (!(inputSegmentZKMetadata instanceof RealtimeSegmentZKMetadata)) {
    LOGGER.warn("Got called with an unexpected instance object: {}, table {}, segment {}", inputSegmentZKMetadata.getClass().getSimpleName(), tableName, segmentId);
    return;
  }
  RealtimeSegmentZKMetadata segmentZKMetadata = (RealtimeSegmentZKMetadata) inputSegmentZKMetadata;
  LOGGER.info("Attempting to add realtime segment {} for table {}", segmentId, tableName);
  if (new File(_indexDir, segmentId).exists() && segmentZKMetadata.getStatus() == Status.DONE) {
    // The segment already exists on disk, and we have committed the realtime segment in ZK. Treat it like an offline segment
    if (_segmentsMap.containsKey(segmentId)) {
      LOGGER.warn("Got reload for segment already on disk {} table {}, have {}", segmentId, tableName, _segmentsMap.get(segmentId).getClass().getSimpleName());
      return;
    }
    IndexSegment segment = ColumnarSegmentLoader.load(new File(_indexDir, segmentId), _readMode, _indexLoadingConfigMetadata);
    addSegment(segment);
    markSegmentAsLoaded(segmentId);
  } else {
    // The segment is not yet committed (or not on disk): start consuming, or download it if it is an LLC segment
    // that has already been committed elsewhere
    if (_segmentsMap.containsKey(segmentId)) {
      LOGGER.warn("Got reload for segment not on disk {} table {}, have {}", segmentId, tableName, _segmentsMap.get(segmentId).getClass().getSimpleName());
      return;
    }
    PinotHelixPropertyStoreZnRecordProvider propertyStoreHelper = PinotHelixPropertyStoreZnRecordProvider.forSchema(propertyStore);
    ZNRecord record = propertyStoreHelper.get(tableConfig.getValidationConfig().getSchemaName());
    LOGGER.info("Found schema {}", tableConfig.getValidationConfig().getSchemaName());
    Schema schema = SchemaUtils.fromZNRecord(record);
    if (!isValid(schema, tableConfig.getIndexingConfig())) {
      LOGGER.error("Not adding segment {}", segmentId);
      throw new RuntimeException("Mismatching schema/table config for " + _tableName);
    }
    SegmentDataManager manager;
    if (SegmentName.isHighLevelConsumerSegmentName(segmentId)) {
      manager = new HLRealtimeSegmentDataManager(segmentZKMetadata, tableConfig, instanceZKMetadata, this, _indexDir.getAbsolutePath(), _readMode, SchemaUtils.fromZNRecord(record), _serverMetrics);
    } else {
      LLCRealtimeSegmentZKMetadata llcSegmentMetadata = (LLCRealtimeSegmentZKMetadata) segmentZKMetadata;
      if (segmentZKMetadata.getStatus().equals(Status.DONE)) {
        // TODO Remove code duplication here and in LLRealtimeSegmentDataManager
        downloadAndReplaceSegment(segmentId, llcSegmentMetadata);
        return;
      }
      manager = new LLRealtimeSegmentDataManager(segmentZKMetadata, tableConfig, instanceZKMetadata, this, _indexDir.getAbsolutePath(), SchemaUtils.fromZNRecord(record), _serverMetrics);
    }
    LOGGER.info("Initialize RealtimeSegmentDataManager - " + segmentId);
    try {
      _rwLock.writeLock().lock();
      _segmentsMap.put(segmentId, manager);
    } finally {
      _rwLock.writeLock().unlock();
    }
    _loadingSegments.add(segmentId);
  }
}
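The comment at the top of addSegment describes four paths, but the method body interleaves them with locking and schema validation, so a condensed sketch of just the branching may help. Everything here (SegmentInfo, decide, the field names) is a hypothetical stand-in for illustration, not a Pinot API.

// A condensed, hypothetical sketch of the branching in addSegment above.
public class AddSegmentDecisionSketch {

  enum Status { IN_PROGRESS, DONE }

  static class SegmentInfo {
    String segmentId;
    boolean onDisk;       // does the segment directory already exist locally?
    boolean isHighLevel;  // HLC segment name vs LLC segment name
    Status status;        // committed (DONE) or still consuming (IN_PROGRESS)
  }

  static String decide(SegmentInfo s) {
    if (s.onDisk && s.status == Status.DONE) {
      return "load from disk, like an offline segment";
    }
    if (s.isHighLevel) {
      return "start an HLRealtimeSegmentDataManager (kicks off Kafka consumption)";
    }
    if (s.status == Status.DONE) {
      return "downloadAndReplaceSegment (committed elsewhere; just fetch the copy)";
    }
    return "start an LLRealtimeSegmentDataManager (consume from the Kafka partition)";
  }

  public static void main(String[] args) {
    SegmentInfo s = new SegmentInfo();
    s.segmentId = "myTable__0__3__20170101T0000Z";
    s.isHighLevel = false;
    s.status = Status.IN_PROGRESS;
    System.out.println(decide(s));  // start an LLRealtimeSegmentDataManager ...
  }
}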
use of com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata in project pinot by linkedin.
the class LLRealtimeSegmentDataManagerTest method createFakeSegmentManager.
private FakeLLRealtimeSegmentDataManager createFakeSegmentManager() throws Exception {
  LLCRealtimeSegmentZKMetadata segmentZKMetadata = createZkMetadata();
  AbstractTableConfig tableConfig = createTableConfig();
  InstanceZKMetadata instanceZKMetadata = new InstanceZKMetadata();
  RealtimeTableDataManager tableDataManager = createTableDataManager();
  String resourceDir = _segmentDir;
  Schema schema = Schema.fromString(makeSchema());
  ServerMetrics serverMetrics = new ServerMetrics(new MetricsRegistry());
  FakeLLRealtimeSegmentDataManager segmentDataManager = new FakeLLRealtimeSegmentDataManager(segmentZKMetadata, tableConfig, instanceZKMetadata, tableDataManager, resourceDir, schema, serverMetrics);
  return segmentDataManager;
}
use of com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata in project pinot by linkedin.
the class LLRealtimeSegmentDataManagerTest method testOnlineTransitionAfterStop.
// Tests going online from the consuming state:
// - If the state is COMMITTED or RETAINED, there is nothing to do.
// - If the state is DISCARDED or ERROR, downloadAndReplace the segment.
@Test
public void testOnlineTransitionAfterStop() throws Exception {
  LLCRealtimeSegmentZKMetadata metadata = new LLCRealtimeSegmentZKMetadata();
  final long finalOffset = _startOffset + 600;
  metadata.setEndOffset(finalOffset);
  {
    FakeLLRealtimeSegmentDataManager segmentDataManager = createFakeSegmentManager();
    segmentDataManager._stopWaitTimeMs = 0;
    segmentDataManager._state.set(segmentDataManager, LLRealtimeSegmentDataManager.State.COMMITTED);
    segmentDataManager.goOnlineFromConsuming(metadata);
    Assert.assertFalse(segmentDataManager._downloadAndReplaceCalled);
    Assert.assertFalse(segmentDataManager._buildAndReplaceCalled);
  }
  {
    FakeLLRealtimeSegmentDataManager segmentDataManager = createFakeSegmentManager();
    segmentDataManager._stopWaitTimeMs = 0;
    segmentDataManager._state.set(segmentDataManager, LLRealtimeSegmentDataManager.State.RETAINED);
    segmentDataManager.goOnlineFromConsuming(metadata);
    Assert.assertFalse(segmentDataManager._downloadAndReplaceCalled);
    Assert.assertFalse(segmentDataManager._buildAndReplaceCalled);
  }
  {
    FakeLLRealtimeSegmentDataManager segmentDataManager = createFakeSegmentManager();
    segmentDataManager._stopWaitTimeMs = 0;
    segmentDataManager._state.set(segmentDataManager, LLRealtimeSegmentDataManager.State.DISCARDED);
    segmentDataManager.goOnlineFromConsuming(metadata);
    Assert.assertTrue(segmentDataManager._downloadAndReplaceCalled);
    Assert.assertFalse(segmentDataManager._buildAndReplaceCalled);
  }
  {
    FakeLLRealtimeSegmentDataManager segmentDataManager = createFakeSegmentManager();
    segmentDataManager._stopWaitTimeMs = 0;
    segmentDataManager._state.set(segmentDataManager, LLRealtimeSegmentDataManager.State.ERROR);
    segmentDataManager.goOnlineFromConsuming(metadata);
    Assert.assertTrue(segmentDataManager._downloadAndReplaceCalled);
    Assert.assertFalse(segmentDataManager._buildAndReplaceCalled);
  }
  // If holding, but we have overshot the expected final offset, then download and replace
  {
    FakeLLRealtimeSegmentDataManager segmentDataManager = createFakeSegmentManager();
    segmentDataManager._stopWaitTimeMs = 0;
    segmentDataManager._state.set(segmentDataManager, LLRealtimeSegmentDataManager.State.HOLDING);
    segmentDataManager.setCurrentOffset(finalOffset + 1);
    segmentDataManager.goOnlineFromConsuming(metadata);
    Assert.assertTrue(segmentDataManager._downloadAndReplaceCalled);
    Assert.assertFalse(segmentDataManager._buildAndReplaceCalled);
  }
  // If catching up, but we have overshot the expected final offset, then download and replace
  {
    FakeLLRealtimeSegmentDataManager segmentDataManager = createFakeSegmentManager();
    segmentDataManager._stopWaitTimeMs = 0;
    segmentDataManager._state.set(segmentDataManager, LLRealtimeSegmentDataManager.State.CATCHING_UP);
    segmentDataManager.setCurrentOffset(finalOffset + 1);
    segmentDataManager.goOnlineFromConsuming(metadata);
    Assert.assertTrue(segmentDataManager._downloadAndReplaceCalled);
    Assert.assertFalse(segmentDataManager._buildAndReplaceCalled);
  }
  // If catching up, but we did not get to the final offset, then download and replace
  {
    FakeLLRealtimeSegmentDataManager segmentDataManager = createFakeSegmentManager();
    segmentDataManager._stopWaitTimeMs = 0;
    segmentDataManager._state.set(segmentDataManager, LLRealtimeSegmentDataManager.State.CATCHING_UP);
    segmentDataManager._consumeOffsets.add(finalOffset - 1);
    segmentDataManager.goOnlineFromConsuming(metadata);
    Assert.assertTrue(segmentDataManager._downloadAndReplaceCalled);
    Assert.assertFalse(segmentDataManager._buildAndReplaceCalled);
  }
  // But if we get to the exact offset, we build and replace, not download
  {
    FakeLLRealtimeSegmentDataManager segmentDataManager = createFakeSegmentManager();
    segmentDataManager._stopWaitTimeMs = 0;
    segmentDataManager._state.set(segmentDataManager, LLRealtimeSegmentDataManager.State.CATCHING_UP);
    segmentDataManager._consumeOffsets.add(finalOffset);
    segmentDataManager.goOnlineFromConsuming(metadata);
    Assert.assertFalse(segmentDataManager._downloadAndReplaceCalled);
    Assert.assertTrue(segmentDataManager._buildAndReplaceCalled);
  }
}
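The eight cases above amount to a small decision table for goOnlineFromConsuming. The sketch below restates it in plain Java; the State enum mirrors LLRealtimeSegmentDataManager.State, everything else (Action, the method itself) is a hypothetical stand-in, and the HOLDING-at-exact-offset branch is an assumption made by symmetry with CATCHING_UP, not something the test exercises.

// A simplified restatement of the behavior the test above pins down.
public class GoOnlineDecisionSketch {

  enum State { COMMITTED, RETAINED, DISCARDED, ERROR, HOLDING, CATCHING_UP }

  enum Action { NOTHING, DOWNLOAD_AND_REPLACE, BUILD_AND_REPLACE }

  static Action goOnlineFromConsuming(State state, long currentOffset, long endOffset) {
    switch (state) {
      case COMMITTED:
      case RETAINED:
        return Action.NOTHING;               // already final; nothing to do
      case DISCARDED:
      case ERROR:
        return Action.DOWNLOAD_AND_REPLACE;  // local data is unusable; fetch the committed copy
      case HOLDING:                          // assumption: HOLDING at the exact offset builds, like CATCHING_UP
      case CATCHING_UP:
        if (currentOffset == endOffset) {
          return Action.BUILD_AND_REPLACE;   // caught up exactly; build the segment locally
        }
        return Action.DOWNLOAD_AND_REPLACE;  // overshot or fell short; fetch the committed copy
      default:
        throw new IllegalStateException("Unexpected state: " + state);
    }
  }

  public static void main(String[] args) {
    System.out.println(goOnlineFromConsuming(State.CATCHING_UP, 600, 600));  // BUILD_AND_REPLACE
  }
}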
use of com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata in project pinot by linkedin.
the class LLRealtimeSegmentDataManagerTest method createZkMetadata.
private LLCRealtimeSegmentZKMetadata createZkMetadata() {
  LLCRealtimeSegmentZKMetadata segmentZKMetadata = new LLCRealtimeSegmentZKMetadata();
  segmentZKMetadata.setTableName(_tableName);
  segmentZKMetadata.setSegmentName(_segmentNameStr);
  segmentZKMetadata.setStartOffset(_startOffset);
  return segmentZKMetadata;
}