use of com.linkedin.pinot.common.segment.SegmentMetadata in project pinot by linkedin.
the class SegmentFetcherAndLoader method addOrReplaceOfflineSegment.
public void addOrReplaceOfflineSegment(String tableName, String segmentId, boolean retryOnFailure) {
  OfflineSegmentZKMetadata offlineSegmentZKMetadata =
      ZKMetadataProvider.getOfflineSegmentZKMetadata(_propertyStore, tableName, segmentId);

  // Try to load table schema from Helix property store.
  // This schema is used for adding default values for newly added columns.
  Schema schema = null;
  try {
    schema = getSchema(tableName);
  } catch (Exception e) {
    LOGGER.error("Caught exception while trying to load schema for table: {}", tableName, e);
  }

  LOGGER.info("Adding or replacing segment {} for table {}, metadata {}", segmentId, tableName,
      offlineSegmentZKMetadata);
  try {
    SegmentMetadata segmentMetadataForCheck = new SegmentMetadataImpl(offlineSegmentZKMetadata);

    // We lock the segment in order to get its metadata, and then release the lock, so it is possible
    // that the segment is dropped after we get its metadata.
    SegmentMetadata localSegmentMetadata = _dataManager.getSegmentMetadata(tableName, segmentId);

    if (localSegmentMetadata == null) {
      LOGGER.info("Segment {} of table {} is not loaded in memory, checking disk", segmentId, tableName);
      final String localSegmentDir = getSegmentLocalDirectory(tableName, segmentId);
      if (new File(localSegmentDir).exists()) {
        LOGGER.info("Segment {} of table {} found on disk, attempting to load it", segmentId, tableName);
        try {
          localSegmentMetadata = _metadataLoader.loadIndexSegmentMetadataFromDir(localSegmentDir);
          LOGGER.info("Found segment {} of table {} with crc {} on disk", segmentId, tableName,
              localSegmentMetadata.getCrc());
        } catch (Exception e) {
          // Corrupted or partially-written segment directory: delete it and fall through to a
          // fresh download from the controller.
          LOGGER.error("Failed to load segment metadata from {}. Deleting it.", localSegmentDir, e);
          FileUtils.deleteQuietly(new File(localSegmentDir));
          localSegmentMetadata = null;
        }
        try {
          if (!isNewSegmentMetadata(localSegmentMetadata, segmentMetadataForCheck, segmentId, tableName)) {
            LOGGER.info("Segment metadata same as before, loading {} of table {} (crc {}) from disk", segmentId,
                tableName, localSegmentMetadata.getCrc());
            AbstractTableConfig tableConfig = ZKMetadataProvider.getOfflineTableConfig(_propertyStore, tableName);
            _dataManager.addSegment(localSegmentMetadata, tableConfig, schema);
            // TODO Update zk metadata with CRC for this instance
            return;
          }
        } catch (V3RemoveIndexException e) {
          LOGGER.info("Unable to remove local index from V3 format segment: {}, table: {}, try to reload it from controller.",
              segmentId, tableName, e);
          FileUtils.deleteQuietly(new File(localSegmentDir));
          localSegmentMetadata = null;
        } catch (Exception e) {
          LOGGER.error("Failed to load {} of table {} from local, will try to reload it from controller!",
              segmentId, tableName, e);
          FileUtils.deleteQuietly(new File(localSegmentDir));
          localSegmentMetadata = null;
        }
      }
    }

    // Download the segment from the controller if we don't have it locally, or if the metadata
    // that we have is different from that in zookeeper.
    if (isNewSegmentMetadata(localSegmentMetadata, segmentMetadataForCheck, segmentId, tableName)) {
      if (localSegmentMetadata == null) {
        LOGGER.info("Loading new segment {} of table {} from controller", segmentId, tableName);
      } else {
        LOGGER.info("Trying to refresh segment {} of table {} with new data.", segmentId, tableName);
      }
      int retryCount;
      int maxRetryCount = 1;
      if (retryOnFailure) {
        maxRetryCount = _segmentLoadMaxRetryCount;
      }
      for (retryCount = 0; retryCount < maxRetryCount; ++retryCount) {
        long attemptStartTime = System.currentTimeMillis();
        try {
          AbstractTableConfig tableConfig = ZKMetadataProvider.getOfflineTableConfig(_propertyStore, tableName);
          final String uri = offlineSegmentZKMetadata.getDownloadUrl();
          final String localSegmentDir = downloadSegmentToLocal(uri, tableName, segmentId);
          final SegmentMetadata segmentMetadata = _metadataLoader.loadIndexSegmentMetadataFromDir(localSegmentDir);
          _dataManager.addSegment(segmentMetadata, tableConfig, schema);
          LOGGER.info("Downloaded segment {} of table {} crc {} from controller", segmentId, tableName,
              segmentMetadata.getCrc());
          // Successfully loaded the segment, break out of the retry loop
          break;
        } catch (Exception e) {
          long attemptDurationMillis = System.currentTimeMillis() - attemptStartTime;
          LOGGER.warn("Caught exception while loading segment " + segmentId + "(table " + tableName + "), attempt "
              + (retryCount + 1) + " of " + maxRetryCount, e);
          // Do we need to wait for the next retry attempt?
          if (retryCount < maxRetryCount - 1) {
            // Exponentially back off, wait for (minDuration + attemptDurationMillis) *
            // 1.0..(2^retryCount)+1.0
            double maxRetryDurationMultiplier = Math.pow(2.0, (retryCount + 1));
            double retryDurationMultiplier = Math.random() * maxRetryDurationMultiplier + 1.0;
            long waitTime = (long) ((_segmentLoadMinRetryDelayMs + attemptDurationMillis) * retryDurationMultiplier);
            // FIX: added the missing closing parenthesis in the log message.
            LOGGER.warn("Waiting for " + TimeUnit.MILLISECONDS.toSeconds(waitTime) + " seconds to retry ("
                + segmentId + " of table " + tableName + ")");
            long waitEndTime = System.currentTimeMillis() + waitTime;
            while (System.currentTimeMillis() < waitEndTime) {
              try {
                // FIX: the original slept for Math.max(now - waitEndTime, 1L); since now is
                // always before waitEndTime here, that delta is negative and the call
                // degenerated into a 1 ms busy-wait loop. Sleep for the remaining time instead.
                Thread.sleep(Math.max(waitEndTime - System.currentTimeMillis(), 1L));
              } catch (InterruptedException ie) {
                // FIX: restore the interrupt flag and stop waiting instead of swallowing the
                // interruption; sleeping again with the flag set would throw immediately.
                Thread.currentThread().interrupt();
                break;
              }
            }
          }
        }
      }
      // FIX: compare against the effective maxRetryCount rather than _segmentLoadMaxRetryCount;
      // when retryOnFailure == false (maxRetryCount == 1) a failed single attempt would
      // otherwise be silently ignored instead of raising an exception.
      if (maxRetryCount <= retryCount) {
        // FIX: balanced the parenthesis around the table name in the error message.
        String msg = "Failed to download segment " + segmentId + " (table " + tableName + ") after " + retryCount
            + " retries";
        LOGGER.error(msg);
        throw new RuntimeException(msg);
      }
    } else {
      LOGGER.info("Got already loaded segment {} of table {} crc {} again, will do nothing.", segmentId, tableName,
          localSegmentMetadata.getCrc());
    }
  } catch (final Exception e) {
    LOGGER.error("Cannot load segment : " + segmentId + " for table " + tableName, e);
    Utils.rethrowException(e);
    throw new AssertionError("Should not reach this");
  }
}
use of com.linkedin.pinot.common.segment.SegmentMetadata in project pinot by linkedin.
the class StarTreeQueryGenerator method main.
/**
 * Given star tree segments directory and number of queries, generate star tree queries.
 * Usage: StarTreeQueryGenerator starTreeSegmentsDirectory numQueries
 *
 * @param args arguments: [0] star tree segments directory, [1] number of queries to generate.
 * @throws Exception if a segment cannot be loaded or a generated query fails verification.
 */
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: StarTreeQueryGenerator starTreeSegmentsDirectory numQueries");
    return;
  }

  // Get segment metadata for the first segment to get table name and verify query is fit for star tree.
  File segmentsDir = new File(args[0]);
  Preconditions.checkState(segmentsDir.exists());
  Preconditions.checkState(segmentsDir.isDirectory());
  File[] segments = segmentsDir.listFiles();
  Preconditions.checkNotNull(segments);
  // FIX: guard against an empty directory — segments[0] would otherwise throw an
  // ArrayIndexOutOfBoundsException with a far less helpful message.
  Preconditions.checkState(segments.length > 0, "Segments directory is empty: %s", args[0]);
  File segment = segments[0];
  IndexSegment indexSegment = Loaders.IndexSegment.load(segment, ReadMode.heap);
  SegmentMetadata segmentMetadata = indexSegment.getSegmentMetadata();
  String tableName = segmentMetadata.getTableName();

  // Set up star tree query generator.
  int numQueries = Integer.parseInt(args[1]);
  SegmentInfoProvider infoProvider = new SegmentInfoProvider(args[0]);
  StarTreeQueryGenerator generator = new StarTreeQueryGenerator(tableName,
      infoProvider.getSingleValueDimensionColumns(), infoProvider.getMetricColumns(),
      infoProvider.getSingleValueDimensionValuesMap());
  Pql2Compiler compiler = new Pql2Compiler();
  for (int i = 0; i < numQueries; i++) {
    String query = generator.nextQuery();
    System.out.println(query);
    // Verify that query is fit for star tree.
    BrokerRequest brokerRequest = compiler.compileToBrokerRequest(query);
    Preconditions.checkState(RequestUtils.isFitForStarTreeIndex(segmentMetadata,
        RequestUtils.generateFilterQueryTree(brokerRequest), brokerRequest));
  }
}
use of com.linkedin.pinot.common.segment.SegmentMetadata in project pinot by linkedin.
the class PinotHelixResourceManager method ifRefreshAnExistedSegment.
private boolean ifRefreshAnExistedSegment(SegmentMetadata segmentMetadata, String segmentName, String tableName) {
  // Look up the ZK metadata of the segment this push is trying to refresh.
  OfflineSegmentZKMetadata zkMetadata = ZKMetadataProvider.getOfflineSegmentZKMetadata(_propertyStore,
      segmentMetadata.getTableName(), segmentMetadata.getName());
  if (zkMetadata == null) {
    LOGGER.info("Rejecting because Zk metadata is null for segment {} of table {}", segmentName, tableName);
    return false;
  }

  final SegmentMetadata existingMetadata = new SegmentMetadataImpl(zkMetadata);

  // The incoming segment must have been built strictly later than the one we already have.
  long incomingCreationTime = segmentMetadata.getIndexCreationTime();
  long existingCreationTime = existingMetadata.getIndexCreationTime();
  if (incomingCreationTime <= existingCreationTime) {
    LOGGER.info("Rejecting because of older or same creation time {} (we have {}) for segment {} of table {}",
        incomingCreationTime, existingCreationTime, segmentName, tableName);
    return false;
  }

  // An identical CRC means identical data, so there is nothing to refresh.
  if (segmentMetadata.getCrc().equals(existingMetadata.getCrc())) {
    LOGGER.info("Rejecting because of matching CRC exists (incoming={}, existing={}) for {} of table {}",
        segmentMetadata.getCrc(), existingMetadata.getCrc(), segmentName, tableName);
    return false;
  }

  return true;
}
use of com.linkedin.pinot.common.segment.SegmentMetadata in project pinot by linkedin.
the class ValidationManager method runValidation.
/**
 * Runs a validation pass over the currently loaded tables.
 *
 * <p>Only the leader controller performs validation; non-leaders return immediately. For each
 * Pinot table: OFFLINE tables are delegated to validateOfflineSegmentPush(), while REALTIME
 * tables have their segment metadata rebuilt from ZK, the total-document gauge updated, and —
 * when the table uses the simple (low-level) Kafka consumer — their LLC segments validated.
 */
public void runValidation() {
// Validation must run on exactly one controller in the cluster.
if (!_pinotHelixResourceManager.isLeader()) {
LOGGER.info("Skipping validation, not leader!");
return;
}
LOGGER.info("Starting validation");
// Fetch the list of tables
List<String> allTableNames = _pinotHelixResourceManager.getAllPinotTableNames();
ZkHelixPropertyStore<ZNRecord> propertyStore = _pinotHelixResourceManager.getPropertyStore();
for (String tableName : allTableNames) {
List<SegmentMetadata> segmentMetadataList = new ArrayList<SegmentMetadata>();
TableType tableType = TableNameBuilder.getTableTypeFromTableName(tableName);
// tableConfig stays null until successfully fetched; the catch block below relies on this
// to report which step failed.
AbstractTableConfig tableConfig = null;
_pinotHelixResourceManager.rebuildBrokerResourceFromHelixTags(tableName);
// For each table, fetch the metadata for all its segments
if (tableType.equals(TableType.OFFLINE)) {
validateOfflineSegmentPush(propertyStore, tableName, segmentMetadataList);
} else if (tableType.equals(TableType.REALTIME)) {
LOGGER.info("Starting to validate table {}", tableName);
List<RealtimeSegmentZKMetadata> realtimeSegmentZKMetadatas = ZKMetadataProvider.getRealtimeSegmentZKMetadataListForTable(propertyStore, tableName);
// false if this table has ONLY LLC segments (i.e. fully migrated)
boolean countHLCSegments = true;
KafkaStreamMetadata streamMetadata = null;
try {
tableConfig = _pinotHelixResourceManager.getRealtimeTableConfig(tableName);
streamMetadata = new KafkaStreamMetadata(tableConfig.getIndexingConfig().getStreamConfigs());
// Simple consumer only (no high-level consumer) means the table is fully migrated to
// LLC, so HLC segments must not be counted in the document total.
if (streamMetadata.hasSimpleKafkaConsumerType() && !streamMetadata.hasHighLevelKafkaConsumerType()) {
countHLCSegments = false;
}
for (RealtimeSegmentZKMetadata realtimeSegmentZKMetadata : realtimeSegmentZKMetadatas) {
SegmentMetadata segmentMetadata = new SegmentMetadataImpl(realtimeSegmentZKMetadata);
segmentMetadataList.add(segmentMetadata);
}
// Update the gauge to contain the total document count in the segments
_validationMetrics.updateTotalDocumentsGauge(tableName, computeRealtimeTotalDocumentInSegments(segmentMetadataList, countHLCSegments));
if (streamMetadata.hasSimpleKafkaConsumerType()) {
validateLLCSegments(tableName, tableConfig);
}
} catch (Exception e) {
// Distinguish the failed step by which locals are still null: table-config fetch,
// stream-config parsing, or the LLC/metadata validation itself.
if (tableConfig == null) {
LOGGER.warn("Cannot get realtime tableconfig for {}", tableName);
} else if (streamMetadata == null) {
LOGGER.warn("Cannot get streamconfig for {}", tableName);
} else {
LOGGER.error("Exception while validating table {}", tableName, e);
}
}
} else {
LOGGER.warn("Ignoring table type {} for table {}", tableType, tableName);
}
}
LOGGER.info("Validation completed");
}
use of com.linkedin.pinot.common.segment.SegmentMetadata in project pinot by linkedin.
the class ValidationManager method validateOfflineSegmentPush.
// For offline segment pushes, validate that there are no missing segments, and update metrics
private void validateOfflineSegmentPush(ZkHelixPropertyStore<ZNRecord> propertyStore, String tableName,
    List<SegmentMetadata> segmentMetadataList) {
  // Materialize segment metadata for every offline segment of the table. Note that the caller's
  // list is filled in place and reused for all the gauges below.
  List<OfflineSegmentZKMetadata> offlineSegmentZKMetadatas =
      ZKMetadataProvider.getOfflineSegmentZKMetadataListForTable(propertyStore, tableName);
  for (OfflineSegmentZKMetadata offlineSegmentZKMetadata : offlineSegmentZKMetadatas) {
    SegmentMetadata segmentMetadata = new SegmentMetadataImpl(offlineSegmentZKMetadata);
    segmentMetadataList.add(segmentMetadata);
  }

  // Calculate missing segments only for offline tables
  int missingSegmentCount = 0;

  // Compute the missing segments if there are at least two
  // FIX: the original condition (2 < size()) required at least THREE segments, contradicting the
  // comment above — a gap between exactly two segments was never reported.
  if (2 <= segmentMetadataList.size()) {
    List<Interval> segmentIntervals = new ArrayList<Interval>();
    for (SegmentMetadata segmentMetadata : segmentMetadataList) {
      Interval timeInterval = segmentMetadata.getTimeInterval();
      // Skip segments without a time interval or with timestamps outside the valid range.
      if (timeInterval != null && TimeUtils.timeValueInValidRange(timeInterval.getStartMillis())
          && TimeUtils.timeValueInValidRange(timeInterval.getEndMillis())) {
        segmentIntervals.add(timeInterval);
      }
    }
    List<Interval> missingIntervals =
        computeMissingIntervals(segmentIntervals, segmentMetadataList.get(0).getTimeGranularity());
    missingSegmentCount = missingIntervals.size();
    for (Interval missingInterval : missingIntervals) {
      LOGGER.warn("Missing data in table {} for time interval {}", tableName, missingInterval);
    }
  }

  // Update the gauge that contains the number of missing segments
  _validationMetrics.updateMissingSegmentsGauge(tableName, missingSegmentCount);

  // Compute the max segment end time and max segment push time
  long maxSegmentEndTime = Long.MIN_VALUE;
  long maxSegmentPushTime = Long.MIN_VALUE;
  for (SegmentMetadata segmentMetadata : segmentMetadataList) {
    Interval segmentInterval = segmentMetadata.getTimeInterval();
    if (segmentInterval != null && maxSegmentEndTime < segmentInterval.getEndMillis()) {
      maxSegmentEndTime = segmentInterval.getEndMillis();
    }
    // A segment's effective update time is whichever of push/refresh happened last.
    long segmentPushTime = segmentMetadata.getPushTime();
    long segmentRefreshTime = segmentMetadata.getRefreshTime();
    long segmentUpdateTime = Math.max(segmentPushTime, segmentRefreshTime);
    if (maxSegmentPushTime < segmentUpdateTime) {
      maxSegmentPushTime = segmentUpdateTime;
    }
  }

  // Update the gauges that contain the delay between the current time and last segment end time
  _validationMetrics.updateOfflineSegmentDelayGauge(tableName, maxSegmentEndTime);
  _validationMetrics.updateLastPushTimeGauge(tableName, maxSegmentPushTime);
  // Update the gauge to contain the total document count in the segments
  _validationMetrics.updateTotalDocumentsGauge(tableName, computeOfflineTotalDocumentInSegments(segmentMetadataList));
  // Update the gauge to contain the total number of segments for this table
  _validationMetrics.updateSegmentCountGauge(tableName, segmentMetadataList.size());
}
Aggregations