Example 1 with SegmentMetadata

Use of com.linkedin.pinot.common.segment.SegmentMetadata in project pinot by linkedin.

From the class SegmentFetcherAndLoader, method addOrReplaceOfflineSegment.

public void addOrReplaceOfflineSegment(String tableName, String segmentId, boolean retryOnFailure) {
    OfflineSegmentZKMetadata offlineSegmentZKMetadata = ZKMetadataProvider.getOfflineSegmentZKMetadata(_propertyStore, tableName, segmentId);
    // Try to load table schema from Helix property store.
    // This schema is used for adding default values for newly added columns.
    Schema schema = null;
    try {
        schema = getSchema(tableName);
    } catch (Exception e) {
        LOGGER.error("Caught exception while trying to load schema for table: {}", tableName, e);
    }
    LOGGER.info("Adding or replacing segment {} for table {}, metadata {}", segmentId, tableName, offlineSegmentZKMetadata);
    try {
        SegmentMetadata segmentMetadataForCheck = new SegmentMetadataImpl(offlineSegmentZKMetadata);
        // We lock the segment in order to get its metadata, and then release the lock, so it is possible
        // that the segment is dropped after we get its metadata.
        SegmentMetadata localSegmentMetadata = _dataManager.getSegmentMetadata(tableName, segmentId);
        if (localSegmentMetadata == null) {
            LOGGER.info("Segment {} of table {} is not loaded in memory, checking disk", segmentId, tableName);
            final String localSegmentDir = getSegmentLocalDirectory(tableName, segmentId);
            if (new File(localSegmentDir).exists()) {
                LOGGER.info("Segment {} of table {} found on disk, attempting to load it", segmentId, tableName);
                try {
                    localSegmentMetadata = _metadataLoader.loadIndexSegmentMetadataFromDir(localSegmentDir);
                    LOGGER.info("Found segment {} of table {} with crc {} on disk", segmentId, tableName, localSegmentMetadata.getCrc());
                } catch (Exception e) {
                    // The localSegmentDir in the log message should help us identify the table and segment.
                    LOGGER.error("Failed to load segment metadata from {}. Deleting it.", localSegmentDir, e);
                    FileUtils.deleteQuietly(new File(localSegmentDir));
                    localSegmentMetadata = null;
                }
                try {
                    if (!isNewSegmentMetadata(localSegmentMetadata, segmentMetadataForCheck, segmentId, tableName)) {
                        LOGGER.info("Segment metadata same as before, loading {} of table {} (crc {}) from disk", segmentId, tableName, localSegmentMetadata.getCrc());
                        AbstractTableConfig tableConfig = ZKMetadataProvider.getOfflineTableConfig(_propertyStore, tableName);
                        _dataManager.addSegment(localSegmentMetadata, tableConfig, schema);
                        // TODO Update zk metadata with CRC for this instance
                        return;
                    }
                } catch (V3RemoveIndexException e) {
                    LOGGER.info("Unable to remove local index from V3 format segment: {}, table: {}, try to reload it from controller.", segmentId, tableName, e);
                    FileUtils.deleteQuietly(new File(localSegmentDir));
                    localSegmentMetadata = null;
                } catch (Exception e) {
                    LOGGER.error("Failed to load {} of table {} from local, will try to reload it from controller!", segmentId, tableName, e);
                    FileUtils.deleteQuietly(new File(localSegmentDir));
                    localSegmentMetadata = null;
                }
            }
        }
        // Download the segment if there is no local copy, or if the local copy
        // that we have is different from that in zookeeper.
        if (isNewSegmentMetadata(localSegmentMetadata, segmentMetadataForCheck, segmentId, tableName)) {
            if (localSegmentMetadata == null) {
                LOGGER.info("Loading new segment {} of table {} from controller", segmentId, tableName);
            } else {
                LOGGER.info("Trying to refresh segment {} of table {} with new data.", segmentId, tableName);
            }
            int retryCount;
            int maxRetryCount = 1;
            if (retryOnFailure) {
                maxRetryCount = _segmentLoadMaxRetryCount;
            }
            for (retryCount = 0; retryCount < maxRetryCount; ++retryCount) {
                long attemptStartTime = System.currentTimeMillis();
                try {
                    AbstractTableConfig tableConfig = ZKMetadataProvider.getOfflineTableConfig(_propertyStore, tableName);
                    final String uri = offlineSegmentZKMetadata.getDownloadUrl();
                    final String localSegmentDir = downloadSegmentToLocal(uri, tableName, segmentId);
                    final SegmentMetadata segmentMetadata = _metadataLoader.loadIndexSegmentMetadataFromDir(localSegmentDir);
                    _dataManager.addSegment(segmentMetadata, tableConfig, schema);
                    LOGGER.info("Downloaded segment {} of table {} crc {} from controller", segmentId, tableName, segmentMetadata.getCrc());
                    // Successfully loaded the segment, break out of the retry loop
                    break;
                } catch (Exception e) {
                    long attemptDurationMillis = System.currentTimeMillis() - attemptStartTime;
                    LOGGER.warn("Caught exception while loading segment " + segmentId + "(table " + tableName + "), attempt " + (retryCount + 1) + " of " + maxRetryCount, e);
                    // Do we need to wait for the next retry attempt?
                    if (retryCount < maxRetryCount - 1) {
                        // Exponential backoff: wait for (minDuration + attemptDurationMillis)
                        // times a random multiplier in [1.0, 2^(retryCount + 1) + 1.0)
                        double maxRetryDurationMultiplier = Math.pow(2.0, (retryCount + 1));
                        double retryDurationMultiplier = Math.random() * maxRetryDurationMultiplier + 1.0;
                        long waitTime = (long) ((_segmentLoadMinRetryDelayMs + attemptDurationMillis) * retryDurationMultiplier);
                        LOGGER.warn("Waiting for " + TimeUnit.MILLISECONDS.toSeconds(waitTime) + " seconds to retry(" + segmentId + " of table " + tableName);
                        long waitEndTime = System.currentTimeMillis() + waitTime;
                        while (System.currentTimeMillis() < waitEndTime) {
                            try {
                                Thread.sleep(Math.max(waitEndTime - System.currentTimeMillis(), 1L));
                            } catch (InterruptedException ie) {
                            // Ignore spurious wakeup
                            }
                        }
                    }
                }
            }
            if (maxRetryCount <= retryCount) {
                String msg = "Failed to download segment " + segmentId + " (table " + tableName + ") after " + retryCount + " attempts";
                LOGGER.error(msg);
                throw new RuntimeException(msg);
            }
        } else {
            LOGGER.info("Got already loaded segment {} of table {} crc {} again, will do nothing.", segmentId, tableName, localSegmentMetadata.getCrc());
        }
    } catch (final Exception e) {
        LOGGER.error("Cannot load segment : " + segmentId + " for table " + tableName, e);
        Utils.rethrowException(e);
        throw new AssertionError("Should not reach this");
    }
}
Also used: V3RemoveIndexException (com.linkedin.pinot.core.segment.index.loader.V3RemoveIndexException), OfflineSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.OfflineSegmentZKMetadata), Schema (com.linkedin.pinot.common.data.Schema), IOException (java.io.IOException), SegmentMetadata (com.linkedin.pinot.common.segment.SegmentMetadata), SegmentMetadataImpl (com.linkedin.pinot.core.segment.index.SegmentMetadataImpl), AbstractTableConfig (com.linkedin.pinot.common.config.AbstractTableConfig), File (java.io.File)
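
The retry loop above scales its wait time by both the duration of the failed attempt and a randomized exponential multiplier. Below is a minimal, self-contained sketch of just that backoff computation; the class name and the MIN_RETRY_DELAY_MS constant are illustrative stand-ins for the server's configured values, not Pinot APIs.

import java.util.Random;
import java.util.concurrent.TimeUnit;

public class BackoffSketch {

    // Hypothetical default; the real value comes from server configuration.
    private static final long MIN_RETRY_DELAY_MS = 60_000L;

    private static final Random RANDOM = new Random();

    // Wait time before retry number (retryCount + 1), mirroring the loop in
    // addOrReplaceOfflineSegment: the longer the failed attempt took, the longer
    // we wait, scaled by a random multiplier drawn from [1.0, 2^(retryCount + 1) + 1.0).
    static long computeWaitMillis(int retryCount, long attemptDurationMillis) {
        double maxMultiplier = Math.pow(2.0, retryCount + 1);
        double multiplier = RANDOM.nextDouble() * maxMultiplier + 1.0;
        return (long) ((MIN_RETRY_DELAY_MS + attemptDurationMillis) * multiplier);
    }

    public static void main(String[] args) {
        for (int retry = 0; retry < 4; retry++) {
            long waitMs = computeWaitMillis(retry, 5_000L);
            System.out.println("Retry " + (retry + 1) + ": wait about " + TimeUnit.MILLISECONDS.toSeconds(waitMs) + "s");
        }
    }
}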

Example 2 with SegmentMetadata

Use of com.linkedin.pinot.common.segment.SegmentMetadata in project pinot by linkedin.

From the class StarTreeQueryGenerator, method main.

/**
   * Given star tree segments directory and number of queries, generate star tree queries.
   * Usage: StarTreeQueryGenerator starTreeSegmentsDirectory numQueries
   *
   * @param args arguments.
   */
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: StarTreeQueryGenerator starTreeSegmentsDirectory numQueries");
        return;
    }
    // Get segment metadata for the first segment to get table name and verify query is fit for star tree.
    File segmentsDir = new File(args[0]);
    Preconditions.checkState(segmentsDir.exists());
    Preconditions.checkState(segmentsDir.isDirectory());
    File[] segments = segmentsDir.listFiles();
    Preconditions.checkNotNull(segments);
    File segment = segments[0];
    IndexSegment indexSegment = Loaders.IndexSegment.load(segment, ReadMode.heap);
    SegmentMetadata segmentMetadata = indexSegment.getSegmentMetadata();
    String tableName = segmentMetadata.getTableName();
    // Set up star tree query generator.
    int numQueries = Integer.parseInt(args[1]);
    SegmentInfoProvider infoProvider = new SegmentInfoProvider(args[0]);
    StarTreeQueryGenerator generator = new StarTreeQueryGenerator(tableName, infoProvider.getSingleValueDimensionColumns(), infoProvider.getMetricColumns(), infoProvider.getSingleValueDimensionValuesMap());
    Pql2Compiler compiler = new Pql2Compiler();
    for (int i = 0; i < numQueries; i++) {
        String query = generator.nextQuery();
        System.out.println(query);
        // Verify that query is fit for star tree.
        BrokerRequest brokerRequest = compiler.compileToBrokerRequest(query);
        Preconditions.checkState(RequestUtils.isFitForStarTreeIndex(segmentMetadata, RequestUtils.generateFilterQueryTree(brokerRequest), brokerRequest));
    }
}
Also used: SegmentMetadata (com.linkedin.pinot.common.segment.SegmentMetadata), IndexSegment (com.linkedin.pinot.core.indexsegment.IndexSegment), Pql2Compiler (com.linkedin.pinot.pql.parsers.Pql2Compiler), BrokerRequest (com.linkedin.pinot.common.request.BrokerRequest), File (java.io.File)
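
A hypothetical variation on the verification loop above: instead of failing fast on the first query the star tree cannot serve, collect every unfit query for later inspection. It reuses only the Pinot classes already shown in this example; the wrapper class and method names are invented for illustration.

import com.linkedin.pinot.common.request.BrokerRequest;
import com.linkedin.pinot.common.segment.SegmentMetadata;
import com.linkedin.pinot.common.utils.request.RequestUtils;
import com.linkedin.pinot.pql.parsers.Pql2Compiler;
import java.util.ArrayList;
import java.util.List;

public class StarTreeFitChecker {

    // Returns the subset of the given queries that is NOT fit for the star tree index.
    static List<String> findUnfitQueries(SegmentMetadata segmentMetadata, List<String> queries) {
        Pql2Compiler compiler = new Pql2Compiler();
        List<String> unfit = new ArrayList<>();
        for (String query : queries) {
            BrokerRequest brokerRequest = compiler.compileToBrokerRequest(query);
            if (!RequestUtils.isFitForStarTreeIndex(segmentMetadata, RequestUtils.generateFilterQueryTree(brokerRequest), brokerRequest)) {
                unfit.add(query);
            }
        }
        return unfit;
    }
}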

Example 3 with SegmentMetadata

Use of com.linkedin.pinot.common.segment.SegmentMetadata in project pinot by linkedin.

From the class PinotHelixResourceManager, method ifRefreshAnExistedSegment.

private boolean ifRefreshAnExistedSegment(SegmentMetadata segmentMetadata, String segmentName, String tableName) {
    OfflineSegmentZKMetadata offlineSegmentZKMetadata = ZKMetadataProvider.getOfflineSegmentZKMetadata(_propertyStore, segmentMetadata.getTableName(), segmentMetadata.getName());
    if (offlineSegmentZKMetadata == null) {
        LOGGER.info("Rejecting because Zk metadata is null for segment {} of table {}", segmentName, tableName);
        return false;
    }
    final SegmentMetadata existedSegmentMetadata = new SegmentMetadataImpl(offlineSegmentZKMetadata);
    if (segmentMetadata.getIndexCreationTime() <= existedSegmentMetadata.getIndexCreationTime()) {
        LOGGER.info("Rejecting because of older or same creation time {} (we have {}) for segment {} of table {}", segmentMetadata.getIndexCreationTime(), existedSegmentMetadata.getIndexCreationTime(), segmentName, tableName);
        return false;
    }
    if (segmentMetadata.getCrc().equals(existedSegmentMetadata.getCrc())) {
        LOGGER.info("Rejecting because of matching CRC exists (incoming={}, existing={}) for {} of table {}", segmentMetadata.getCrc(), existedSegmentMetadata.getCrc(), segmentName, tableName);
        return false;
    }
    return true;
}
Also used: SegmentMetadata (com.linkedin.pinot.common.segment.SegmentMetadata), OfflineSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.OfflineSegmentZKMetadata), SegmentMetadataImpl (com.linkedin.pinot.core.segment.index.SegmentMetadataImpl)
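
The acceptance rule above reduces to three checks: ZK metadata must exist, the incoming segment must have a strictly newer creation time, and its CRC must differ. A dependency-free sketch of that decision follows; the Meta record and its fields are illustrative, not Pinot types (records require Java 16+).

public class RefreshCheckSketch {

    record Meta(long indexCreationTime, String crc) {}

    // Accept a refresh only if the incoming segment is strictly newer and carries different data.
    static boolean shouldRefresh(Meta incoming, Meta existing) {
        if (existing == null) {
            return false; // no ZK metadata: not a refresh of an existing segment
        }
        if (incoming.indexCreationTime() <= existing.indexCreationTime()) {
            return false; // older or same creation time
        }
        return !incoming.crc().equals(existing.crc()); // identical CRC means identical data
    }

    public static void main(String[] args) {
        Meta existing = new Meta(1000L, "abc");
        System.out.println(shouldRefresh(new Meta(2000L, "def"), existing)); // true
        System.out.println(shouldRefresh(new Meta(2000L, "abc"), existing)); // false: same CRC
        System.out.println(shouldRefresh(new Meta(1000L, "def"), existing)); // false: not newer
    }
}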

Example 4 with SegmentMetadata

Use of com.linkedin.pinot.common.segment.SegmentMetadata in project pinot by linkedin.

From the class ValidationManager, method runValidation.

/**
   * Runs a validation pass over the currently loaded tables.
   */
public void runValidation() {
    if (!_pinotHelixResourceManager.isLeader()) {
        LOGGER.info("Skipping validation, not leader!");
        return;
    }
    LOGGER.info("Starting validation");
    // Fetch the list of tables
    List<String> allTableNames = _pinotHelixResourceManager.getAllPinotTableNames();
    ZkHelixPropertyStore<ZNRecord> propertyStore = _pinotHelixResourceManager.getPropertyStore();
    for (String tableName : allTableNames) {
        List<SegmentMetadata> segmentMetadataList = new ArrayList<SegmentMetadata>();
        TableType tableType = TableNameBuilder.getTableTypeFromTableName(tableName);
        AbstractTableConfig tableConfig = null;
        _pinotHelixResourceManager.rebuildBrokerResourceFromHelixTags(tableName);
        // For each table, fetch the metadata for all its segments
        if (tableType.equals(TableType.OFFLINE)) {
            validateOfflineSegmentPush(propertyStore, tableName, segmentMetadataList);
        } else if (tableType.equals(TableType.REALTIME)) {
            LOGGER.info("Starting to validate table {}", tableName);
            List<RealtimeSegmentZKMetadata> realtimeSegmentZKMetadatas = ZKMetadataProvider.getRealtimeSegmentZKMetadataListForTable(propertyStore, tableName);
            // Set to false below if this table has ONLY LLC segments (i.e. fully migrated)
            boolean countHLCSegments = true;
            KafkaStreamMetadata streamMetadata = null;
            try {
                tableConfig = _pinotHelixResourceManager.getRealtimeTableConfig(tableName);
                streamMetadata = new KafkaStreamMetadata(tableConfig.getIndexingConfig().getStreamConfigs());
                if (streamMetadata.hasSimpleKafkaConsumerType() && !streamMetadata.hasHighLevelKafkaConsumerType()) {
                    countHLCSegments = false;
                }
                for (RealtimeSegmentZKMetadata realtimeSegmentZKMetadata : realtimeSegmentZKMetadatas) {
                    SegmentMetadata segmentMetadata = new SegmentMetadataImpl(realtimeSegmentZKMetadata);
                    segmentMetadataList.add(segmentMetadata);
                }
                // Update the gauge to contain the total document count in the segments
                _validationMetrics.updateTotalDocumentsGauge(tableName, computeRealtimeTotalDocumentInSegments(segmentMetadataList, countHLCSegments));
                if (streamMetadata.hasSimpleKafkaConsumerType()) {
                    validateLLCSegments(tableName, tableConfig);
                }
            } catch (Exception e) {
                if (tableConfig == null) {
                    LOGGER.warn("Cannot get realtime tableconfig for {}", tableName);
                } else if (streamMetadata == null) {
                    LOGGER.warn("Cannot get streamconfig for {}", tableName);
                } else {
                    LOGGER.error("Exception while validating table {}", tableName, e);
                }
            }
        } else {
            LOGGER.warn("Ignoring table type {} for table {}", tableType, tableName);
        }
    }
    LOGGER.info("Validation completed");
}
Also used: KafkaStreamMetadata (com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata), TableType (com.linkedin.pinot.common.utils.CommonConstants.Helix.TableType), ArrayList (java.util.ArrayList), SegmentMetadata (com.linkedin.pinot.common.segment.SegmentMetadata), RealtimeSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata), List (java.util.List), SegmentMetadataImpl (com.linkedin.pinot.core.segment.index.SegmentMetadataImpl), AbstractTableConfig (com.linkedin.pinot.common.config.AbstractTableConfig), ZNRecord (org.apache.helix.ZNRecord)
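
Whether HLC segments count toward the document total is derived purely from the table's Kafka consumer types. A tiny sketch of just that rule, where the two booleans stand in for streamMetadata.hasSimpleKafkaConsumerType() and streamMetadata.hasHighLevelKafkaConsumerType():

public class HlcCountingRule {

    // HLC segments are counted unless the table consumes ONLY via the simple
    // (LLC) consumer type, i.e. it has fully migrated off high-level consumers.
    static boolean countHlcSegments(boolean hasSimpleConsumerType, boolean hasHighLevelConsumerType) {
        return !(hasSimpleConsumerType && !hasHighLevelConsumerType);
    }

    public static void main(String[] args) {
        System.out.println(countHlcSegments(true, false));  // false: LLC only, fully migrated
        System.out.println(countHlcSegments(true, true));   // true: both consumer types present
        System.out.println(countHlcSegments(false, true));  // true: HLC only
    }
}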

Example 5 with SegmentMetadata

Use of com.linkedin.pinot.common.segment.SegmentMetadata in project pinot by linkedin.

From the class ValidationManager, method validateOfflineSegmentPush.

// For offline segment pushes, validate that there are no missing segments, and update metrics
private void validateOfflineSegmentPush(ZkHelixPropertyStore<ZNRecord> propertyStore, String tableName, List<SegmentMetadata> segmentMetadataList) {
    List<OfflineSegmentZKMetadata> offlineSegmentZKMetadatas = ZKMetadataProvider.getOfflineSegmentZKMetadataListForTable(propertyStore, tableName);
    for (OfflineSegmentZKMetadata offlineSegmentZKMetadata : offlineSegmentZKMetadatas) {
        SegmentMetadata segmentMetadata = new SegmentMetadataImpl(offlineSegmentZKMetadata);
        segmentMetadataList.add(segmentMetadata);
    }
    // Calculate missing segments only for offline tables
    int missingSegmentCount = 0;
    // Compute the missing segments only when there are more than two segments
    if (2 < segmentMetadataList.size()) {
        List<Interval> segmentIntervals = new ArrayList<Interval>();
        for (SegmentMetadata segmentMetadata : segmentMetadataList) {
            Interval timeInterval = segmentMetadata.getTimeInterval();
            if (timeInterval != null && TimeUtils.timeValueInValidRange(timeInterval.getStartMillis()) && TimeUtils.timeValueInValidRange(timeInterval.getEndMillis())) {
                segmentIntervals.add(timeInterval);
            }
        }
        List<Interval> missingIntervals = computeMissingIntervals(segmentIntervals, segmentMetadataList.get(0).getTimeGranularity());
        missingSegmentCount = missingIntervals.size();
        for (Interval missingInterval : missingIntervals) {
            LOGGER.warn("Missing data in table {} for time interval {}", tableName, missingInterval);
        }
    }
    // Update the gauge that contains the number of missing segments
    _validationMetrics.updateMissingSegmentsGauge(tableName, missingSegmentCount);
    // Compute the max segment end time and max segment push time
    long maxSegmentEndTime = Long.MIN_VALUE;
    long maxSegmentPushTime = Long.MIN_VALUE;
    for (SegmentMetadata segmentMetadata : segmentMetadataList) {
        Interval segmentInterval = segmentMetadata.getTimeInterval();
        if (segmentInterval != null && maxSegmentEndTime < segmentInterval.getEndMillis()) {
            maxSegmentEndTime = segmentInterval.getEndMillis();
        }
        long segmentPushTime = segmentMetadata.getPushTime();
        long segmentRefreshTime = segmentMetadata.getRefreshTime();
        long segmentUpdateTime = Math.max(segmentPushTime, segmentRefreshTime);
        if (maxSegmentPushTime < segmentUpdateTime) {
            maxSegmentPushTime = segmentUpdateTime;
        }
    }
    // Update the gauges that contain the delay between the current time and last segment end time
    _validationMetrics.updateOfflineSegmentDelayGauge(tableName, maxSegmentEndTime);
    _validationMetrics.updateLastPushTimeGauge(tableName, maxSegmentPushTime);
    // Update the gauge to contain the total document count in the segments
    _validationMetrics.updateTotalDocumentsGauge(tableName, computeOfflineTotalDocumentInSegments(segmentMetadataList));
    // Update the gauge to contain the total number of segments for this table
    _validationMetrics.updateSegmentCountGauge(tableName, segmentMetadataList.size());
}
Also used: SegmentMetadata (com.linkedin.pinot.common.segment.SegmentMetadata), OfflineSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.OfflineSegmentZKMetadata), ArrayList (java.util.ArrayList), SegmentMetadataImpl (com.linkedin.pinot.core.segment.index.SegmentMetadataImpl), Interval (org.joda.time.Interval)
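
computeMissingIntervals itself is not shown in this example. One plausible way to find gaps is to sort the segment intervals by start time and report any uncovered span of at least one granularity unit; the sketch below assumes that approach and is not Pinot's actual implementation.

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import org.joda.time.Duration;
import org.joda.time.Interval;

public class MissingIntervalSketch {

    // Sort intervals by start time, then report each gap between the covered
    // prefix and the next interval that is at least one granularity unit wide.
    static List<Interval> computeMissingIntervals(List<Interval> intervals, Duration granularity) {
        List<Interval> sorted = new ArrayList<>(intervals);
        sorted.sort(Comparator.comparingLong(Interval::getStartMillis));
        List<Interval> missing = new ArrayList<>();
        long coveredUpTo = Long.MIN_VALUE;
        for (Interval interval : sorted) {
            if (coveredUpTo != Long.MIN_VALUE && interval.getStartMillis() - coveredUpTo >= granularity.getMillis()) {
                missing.add(new Interval(coveredUpTo, interval.getStartMillis()));
            }
            coveredUpTo = Math.max(coveredUpTo, interval.getEndMillis());
        }
        return missing;
    }
}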

Aggregations

SegmentMetadata (com.linkedin.pinot.common.segment.SegmentMetadata): 33 usages
Test (org.testng.annotations.Test): 10 usages
SegmentMetadataImpl (com.linkedin.pinot.core.segment.index.SegmentMetadataImpl): 8 usages
SimpleSegmentMetadata (com.linkedin.pinot.core.query.utils.SimpleSegmentMetadata): 7 usages
File (java.io.File): 6 usages
AfterTest (org.testng.annotations.AfterTest): 6 usages
BeforeTest (org.testng.annotations.BeforeTest): 6 usages
RetentionManager (com.linkedin.pinot.controller.helix.core.retention.RetentionManager): 5 usages
ArrayList (java.util.ArrayList): 5 usages
OfflineSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.OfflineSegmentZKMetadata): 4 usages
IndexSegment (com.linkedin.pinot.core.indexsegment.IndexSegment): 4 usages
BrokerRequest (com.linkedin.pinot.common.request.BrokerRequest): 3 usages
Pql2Compiler (com.linkedin.pinot.pql.parsers.Pql2Compiler): 3 usages
HashMap (java.util.HashMap): 3 usages
AbstractTableConfig (com.linkedin.pinot.common.config.AbstractTableConfig): 2 usages
HLCSegmentName (com.linkedin.pinot.common.utils.HLCSegmentName): 2 usages
FilterQueryTree (com.linkedin.pinot.common.utils.request.FilterQueryTree): 2 usages
TransformExpressionOperator (com.linkedin.pinot.core.operator.transform.TransformExpressionOperator): 2 usages
Interval (org.joda.time.Interval): 2 usages
Matchers.anyString (org.mockito.Matchers.anyString): 2 usages