Example 6 with SegmentDataManager

Use of com.linkedin.pinot.core.data.manager.offline.SegmentDataManager in project pinot by linkedin.

In class ServerQueryExecutorV1Impl, method pruneSegments:

/**
   * Helper method to prune segments.
   *
   * @param tableDataManager Table data manager
   * @param segments List of segments to prune
   * @param brokerRequest Broker request
   * @return Total number of docs across all segments (including the ones that were pruned).
   */
private long pruneSegments(TableDataManager tableDataManager, List<SegmentDataManager> segments, BrokerRequest brokerRequest) {
    long totalRawDocs = 0;
    Iterator<SegmentDataManager> it = segments.iterator();
    while (it.hasNext()) {
        SegmentDataManager segmentDataManager = it.next();
        final IndexSegment indexSegment = segmentDataManager.getSegment();
        // We need to compute the total raw docs for the table before any pruning.
        totalRawDocs += indexSegment.getSegmentMetadata().getTotalRawDocs();
        if (_segmentPrunerService.prune(indexSegment, brokerRequest)) {
            it.remove();
            tableDataManager.releaseSegment(segmentDataManager);
        }
    }
    return totalRawDocs;
}
Also used : SegmentDataManager(com.linkedin.pinot.core.data.manager.offline.SegmentDataManager) IndexSegment(com.linkedin.pinot.core.indexsegment.IndexSegment)
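
Below is a minimal, self-contained sketch of the same prune-and-release pattern, runnable on its own. The Segment and SegmentManager interfaces and the prune predicate are hypothetical stand-ins for Pinot's IndexSegment, SegmentDataManager and SegmentPrunerService, trimmed to the shape this helper relies on: raw docs are counted before pruning, and pruned managers are removed from the live list and released immediately so segment reference counts do not leak.

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

// Hypothetical stand-ins for IndexSegment and SegmentDataManager (illustration only).
interface Segment { long getTotalRawDocs(); }
interface SegmentManager { Segment getSegment(); }

public class PruneSketch {
    static SegmentManager segmentWithDocs(long docs) {
        return () -> () -> docs;
    }

    // Stands in for SegmentPrunerService.prune(segment, request); here we prune empty segments.
    static boolean prune(Segment segment) {
        return segment.getTotalRawDocs() == 0;
    }

    // Mirrors the shape of pruneSegments above: iterate, sum raw docs before pruning,
    // remove pruned entries from the live list, and hand them back for release.
    static long pruneSegments(List<SegmentManager> segments, List<SegmentManager> released) {
        long totalRawDocs = 0;
        Iterator<SegmentManager> it = segments.iterator();
        while (it.hasNext()) {
            SegmentManager manager = it.next();
            totalRawDocs += manager.getSegment().getTotalRawDocs();
            if (prune(manager.getSegment())) {
                it.remove();
                released.add(manager); // stand-in for tableDataManager.releaseSegment(manager)
            }
        }
        return totalRawDocs;
    }

    public static void main(String[] args) {
        List<SegmentManager> segments = new ArrayList<>();
        segments.add(segmentWithDocs(100)); // kept
        segments.add(segmentWithDocs(0));   // pruned
        List<SegmentManager> released = new ArrayList<>();
        long total = pruneSegments(segments, released);
        // Prints: totalRawDocs=100 kept=1 released=1
        System.out.println("totalRawDocs=" + total + " kept=" + segments.size() + " released=" + released.size());
    }
}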

Example 7 with SegmentDataManager

Use of com.linkedin.pinot.core.data.manager.offline.SegmentDataManager in project pinot by linkedin.

In class ServerQueryExecutorV1Impl, method processQuery:

@Override
public DataTable processQuery(final QueryRequest queryRequest, ExecutorService executorService) {
    TimerContext timerContext = queryRequest.getTimerContext();
    TimerContext.Timer schedulerWaitTimer = timerContext.getPhaseTimer(ServerQueryPhase.SCHEDULER_WAIT);
    if (schedulerWaitTimer != null) {
        schedulerWaitTimer.stopAndRecord();
    }
    TimerContext.Timer queryProcessingTimer = timerContext.startNewPhaseTimer(ServerQueryPhase.QUERY_PROCESSING);
    DataTable dataTable;
    List<SegmentDataManager> queryableSegmentDataManagerList = null;
    InstanceRequest instanceRequest = queryRequest.getInstanceRequest();
    final long requestId = instanceRequest.getRequestId();
    try {
        TraceContext.register(instanceRequest);
        final BrokerRequest brokerRequest = instanceRequest.getQuery();
        LOGGER.debug("Incoming query is : {}", brokerRequest);
        TimerContext.Timer segmentPruneTimer = timerContext.startNewPhaseTimer(ServerQueryPhase.SEGMENT_PRUNING);
        final String tableName = instanceRequest.getQuery().getQuerySource().getTableName();
        TableDataManager tableDataManager = _instanceDataManager.getTableDataManager(tableName);
        queryableSegmentDataManagerList = acquireQueryableSegments(tableDataManager, instanceRequest);
        long totalRawDocs = pruneSegments(tableDataManager, queryableSegmentDataManagerList, instanceRequest.getQuery());
        segmentPruneTimer.stopAndRecord();
        int numSegmentsMatched = queryableSegmentDataManagerList.size();
        queryRequest.setSegmentCountAfterPruning(numSegmentsMatched);
        LOGGER.debug("Matched {} segments", numSegmentsMatched);
        if (numSegmentsMatched == 0) {
            DataTable emptyDataTable = DataTableBuilder.buildEmptyDataTable(brokerRequest);
            emptyDataTable.getMetadata().put(DataTable.TOTAL_DOCS_METADATA_KEY, String.valueOf(totalRawDocs));
            return emptyDataTable;
        }
        TimerContext.Timer planBuildTimer = timerContext.startNewPhaseTimer(ServerQueryPhase.BUILD_QUERY_PLAN);
        final Plan globalQueryPlan = _planMaker.makeInterSegmentPlan(queryableSegmentDataManagerList, brokerRequest, executorService, getResourceTimeOut(instanceRequest.getQuery()));
        planBuildTimer.stopAndRecord();
        if (_printQueryPlan) {
            LOGGER.debug("***************************** Query Plan for Request {} ***********************************", instanceRequest.getRequestId());
            globalQueryPlan.print();
            LOGGER.debug("*********************************** End Query Plan ***********************************");
        }
        TimerContext.Timer planExecTimer = timerContext.startNewPhaseTimer(ServerQueryPhase.QUERY_PLAN_EXECUTION);
        globalQueryPlan.execute();
        planExecTimer.stopAndRecord();
        dataTable = globalQueryPlan.getInstanceResponse();
        Map<String, String> dataTableMetadata = dataTable.getMetadata();
        queryProcessingTimer.stopAndRecord();
        LOGGER.debug("Searching Instance for Request Id - {}, browse took: {}", instanceRequest.getRequestId(), queryProcessingTimer.getDurationNs());
        LOGGER.debug("InstanceResponse for Request Id - {} : {}", instanceRequest.getRequestId(), dataTable.toString());
        dataTableMetadata.put(DataTable.TIME_USED_MS_METADATA_KEY, Long.toString(queryProcessingTimer.getDurationMs()));
        dataTableMetadata.put(DataTable.REQUEST_ID_METADATA_KEY, Long.toString(instanceRequest.getRequestId()));
        dataTableMetadata.put(DataTable.TRACE_INFO_METADATA_KEY, TraceContext.getTraceInfoOfRequestId(instanceRequest.getRequestId()));
        // Update the total docs in the metadata based on un-pruned segments.
        dataTableMetadata.put(DataTable.TOTAL_DOCS_METADATA_KEY, String.valueOf(totalRawDocs));
        return dataTable;
    } catch (Exception e) {
        _serverMetrics.addMeteredQueryValue(instanceRequest.getQuery(), ServerMeter.QUERY_EXECUTION_EXCEPTIONS, 1);
        LOGGER.error("Exception processing requestId {}", requestId, e);
        dataTable = new DataTableImplV2();
        Map<String, String> dataTableMetadata = dataTable.getMetadata();
        dataTable.addException(QueryException.getException(QueryException.QUERY_EXECUTION_ERROR, e));
        TraceContext.logException("ServerQueryExecutorV1Impl", "Exception occurred in processQuery");
        queryProcessingTimer.stopAndRecord();
        LOGGER.info("Searching Instance for Request Id - {}, browse took: {}, instanceResponse: {}", requestId, queryProcessingTimer.getDurationMs(), dataTable.toString());
        dataTableMetadata.put(DataTable.TIME_USED_MS_METADATA_KEY, Long.toString(queryProcessingTimer.getDurationMs()));
        dataTableMetadata.put(DataTable.REQUEST_ID_METADATA_KEY, Long.toString(instanceRequest.getRequestId()));
        dataTableMetadata.put(DataTable.TRACE_INFO_METADATA_KEY, TraceContext.getTraceInfoOfRequestId(instanceRequest.getRequestId()));
        return dataTable;
    } finally {
        TableDataManager tableDataManager = _instanceDataManager.getTableDataManager(queryRequest.getTableName());
        if (tableDataManager != null && queryableSegmentDataManagerList != null) {
            for (SegmentDataManager segmentDataManager : queryableSegmentDataManagerList) {
                tableDataManager.releaseSegment(segmentDataManager);
            }
        }
        TraceContext.unregister(instanceRequest);
    }
}
Also used : DataTable(com.linkedin.pinot.common.utils.DataTable) DataTableImplV2(com.linkedin.pinot.core.common.datatable.DataTableImplV2) Plan(com.linkedin.pinot.core.plan.Plan) QueryException(com.linkedin.pinot.common.exception.QueryException) ConfigurationException(org.apache.commons.configuration.ConfigurationException) SegmentDataManager(com.linkedin.pinot.core.data.manager.offline.SegmentDataManager) TimerContext(com.linkedin.pinot.common.query.context.TimerContext) TableDataManager(com.linkedin.pinot.core.data.manager.offline.TableDataManager) BrokerRequest(com.linkedin.pinot.common.request.BrokerRequest) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) InstanceRequest(com.linkedin.pinot.common.request.InstanceRequest)
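
The try/finally structure above only makes sense given the acquire/release contract of TableDataManager: every acquired SegmentDataManager must be released exactly once, even on error, so that a reference-counted segment can be destroyed safely once no query holds it. A hypothetical reduction of that contract is below; the SegmentHandle type and acquireAll method are illustrative, not Pinot's actual API.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;

public class RefCountSketch {
    static final class SegmentHandle {
        final String name;
        // Starts at 1: the table manager itself holds one reference.
        final AtomicInteger refCount = new AtomicInteger(1);

        SegmentHandle(String name) { this.name = name; }

        void release() {
            if (refCount.decrementAndGet() == 0) {
                // Last reference gone: only now is it safe to delete the index files.
                System.out.println("destroying " + name);
            }
        }
    }

    final ConcurrentHashMap<String, SegmentHandle> segments = new ConcurrentHashMap<>();

    // Query path: bump the count on every segment before using it, mirroring
    // acquireQueryableSegments(...) in the example above.
    List<SegmentHandle> acquireAll() {
        List<SegmentHandle> acquired = new ArrayList<>();
        for (SegmentHandle handle : segments.values()) {
            handle.refCount.incrementAndGet();
            acquired.add(handle);
        }
        return acquired;
    }

    public static void main(String[] args) {
        RefCountSketch manager = new RefCountSketch();
        manager.segments.put("seg_0", new SegmentHandle("seg_0"));
        List<SegmentHandle> inUse = manager.acquireAll();
        // Helix drops the segment mid-query; the query's reference keeps it alive.
        manager.segments.remove("seg_0").release();
        for (SegmentHandle handle : inUse) {
            handle.release(); // the count hits 0 here and "destroying seg_0" prints
        }
    }
}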

Example 8 with SegmentDataManager

Use of com.linkedin.pinot.core.data.manager.offline.SegmentDataManager in project pinot by linkedin.

In class InstancePlanMakerImplV2, method makeInterSegmentPlan:

@Override
public Plan makeInterSegmentPlan(List<SegmentDataManager> segmentDataManagers, BrokerRequest brokerRequest, ExecutorService executorService, long timeOutMs) {
    // TODO: pass in List<IndexSegment> directly.
    List<IndexSegment> indexSegments = new ArrayList<>(segmentDataManagers.size());
    for (SegmentDataManager segmentDataManager : segmentDataManagers) {
        indexSegments.add(segmentDataManager.getSegment());
    }
    BrokerRequestPreProcessor.preProcess(indexSegments, brokerRequest);
    List<PlanNode> planNodes = new ArrayList<>();
    for (IndexSegment indexSegment : indexSegments) {
        planNodes.add(makeInnerSegmentPlan(indexSegment, brokerRequest));
    }
    CombinePlanNode combinePlanNode = new CombinePlanNode(planNodes, brokerRequest, executorService, timeOutMs);
    return new GlobalPlanImplV0(new InstanceResponsePlanNode(combinePlanNode));
}
Also used : SegmentDataManager(com.linkedin.pinot.core.data.manager.offline.SegmentDataManager) AggregationPlanNode(com.linkedin.pinot.core.plan.AggregationPlanNode) SelectionPlanNode(com.linkedin.pinot.core.plan.SelectionPlanNode) PlanNode(com.linkedin.pinot.core.plan.PlanNode) AggregationGroupByPlanNode(com.linkedin.pinot.core.plan.AggregationGroupByPlanNode) CombinePlanNode(com.linkedin.pinot.core.plan.CombinePlanNode) InstanceResponsePlanNode(com.linkedin.pinot.core.plan.InstanceResponsePlanNode) IndexSegment(com.linkedin.pinot.core.indexsegment.IndexSegment) ArrayList(java.util.ArrayList) InstanceResponsePlanNode(com.linkedin.pinot.core.plan.InstanceResponsePlanNode) CombinePlanNode(com.linkedin.pinot.core.plan.CombinePlanNode) GlobalPlanImplV0(com.linkedin.pinot.core.plan.GlobalPlanImplV0)
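
The returned plan is a small tree: an InstanceResponsePlanNode wrapping a CombinePlanNode, which fans out one plan node per segment. A hypothetical sketch of how such a combine step can run per-segment nodes on the shared executor while honoring the overall timeout follows; the Node interface and combine method are stand-ins, not Pinot's PlanNode API.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

public class PlanSketch {
    // Stand-in for a per-segment plan node: running it yields that segment's partial result.
    interface Node { int run() throws Exception; }

    // Stand-in for CombinePlanNode: run the per-segment nodes on the shared executor
    // and merge their results within the query's time budget.
    static int combine(List<Node> perSegment, ExecutorService executor, long timeoutMs)
            throws Exception {
        List<Future<Integer>> futures = new ArrayList<>();
        for (Node node : perSegment) {
            futures.add(executor.submit(node::run));
        }
        int merged = 0;
        long deadline = System.currentTimeMillis() + timeoutMs;
        for (Future<Integer> future : futures) {
            // Each get() only waits for whatever is left of the overall budget.
            long remainingMs = Math.max(1, deadline - System.currentTimeMillis());
            merged += future.get(remainingMs, TimeUnit.MILLISECONDS);
        }
        return merged;
    }

    public static void main(String[] args) throws Exception {
        ExecutorService executor = Executors.newFixedThreadPool(4);
        List<Node> perSegment = Arrays.asList(() -> 10, () -> 20, () -> 30);
        System.out.println("combined = " + combine(perSegment, executor, 1000)); // combined = 60
        executor.shutdown();
    }
}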

Example 9 with SegmentDataManager

Use of com.linkedin.pinot.core.data.manager.offline.SegmentDataManager in project pinot by linkedin.

In class RealtimeTableDataManager, method addSegment:

/*
   * This call comes in one of two ways:
   * For HL Segments:
   * - We are being directed by helix to own up all the segments that we committed and are still in retention. In this case
   *   we treat it exactly like how OfflineTableDataManager would -- wrap it into an OfflineSegmentDataManager, and put it
   *   in the map.
   * - We are being asked to own up a new realtime segment. In this case, we wrap the segment with a RealTimeSegmentDataManager
   *   (that kicks off Kafka consumption). When the segment is committed we get notified via the notifySegmentCommitted call, at
   *   which time we replace the segment with the OfflineSegmentDataManager
   * For LL Segments:
   * - We are being asked to start consuming from a kafka partition.
   * - We did not know about the segment and are being asked to download and own the segment (re-balancing, or
   *   replacing a realtime server with a fresh one, maybe). We need to look at segment metadata and decide whether
   *   to start consuming or download the segment.
   */
@Override
public void addSegment(ZkHelixPropertyStore<ZNRecord> propertyStore, AbstractTableConfig tableConfig, InstanceZKMetadata instanceZKMetadata, SegmentZKMetadata inputSegmentZKMetadata) throws Exception {
    // TODO FIXME
    // Hack. We get the _helixPropertyStore here and save it, knowing that we will get this addSegment call
    // before the notifyCommitted call (that uses _helixPropertyStore)
    this._helixPropertyStore = propertyStore;
    final String segmentId = inputSegmentZKMetadata.getSegmentName();
    final String tableName = inputSegmentZKMetadata.getTableName();
    if (!(inputSegmentZKMetadata instanceof RealtimeSegmentZKMetadata)) {
        LOGGER.warn("Got called with an unexpected instance object:{},table {}, segment {}", inputSegmentZKMetadata.getClass().getSimpleName(), tableName, segmentId);
        return;
    }
    RealtimeSegmentZKMetadata segmentZKMetadata = (RealtimeSegmentZKMetadata) inputSegmentZKMetadata;
    LOGGER.info("Attempting to add realtime segment {} for table {}", segmentId, tableName);
    if (new File(_indexDir, segmentId).exists() && segmentZKMetadata.getStatus() == Status.DONE) {
        // segment already exists on file, and we have committed the realtime segment in ZK. Treat it like an offline segment
        if (_segmentsMap.containsKey(segmentId)) {
            LOGGER.warn("Got reload for segment already on disk {} table {}, have {}", segmentId, tableName, _segmentsMap.get(segmentId).getClass().getSimpleName());
            return;
        }
        IndexSegment segment = ColumnarSegmentLoader.load(new File(_indexDir, segmentId), _readMode, _indexLoadingConfigMetadata);
        addSegment(segment);
        markSegmentAsLoaded(segmentId);
    } else {
        // The segment is not on disk, or the realtime segment has not been committed in ZK yet:
        // set up consumption (or, for a committed LLC segment, download it and serve it as an
        // on-disk segment next time).
        if (_segmentsMap.containsKey(segmentId)) {
            LOGGER.warn("Got reload for segment not on disk {} table {}, have {}", segmentId, tableName, _segmentsMap.get(segmentId).getClass().getSimpleName());
            return;
        }
        PinotHelixPropertyStoreZnRecordProvider propertyStoreHelper = PinotHelixPropertyStoreZnRecordProvider.forSchema(propertyStore);
        ZNRecord record = propertyStoreHelper.get(tableConfig.getValidationConfig().getSchemaName());
        LOGGER.info("Found schema {} ", tableConfig.getValidationConfig().getSchemaName());
        Schema schema = SchemaUtils.fromZNRecord(record);
        if (!isValid(schema, tableConfig.getIndexingConfig())) {
            LOGGER.error("Not adding segment {}", segmentId);
            throw new RuntimeException("Mismatching schema/table config for " + _tableName);
        }
        SegmentDataManager manager;
        if (SegmentName.isHighLevelConsumerSegmentName(segmentId)) {
            manager = new HLRealtimeSegmentDataManager(segmentZKMetadata, tableConfig, instanceZKMetadata, this, _indexDir.getAbsolutePath(), _readMode, SchemaUtils.fromZNRecord(record), _serverMetrics);
        } else {
            LLCRealtimeSegmentZKMetadata llcSegmentMetadata = (LLCRealtimeSegmentZKMetadata) segmentZKMetadata;
            if (segmentZKMetadata.getStatus().equals(Status.DONE)) {
                // TODO Remove code duplication here and in LLRealtimeSegmentDataManager
                downloadAndReplaceSegment(segmentId, llcSegmentMetadata);
                return;
            }
            manager = new LLRealtimeSegmentDataManager(segmentZKMetadata, tableConfig, instanceZKMetadata, this, _indexDir.getAbsolutePath(), SchemaUtils.fromZNRecord(record), _serverMetrics);
        }
        LOGGER.info("Initialize RealtimeSegmentDataManager - " + segmentId);
        try {
            _rwLock.writeLock().lock();
            _segmentsMap.put(segmentId, manager);
        } finally {
            _rwLock.writeLock().unlock();
        }
        _loadingSegments.add(segmentId);
    }
}
Also used : LLCRealtimeSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata) RealtimeSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata) SegmentDataManager(com.linkedin.pinot.core.data.manager.offline.SegmentDataManager) IndexSegment(com.linkedin.pinot.core.indexsegment.IndexSegment) Schema(com.linkedin.pinot.common.data.Schema) LLCRealtimeSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata) File(java.io.File) PinotHelixPropertyStoreZnRecordProvider(com.linkedin.pinot.common.utils.helix.PinotHelixPropertyStoreZnRecordProvider) ZNRecord(org.apache.helix.ZNRecord)
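
The key branch above is how the manager distinguishes high-level from low-level consumer segments by name and, for LLC segments that are already committed, downloads rather than re-consumes. A hypothetical reduction of that decision is below; the name-marker check and segment-name formats are illustrative only (Pinot encodes this in SegmentName), and the returned strings merely describe the action each branch takes.

public class AddSegmentSketch {
    enum Status { IN_PROGRESS, DONE }

    // Stand-in for SegmentName.isHighLevelConsumerSegmentName: assume HLC names
    // carry a marker the LLC naming scheme lacks (illustrative marker only).
    static boolean isHighLevelConsumerName(String segmentName) {
        return segmentName.contains("__HLC__");
    }

    static String decide(String segmentName, Status status) {
        if (isHighLevelConsumerName(segmentName)) {
            return "start HLRealtimeSegmentDataManager (HLC consumption)";
        }
        if (status == Status.DONE) {
            // Mirrors the downloadAndReplaceSegment(...) branch above.
            return "download committed copy and serve it as an offline segment";
        }
        return "start LLRealtimeSegmentDataManager (LLC consumption)";
    }

    public static void main(String[] args) {
        System.out.println(decide("myTable__HLC__0", Status.IN_PROGRESS));
        System.out.println(decide("myTable__0__42__20240101T0000Z", Status.DONE));
        System.out.println(decide("myTable__0__43__20240101T0100Z", Status.IN_PROGRESS));
    }
}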

Example 10 with SegmentDataManager

Use of com.linkedin.pinot.core.data.manager.offline.SegmentDataManager in project pinot by linkedin.

In class TablesResource, method getSegmentMetadata:

@GET
@Path("/tables/{tableName}/segments/{segmentName}/metadata")
@Produces(MediaType.APPLICATION_JSON)
@ApiOperation(value = "Provide segment metadata", notes = "Provide segments metadata for the segment on server")
@ApiResponses(value = { @ApiResponse(code = 200, message = "Success"), @ApiResponse(code = 500, message = "Internal server error", response = ErrorInfo.class), @ApiResponse(code = 404, message = "Table or segment not found", response = ErrorInfo.class) })
public String getSegmentMetadata(
        @ApiParam(value = "Table name including type", required = true, example = "myTable_OFFLINE") @PathParam("tableName") String tableName,
        @ApiParam(value = "Segment Name", required = true) @PathParam("segmentName") String segmentName,
        @ApiParam(value = "column name", required = false, allowMultiple = true, defaultValue = "") @QueryParam("columns") @DefaultValue("") List<String> columns) {
    TableDataManager tableDataManager = checkGetTableDataManager(tableName);
    SegmentDataManager segmentDataManager = null;
    try {
        segmentDataManager = tableDataManager.acquireSegment(segmentName);
        if (segmentDataManager == null) {
            throw new WebApplicationException(String.format("Table %s segment %s does not exist", tableName, segmentName), Response.Status.NOT_FOUND);
        }
        SegmentMetadataImpl segmentMetadata = (SegmentMetadataImpl) segmentDataManager.getSegment().getSegmentMetadata();
        Set<String> columnSet;
        if (columns.size() == 1 && columns.get(0).equals("*")) {
            columnSet = null;
        } else {
            columnSet = new HashSet<>(columns);
        }
        try {
            return segmentMetadata.toJson(columnSet).toString();
        } catch (JSONException e) {
            LOGGER.error("Failed to convert table {} segment {} to json", tableName, segmentMetadata);
            throw new WebApplicationException("Failed to convert segment metadata to json", Response.Status.INTERNAL_SERVER_ERROR);
        }
    } finally {
        if (segmentDataManager != null) {
            tableDataManager.releaseSegment(segmentDataManager);
        }
    }
}
Also used : SegmentDataManager(com.linkedin.pinot.core.data.manager.offline.SegmentDataManager) WebApplicationException(javax.ws.rs.WebApplicationException) TableDataManager(com.linkedin.pinot.core.data.manager.offline.TableDataManager) JSONException(org.json.JSONException) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) Path(javax.ws.rs.Path) Produces(javax.ws.rs.Produces) GET(javax.ws.rs.GET) ApiOperation(io.swagger.annotations.ApiOperation) ApiResponses(io.swagger.annotations.ApiResponses)
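
For completeness, a sketch of calling this endpoint with the JDK's built-in HTTP client (Java 11+). The host and port are placeholders and depend on how the server's admin API is configured in your deployment.

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class SegmentMetadataClient {
    public static void main(String[] args) throws Exception {
        // Table, segment, and port are placeholders; columns=* requests all columns.
        String url = "http://localhost:8097/tables/myTable_OFFLINE/segments/"
                + "myTable_OFFLINE_0/metadata?columns=*";
        HttpResponse<String> response = HttpClient.newHttpClient().send(
                HttpRequest.newBuilder(URI.create(url)).GET().build(),
                HttpResponse.BodyHandlers.ofString());
        // 200 => JSON metadata; 404 => table or segment not found (see @ApiResponses above).
        System.out.println(response.statusCode());
        System.out.println(response.body());
    }
}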

Aggregations

SegmentDataManager (com.linkedin.pinot.core.data.manager.offline.SegmentDataManager) 11
TableDataManager (com.linkedin.pinot.core.data.manager.offline.TableDataManager) 5
IndexSegment (com.linkedin.pinot.core.indexsegment.IndexSegment) 5
ApiOperation (io.swagger.annotations.ApiOperation) 3
ApiResponses (io.swagger.annotations.ApiResponses) 3
ArrayList (java.util.ArrayList) 3
GET (javax.ws.rs.GET) 3
Path (javax.ws.rs.Path) 3
Produces (javax.ws.rs.Produces) 3
OfflineSegmentDataManager (com.linkedin.pinot.core.data.manager.offline.OfflineSegmentDataManager) 2
WebApplicationException (javax.ws.rs.WebApplicationException) 2
Schema (com.linkedin.pinot.common.data.Schema) 1
QueryException (com.linkedin.pinot.common.exception.QueryException) 1
LLCRealtimeSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata) 1
RealtimeSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata) 1
TimerContext (com.linkedin.pinot.common.query.context.TimerContext) 1
BrokerRequest (com.linkedin.pinot.common.request.BrokerRequest) 1
InstanceRequest (com.linkedin.pinot.common.request.InstanceRequest) 1
SegmentSizeInfo (com.linkedin.pinot.common.restlet.resources.SegmentSizeInfo) 1
TableSegments (com.linkedin.pinot.common.restlet.resources.TableSegments) 1