Use of com.linkedin.pinot.core.data.manager.offline.SegmentDataManager in project pinot by LinkedIn.
The class ServerQueryExecutorV1Impl, method pruneSegments.
/**
* Helper method to prune segments.
*
* @param tableDataManager Table data manager
* @param segments List of segments to prune
* @param brokerRequest Broker request
* @return Total number of docs across all segments (including the ones that were pruned).
*/
private long pruneSegments(TableDataManager tableDataManager, List<SegmentDataManager> segments, BrokerRequest brokerRequest) {
  long totalRawDocs = 0;
  Iterator<SegmentDataManager> it = segments.iterator();
  while (it.hasNext()) {
    SegmentDataManager segmentDataManager = it.next();
    final IndexSegment indexSegment = segmentDataManager.getSegment();
    // We need to compute the total raw docs for the table before any pruning.
    totalRawDocs += indexSegment.getSegmentMetadata().getTotalRawDocs();
    if (_segmentPrunerService.prune(indexSegment, brokerRequest)) {
      it.remove();
      tableDataManager.releaseSegment(segmentDataManager);
    }
  }
  return totalRawDocs;
}
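For illustration, here is a minimal, self-contained sketch of the idea behind pruning: a segment whose time range cannot match the query is dropped from the list, but its document count is still added to the total beforehand, so the reported total covers pruned segments too. The types and the time-range rule below are stand-ins invented for this sketch; Pinot's actual pruners are configured behind _segmentPrunerService and operate on IndexSegment and BrokerRequest.

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class SegmentPruneSketch {
  // Stand-in for a segment: only the fields the sketch needs.
  static final class FakeSegment {
    final long minTimeMs;
    final long maxTimeMs;
    final long totalRawDocs;
    FakeSegment(long minTimeMs, long maxTimeMs, long totalRawDocs) {
      this.minTimeMs = minTimeMs;
      this.maxTimeMs = maxTimeMs;
      this.totalRawDocs = totalRawDocs;
    }
  }

  // A segment can be pruned when its time range does not overlap the query's range.
  static boolean prune(FakeSegment segment, long queryStartMs, long queryEndMs) {
    return segment.maxTimeMs < queryStartMs || segment.minTimeMs > queryEndMs;
  }

  public static void main(String[] args) {
    List<FakeSegment> segments = new ArrayList<>();
    segments.add(new FakeSegment(0, 999, 100));
    segments.add(new FakeSegment(1000, 1999, 200));
    long totalRawDocs = 0;
    Iterator<FakeSegment> it = segments.iterator();
    while (it.hasNext()) {
      FakeSegment segment = it.next();
      // Count every segment's docs before deciding whether to prune it.
      totalRawDocs += segment.totalRawDocs;
      if (prune(segment, 1500, 2500)) {
        it.remove();
      }
    }
    // Prints: 1 segments kept, 300 total docs
    System.out.println(segments.size() + " segments kept, " + totalRawDocs + " total docs");
  }
}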
Use of com.linkedin.pinot.core.data.manager.offline.SegmentDataManager in project pinot by LinkedIn.
The class ServerQueryExecutorV1Impl, method processQuery.
@Override
public DataTable processQuery(final QueryRequest queryRequest, ExecutorService executorService) {
  TimerContext timerContext = queryRequest.getTimerContext();
  TimerContext.Timer schedulerWaitTimer = timerContext.getPhaseTimer(ServerQueryPhase.SCHEDULER_WAIT);
  if (schedulerWaitTimer != null) {
    schedulerWaitTimer.stopAndRecord();
  }
  TimerContext.Timer queryProcessingTimer = timerContext.startNewPhaseTimer(ServerQueryPhase.QUERY_PROCESSING);
  DataTable dataTable;
  List<SegmentDataManager> queryableSegmentDataManagerList = null;
  InstanceRequest instanceRequest = queryRequest.getInstanceRequest();
  final long requestId = instanceRequest.getRequestId();
  try {
    TraceContext.register(instanceRequest);
    final BrokerRequest brokerRequest = instanceRequest.getQuery();
    LOGGER.debug("Incoming query is : {}", brokerRequest);
    TimerContext.Timer segmentPruneTimer = timerContext.startNewPhaseTimer(ServerQueryPhase.SEGMENT_PRUNING);
    final String tableName = instanceRequest.getQuery().getQuerySource().getTableName();
    TableDataManager tableDataManager = _instanceDataManager.getTableDataManager(tableName);
    queryableSegmentDataManagerList = acquireQueryableSegments(tableDataManager, instanceRequest);
    long totalRawDocs = pruneSegments(tableDataManager, queryableSegmentDataManagerList, instanceRequest.getQuery());
    segmentPruneTimer.stopAndRecord();
    int numSegmentsMatched = queryableSegmentDataManagerList.size();
    queryRequest.setSegmentCountAfterPruning(numSegmentsMatched);
    LOGGER.debug("Matched {} segments", numSegmentsMatched);
    if (numSegmentsMatched == 0) {
      DataTable emptyDataTable = DataTableBuilder.buildEmptyDataTable(brokerRequest);
      emptyDataTable.getMetadata().put(DataTable.TOTAL_DOCS_METADATA_KEY, String.valueOf(totalRawDocs));
      return emptyDataTable;
    }
    TimerContext.Timer planBuildTimer = timerContext.startNewPhaseTimer(ServerQueryPhase.BUILD_QUERY_PLAN);
    final Plan globalQueryPlan = _planMaker.makeInterSegmentPlan(queryableSegmentDataManagerList, brokerRequest, executorService, getResourceTimeOut(instanceRequest.getQuery()));
    planBuildTimer.stopAndRecord();
    if (_printQueryPlan) {
      LOGGER.debug("***************************** Query Plan for Request {} ***********************************", instanceRequest.getRequestId());
      globalQueryPlan.print();
      LOGGER.debug("*********************************** End Query Plan ***********************************");
    }
    TimerContext.Timer planExecTimer = timerContext.startNewPhaseTimer(ServerQueryPhase.QUERY_PLAN_EXECUTION);
    globalQueryPlan.execute();
    planExecTimer.stopAndRecord();
    dataTable = globalQueryPlan.getInstanceResponse();
    Map<String, String> dataTableMetadata = dataTable.getMetadata();
    queryProcessingTimer.stopAndRecord();
    LOGGER.debug("Searching Instance for Request Id - {}, browse took: {}", instanceRequest.getRequestId(), queryProcessingTimer.getDurationNs());
    LOGGER.debug("InstanceResponse for Request Id - {} : {}", instanceRequest.getRequestId(), dataTable.toString());
    dataTableMetadata.put(DataTable.TIME_USED_MS_METADATA_KEY, Long.toString(queryProcessingTimer.getDurationMs()));
    dataTableMetadata.put(DataTable.REQUEST_ID_METADATA_KEY, Long.toString(instanceRequest.getRequestId()));
    dataTableMetadata.put(DataTable.TRACE_INFO_METADATA_KEY, TraceContext.getTraceInfoOfRequestId(instanceRequest.getRequestId()));
    // Update the total docs in the metadata based on un-pruned segments.
    dataTableMetadata.put(DataTable.TOTAL_DOCS_METADATA_KEY, String.valueOf(totalRawDocs));
    return dataTable;
  } catch (Exception e) {
    _serverMetrics.addMeteredQueryValue(instanceRequest.getQuery(), ServerMeter.QUERY_EXECUTION_EXCEPTIONS, 1);
    LOGGER.error("Exception processing requestId {}", requestId, e);
    dataTable = new DataTableImplV2();
    Map<String, String> dataTableMetadata = dataTable.getMetadata();
    dataTable.addException(QueryException.getException(QueryException.QUERY_EXECUTION_ERROR, e));
    TraceContext.logException("ServerQueryExecutorV1Impl", "Exception occurs in processQuery");
    queryProcessingTimer.stopAndRecord();
    LOGGER.info("Searching Instance for Request Id - {}, browse took: {}, instanceResponse: {}", requestId, queryProcessingTimer.getDurationMs(), dataTable.toString());
    dataTableMetadata.put(DataTable.TIME_USED_MS_METADATA_KEY, Long.toString(queryProcessingTimer.getDurationMs()));
    dataTableMetadata.put(DataTable.REQUEST_ID_METADATA_KEY, Long.toString(instanceRequest.getRequestId()));
    dataTableMetadata.put(DataTable.TRACE_INFO_METADATA_KEY, TraceContext.getTraceInfoOfRequestId(instanceRequest.getRequestId()));
    return dataTable;
  } finally {
    TableDataManager tableDataManager = _instanceDataManager.getTableDataManager(queryRequest.getTableName());
    if (tableDataManager != null && queryableSegmentDataManagerList != null) {
      for (SegmentDataManager segmentDataManager : queryableSegmentDataManagerList) {
        tableDataManager.releaseSegment(segmentDataManager);
      }
    }
    TraceContext.unregister(instanceRequest);
  }
}
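The pattern worth noting above is that every acquired SegmentDataManager is released exactly once: pruned segments are released inside pruneSegments, and the survivors are released in the finally block even when the query throws. Presumably this keeps a reference count balanced so a segment is not unloaded while a query is still reading it. Below is a simplified, hypothetical sketch of that contract; the class and method names are stand-ins, not Pinot's API.

import java.util.concurrent.atomic.AtomicInteger;

public class RefCountSketch {
  static final class RefCountedSegment {
    private final AtomicInteger refCount = new AtomicInteger(1); // 1 = reference held by the table
    private final String name;
    RefCountedSegment(String name) { this.name = name; }

    // Take a reference for a query; fails once the segment has been fully released.
    boolean acquire() {
      int current;
      do {
        current = refCount.get();
        if (current <= 0) {
          return false;
        }
      } while (!refCount.compareAndSet(current, current + 1));
      return true;
    }

    // Drop a reference; destroy the segment when nobody holds it anymore.
    void release() {
      if (refCount.decrementAndGet() == 0) {
        System.out.println("destroying " + name); // safe: no query is reading it
      }
    }
  }

  public static void main(String[] args) {
    RefCountedSegment segment = new RefCountedSegment("seg_0");
    if (segment.acquire()) {          // query starts: take a reference
      try {
        // ... run the per-segment plan here ...
      } finally {
        segment.release();            // always release, even if the query throws
      }
    }
    segment.release();                // table drops its own reference; segment is destroyed
  }
}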
Use of com.linkedin.pinot.core.data.manager.offline.SegmentDataManager in project pinot by LinkedIn.
The class InstancePlanMakerImplV2, method makeInterSegmentPlan.
@Override
public Plan makeInterSegmentPlan(List<SegmentDataManager> segmentDataManagers, BrokerRequest brokerRequest, ExecutorService executorService, long timeOutMs) {
  // TODO: pass in List<IndexSegment> directly.
  List<IndexSegment> indexSegments = new ArrayList<>(segmentDataManagers.size());
  for (SegmentDataManager segmentDataManager : segmentDataManagers) {
    indexSegments.add(segmentDataManager.getSegment());
  }
  BrokerRequestPreProcessor.preProcess(indexSegments, brokerRequest);
  List<PlanNode> planNodes = new ArrayList<>();
  for (IndexSegment indexSegment : indexSegments) {
    planNodes.add(makeInnerSegmentPlan(indexSegment, brokerRequest));
  }
  CombinePlanNode combinePlanNode = new CombinePlanNode(planNodes, brokerRequest, executorService, timeOutMs);
  return new GlobalPlanImplV0(new InstanceResponsePlanNode(combinePlanNode));
}
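makeInterSegmentPlan builds one plan node per segment and hands them to a CombinePlanNode together with the executor service and a timeout. The sketch below shows, with stand-in types, the scatter-gather shape this implies: submit one task per segment, wait up to the timeout, and merge the partial results. It is only an illustration of the structure; the real CombinePlanNode is more involved.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

public class CombineSketch {
  // Stand-in for a per-segment plan: runs and returns a partial result.
  interface SegmentPlan extends Callable<Long> {}

  // Simplified combine step: fan out per-segment plans on the executor,
  // wait up to timeOutMs for all of them, and merge the partial results.
  static long combine(List<SegmentPlan> plans, ExecutorService executor, long timeOutMs) throws Exception {
    List<Future<Long>> futures = new ArrayList<>(plans.size());
    for (SegmentPlan plan : plans) {
      futures.add(executor.submit(plan));
    }
    long deadline = System.currentTimeMillis() + timeOutMs;
    long merged = 0;
    for (Future<Long> future : futures) {
      long remaining = Math.max(0, deadline - System.currentTimeMillis());
      merged += future.get(remaining, TimeUnit.MILLISECONDS);
    }
    return merged;
  }

  public static void main(String[] args) throws Exception {
    ExecutorService executor = Executors.newFixedThreadPool(4);
    List<SegmentPlan> plans = new ArrayList<>();
    for (int i = 0; i < 4; i++) {
      final long docs = (i + 1) * 100L;
      plans.add(() -> docs); // each "segment plan" just reports a doc count
    }
    System.out.println("merged result: " + combine(plans, executor, 1000L));
    executor.shutdown();
  }
}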
Use of com.linkedin.pinot.core.data.manager.offline.SegmentDataManager in project pinot by LinkedIn.
The class RealtimeTableDataManager, method addSegment.
/*
 * This call comes in one of two ways:
 * For HL Segments:
 * - We are being directed by helix to own up all the segments that we committed and are still in retention. In this case
 *   we treat it exactly like how OfflineTableDataManager would -- wrap it into an OfflineSegmentDataManager, and put it
 *   in the map.
 * - We are being asked to own up a new realtime segment. In this case, we wrap the segment with a RealTimeSegmentDataManager
 *   (that kicks off Kafka consumption). When the segment is committed we get notified via the notifySegmentCommitted call, at
 *   which time we replace the segment with the OfflineSegmentDataManager
 * For LL Segments:
 * - We are being asked to start consuming from a kafka partition.
 * - We did not know about the segment and are being asked to download and own the segment (re-balancing, or
 *   replacing a realtime server with a fresh one, maybe). We need to look at segment metadata and decide whether
 *   to start consuming or download the segment.
 */
@Override
public void addSegment(ZkHelixPropertyStore<ZNRecord> propertyStore, AbstractTableConfig tableConfig, InstanceZKMetadata instanceZKMetadata, SegmentZKMetadata inputSegmentZKMetadata) throws Exception {
  // TODO FIXME
  // Hack. We get the _helixPropertyStore here and save it, knowing that we will get this addSegment call
  // before the notifyCommitted call (that uses _helixPropertyStore)
  this._helixPropertyStore = propertyStore;
  final String segmentId = inputSegmentZKMetadata.getSegmentName();
  final String tableName = inputSegmentZKMetadata.getTableName();
  if (!(inputSegmentZKMetadata instanceof RealtimeSegmentZKMetadata)) {
    LOGGER.warn("Got called with an unexpected instance object:{},table {}, segment {}", inputSegmentZKMetadata.getClass().getSimpleName(), tableName, segmentId);
    return;
  }
  RealtimeSegmentZKMetadata segmentZKMetadata = (RealtimeSegmentZKMetadata) inputSegmentZKMetadata;
  LOGGER.info("Attempting to add realtime segment {} for table {}", segmentId, tableName);
  if (new File(_indexDir, segmentId).exists() && segmentZKMetadata.getStatus() == Status.DONE) {
    // segment already exists on file, and we have committed the realtime segment in ZK. Treat it like an offline segment
    if (_segmentsMap.containsKey(segmentId)) {
      LOGGER.warn("Got reload for segment already on disk {} table {}, have {}", segmentId, tableName, _segmentsMap.get(segmentId).getClass().getSimpleName());
      return;
    }
    IndexSegment segment = ColumnarSegmentLoader.load(new File(_indexDir, segmentId), _readMode, _indexLoadingConfigMetadata);
    addSegment(segment);
    markSegmentAsLoaded(segmentId);
  } else {
    // on-disk segment next time
    if (_segmentsMap.containsKey(segmentId)) {
      LOGGER.warn("Got reload for segment not on disk {} table {}, have {}", segmentId, tableName, _segmentsMap.get(segmentId).getClass().getSimpleName());
      return;
    }
    PinotHelixPropertyStoreZnRecordProvider propertyStoreHelper = PinotHelixPropertyStoreZnRecordProvider.forSchema(propertyStore);
    ZNRecord record = propertyStoreHelper.get(tableConfig.getValidationConfig().getSchemaName());
    LOGGER.info("Found schema {} ", tableConfig.getValidationConfig().getSchemaName());
    Schema schema = SchemaUtils.fromZNRecord(record);
    if (!isValid(schema, tableConfig.getIndexingConfig())) {
      LOGGER.error("Not adding segment {}", segmentId);
      throw new RuntimeException("Mismatching schema/table config for " + _tableName);
    }
    SegmentDataManager manager;
    if (SegmentName.isHighLevelConsumerSegmentName(segmentId)) {
      manager = new HLRealtimeSegmentDataManager(segmentZKMetadata, tableConfig, instanceZKMetadata, this, _indexDir.getAbsolutePath(), _readMode, SchemaUtils.fromZNRecord(record), _serverMetrics);
    } else {
      LLCRealtimeSegmentZKMetadata llcSegmentMetadata = (LLCRealtimeSegmentZKMetadata) segmentZKMetadata;
      if (segmentZKMetadata.getStatus().equals(Status.DONE)) {
        // TODO Remove code duplication here and in LLRealtimeSegmentDataManager
        downloadAndReplaceSegment(segmentId, llcSegmentMetadata);
        return;
      }
      manager = new LLRealtimeSegmentDataManager(segmentZKMetadata, tableConfig, instanceZKMetadata, this, _indexDir.getAbsolutePath(), SchemaUtils.fromZNRecord(record), _serverMetrics);
    }
    LOGGER.info("Initialize RealtimeSegmentDataManager - " + segmentId);
    try {
      _rwLock.writeLock().lock();
      _segmentsMap.put(segmentId, manager);
    } finally {
      _rwLock.writeLock().unlock();
    }
    _loadingSegments.add(segmentId);
  }
}
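The comment and the branches above boil down to a small decision: a segment that is on disk and committed in ZK is loaded like an offline segment; otherwise an HL segment starts consuming, and an LL segment either downloads the committed copy or starts consuming its partition. Below is a hypothetical helper that mirrors those branches; the enum and method names are stand-ins, not Pinot's API.

public class RealtimeSegmentDecisionSketch {
  enum Action { LOAD_FROM_DISK, DOWNLOAD_COMMITTED, START_CONSUMING_HL, START_CONSUMING_LL }

  // Mirrors the branch structure of addSegment above, in simplified form.
  static Action decide(boolean existsOnDisk, boolean committedInZk, boolean isHighLevelName) {
    if (existsOnDisk && committedInZk) {
      return Action.LOAD_FROM_DISK;       // treat it like an offline segment
    }
    if (isHighLevelName) {
      return Action.START_CONSUMING_HL;   // HLRealtimeSegmentDataManager kicks off Kafka consumption
    }
    if (committedInZk) {
      return Action.DOWNLOAD_COMMITTED;   // LLC segment already committed: download and replace
    }
    return Action.START_CONSUMING_LL;     // LLRealtimeSegmentDataManager consumes the partition
  }

  public static void main(String[] args) {
    System.out.println(decide(true, true, false));    // LOAD_FROM_DISK
    System.out.println(decide(false, true, false));   // DOWNLOAD_COMMITTED
    System.out.println(decide(false, false, true));   // START_CONSUMING_HL
  }
}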
Use of com.linkedin.pinot.core.data.manager.offline.SegmentDataManager in project pinot by LinkedIn.
The class TablesResource, method getSegmentMetadata.
@GET
@Path("/tables/{tableName}/segments/{segmentName}/metadata")
@Produces(MediaType.APPLICATION_JSON)
@ApiOperation(value = "Provide segment metadata", notes = "Provide segments metadata for the segment on server")
@ApiResponses(value = { @ApiResponse(code = 200, message = "Success"), @ApiResponse(code = 500, message = "Internal server error", response = ErrorInfo.class), @ApiResponse(code = 404, message = "Table or segment not found", response = ErrorInfo.class) })
public String getSegmentMetadata(@ApiParam(value = "Table name including type", required = true, example = "myTable_OFFLINE") @PathParam("tableName") String tableName, @ApiParam(value = "Segment Name", required = true) @PathParam("segmentName") String segmentName, @ApiParam(value = "column name", required = false, allowMultiple = true, defaultValue = "") @QueryParam("columns") @DefaultValue("") List<String> columns) {
  TableDataManager tableDataManager = checkGetTableDataManager(tableName);
  SegmentDataManager segmentDataManager = null;
  try {
    segmentDataManager = tableDataManager.acquireSegment(segmentName);
    if (segmentDataManager == null) {
      throw new WebApplicationException(String.format("Table %s segments %s does not exist", tableName, segmentName), Response.Status.NOT_FOUND);
    }
    SegmentMetadataImpl segmentMetadata = (SegmentMetadataImpl) segmentDataManager.getSegment().getSegmentMetadata();
    Set<String> columnSet;
    if (columns.size() == 1 && columns.get(0).equals("*")) {
      columnSet = null;
    } else {
      columnSet = new HashSet<>(columns);
    }
    try {
      return segmentMetadata.toJson(columnSet).toString();
    } catch (JSONException e) {
      LOGGER.error("Failed to convert table {} segment {} to json", tableName, segmentMetadata);
      throw new WebApplicationException("Failed to convert segment metadata to json", Response.Status.INTERNAL_SERVER_ERROR);
    }
  } finally {
    if (segmentDataManager != null) {
      tableDataManager.releaseSegment(segmentDataManager);
    }
  }
}
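Based on the annotations above, the metadata can be fetched over plain HTTP; a minimal client sketch follows. The host, port, table, and segment names are placeholders to adjust for your deployment, and per the handler above a single columns=* value clears the column filter (columnSet becomes null), which presumably returns metadata for all columns.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

public class SegmentMetadataClientSketch {
  public static void main(String[] args) throws Exception {
    // Host, port, table and segment names are placeholders; point them at a Pinot server's REST API.
    String url = "http://localhost:8097/tables/myTable_OFFLINE/segments/mySegment/metadata?columns=*";
    HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection();
    conn.setRequestProperty("Accept", "application/json");
    try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
      StringBuilder body = new StringBuilder();
      String line;
      while ((line = reader.readLine()) != null) {
        body.append(line);
      }
      System.out.println(body); // JSON produced by SegmentMetadataImpl.toJson(columnSet)
    } finally {
      conn.disconnect();
    }
  }
}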