Use of com.linkedin.pinot.common.utils.DataSchema in project pinot by linkedin.
The class BrokerReduceService, method reduceOnDataTable.
@Nonnull
@Override
public BrokerResponseNative reduceOnDataTable(@Nonnull BrokerRequest brokerRequest, @Nonnull Map<ServerInstance, DataTable> dataTableMap, @Nullable BrokerMetrics brokerMetrics) {
if (dataTableMap.size() == 0) {
// Empty response.
return BrokerResponseNative.empty();
}
BrokerResponseNative brokerResponseNative = new BrokerResponseNative();
List<QueryProcessingException> processingExceptions = brokerResponseNative.getProcessingExceptions();
long numDocsScanned = 0L;
long numEntriesScannedInFilter = 0L;
long numEntriesScannedPostFilter = 0L;
long numTotalRawDocs = 0L;
// Cache a data schema from data tables (try to cache one with data rows associated with it).
DataSchema cachedDataSchema = null;
// Process server response metadata.
Iterator<Map.Entry<ServerInstance, DataTable>> iterator = dataTableMap.entrySet().iterator();
while (iterator.hasNext()) {
Map.Entry<ServerInstance, DataTable> entry = iterator.next();
ServerInstance serverInstance = entry.getKey();
DataTable dataTable = entry.getValue();
Map<String, String> metadata = dataTable.getMetadata();
// Reduce on trace info.
if (brokerRequest.isEnableTrace()) {
brokerResponseNative.getTraceInfo().put(serverInstance.getHostname(), metadata.get(DataTable.TRACE_INFO_METADATA_KEY));
}
// Reduce on exceptions.
for (String key : metadata.keySet()) {
if (key.startsWith(DataTable.EXCEPTION_METADATA_KEY)) {
processingExceptions.add(new QueryProcessingException(Integer.parseInt(key.substring(9)), metadata.get(key)));
}
}
// Reduce on execution statistics.
String numDocsScannedString = metadata.get(DataTable.NUM_DOCS_SCANNED_METADATA_KEY);
if (numDocsScannedString != null) {
numDocsScanned += Long.parseLong(numDocsScannedString);
}
String numEntriesScannedInFilterString = metadata.get(DataTable.NUM_ENTRIES_SCANNED_IN_FILTER_METADATA_KEY);
if (numEntriesScannedInFilterString != null) {
numEntriesScannedInFilter += Long.parseLong(numEntriesScannedInFilterString);
}
String numEntriesScannedPostFilterString = metadata.get(DataTable.NUM_ENTRIES_SCANNED_POST_FILTER_METADATA_KEY);
if (numEntriesScannedPostFilterString != null) {
numEntriesScannedPostFilter += Long.parseLong(numEntriesScannedPostFilterString);
}
String numTotalRawDocsString = metadata.get(DataTable.TOTAL_DOCS_METADATA_KEY);
if (numTotalRawDocsString != null) {
numTotalRawDocs += Long.parseLong(numTotalRawDocsString);
}
// After processing the metadata, remove data tables without data rows inside.
DataSchema dataSchema = dataTable.getDataSchema();
if (dataSchema == null) {
iterator.remove();
} else {
// Prefer caching the schema of a data table that has data rows; otherwise fall back to one that only carries a schema.
if (dataTable.getNumberOfRows() == 0) {
if (cachedDataSchema == null) {
cachedDataSchema = dataSchema;
}
iterator.remove();
} else {
cachedDataSchema = dataSchema;
}
}
}
// Set execution statistics.
brokerResponseNative.setNumDocsScanned(numDocsScanned);
brokerResponseNative.setNumEntriesScannedInFilter(numEntriesScannedInFilter);
brokerResponseNative.setNumEntriesScannedPostFilter(numEntriesScannedPostFilter);
brokerResponseNative.setTotalDocs(numTotalRawDocs);
// Update broker metrics.
String tableName = brokerRequest.getQuerySource().getTableName();
if (brokerMetrics != null) {
brokerMetrics.addMeteredTableValue(tableName, BrokerMeter.DOCUMENTS_SCANNED, numDocsScanned);
brokerMetrics.addMeteredTableValue(tableName, BrokerMeter.ENTRIES_SCANNED_IN_FILTER, numEntriesScannedInFilter);
brokerMetrics.addMeteredTableValue(tableName, BrokerMeter.ENTRIES_SCANNED_POST_FILTER, numEntriesScannedPostFilter);
}
if (dataTableMap.isEmpty()) {
// This can only happen for a selection query.
if (cachedDataSchema != null) {
List<String> selectionColumns = SelectionOperatorUtils.getSelectionColumns(brokerRequest.getSelections().getSelectionColumns(), cachedDataSchema);
brokerResponseNative.setSelectionResults(new SelectionResults(selectionColumns, new ArrayList<Serializable[]>(0)));
}
} else {
// Reduce server responses data and set query results into the broker response.
assert cachedDataSchema != null;
if (brokerRequest.isSetSelections()) {
// Selection query.
// If the data table map contains more than one data table, remove conflicting data tables.
DataSchema masterDataSchema = cachedDataSchema.clone();
if (dataTableMap.size() > 1) {
List<String> droppedServers = removeConflictingResponses(masterDataSchema, dataTableMap);
if (!droppedServers.isEmpty()) {
String errorMessage = QueryException.MERGE_RESPONSE_ERROR.getMessage() + ": responses for table: " + tableName + " from servers: " + droppedServers + " got dropped due to data schema inconsistency.";
LOGGER.error(errorMessage);
if (brokerMetrics != null) {
brokerMetrics.addMeteredTableValue(tableName, BrokerMeter.RESPONSE_MERGE_EXCEPTIONS, 1);
}
brokerResponseNative.addToExceptions(new QueryProcessingException(QueryException.MERGE_RESPONSE_ERROR_CODE, errorMessage));
}
}
setSelectionResults(brokerResponseNative, brokerRequest.getSelections(), dataTableMap, masterDataSchema);
} else {
// Aggregation query.
AggregationFunction[] aggregationFunctions = AggregationFunctionUtils.getAggregationFunctions(brokerRequest.getAggregationsInfo());
if (!brokerRequest.isSetGroupBy()) {
// Aggregation only query.
setAggregationResults(brokerResponseNative, aggregationFunctions, dataTableMap, cachedDataSchema);
} else {
// Aggregation group-by query.
setGroupByResults(brokerResponseNative, aggregationFunctions, brokerRequest.getGroupBy(), dataTableMap);
}
}
}
return brokerResponseNative;
}
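The reduce step above folds the per-server execution statistics, reported as string-valued metadata, into single broker-side counters, skipping any counter a server did not report. The following standalone sketch (plain Java, not the Pinot API; the metadata key names are hypothetical stand-ins for the DataTable constants) illustrates that summation pattern in isolation.
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Standalone illustration (not the Pinot API) of the statistics merge performed above.
public class ExecutionStatsMergeSketch {
  public static void main(String[] args) {
    // Hypothetical per-server metadata; the key names stand in for the DataTable constants.
    Map<String, String> server1 = new HashMap<>();
    server1.put("numDocsScanned", "120");
    server1.put("totalDocs", "1000");
    Map<String, String> server2 = new HashMap<>();
    server2.put("numDocsScanned", "80");
    List<Map<String, String>> serverMetadata = Arrays.asList(server1, server2);

    long numDocsScanned = 0L;
    long numTotalRawDocs = 0L;
    for (Map<String, String> metadata : serverMetadata) {
      // A counter may be missing if a server hit an exception, so check for null first.
      String docsScanned = metadata.get("numDocsScanned");
      if (docsScanned != null) {
        numDocsScanned += Long.parseLong(docsScanned);
      }
      String totalDocs = metadata.get("totalDocs");
      if (totalDocs != null) {
        numTotalRawDocs += Long.parseLong(totalDocs);
      }
    }
    // Prints numDocsScanned=200, totalDocs=1000 (server2 did not report totalDocs).
    System.out.println("numDocsScanned=" + numDocsScanned + ", totalDocs=" + numTotalRawDocs);
  }
}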
Use of com.linkedin.pinot.common.utils.DataSchema in project pinot by linkedin.
The class BrokerReduceService, method removeConflictingResponses.
/**
* Given a data schema, remove data tables that are not compatible with this data schema.
* <p>Upgrade the data schema passed in to cover all remaining data schemas.
*
* @param dataSchema data schema.
* @param dataTableMap map from server to data table.
* @return list of server names where the data table got removed.
*/
@Nonnull
private List<String> removeConflictingResponses(@Nonnull DataSchema dataSchema, @Nonnull Map<ServerInstance, DataTable> dataTableMap) {
List<String> droppedServers = new ArrayList<>();
Iterator<Map.Entry<ServerInstance, DataTable>> iterator = dataTableMap.entrySet().iterator();
while (iterator.hasNext()) {
Map.Entry<ServerInstance, DataTable> entry = iterator.next();
DataSchema dataSchemaToCompare = entry.getValue().getDataSchema();
assert dataSchemaToCompare != null;
if (!dataSchema.isTypeCompatibleWith(dataSchemaToCompare)) {
droppedServers.add(entry.getKey().toString());
iterator.remove();
} else {
dataSchema.upgradeToCover(dataSchemaToCompare);
}
}
return droppedServers;
}
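removeConflictingResponses keeps a server response only if its schema is type-compatible with the master schema, widening the master schema as it goes; incompatible responses are dropped and reported. The simplified, self-contained model below illustrates that compatible-or-drop decision; it is not Pinot's DataSchema, and the ordinal-based widening rule is an assumption made purely for illustration.
import java.util.Arrays;
import java.util.List;

// Simplified model of the compatible-or-drop decision used above: schemas with the same columns
// but different numeric widths are merged by widening; anything else is treated as a conflict.
public class SchemaMergeSketch {
  enum ColumnType { INT, LONG, FLOAT, DOUBLE, STRING }

  static boolean isNumeric(ColumnType type) {
    return type != ColumnType.STRING;
  }

  // Compatible when every column has either the same type or two numeric types.
  static boolean isTypeCompatible(ColumnType[] a, ColumnType[] b) {
    if (a.length != b.length) {
      return false;
    }
    for (int i = 0; i < a.length; i++) {
      if (a[i] != b[i] && !(isNumeric(a[i]) && isNumeric(b[i]))) {
        return false;
      }
    }
    return true;
  }

  // Upgrade the master schema column by column to the wider of the two numeric types.
  static void upgradeToCover(ColumnType[] master, ColumnType[] other) {
    for (int i = 0; i < master.length; i++) {
      if (isNumeric(master[i]) && isNumeric(other[i]) && other[i].ordinal() > master[i].ordinal()) {
        master[i] = other[i];
      }
    }
  }

  public static void main(String[] args) {
    ColumnType[] master = {ColumnType.INT, ColumnType.STRING};
    List<ColumnType[]> serverSchemas = Arrays.asList(
        new ColumnType[]{ColumnType.LONG, ColumnType.STRING},    // compatible, widens INT to LONG
        new ColumnType[]{ColumnType.STRING, ColumnType.STRING}); // conflicting, gets dropped

    for (ColumnType[] schema : serverSchemas) {
      if (isTypeCompatible(master, schema)) {
        upgradeToCover(master, schema);
      } else {
        System.out.println("dropped conflicting schema: " + Arrays.toString(schema));
      }
    }
    System.out.println("merged schema: " + Arrays.toString(master)); // [LONG, STRING]
  }
}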
Use of com.linkedin.pinot.common.utils.DataSchema in project pinot by linkedin.
The class CombineService, method mergeTwoBlocks.
public static void mergeTwoBlocks(@Nonnull BrokerRequest brokerRequest, @Nonnull IntermediateResultsBlock mergedBlock, @Nonnull IntermediateResultsBlock blockToMerge) {
// Combine processing exceptions.
List<ProcessingException> mergedProcessingExceptions = mergedBlock.getProcessingExceptions();
List<ProcessingException> processingExceptionsToMerge = blockToMerge.getProcessingExceptions();
if (mergedProcessingExceptions == null) {
mergedBlock.setProcessingExceptions(processingExceptionsToMerge);
} else if (processingExceptionsToMerge != null) {
mergedProcessingExceptions.addAll(processingExceptionsToMerge);
}
// Combine result.
if (brokerRequest.isSetAggregationsInfo()) {
if (!brokerRequest.isSetGroupBy()) {
// Combine aggregation only result.
// Might be null if an exception was caught during query execution.
List<Object> aggregationResultToMerge = blockToMerge.getAggregationResult();
if (aggregationResultToMerge == null) {
// No data in block to merge.
return;
}
AggregationFunctionContext[] mergedAggregationFunctionContexts = mergedBlock.getAggregationFunctionContexts();
if (mergedAggregationFunctionContexts == null) {
// No data in merged block.
mergedBlock.setAggregationFunctionContexts(blockToMerge.getAggregationFunctionContexts());
mergedBlock.setAggregationResults(aggregationResultToMerge);
// Return early: the local mergedAggregationFunctionContexts reference is still null here, so falling through would throw a NullPointerException below.
return;
}
// Merge the two blocks.
List<Object> mergedAggregationResult = mergedBlock.getAggregationResult();
int numAggregationFunctions = mergedAggregationFunctionContexts.length;
for (int i = 0; i < numAggregationFunctions; i++) {
mergedAggregationResult.set(i, mergedAggregationFunctionContexts[i].getAggregationFunction().merge(mergedAggregationResult.get(i), aggregationResultToMerge.get(i)));
}
} else {
// Combine aggregation group-by result, which should not come into CombineService.
throw new UnsupportedOperationException();
}
} else {
// Combine selection result.
// Data schema will be null if an exception was caught during query processing.
// Result set size will be zero if no row matches the predicate.
DataSchema mergedBlockSchema = mergedBlock.getSelectionDataSchema();
DataSchema blockToMergeSchema = blockToMerge.getSelectionDataSchema();
Collection<Serializable[]> mergedBlockResultSet = mergedBlock.getSelectionResult();
Collection<Serializable[]> blockToMergeResultSet = blockToMerge.getSelectionResult();
if (mergedBlockSchema == null || mergedBlockResultSet.size() == 0) {
// If the schema of the block to merge is not null, copy its data schema and result into the merged block.
if (blockToMergeSchema != null) {
mergedBlock.setSelectionDataSchema(blockToMergeSchema);
mergedBlock.setSelectionResult(blockToMergeResultSet);
}
} else {
// Some data in merged block.
Selection selection = brokerRequest.getSelections();
boolean isSelectionOrderBy = selection.isSetSelectionSortSequence();
int selectionSize = selection.getSize();
// No need to merge if the merged block already holds enough rows for a selection-only query.
if (!isSelectionOrderBy && (mergedBlockResultSet.size() == selectionSize)) {
return;
}
// Merge only if there is data in the block to merge.
if (blockToMergeSchema != null && blockToMergeResultSet.size() > 0) {
if (mergedBlockSchema.isTypeCompatibleWith(blockToMergeSchema)) {
// Two blocks are mergeable.
// Upgrade the merged block schema if necessary.
mergedBlockSchema.upgradeToCover(blockToMergeSchema);
// Merge two blocks.
if (isSelectionOrderBy) {
// Combine selection order-by.
SelectionOperatorUtils.mergeWithOrdering((PriorityQueue<Serializable[]>) mergedBlockResultSet, blockToMergeResultSet, selection.getOffset() + selectionSize);
} else {
// Combine selection only.
SelectionOperatorUtils.mergeWithoutOrdering(mergedBlockResultSet, blockToMergeResultSet, selectionSize);
}
mergedBlock.setSelectionResult(mergedBlockResultSet);
} else {
// Two blocks are not mergeable.
throw new RuntimeException("Data schema inconsistency between merged block schema: " + mergedBlockSchema + " and block to merge schema: " + blockToMergeSchema + ", drop block to merge.");
}
}
}
}
}
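For selections, the merge above delegates to SelectionOperatorUtils.mergeWithoutOrdering (append rows until the selection size is reached) or mergeWithOrdering (keep only the best offset + size rows). The sketch below models those two merge modes with plain Java collections; it is an illustration of the idea, not the SelectionOperatorUtils implementation.
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

// Plain-Java model of the two selection merge modes referenced above: append-with-cap for
// selection-only queries, and a bounded priority queue for selection order-by queries.
public class SelectionMergeSketch {

  // Selection only: append rows from the block to merge until the cap is reached.
  static void mergeWithoutOrdering(List<Object[]> merged, List<Object[]> toMerge, int maxRows) {
    for (Object[] row : toMerge) {
      if (merged.size() >= maxRows) {
        return;
      }
      merged.add(row);
    }
  }

  // Selection order-by: keep at most maxRows rows, evicting the current worst row (the queue
  // head) whenever a better row arrives.
  static void mergeWithOrdering(PriorityQueue<Object[]> merged, List<Object[]> toMerge,
      Comparator<Object[]> order, int maxRows) {
    for (Object[] row : toMerge) {
      if (merged.size() < maxRows) {
        merged.offer(row);
      } else if (order.compare(merged.peek(), row) < 0) {
        merged.poll();
        merged.offer(row);
      }
    }
  }

  public static void main(String[] args) {
    List<Object[]> merged = new ArrayList<>();
    merged.add(new Object[]{3});
    merged.add(new Object[]{1});
    List<Object[]> toMerge = new ArrayList<>();
    toMerge.add(new Object[]{7});
    toMerge.add(new Object[]{5});

    mergeWithoutOrdering(merged, toMerge, 3);
    System.out.println("selection-only rows kept: " + merged.size()); // 3

    // Order by the first column ascending; the queue head is the smallest kept value.
    Comparator<Object[]> byFirstColumn = Comparator.comparingInt(r -> (int) r[0]);
    PriorityQueue<Object[]> ordered = new PriorityQueue<>(byFirstColumn);
    ordered.offer(new Object[]{3});
    ordered.offer(new Object[]{1});
    mergeWithOrdering(ordered, toMerge, byFirstColumn, 3);
    System.out.println("order-by rows kept: " + ordered.size()); // 3, holding {3, 5, 7}
  }
}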
Use of com.linkedin.pinot.common.utils.DataSchema in project pinot by linkedin.
The class SelectionOperatorUtils, method extractRowFromDataTable.
/**
* Extract a selection row from {@link DataTable}. (Broker side)
*
* @param dataTable data table.
* @param rowId row id.
* @return selection row.
*/
@Nonnull
public static Serializable[] extractRowFromDataTable(@Nonnull DataTable dataTable, int rowId) {
DataSchema dataSchema = dataTable.getDataSchema();
int numColumns = dataSchema.size();
Serializable[] row = new Serializable[numColumns];
for (int i = 0; i < numColumns; i++) {
DataType columnType = dataSchema.getColumnType(i);
switch(columnType) {
// Single-value column.
case INT:
row[i] = dataTable.getInt(rowId, i);
break;
case LONG:
row[i] = dataTable.getLong(rowId, i);
break;
case FLOAT:
row[i] = dataTable.getFloat(rowId, i);
break;
case DOUBLE:
row[i] = dataTable.getDouble(rowId, i);
break;
case STRING:
row[i] = dataTable.getString(rowId, i);
break;
// Multi-value column.
case INT_ARRAY:
row[i] = dataTable.getIntArray(rowId, i);
break;
case LONG_ARRAY:
row[i] = dataTable.getLongArray(rowId, i);
break;
case FLOAT_ARRAY:
row[i] = dataTable.getFloatArray(rowId, i);
break;
case DOUBLE_ARRAY:
row[i] = dataTable.getDoubleArray(rowId, i);
break;
case STRING_ARRAY:
row[i] = dataTable.getStringArray(rowId, i);
break;
default:
throw new UnsupportedOperationException("Unsupported data type: " + columnType + " for column: " + dataSchema.getColumnName(i));
}
}
return row;
}
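A minimal broker-side usage sketch, assuming a populated DataTable is at hand: extract every row by calling the method above once per row id. The import paths follow this Pinot version's package layout and are otherwise an assumption.
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

import com.linkedin.pinot.common.utils.DataTable;
import com.linkedin.pinot.core.query.selection.SelectionOperatorUtils;

public class ExtractAllRowsSketch {
  // Collects every row of the data table using the broker-side extraction shown above.
  public static List<Serializable[]> extractAllRows(DataTable dataTable) {
    int numRows = dataTable.getNumberOfRows();
    List<Serializable[]> rows = new ArrayList<>(numRows);
    for (int rowId = 0; rowId < numRows; rowId++) {
      rows.add(SelectionOperatorUtils.extractRowFromDataTable(dataTable, rowId));
    }
    return rows;
  }
}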
Use of com.linkedin.pinot.common.utils.DataSchema in project pinot by linkedin.
The class ScheduledRequestHandlerTest, method testValidQueryResponse.
@Test
public void testValidQueryResponse() throws InterruptedException, ExecutionException, TimeoutException, IOException {
ScheduledRequestHandler handler = new ScheduledRequestHandler(new QueryScheduler(queryExecutor) {
@Override
public ListenableFuture<DataTable> submit(QueryRequest queryRequest) {
return queryRunners.submit(new Callable<DataTable>() {
@Override
public DataTable call() throws Exception {
String[] columns = new String[] { "foo", "bar" };
FieldSpec.DataType[] columnTypes = new FieldSpec.DataType[] { FieldSpec.DataType.STRING, FieldSpec.DataType.INT };
DataSchema dataSchema = new DataSchema(columns, columnTypes);
DataTableBuilder dtBuilder = new DataTableBuilder(dataSchema);
dtBuilder.startRow();
dtBuilder.setColumn(0, "mars");
dtBuilder.setColumn(1, 10);
dtBuilder.finishRow();
dtBuilder.startRow();
dtBuilder.setColumn(0, "jupiter");
dtBuilder.setColumn(1, 100);
dtBuilder.finishRow();
return dtBuilder.build();
}
});
}
}, serverMetrics);
ByteBuf requestBuf = getSerializedInstanceRequest(getInstanceRequest());
ListenableFuture<byte[]> responseFuture = handler.processRequest(channelHandlerContext, requestBuf);
byte[] responseBytes = responseFuture.get(2, TimeUnit.SECONDS);
DataTable responseDT = DataTableFactory.getDataTable(responseBytes);
Assert.assertEquals(responseDT.getNumberOfRows(), 2);
Assert.assertEquals(responseDT.getString(0, 0), "mars");
Assert.assertEquals(responseDT.getInt(0, 1), 10);
Assert.assertEquals(responseDT.getString(1, 0), "jupiter");
Assert.assertEquals(responseDT.getInt(1, 1), 100);
}
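As a hedged follow-up tying this test to the earlier SelectionOperatorUtils snippet, the same two rows could also be verified through the broker-side row extraction instead of the raw getString()/getInt() accessors. A small sketch, assuming the same Pinot and TestNG classes are on the classpath:
import java.io.Serializable;

import org.testng.Assert;

import com.linkedin.pinot.common.utils.DataTable;
import com.linkedin.pinot.core.query.selection.SelectionOperatorUtils;

public class ResponseRowAssertions {
  // Verifies the rows built in the test above via extractRowFromDataTable.
  static void assertPlanetRows(DataTable responseDT) {
    Serializable[] firstRow = SelectionOperatorUtils.extractRowFromDataTable(responseDT, 0);
    Assert.assertEquals(firstRow[0], "mars");
    Assert.assertEquals(firstRow[1], 10);
    Serializable[] secondRow = SelectionOperatorUtils.extractRowFromDataTable(responseDT, 1);
    Assert.assertEquals(secondRow[0], "jupiter");
    Assert.assertEquals(secondRow[1], 100);
  }
}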