Use of com.linkedin.pinot.common.utils.DataSchema in the pinot project by linkedin.
Class IntermediateResultsBlock, method getAggregationResultDataTable.
/**
 * Builds a single-row {@link DataTable} from the aggregation results held by this block.
 * <p>The schema has one column per entry in {@code _aggregationFunctionContexts}: the column name is the
 * aggregation column name and the column type is the function's intermediate result data type. The row is
 * filled from {@code _aggregationResult} by position.
 *
 * @return the built data table after being passed through {@code attachMetadataToDataTable}.
 * @throws UnsupportedOperationException if an intermediate result data type is not LONG, DOUBLE or OBJECT.
 * @throws Exception declared by the original signature (kept for caller compatibility).
 */
@Nonnull
private DataTable getAggregationResultDataTable() throws Exception {
  // Extract each aggregation column name and type from aggregation function context.
  int numAggregationFunctions = _aggregationFunctionContexts.length;
  String[] columnNames = new String[numAggregationFunctions];
  FieldSpec.DataType[] columnTypes = new FieldSpec.DataType[numAggregationFunctions];
  for (int i = 0; i < numAggregationFunctions; i++) {
    AggregationFunctionContext aggregationFunctionContext = _aggregationFunctionContexts[i];
    columnNames[i] = aggregationFunctionContext.getAggregationColumnName();
    columnTypes[i] = aggregationFunctionContext.getAggregationFunction().getIntermediateResultDataType();
  }

  // Build the data table.
  DataTableBuilder dataTableBuilder = new DataTableBuilder(new DataSchema(columnNames, columnTypes));
  dataTableBuilder.startRow();
  for (int i = 0; i < numAggregationFunctions; i++) {
    switch (columnTypes[i]) {
      case LONG:
        dataTableBuilder.setColumn(i, ((Number) _aggregationResult.get(i)).longValue());
        break;
      case DOUBLE:
        // Cast to Number (as the LONG branch does) so that any numeric result object is accepted instead of
        // failing with a ClassCastException when the value is not exactly a Double.
        dataTableBuilder.setColumn(i, ((Number) _aggregationResult.get(i)).doubleValue());
        break;
      case OBJECT:
        dataTableBuilder.setColumn(i, _aggregationResult.get(i));
        break;
      default:
        throw new UnsupportedOperationException(
            "Unsupported aggregation column data type: " + columnTypes[i] + " for column: " + columnNames[i]);
    }
  }
  dataTableBuilder.finishRow();
  DataTable dataTable = dataTableBuilder.build();
  return attachMetadataToDataTable(dataTable);
}
Use of com.linkedin.pinot.common.utils.DataSchema in the pinot project by linkedin.
Class SelectionOperatorUtils, method extractDataSchema.
/**
 * Build the {@link DataSchema} for a selection query on a single {@link IndexSegment}. (Inner segment)
 * <p>Columns from the sort sequence are placed first, in sort order. Each selection column is appended after
 * them unless the same column name has already been added. The data type of each column is read from the
 * segment's data source metadata; columns that are not single-value use the type returned by
 * {@code toMultiValue()}.
 *
 * @param sortSequence sort sequence, may be {@code null} when there is no ordering.
 * @param selectionColumns selection columns.
 * @param indexSegment index segment used to look up column metadata.
 * @return data schema.
 */
@Nonnull
public static DataSchema extractDataSchema(@Nullable List<SelectionSort> sortSequence, @Nonnull List<String> selectionColumns, @Nonnull IndexSegment indexSegment) {
  List<String> orderedColumns = new ArrayList<>();
  Set<String> seenColumns = new HashSet<>();

  // Sort columns always go in, one entry per sort expression.
  if (sortSequence != null) {
    for (SelectionSort sort : sortSequence) {
      String sortColumn = sort.getColumn();
      orderedColumns.add(sortColumn);
      seenColumns.add(sortColumn);
    }
  }

  // Selection columns go in only when not seen before; Set.add() reports whether the name was new.
  for (String selectionColumn : selectionColumns) {
    if (seenColumns.add(selectionColumn)) {
      orderedColumns.add(selectionColumn);
    }
  }

  String[] names = orderedColumns.toArray(new String[orderedColumns.size()]);
  DataType[] types = new DataType[names.length];
  int index = 0;
  for (String name : names) {
    DataSourceMetadata metadata = indexSegment.getDataSource(name).getDataSourceMetadata();
    types[index++] = metadata.isSingleValue() ? metadata.getDataType() : metadata.getDataType().toMultiValue();
  }
  return new DataSchema(names, types);
}
Use of com.linkedin.pinot.common.utils.DataSchema in the pinot project by linkedin.
Class InnerSegmentSelectionMultiValueQueriesTest, method testSelectStar.
/**
 * Runs {@code SELECT *} with and without the filter and checks execution statistics, the selection data
 * schema and the first result row. The two runs differ only in the expected number of entries scanned in
 * the filter.
 */
@Test
public void testSelectStar() {
  String query = "SELECT * FROM testTable";

  // Test query without filter.
  MSelectionOnlyOperator operator = getOperatorForQuery(query);
  verifySelectStarBlock(operator, 0L);

  // Test query with filter.
  operator = getOperatorForQueryWithFilter(query);
  verifySelectStarBlock(operator, 230501L);
}

/**
 * Pulls the next block from the operator and asserts the expectations shared by both select-star runs.
 *
 * @param operator selection-only operator to execute.
 * @param expectedNumEntriesScannedInFilter expected number of entries scanned in the filter phase.
 */
private static void verifySelectStarBlock(MSelectionOnlyOperator operator, long expectedNumEntriesScannedInFilter) {
  IntermediateResultsBlock block = (IntermediateResultsBlock) operator.nextBlock();

  ExecutionStatistics stats = operator.getExecutionStatistics();
  Assert.assertEquals(stats.getNumDocsScanned(), 10L);
  Assert.assertEquals(stats.getNumEntriesScannedInFilter(), expectedNumEntriesScannedInFilter);
  Assert.assertEquals(stats.getNumEntriesScannedPostFilter(), 100L);
  Assert.assertEquals(stats.getNumTotalRawDocs(), 100000L);

  DataSchema schema = block.getSelectionDataSchema();
  Assert.assertEquals(schema.size(), 10);
  Assert.assertEquals(schema.getColumnName(0), "column1");
  Assert.assertEquals(schema.getColumnName(5), "column6");
  Assert.assertEquals(schema.getColumnType(0), FieldSpec.DataType.INT);
  Assert.assertEquals(schema.getColumnType(5), FieldSpec.DataType.INT_ARRAY);

  List<Serializable[]> rows = (List<Serializable[]>) block.getSelectionResult();
  Assert.assertEquals(rows.size(), 10);
  Serializable[] head = rows.get(0);
  Assert.assertEquals(head.length, 10);
  Assert.assertEquals(((Integer) head[0]).intValue(), 890282370);
  Assert.assertEquals(head[5], new int[] { Integer.MAX_VALUE });
}
Use of com.linkedin.pinot.common.utils.DataSchema in the pinot project by linkedin.
Class InnerSegmentSelectionMultiValueQueriesTest, method testSelectionOrderBy.
/**
 * Runs the selection order-by query with and without the filter and checks execution statistics, the
 * selection data schema and the row at the head of the result queue.
 */
@Test
public void testSelectionOrderBy() {
  String query = "SELECT" + SELECTION + " FROM testTable" + ORDER_BY;

  // Test query without filter.
  MSelectionOrderByOperator operator = getOperatorForQuery(query);
  verifySelectionOrderByBlock(operator, 100000L, 0L, 400000L, 1252);

  // Test query with filter.
  operator = getOperatorForQueryWithFilter(query);
  verifySelectionOrderByBlock(operator, 15620L, 282430L, 62480L, Integer.MAX_VALUE);
}

/**
 * Pulls the next block from the operator and asserts the order-by expectations.
 *
 * @param operator selection order-by operator to execute.
 * @param expectedNumDocsScanned expected number of documents scanned.
 * @param expectedNumEntriesScannedInFilter expected number of entries scanned in the filter phase.
 * @param expectedNumEntriesScannedPostFilter expected number of entries scanned after filtering.
 * @param expectedHeadColumn6Value expected single element of the "column6" array in the queue's head row.
 */
private static void verifySelectionOrderByBlock(MSelectionOrderByOperator operator, long expectedNumDocsScanned,
    long expectedNumEntriesScannedInFilter, long expectedNumEntriesScannedPostFilter, int expectedHeadColumn6Value) {
  IntermediateResultsBlock block = (IntermediateResultsBlock) operator.nextBlock();

  ExecutionStatistics stats = operator.getExecutionStatistics();
  Assert.assertEquals(stats.getNumDocsScanned(), expectedNumDocsScanned);
  Assert.assertEquals(stats.getNumEntriesScannedInFilter(), expectedNumEntriesScannedInFilter);
  Assert.assertEquals(stats.getNumEntriesScannedPostFilter(), expectedNumEntriesScannedPostFilter);
  Assert.assertEquals(stats.getNumTotalRawDocs(), 100000L);

  DataSchema schema = block.getSelectionDataSchema();
  Assert.assertEquals(schema.size(), 4);
  Assert.assertEquals(schema.getColumnName(0), "column5");
  Assert.assertEquals(schema.getColumnName(3), "column6");
  Assert.assertEquals(schema.getColumnType(0), FieldSpec.DataType.STRING);
  Assert.assertEquals(schema.getColumnType(3), FieldSpec.DataType.INT_ARRAY);

  Queue<Serializable[]> rows = (Queue<Serializable[]>) block.getSelectionResult();
  Assert.assertEquals(rows.size(), 10);
  // peek() exposes the head of the queue without removing it.
  Serializable[] head = rows.peek();
  Assert.assertEquals(head.length, 4);
  Assert.assertEquals((String) head[0], "AKXcXcIqsqOJFsdwxZ");
  Assert.assertEquals(head[3], new int[] { expectedHeadColumn6Value });
}
Use of com.linkedin.pinot.common.utils.DataSchema in the pinot project by linkedin.
Class InnerSegmentSelectionMultiValueQueriesTest, method testSelectionOnly.
/**
 * Runs the selection-only query with and without the filter and checks execution statistics, the selection
 * data schema and the first result row. The two runs differ only in the expected number of entries scanned
 * in the filter.
 */
@Test
public void testSelectionOnly() {
  String query = "SELECT" + SELECTION + " FROM testTable";

  // Test query without filter.
  MSelectionOnlyOperator operator = getOperatorForQuery(query);
  verifySelectionOnlyBlock(operator, 0L);

  // Test query with filter.
  operator = getOperatorForQueryWithFilter(query);
  verifySelectionOnlyBlock(operator, 230501L);
}

/**
 * Pulls the next block from the operator and asserts the expectations shared by both selection-only runs.
 *
 * @param operator selection-only operator to execute.
 * @param expectedNumEntriesScannedInFilter expected number of entries scanned in the filter phase.
 */
private static void verifySelectionOnlyBlock(MSelectionOnlyOperator operator, long expectedNumEntriesScannedInFilter) {
  IntermediateResultsBlock block = (IntermediateResultsBlock) operator.nextBlock();

  ExecutionStatistics stats = operator.getExecutionStatistics();
  Assert.assertEquals(stats.getNumDocsScanned(), 10L);
  Assert.assertEquals(stats.getNumEntriesScannedInFilter(), expectedNumEntriesScannedInFilter);
  Assert.assertEquals(stats.getNumEntriesScannedPostFilter(), 30L);
  Assert.assertEquals(stats.getNumTotalRawDocs(), 100000L);

  DataSchema schema = block.getSelectionDataSchema();
  Assert.assertEquals(schema.size(), 3);
  Assert.assertEquals(schema.getColumnName(0), "column1");
  Assert.assertEquals(schema.getColumnName(2), "column6");
  Assert.assertEquals(schema.getColumnType(0), FieldSpec.DataType.INT);
  Assert.assertEquals(schema.getColumnType(2), FieldSpec.DataType.INT_ARRAY);

  List<Serializable[]> rows = (List<Serializable[]>) block.getSelectionResult();
  Assert.assertEquals(rows.size(), 10);
  Serializable[] head = rows.get(0);
  Assert.assertEquals(head.length, 3);
  Assert.assertEquals(((Integer) head[0]).intValue(), 890282370);
  Assert.assertEquals(head[2], new int[] { Integer.MAX_VALUE });
}
Aggregations