Search in sources :

Example 1 with DataSchema

use of com.linkedin.pinot.common.utils.DataSchema in project pinot by linkedin.

the class IntermediateResultsBlock method getAggregationResultDataTable.

@Nonnull
private DataTable getAggregationResultDataTable() throws Exception {
    // Extract each aggregation column name and type from aggregation function context.
    int numAggregationFunctions = _aggregationFunctionContexts.length;
    String[] columnNames = new String[numAggregationFunctions];
    FieldSpec.DataType[] columnTypes = new FieldSpec.DataType[numAggregationFunctions];
    for (int i = 0; i < numAggregationFunctions; i++) {
        AggregationFunctionContext aggregationFunctionContext = _aggregationFunctionContexts[i];
        columnNames[i] = aggregationFunctionContext.getAggregationColumnName();
        columnTypes[i] = aggregationFunctionContext.getAggregationFunction().getIntermediateResultDataType();
    }
    // Build the data table.
    DataTableBuilder dataTableBuilder = new DataTableBuilder(new DataSchema(columnNames, columnTypes));
    dataTableBuilder.startRow();
    for (int i = 0; i < numAggregationFunctions; i++) {
        switch(columnTypes[i]) {
            case LONG:
                dataTableBuilder.setColumn(i, ((Number) _aggregationResult.get(i)).longValue());
                break;
            case DOUBLE:
                dataTableBuilder.setColumn(i, ((Double) _aggregationResult.get(i)).doubleValue());
                break;
            case OBJECT:
                dataTableBuilder.setColumn(i, _aggregationResult.get(i));
                break;
            default:
                throw new UnsupportedOperationException("Unsupported aggregation column data type: " + columnTypes[i] + " for column: " + columnNames[i]);
        }
    }
    dataTableBuilder.finishRow();
    DataTable dataTable = dataTableBuilder.build();
    return attachMetadataToDataTable(dataTable);
}
Also used : DataSchema(com.linkedin.pinot.common.utils.DataSchema) DataTable(com.linkedin.pinot.common.utils.DataTable) AggregationFunctionContext(com.linkedin.pinot.core.query.aggregation.AggregationFunctionContext) DataTableBuilder(com.linkedin.pinot.core.common.datatable.DataTableBuilder) Nonnull(javax.annotation.Nonnull)

Example 2 with DataSchema

use of com.linkedin.pinot.common.utils.DataSchema in project pinot by linkedin.

the class SelectionOperatorUtils method extractDataSchema.

/**
   * Extract the {@link DataSchema} from sort sequence, selection columns and {@link IndexSegment}. (Inner segment)
   * <p>Inside data schema, we just store each column once (de-duplicated).
   *
   * @param sortSequence sort sequence.
   * @param selectionColumns selection columns.
   * @param indexSegment index segment.
   * @return data schema.
   */
@Nonnull
public static DataSchema extractDataSchema(@Nullable List<SelectionSort> sortSequence, @Nonnull List<String> selectionColumns, @Nonnull IndexSegment indexSegment) {
    List<String> columnList = new ArrayList<>();
    Set<String> columnSet = new HashSet<>();
    if (sortSequence != null) {
        for (SelectionSort selectionSort : sortSequence) {
            String column = selectionSort.getColumn();
            columnList.add(column);
            columnSet.add(column);
        }
    }
    for (String column : selectionColumns) {
        if (!columnSet.contains(column)) {
            columnList.add(column);
            columnSet.add(column);
        }
    }
    int numColumns = columnList.size();
    String[] columns = new String[numColumns];
    DataType[] dataTypes = new DataType[numColumns];
    for (int i = 0; i < numColumns; i++) {
        String column = columnList.get(i);
        columns[i] = column;
        DataSourceMetadata columnMetadata = indexSegment.getDataSource(column).getDataSourceMetadata();
        if (columnMetadata.isSingleValue()) {
            dataTypes[i] = columnMetadata.getDataType();
        } else {
            dataTypes[i] = columnMetadata.getDataType().toMultiValue();
        }
    }
    return new DataSchema(columns, dataTypes);
}
Also used : DataSchema(com.linkedin.pinot.common.utils.DataSchema) DataSourceMetadata(com.linkedin.pinot.core.common.DataSourceMetadata) ArrayList(java.util.ArrayList) SelectionSort(com.linkedin.pinot.common.request.SelectionSort) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) HashSet(java.util.HashSet) Nonnull(javax.annotation.Nonnull)

Example 3 with DataSchema

use of com.linkedin.pinot.common.utils.DataSchema in project pinot by linkedin.

the class InnerSegmentSelectionMultiValueQueriesTest method testSelectStar.

@Test
public void testSelectStar() {
    String query = "SELECT * FROM testTable";
    // Test query without filter.
    MSelectionOnlyOperator selectionOnlyOperator = getOperatorForQuery(query);
    IntermediateResultsBlock resultsBlock = (IntermediateResultsBlock) selectionOnlyOperator.nextBlock();
    ExecutionStatistics executionStatistics = selectionOnlyOperator.getExecutionStatistics();
    Assert.assertEquals(executionStatistics.getNumDocsScanned(), 10L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedInFilter(), 0L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 100L);
    Assert.assertEquals(executionStatistics.getNumTotalRawDocs(), 100000L);
    DataSchema selectionDataSchema = resultsBlock.getSelectionDataSchema();
    Assert.assertEquals(selectionDataSchema.size(), 10);
    Assert.assertEquals(selectionDataSchema.getColumnName(0), "column1");
    Assert.assertEquals(selectionDataSchema.getColumnName(5), "column6");
    Assert.assertEquals(selectionDataSchema.getColumnType(0), FieldSpec.DataType.INT);
    Assert.assertEquals(selectionDataSchema.getColumnType(5), FieldSpec.DataType.INT_ARRAY);
    List<Serializable[]> selectionResult = (List<Serializable[]>) resultsBlock.getSelectionResult();
    Assert.assertEquals(selectionResult.size(), 10);
    Serializable[] firstRow = selectionResult.get(0);
    Assert.assertEquals(firstRow.length, 10);
    Assert.assertEquals(((Integer) firstRow[0]).intValue(), 890282370);
    Assert.assertEquals(firstRow[5], new int[] { 2147483647 });
    // Test query with filter.
    selectionOnlyOperator = getOperatorForQueryWithFilter(query);
    resultsBlock = (IntermediateResultsBlock) selectionOnlyOperator.nextBlock();
    executionStatistics = selectionOnlyOperator.getExecutionStatistics();
    Assert.assertEquals(executionStatistics.getNumDocsScanned(), 10L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedInFilter(), 230501L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 100L);
    Assert.assertEquals(executionStatistics.getNumTotalRawDocs(), 100000L);
    selectionDataSchema = resultsBlock.getSelectionDataSchema();
    Assert.assertEquals(selectionDataSchema.size(), 10);
    Assert.assertEquals(selectionDataSchema.getColumnName(0), "column1");
    Assert.assertEquals(selectionDataSchema.getColumnName(5), "column6");
    Assert.assertEquals(selectionDataSchema.getColumnType(0), FieldSpec.DataType.INT);
    Assert.assertEquals(selectionDataSchema.getColumnType(5), FieldSpec.DataType.INT_ARRAY);
    selectionResult = (List<Serializable[]>) resultsBlock.getSelectionResult();
    Assert.assertEquals(selectionResult.size(), 10);
    firstRow = selectionResult.get(0);
    Assert.assertEquals(firstRow.length, 10);
    Assert.assertEquals(((Integer) firstRow[0]).intValue(), 890282370);
    Assert.assertEquals(firstRow[5], new int[] { 2147483647 });
}
Also used : DataSchema(com.linkedin.pinot.common.utils.DataSchema) ExecutionStatistics(com.linkedin.pinot.core.operator.ExecutionStatistics) Serializable(java.io.Serializable) MSelectionOnlyOperator(com.linkedin.pinot.core.operator.query.MSelectionOnlyOperator) List(java.util.List) IntermediateResultsBlock(com.linkedin.pinot.core.operator.blocks.IntermediateResultsBlock) Test(org.testng.annotations.Test)

Example 4 with DataSchema

use of com.linkedin.pinot.common.utils.DataSchema in project pinot by linkedin.

the class InnerSegmentSelectionMultiValueQueriesTest method testSelectionOrderBy.

@Test
public void testSelectionOrderBy() {
    String query = "SELECT" + SELECTION + " FROM testTable" + ORDER_BY;
    // Test query without filter.
    MSelectionOrderByOperator selectionOrderByOperator = getOperatorForQuery(query);
    IntermediateResultsBlock resultsBlock = (IntermediateResultsBlock) selectionOrderByOperator.nextBlock();
    ExecutionStatistics executionStatistics = selectionOrderByOperator.getExecutionStatistics();
    Assert.assertEquals(executionStatistics.getNumDocsScanned(), 100000L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedInFilter(), 0L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 400000L);
    Assert.assertEquals(executionStatistics.getNumTotalRawDocs(), 100000L);
    DataSchema selectionDataSchema = resultsBlock.getSelectionDataSchema();
    Assert.assertEquals(selectionDataSchema.size(), 4);
    Assert.assertEquals(selectionDataSchema.getColumnName(0), "column5");
    Assert.assertEquals(selectionDataSchema.getColumnName(3), "column6");
    Assert.assertEquals(selectionDataSchema.getColumnType(0), FieldSpec.DataType.STRING);
    Assert.assertEquals(selectionDataSchema.getColumnType(3), FieldSpec.DataType.INT_ARRAY);
    Queue<Serializable[]> selectionResult = (Queue<Serializable[]>) resultsBlock.getSelectionResult();
    Assert.assertEquals(selectionResult.size(), 10);
    Serializable[] lastRow = selectionResult.peek();
    Assert.assertEquals(lastRow.length, 4);
    Assert.assertEquals((String) lastRow[0], "AKXcXcIqsqOJFsdwxZ");
    Assert.assertEquals(lastRow[3], new int[] { 1252 });
    // Test query with filter.
    selectionOrderByOperator = getOperatorForQueryWithFilter(query);
    resultsBlock = (IntermediateResultsBlock) selectionOrderByOperator.nextBlock();
    executionStatistics = selectionOrderByOperator.getExecutionStatistics();
    Assert.assertEquals(executionStatistics.getNumDocsScanned(), 15620L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedInFilter(), 282430L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 62480L);
    Assert.assertEquals(executionStatistics.getNumTotalRawDocs(), 100000L);
    selectionDataSchema = resultsBlock.getSelectionDataSchema();
    Assert.assertEquals(selectionDataSchema.size(), 4);
    Assert.assertEquals(selectionDataSchema.getColumnName(0), "column5");
    Assert.assertEquals(selectionDataSchema.getColumnName(3), "column6");
    Assert.assertEquals(selectionDataSchema.getColumnType(0), FieldSpec.DataType.STRING);
    Assert.assertEquals(selectionDataSchema.getColumnType(3), FieldSpec.DataType.INT_ARRAY);
    selectionResult = (Queue<Serializable[]>) resultsBlock.getSelectionResult();
    Assert.assertEquals(selectionResult.size(), 10);
    lastRow = selectionResult.peek();
    Assert.assertEquals(lastRow.length, 4);
    Assert.assertEquals((String) lastRow[0], "AKXcXcIqsqOJFsdwxZ");
    Assert.assertEquals(lastRow[3], new int[] { 2147483647 });
}
Also used : DataSchema(com.linkedin.pinot.common.utils.DataSchema) ExecutionStatistics(com.linkedin.pinot.core.operator.ExecutionStatistics) Serializable(java.io.Serializable) IntermediateResultsBlock(com.linkedin.pinot.core.operator.blocks.IntermediateResultsBlock) Queue(java.util.Queue) MSelectionOrderByOperator(com.linkedin.pinot.core.operator.query.MSelectionOrderByOperator) Test(org.testng.annotations.Test)

Example 5 with DataSchema

use of com.linkedin.pinot.common.utils.DataSchema in project pinot by linkedin.

the class InnerSegmentSelectionMultiValueQueriesTest method testSelectionOnly.

@Test
public void testSelectionOnly() {
    String query = "SELECT" + SELECTION + " FROM testTable";
    MSelectionOnlyOperator selectionOnlyOperator = getOperatorForQuery(query);
    IntermediateResultsBlock resultsBlock = (IntermediateResultsBlock) selectionOnlyOperator.nextBlock();
    ExecutionStatistics executionStatistics = selectionOnlyOperator.getExecutionStatistics();
    Assert.assertEquals(executionStatistics.getNumDocsScanned(), 10L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedInFilter(), 0L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 30L);
    Assert.assertEquals(executionStatistics.getNumTotalRawDocs(), 100000L);
    DataSchema selectionDataSchema = resultsBlock.getSelectionDataSchema();
    Assert.assertEquals(selectionDataSchema.size(), 3);
    Assert.assertEquals(selectionDataSchema.getColumnName(0), "column1");
    Assert.assertEquals(selectionDataSchema.getColumnName(2), "column6");
    Assert.assertEquals(selectionDataSchema.getColumnType(0), FieldSpec.DataType.INT);
    Assert.assertEquals(selectionDataSchema.getColumnType(2), FieldSpec.DataType.INT_ARRAY);
    List<Serializable[]> selectionResult = (List<Serializable[]>) resultsBlock.getSelectionResult();
    Assert.assertEquals(selectionResult.size(), 10);
    Serializable[] firstRow = selectionResult.get(0);
    Assert.assertEquals(firstRow.length, 3);
    Assert.assertEquals(((Integer) firstRow[0]).intValue(), 890282370);
    Assert.assertEquals(firstRow[2], new int[] { 2147483647 });
    // Test query with filter.
    selectionOnlyOperator = getOperatorForQueryWithFilter(query);
    resultsBlock = (IntermediateResultsBlock) selectionOnlyOperator.nextBlock();
    executionStatistics = selectionOnlyOperator.getExecutionStatistics();
    Assert.assertEquals(executionStatistics.getNumDocsScanned(), 10L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedInFilter(), 230501L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 30L);
    Assert.assertEquals(executionStatistics.getNumTotalRawDocs(), 100000L);
    selectionDataSchema = resultsBlock.getSelectionDataSchema();
    Assert.assertEquals(selectionDataSchema.size(), 3);
    Assert.assertEquals(selectionDataSchema.getColumnName(0), "column1");
    Assert.assertEquals(selectionDataSchema.getColumnName(2), "column6");
    Assert.assertEquals(selectionDataSchema.getColumnType(0), FieldSpec.DataType.INT);
    Assert.assertEquals(selectionDataSchema.getColumnType(2), FieldSpec.DataType.INT_ARRAY);
    selectionResult = (List<Serializable[]>) resultsBlock.getSelectionResult();
    Assert.assertEquals(selectionResult.size(), 10);
    firstRow = selectionResult.get(0);
    Assert.assertEquals(firstRow.length, 3);
    Assert.assertEquals(((Integer) firstRow[0]).intValue(), 890282370);
    Assert.assertEquals(firstRow[2], new int[] { 2147483647 });
}
Also used : DataSchema(com.linkedin.pinot.common.utils.DataSchema) ExecutionStatistics(com.linkedin.pinot.core.operator.ExecutionStatistics) Serializable(java.io.Serializable) MSelectionOnlyOperator(com.linkedin.pinot.core.operator.query.MSelectionOnlyOperator) List(java.util.List) IntermediateResultsBlock(com.linkedin.pinot.core.operator.blocks.IntermediateResultsBlock) Test(org.testng.annotations.Test)

Aggregations

DataSchema (com.linkedin.pinot.common.utils.DataSchema)17 Serializable (java.io.Serializable)9 Test (org.testng.annotations.Test)9 DataTable (com.linkedin.pinot.common.utils.DataTable)7 ExecutionStatistics (com.linkedin.pinot.core.operator.ExecutionStatistics)6 IntermediateResultsBlock (com.linkedin.pinot.core.operator.blocks.IntermediateResultsBlock)6 Nonnull (javax.annotation.Nonnull)6 MSelectionOnlyOperator (com.linkedin.pinot.core.operator.query.MSelectionOnlyOperator)4 AggregationFunctionContext (com.linkedin.pinot.core.query.aggregation.AggregationFunctionContext)4 ArrayList (java.util.ArrayList)4 List (java.util.List)4 DataType (com.linkedin.pinot.common.data.FieldSpec.DataType)3 DataTableBuilder (com.linkedin.pinot.core.common.datatable.DataTableBuilder)3 Selection (com.linkedin.pinot.common.request.Selection)2 ServerInstance (com.linkedin.pinot.common.response.ServerInstance)2 MSelectionOrderByOperator (com.linkedin.pinot.core.operator.query.MSelectionOrderByOperator)2 AggregationFunction (com.linkedin.pinot.core.query.aggregation.function.AggregationFunction)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2 Queue (java.util.Queue)2