Search in sources:

Example 1 with ProjectionMeasure

use of org.apache.carbondata.core.scan.model.ProjectionMeasure in project carbondata by apache.

The method initQuery of the class AbstractQueryExecutor.

/**
 * Populates the executor's query properties from the supplied query model.
 * <p>
 * In order, this method: names the current thread for logging, creates the
 * executor service and the statistics recorder, sorts the table block infos
 * and resolves them into data blocks, records the data type of every
 * projected measure, collects the filter dimensions/measures, and loads the
 * dictionary mapping for the projected dimensions.
 *
 * @param queryModel model describing the query to be executed
 * @throws IOException declared by the signature; presumably raised by one of
 *                     the block or dictionary loading calls made here
 */
protected void initQuery(QueryModel queryModel) throws IOException {
    final String tableName = queryModel.getAbsoluteTableIdentifier().getCarbonTableIdentifier().getTableName();
    StandardLogService.setThreadName(StandardLogService.getPartitionID(tableName), queryModel.getQueryId());
    LOGGER.info("Query will be executed on table: " + tableName);

    // executor service used for the query execution
    queryProperties.executorService = Executors.newCachedThreadPool();

    // recorder that collects the statistics of this query; shared with the model
    queryProperties.queryStatisticsRecorder = CarbonTimeStatisticsFactory.createExecutorRecorder(queryModel.getQueryId());
    queryModel.setStatisticsRecorder(queryProperties.queryStatisticsRecorder);
    QueryStatistic blockLoadStatistic = new QueryStatistic();

    // the block infos are sorted so that the blocks get loaded in sorted order,
    // which the query execution relies on
    Collections.sort(queryModel.getTableBlockInfos());
    BlockletDetailInfo firstDetailInfo = queryModel.getTableBlockInfos().get(0).getDetailInfo();
    if (firstDetailInfo == null) {
        // no detail info on the first block: resolve the table blocks through the cache
        CacheProvider cacheProvider = CacheProvider.getInstance();
        BlockIndexStore<TableBlockUniqueIdentifier, AbstractIndex> blockCache = (BlockIndexStore) cacheProvider.createCache(CacheType.EXECUTOR_BTREE);
        // drop the invalid table blocks, i.e. blocks that were deleted or compacted
        blockCache.removeTableBlocks(queryModel.getInvalidSegmentIds(), queryModel.getAbsoluteTableIdentifier());
        List<TableBlockUniqueIdentifier> blockIdentifiers = prepareTableBlockUniqueIdentifier(queryModel.getTableBlockInfos(), queryModel.getAbsoluteTableIdentifier());
        blockCache.removeTableBlocksIfHorizontalCompactionDone(queryModel);
        queryProperties.dataBlocks = blockCache.getAll(blockIdentifiers);
    } else {
        // group the block infos by file path, keeping the first-seen order of the paths
        Map<String, List<TableBlockInfo>> blocksByFilePath = new LinkedHashMap<>();
        for (TableBlockInfo blockInfo : queryModel.getTableBlockInfos()) {
            String filePath = blockInfo.getFilePath();
            List<TableBlockInfo> blocksOfFile = blocksByFilePath.get(filePath);
            if (blocksOfFile == null) {
                blocksOfFile = new ArrayList<>();
                blocksByFilePath.put(filePath, blocksOfFile);
            }
            BlockletDetailInfo detailInfo = blockInfo.getDetailInfo();
            if (detailInfo.getBlockletInfo() != null) {
                blocksOfFile.add(blockInfo);
            } else {
                // blocklet information is missing, so it is read from the block file
                readAndFillBlockletInfo(blockInfo, blocksOfFile, detailInfo);
            }
        }
        // one index wrapper per file path
        List<AbstractIndex> wrappedIndexes = new ArrayList<>();
        for (List<TableBlockInfo> blocksOfFile : blocksByFilePath.values()) {
            wrappedIndexes.add(new IndexWrapper(blocksOfFile));
        }
        queryProperties.dataBlocks = wrappedIndexes;
    }
    blockLoadStatistic.addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_EXECUTOR, System.currentTimeMillis());
    queryProperties.queryStatisticsRecorder.recordStatistics(blockLoadStatistic);

    // remember the data type of every projected measure, in projection order
    List<ProjectionMeasure> projectionMeasures = queryModel.getProjectionMeasures();
    DataType[] measureTypes = new DataType[projectionMeasures.size()];
    int slot = 0;
    for (ProjectionMeasure projectionMeasure : projectionMeasures) {
        measureTypes[slot++] = projectionMeasure.getMeasure().getDataType();
    }
    queryProperties.measureDataTypes = measureTypes;

    // collect the dimensions and measures referenced by the filter expression
    queryProperties.filterMeasures = new HashSet<>();
    queryProperties.complexFilterDimension = new HashSet<>();
    QueryUtil.getAllFilterDimensions(queryModel.getFilterExpressionResolverTree(), queryProperties.complexFilterDimension, queryProperties.filterMeasures);

    TableProvider tableProvider = new SingleTableProvider(queryModel.getTable());
    QueryStatistic dictionaryLoadStatistic = new QueryStatistic();
    // mapping from dictionary column unique id to its dictionary, used later
    // to obtain the actual column data
    queryProperties.columnToDictionaryMapping = QueryUtil.getDimensionDictionaryDetail(queryModel.getProjectionDimensions(), queryProperties.complexFilterDimension, queryModel.getAbsoluteTableIdentifier(), tableProvider);
    dictionaryLoadStatistic.addStatistics(QueryStatisticsConstants.LOAD_DICTIONARY, System.currentTimeMillis());
    queryProperties.queryStatisticsRecorder.recordStatistics(dictionaryLoadStatistic);
    queryModel.setColumnToDictionaryMapping(queryProperties.columnToDictionaryMapping);
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) BlockIndexStore(org.apache.carbondata.core.datastore.BlockIndexStore) ProjectionMeasure(org.apache.carbondata.core.scan.model.ProjectionMeasure) BlockletDetailInfo(org.apache.carbondata.core.indexstore.BlockletDetailInfo) TableBlockUniqueIdentifier(org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier) DataType(org.apache.carbondata.core.metadata.datatype.DataType) List(java.util.List) ArrayList(java.util.ArrayList) QueryStatistic(org.apache.carbondata.core.stats.QueryStatistic) TableProvider(org.apache.carbondata.core.scan.filter.TableProvider) SingleTableProvider(org.apache.carbondata.core.scan.filter.SingleTableProvider) CacheProvider(org.apache.carbondata.core.cache.CacheProvider) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) SingleTableProvider(org.apache.carbondata.core.scan.filter.SingleTableProvider) AbstractIndex(org.apache.carbondata.core.datastore.block.AbstractIndex) IndexWrapper(org.apache.carbondata.core.indexstore.blockletindex.IndexWrapper)

Example 2 with ProjectionMeasure

use of org.apache.carbondata.core.scan.model.ProjectionMeasure in project carbondata by apache.

The method fillMeasureData of the class RestructureBasedDictionaryResultCollector.

/**
 * Writes the measure values of the current row into {@code msrValues},
 * starting at {@code offset}.
 * <p>
 * For every measure slot {@code i}: when the measure exists in the block
 * (according to {@code measureInfo.getMeasureExists()}), the value is read
 * from the scanned measure chunk; otherwise the slot receives the measure's
 * default value, converted through the data type converter when the measure
 * type is decimal.
 *
 * @param msrValues     output row; slot {@code i + offset} is written for each measure
 * @param offset        index in {@code msrValues} of the first measure slot
 * @param scannedResult scanned result supplying the measure chunks and the current row id
 */
protected void fillMeasureData(Object[] msrValues, int offset, BlockletScannedResult scannedResult) {
    // The counter is an int: a short counter would wrap to a negative value
    // past Short.MAX_VALUE slots and the loop condition would never fail.
    final int measureCount = measureInfo.getMeasureDataTypes().length;
    // index into the "existing measures" arrays; advances only for measures present in the block
    int measureExistIndex = 0;
    for (int i = 0; i < measureCount; i++) {
        if (measureInfo.getMeasureExists()[i]) {
            // the measure exists in the block: read it from the measure column data chunk
            ProjectionMeasure queryMeasure = executionInfo.getProjectionMeasures()[measureExistIndex];
            msrValues[i + offset] = getMeasureData(scannedResult.getMeasureChunk(measureInfo.getMeasureOrdinals()[measureExistIndex]), scannedResult.getCurrentRowId(), queryMeasure.getMeasure());
            measureExistIndex++;
        } else if (DataTypes.isDecimal(measureInfo.getMeasureDataTypes()[i])) {
            // the measure is absent: use the default value, converted for decimal types
            msrValues[i + offset] = DataTypeUtil.getDataTypeConverter().convertFromBigDecimalToDecimal(measureDefaultValues[i]);
        } else {
            // the measure is absent: use the default value as is
            msrValues[i + offset] = measureDefaultValues[i];
        }
    }
}
Also used : ProjectionMeasure(org.apache.carbondata.core.scan.model.ProjectionMeasure)

Example 3 with ProjectionMeasure

use of org.apache.carbondata.core.scan.model.ProjectionMeasure in project carbondata by apache.

The method testToGetAggregatorInfos of the class RestructureUtilTest.

/**
 * Queries three measures against a block that contains only the first two and
 * checks the resulting measure info: the exists flags must be
 * {@code {true, true, false}} and the default values {@code {null, null, 3.0}}
 * (the third measure carries the default value {@code "3"}).
 */
@Test
public void testToGetAggregatorInfos() {
    ColumnSchema idSchema = new ColumnSchema();
    idSchema.setColumnName("Id");
    idSchema.setDataType(DataTypes.STRING);
    idSchema.setColumnUniqueId(UUID.randomUUID().toString());

    ColumnSchema nameSchema = new ColumnSchema();
    nameSchema.setColumnName("Name");
    nameSchema.setDataType(DataTypes.STRING);
    nameSchema.setColumnUniqueId(UUID.randomUUID().toString());

    ColumnSchema ageSchema = new ColumnSchema();
    ageSchema.setColumnName("Age");
    ageSchema.setDataType(DataTypes.STRING);
    ageSchema.setColumnUniqueId(UUID.randomUUID().toString());

    CarbonMeasure idMeasure = new CarbonMeasure(idSchema, 1);
    CarbonMeasure nameMeasure = new CarbonMeasure(nameSchema, 2);
    CarbonMeasure ageMeasure = new CarbonMeasure(ageSchema, 3);
    // only the measure missing from the block gets a default value
    ageMeasure.getColumnSchema().setDefaultValue("3".getBytes());

    // the current block holds the first two measures only
    List<CarbonMeasure> currentBlockMeasures = Arrays.asList(idMeasure, nameMeasure);
    // the query projects all three measures
    List<ProjectionMeasure> queryMeasures = Arrays.asList(
        new ProjectionMeasure(idMeasure),
        new ProjectionMeasure(nameMeasure),
        new ProjectionMeasure(ageMeasure));

    BlockExecutionInfo blockExecutionInfo = new BlockExecutionInfo();
    RestructureUtil.createMeasureInfoAndGetCurrentBlockQueryMeasures(blockExecutionInfo, queryMeasures, currentBlockMeasures);

    MeasureInfo measureInfo = blockExecutionInfo.getMeasureInfo();
    assertThat(measureInfo.getMeasureExists(), is(equalTo(new boolean[] { true, true, false })));
    assertThat(measureInfo.getDefaultValues(), is(equalTo(new Object[] { null, null, 3.0 })));
}
Also used : MeasureInfo(org.apache.carbondata.core.scan.executor.infos.MeasureInfo) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) ProjectionMeasure(org.apache.carbondata.core.scan.model.ProjectionMeasure) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) BlockExecutionInfo(org.apache.carbondata.core.scan.executor.infos.BlockExecutionInfo) Test(org.junit.Test)

Example 4 with ProjectionMeasure

use of org.apache.carbondata.core.scan.model.ProjectionMeasure in project carbondata by apache.

The method initBatch of the class VectorizedCarbonRecordReader.

/**
 * Builds the schema for the projected dimensions and measures of the query
 * model, allocates {@code columnarBatch} for that schema with the given memory
 * mode, and wraps every column of the batch in a {@code ColumnarVectorWrapper}
 * to create {@code carbonColumnarBatch}.
 * <p>
 * Each field is stored at the ordinal of its projection column. All fields
 * are created as nullable.
 *
 * @param memMode memory mode handed to {@code ColumnarBatch.allocate}
 */
private void initBatch(MemoryMode memMode) {
    List<ProjectionDimension> queryDimension = queryModel.getProjectionDimensions();
    List<ProjectionMeasure> queryMeasures = queryModel.getProjectionMeasures();
    StructField[] fields = new StructField[queryDimension.size() + queryMeasures.size()];

    for (ProjectionDimension dim : queryDimension) {
        // pick the carbon type the field is derived from
        DataType carbonType;
        if (dim.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
            // direct dictionary: use the return type of the matching generator
            DirectDictionaryGenerator generator = DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(dim.getDimension().getDataType());
            carbonType = generator.getReturnType();
        } else if (!dim.getDimension().hasEncoding(Encoding.DICTIONARY) || dim.getDimension().isComplex()) {
            // not dictionary encoded, or complex: keep the dimension's own type
            carbonType = dim.getDimension().getDataType();
        } else {
            // remaining dictionary-encoded dimensions are exposed as INT
            carbonType = DataTypes.INT;
        }
        fields[dim.getOrdinal()] = new StructField(dim.getColumnName(), CarbonScalaUtil.convertCarbonToSparkDataType(carbonType), true, null);
    }

    for (ProjectionMeasure msr : queryMeasures) {
        DataType dataType = msr.getMeasure().getDataType();
        boolean booleanOrIntegral = dataType == DataTypes.BOOLEAN || dataType == DataTypes.SHORT || dataType == DataTypes.INT || dataType == DataTypes.LONG;
        if (booleanOrIntegral) {
            fields[msr.getOrdinal()] = new StructField(msr.getColumnName(), CarbonScalaUtil.convertCarbonToSparkDataType(msr.getMeasure().getDataType()), true, null);
        } else if (DataTypes.isDecimal(dataType)) {
            // decimals keep the precision and scale of the measure
            fields[msr.getOrdinal()] = new StructField(msr.getColumnName(), new DecimalType(msr.getMeasure().getPrecision(), msr.getMeasure().getScale()), true, null);
        } else {
            // every other measure type is exposed as DOUBLE
            fields[msr.getOrdinal()] = new StructField(msr.getColumnName(), CarbonScalaUtil.convertCarbonToSparkDataType(DataTypes.DOUBLE), true, null);
        }
    }

    columnarBatch = ColumnarBatch.allocate(new StructType(fields), memMode);
    int capacity = columnarBatch.capacity();
    // one filter flag per row of the batch, shared by all column wrappers
    boolean[] filteredRows = new boolean[capacity];
    CarbonColumnVector[] vectors = new CarbonColumnVector[fields.length];
    for (int column = 0; column < vectors.length; column++) {
        vectors[column] = new ColumnarVectorWrapper(columnarBatch.column(column), filteredRows);
    }
    carbonColumnarBatch = new CarbonColumnarBatch(vectors, capacity, filteredRows);
}
Also used : StructType(org.apache.spark.sql.types.StructType) CarbonColumnarBatch(org.apache.carbondata.core.scan.result.vector.CarbonColumnarBatch) CarbonColumnVector(org.apache.carbondata.core.scan.result.vector.CarbonColumnVector) ProjectionDimension(org.apache.carbondata.core.scan.model.ProjectionDimension) StructField(org.apache.spark.sql.types.StructField) ProjectionMeasure(org.apache.carbondata.core.scan.model.ProjectionMeasure) DataType(org.apache.carbondata.core.metadata.datatype.DataType) DecimalType(org.apache.spark.sql.types.DecimalType) DirectDictionaryGenerator(org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator)

Example 5 with ProjectionMeasure

use of org.apache.carbondata.core.scan.model.ProjectionMeasure in project carbondata by apache.

The method testToGetUpdatedQueryDimension of the class RestructureUtilTest.

/**
 * Queries three dimensions, of which only two are among the table's block
 * and complex dimensions, and checks that
 * {@code createDimensionInfoAndGetCurrentBlockQueryDimension} returns exactly
 * those two, in query order.
 */
@Test
public void testToGetUpdatedQueryDimension() {
    // a single encoding list (DICTIONARY) is shared by all column schemas
    List<Encoding> dictionaryEncoding = new ArrayList<>();
    dictionaryEncoding.add(Encoding.DICTIONARY);

    ColumnSchema idSchema = new ColumnSchema();
    idSchema.setColumnName("Id");
    idSchema.setDataType(DataTypes.STRING);
    idSchema.setColumnUniqueId(UUID.randomUUID().toString());
    idSchema.setEncodingList(dictionaryEncoding);

    ColumnSchema nameSchema = new ColumnSchema();
    nameSchema.setColumnName("Name");
    nameSchema.setDataType(DataTypes.STRING);
    nameSchema.setColumnUniqueId(UUID.randomUUID().toString());
    nameSchema.setEncodingList(dictionaryEncoding);

    ColumnSchema ageSchema = new ColumnSchema();
    ageSchema.setColumnName("Age");
    ageSchema.setDataType(DataTypes.INT);
    ageSchema.setColumnUniqueId(UUID.randomUUID().toString());
    ageSchema.setEncodingList(dictionaryEncoding);

    ColumnSchema salarySchema = new ColumnSchema();
    salarySchema.setColumnName("Salary");
    salarySchema.setDataType(DataTypes.INT);
    salarySchema.setColumnUniqueId(UUID.randomUUID().toString());
    salarySchema.setEncodingList(dictionaryEncoding);

    ColumnSchema addressSchema = new ColumnSchema();
    addressSchema.setColumnName("Address");
    addressSchema.setDataType(DataTypes.STRING);
    addressSchema.setColumnUniqueId(UUID.randomUUID().toString());
    addressSchema.setEncodingList(dictionaryEncoding);

    // dimensions known to the table block
    CarbonDimension idDimension = new CarbonDimension(idSchema, 1, 1, 1, 1);
    CarbonDimension nameDimension = new CarbonDimension(nameSchema, 5, 5, 5, 5);
    List<CarbonDimension> tableBlockDimensions = Arrays.asList(idDimension, nameDimension);

    // complex dimensions known to the table block
    CarbonDimension ageDimension = new CarbonDimension(ageSchema, 4, 4, 4, 4);
    CarbonDimension salaryDimension = new CarbonDimension(salarySchema, 2, 2, 2, 2);
    List<CarbonDimension> tableComplexDimensions = Arrays.asList(ageDimension, salaryDimension);

    // the query asks for Id, Salary and Address; Address is in neither list above
    ProjectionDimension idProjection = new ProjectionDimension(idDimension);
    ProjectionDimension salaryProjection = new ProjectionDimension(salaryDimension);
    ProjectionDimension addressProjection = new ProjectionDimension(new CarbonDimension(addressSchema, 3, 3, 3, 3));
    List<ProjectionDimension> queryDimensions = Arrays.asList(idProjection, salaryProjection, addressProjection);

    // the measures only contribute their count to the call under test
    List<ProjectionMeasure> queryMeasures = Arrays.asList(
        new ProjectionMeasure(new CarbonMeasure(ageSchema, 2)),
        new ProjectionMeasure(new CarbonMeasure(salarySchema, 4)));

    BlockExecutionInfo blockExecutionInfo = new BlockExecutionInfo();
    List<ProjectionDimension> result = RestructureUtil.createDimensionInfoAndGetCurrentBlockQueryDimension(blockExecutionInfo, queryDimensions, tableBlockDimensions, tableComplexDimensions, queryMeasures.size());

    List<CarbonDimension> resultDimension = new ArrayList<>(result.size());
    for (ProjectionDimension projection : result) {
        resultDimension.add(projection.getDimension());
    }
    List<CarbonDimension> expectedDimensions = Arrays.asList(idProjection.getDimension(), salaryProjection.getDimension());
    assertThat(resultDimension, is(equalTo(expectedDimensions)));
}
Also used : CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) ProjectionMeasure(org.apache.carbondata.core.scan.model.ProjectionMeasure) ArrayList(java.util.ArrayList) Encoding(org.apache.carbondata.core.metadata.encoder.Encoding) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) BlockExecutionInfo(org.apache.carbondata.core.scan.executor.infos.BlockExecutionInfo) ProjectionDimension(org.apache.carbondata.core.scan.model.ProjectionDimension) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) Test(org.junit.Test)

Aggregations

ProjectionMeasure (org.apache.carbondata.core.scan.model.ProjectionMeasure)9 ArrayList (java.util.ArrayList)4 CarbonMeasure (org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure)4 ProjectionDimension (org.apache.carbondata.core.scan.model.ProjectionDimension)4 DataType (org.apache.carbondata.core.metadata.datatype.DataType)3 BlockExecutionInfo (org.apache.carbondata.core.scan.executor.infos.BlockExecutionInfo)3 DirectDictionaryGenerator (org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator)2 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)2 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)2 MeasureInfo (org.apache.carbondata.core.scan.executor.infos.MeasureInfo)2 CarbonColumnVector (org.apache.carbondata.core.scan.result.vector.CarbonColumnVector)2 CarbonColumnarBatch (org.apache.carbondata.core.scan.result.vector.CarbonColumnarBatch)2 Test (org.junit.Test)2 HashSet (java.util.HashSet)1 LinkedHashMap (java.util.LinkedHashMap)1 LinkedHashSet (java.util.LinkedHashSet)1 List (java.util.List)1 CacheProvider (org.apache.carbondata.core.cache.CacheProvider)1 BlockIndexStore (org.apache.carbondata.core.datastore.BlockIndexStore)1 IndexKey (org.apache.carbondata.core.datastore.IndexKey)1