use of org.apache.carbondata.core.scan.model.ProjectionMeasure in project carbondata by apache.
the class AbstractQueryExecutor method initQuery.
/**
* Below method will be used to fill the executor properties based on query
* model it will parse the query model and get the detail and fill it in
* query properties
*
* @param queryModel
*/
protected void initQuery(QueryModel queryModel) throws IOException {
StandardLogService.setThreadName(StandardLogService.getPartitionID(queryModel.getAbsoluteTableIdentifier().getCarbonTableIdentifier().getTableName()), queryModel.getQueryId());
LOGGER.info("Query will be executed on table: " + queryModel.getAbsoluteTableIdentifier().getCarbonTableIdentifier().getTableName());
// add executor service for query execution
queryProperties.executorService = Executors.newCachedThreadPool();
// Initializing statistics list to record the query statistics
// creating copy on write to handle concurrent scenario
queryProperties.queryStatisticsRecorder = CarbonTimeStatisticsFactory.createExecutorRecorder(queryModel.getQueryId());
queryModel.setStatisticsRecorder(queryProperties.queryStatisticsRecorder);
QueryStatistic queryStatistic = new QueryStatistic();
// sort the block info
// so block will be loaded in sorted order this will be required for
// query execution
Collections.sort(queryModel.getTableBlockInfos());
if (queryModel.getTableBlockInfos().get(0).getDetailInfo() != null) {
List<AbstractIndex> indexList = new ArrayList<>();
Map<String, List<TableBlockInfo>> listMap = new LinkedHashMap<>();
for (TableBlockInfo blockInfo : queryModel.getTableBlockInfos()) {
List<TableBlockInfo> tableBlockInfos = listMap.get(blockInfo.getFilePath());
if (tableBlockInfos == null) {
tableBlockInfos = new ArrayList<>();
listMap.put(blockInfo.getFilePath(), tableBlockInfos);
}
BlockletDetailInfo blockletDetailInfo = blockInfo.getDetailInfo();
// the blocklet information from block file
if (blockletDetailInfo.getBlockletInfo() == null) {
readAndFillBlockletInfo(blockInfo, tableBlockInfos, blockletDetailInfo);
} else {
tableBlockInfos.add(blockInfo);
}
}
for (List<TableBlockInfo> tableBlockInfos : listMap.values()) {
indexList.add(new IndexWrapper(tableBlockInfos));
}
queryProperties.dataBlocks = indexList;
} else {
// get the table blocks
CacheProvider cacheProvider = CacheProvider.getInstance();
BlockIndexStore<TableBlockUniqueIdentifier, AbstractIndex> cache = (BlockIndexStore) cacheProvider.createCache(CacheType.EXECUTOR_BTREE);
// remove the invalid table blocks, block which is deleted or compacted
cache.removeTableBlocks(queryModel.getInvalidSegmentIds(), queryModel.getAbsoluteTableIdentifier());
List<TableBlockUniqueIdentifier> tableBlockUniqueIdentifiers = prepareTableBlockUniqueIdentifier(queryModel.getTableBlockInfos(), queryModel.getAbsoluteTableIdentifier());
cache.removeTableBlocksIfHorizontalCompactionDone(queryModel);
queryProperties.dataBlocks = cache.getAll(tableBlockUniqueIdentifiers);
}
queryStatistic.addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_EXECUTOR, System.currentTimeMillis());
queryProperties.queryStatisticsRecorder.recordStatistics(queryStatistic);
// calculating the total number of aggeragted columns
int measureCount = queryModel.getProjectionMeasures().size();
int currentIndex = 0;
DataType[] dataTypes = new DataType[measureCount];
for (ProjectionMeasure carbonMeasure : queryModel.getProjectionMeasures()) {
// adding the data type and aggregation type of all the measure this
// can be used
// to select the aggregator
dataTypes[currentIndex] = carbonMeasure.getMeasure().getDataType();
currentIndex++;
}
queryProperties.measureDataTypes = dataTypes;
// as aggregation will be executed in following order
// 1.aggregate dimension expression
// 2. expression
// 3. query measure
// so calculating the index of the expression start index
// and measure column start index
queryProperties.filterMeasures = new HashSet<>();
queryProperties.complexFilterDimension = new HashSet<>();
QueryUtil.getAllFilterDimensions(queryModel.getFilterExpressionResolverTree(), queryProperties.complexFilterDimension, queryProperties.filterMeasures);
CarbonTable carbonTable = queryModel.getTable();
TableProvider tableProvider = new SingleTableProvider(carbonTable);
queryStatistic = new QueryStatistic();
// dictionary column unique column id to dictionary mapping
// which will be used to get column actual data
queryProperties.columnToDictionaryMapping = QueryUtil.getDimensionDictionaryDetail(queryModel.getProjectionDimensions(), queryProperties.complexFilterDimension, queryModel.getAbsoluteTableIdentifier(), tableProvider);
queryStatistic.addStatistics(QueryStatisticsConstants.LOAD_DICTIONARY, System.currentTimeMillis());
queryProperties.queryStatisticsRecorder.recordStatistics(queryStatistic);
queryModel.setColumnToDictionaryMapping(queryProperties.columnToDictionaryMapping);
}
use of org.apache.carbondata.core.scan.model.ProjectionMeasure in project carbondata by apache.
the class RestructureBasedDictionaryResultCollector method fillMeasureData.
protected void fillMeasureData(Object[] msrValues, int offset, BlockletScannedResult scannedResult) {
int measureExistIndex = 0;
for (short i = 0; i < measureInfo.getMeasureDataTypes().length; i++) {
// data chunk to the collector
if (measureInfo.getMeasureExists()[i]) {
ProjectionMeasure queryMeasure = executionInfo.getProjectionMeasures()[measureExistIndex];
msrValues[i + offset] = getMeasureData(scannedResult.getMeasureChunk(measureInfo.getMeasureOrdinals()[measureExistIndex]), scannedResult.getCurrentRowId(), queryMeasure.getMeasure());
measureExistIndex++;
} else if (DataTypes.isDecimal(measureInfo.getMeasureDataTypes()[i])) {
// if not then get the default value
msrValues[i + offset] = DataTypeUtil.getDataTypeConverter().convertFromBigDecimalToDecimal(measureDefaultValues[i]);
} else {
msrValues[i + offset] = measureDefaultValues[i];
}
}
}
use of org.apache.carbondata.core.scan.model.ProjectionMeasure in project carbondata by apache.
the class RestructureUtilTest method testToGetAggregatorInfos.
@Test
public void testToGetAggregatorInfos() {
ColumnSchema columnSchema1 = new ColumnSchema();
columnSchema1.setColumnName("Id");
columnSchema1.setDataType(DataTypes.STRING);
columnSchema1.setColumnUniqueId(UUID.randomUUID().toString());
ColumnSchema columnSchema2 = new ColumnSchema();
columnSchema2.setColumnName("Name");
columnSchema2.setDataType(DataTypes.STRING);
columnSchema2.setColumnUniqueId(UUID.randomUUID().toString());
ColumnSchema columnSchema3 = new ColumnSchema();
columnSchema3.setColumnName("Age");
columnSchema3.setDataType(DataTypes.STRING);
columnSchema3.setColumnUniqueId(UUID.randomUUID().toString());
CarbonMeasure carbonMeasure1 = new CarbonMeasure(columnSchema1, 1);
CarbonMeasure carbonMeasure2 = new CarbonMeasure(columnSchema2, 2);
CarbonMeasure carbonMeasure3 = new CarbonMeasure(columnSchema3, 3);
carbonMeasure3.getColumnSchema().setDefaultValue("3".getBytes());
List<CarbonMeasure> currentBlockMeasures = Arrays.asList(carbonMeasure1, carbonMeasure2);
ProjectionMeasure queryMeasure1 = new ProjectionMeasure(carbonMeasure1);
ProjectionMeasure queryMeasure2 = new ProjectionMeasure(carbonMeasure2);
ProjectionMeasure queryMeasure3 = new ProjectionMeasure(carbonMeasure3);
List<ProjectionMeasure> queryMeasures = Arrays.asList(queryMeasure1, queryMeasure2, queryMeasure3);
BlockExecutionInfo blockExecutionInfo = new BlockExecutionInfo();
RestructureUtil.createMeasureInfoAndGetCurrentBlockQueryMeasures(blockExecutionInfo, queryMeasures, currentBlockMeasures);
MeasureInfo measureInfo = blockExecutionInfo.getMeasureInfo();
boolean[] measuresExist = { true, true, false };
assertThat(measureInfo.getMeasureExists(), is(equalTo(measuresExist)));
Object[] defaultValues = { null, null, 3.0 };
assertThat(measureInfo.getDefaultValues(), is(equalTo(defaultValues)));
}
use of org.apache.carbondata.core.scan.model.ProjectionMeasure in project carbondata by apache.
the class VectorizedCarbonRecordReader method initBatch.
/**
* Returns the ColumnarBatch object that will be used for all rows returned by this reader.
* This object is reused. Calling this enables the vectorized reader. This should be called
* before any calls to nextKeyValue/nextBatch.
*/
private void initBatch(MemoryMode memMode) {
List<ProjectionDimension> queryDimension = queryModel.getProjectionDimensions();
List<ProjectionMeasure> queryMeasures = queryModel.getProjectionMeasures();
StructField[] fields = new StructField[queryDimension.size() + queryMeasures.size()];
for (int i = 0; i < queryDimension.size(); i++) {
ProjectionDimension dim = queryDimension.get(i);
if (dim.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
DirectDictionaryGenerator generator = DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(dim.getDimension().getDataType());
fields[dim.getOrdinal()] = new StructField(dim.getColumnName(), CarbonScalaUtil.convertCarbonToSparkDataType(generator.getReturnType()), true, null);
} else if (!dim.getDimension().hasEncoding(Encoding.DICTIONARY)) {
fields[dim.getOrdinal()] = new StructField(dim.getColumnName(), CarbonScalaUtil.convertCarbonToSparkDataType(dim.getDimension().getDataType()), true, null);
} else if (dim.getDimension().isComplex()) {
fields[dim.getOrdinal()] = new StructField(dim.getColumnName(), CarbonScalaUtil.convertCarbonToSparkDataType(dim.getDimension().getDataType()), true, null);
} else {
fields[dim.getOrdinal()] = new StructField(dim.getColumnName(), CarbonScalaUtil.convertCarbonToSparkDataType(DataTypes.INT), true, null);
}
}
for (int i = 0; i < queryMeasures.size(); i++) {
ProjectionMeasure msr = queryMeasures.get(i);
DataType dataType = msr.getMeasure().getDataType();
if (dataType == DataTypes.BOOLEAN || dataType == DataTypes.SHORT || dataType == DataTypes.INT || dataType == DataTypes.LONG) {
fields[msr.getOrdinal()] = new StructField(msr.getColumnName(), CarbonScalaUtil.convertCarbonToSparkDataType(msr.getMeasure().getDataType()), true, null);
} else if (DataTypes.isDecimal(dataType)) {
fields[msr.getOrdinal()] = new StructField(msr.getColumnName(), new DecimalType(msr.getMeasure().getPrecision(), msr.getMeasure().getScale()), true, null);
} else {
fields[msr.getOrdinal()] = new StructField(msr.getColumnName(), CarbonScalaUtil.convertCarbonToSparkDataType(DataTypes.DOUBLE), true, null);
}
}
columnarBatch = ColumnarBatch.allocate(new StructType(fields), memMode);
CarbonColumnVector[] vectors = new CarbonColumnVector[fields.length];
boolean[] filteredRows = new boolean[columnarBatch.capacity()];
for (int i = 0; i < fields.length; i++) {
vectors[i] = new ColumnarVectorWrapper(columnarBatch.column(i), filteredRows);
}
carbonColumnarBatch = new CarbonColumnarBatch(vectors, columnarBatch.capacity(), filteredRows);
}
use of org.apache.carbondata.core.scan.model.ProjectionMeasure in project carbondata by apache.
the class RestructureUtilTest method testToGetUpdatedQueryDimension.
@Test
public void testToGetUpdatedQueryDimension() {
BlockExecutionInfo blockExecutionInfo = new BlockExecutionInfo();
List<Encoding> encodingList = new ArrayList<Encoding>();
encodingList.add(Encoding.DICTIONARY);
ColumnSchema columnSchema1 = new ColumnSchema();
columnSchema1.setColumnName("Id");
columnSchema1.setDataType(DataTypes.STRING);
columnSchema1.setColumnUniqueId(UUID.randomUUID().toString());
columnSchema1.setEncodingList(encodingList);
ColumnSchema columnSchema2 = new ColumnSchema();
columnSchema2.setColumnName("Name");
columnSchema2.setDataType(DataTypes.STRING);
columnSchema2.setColumnUniqueId(UUID.randomUUID().toString());
columnSchema2.setEncodingList(encodingList);
ColumnSchema columnSchema3 = new ColumnSchema();
columnSchema3.setColumnName("Age");
columnSchema3.setDataType(DataTypes.INT);
columnSchema3.setColumnUniqueId(UUID.randomUUID().toString());
columnSchema3.setEncodingList(encodingList);
ColumnSchema columnSchema4 = new ColumnSchema();
columnSchema4.setColumnName("Salary");
columnSchema4.setDataType(DataTypes.INT);
columnSchema4.setColumnUniqueId(UUID.randomUUID().toString());
columnSchema4.setEncodingList(encodingList);
ColumnSchema columnSchema5 = new ColumnSchema();
columnSchema5.setColumnName("Address");
columnSchema5.setDataType(DataTypes.STRING);
columnSchema5.setColumnUniqueId(UUID.randomUUID().toString());
columnSchema5.setEncodingList(encodingList);
CarbonDimension tableBlockDimension1 = new CarbonDimension(columnSchema1, 1, 1, 1, 1);
CarbonDimension tableBlockDimension2 = new CarbonDimension(columnSchema2, 5, 5, 5, 5);
List<CarbonDimension> tableBlockDimensions = Arrays.asList(tableBlockDimension1, tableBlockDimension2);
CarbonDimension tableComplexDimension1 = new CarbonDimension(columnSchema3, 4, 4, 4, 4);
CarbonDimension tableComplexDimension2 = new CarbonDimension(columnSchema4, 2, 2, 2, 2);
List<CarbonDimension> tableComplexDimensions = Arrays.asList(tableComplexDimension1, tableComplexDimension2);
ProjectionDimension queryDimension1 = new ProjectionDimension(tableBlockDimension1);
ProjectionDimension queryDimension2 = new ProjectionDimension(tableComplexDimension2);
ProjectionDimension queryDimension3 = new ProjectionDimension(new CarbonDimension(columnSchema5, 3, 3, 3, 3));
ProjectionMeasure queryMeasure1 = new ProjectionMeasure(new CarbonMeasure(columnSchema3, 2));
ProjectionMeasure queryMeasure2 = new ProjectionMeasure(new CarbonMeasure(columnSchema4, 4));
List<ProjectionMeasure> queryMeasures = Arrays.asList(queryMeasure1, queryMeasure2);
List<ProjectionDimension> queryDimensions = Arrays.asList(queryDimension1, queryDimension2, queryDimension3);
List<ProjectionDimension> result = null;
result = RestructureUtil.createDimensionInfoAndGetCurrentBlockQueryDimension(blockExecutionInfo, queryDimensions, tableBlockDimensions, tableComplexDimensions, queryMeasures.size());
List<CarbonDimension> resultDimension = new ArrayList<>(result.size());
for (ProjectionDimension queryDimension : result) {
resultDimension.add(queryDimension.getDimension());
}
assertThat(resultDimension, is(equalTo(Arrays.asList(queryDimension1.getDimension(), queryDimension2.getDimension()))));
}
Aggregations