Use of org.apache.carbondata.core.datastore.chunk.DimensionColumnPage in project carbondata by Apache.
The class BlockletFilterScanner, method executeFilter.
/**
 * Applies the filter to one blocklet and assembles the scanned result for the matching rows.
 * <p>
 * Steps performed, in order:
 * 1. Seed the raw chunks with the data block's indexed data (if any was set during fgdatamap
 *    pruning) and apply the filter to obtain one bit set of matching rows per page.
 * 2. If the resulting bit set group is empty, free the raw chunk memory, record the scan time
 *    and scanned-page statistics, and return the empty result.
 * 3. Otherwise convert each page's bit set into an array of matching row ids.
 * 4. Start from the dimension/measure chunks already held by {@code rawBlockletColumnChunks},
 *    overlay the chunks read for the selected column index ranges, then read individually
 *    any projected column whose chunk is still null.
 * 5. Decode every page of every non-null chunk and attach the pages, raw chunks and the
 *    filtered row ids/counts to the result.
 *
 * @param rawBlockletColumnChunks raw column chunks of the blocklet being scanned
 * @return the scanned result, or the empty result when the filter matches no row
 * @throws FilterUnsupportedException if raised while applying the filter
 * @throws IOException if raised while reading or decoding column chunks
 */
private BlockletScannedResult executeFilter(RawBlockletColumnChunks rawBlockletColumnChunks)
    throws FilterUnsupportedException, IOException {
  long startTime = System.currentTimeMillis();
  QueryStatistic totalBlockletStatistic = queryStatisticsModel.getStatisticsTypeAndObjMap()
      .get(QueryStatisticsConstants.TOTAL_BLOCKLET_NUM);
  totalBlockletStatistic.addCountStatistic(QueryStatisticsConstants.TOTAL_BLOCKLET_NUM,
      totalBlockletStatistic.getCount() + 1);
  // set the indexed data if it has any during fgdatamap pruning.
  rawBlockletColumnChunks.setBitSetGroup(
      rawBlockletColumnChunks.getDataBlock().getIndexedData());
  // apply filter on actual data, for each page
  BitSetGroup bitSetGroup =
      this.filterExecuter.applyFilter(rawBlockletColumnChunks, useBitSetPipeLine);
  // if filter result is empty then return with empty result
  if (bitSetGroup.isEmpty()) {
    CarbonUtil.freeMemory(rawBlockletColumnChunks.getDimensionRawColumnChunks(),
        rawBlockletColumnChunks.getMeasureRawColumnChunks());
    QueryStatistic scanTime = queryStatisticsModel.getStatisticsTypeAndObjMap()
        .get(QueryStatisticsConstants.SCAN_BLOCKlET_TIME);
    scanTime.addCountStatistic(QueryStatisticsConstants.SCAN_BLOCKlET_TIME,
        scanTime.getCount() + (System.currentTimeMillis() - startTime));
    QueryStatistic scannedPages = queryStatisticsModel.getStatisticsTypeAndObjMap()
        .get(QueryStatisticsConstants.PAGE_SCANNED);
    scannedPages.addCountStatistic(QueryStatisticsConstants.PAGE_SCANNED,
        scannedPages.getCount() + bitSetGroup.getScannedPages());
    return createEmptyResult();
  }
  BlockletScannedResult scannedResult = new FilterQueryScannedResult(blockExecutionInfo);
  scannedResult.setBlockletId(blockExecutionInfo.getBlockIdString()
      + CarbonCommonConstants.FILE_SEPARATOR
      + rawBlockletColumnChunks.getDataBlock().blockletIndex());
  // valid scanned blocklet
  QueryStatistic validScannedBlockletStatistic = queryStatisticsModel
      .getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.VALID_SCAN_BLOCKLET_NUM);
  validScannedBlockletStatistic.addCountStatistic(
      QueryStatisticsConstants.VALID_SCAN_BLOCKLET_NUM,
      validScannedBlockletStatistic.getCount() + 1);
  // adding statistics for valid number of pages
  QueryStatistic validPages = queryStatisticsModel.getStatisticsTypeAndObjMap()
      .get(QueryStatisticsConstants.VALID_PAGE_SCANNED);
  validPages.addCountStatistic(QueryStatisticsConstants.VALID_PAGE_SCANNED,
      validPages.getCount() + bitSetGroup.getValidPages());
  QueryStatistic scannedPages = queryStatisticsModel.getStatisticsTypeAndObjMap()
      .get(QueryStatisticsConstants.PAGE_SCANNED);
  scannedPages.addCountStatistic(QueryStatisticsConstants.PAGE_SCANNED,
      scannedPages.getCount() + bitSetGroup.getScannedPages());
  int[] pageFilteredRowCount = new int[bitSetGroup.getNumberOfPages()];
  // get the row indexes from bit set for each page
  int[][] pageFilteredRowId = new int[bitSetGroup.getNumberOfPages()][];
  int numPages = pageFilteredRowId.length;
  for (int pageId = 0; pageId < numPages; pageId++) {
    BitSet bitSet = bitSetGroup.getBitSet(pageId);
    // pages with no matching row keep a null row-id array and a zero count
    if (bitSet != null && !bitSet.isEmpty()) {
      int[] matchedRowId = new int[bitSet.cardinality()];
      int index = 0;
      for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) {
        matchedRowId[index++] = i;
      }
      pageFilteredRowCount[pageId] = matchedRowId.length;
      pageFilteredRowId[pageId] = matchedRowId;
    }
  }
  FileReader fileReader = rawBlockletColumnChunks.getFileReader();
  DimensionRawColumnChunk[] dimensionRawColumnChunks =
      new DimensionRawColumnChunk[blockExecutionInfo.getTotalNumberDimensionToRead()];
  int numDimensionChunks = dimensionRawColumnChunks.length;
  // start from the dimension chunks that are already held by the raw chunk holder
  for (int chunkIndex = 0; chunkIndex < numDimensionChunks; chunkIndex++) {
    dimensionRawColumnChunks[chunkIndex] =
        rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex];
  }
  int[][] allSelectedDimensionColumnIndexRange =
      blockExecutionInfo.getAllSelectedDimensionColumnIndexRange();
  // Time the bulk dimension read itself. Previously the timer was started and stopped on
  // consecutive statements before any read happened, so the reported read time was always ~0
  // and was never excluded from the scan time.
  long dimensionReadTime = System.currentTimeMillis();
  DimensionRawColumnChunk[] projectionListDimensionChunk = rawBlockletColumnChunks
      .getDataBlock().readDimensionChunks(fileReader, allSelectedDimensionColumnIndexRange);
  dimensionReadTime = System.currentTimeMillis() - dimensionReadTime;
  // each range is an inclusive [start, end] pair of column indexes
  for (int[] columnIndexRange : allSelectedDimensionColumnIndexRange) {
    System.arraycopy(projectionListDimensionChunk, columnIndexRange[0],
        dimensionRawColumnChunks, columnIndexRange[0],
        columnIndexRange[1] + 1 - columnIndexRange[0]);
  }
  /*
   * in case projection if the projected dimension are not loaded in the dimensionColumnDataChunk
   * then loading them
   */
  int[] projectionListDimensionIndexes = blockExecutionInfo.getProjectionListDimensionIndexes();
  for (int projectionListDimensionIndex : projectionListDimensionIndexes) {
    if (null == dimensionRawColumnChunks[projectionListDimensionIndex]) {
      dimensionRawColumnChunks[projectionListDimensionIndex] = rawBlockletColumnChunks
          .getDataBlock().readDimensionChunk(fileReader, projectionListDimensionIndex);
    }
  }
  DimensionColumnPage[][] dimensionColumnPages =
      new DimensionColumnPage[numDimensionChunks][numPages];
  for (int chunkIndex = 0; chunkIndex < numDimensionChunks; chunkIndex++) {
    if (dimensionRawColumnChunks[chunkIndex] != null) {
      for (int pageId = 0; pageId < numPages; pageId++) {
        dimensionColumnPages[chunkIndex][pageId] =
            dimensionRawColumnChunks[chunkIndex].decodeColumnPage(pageId);
      }
    }
  }
  MeasureRawColumnChunk[] measureRawColumnChunks =
      new MeasureRawColumnChunk[blockExecutionInfo.getTotalNumberOfMeasureToRead()];
  int numMeasureChunks = measureRawColumnChunks.length;
  // start from the measure chunks that are already held by the raw chunk holder
  for (int chunkIndex = 0; chunkIndex < numMeasureChunks; chunkIndex++) {
    if (null != rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex]) {
      measureRawColumnChunks[chunkIndex] =
          rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex];
    }
  }
  int[][] allSelectedMeasureColumnIndexRange =
      blockExecutionInfo.getAllSelectedMeasureIndexRange();
  MeasureRawColumnChunk[] projectionListMeasureChunk = rawBlockletColumnChunks.getDataBlock()
      .readMeasureChunks(fileReader, allSelectedMeasureColumnIndexRange);
  for (int[] columnIndexRange : allSelectedMeasureColumnIndexRange) {
    System.arraycopy(projectionListMeasureChunk, columnIndexRange[0],
        measureRawColumnChunks, columnIndexRange[0],
        columnIndexRange[1] + 1 - columnIndexRange[0]);
  }
  /*
   * in case projection if the projected measure are not loaded in the ColumnPage
   * then loading them
   */
  int[] projectionListMeasureIndexes = blockExecutionInfo.getProjectionListMeasureIndexes();
  for (int projectionListMeasureIndex : projectionListMeasureIndexes) {
    if (null == measureRawColumnChunks[projectionListMeasureIndex]) {
      measureRawColumnChunks[projectionListMeasureIndex] = rawBlockletColumnChunks
          .getDataBlock().readMeasureChunk(fileReader, projectionListMeasureIndex);
    }
  }
  ColumnPage[][] measureColumnPages = new ColumnPage[numMeasureChunks][numPages];
  for (int chunkIndex = 0; chunkIndex < numMeasureChunks; chunkIndex++) {
    if (measureRawColumnChunks[chunkIndex] != null) {
      for (int pageId = 0; pageId < numPages; pageId++) {
        measureColumnPages[chunkIndex][pageId] =
            measureRawColumnChunks[chunkIndex].decodeColumnPage(pageId);
      }
    }
  }
  scannedResult.setDimensionColumnPages(dimensionColumnPages);
  scannedResult.setPageFilteredRowId(pageFilteredRowId);
  scannedResult.setMeasureColumnPages(measureColumnPages);
  scannedResult.setDimRawColumnChunks(dimensionRawColumnChunks);
  scannedResult.setMsrRawColumnChunks(measureRawColumnChunks);
  scannedResult.setPageFilteredRowCount(pageFilteredRowCount);
  // adding statistics for carbon scan time (elapsed time minus the measured read time)
  QueryStatistic scanTime = queryStatisticsModel.getStatisticsTypeAndObjMap()
      .get(QueryStatisticsConstants.SCAN_BLOCKlET_TIME);
  scanTime.addCountStatistic(QueryStatisticsConstants.SCAN_BLOCKlET_TIME,
      scanTime.getCount() + (System.currentTimeMillis() - startTime - dimensionReadTime));
  QueryStatistic readTime = queryStatisticsModel.getStatisticsTypeAndObjMap()
      .get(QueryStatisticsConstants.READ_BLOCKlET_TIME);
  readTime.addCountStatistic(QueryStatisticsConstants.READ_BLOCKlET_TIME,
      readTime.getCount() + dimensionReadTime);
  return scannedResult;
}
Use of org.apache.carbondata.core.datastore.chunk.DimensionColumnPage in project carbondata by Apache.
The class BlockletFullScanner, method scanBlocklet.
/**
 * Scans a whole blocklet without any filter and wraps its column chunks in a
 * {@link NonFilterQueryScannedResult}.
 * <p>
 * Blocklet and page statistics are updated first. When prefetch is disabled the blocklet is
 * read here and the data chunks are filled at the end; when prefetch is enabled every non-null
 * raw chunk is decoded into pages up front. The per-page row counts are taken from the first
 * non-null dimension chunk (if any dimension range is selected), otherwise from the first
 * non-null measure chunk (if any measure range is selected), otherwise they are derived from
 * the default page size and the blocklet's total row count.
 *
 * @param rawBlockletColumnChunks raw column chunks of the blocklet being scanned
 * @return the scanned result for the blocklet
 * @throws IOException declared for chunk reading/decoding
 * @throws FilterUnsupportedException declared by the scanner contract
 */
@Override
public BlockletScannedResult scanBlocklet(RawBlockletColumnChunks rawBlockletColumnChunks)
    throws IOException, FilterUnsupportedException {
  final long scanStart = System.currentTimeMillis();
  BlockletScannedResult result = new NonFilterQueryScannedResult(blockExecutionInfo);
  final int pageCount = rawBlockletColumnChunks.getDataBlock().numberOfPages();

  // every blocklet that reaches a full scan counts as both "total" and "valid"
  QueryStatistic totalBlocklets = queryStatisticsModel.getStatisticsTypeAndObjMap()
      .get(QueryStatisticsConstants.TOTAL_BLOCKLET_NUM);
  totalBlocklets.addCountStatistic(QueryStatisticsConstants.TOTAL_BLOCKLET_NUM,
      totalBlocklets.getCount() + 1);
  QueryStatistic validBlocklets = queryStatisticsModel.getStatisticsTypeAndObjMap()
      .get(QueryStatisticsConstants.VALID_SCAN_BLOCKLET_NUM);
  validBlocklets.addCountStatistic(QueryStatisticsConstants.VALID_SCAN_BLOCKLET_NUM,
      validBlocklets.getCount() + 1);

  // page statistics: all pages of the blocklet are both valid and scanned
  QueryStatistic validPageStat = queryStatisticsModel.getStatisticsTypeAndObjMap()
      .get(QueryStatisticsConstants.VALID_PAGE_SCANNED);
  validPageStat.addCountStatistic(QueryStatisticsConstants.VALID_PAGE_SCANNED,
      validPageStat.getCount() + pageCount);
  QueryStatistic totalPageStat = queryStatisticsModel.getStatisticsTypeAndObjMap()
      .get(QueryStatisticsConstants.TOTAL_PAGE_SCANNED);
  totalPageStat.addCountStatistic(QueryStatisticsConstants.TOTAL_PAGE_SCANNED,
      totalPageStat.getCount() + pageCount);

  result.setBlockletId(blockExecutionInfo.getBlockIdString()
      + CarbonCommonConstants.FILE_SEPARATOR
      + rawBlockletColumnChunks.getDataBlock().blockletIndex());

  final boolean prefetched = blockExecutionInfo.isPrefetchBlocklet();
  if (!prefetched) {
    readBlocklet(rawBlockletColumnChunks);
  }

  DimensionRawColumnChunk[] dimRawChunks =
      rawBlockletColumnChunks.getDimensionRawColumnChunks();
  DimensionColumnPage[][] dimPages = new DimensionColumnPage[dimRawChunks.length][pageCount];
  MeasureRawColumnChunk[] msrRawChunks = rawBlockletColumnChunks.getMeasureRawColumnChunks();
  ColumnPage[][] msrPages = new ColumnPage[msrRawChunks.length][pageCount];
  result.setDimensionColumnPages(dimPages);
  result.setMeasureColumnPages(msrPages);
  result.setDimRawColumnChunks(dimRawChunks);
  result.setMsrRawColumnChunks(msrRawChunks);

  if (prefetched) {
    // decode eagerly; the page arrays set on the result above are filled in place
    for (int col = 0; col < dimRawChunks.length; col++) {
      if (null != dimRawChunks[col]) {
        dimPages[col] = dimRawChunks[col].decodeAllColumnPages();
      }
    }
    for (int col = 0; col < msrRawChunks.length; col++) {
      if (null != msrRawChunks[col]) {
        msrPages[col] = msrRawChunks[col].decodeAllColumnPages();
      }
    }
  }

  // take per-page row counts from the first chunk that is actually present
  int[] rowsPerPage = null;
  if (blockExecutionInfo.getAllSelectedDimensionColumnIndexRange().length > 0) {
    for (DimensionRawColumnChunk dimChunk : dimRawChunks) {
      if (null != dimChunk) {
        rowsPerPage = dimChunk.getRowCount();
        break;
      }
    }
  } else if (blockExecutionInfo.getAllSelectedMeasureIndexRange().length > 0) {
    for (MeasureRawColumnChunk msrChunk : msrRawChunks) {
      if (null != msrChunk) {
        rowsPerPage = msrChunk.getRowCount();
        break;
      }
    }
  }

  // count(*) case: no dimension or measure is selected, so derive the counts
  if (null == rowsPerPage) {
    rowsPerPage = new int[rawBlockletColumnChunks.getDataBlock().numberOfPages()];
    for (int page = 0; page < rowsPerPage.length; page++) {
      rowsPerPage[page] =
          CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT;
    }
    // a non-zero remainder means the last page is only partially filled
    int remainder = rawBlockletColumnChunks.getDataBlock().numRows()
        % CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT;
    if (remainder > 0) {
      rowsPerPage[rowsPerPage.length - 1] = remainder;
    }
  }
  result.setPageFilteredRowCount(rowsPerPage);

  if (!prefetched) {
    result.fillDataChunks();
  }

  // adding statistics for carbon scan time
  QueryStatistic scanTimeStat = queryStatisticsModel.getStatisticsTypeAndObjMap()
      .get(QueryStatisticsConstants.SCAN_BLOCKlET_TIME);
  scanTimeStat.addCountStatistic(QueryStatisticsConstants.SCAN_BLOCKlET_TIME,
      scanTimeStat.getCount() + (System.currentTimeMillis() - scanStart));
  return result;
}
Use of org.apache.carbondata.core.datastore.chunk.DimensionColumnPage in project carbondata by Apache.
The class RowLevelFilterExecuterImpl, method createRow.
/**
 * Reads the value of every filter dimension and measure for one row and stores them in
 * {@code row} so the row-level filter expression can be evaluated against it.
 * <p>
 * Each value is written at the position given by the column's resolved-filter-info
 * {@code getRowIndex()}. Columns that are not present in the current block receive their
 * default value instead of being read from a page.
 *
 * @param blockChunkHolder holder of the raw dimension and measure column chunks
 * @param row              row instance that receives the assembled values
 * @param pageIndex        index of the page to decode within each column chunk
 * @param index            index of the row inside the decoded page
 * @throws IOException declared for page decoding; note that an {@code IOException} raised
 *                     while materialising a complex dimension is logged and not rethrown
 */
private void createRow(RawBlockletColumnChunks blockChunkHolder, RowIntf row, int pageIndex,
    int index) throws IOException {
  Object[] record = new Object[dimColEvaluatorInfoList.size() + msrColEvalutorInfoList.size()];
  String memberString;
  for (int i = 0; i < dimColEvaluatorInfoList.size(); i++) {
    DimColumnResolvedFilterInfo dimColumnEvaluatorInfo = dimColEvaluatorInfoList.get(i);
    // if filter dimension is not present in the current add its default value
    if (!isDimensionPresentInCurrentBlock[i]) {
      // fill default value here
      record[dimColumnEvaluatorInfo.getRowIndex()] =
          getDimensionDefaultValue(dimColumnEvaluatorInfo);
      continue;
    }
    if (!dimColumnEvaluatorInfo.getDimension().getDataType().isComplexType()) {
      if (!dimColumnEvaluatorInfo.isDimensionExistsInCurrentSilce()) {
        // NOTE(review): there is no continue here, so the page is still decoded below and the
        // default may be overwritten - confirm whether that is intended.
        record[dimColumnEvaluatorInfo.getRowIndex()] =
            dimColumnEvaluatorInfo.getDimension().getDefaultValue();
      }
      DimensionColumnPage columnDataChunk = blockChunkHolder
          .getDimensionRawColumnChunks()[dimensionChunkIndex[i]].decodeColumnPage(pageIndex);
      if (!dimColumnEvaluatorInfo.getDimension().hasEncoding(Encoding.DICTIONARY)
          && columnDataChunk instanceof VariableLengthDimensionColumnPage) {
        // no-dictionary column: the page holds the raw member bytes
        VariableLengthDimensionColumnPage dimensionColumnDataChunk =
            (VariableLengthDimensionColumnPage) columnDataChunk;
        byte[] memberBytes = dimensionColumnDataChunk.getChunkData(index);
        if (null != memberBytes) {
          // the default-member marker and an empty array are both passed on as null
          if (Arrays.equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, memberBytes)
              || memberBytes.length == 0) {
            memberBytes = null;
          }
          record[dimColumnEvaluatorInfo.getRowIndex()] =
              DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(memberBytes,
                  dimColumnEvaluatorInfo.getDimension().getDataType());
        }
      } else {
        int dictionaryValue = readSurrogatesFromColumnChunk(blockChunkHolder, index, pageIndex,
            dimColumnEvaluatorInfo, dimensionChunkIndex[i]);
        if (dimColumnEvaluatorInfo.getDimension().hasEncoding(Encoding.DICTIONARY)
            && !dimColumnEvaluatorInfo.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
          memberString =
              getFilterActualValueFromDictionaryValue(dimColumnEvaluatorInfo, dictionaryValue);
          record[dimColumnEvaluatorInfo.getRowIndex()] = DataTypeUtil.getDataBasedOnDataType(
              memberString, dimColumnEvaluatorInfo.getDimension().getDataType());
        } else if (dimColumnEvaluatorInfo.getDimension()
            .hasEncoding(Encoding.DIRECT_DICTIONARY)) {
          Object member = getFilterActualValueFromDirectDictionaryValue(dimColumnEvaluatorInfo,
              dictionaryValue);
          record[dimColumnEvaluatorInfo.getRowIndex()] = member;
        }
      }
    } else {
      GenericQueryType complexType = complexDimensionInfoMap.get(dimensionChunkIndex[i]);
      // serialise the complex value into a byte buffer, then convert it back to an object
      try (ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
          DataOutputStream dataOutputStream = new DataOutputStream(byteStream)) {
        complexType.parseBlocksAndReturnComplexColumnByteArray(
            blockChunkHolder.getDimensionRawColumnChunks(), index, pageIndex, dataOutputStream);
        record[dimColumnEvaluatorInfo.getRowIndex()] = complexType
            .getDataBasedOnDataTypeFromSurrogates(ByteBuffer.wrap(byteStream.toByteArray()));
      } catch (IOException e) {
        // existing behaviour: the failure is only logged and the slot keeps its current value
        LOGGER.info(e.getMessage());
      }
    }
  }
  for (int i = 0; i < msrColEvalutorInfoList.size(); i++) {
    MeasureColumnResolvedFilterInfo msrColumnEvalutorInfo = msrColEvalutorInfoList.get(i);
    // measure is not in the current block measure list: use its default value
    if (!isMeasurePresentInCurrentBlock[i]) {
      byte[] defaultValue = msrColumnEvalutorInfo.getCarbonColumn().getDefaultValue();
      record[msrColumnEvalutorInfo.getRowIndex()] = RestructureUtil.getMeasureDefaultValue(
          msrColumnEvalutorInfo.getCarbonColumn().getColumnSchema(), defaultValue);
      continue;
    }
    DataType dataType = msrColumnEvalutorInfo.getType();
    // Use this measure's own chunk index. The previous code always used measureChunkIndex[0],
    // so with several measure filter columns every value was read from the first one.
    ColumnPage columnPage = blockChunkHolder
        .getMeasureRawColumnChunks()[measureChunkIndex[i]].decodeColumnPage(pageIndex);
    Object msrValue;
    if (dataType == DataTypes.BOOLEAN) {
      msrValue = columnPage.getBoolean(index);
    } else if (dataType == DataTypes.SHORT) {
      msrValue = (short) columnPage.getLong(index);
    } else if (dataType == DataTypes.INT) {
      msrValue = (int) columnPage.getLong(index);
    } else if (dataType == DataTypes.LONG) {
      msrValue = columnPage.getLong(index);
    } else if (DataTypes.isDecimal(dataType)) {
      BigDecimal bigDecimalValue = columnPage.getDecimal(index);
      // widen the scale to the schema's scale when the stored value has a smaller one
      if (null != bigDecimalValue
          && msrColumnEvalutorInfo.getCarbonColumn().getColumnSchema().getScale()
          > bigDecimalValue.scale()) {
        bigDecimalValue = bigDecimalValue.setScale(
            msrColumnEvalutorInfo.getCarbonColumn().getColumnSchema().getScale(),
            RoundingMode.HALF_UP);
      }
      msrValue = bigDecimalValue;
    } else {
      // every other type is read as a double
      msrValue = columnPage.getDouble(index);
    }
    // a set null bit overrides whatever value was read from the page
    record[msrColumnEvalutorInfo.getRowIndex()] =
        columnPage.getNullBits().get(index) ? null : msrValue;
  }
  row.setValues(record);
}
Aggregations