use of org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk in project carbondata by apache.
the class RowLevelRangeLessThanFiterExecuterImpl method applyFilter.
@Override
public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) throws FilterUnsupportedException, IOException {
// select all rows if the dimension does not exist in the current block
if (!isDimensionPresentInCurrentBlock[0]) {
int numberOfRows = blockChunkHolder.getDataBlock().nodeSize();
return FilterUtil.createBitSetGroupWithDefaultValue(blockChunkHolder.getDataBlock().numberOfPages(), numberOfRows, true);
}
int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping().get(dimensionBlocksIndex[0]);
if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) {
blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock().getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex);
}
DimensionRawColumnChunk rawColumnChunk = blockChunkHolder.getDimensionRawDataChunk()[blockIndex];
BitSetGroup bitSetGroup = new BitSetGroup(rawColumnChunk.getPagesCount());
for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) {
if (rawColumnChunk.getMinValues() != null) {
if (isScanRequired(rawColumnChunk.getMinValues()[i], this.filterRangeValues)) {
BitSet bitSet = getFilteredIndexes(rawColumnChunk.convertToDimColDataChunk(i), rawColumnChunk.getRowCount()[i]);
bitSetGroup.setBitSet(bitSet, i);
}
} else {
BitSet bitSet = getFilteredIndexes(rawColumnChunk.convertToDimColDataChunk(i), rawColumnChunk.getRowCount()[i]);
bitSetGroup.setBitSet(bitSet, i);
}
}
return bitSetGroup;
}
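The page-level pruning above relies on isScanRequired comparing the filter values against each page's minimum value. Below is a minimal sketch of that idea for a "less than" filter, not the exact carbondata implementation: the method name isScanRequiredSketch is hypothetical, and java.util.Arrays.compareUnsigned stands in for carbondata's own byte-array comparator.
// minimal sketch, assuming unsigned lexicographic byte-array ordering;
// for a "less than" filter a page needs scanning only if at least one
// filter value is greater than the page's minimum value
private boolean isScanRequiredSketch(byte[] pageMinValue, byte[][] filterValues) {
  for (byte[] filterValue : filterValues) {
    // Arrays.compareUnsigned is a stand-in for carbondata's byte comparator
    if (java.util.Arrays.compareUnsigned(filterValue, pageMinValue) > 0) {
      return true;
    }
  }
  return false;
}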
use of org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk in project carbondata by apache.
the class CompressedDimensionChunkFileBasedReaderV3 method readRawDimensionChunksInGroup.
/**
* Below method will be used to read multiple dimension columns' data in a group
* and divide it into dimension raw chunk objects.
* Steps for reading:
* 1. Get the length of the data to be read
* 2. Allocate the direct buffer
* 3. Read the data from the file
* 4. Get the data chunk object from the file for each column
* 5. Create the raw chunk object and fill in the details for each column
* 6. Increment the offset of the data
*
* @param fileReader
* reader which will be used to read the dimension columns' data from the file
* @param startBlockletColumnIndex
* blocklet index of the first dimension column
* @param endBlockletColumnIndex
* blocklet index of the last dimension column
* @return DimensionRawColumnChunk array
*/
protected DimensionRawColumnChunk[] readRawDimensionChunksInGroup(FileHolder fileReader, int startBlockletColumnIndex, int endBlockletColumnIndex) throws IOException {
// to calculate the length of the data to be read, we can subtract the
// offset of the start column from the offset of (end column + 1)
// to get the total length
long currentDimensionOffset = dimensionChunksOffset.get(startBlockletColumnIndex);
ByteBuffer buffer = null;
// read the data from carbon data file
synchronized (fileReader) {
buffer = fileReader.readByteBuffer(filePath, currentDimensionOffset, (int) (dimensionChunksOffset.get(endBlockletColumnIndex + 1) - currentDimensionOffset));
}
// create raw chunk for each dimension column
DimensionRawColumnChunk[] dimensionDataChunks = new DimensionRawColumnChunk[endBlockletColumnIndex - startBlockletColumnIndex + 1];
int index = 0;
int runningLength = 0;
for (int i = startBlockletColumnIndex; i <= endBlockletColumnIndex; i++) {
int currentLength = (int) (dimensionChunksOffset.get(i + 1) - dimensionChunksOffset.get(i));
dimensionDataChunks[index] = new DimensionRawColumnChunk(i, buffer, runningLength, currentLength, this);
DataChunk3 dataChunk = CarbonUtil.readDataChunk3(buffer, runningLength, dimensionChunksLength.get(i));
int numberOfPages = dataChunk.getPage_length().size();
byte[][] maxValueOfEachPage = new byte[numberOfPages][];
byte[][] minValueOfEachPage = new byte[numberOfPages][];
int[] eachPageLength = new int[numberOfPages];
for (int j = 0; j < minValueOfEachPage.length; j++) {
maxValueOfEachPage[j] = dataChunk.getData_chunk_list().get(j).getMin_max().getMax_values().get(0).array();
minValueOfEachPage[j] = dataChunk.getData_chunk_list().get(j).getMin_max().getMin_values().get(0).array();
eachPageLength[j] = dataChunk.getData_chunk_list().get(j).getNumberOfRowsInpage();
}
dimensionDataChunks[index].setDataChunkV3(dataChunk);
dimensionDataChunks[index].setFileHolder(fileReader);
dimensionDataChunks[index].setPagesCount(dataChunk.getPage_length().size());
dimensionDataChunks[index].setMaxValues(maxValueOfEachPage);
dimensionDataChunks[index].setMinValues(minValueOfEachPage);
dimensionDataChunks[index].setRowCount(eachPageLength);
dimensionDataChunks[index].setLengths(ArrayUtils.toPrimitive(dataChunk.page_length.toArray(new Integer[dataChunk.page_length.size()])));
dimensionDataChunks[index].setOffsets(ArrayUtils.toPrimitive(dataChunk.page_offset.toArray(new Integer[dataChunk.page_offset.size()])));
runningLength += currentLength;
index++;
}
return dimensionDataChunks;
}
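As a worked illustration of the offset arithmetic above (the offsets are hypothetical, not taken from any real file): reading columns 0 to 2 in one group issues a single file read covering offset[endIndex + 1] - offset[startIndex] bytes, and each column's slice inside that buffer starts at the running length accumulated so far.
// worked sketch of the group-read arithmetic; chunkOffsets plays the role of
// dimensionChunksOffset and the values are hypothetical
long[] chunkOffsets = { 100L, 250L, 600L, 900L };
int startBlockletColumnIndex = 0;
int endBlockletColumnIndex = 2;
// one read covering all three columns: 900 - 100 = 800 bytes
int bytesToRead = (int) (chunkOffsets[endBlockletColumnIndex + 1]
    - chunkOffsets[startBlockletColumnIndex]);
int runningLength = 0;
for (int i = startBlockletColumnIndex; i <= endBlockletColumnIndex; i++) {
  int currentLength = (int) (chunkOffsets[i + 1] - chunkOffsets[i]);
  // column i occupies [runningLength, runningLength + currentLength) in the buffer:
  // column 0 -> [0, 150), column 1 -> [150, 500), column 2 -> [500, 800)
  runningLength += currentLength;
}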
use of org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk in project carbondata by apache.
the class CompressedDimensionChunkFileBasedReaderV3 method readRawDimensionChunk.
/**
* Below method will be used to read the dimension column data from the carbon data file
* Steps for reading:
* 1. Get the length of the data to be read
* 2. Allocate the direct buffer
* 3. Read the data from the file
* 4. Get the data chunk object from the data read
* 5. Create the raw chunk object and fill in the details
*
* @param fileReader reader for reading the column from carbon data file
* @param blockletColumnIndex blocklet index of the column in carbon data file
* @return dimension raw chunk
*/
public DimensionRawColumnChunk readRawDimensionChunk(FileHolder fileReader, int blockletColumnIndex) throws IOException {
// get the current dimension offset
long currentDimensionOffset = dimensionChunksOffset.get(blockletColumnIndex);
int length = 0;
// to calculate the length of the data to be read: in case of the last dimension,
// we can subtract the current dimension offset from lastDimensionOffsets
if (dimensionChunksOffset.size() - 1 == blockletColumnIndex) {
length = (int) (lastDimensionOffsets - currentDimensionOffset);
} else {
length = (int) (dimensionChunksOffset.get(blockletColumnIndex + 1) - currentDimensionOffset);
}
ByteBuffer buffer = null;
// read the data from carbon data file
synchronized (fileReader) {
buffer = fileReader.readByteBuffer(filePath, currentDimensionOffset, length);
}
// get the data chunk which will have all the details about the data pages
DataChunk3 dataChunk = CarbonUtil.readDataChunk3(buffer, 0, length);
// creating a raw chunk instance and filling in all the details
DimensionRawColumnChunk rawColumnChunk = new DimensionRawColumnChunk(blockletColumnIndex, buffer, 0, length, this);
int numberOfPages = dataChunk.getPage_length().size();
byte[][] maxValueOfEachPage = new byte[numberOfPages][];
byte[][] minValueOfEachPage = new byte[numberOfPages][];
int[] eachPageLength = new int[numberOfPages];
for (int i = 0; i < minValueOfEachPage.length; i++) {
maxValueOfEachPage[i] = dataChunk.getData_chunk_list().get(i).getMin_max().getMax_values().get(0).array();
minValueOfEachPage[i] = dataChunk.getData_chunk_list().get(i).getMin_max().getMin_values().get(0).array();
eachPageLength[i] = dataChunk.getData_chunk_list().get(i).getNumberOfRowsInpage();
}
rawColumnChunk.setDataChunkV3(dataChunk);
rawColumnChunk.setFileHolder(fileReader);
rawColumnChunk.setPagesCount(dataChunk.getPage_length().size());
rawColumnChunk.setMaxValues(maxValueOfEachPage);
rawColumnChunk.setMinValues(minValueOfEachPage);
rawColumnChunk.setRowCount(eachPageLength);
rawColumnChunk.setLengths(ArrayUtils.toPrimitive(dataChunk.page_length.toArray(new Integer[dataChunk.page_length.size()])));
rawColumnChunk.setOffsets(ArrayUtils.toPrimitive(dataChunk.page_offset.toArray(new Integer[dataChunk.page_offset.size()])));
return rawColumnChunk;
}
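For context on how such a raw chunk is typically consumed: the raw chunk keeps the undecoded bytes plus per-page metadata, and each page is decoded only when convertToDimColDataChunk is called. A minimal usage sketch under that assumption follows; the names reader, fileHolder and columnIndex are illustrative, not taken from the snippet above.
// minimal usage sketch, assuming a V3 reader instance and an open FileHolder
DimensionRawColumnChunk raw = reader.readRawDimensionChunk(fileHolder, columnIndex);
for (int page = 0; page < raw.getPagesCount(); page++) {
  // pages are decoded lazily, one at a time
  DimensionColumnDataChunk pageData = raw.convertToDimColDataChunk(page);
  // ... consume pageData for the raw.getRowCount()[page] rows of this page
}
// release the decoded pages once the blocklet has been processed
raw.freeMemory();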
use of org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk in project carbondata by apache.
the class CompressedDimensionChunkFileBasedReaderV2 method readRawDimensionChunk.
/**
* Below method will be used to read the chunk based on block index
*
* @param fileReader file reader to read the blocks from file
* @param blockletIndex block to be read
* @return dimension column chunk
*/
public DimensionRawColumnChunk readRawDimensionChunk(FileHolder fileReader, int blockletIndex) throws IOException {
int length = 0;
if (dimensionChunksOffset.size() - 1 == blockletIndex) {
// In case of the last block, read only the data chunk here and read the remaining data while converting it.
length = dimensionChunksLength.get(blockletIndex);
} else {
long currentDimensionOffset = dimensionChunksOffset.get(blockletIndex);
length = (int) (dimensionChunksOffset.get(blockletIndex + 1) - currentDimensionOffset);
}
ByteBuffer buffer = null;
synchronized (fileReader) {
buffer = fileReader.readByteBuffer(filePath, dimensionChunksOffset.get(blockletIndex), length);
}
DimensionRawColumnChunk rawColumnChunk = new DimensionRawColumnChunk(blockletIndex, buffer, 0, length, this);
rawColumnChunk.setFileHolder(fileReader);
rawColumnChunk.setPagesCount(1);
rawColumnChunk.setRowCount(new int[] { numberOfRows });
return rawColumnChunk;
}
use of org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk in project carbondata by apache.
the class FilterScanner method fillScannedResult.
/**
* This method will process the data in the following order:
* 1. First apply min/max on the filter tree and check whether any of the filters
* falls within the min/max range; if not, return an empty result
* 2. If a filter falls within the min/max range, apply the filter on the actual
* data and get the filtered row indexes
* 3. If the row indexes are empty, return an empty result
* 4. If the row indexes are not empty, read only those blocks (measure or dimension)
* which are present in the query but not in the filter; while applying the filter,
* some of the blocks were already read and are present in the chunk holder, so there
* is no need to read those blocks again
* 5. Set the blocks and filter indexes on the result
*
* @param blocksChunkHolder
* @throws FilterUnsupportedException
*/
private AbstractScannedResult fillScannedResult(BlocksChunkHolder blocksChunkHolder) throws FilterUnsupportedException, IOException {
long startTime = System.currentTimeMillis();
QueryStatistic totalBlockletStatistic = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.TOTAL_BLOCKLET_NUM);
totalBlockletStatistic.addCountStatistic(QueryStatisticsConstants.TOTAL_BLOCKLET_NUM, totalBlockletStatistic.getCount() + 1);
// apply filter on actual data
BitSetGroup bitSetGroup = this.filterExecuter.applyFilter(blocksChunkHolder);
// if the indexes are empty then return an empty result
if (bitSetGroup.isEmpty()) {
CarbonUtil.freeMemory(blocksChunkHolder.getDimensionRawDataChunk(), blocksChunkHolder.getMeasureRawDataChunk());
QueryStatistic scanTime = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.SCAN_BLOCKlET_TIME);
scanTime.addCountStatistic(QueryStatisticsConstants.SCAN_BLOCKlET_TIME, scanTime.getCount() + (System.currentTimeMillis() - startTime));
QueryStatistic scannedPages = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.PAGE_SCANNED);
scannedPages.addCountStatistic(QueryStatisticsConstants.PAGE_SCANNED, scannedPages.getCount() + bitSetGroup.getScannedPages());
return createEmptyResult();
}
AbstractScannedResult scannedResult = new FilterQueryScannedResult(blockExecutionInfo);
scannedResult.setBlockletId(blockExecutionInfo.getBlockId() + CarbonCommonConstants.FILE_SEPARATOR + blocksChunkHolder.getDataBlock().nodeNumber());
// valid scanned blocklet
QueryStatistic validScannedBlockletStatistic = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.VALID_SCAN_BLOCKLET_NUM);
validScannedBlockletStatistic.addCountStatistic(QueryStatisticsConstants.VALID_SCAN_BLOCKLET_NUM, validScannedBlockletStatistic.getCount() + 1);
// adding statistics for valid number of pages
QueryStatistic validPages = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.VALID_PAGE_SCANNED);
validPages.addCountStatistic(QueryStatisticsConstants.VALID_PAGE_SCANNED, validPages.getCount() + bitSetGroup.getValidPages());
QueryStatistic scannedPages = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.PAGE_SCANNED);
scannedPages.addCountStatistic(QueryStatisticsConstants.PAGE_SCANNED, scannedPages.getCount() + bitSetGroup.getScannedPages());
int[] rowCount = new int[bitSetGroup.getNumberOfPages()];
// get the row indexes from the bit set
int[][] indexesGroup = new int[bitSetGroup.getNumberOfPages()][];
for (int k = 0; k < indexesGroup.length; k++) {
BitSet bitSet = bitSetGroup.getBitSet(k);
if (bitSet != null && !bitSet.isEmpty()) {
int[] indexes = new int[bitSet.cardinality()];
int index = 0;
for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) {
indexes[index++] = i;
}
rowCount[k] = indexes.length;
indexesGroup[k] = indexes;
}
}
// load the delete delta data cache for this blocklet's data block
DeleteDeltaCacheLoaderIntf deleteCacheLoader = new BlockletDeleteDeltaCacheLoader(scannedResult.getBlockletId(), blocksChunkHolder.getDataBlock(), blockExecutionInfo.getAbsoluteTableIdentifier());
deleteCacheLoader.loadDeleteDeltaFileDataToCache();
scannedResult.setBlockletDeleteDeltaCache(blocksChunkHolder.getDataBlock().getDeleteDeltaDataCache());
FileHolder fileReader = blocksChunkHolder.getFileReader();
int[][] allSelectedDimensionBlocksIndexes = blockExecutionInfo.getAllSelectedDimensionBlocksIndexes();
long dimensionReadTime = System.currentTimeMillis();
DimensionRawColumnChunk[] projectionListDimensionChunk = blocksChunkHolder.getDataBlock().getDimensionChunks(fileReader, allSelectedDimensionBlocksIndexes);
dimensionReadTime = System.currentTimeMillis() - dimensionReadTime;
DimensionRawColumnChunk[] dimensionRawColumnChunks = new DimensionRawColumnChunk[blockExecutionInfo.getTotalNumberDimensionBlock()];
// reuse the dimension raw chunks that were already read and are present in the chunk holder
for (int i = 0; i < dimensionRawColumnChunks.length; i++) {
if (null != blocksChunkHolder.getDimensionRawDataChunk()[i]) {
dimensionRawColumnChunks[i] = blocksChunkHolder.getDimensionRawDataChunk()[i];
}
}
for (int i = 0; i < allSelectedDimensionBlocksIndexes.length; i++) {
for (int j = allSelectedDimensionBlocksIndexes[i][0]; j <= allSelectedDimensionBlocksIndexes[i][1]; j++) {
dimensionRawColumnChunks[j] = projectionListDimensionChunk[j];
}
}
long dimensionReadTime1 = System.currentTimeMillis();
/**
* in case of projection, if the projected dimension chunks are not already
* loaded then load them
*/
int[] projectionListDimensionIndexes = blockExecutionInfo.getProjectionListDimensionIndexes();
int projectionListDimensionIndexesLength = projectionListDimensionIndexes.length;
for (int i = 0; i < projectionListDimensionIndexesLength; i++) {
if (null == dimensionRawColumnChunks[projectionListDimensionIndexes[i]]) {
dimensionRawColumnChunks[projectionListDimensionIndexes[i]] = blocksChunkHolder.getDataBlock().getDimensionChunk(fileReader, projectionListDimensionIndexes[i]);
}
}
dimensionReadTime += (System.currentTimeMillis() - dimensionReadTime1);
dimensionReadTime1 = System.currentTimeMillis();
MeasureRawColumnChunk[] measureRawColumnChunks = new MeasureRawColumnChunk[blockExecutionInfo.getTotalNumberOfMeasureBlock()];
int[][] allSelectedMeasureBlocksIndexes = blockExecutionInfo.getAllSelectedMeasureBlocksIndexes();
MeasureRawColumnChunk[] projectionListMeasureChunk = blocksChunkHolder.getDataBlock().getMeasureChunks(fileReader, allSelectedMeasureBlocksIndexes);
dimensionReadTime += System.currentTimeMillis() - dimensionReadTime1;
// reuse the measure raw chunks that were already read and are present in the chunk holder
for (int i = 0; i < measureRawColumnChunks.length; i++) {
if (null != blocksChunkHolder.getMeasureRawDataChunk()[i]) {
measureRawColumnChunks[i] = blocksChunkHolder.getMeasureRawDataChunk()[i];
}
}
for (int i = 0; i < allSelectedMeasureBlocksIndexes.length; i++) {
for (int j = allSelectedMeasureBlocksIndexes[i][0]; j <= allSelectedMeasureBlocksIndexes[i][1]; j++) {
measureRawColumnChunks[j] = projectionListMeasureChunk[j];
}
}
dimensionReadTime1 = System.currentTimeMillis();
/**
* in case of projection, if the projected measure chunks are not already
* loaded then load them
*/
int[] projectionListMeasureIndexes = blockExecutionInfo.getProjectionListMeasureIndexes();
int projectionListMeasureIndexesLength = projectionListMeasureIndexes.length;
for (int i = 0; i < projectionListMeasureIndexesLength; i++) {
if (null == measureRawColumnChunks[projectionListMeasureIndexes[i]]) {
measureRawColumnChunks[projectionListMeasureIndexes[i]] = blocksChunkHolder.getDataBlock().getMeasureChunk(fileReader, projectionListMeasureIndexes[i]);
}
}
dimensionReadTime += System.currentTimeMillis() - dimensionReadTime1;
DimensionColumnDataChunk[][] dimensionColumnDataChunks = new DimensionColumnDataChunk[dimensionRawColumnChunks.length][indexesGroup.length];
MeasureColumnDataChunk[][] measureColumnDataChunks = new MeasureColumnDataChunk[measureRawColumnChunks.length][indexesGroup.length];
for (int i = 0; i < dimensionRawColumnChunks.length; i++) {
for (int j = 0; j < indexesGroup.length; j++) {
if (dimensionRawColumnChunks[i] != null) {
dimensionColumnDataChunks[i][j] = dimensionRawColumnChunks[i].convertToDimColDataChunk(j);
}
}
}
for (int i = 0; i < measureRawColumnChunks.length; i++) {
for (int j = 0; j < indexesGroup.length; j++) {
if (measureRawColumnChunks[i] != null) {
measureColumnDataChunks[i][j] = measureRawColumnChunks[i].convertToMeasureColDataChunk(j);
}
}
}
scannedResult.setDimensionChunks(dimensionColumnDataChunks);
scannedResult.setIndexes(indexesGroup);
scannedResult.setMeasureChunks(measureColumnDataChunks);
scannedResult.setRawColumnChunks(dimensionRawColumnChunks);
scannedResult.setNumberOfRows(rowCount);
// adding statistics for carbon scan time
QueryStatistic scanTime = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.SCAN_BLOCKlET_TIME);
scanTime.addCountStatistic(QueryStatisticsConstants.SCAN_BLOCKlET_TIME, scanTime.getCount() + (System.currentTimeMillis() - startTime - dimensionReadTime));
QueryStatistic readTime = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.READ_BLOCKlET_TIME);
readTime.addCountStatistic(QueryStatisticsConstants.READ_BLOCKlET_TIME, readTime.getCount() + dimensionReadTime);
return scannedResult;
}