Use of org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk in project carbondata by apache.
The class RowLevelFilterExecuterImpl, method createRow.
/**
* Method will read the members of a particular dimension block and create
* a row instance for further processing by the filters
*
* @param blockChunkHolder
* @param row
* @param pageIndex
* @param index
* @throws IOException
*/
private void createRow(BlocksChunkHolder blockChunkHolder, RowIntf row, int pageIndex, int index) throws IOException {
Object[] record = new Object[dimColEvaluatorInfoList.size() + msrColEvalutorInfoList.size()];
String memberString;
for (int i = 0; i < dimColEvaluatorInfoList.size(); i++) {
DimColumnResolvedFilterInfo dimColumnEvaluatorInfo = dimColEvaluatorInfoList.get(i);
// if the filter dimension is not present in the current block, fill its default value
if (!isDimensionPresentInCurrentBlock[i]) {
// fill default value here
record[dimColumnEvaluatorInfo.getRowIndex()] = getDimensionDefaultValue(dimColumnEvaluatorInfo);
continue;
}
if (dimColumnEvaluatorInfo.getDimension().getDataType() != DataType.ARRAY && dimColumnEvaluatorInfo.getDimension().getDataType() != DataType.STRUCT) {
if (!dimColumnEvaluatorInfo.isDimensionExistsInCurrentSilce()) {
record[dimColumnEvaluatorInfo.getRowIndex()] = dimColumnEvaluatorInfo.getDimension().getDefaultValue();
}
DimensionColumnDataChunk columnDataChunk = blockChunkHolder.getDimensionRawDataChunk()[dimensionBlocksIndex[i]].convertToDimColDataChunk(pageIndex);
if (!dimColumnEvaluatorInfo.getDimension().hasEncoding(Encoding.DICTIONARY) && columnDataChunk instanceof VariableLengthDimensionDataChunk) {
VariableLengthDimensionDataChunk dimensionColumnDataChunk = (VariableLengthDimensionDataChunk) columnDataChunk;
byte[] memberBytes = dimensionColumnDataChunk.getChunkData(index);
if (null != memberBytes) {
if (Arrays.equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, memberBytes)) {
memberBytes = null;
}
record[dimColumnEvaluatorInfo.getRowIndex()] = DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(memberBytes, dimColumnEvaluatorInfo.getDimension().getDataType());
} else {
continue;
}
} else {
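// dictionary encoded dimension: read the surrogate key from the column page and translate it back into the actual value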
int dictionaryValue = readSurrogatesFromColumnBlock(blockChunkHolder, index, pageIndex, dimColumnEvaluatorInfo, dimensionBlocksIndex[i]);
if (dimColumnEvaluatorInfo.getDimension().hasEncoding(Encoding.DICTIONARY) && !dimColumnEvaluatorInfo.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
memberString = getFilterActualValueFromDictionaryValue(dimColumnEvaluatorInfo, dictionaryValue);
record[dimColumnEvaluatorInfo.getRowIndex()] = DataTypeUtil.getDataBasedOnDataType(memberString, dimColumnEvaluatorInfo.getDimension().getDataType());
} else if (dimColumnEvaluatorInfo.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
Object member = getFilterActualValueFromDirectDictionaryValue(dimColumnEvaluatorInfo, dictionaryValue);
record[dimColumnEvaluatorInfo.getRowIndex()] = member;
}
}
} else {
try {
GenericQueryType complexType = complexDimensionInfoMap.get(dimensionBlocksIndex[i]);
ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
DataOutputStream dataOutputStream = new DataOutputStream(byteStream);
complexType.parseBlocksAndReturnComplexColumnByteArray(blockChunkHolder.getDimensionRawDataChunk(), index, pageIndex, dataOutputStream);
record[dimColumnEvaluatorInfo.getRowIndex()] = complexType.getDataBasedOnDataTypeFromSurrogates(ByteBuffer.wrap(byteStream.toByteArray()));
byteStream.close();
} catch (IOException e) {
LOGGER.info(e.getMessage());
}
}
}
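// now fill the measure columns of the row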
DataType msrType;
for (int i = 0; i < msrColEvalutorInfoList.size(); i++) {
MeasureColumnResolvedFilterInfo msrColumnEvalutorInfo = msrColEvalutorInfoList.get(i);
switch(msrColumnEvalutorInfo.getType()) {
case SHORT:
msrType = DataType.SHORT;
break;
case INT:
msrType = DataType.INT;
break;
case LONG:
msrType = DataType.LONG;
break;
case DECIMAL:
msrType = DataType.DECIMAL;
break;
default:
msrType = DataType.DOUBLE;
}
// if the measure is not present in the current block, fill its default value
if (!isMeasurePresentInCurrentBlock[i]) {
byte[] defaultValue = msrColumnEvalutorInfo.getCarbonColumn().getDefaultValue();
record[msrColumnEvalutorInfo.getRowIndex()] = RestructureUtil.getMeasureDefaultValue(msrColumnEvalutorInfo.getCarbonColumn().getColumnSchema(), defaultValue);
continue;
}
Object msrValue;
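// decode the measure page for this page index and read the value using the resolved measure type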
MeasureColumnDataChunk measureColumnDataChunk = blockChunkHolder.getMeasureRawDataChunk()[measureBlocksIndex[0]].convertToMeasureColDataChunk(pageIndex);
switch(msrType) {
case SHORT:
msrValue = (short) measureColumnDataChunk.getMeasureDataHolder().getReadableLongValueByIndex(index);
break;
case INT:
msrValue = (int) measureColumnDataChunk.getMeasureDataHolder().getReadableLongValueByIndex(index);
break;
case LONG:
msrValue = measureColumnDataChunk.getMeasureDataHolder().getReadableLongValueByIndex(index);
break;
case DECIMAL:
BigDecimal bigDecimalValue = measureColumnDataChunk.getMeasureDataHolder().getReadableBigDecimalValueByIndex(index);
if (null != bigDecimalValue && msrColumnEvalutorInfo.getCarbonColumn().getColumnSchema().getScale() > bigDecimalValue.scale()) {
bigDecimalValue = bigDecimalValue.setScale(msrColumnEvalutorInfo.getCarbonColumn().getColumnSchema().getScale(), RoundingMode.HALF_UP);
}
msrValue = bigDecimalValue;
break;
default:
msrValue = measureColumnDataChunk.getMeasureDataHolder().getReadableDoubleValueByIndex(index);
}
record[msrColumnEvalutorInfo.getRowIndex()] = measureColumnDataChunk.getNullValueIndexHolder().getBitSet().get(index) ? null : msrValue;
}
row.setValues(record);
}
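For orientation, here is a minimal sketch of how a row-level filter executor might drive createRow while applying a filter page by page. The variables pageCount, rowCounts, and expression are illustrative assumptions, not the exact fields of RowLevelFilterExecuterImpl, and exception handling is omitted:

// a minimal sketch, assuming per-page row counts are already known
BitSetGroup bitSetGroup = new BitSetGroup(pageCount);
for (int page = 0; page < pageCount; page++) {
    BitSet bitSet = new BitSet(rowCounts[page]);
    RowIntf row = new RowImpl();
    for (int rowId = 0; rowId < rowCounts[page]; rowId++) {
        // materialize the row, then evaluate the resolved filter expression on it
        createRow(blockChunkHolder, row, page, rowId);
        if (expression.evaluate(row).getBoolean()) {
            bitSet.set(rowId);
        }
    }
    bitSetGroup.setBitSet(bitSet, page);
}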
Use of org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk in project carbondata by apache.
The class CompressedDimensionChunkFileBasedReaderV2, method convertToDimensionChunk.
public DimensionColumnDataChunk convertToDimensionChunk(DimensionRawColumnChunk dimensionRawColumnChunk, int pageNumber) throws IOException {
byte[] dataPage = null;
int[] invertedIndexes = null;
int[] invertedIndexesReverse = null;
int[] rlePage = null;
DataChunk2 dimensionColumnChunk = null;
int copySourcePoint = dimensionRawColumnChunk.getOffSet();
int blockIndex = dimensionRawColumnChunk.getBlockletId();
ByteBuffer rawData = dimensionRawColumnChunk.getRawData();
if (dimensionChunksOffset.size() - 1 == blockIndex) {
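// for the last dimension column only the metadata was read into the raw buffer up front, so fetch all of its pages (data, row id, RLE) from the file in one read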
dimensionColumnChunk = CarbonUtil.readDataChunk(rawData, copySourcePoint, dimensionRawColumnChunk.getLength());
int totalDimensionDataLength = dimensionColumnChunk.data_page_length + dimensionColumnChunk.rle_page_length + dimensionColumnChunk.rowid_page_length;
synchronized (dimensionRawColumnChunk.getFileReader()) {
rawData = dimensionRawColumnChunk.getFileReader().readByteBuffer(filePath, dimensionChunksOffset.get(blockIndex) + dimensionChunksLength.get(blockIndex), totalDimensionDataLength);
}
// the re-read buffer starts at the data page, so reset the copy offset
copySourcePoint = 0;
} else {
dimensionColumnChunk = CarbonUtil.readDataChunk(rawData, copySourcePoint, dimensionChunksLength.get(blockIndex));
copySourcePoint += dimensionChunksLength.get(blockIndex);
}
// first read the data page and uncompress it
dataPage = COMPRESSOR.unCompressByte(rawData.array(), copySourcePoint, dimensionColumnChunk.data_page_length);
copySourcePoint += dimensionColumnChunk.data_page_length;
// if row id block is present then read the row id chunk and uncompress it
if (hasEncoding(dimensionColumnChunk.encoders, Encoding.INVERTED_INDEX)) {
byte[] dataInv = new byte[dimensionColumnChunk.rowid_page_length];
rawData.position(copySourcePoint);
rawData.get(dataInv);
invertedIndexes = CarbonUtil.getUnCompressColumnIndex(dimensionColumnChunk.rowid_page_length, dataInv, numberComressor, 0);
copySourcePoint += dimensionColumnChunk.rowid_page_length;
// get the reverse index
invertedIndexesReverse = getInvertedReverseIndex(invertedIndexes);
}
// then uncompress the actual data based on the RLE block
if (hasEncoding(dimensionColumnChunk.encoders, Encoding.RLE)) {
byte[] dataRle = new byte[dimensionColumnChunk.rle_page_length];
rawData.position(copySourcePoint);
rawData.get(dataRle);
rlePage = numberComressor.unCompress(dataRle, 0, dimensionColumnChunk.rle_page_length);
// uncompress the data with rle indexes
dataPage = UnBlockIndexer.uncompressData(dataPage, rlePage, eachColumnValueSize[blockIndex]);
}
// fill chunk attributes
DimensionColumnDataChunk columnDataChunk = null;
if (dimensionColumnChunk.isRowMajor()) {
// to store fixed length column chunk values
columnDataChunk = new ColumnGroupDimensionDataChunk(dataPage, eachColumnValueSize[blockIndex], numberOfRows);
} else if (!hasEncoding(dimensionColumnChunk.encoders, Encoding.DICTIONARY)) {
// no dictionary column: create a variable length chunk and set it to the data chunk instance
columnDataChunk = new VariableLengthDimensionDataChunk(dataPage, invertedIndexes, invertedIndexesReverse, numberOfRows);
} else {
// to store fixed length column chunk values
columnDataChunk = new FixedLengthDimensionDataChunk(dataPage, invertedIndexes, invertedIndexesReverse, numberOfRows, eachColumnValueSize[blockIndex]);
}
return columnDataChunk;
}
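Callers do not usually invoke convertToDimensionChunk directly; they go through DimensionRawColumnChunk, as the filter code in the first section does. A minimal sketch, where rawChunk and rowId are illustrative placeholders:

// decode page 0 of a raw dimension chunk, then read one row's bytes from it
DimensionColumnDataChunk page = rawChunk.convertToDimColDataChunk(0);
byte[] value = page.getChunkData(rowId);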
Use of org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk in project carbondata by apache.
The class CompressedDimensionChunkFileBasedReaderV1, method convertToDimensionChunk.
@Override
public DimensionColumnDataChunk convertToDimensionChunk(DimensionRawColumnChunk dimensionRawColumnChunk, int pageNumber) throws IOException {
int blockIndex = dimensionRawColumnChunk.getBlockletId();
byte[] dataPage = null;
int[] invertedIndexes = null;
int[] invertedIndexesReverse = null;
int[] rlePage = null;
FileHolder fileReader = dimensionRawColumnChunk.getFileReader();
ByteBuffer rawData = dimensionRawColumnChunk.getRawData();
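// V1 keeps the complete data page in the raw buffer, so uncompress it directly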
dataPage = COMPRESSOR.unCompressByte(rawData.array(), dimensionRawColumnChunk.getOffSet(), dimensionRawColumnChunk.getLength());
// if row id block is present then read the row id chunk and uncompress it
DataChunk dataChunk = dimensionColumnChunk.get(blockIndex);
if (CarbonUtil.hasEncoding(dataChunk.getEncodingList(), Encoding.INVERTED_INDEX)) {
byte[] columnIndexData;
synchronized (fileReader) {
columnIndexData = fileReader.readByteArray(filePath, dataChunk.getRowIdPageOffset(), dataChunk.getRowIdPageLength());
}
invertedIndexes = CarbonUtil.getUnCompressColumnIndex(dataChunk.getRowIdPageLength(), columnIndexData, numberComressor, 0);
// get the reverse index
invertedIndexesReverse = getInvertedReverseIndex(invertedIndexes);
}
// then uncompress the actual data based on the RLE block
if (CarbonUtil.hasEncoding(dataChunk.getEncodingList(), Encoding.RLE)) {
// read and uncompress the rle block
byte[] key;
synchronized (fileReader) {
key = fileReader.readByteArray(filePath, dataChunk.getRlePageOffset(), dataChunk.getRlePageLength());
}
rlePage = numberComressor.unCompress(key, 0, dataChunk.getRlePageLength());
// uncompress the data with rle indexes
dataPage = UnBlockIndexer.uncompressData(dataPage, rlePage, eachColumnValueSize[blockIndex]);
rlePage = null;
}
// fill chunk attributes
DimensionColumnDataChunk columnDataChunk = null;
if (dataChunk.isRowMajor()) {
// to store fixed length column chunk values
columnDataChunk = new ColumnGroupDimensionDataChunk(dataPage, eachColumnValueSize[blockIndex], numberOfRows);
} else if (!CarbonUtil.hasEncoding(dataChunk.getEncodingList(), Encoding.DICTIONARY)) {
// no dictionary column: create a variable length chunk and set it to the data chunk instance
columnDataChunk = new VariableLengthDimensionDataChunk(dataPage, invertedIndexes, invertedIndexesReverse, numberOfRows);
} else {
// to store fixed length column chunk values
columnDataChunk = new FixedLengthDimensionDataChunk(dataPage, invertedIndexes, invertedIndexesReverse, numberOfRows, eachColumnValueSize[blockIndex]);
}
return columnDataChunk;
}
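Note the structural difference between the two readers: the V1 reader issues a separate synchronized file read for each optional page (row id, RLE) using offsets recorded in the blocklet metadata, while the V2 reader works against a single contiguous raw buffer and walks it with copySourcePoint. Both converge on the same three chunk implementations (ColumnGroupDimensionDataChunk, VariableLengthDimensionDataChunk, FixedLengthDimensionDataChunk), chosen by the row-major flag and the DICTIONARY encoding.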
Use of org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk in project carbondata by apache.
The class FilterScanner, method fillScannedResult.
/**
* This method will process the data in the below order:
* 1. first apply min/max pruning to the filter tree and check whether any of the
* filters falls within the min/max range; if not, return an empty result
* 2. if a filter falls within the min/max range, apply the filter on the actual
* data and get the filtered row indexes
* 3. if the row indexes are empty, return an empty result
* 4. if the row indexes are not empty, read only those blocks (measure or dimension)
* which are present in the query but were not read while applying the filter;
* blocks already read while filtering are present in the chunk holder, so they
* need not be read again
* 5. set the blocks and the filtered indexes on the result
*
* @param blocksChunkHolder
* @throws FilterUnsupportedException
* @throws IOException
*/
private AbstractScannedResult fillScannedResult(BlocksChunkHolder blocksChunkHolder) throws FilterUnsupportedException, IOException {
long startTime = System.currentTimeMillis();
QueryStatistic totalBlockletStatistic = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.TOTAL_BLOCKLET_NUM);
totalBlockletStatistic.addCountStatistic(QueryStatisticsConstants.TOTAL_BLOCKLET_NUM, totalBlockletStatistic.getCount() + 1);
// apply filter on actual data
BitSetGroup bitSetGroup = this.filterExecuter.applyFilter(blocksChunkHolder);
// if the indexes are empty then return an empty result
if (bitSetGroup.isEmpty()) {
CarbonUtil.freeMemory(blocksChunkHolder.getDimensionRawDataChunk(), blocksChunkHolder.getMeasureRawDataChunk());
QueryStatistic scanTime = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.SCAN_BLOCKlET_TIME);
scanTime.addCountStatistic(QueryStatisticsConstants.SCAN_BLOCKlET_TIME, scanTime.getCount() + (System.currentTimeMillis() - startTime));
QueryStatistic scannedPages = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.PAGE_SCANNED);
scannedPages.addCountStatistic(QueryStatisticsConstants.PAGE_SCANNED, scannedPages.getCount() + bitSetGroup.getScannedPages());
return createEmptyResult();
}
AbstractScannedResult scannedResult = new FilterQueryScannedResult(blockExecutionInfo);
scannedResult.setBlockletId(blockExecutionInfo.getBlockId() + CarbonCommonConstants.FILE_SEPARATOR + blocksChunkHolder.getDataBlock().nodeNumber());
// valid scanned blocklet
QueryStatistic validScannedBlockletStatistic = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.VALID_SCAN_BLOCKLET_NUM);
validScannedBlockletStatistic.addCountStatistic(QueryStatisticsConstants.VALID_SCAN_BLOCKLET_NUM, validScannedBlockletStatistic.getCount() + 1);
// adding statistics for valid number of pages
QueryStatistic validPages = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.VALID_PAGE_SCANNED);
validPages.addCountStatistic(QueryStatisticsConstants.VALID_PAGE_SCANNED, validPages.getCount() + bitSetGroup.getValidPages());
QueryStatistic scannedPages = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.PAGE_SCANNED);
scannedPages.addCountStatistic(QueryStatisticsConstants.PAGE_SCANNED, scannedPages.getCount() + bitSetGroup.getScannedPages());
int[] rowCount = new int[bitSetGroup.getNumberOfPages()];
// get the row indexes from the bit set
int[][] indexesGroup = new int[bitSetGroup.getNumberOfPages()][];
for (int k = 0; k < indexesGroup.length; k++) {
BitSet bitSet = bitSetGroup.getBitSet(k);
if (bitSet != null && !bitSet.isEmpty()) {
int[] indexes = new int[bitSet.cardinality()];
int index = 0;
for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) {
indexes[index++] = i;
}
rowCount[k] = indexes.length;
indexesGroup[k] = indexes;
}
}
// loading delete data cache in blockexecutioninfo instance
DeleteDeltaCacheLoaderIntf deleteCacheLoader = new BlockletDeleteDeltaCacheLoader(scannedResult.getBlockletId(), blocksChunkHolder.getDataBlock(), blockExecutionInfo.getAbsoluteTableIdentifier());
deleteCacheLoader.loadDeleteDeltaFileDataToCache();
scannedResult.setBlockletDeleteDeltaCache(blocksChunkHolder.getDataBlock().getDeleteDeltaDataCache());
FileHolder fileReader = blocksChunkHolder.getFileReader();
int[][] allSelectedDimensionBlocksIndexes = blockExecutionInfo.getAllSelectedDimensionBlocksIndexes();
long dimensionReadTime = System.currentTimeMillis();
DimensionRawColumnChunk[] projectionListDimensionChunk = blocksChunkHolder.getDataBlock().getDimensionChunks(fileReader, allSelectedDimensionBlocksIndexes);
dimensionReadTime = System.currentTimeMillis() - dimensionReadTime;
DimensionRawColumnChunk[] dimensionRawColumnChunks = new DimensionRawColumnChunk[blockExecutionInfo.getTotalNumberDimensionBlock()];
// reuse the dimension raw chunks that were already read while applying the filter
for (int i = 0; i < dimensionRawColumnChunks.length; i++) {
if (null != blocksChunkHolder.getDimensionRawDataChunk()[i]) {
dimensionRawColumnChunks[i] = blocksChunkHolder.getDimensionRawDataChunk()[i];
}
}
for (int i = 0; i < allSelectedDimensionBlocksIndexes.length; i++) {
for (int j = allSelectedDimensionBlocksIndexes[i][0]; j <= allSelectedDimensionBlocksIndexes[i][1]; j++) {
dimensionRawColumnChunks[j] = projectionListDimensionChunk[j];
}
}
long dimensionReadTime1 = System.currentTimeMillis();
/**
* in case of projection, if the projected dimensions are not yet loaded into
* dimensionRawColumnChunks then load them
*/
int[] projectionListDimensionIndexes = blockExecutionInfo.getProjectionListDimensionIndexes();
int projectionListDimensionIndexesLength = projectionListDimensionIndexes.length;
for (int i = 0; i < projectionListDimensionIndexesLength; i++) {
if (null == dimensionRawColumnChunks[projectionListDimensionIndexes[i]]) {
dimensionRawColumnChunks[projectionListDimensionIndexes[i]] = blocksChunkHolder.getDataBlock().getDimensionChunk(fileReader, projectionListDimensionIndexes[i]);
}
}
dimensionReadTime += (System.currentTimeMillis() - dimensionReadTime1);
dimensionReadTime1 = System.currentTimeMillis();
MeasureRawColumnChunk[] measureRawColumnChunks = new MeasureRawColumnChunk[blockExecutionInfo.getTotalNumberOfMeasureBlock()];
int[][] allSelectedMeasureBlocksIndexes = blockExecutionInfo.getAllSelectedMeasureBlocksIndexes();
MeasureRawColumnChunk[] projectionListMeasureChunk = blocksChunkHolder.getDataBlock().getMeasureChunks(fileReader, allSelectedMeasureBlocksIndexes);
dimensionReadTime += System.currentTimeMillis() - dimensionReadTime1;
// reuse the measure raw chunks that were already read while applying the filter
for (int i = 0; i < measureRawColumnChunks.length; i++) {
if (null != blocksChunkHolder.getMeasureRawDataChunk()[i]) {
measureRawColumnChunks[i] = blocksChunkHolder.getMeasureRawDataChunk()[i];
}
}
for (int i = 0; i < allSelectedMeasureBlocksIndexes.length; i++) {
for (int j = allSelectedMeasureBlocksIndexes[i][0]; j <= allSelectedMeasureBlocksIndexes[i][1]; j++) {
measureRawColumnChunks[j] = projectionListMeasureChunk[j];
}
}
dimensionReadTime1 = System.currentTimeMillis();
/**
* in case of projection, if the projected measures are not yet loaded into
* measureRawColumnChunks then load them
*/
int[] projectionListMeasureIndexes = blockExecutionInfo.getProjectionListMeasureIndexes();
int projectionListMeasureIndexesLength = projectionListMeasureIndexes.length;
for (int i = 0; i < projectionListMeasureIndexesLength; i++) {
if (null == measureRawColumnChunks[projectionListMeasureIndexes[i]]) {
measureRawColumnChunks[projectionListMeasureIndexes[i]] = blocksChunkHolder.getDataBlock().getMeasureChunk(fileReader, projectionListMeasureIndexes[i]);
}
}
dimensionReadTime += System.currentTimeMillis() - dimensionReadTime1;
DimensionColumnDataChunk[][] dimensionColumnDataChunks = new DimensionColumnDataChunk[dimensionRawColumnChunks.length][indexesGroup.length];
MeasureColumnDataChunk[][] measureColumnDataChunks = new MeasureColumnDataChunk[measureRawColumnChunks.length][indexesGroup.length];
for (int i = 0; i < dimensionRawColumnChunks.length; i++) {
for (int j = 0; j < indexesGroup.length; j++) {
if (dimensionRawColumnChunks[i] != null) {
dimensionColumnDataChunks[i][j] = dimensionRawColumnChunks[i].convertToDimColDataChunk(j);
}
}
}
for (int i = 0; i < measureRawColumnChunks.length; i++) {
for (int j = 0; j < indexesGroup.length; j++) {
if (measureRawColumnChunks[i] != null) {
measureColumnDataChunks[i][j] = measureRawColumnChunks[i].convertToMeasureColDataChunk(j);
}
}
}
scannedResult.setDimensionChunks(dimensionColumnDataChunks);
scannedResult.setIndexes(indexesGroup);
scannedResult.setMeasureChunks(measureColumnDataChunks);
scannedResult.setRawColumnChunks(dimensionRawColumnChunks);
scannedResult.setNumberOfRows(rowCount);
// adding statistics for carbon scan time
QueryStatistic scanTime = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.SCAN_BLOCKlET_TIME);
scanTime.addCountStatistic(QueryStatisticsConstants.SCAN_BLOCKlET_TIME, scanTime.getCount() + (System.currentTimeMillis() - startTime - dimensionReadTime));
QueryStatistic readTime = queryStatisticsModel.getStatisticsTypeAndObjMap().get(QueryStatisticsConstants.READ_BLOCKlET_TIME);
readTime.addCountStatistic(QueryStatisticsConstants.READ_BLOCKlET_TIME, readTime.getCount() + dimensionReadTime);
return scannedResult;
}
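The conversion from a page's BitSet to an int[] of row indexes above is the standard java.util.BitSet iteration idiom; distilled into a standalone helper, using nothing beyond the JDK:

// distilled from the loop above: collect the positions of all set bits
static int[] toIndexArray(java.util.BitSet bitSet) {
    int[] indexes = new int[bitSet.cardinality()];
    int k = 0;
    for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) {
        indexes[k++] = i;
    }
    return indexes;
}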