use of org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionColumnPage in project carbondata by apache.
the class CompressedDimensionChunkFileBasedReaderV1 method decodeColumnPage.
/**
 * Decodes a dimension column page from the given raw column chunk.
 *
 * <p>The data page is uncompressed from the chunk's raw buffer. When the column's
 * metadata lists {@code INVERTED_INDEX} and/or {@code RLE} encodings, the matching
 * pages are read from {@code filePath} through the chunk's file reader and applied.
 *
 * @param dimensionRawColumnChunk raw chunk holding the compressed data page
 * @param pageNumber              page number; not used by this implementation
 * @return a column-group, variable-length or fixed-length dimension page, chosen from
 *         the row-major flag and the {@code DICTIONARY} encoding of the column
 * @throws IOException declared for the file reads of the row id and rle pages
 */
@Override
public DimensionColumnPage decodeColumnPage(DimensionRawColumnChunk dimensionRawColumnChunk, int pageNumber) throws IOException {
  final int columnIndex = dimensionRawColumnChunk.getColumnIndex();
  final FileReader reader = dimensionRawColumnChunk.getFileReader();
  final ByteBuffer rawBuffer = dimensionRawColumnChunk.getRawData();
  // the compressed data page sits in the raw buffer at the chunk's own offset
  byte[] pageBytes = COMPRESSOR.unCompressByte(rawBuffer.array(), (int) dimensionRawColumnChunk.getOffSet(), dimensionRawColumnChunk.getLength());
  final DataChunk chunkMeta = dimensionColumnChunk.get(columnIndex);

  // row id (inverted index) page, only present for INVERTED_INDEX encoded columns
  int[] rowIds = null;
  int[] rowIdsReverse = null;
  if (CarbonUtil.hasEncoding(chunkMeta.getEncodingList(), Encoding.INVERTED_INDEX)) {
    byte[] compressedRowIds;
    // reads go through the shared reader object, so they are serialised on it
    synchronized (reader) {
      compressedRowIds = reader.readByteArray(filePath, chunkMeta.getRowIdPageOffset(), chunkMeta.getRowIdPageLength());
    }
    rowIds = CarbonUtil.getUnCompressColumnIndex(chunkMeta.getRowIdPageLength(), compressedRowIds, numberComressor, 0);
    rowIdsReverse = getInvertedReverseIndex(rowIds);
  }

  // rle page, only present for RLE encoded columns; it is used to expand the data page
  if (CarbonUtil.hasEncoding(chunkMeta.getEncodingList(), Encoding.RLE)) {
    byte[] compressedRle;
    synchronized (reader) {
      compressedRle = reader.readByteArray(filePath, chunkMeta.getRlePageOffset(), chunkMeta.getRlePageLength());
    }
    int[] runLengths = numberComressor.unCompress(compressedRle, 0, chunkMeta.getRlePageLength());
    pageBytes = UnBlockIndexer.uncompressData(pageBytes, runLengths, eachColumnValueSize[columnIndex]);
  }

  // wrap the decoded bytes in the page type matching the column layout
  if (chunkMeta.isRowMajor()) {
    return new ColumnGroupDimensionColumnPage(pageBytes, eachColumnValueSize[columnIndex], numberOfRows);
  }
  if (CarbonUtil.hasEncoding(chunkMeta.getEncodingList(), Encoding.DICTIONARY)) {
    // dictionary encoded values have a fixed width per column
    return new FixedLengthDimensionColumnPage(pageBytes, rowIds, rowIdsReverse, numberOfRows, eachColumnValueSize[columnIndex]);
  }
  return new VariableLengthDimensionColumnPage(pageBytes, rowIds, rowIdsReverse, numberOfRows);
}
use of org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionColumnPage in project carbondata by apache.
the class CompressedDimensionChunkFileBasedReaderV3 method decodeDimensionLegacy.
/**
 * Decodes one dimension page whose parts are laid out back to back in {@code pageData}:
 * the compressed data page first, then the row id page (if {@code INVERTED_INDEX}
 * encoded), then the rle page (if {@code RLE} encoded).
 *
 * @param rawColumnPage raw chunk the page belongs to; supplies the column index
 * @param pageData      buffer containing the page parts
 * @param pageMetadata  page metadata giving the part lengths, encoders and row count
 * @param offset        position in {@code pageData} where the data page starts
 * @return a fixed-length page for {@code DICTIONARY} encoded columns, otherwise a
 *         variable-length page
 */
private DimensionColumnPage decodeDimensionLegacy(DimensionRawColumnChunk rawColumnPage, ByteBuffer pageData, DataChunk2 pageMetadata, int offset) {
  // running read position inside pageData
  int cursor = offset;

  byte[] pageBytes = COMPRESSOR.unCompressByte(pageData.array(), cursor, pageMetadata.data_page_length);
  cursor += pageMetadata.data_page_length;

  // row id page follows the data page when the column carries an inverted index
  int[] rowIds = null;
  int[] rowIdsReverse = null;
  if (hasEncoding(pageMetadata.encoders, Encoding.INVERTED_INDEX)) {
    rowIds = CarbonUtil.getUnCompressColumnIndex(pageMetadata.rowid_page_length, pageData, cursor);
    rowIdsReverse = getInvertedReverseIndex(rowIds);
    cursor += pageMetadata.rowid_page_length;
  }

  // rle page comes last and is used to expand the data page
  if (hasEncoding(pageMetadata.encoders, Encoding.RLE)) {
    int[] runLengths = CarbonUtil.getIntArray(pageData, cursor, pageMetadata.rle_page_length);
    pageBytes = UnBlockIndexer.uncompressData(pageBytes, runLengths, eachColumnValueSize[rawColumnPage.getColumnIndex()]);
  }

  if (hasEncoding(pageMetadata.encoders, Encoding.DICTIONARY)) {
    // dictionary encoded values have a fixed width per column
    return new FixedLengthDimensionColumnPage(pageBytes, rowIds, rowIdsReverse, pageMetadata.getNumberOfRowsInpage(), eachColumnValueSize[rawColumnPage.getColumnIndex()]);
  }
  return new VariableLengthDimensionColumnPage(pageBytes, rowIds, rowIdsReverse, pageMetadata.getNumberOfRowsInpage());
}
use of org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionColumnPage in project carbondata by apache.
the class CompressedDimensionChunkFileBasedReaderV2 method decodeColumnPage.
/**
 * Decodes a dimension column page from the given raw column chunk.
 *
 * <p>The page metadata is parsed from the raw buffer first. For the column that is the
 * last entry of {@code dimensionChunksOffset} the page bytes are then read separately
 * from {@code filePath}; for every other column they follow the metadata in the same
 * buffer. The data page is uncompressed, and the row id and rle pages are applied when
 * the metadata lists {@code INVERTED_INDEX} / {@code RLE} encoders.
 *
 * @param dimensionRawColumnChunk raw chunk holding the page metadata (and usually data)
 * @param pageNumber              page number; not used by this implementation
 * @return a column-group, variable-length or fixed-length dimension page, chosen from
 *         the row-major flag and the {@code DICTIONARY} encoder of the column
 * @throws IOException declared for the file read of the last column's page bytes
 */
public DimensionColumnPage decodeColumnPage(DimensionRawColumnChunk dimensionRawColumnChunk, int pageNumber) throws IOException {
  // running read position inside the current buffer
  int cursor = (int) dimensionRawColumnChunk.getOffSet();
  final int columnIndex = dimensionRawColumnChunk.getColumnIndex();
  ByteBuffer buffer = dimensionRawColumnChunk.getRawData();

  final DataChunk2 pageMeta;
  if (dimensionChunksOffset.size() - 1 != columnIndex) {
    // metadata and page bytes share the buffer: skip over the metadata
    pageMeta = CarbonUtil.readDataChunk(buffer, cursor, dimensionChunksLength.get(columnIndex));
    cursor += dimensionChunksLength.get(columnIndex);
  } else {
    // last column: the raw chunk holds only the metadata, fetch the page bytes now
    pageMeta = CarbonUtil.readDataChunk(buffer, cursor, dimensionRawColumnChunk.getLength());
    int pagesLength = pageMeta.data_page_length + pageMeta.rle_page_length + pageMeta.rowid_page_length;
    synchronized (dimensionRawColumnChunk.getFileReader()) {
      buffer = dimensionRawColumnChunk.getFileReader().readByteBuffer(filePath, dimensionChunksOffset.get(columnIndex) + dimensionChunksLength.get(columnIndex), pagesLength);
    }
    // NOTE(review): cursor still holds the raw chunk offset although buffer was replaced;
    // this presumably relies on that offset being 0 for the last column - confirm.
  }

  // data page comes first
  byte[] pageBytes = COMPRESSOR.unCompressByte(buffer.array(), cursor, pageMeta.data_page_length);
  cursor += pageMeta.data_page_length;

  // row id page follows when the column carries an inverted index
  int[] rowIds = null;
  int[] rowIdsReverse = null;
  if (hasEncoding(pageMeta.encoders, Encoding.INVERTED_INDEX)) {
    byte[] compressedRowIds = new byte[pageMeta.rowid_page_length];
    buffer.position(cursor);
    buffer.get(compressedRowIds);
    rowIds = CarbonUtil.getUnCompressColumnIndex(pageMeta.rowid_page_length, compressedRowIds, numberComressor, 0);
    rowIdsReverse = getInvertedReverseIndex(rowIds);
    cursor += pageMeta.rowid_page_length;
  }

  // rle page comes last and is used to expand the data page
  if (hasEncoding(pageMeta.encoders, Encoding.RLE)) {
    byte[] compressedRle = new byte[pageMeta.rle_page_length];
    buffer.position(cursor);
    buffer.get(compressedRle);
    int[] runLengths = numberComressor.unCompress(compressedRle, 0, pageMeta.rle_page_length);
    pageBytes = UnBlockIndexer.uncompressData(pageBytes, runLengths, eachColumnValueSize[columnIndex]);
  }

  // wrap the decoded bytes in the page type matching the column layout
  if (pageMeta.isRowMajor()) {
    return new ColumnGroupDimensionColumnPage(pageBytes, eachColumnValueSize[columnIndex], numberOfRows);
  }
  if (hasEncoding(pageMeta.encoders, Encoding.DICTIONARY)) {
    // dictionary encoded values have a fixed width per column
    return new FixedLengthDimensionColumnPage(pageBytes, rowIds, rowIdsReverse, numberOfRows, eachColumnValueSize[columnIndex]);
  }
  return new VariableLengthDimensionColumnPage(pageBytes, rowIds, rowIdsReverse, numberOfRows);
}
use of org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionColumnPage in project carbondata by apache.
the class RowLevelFilterExecuterImpl method createRow.
/**
 * Reads the value of every filter dimension and filter measure for a single row and
 * stores them in {@code row}, so the filter expression can be evaluated against it.
 *
 * <p>Each value is written at the position given by the column's
 * {@code getRowIndex()}. Columns flagged as not present in the current block receive
 * their default value instead of being read from the chunks.
 *
 * @param blockChunkHolder holder of the raw dimension and measure column chunks
 * @param row              row instance that receives the assembled values
 * @param pageIndex        index of the page to decode inside each raw column chunk
 * @param index            index of the row inside the decoded page
 * @throws IOException declared for decoding the column pages
 */
private void createRow(RawBlockletColumnChunks blockChunkHolder, RowIntf row, int pageIndex, int index) throws IOException {
  Object[] record = new Object[dimColEvaluatorInfoList.size() + msrColEvalutorInfoList.size()];
  String memberString;
  for (int i = 0; i < dimColEvaluatorInfoList.size(); i++) {
    DimColumnResolvedFilterInfo dimColumnEvaluatorInfo = dimColEvaluatorInfoList.get(i);
    // if the filter dimension is not present in the current block, use its default value
    if (!isDimensionPresentInCurrentBlock[i]) {
      record[dimColumnEvaluatorInfo.getRowIndex()] = getDimensionDefaultValue(dimColumnEvaluatorInfo);
      continue;
    }
    if (!dimColumnEvaluatorInfo.getDimension().getDataType().isComplexType()) {
      // NOTE(review): this default is assigned without skipping the read below, so it
      // can be overwritten by the decoded value - confirm that this is intended.
      if (!dimColumnEvaluatorInfo.isDimensionExistsInCurrentSilce()) {
        record[dimColumnEvaluatorInfo.getRowIndex()] = dimColumnEvaluatorInfo.getDimension().getDefaultValue();
      }
      DimensionColumnPage columnDataChunk = blockChunkHolder.getDimensionRawColumnChunks()[dimensionChunkIndex[i]].decodeColumnPage(pageIndex);
      if (!dimColumnEvaluatorInfo.getDimension().hasEncoding(Encoding.DICTIONARY) && columnDataChunk instanceof VariableLengthDimensionColumnPage) {
        // no-dictionary column: the page stores the actual member bytes
        VariableLengthDimensionColumnPage dimensionColumnDataChunk = (VariableLengthDimensionColumnPage) columnDataChunk;
        byte[] memberBytes = dimensionColumnDataChunk.getChunkData(index);
        if (null != memberBytes) {
          // the default-member marker and an empty value are both passed on as null
          if (Arrays.equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, memberBytes) || memberBytes.length == 0) {
            memberBytes = null;
          }
          record[dimColumnEvaluatorInfo.getRowIndex()] = DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(memberBytes, dimColumnEvaluatorInfo.getDimension().getDataType());
        }
      } else {
        // dictionary / direct-dictionary column: read the surrogate and resolve it
        int dictionaryValue = readSurrogatesFromColumnChunk(blockChunkHolder, index, pageIndex, dimColumnEvaluatorInfo, dimensionChunkIndex[i]);
        if (dimColumnEvaluatorInfo.getDimension().hasEncoding(Encoding.DICTIONARY) && !dimColumnEvaluatorInfo.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
          memberString = getFilterActualValueFromDictionaryValue(dimColumnEvaluatorInfo, dictionaryValue);
          record[dimColumnEvaluatorInfo.getRowIndex()] = DataTypeUtil.getDataBasedOnDataType(memberString, dimColumnEvaluatorInfo.getDimension().getDataType());
        } else if (dimColumnEvaluatorInfo.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
          Object member = getFilterActualValueFromDirectDictionaryValue(dimColumnEvaluatorInfo, dictionaryValue);
          record[dimColumnEvaluatorInfo.getRowIndex()] = member;
        }
      }
    } else {
      // complex column: serialise the row's value to bytes and convert it back
      try {
        GenericQueryType complexType = complexDimensionInfoMap.get(dimensionChunkIndex[i]);
        ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
        DataOutputStream dataOutputStream = new DataOutputStream(byteStream);
        complexType.parseBlocksAndReturnComplexColumnByteArray(blockChunkHolder.getDimensionRawColumnChunks(), index, pageIndex, dataOutputStream);
        record[dimColumnEvaluatorInfo.getRowIndex()] = complexType.getDataBasedOnDataTypeFromSurrogates(ByteBuffer.wrap(byteStream.toByteArray()));
        byteStream.close();
      } catch (IOException e) {
        // best effort: the failure is only logged and the record slot is left unset
        LOGGER.info(e.getMessage());
      }
    }
  }
  DataType msrType;
  for (int i = 0; i < msrColEvalutorInfoList.size(); i++) {
    MeasureColumnResolvedFilterInfo msrColumnEvalutorInfo = msrColEvalutorInfoList.get(i);
    // normalise the measure type: anything other than boolean, short, int, long or
    // decimal is read as double
    DataType dataType = msrColumnEvalutorInfo.getType();
    if (dataType == DataTypes.BOOLEAN) {
      msrType = DataTypes.BOOLEAN;
    } else if (dataType == DataTypes.SHORT) {
      msrType = DataTypes.SHORT;
    } else if (dataType == DataTypes.INT) {
      msrType = DataTypes.INT;
    } else if (dataType == DataTypes.LONG) {
      msrType = DataTypes.LONG;
    } else if (DataTypes.isDecimal(dataType)) {
      msrType = DataTypes.createDefaultDecimalType();
    } else {
      msrType = DataTypes.DOUBLE;
    }
    // if the filter measure is not present in the current block, use its default value
    if (!isMeasurePresentInCurrentBlock[i]) {
      byte[] defaultValue = msrColumnEvalutorInfo.getCarbonColumn().getDefaultValue();
      record[msrColumnEvalutorInfo.getRowIndex()] = RestructureUtil.getMeasureDefaultValue(msrColumnEvalutorInfo.getCarbonColumn().getColumnSchema(), defaultValue);
      continue;
    }
    Object msrValue;
    // decode the chunk that belongs to THIS measure; indexing with a constant 0 made
    // every measure after the first read the first measure's page
    ColumnPage columnPage = blockChunkHolder.getMeasureRawColumnChunks()[measureChunkIndex[i]].decodeColumnPage(pageIndex);
    if (msrType == DataTypes.BOOLEAN) {
      msrValue = columnPage.getBoolean(index);
    } else if (msrType == DataTypes.SHORT) {
      msrValue = (short) columnPage.getLong(index);
    } else if (msrType == DataTypes.INT) {
      msrValue = (int) columnPage.getLong(index);
    } else if (msrType == DataTypes.LONG) {
      msrValue = columnPage.getLong(index);
    } else if (DataTypes.isDecimal(msrType)) {
      BigDecimal bigDecimalValue = columnPage.getDecimal(index);
      // widen the value's scale to the scale declared in the column schema
      if (null != bigDecimalValue && msrColumnEvalutorInfo.getCarbonColumn().getColumnSchema().getScale() > bigDecimalValue.scale()) {
        bigDecimalValue = bigDecimalValue.setScale(msrColumnEvalutorInfo.getCarbonColumn().getColumnSchema().getScale(), RoundingMode.HALF_UP);
      }
      msrValue = bigDecimalValue;
    } else {
      msrValue = columnPage.getDouble(index);
    }
    // a set null bit for this row overrides whatever value was read
    record[msrColumnEvalutorInfo.getRowIndex()] = columnPage.getNullBits().get(index) ? null : msrValue;
  }
  row.setValues(record);
}
Aggregations