use of org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk in project carbondata by apache.
the class CarbonUtilTest method testToGetFirstIndexUsingBinarySearchWithCompareTo1.
@Test
public void testToGetFirstIndexUsingBinarySearchWithCompareTo1() {
  byte[] dataChunks = { 10, 20, 30, 40, 50, 60 };
  byte[] compareValue = { 5 };
  FixedLengthDimensionDataChunk fixedLengthDimensionDataChunk =
      new FixedLengthDimensionDataChunk(dataChunks, null, null, 6, 1);
  int result = CarbonUtil.getFirstIndexUsingBinarySearch(
      fixedLengthDimensionDataChunk, 1, 3, compareValue, false);
  assertEquals(-2, result);
}
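The expected -2 is consistent with the standard binary-search contract: when the key is absent, the result encodes the insertion point as -(insertionPoint + 1). As a point of comparison, a minimal illustration of the same convention with java.util.Arrays rather than CarbonUtil (note that Arrays.binarySearch takes an exclusive end index, while getFirstIndexUsingBinarySearch above takes an inclusive one):

import java.util.Arrays;

public class BinarySearchContractDemo {
  public static void main(String[] args) {
    byte[] dataChunks = { 10, 20, 30, 40, 50, 60 };
    // Search the sub-range [1, 4) for a key smaller than every element in it:
    // 5 would be inserted at index 1, so the result is -(1 + 1) = -2,
    // the same value the test above asserts.
    System.out.println(Arrays.binarySearch(dataChunks, 1, 4, (byte) 5)); // prints -2
  }
}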
use of org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk in project carbondata by apache.
the class CompressedDimensionChunkFileBasedReaderV3 method convertToDimensionChunk.
/**
 * Converts the compressed dimension chunk raw data to actual data.
 *
 * @param dimensionRawColumnChunk dimension raw chunk
 * @param pageNumber page number of the page to decode within the raw chunk
 * @return DimensionColumnDataChunk
 */
@Override
public DimensionColumnDataChunk convertToDimensionChunk(
    DimensionRawColumnChunk dimensionRawColumnChunk, int pageNumber) throws IOException {
  byte[] dataPage = null;
  int[] invertedIndexes = null;
  int[] invertedIndexesReverse = null;
  int[] rlePage = null;
  // data chunk of page
  DataChunk2 dimensionColumnChunk = null;
  // data chunk of blocklet column
  DataChunk3 dataChunk3 = dimensionRawColumnChunk.getDataChunkV3();
  // get the data buffer
  ByteBuffer rawData = dimensionRawColumnChunk.getRawData();
  dimensionColumnChunk = dataChunk3.getData_chunk_list().get(pageNumber);
  // calculate the start point of the data: as the buffer can contain multiple
  // columns' data, the start point is data chunk offset + data chunk length + page offset
  int copySourcePoint = dimensionRawColumnChunk.getOffSet()
      + dimensionChunksLength.get(dimensionRawColumnChunk.getBlockletId())
      + dataChunk3.getPage_offset().get(pageNumber);
  // first read the data and uncompress it
  dataPage = COMPRESSOR.unCompressByte(rawData.array(), copySourcePoint,
      dimensionColumnChunk.data_page_length);
  copySourcePoint += dimensionColumnChunk.data_page_length;
  // if a row id block is present, read the row id chunk and uncompress it
  if (hasEncoding(dimensionColumnChunk.encoders, Encoding.INVERTED_INDEX)) {
    invertedIndexes = CarbonUtil.getUnCompressColumnIndex(
        dimensionColumnChunk.rowid_page_length, rawData, copySourcePoint);
    copySourcePoint += dimensionColumnChunk.rowid_page_length;
    // get the reverse index
    invertedIndexesReverse = getInvertedReverseIndex(invertedIndexes);
  }
  // then expand the actual data based on the RLE block
  if (hasEncoding(dimensionColumnChunk.encoders, Encoding.RLE)) {
    rlePage = CarbonUtil.getIntArray(rawData, copySourcePoint,
        dimensionColumnChunk.rle_page_length);
    // uncompress the data with the rle indexes
    dataPage = UnBlockIndexer.uncompressData(dataPage, rlePage,
        eachColumnValueSize[dimensionRawColumnChunk.getBlockletId()]);
    rlePage = null;
  }
  // fill chunk attributes
  DimensionColumnDataChunk columnDataChunk = null;
  if (dimensionColumnChunk.isRowMajor()) {
    // row-major (column group): store fixed length column chunk values
    columnDataChunk = new ColumnGroupDimensionDataChunk(dataPage,
        eachColumnValueSize[dimensionRawColumnChunk.getBlockletId()],
        dimensionRawColumnChunk.getRowCount()[pageNumber]);
  } else if (!hasEncoding(dimensionColumnChunk.encoders, Encoding.DICTIONARY)) {
    // no-dictionary column: create a variable length column chunk
    // and set it to the data chunk instance
    columnDataChunk = new VariableLengthDimensionDataChunk(dataPage, invertedIndexes,
        invertedIndexesReverse, dimensionRawColumnChunk.getRowCount()[pageNumber]);
  } else {
    // dictionary column: store fixed length column chunk values
    columnDataChunk = new FixedLengthDimensionDataChunk(dataPage, invertedIndexes,
        invertedIndexesReverse, dimensionRawColumnChunk.getRowCount()[pageNumber],
        eachColumnValueSize[dimensionRawColumnChunk.getBlockletId()]);
  }
  return columnDataChunk;
}
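Regardless of the format version, the reader hands back a DimensionColumnDataChunk that callers query row by row. Below is a minimal consumption sketch, not taken from the project: it uses only methods that appear in the snippets on this page (convertToDimensionChunk, getRowCount, getChunkData), the reader's import is omitted, and the import paths are assumed to match the package named at the top of this page.

import java.io.IOException;
import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;

public class DimensionChunkDump {
  // Decodes one page of a raw dimension chunk and prints each row's raw bytes.
  static void dumpPage(CompressedDimensionChunkFileBasedReaderV3 reader,
      DimensionRawColumnChunk rawChunk, int pageNumber) throws IOException {
    DimensionColumnDataChunk chunk = reader.convertToDimensionChunk(rawChunk, pageNumber);
    int rowCount = rawChunk.getRowCount()[pageNumber];
    for (int row = 0; row < rowCount; row++) {
      byte[] value = chunk.getChunkData(row); // raw bytes of this row's dimension value
      System.out.println(java.util.Arrays.toString(value));
    }
  }
}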
use of org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk in project carbondata by apache.
the class IncludeFilterExecuterImpl method setFilterdIndexToBitSet.
private BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk dimensionColumnDataChunk,
    int numerOfRows) {
  BitSet bitSet = new BitSet(numerOfRows);
  if (dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) {
    int startIndex = 0;
    byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
    // an inverted index exists for this column
    if (dimensionColumnDataChunk.isExplicitSorted()) {
      for (int i = 0; i < filterValues.length; i++) {
        if (startIndex >= numerOfRows) {
          break;
        }
        int[] rangeIndex = CarbonUtil.getRangeIndexUsingBinarySearch(
            dimensionColumnDataChunk, startIndex, numerOfRows - 1, filterValues[i]);
        for (int j = rangeIndex[0]; j <= rangeIndex[1]; j++) {
          bitSet.set(j);
        }
        if (rangeIndex[1] >= 0) {
          startIndex = rangeIndex[1] + 1;
        }
      }
    } else {
      if (filterValues.length > 1) {
        // several filter values: binary search the sorted filter keys for each row
        for (int i = 0; i < numerOfRows; i++) {
          int index = CarbonUtil.binarySearch(filterValues, 0, filterValues.length - 1,
              dimensionColumnDataChunk.getChunkData(i));
          if (index >= 0) {
            bitSet.set(i);
          }
        }
      } else {
        // single filter value: direct comparison for each row
        for (int j = 0; j < numerOfRows; j++) {
          if (dimensionColumnDataChunk.compareTo(j, filterValues[0]) == 0) {
            bitSet.set(j);
          }
        }
      }
    }
  }
  return bitSet;
}
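The returned BitSet carries one bit per row, with set bits marking rows that satisfy the include filter. A small self-contained sketch of the standard idiom for walking those matching row ids afterwards (the set bits here are made up):

import java.util.BitSet;

public class BitSetWalkDemo {
  public static void main(String[] args) {
    BitSet bitSet = new BitSet(8);
    bitSet.set(1);
    bitSet.set(4);
    bitSet.set(5);
    // Standard iteration idiom: nextSetBit returns -1 once exhausted.
    for (int row = bitSet.nextSetBit(0); row >= 0; row = bitSet.nextSetBit(row + 1)) {
      System.out.println("matching row: " + row);
    }
  }
}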
use of org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk in project carbondata by apache.
the class CompressedDimensionChunkFileBasedReaderV2 method convertToDimensionChunk.
public DimensionColumnDataChunk convertToDimensionChunk(
    DimensionRawColumnChunk dimensionRawColumnChunk, int pageNumber) throws IOException {
  byte[] dataPage = null;
  int[] invertedIndexes = null;
  int[] invertedIndexesReverse = null;
  int[] rlePage = null;
  DataChunk2 dimensionColumnChunk = null;
  int copySourcePoint = dimensionRawColumnChunk.getOffSet();
  int blockIndex = dimensionRawColumnChunk.getBlockletId();
  ByteBuffer rawData = dimensionRawColumnChunk.getRawData();
  if (dimensionChunksOffset.size() - 1 == blockIndex) {
    // last column: the raw chunk holds only the metadata, so after parsing it
    // read the actual column data from the file separately
    dimensionColumnChunk = CarbonUtil.readDataChunk(rawData, copySourcePoint,
        dimensionRawColumnChunk.getLength());
    int totalDimensionDataLength = dimensionColumnChunk.data_page_length
        + dimensionColumnChunk.rle_page_length + dimensionColumnChunk.rowid_page_length;
    synchronized (dimensionRawColumnChunk.getFileReader()) {
      rawData = dimensionRawColumnChunk.getFileReader().readByteBuffer(filePath,
          dimensionChunksOffset.get(blockIndex) + dimensionChunksLength.get(blockIndex),
          totalDimensionDataLength);
    }
  } else {
    dimensionColumnChunk = CarbonUtil.readDataChunk(rawData, copySourcePoint,
        dimensionChunksLength.get(blockIndex));
    copySourcePoint += dimensionChunksLength.get(blockIndex);
  }
  // first read the data and uncompress it
  dataPage = COMPRESSOR.unCompressByte(rawData.array(), copySourcePoint,
      dimensionColumnChunk.data_page_length);
  copySourcePoint += dimensionColumnChunk.data_page_length;
  // if a row id block is present, read the row id chunk and uncompress it
  if (hasEncoding(dimensionColumnChunk.encoders, Encoding.INVERTED_INDEX)) {
    byte[] dataInv = new byte[dimensionColumnChunk.rowid_page_length];
    rawData.position(copySourcePoint);
    rawData.get(dataInv);
    invertedIndexes = CarbonUtil.getUnCompressColumnIndex(
        dimensionColumnChunk.rowid_page_length, dataInv, numberComressor, 0);
    copySourcePoint += dimensionColumnChunk.rowid_page_length;
    // get the reverse index
    invertedIndexesReverse = getInvertedReverseIndex(invertedIndexes);
  }
  // then expand the actual data based on the RLE block
  if (hasEncoding(dimensionColumnChunk.encoders, Encoding.RLE)) {
    byte[] dataRle = new byte[dimensionColumnChunk.rle_page_length];
    rawData.position(copySourcePoint);
    rawData.get(dataRle);
    rlePage = numberComressor.unCompress(dataRle, 0, dimensionColumnChunk.rle_page_length);
    // uncompress the data with the rle indexes
    dataPage = UnBlockIndexer.uncompressData(dataPage, rlePage, eachColumnValueSize[blockIndex]);
  }
  // fill chunk attributes
  DimensionColumnDataChunk columnDataChunk = null;
  if (dimensionColumnChunk.isRowMajor()) {
    // row-major (column group): store fixed length column chunk values
    columnDataChunk = new ColumnGroupDimensionDataChunk(dataPage,
        eachColumnValueSize[blockIndex], numberOfRows);
  } else if (!hasEncoding(dimensionColumnChunk.encoders, Encoding.DICTIONARY)) {
    // no-dictionary column: create a variable length column chunk
    // and set it to the data chunk instance
    columnDataChunk = new VariableLengthDimensionDataChunk(dataPage, invertedIndexes,
        invertedIndexesReverse, numberOfRows);
  } else {
    // dictionary column: store fixed length column chunk values
    columnDataChunk = new FixedLengthDimensionDataChunk(dataPage, invertedIndexes,
        invertedIndexesReverse, numberOfRows, eachColumnValueSize[blockIndex]);
  }
  return columnDataChunk;
}
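Unlike the V3 reader, which slices the buffer through CarbonUtil helpers, the V2 reader pulls the inverted-index and RLE blocks out of the shared buffer with an explicit position-then-get. A minimal, self-contained illustration of that java.nio pattern (the data and offsets here are made up):

import java.nio.ByteBuffer;
import java.util.Arrays;

public class PositionedReadDemo {
  public static void main(String[] args) {
    // One buffer holding several blocks back to back, as in the raw column chunk.
    ByteBuffer rawData = ByteBuffer.wrap(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 });
    int copySourcePoint = 3; // offset of the block we want
    int blockLength = 4;     // length of that block
    byte[] block = new byte[blockLength];
    rawData.position(copySourcePoint); // move the cursor to the block start
    rawData.get(block);                // bulk-copy blockLength bytes
    System.out.println(Arrays.toString(block)); // prints [4, 5, 6, 7]
  }
}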
use of org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk in project carbondata by apache.
the class CompressedDimensionChunkFileBasedReaderV1 method convertToDimensionChunk.
@Override
public DimensionColumnDataChunk convertToDimensionChunk(
    DimensionRawColumnChunk dimensionRawColumnChunk, int pageNumber) throws IOException {
  int blockIndex = dimensionRawColumnChunk.getBlockletId();
  byte[] dataPage = null;
  int[] invertedIndexes = null;
  int[] invertedIndexesReverse = null;
  int[] rlePage = null;
  FileHolder fileReader = dimensionRawColumnChunk.getFileReader();
  ByteBuffer rawData = dimensionRawColumnChunk.getRawData();
  // first read the data and uncompress it
  dataPage = COMPRESSOR.unCompressByte(rawData.array(), dimensionRawColumnChunk.getOffSet(),
      dimensionRawColumnChunk.getLength());
  DataChunk dataChunk = dimensionColumnChunk.get(blockIndex);
  // if a row id block is present, read the row id chunk and uncompress it
  if (CarbonUtil.hasEncoding(dataChunk.getEncodingList(), Encoding.INVERTED_INDEX)) {
    byte[] columnIndexData;
    synchronized (fileReader) {
      columnIndexData = fileReader.readByteArray(filePath, dataChunk.getRowIdPageOffset(),
          dataChunk.getRowIdPageLength());
    }
    invertedIndexes = CarbonUtil.getUnCompressColumnIndex(dataChunk.getRowIdPageLength(),
        columnIndexData, numberComressor, 0);
    // get the reverse index
    invertedIndexesReverse = getInvertedReverseIndex(invertedIndexes);
  }
  // then expand the actual data based on the RLE block
  if (CarbonUtil.hasEncoding(dataChunk.getEncodingList(), Encoding.RLE)) {
    // read and uncompress the rle block
    byte[] key;
    synchronized (fileReader) {
      key = fileReader.readByteArray(filePath, dataChunk.getRlePageOffset(),
          dataChunk.getRlePageLength());
    }
    rlePage = numberComressor.unCompress(key, 0, dataChunk.getRlePageLength());
    // uncompress the data with the rle indexes
    dataPage = UnBlockIndexer.uncompressData(dataPage, rlePage, eachColumnValueSize[blockIndex]);
    rlePage = null;
  }
  // fill chunk attributes
  DimensionColumnDataChunk columnDataChunk = null;
  if (dataChunk.isRowMajor()) {
    // row-major (column group): store fixed length column chunk values
    columnDataChunk = new ColumnGroupDimensionDataChunk(dataPage,
        eachColumnValueSize[blockIndex], numberOfRows);
  } else if (!CarbonUtil.hasEncoding(dataChunk.getEncodingList(), Encoding.DICTIONARY)) {
    // no-dictionary column: create a variable length column chunk
    // and set it to the data chunk instance
    columnDataChunk = new VariableLengthDimensionDataChunk(dataPage, invertedIndexes,
        invertedIndexesReverse, numberOfRows);
  } else {
    // dictionary column: store fixed length column chunk values
    columnDataChunk = new FixedLengthDimensionDataChunk(dataPage, invertedIndexes,
        invertedIndexesReverse, numberOfRows, eachColumnValueSize[blockIndex]);
  }
  return columnDataChunk;
}
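All three readers derive invertedIndexesReverse from invertedIndexes via getInvertedReverseIndex, whose body is not shown on this page. Assuming the inverted index is a permutation mapping sorted position to original row id (an assumption, since only the call sites appear above), the reverse index is simply the inverse permutation. A hypothetical sketch under that assumption:

public class ReverseIndexDemo {
  // Hypothetical stand-in for getInvertedReverseIndex, assuming
  // invertedIndexes[sortedPos] == originalRowId.
  static int[] invertReverse(int[] invertedIndexes) {
    int[] reverse = new int[invertedIndexes.length];
    for (int sortedPos = 0; sortedPos < invertedIndexes.length; sortedPos++) {
      reverse[invertedIndexes[sortedPos]] = sortedPos; // original row -> sorted position
    }
    return reverse;
  }

  public static void main(String[] args) {
    int[] invertedIndexes = { 2, 0, 3, 1 };
    System.out.println(java.util.Arrays.toString(invertReverse(invertedIndexes)));
    // prints [1, 3, 0, 2]
  }
}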