Example usage of org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage in the Apache CarbonData project.
From the class CompressedDimensionChunkFileBasedReaderV2, method decodeColumnPage.
/**
 * Decodes a dimension column page from the supplied raw column chunk.
 *
 * <p>Steps performed, in order:
 * <ol>
 *   <li>read the {@code DataChunk2} page metadata from the raw buffer;</li>
 *   <li>uncompress the data page with {@code COMPRESSOR};</li>
 *   <li>if the page carries {@code Encoding.INVERTED_INDEX}, decode the row-id page and build
 *       its reverse index;</li>
 *   <li>if the page carries {@code Encoding.RLE}, decode the RLE page and expand the data page
 *       with it;</li>
 *   <li>wrap the result in the matching {@code DimensionColumnPage} implementation.</li>
 * </ol>
 *
 * @param dimensionRawColumnChunk raw chunk providing the buffer, offset, column index and
 *                                file reader
 * @param pageNumber              not referenced by this implementation
 * @return the decoded column page (column-group, variable-length or fixed-length)
 * @throws IOException declared for callers; may be raised by the calls made here
 */
public DimensionColumnPage decodeColumnPage(DimensionRawColumnChunk dimensionRawColumnChunk, int pageNumber) throws IOException {
  int readOffset = (int) dimensionRawColumnChunk.getOffSet();
  final int columnIndex = dimensionRawColumnChunk.getColumnIndex();
  ByteBuffer buffer = dimensionRawColumnChunk.getRawData();

  final DataChunk2 pageMeta;
  if (dimensionChunksOffset.size() - 1 != columnIndex) {
    // Metadata and page bytes sit back to back in the buffer already in hand:
    // parse the metadata, then step over it.
    pageMeta = CarbonUtil.readDataChunk(buffer, readOffset, dimensionChunksLength.get(columnIndex));
    readOffset += dimensionChunksLength.get(columnIndex);
  } else {
    // The column index is the last entry of the offset list: only the metadata is parsed
    // from the current buffer, and the page bytes are fetched from the file separately.
    pageMeta = CarbonUtil.readDataChunk(buffer, readOffset, dimensionRawColumnChunk.getLength());
    final int pagesLength = pageMeta.data_page_length + pageMeta.rle_page_length + pageMeta.rowid_page_length;
    synchronized (dimensionRawColumnChunk.getFileReader()) {
      buffer = dimensionRawColumnChunk.getFileReader().readByteBuffer(filePath, dimensionChunksOffset.get(columnIndex) + dimensionChunksLength.get(columnIndex), pagesLength);
    }
    // NOTE(review): readOffset is not reset after swapping the buffer; presumably the raw
    // chunk offset is 0 on this path -- confirm against the caller.
  }

  // Data page comes first: uncompress it.
  byte[] pageData = COMPRESSOR.unCompressByte(buffer.array(), readOffset, pageMeta.data_page_length);
  readOffset += pageMeta.data_page_length;

  // Row-id (inverted index) page, only present for INVERTED_INDEX encoded pages.
  int[] rowIdIndex = null;
  int[] rowIdReverseIndex = null;
  if (hasEncoding(pageMeta.encoders, Encoding.INVERTED_INDEX)) {
    final byte[] rowIdBytes = new byte[pageMeta.rowid_page_length];
    buffer.position(readOffset);
    buffer.get(rowIdBytes);
    rowIdIndex = CarbonUtil.getUnCompressColumnIndex(pageMeta.rowid_page_length, rowIdBytes, numberComressor, 0);
    readOffset += pageMeta.rowid_page_length;
    rowIdReverseIndex = getInvertedReverseIndex(rowIdIndex);
  }

  // RLE page, only present for RLE encoded pages; used to expand the data page.
  if (hasEncoding(pageMeta.encoders, Encoding.RLE)) {
    final byte[] rleBytes = new byte[pageMeta.rle_page_length];
    buffer.position(readOffset);
    buffer.get(rleBytes);
    final int[] rleIndex = numberComressor.unCompress(rleBytes, 0, pageMeta.rle_page_length);
    pageData = UnBlockIndexer.uncompressData(pageData, rleIndex, eachColumnValueSize[columnIndex]);
  }

  // Pick the page implementation that matches the chunk layout.
  if (pageMeta.isRowMajor()) {
    return new ColumnGroupDimensionColumnPage(pageData, eachColumnValueSize[columnIndex], numberOfRows);
  }
  if (hasEncoding(pageMeta.encoders, Encoding.DICTIONARY)) {
    // dictionary encoded values are stored with a fixed length
    return new FixedLengthDimensionColumnPage(pageData, rowIdIndex, rowIdReverseIndex, numberOfRows, eachColumnValueSize[columnIndex]);
  }
  return new VariableLengthDimensionColumnPage(pageData, rowIdIndex, rowIdReverseIndex, numberOfRows);
}
Example usage of org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage in the Apache CarbonData project.
From the class IncludeFilterExecuterImplTest, method testRangBinarySearch.
/**
 * Runs the old and the new "filtered index to BitSet" implementations against the same
 * fixed-length dimension page, asserts on every iteration that both produce equal bit sets,
 * and prints the accumulated wall-clock time of each implementation.
 */
@Test
public void testRangBinarySearch() {
  // number of dimension values in a blocklet, usually default is 32000
  final int dataChunkSize = 32000;
  // how many times the query is repeated
  final int queryTimes = 10000;
  // repeat count of a dictionary value; grows while the page is generated
  int repeatTimes = 200;
  // number of filter values applied to a blocklet
  final int filteredValueCnt = 800;
  // column dictionary size
  final int dimColumnSize = 2;

  // Build the page bytes: two bytes per row, taken from the encoded repeatTimes value.
  final byte[] pageBytes = new byte[dataChunkSize * dimColumnSize];
  for (int row = 0; row < dataChunkSize; row++) {
    if (row % repeatTimes == 0) {
      repeatTimes++;
    }
    final byte[] encoded = transferIntToByteArr(repeatTimes, dimColumnSize);
    final int offset = 2 * row;
    pageBytes[offset] = encoded[0];
    pageBytes[offset + 1] = encoded[1];
  }

  // Build the filter keys: the encoded values 100 .. 100 + filteredValueCnt - 1.
  final byte[][] filterKeys = new byte[filteredValueCnt][2];
  for (int keyIdx = 0; keyIdx < filteredValueCnt; keyIdx++) {
    filterKeys[keyIdx] = transferIntToByteArr(100 + keyIdx, dimColumnSize);
  }
  final DimColumnExecuterFilterInfo filterInfo = new DimColumnExecuterFilterInfo();
  filterInfo.setFilterKeys(filterKeys);

  final FixedLengthDimensionColumnPage page = new FixedLengthDimensionColumnPage(pageBytes, null, null, pageBytes.length / dimColumnSize, dimColumnSize);

  // initial run of both implementations before timing starts
  this.setFilterdIndexToBitSetWithColumnIndexOld(page, dataChunkSize, filterKeys);
  this.setFilterdIndexToBitSetWithColumnIndexNew(page, dataChunkSize, filterKeys);

  // performance run
  long oldElapsed = 0;
  long newElapsed = 0;
  for (int round = 0; round < queryTimes; round++) {
    long begin = System.currentTimeMillis();
    final BitSet oldResult = this.setFilterdIndexToBitSetWithColumnIndexOld(page, dataChunkSize, filterKeys);
    oldElapsed += System.currentTimeMillis() - begin;

    begin = System.currentTimeMillis();
    final BitSet newResult = this.setFilterdIndexToBitSetWithColumnIndexNew(page, dataChunkSize, filterKeys);
    newElapsed += System.currentTimeMillis() - begin;

    assertTrue(oldResult.equals(newResult));
  }
  System.out.println("old code performance time: " + oldElapsed + " ms");
  System.out.println("new code performance time: " + newElapsed + " ms");
}
Example usage of org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage in the Apache CarbonData project.
From the class CarbonUtilTest, method testToGetnextGreaterValueToTarget.
/**
 * Calls {@code CarbonUtil.nextGreaterValueToTarget} with index 2, a five-row single-byte page
 * {5, 6, 7, 7, 7}, the compare value {7} and 5 as the last argument, and expects 5 back.
 */
@Test
public void testToGetnextGreaterValueToTarget() {
  byte[] dataChunks = { 5, 6, 7, 7, 7 };
  byte[] compareValues = { 7 };
  FixedLengthDimensionColumnPage fixedLengthDataChunk = new FixedLengthDimensionColumnPage(dataChunks, null, null, 5, 1, dataChunks.length);
  int result = CarbonUtil.nextGreaterValueToTarget(2, fixedLengthDataChunk, compareValues, 5);
  // JUnit expects (expected, actual); the swapped order produced a misleading failure message.
  assertEquals(5, result);
}
Example usage of org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage in the Apache CarbonData project.
From the class CarbonUtilTest, method testToGetFirstIndexUsingBinarySearchWithCompareTo0.
/**
 * Calls {@code CarbonUtil.getFirstIndexUsingBinarySearch} on the six-row single-byte page
 * {10, 10, 10, 40, 50, 60} with the arguments (1, 3, {10}, false) and expects index 0.
 */
@Test
public void testToGetFirstIndexUsingBinarySearchWithCompareTo0() {
  final byte[] pageBytes = { 10, 10, 10, 40, 50, 60 };
  final byte[] searchKey = { 10 };
  final FixedLengthDimensionColumnPage page =
      new FixedLengthDimensionColumnPage(pageBytes, null, null, 6, 1, pageBytes.length);
  assertEquals(0, CarbonUtil.getFirstIndexUsingBinarySearch(page, 1, 3, searchKey, false));
}
Example usage of org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage in the Apache CarbonData project.
From the class CarbonUtilTest, method testBinaryRangeSearch.
/**
 * Exercises range lookups over several single-byte-per-row pages built from short ASCII
 * strings.
 *
 * <p>The first page ("abbcccddddeffgggh") is checked through {@code assertRangeIndex}; the
 * remaining pages call {@code CarbonUtil.getRangeIndexUsingBinarySearch} directly and assert
 * the two returned indexes (first and last position of the key).
 *
 * <p>The strings are encoded with an explicit charset so the byte values the test relies on
 * ('a' == 97 ... 'h' == 104) do not depend on the platform default encoding.
 */
@Test
public void testBinaryRangeSearch() {
  FixedLengthDimensionColumnPage fixedLengthDimensionDataChunk;
  byte[] keyWord = new byte[1];
  int[] range;

  // Page 1: a0, b1-2, c3-5, d6-9, e10, f11-12, g13-15, h16.
  byte[] dataChunk = "abbcccddddeffgggh".getBytes(java.nio.charset.StandardCharsets.UTF_8);
  byte[][] dataArr = new byte[dataChunk.length / keyWord.length][keyWord.length];
  fixedLengthDimensionDataChunk = new FixedLengthDimensionColumnPage(dataChunk, null, null, dataChunk.length / keyWord.length, keyWord.length, dataChunk.length);
  for (int ii = 0; ii < dataChunk.length / keyWord.length; ii++) {
    dataArr[ii] = fixedLengthDimensionDataChunk.getChunkData(ii);
  }

  // first value of the page
  keyWord[0] = (byte) 'a';
  int[] expectRangeIndex = new int[2];
  expectRangeIndex[0] = 0;
  expectRangeIndex[1] = 0;
  assertRangeIndex(dataArr, dataChunk, fixedLengthDimensionDataChunk, keyWord, expectRangeIndex);

  // last value of the page
  keyWord[0] = (byte) 'h';
  expectRangeIndex = new int[2];
  expectRangeIndex[0] = 16;
  expectRangeIndex[1] = 16;
  assertRangeIndex(dataArr, dataChunk, fixedLengthDimensionDataChunk, keyWord, expectRangeIndex);

  // single occurrence in the middle
  keyWord[0] = (byte) 'e';
  expectRangeIndex = new int[2];
  expectRangeIndex[0] = 10;
  expectRangeIndex[1] = 10;
  assertRangeIndex(dataArr, dataChunk, fixedLengthDimensionDataChunk, keyWord, expectRangeIndex);

  // run of three equal values
  keyWord[0] = (byte) 'c';
  expectRangeIndex = new int[2];
  expectRangeIndex[0] = 3;
  expectRangeIndex[1] = 5;
  assertRangeIndex(dataArr, dataChunk, fixedLengthDimensionDataChunk, keyWord, expectRangeIndex);

  // Page 2: two distinct values.
  dataChunk = "ab".getBytes(java.nio.charset.StandardCharsets.UTF_8);
  fixedLengthDimensionDataChunk = new FixedLengthDimensionColumnPage(dataChunk, null, null, dataChunk.length / keyWord.length, keyWord.length, dataChunk.length);
  keyWord[0] = (byte) 'a';
  range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
  assertEquals(0, range[0]);
  assertEquals(0, range[1]);
  keyWord[0] = (byte) 'b';
  range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
  assertEquals(1, range[0]);
  assertEquals(1, range[1]);

  // Page 3: two runs of two.
  dataChunk = "aabb".getBytes(java.nio.charset.StandardCharsets.UTF_8);
  fixedLengthDimensionDataChunk = new FixedLengthDimensionColumnPage(dataChunk, null, null, dataChunk.length / keyWord.length, keyWord.length, dataChunk.length);
  keyWord[0] = (byte) 'a';
  range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
  assertEquals(0, range[0]);
  assertEquals(1, range[1]);
  keyWord[0] = (byte) 'b';
  range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
  assertEquals(2, range[0]);
  assertEquals(3, range[1]);

  // Page 4: single row.
  dataChunk = "a".getBytes(java.nio.charset.StandardCharsets.UTF_8);
  fixedLengthDimensionDataChunk = new FixedLengthDimensionColumnPage(dataChunk, null, null, dataChunk.length / keyWord.length, keyWord.length, dataChunk.length);
  keyWord[0] = (byte) 'a';
  range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
  assertEquals(0, range[0]);
  assertEquals(0, range[1]);

  // Page 5: one run covering the whole page.
  dataChunk = "aa".getBytes(java.nio.charset.StandardCharsets.UTF_8);
  fixedLengthDimensionDataChunk = new FixedLengthDimensionColumnPage(dataChunk, null, null, dataChunk.length / keyWord.length, keyWord.length, dataChunk.length);
  keyWord[0] = (byte) 'a';
  range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
  assertEquals(0, range[0]);
  assertEquals(1, range[1]);

  // Page 6: long run in the middle (b occupies 2..11).
  dataChunk = "aabbbbbbbbbbcc".getBytes(java.nio.charset.StandardCharsets.UTF_8);
  fixedLengthDimensionDataChunk = new FixedLengthDimensionColumnPage(dataChunk, null, null, dataChunk.length / keyWord.length, keyWord.length, dataChunk.length);
  keyWord[0] = (byte) 'b';
  range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
  assertEquals(2, range[0]);
  assertEquals(11, range[1]);
}
Aggregations