Search in sources :

Example 11 with FixedLengthDimensionColumnPage

use of org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage in project carbondata by apache.

the class CompressedDimensionChunkFileBasedReaderV2 method decodeColumnPage.

/**
 * Decodes one raw dimension column chunk into a {@link DimensionColumnPage}.
 *
 * <p>Steps, in order: read the {@code DataChunk2} metadata, uncompress the data page,
 * decode the inverted index (and its reverse) when the chunk carries one, expand the data
 * through the RLE page when the chunk carries one, and finally wrap the bytes in the page
 * implementation that matches the chunk's layout and encodings.
 *
 * @param dimensionRawColumnChunk raw chunk supplying the buffer, start offset, column index
 *                                and file reader
 * @param pageNumber              not read by this implementation
 * @return a column-group page for row-major chunks; otherwise a fixed-length page when the
 *         chunk is DICTIONARY encoded, or a variable-length page when it is not
 * @throws IOException declared by the signature; presumably raised by the metadata or file
 *                     read — confirm against the callee signatures
 */
public DimensionColumnPage decodeColumnPage(DimensionRawColumnChunk dimensionRawColumnChunk, int pageNumber) throws IOException {
    // cursor: position inside the buffer from which the next section is read
    int cursor = (int) dimensionRawColumnChunk.getOffSet();
    final int columnIndex = dimensionRawColumnChunk.getColumnIndex();
    ByteBuffer buffer = dimensionRawColumnChunk.getRawData();
    final DataChunk2 pageMeta;
    if (dimensionChunksOffset.size() - 1 == columnIndex) {
        // Last column in the offset list: the metadata length is taken from the raw chunk,
        // and the page bytes are fetched from the file, starting right behind the metadata.
        // The cursor is not advanced in this branch.
        pageMeta = CarbonUtil.readDataChunk(buffer, cursor, dimensionRawColumnChunk.getLength());
        int payloadLength = pageMeta.data_page_length + pageMeta.rle_page_length + pageMeta.rowid_page_length;
        synchronized (dimensionRawColumnChunk.getFileReader()) {
            buffer = dimensionRawColumnChunk.getFileReader().readByteBuffer(filePath, dimensionChunksOffset.get(columnIndex) + dimensionChunksLength.get(columnIndex), payloadLength);
        }
    } else {
        // Any other column: metadata and page bytes sit in the same buffer, so skip the metadata.
        pageMeta = CarbonUtil.readDataChunk(buffer, cursor, dimensionChunksLength.get(columnIndex));
        cursor += dimensionChunksLength.get(columnIndex);
    }

    // 1) uncompress the data page
    byte[] values = COMPRESSOR.unCompressByte(buffer.array(), cursor, pageMeta.data_page_length);
    cursor += pageMeta.data_page_length;

    // 2) row-id (inverted index) page, only when the chunk is encoded with one
    int[] rowIds = null;
    int[] rowIdsReverse = null;
    if (hasEncoding(pageMeta.encoders, Encoding.INVERTED_INDEX)) {
        byte[] packedRowIds = new byte[pageMeta.rowid_page_length];
        buffer.position(cursor);
        buffer.get(packedRowIds);
        rowIds = CarbonUtil.getUnCompressColumnIndex(pageMeta.rowid_page_length, packedRowIds, numberComressor, 0);
        cursor += pageMeta.rowid_page_length;
        // derive the reverse lookup from the inverted index
        rowIdsReverse = getInvertedReverseIndex(rowIds);
    }

    // 3) RLE page, only when the chunk is encoded with one; it is used to expand the data page
    if (hasEncoding(pageMeta.encoders, Encoding.RLE)) {
        byte[] packedRuns = new byte[pageMeta.rle_page_length];
        buffer.position(cursor);
        buffer.get(packedRuns);
        int[] runs = numberComressor.unCompress(packedRuns, 0, pageMeta.rle_page_length);
        values = UnBlockIndexer.uncompressData(values, runs, eachColumnValueSize[columnIndex]);
    }

    // 4) pick the page implementation
    if (pageMeta.isRowMajor()) {
        // row-major chunk: column-group page
        return new ColumnGroupDimensionColumnPage(values, eachColumnValueSize[columnIndex], numberOfRows);
    }
    if (hasEncoding(pageMeta.encoders, Encoding.DICTIONARY)) {
        // dictionary-encoded chunk: fixed-length values
        return new FixedLengthDimensionColumnPage(values, rowIds, rowIdsReverse, numberOfRows, eachColumnValueSize[columnIndex]);
    }
    // no dictionary encoding: variable-length values
    return new VariableLengthDimensionColumnPage(values, rowIds, rowIdsReverse, numberOfRows);
}
Also used : FixedLengthDimensionColumnPage(org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage) VariableLengthDimensionColumnPage(org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionColumnPage) DimensionColumnPage(org.apache.carbondata.core.datastore.chunk.DimensionColumnPage) ColumnGroupDimensionColumnPage(org.apache.carbondata.core.datastore.chunk.impl.ColumnGroupDimensionColumnPage) FixedLengthDimensionColumnPage(org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage) DataChunk2(org.apache.carbondata.format.DataChunk2) VariableLengthDimensionColumnPage(org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionColumnPage) ByteBuffer(java.nio.ByteBuffer) ColumnGroupDimensionColumnPage(org.apache.carbondata.core.datastore.chunk.impl.ColumnGroupDimensionColumnPage)

Example 12 with FixedLengthDimensionColumnPage

use of org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage in project carbondata by apache.

the class IncludeFilterExecuterImplTest method testRangBinarySearch.

/**
 * Runs the old and the new filter-to-bitset implementations repeatedly over the same
 * fixed-length page, asserts on every run that both produce equal bit sets, and prints
 * the accumulated wall-clock time of each.
 */
@Test
public void testRangBinarySearch() {
    // number of dimension values in a blocklet (32000 is the usual default)
    final int rowCount = 32000;
    // how many timed query rounds are executed
    final int rounds = 10000;
    // number of filter keys applied to the blocklet
    final int filterKeyCount = 800;
    // width in bytes of one stored value
    final int valueWidth = 2;
    // value written into the page; bumped whenever the row index is a multiple of it
    int currentValue = 200;

    DimColumnExecuterFilterInfo dim = new DimColumnExecuterFilterInfo();

    // fill the page with two-byte values
    byte[] pageBytes = new byte[rowCount * valueWidth];
    for (int row = 0; row < rowCount; row++) {
        if (row % currentValue == 0) {
            currentValue++;
        }
        byte[] encoded = transferIntToByteArr(currentValue, valueWidth);
        int base = 2 * row;
        pageBytes[base] = encoded[0];
        pageBytes[base + 1] = encoded[1];
    }

    // filter keys are the encoded values 100, 101, ...
    byte[][] filterKeys = new byte[filterKeyCount][2];
    for (int key = 0; key < filterKeyCount; key++) {
        filterKeys[key] = transferIntToByteArr(100 + key, valueWidth);
    }
    dim.setFilterKeys(filterKeys);

    FixedLengthDimensionColumnPage page = new FixedLengthDimensionColumnPage(pageBytes, null, null, pageBytes.length / valueWidth, valueWidth);

    // one untimed run of each implementation before measuring
    this.setFilterdIndexToBitSetWithColumnIndexOld(page, rowCount, filterKeys);
    this.setFilterdIndexToBitSetWithColumnIndexNew(page, rowCount, filterKeys);

    // timed runs; results of both implementations must agree every round
    long oldElapsed = 0;
    long newElapsed = 0;
    for (int round = 0; round < rounds; round++) {
        long oldStart = System.currentTimeMillis();
        BitSet oldBits = this.setFilterdIndexToBitSetWithColumnIndexOld(page, rowCount, filterKeys);
        long oldEnd = System.currentTimeMillis();
        oldElapsed += oldEnd - oldStart;

        long newStart = System.currentTimeMillis();
        BitSet newBits = this.setFilterdIndexToBitSetWithColumnIndexNew(page, rowCount, filterKeys);
        long newEnd = System.currentTimeMillis();
        newElapsed += newEnd - newStart;

        assertTrue(oldBits.equals(newBits));
    }
    System.out.println("old code performance time: " + oldElapsed + " ms");
    System.out.println("new code performance time: " + newElapsed + " ms");
}
Also used : FixedLengthDimensionColumnPage(org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage) BitSet(java.util.BitSet) Test(org.junit.Test)

Example 13 with FixedLengthDimensionColumnPage

use of org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage in project carbondata by apache.

the class CarbonUtilTest method testToGetnextGreaterValueToTarget.

/**
 * Calls {@code CarbonUtil.nextGreaterValueToTarget} with start index 2, compare value
 * {@code 7} and bound 5 on the one-byte-wide page {@code {5, 6, 7, 7, 7}}, and expects 5.
 */
@Test
public void testToGetnextGreaterValueToTarget() {
    byte[] dataChunks = { 5, 6, 7, 7, 7 };
    byte[] compareValues = { 7 };
    FixedLengthDimensionColumnPage fixedLengthDataChunk = new FixedLengthDimensionColumnPage(dataChunks, null, null, 5, 1, dataChunks.length);
    int result = CarbonUtil.nextGreaterValueToTarget(2, fixedLengthDataChunk, compareValues, 5);
    // JUnit expects (expected, actual); the reversed order would mislabel both values
    // in the failure message.
    assertEquals(5, result);
}
Also used : FixedLengthDimensionColumnPage(org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage) Test(org.junit.Test)

Example 14 with FixedLengthDimensionColumnPage

use of org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage in project carbondata by apache.

the class CarbonUtilTest method testToGetFirstIndexUsingBinarySearchWithCompareTo0.

/**
 * Looks up the value {@code 10} in a one-byte-wide page whose first three rows all hold
 * {@code 10}, and expects {@code CarbonUtil.getFirstIndexUsingBinarySearch} to answer 0.
 */
@Test
public void testToGetFirstIndexUsingBinarySearchWithCompareTo0() {
    final byte[] pageValues = { 10, 10, 10, 40, 50, 60 };
    final byte[] target = { 10 };
    // six rows, one byte per value
    final FixedLengthDimensionColumnPage page = new FixedLengthDimensionColumnPage(pageValues, null, null, 6, 1, pageValues.length);
    // arguments 1 and 3 presumably bound the searched range — confirm against CarbonUtil
    final int firstIndex = CarbonUtil.getFirstIndexUsingBinarySearch(page, 1, 3, target, false);
    assertEquals(0, firstIndex);
}
Also used : FixedLengthDimensionColumnPage(org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage) Test(org.junit.Test)

Example 15 with FixedLengthDimensionColumnPage

use of org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage in project carbondata by apache.

the class CarbonUtilTest method testBinaryRangeSearch.

/**
 * Exercises range lookups over several one-byte-wide pages built from ASCII strings.
 *
 * <p>The first page is checked through the {@code assertRangeIndex} helper; the remaining
 * pages call {@code CarbonUtil.getRangeIndexUsingBinarySearch} directly and assert the
 * first and last index of the returned range.
 */
@Test
public void testBinaryRangeSearch() {
    // single-byte search key, overwritten in place before every lookup
    byte[] keyWord = new byte[1];
    int[] range;

    // rows: a=0, b=1..2, c=3..5, d=6..9, e=10, f=11..12, g=13..15, h=16
    byte[] dataChunk = "abbcccddddeffgggh".getBytes();
    byte[][] dataArr = new byte[dataChunk.length / keyWord.length][keyWord.length];
    FixedLengthDimensionColumnPage fixedLengthDimensionDataChunk = new FixedLengthDimensionColumnPage(dataChunk, null, null, dataChunk.length / keyWord.length, keyWord.length, dataChunk.length);
    for (int ii = 0; ii < dataChunk.length / keyWord.length; ii++) {
        dataArr[ii] = fixedLengthDimensionDataChunk.getChunkData(ii);
    }

    // first value of the page
    keyWord[0] = (byte) 'a';
    assertRangeIndex(dataArr, dataChunk, fixedLengthDimensionDataChunk, keyWord, new int[] { 0, 0 });

    // last value of the page
    keyWord[0] = (byte) 'h';
    assertRangeIndex(dataArr, dataChunk, fixedLengthDimensionDataChunk, keyWord, new int[] { 16, 16 });

    // single occurrence in the middle
    keyWord[0] = (byte) 'e';
    assertRangeIndex(dataArr, dataChunk, fixedLengthDimensionDataChunk, keyWord, new int[] { 10, 10 });

    // run of three in the middle
    keyWord[0] = (byte) 'c';
    assertRangeIndex(dataArr, dataChunk, fixedLengthDimensionDataChunk, keyWord, new int[] { 3, 5 });

    // two rows, two distinct values
    dataChunk = "ab".getBytes();
    fixedLengthDimensionDataChunk = new FixedLengthDimensionColumnPage(dataChunk, null, null, dataChunk.length / keyWord.length, keyWord.length, dataChunk.length);
    keyWord[0] = (byte) 'a';
    range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
    assertEquals(0, range[0]);
    assertEquals(0, range[1]);
    keyWord[0] = (byte) 'b';
    range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
    assertEquals(1, range[0]);
    assertEquals(1, range[1]);

    // two runs of two
    dataChunk = "aabb".getBytes();
    fixedLengthDimensionDataChunk = new FixedLengthDimensionColumnPage(dataChunk, null, null, dataChunk.length / keyWord.length, keyWord.length, dataChunk.length);
    keyWord[0] = (byte) 'a';
    range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
    assertEquals(0, range[0]);
    assertEquals(1, range[1]);
    keyWord[0] = (byte) 'b';
    range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
    assertEquals(2, range[0]);
    assertEquals(3, range[1]);

    // single-row page
    dataChunk = "a".getBytes();
    fixedLengthDimensionDataChunk = new FixedLengthDimensionColumnPage(dataChunk, null, null, dataChunk.length / keyWord.length, keyWord.length, dataChunk.length);
    keyWord[0] = (byte) 'a';
    range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
    assertEquals(0, range[0]);
    assertEquals(0, range[1]);

    // page consisting of one run only
    dataChunk = "aa".getBytes();
    fixedLengthDimensionDataChunk = new FixedLengthDimensionColumnPage(dataChunk, null, null, dataChunk.length / keyWord.length, keyWord.length, dataChunk.length);
    keyWord[0] = (byte) 'a';
    range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
    assertEquals(0, range[0]);
    assertEquals(1, range[1]);

    // long run surrounded by shorter ones
    dataChunk = "aabbbbbbbbbbcc".getBytes();
    fixedLengthDimensionDataChunk = new FixedLengthDimensionColumnPage(dataChunk, null, null, dataChunk.length / keyWord.length, keyWord.length, dataChunk.length);
    keyWord[0] = (byte) 'b';
    range = CarbonUtil.getRangeIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 0, dataChunk.length - 1, keyWord);
    assertEquals(2, range[0]);
    assertEquals(11, range[1]);
}
Also used : FixedLengthDimensionColumnPage(org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage) Test(org.junit.Test)

Aggregations

FixedLengthDimensionColumnPage (org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage): 19, Test (org.junit.Test): 13, BitSet (java.util.BitSet): 4, DimensionColumnPage (org.apache.carbondata.core.datastore.chunk.DimensionColumnPage): 4, VariableLengthDimensionColumnPage (org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionColumnPage): 4, ByteBuffer (java.nio.ByteBuffer): 2, ColumnGroupDimensionColumnPage (org.apache.carbondata.core.datastore.chunk.impl.ColumnGroupDimensionColumnPage): 2, FileReader (org.apache.carbondata.core.datastore.FileReader): 1, DimensionChunkStoreFactory (org.apache.carbondata.core.datastore.chunk.store.DimensionChunkStoreFactory): 1, DataChunk (org.apache.carbondata.core.metadata.blocklet.datachunk.DataChunk): 1, DataChunk2 (org.apache.carbondata.format.DataChunk2): 1