Search in sources :

Example 1 with EncodedColumnPage

use of org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage in project carbondata by apache.

In class CarbonFactDataWriterImplV3, method writeBlockletToFile.

/**
 * Writes one blocklet of data into the fact file.
 * File format:
 * <Column1 Data ChunkV3><Column1<Page1><Page2><Page3><Page4>>
 * <Column2 Data ChunkV3><Column2<Page1><Page2><Page3><Page4>>
 * <Column3 Data ChunkV3><Column3<Page1><Page2><Page3><Page4>>
 * <Column4 Data ChunkV3><Column4<Page1><Page2><Page3><Page4>>
 *
 * @param dataChunkBytes serialized data chunk headers, one per column; indices
 *        [0, numberOfDimension) are dimension chunks, followed by measure chunks
 * @throws IOException if writing to the file channel fails
 */
private void writeBlockletToFile(byte[][] dataChunkBytes) throws IOException {
    long offset = currentOffsetInFile;
    // to maintain the offset of each data chunk in blocklet
    List<Long> currentDataChunksOffset = new ArrayList<>();
    // to maintain the length of each data chunk in blocklet
    List<Integer> currentDataChunksLength = new ArrayList<>();
    List<EncodedTablePage> encodedTablePages = blockletDataHolder.getEncodedTablePages();
    int numberOfDimension = encodedTablePages.get(0).getNumDimensions();
    int numberOfMeasures = encodedTablePages.get(0).getNumMeasures();
    // total rows in this blocklet = sum of rows across all its pages
    int numberOfRows = 0;
    for (EncodedTablePage encodedTablePage : encodedTablePages) {
        numberOfRows += encodedTablePage.getPageSize();
    }
    // write dimension columns: for each dimension, its chunk header followed by
    // that dimension's encoded page from every table page
    for (int i = 0; i < numberOfDimension; i++) {
        currentDataChunksOffset.add(offset);
        currentDataChunksLength.add(dataChunkBytes[i].length);
        ByteBuffer buffer = ByteBuffer.wrap(dataChunkBytes[i]);
        currentOffsetInFile += fileChannel.write(buffer);
        offset += dataChunkBytes[i].length;
        for (EncodedTablePage encodedTablePage : encodedTablePages) {
            EncodedColumnPage dimension = encodedTablePage.getDimension(i);
            buffer = dimension.getEncodedData();
            // capture the size before write() advances the buffer position
            int bufferSize = buffer.limit();
            currentOffsetInFile += fileChannel.write(buffer);
            offset += bufferSize;
        }
    }
    // dimension data ends here; measure data starts at this offset
    long dimensionOffset = offset;
    // measure chunk headers are stored after the dimension headers in dataChunkBytes
    int dataChunkStartIndex = numberOfDimension;
    for (int i = 0; i < numberOfMeasures; i++) {
        currentDataChunksOffset.add(offset);
        currentDataChunksLength.add(dataChunkBytes[dataChunkStartIndex].length);
        ByteBuffer buffer = ByteBuffer.wrap(dataChunkBytes[dataChunkStartIndex]);
        currentOffsetInFile += fileChannel.write(buffer);
        offset += dataChunkBytes[dataChunkStartIndex].length;
        dataChunkStartIndex++;
        for (EncodedTablePage encodedTablePage : encodedTablePages) {
            EncodedColumnPage measure = encodedTablePage.getMeasure(i);
            buffer = measure.getEncodedData();
            // capture the size before write() advances the buffer position
            int bufferSize = buffer.limit();
            currentOffsetInFile += fileChannel.write(buffer);
            offset += bufferSize;
        }
    }
    long measureOffset = offset;
    // record the blocklet index and metadata for the file footer
    blockletIndex.add(CarbonMetadataUtil.getBlockletIndex(encodedTablePages, model.getSegmentProperties().getMeasures()));
    BlockletInfo3 blockletInfo3 = new BlockletInfo3(numberOfRows, currentDataChunksOffset, currentDataChunksLength, dimensionOffset, measureOffset, encodedTablePages.size());
    blockletMetadata.add(blockletInfo3);
}
Also used : EncodedColumnPage(org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage) BlockletInfo3(org.apache.carbondata.format.BlockletInfo3) ArrayList(java.util.ArrayList) EncodedTablePage(org.apache.carbondata.core.datastore.page.EncodedTablePage) ByteBuffer(java.nio.ByteBuffer)

Example 2 with EncodedColumnPage

use of org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage in project carbondata by apache.

In class TablePage, method encodeAndCompressDimensions.

// Encodes and compresses every dimension column page of this table page.
// Primitive dimensions keep their column order; the encoded pages of all
// complex dimensions are appended after them.
private EncodedColumnPage[] encodeAndCompressDimensions() throws KeyGenException, IOException, MemoryException {
    List<EncodedColumnPage> primitivePages = new ArrayList<>();
    List<EncodedColumnPage> complexPages = new ArrayList<>();
    TableSpec tableSpec = model.getTableSpec();
    int dictPageIndex = 0;
    int noDictPageIndex = 0;
    int complexPageIndex = 0;
    int dimensionCount = tableSpec.getNumDimensions();
    for (int dimIndex = 0; dimIndex < dimensionCount; dimIndex++) {
        TableSpec.DimensionSpec dimensionSpec = tableSpec.getDimensionSpec(dimIndex);
        switch (dimensionSpec.getColumnType()) {
            case GLOBAL_DICTIONARY:
            case DIRECT_DICTIONARY: {
                // dictionary-encoded dimension: encoder is chosen from the page itself
                ColumnPageEncoder pageEncoder = encodingFactory.createEncoder(dimensionSpec, dictDimensionPages[dictPageIndex]);
                primitivePages.add(pageEncoder.encode(dictDimensionPages[dictPageIndex]));
                dictPageIndex++;
                break;
            }
            case PLAIN_VALUE: {
                // no-dictionary dimension stored as plain values
                ColumnPageEncoder pageEncoder = encodingFactory.createEncoder(dimensionSpec, noDictDimensionPages[noDictPageIndex]);
                primitivePages.add(pageEncoder.encode(noDictDimensionPages[noDictPageIndex]));
                noDictPageIndex++;
                break;
            }
            case COMPLEX: {
                // a complex column expands into multiple encoded pages at once
                EncodedColumnPage[] childPages = ColumnPageEncoder.encodeComplexColumn(complexDimensionPages[complexPageIndex]);
                complexPages.addAll(Arrays.asList(childPages));
                complexPageIndex++;
                break;
            }
            default:
                throw new IllegalArgumentException("unsupported dimension type:" + dimensionSpec.getColumnType());
        }
    }
    primitivePages.addAll(complexPages);
    return primitivePages.toArray(new EncodedColumnPage[primitivePages.size()]);
}
Also used : ColumnPageEncoder(org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoder) TableSpec(org.apache.carbondata.core.datastore.TableSpec) EncodedColumnPage(org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage) ArrayList(java.util.ArrayList)

Example 3 with EncodedColumnPage

use of org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage in project carbondata by apache.

In class TablePage, method encodeAndCompressMeasures.

// Encodes and compresses each measure column page, selecting one encoder per
// measure from its spec and page statistics.
private EncodedColumnPage[] encodeAndCompressMeasures() throws MemoryException, IOException {
    int measureCount = measurePages.length;
    EncodedColumnPage[] encodedMeasures = new EncodedColumnPage[measureCount];
    for (int measureIndex = 0; measureIndex < measureCount; measureIndex++) {
        ColumnPageEncoder pageEncoder = encodingFactory.createEncoder(model.getTableSpec().getMeasureSpec(measureIndex), measurePages[measureIndex]);
        encodedMeasures[measureIndex] = pageEncoder.encode(measurePages[measureIndex]);
    }
    return encodedMeasures;
}
Also used : ColumnPageEncoder(org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoder) EncodedColumnPage(org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage)

Example 4 with EncodedColumnPage

use of org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage in project carbondata by apache.

In class CarbonMetadataUtilTest, method testConvertFileFooter.

@Test
public void testConvertFileFooter() throws Exception {
    // build a minimal two-column segment with a dummy dictionary cardinality
    int[] cardinality = { 1, 2, 3, 4, 5 };
    org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema colSchema = new org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema();
    org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema colSchema1 = new org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema();
    List<org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema> columnSchemaList = new ArrayList<>();
    columnSchemaList.add(colSchema);
    columnSchemaList.add(colSchema1);
    SegmentProperties segmentProperties = new SegmentProperties(columnSchemaList, cardinality);
    // a single encoded measure page that the mocked EncodedTablePage will hand back
    final EncodedColumnPage measure = new EncodedColumnPage(new DataChunk2(), new byte[] { 0, 1 }, PrimitivePageStatsCollector.newInstance(org.apache.carbondata.core.metadata.datatype.DataTypes.BYTE));
    // mock EncodedTablePage so every getMeasure(i) returns the fixed page above
    new MockUp<EncodedTablePage>() {

        @SuppressWarnings("unused")
        @Mock
        public EncodedColumnPage getMeasure(int measureIndex) {
            return measure;
        }
    };
    // mock TablePageKey so start/end key serialization yields fixed bytes
    new MockUp<TablePageKey>() {

        @SuppressWarnings("unused")
        @Mock
        public byte[] serializeStartKey() {
            return new byte[] { 1, 2 };
        }

        @SuppressWarnings("unused")
        @Mock
        public byte[] serializeEndKey() {
            return new byte[] { 1, 2 };
        }
    };
    TablePageKey key = new TablePageKey(3, segmentProperties, false);
    // page with no real dimension/measure arrays; mocks supply the measure data
    EncodedTablePage encodedTablePage = EncodedTablePage.newInstance(3, new EncodedColumnPage[0], new EncodedColumnPage[0], key);
    List<EncodedTablePage> encodedTablePageList = new ArrayList<>();
    encodedTablePageList.add(encodedTablePage);
    BlockletInfo3 blockletInfoColumnar1 = new BlockletInfo3();
    List<BlockletInfo3> blockletInfoColumnarList = new ArrayList<>();
    blockletInfoColumnarList.add(blockletInfoColumnar1);
    byte[] byteMaxArr = "1".getBytes();
    byte[] byteMinArr = "2".getBytes();
    // derive the blocklet index from the mocked page and the segment's measures
    BlockletIndex index = getBlockletIndex(encodedTablePageList, segmentProperties.getMeasures());
    List<BlockletIndex> indexList = new ArrayList<>();
    indexList.add(index);
    // NOTE(review): blockletMinMaxIndex is populated but never passed to
    // convertFileFooterVersion3 or asserted on — looks like dead setup; confirm
    // whether it was meant to feed the footer before removing.
    BlockletMinMaxIndex blockletMinMaxIndex = new BlockletMinMaxIndex();
    blockletMinMaxIndex.addToMax_values(ByteBuffer.wrap(byteMaxArr));
    blockletMinMaxIndex.addToMin_values(ByteBuffer.wrap(byteMinArr));
    // the footer must carry through the blocklet index list unchanged
    FileFooter3 footer = convertFileFooterVersion3(blockletInfoColumnarList, indexList, cardinality, 2);
    assertEquals(footer.getBlocklet_index_list(), indexList);
}
Also used : BlockletIndex(org.apache.carbondata.format.BlockletIndex) CarbonMetadataUtil.getBlockletIndex(org.apache.carbondata.core.util.CarbonMetadataUtil.getBlockletIndex) ArrayList(java.util.ArrayList) EncodedTablePage(org.apache.carbondata.core.datastore.page.EncodedTablePage) ColumnSchema(org.apache.carbondata.format.ColumnSchema) MockUp(mockit.MockUp) TablePageKey(org.apache.carbondata.core.datastore.page.key.TablePageKey) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex) FileFooter3(org.apache.carbondata.format.FileFooter3) EncodedColumnPage(org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage) BlockletInfo3(org.apache.carbondata.format.BlockletInfo3) DataChunk2(org.apache.carbondata.format.DataChunk2) SegmentProperties(org.apache.carbondata.core.datastore.block.SegmentProperties) Test(org.junit.Test)

Aggregations

EncodedColumnPage (org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage)4 ArrayList (java.util.ArrayList)3 EncodedTablePage (org.apache.carbondata.core.datastore.page.EncodedTablePage)2 ColumnPageEncoder (org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoder)2 BlockletInfo3 (org.apache.carbondata.format.BlockletInfo3)2 ByteBuffer (java.nio.ByteBuffer)1 MockUp (mockit.MockUp)1 TableSpec (org.apache.carbondata.core.datastore.TableSpec)1 SegmentProperties (org.apache.carbondata.core.datastore.block.SegmentProperties)1 TablePageKey (org.apache.carbondata.core.datastore.page.key.TablePageKey)1 CarbonMetadataUtil.getBlockletIndex (org.apache.carbondata.core.util.CarbonMetadataUtil.getBlockletIndex)1 BlockletIndex (org.apache.carbondata.format.BlockletIndex)1 BlockletMinMaxIndex (org.apache.carbondata.format.BlockletMinMaxIndex)1 ColumnSchema (org.apache.carbondata.format.ColumnSchema)1 DataChunk2 (org.apache.carbondata.format.DataChunk2)1 FileFooter3 (org.apache.carbondata.format.FileFooter3)1 Test (org.junit.Test)1