Search in sources:

Example 1 with EncodedTablePage

use of org.apache.carbondata.core.datastore.page.EncodedTablePage in project carbondata by apache.

the class CarbonMetadataUtil method getBlockletIndex.

/**
 * Builds the index of one blocklet from its encoded table pages.
 *
 * <p>The returned index carries:
 * <ul>
 *   <li>a min/max index whose max and min value lists each hold one entry per dimension
 *       column followed by one entry per measure column, and</li>
 *   <li>a B-tree index whose start key is the serialized start key of the first page and
 *       whose end key is the serialized end key of the last page.</li>
 * </ul>
 *
 * <p>Statistics are built exactly once per page. This assumes constructing a
 * {@code TablePageStatistics} is deterministic and has no side effects (NOTE(review): confirm
 * against the class).
 *
 * @param encodedTablePageList pages of the blocklet; must contain at least one page, because
 *                             the first page seeds the running min/max values
 * @param carbonMeasureList    measures whose data types drive the measure comparison; entry
 *                             {@code j} is used for the {@code j}-th measure statistic
 * @return the blocklet index holding the min/max index and the B-tree index
 */
public static BlockletIndex getBlockletIndex(List<EncodedTablePage> encodedTablePageList, List<CarbonMeasure> carbonMeasureList) {
    // Seed every running min/max with the statistics of the first page.
    EncodedTablePage firstPage = encodedTablePageList.get(0);
    TablePageStatistics stats = new TablePageStatistics(firstPage.getDimensions(), firstPage.getMeasures());
    byte[][] minCol = stats.getDimensionMinValue().clone();
    byte[][] maxCol = stats.getDimensionMaxValue().clone();
    byte[][] measureMaxValue = stats.getMeasureMaxValue().clone();
    byte[][] measureMinValue = stats.getMeasureMinValue().clone();
    // Fold in the remaining pages. The first page is skipped because comparing it with its own
    // values can never change the running min/max.
    for (int i = 1; i < encodedTablePageList.size(); i++) {
        EncodedTablePage page = encodedTablePageList.get(i);
        stats = new TablePageStatistics(page.getDimensions(), page.getMeasures());
        // Dimensions are compared as raw bytes.
        byte[][] columnMaxData = stats.getDimensionMaxValue();
        byte[][] columnMinData = stats.getDimensionMinValue();
        for (int col = 0; col < maxCol.length; col++) {
            if (ByteUtil.UnsafeComparer.INSTANCE.compareTo(columnMaxData[col], maxCol[col]) > 0) {
                maxCol[col] = columnMaxData[col];
            }
            if (ByteUtil.UnsafeComparer.INSTANCE.compareTo(columnMinData[col], minCol[col]) < 0) {
                minCol[col] = columnMinData[col];
            }
        }
        // Measures are compared according to the data type of the matching measure.
        byte[][] pageMeasureMax = stats.getMeasureMaxValue();
        byte[][] pageMeasureMin = stats.getMeasureMinValue();
        for (int j = 0; j < measureMinValue.length; j++) {
            byte[] minVal = pageMeasureMin[j];
            byte[] maxVal = pageMeasureMax[j];
            if (compareMeasureData(measureMaxValue[j], maxVal, carbonMeasureList.get(j).getDataType()) < 0) {
                measureMaxValue[j] = maxVal.clone();
            }
            if (compareMeasureData(measureMinValue[j], minVal, carbonMeasureList.get(j).getDataType()) > 0) {
                measureMinValue[j] = minVal.clone();
            }
        }
    }
    // Write min/max to the thrift structure: dimension columns first, then measure columns.
    BlockletMinMaxIndex blockletMinMaxIndex = new BlockletMinMaxIndex();
    for (byte[] max : maxCol) {
        blockletMinMaxIndex.addToMax_values(ByteBuffer.wrap(max));
    }
    for (byte[] min : minCol) {
        blockletMinMaxIndex.addToMin_values(ByteBuffer.wrap(min));
    }
    for (byte[] max : measureMaxValue) {
        blockletMinMaxIndex.addToMax_values(ByteBuffer.wrap(max));
    }
    for (byte[] min : measureMinValue) {
        blockletMinMaxIndex.addToMin_values(ByteBuffer.wrap(min));
    }
    // The key range of the blocklet runs from the first page's start key to the last page's end key.
    BlockletBTreeIndex blockletBTreeIndex = new BlockletBTreeIndex();
    byte[] startKey = firstPage.getPageKey().serializeStartKey();
    blockletBTreeIndex.setStart_key(startKey);
    byte[] endKey = encodedTablePageList.get(encodedTablePageList.size() - 1).getPageKey().serializeEndKey();
    blockletBTreeIndex.setEnd_key(endKey);
    BlockletIndex blockletIndex = new BlockletIndex();
    blockletIndex.setMin_max_index(blockletMinMaxIndex);
    blockletIndex.setB_tree_index(blockletBTreeIndex);
    return blockletIndex;
}
Also used : BlockletBTreeIndex(org.apache.carbondata.format.BlockletBTreeIndex) BlockletIndex(org.apache.carbondata.format.BlockletIndex) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex) TablePageStatistics(org.apache.carbondata.core.datastore.page.statistics.TablePageStatistics) EncodedTablePage(org.apache.carbondata.core.datastore.page.EncodedTablePage)

Example 2 with EncodedTablePage

use of org.apache.carbondata.core.datastore.page.EncodedTablePage in project carbondata by apache.

From the class CarbonFactDataWriterImplV3, method writeBlockletToFile() (the no-argument overload).

/**
 * Writes the blocklet currently collected in {@code blockletDataHolder} to the data file.
 *
 * <p>Steps: fill the per-column data chunks, let {@code createNewFileIfReachThreshold}
 * decide on the total size (page data plus chunk metadata), write the file header when the
 * current file offset is still zero, then write the blocklet itself. The data holder is
 * cleared whether or not the write succeeds.
 *
 * @throws CarbonDataWriterException if writing the header or the blocklet fails with an
 *                                   {@link IOException}
 */
private void writeBlockletToFile() {
    final List<EncodedTablePage> pages = blockletDataHolder.getEncodedTablePages();
    final EncodedTablePage firstPage = pages.get(0);
    // one data chunk slot per column of the table page
    final int columnCount = firstPage.getNumDimensions() + firstPage.getNumMeasures();
    final byte[][] dataChunkBytes = new byte[columnCount][];
    final long metadataSize = fillDataChunk(pages, dataChunkBytes);
    // total bytes about to be written: page data plus chunk metadata
    createNewFileIfReachThreshold(blockletDataHolder.getSize() + metadataSize);
    try {
        // an offset of zero means nothing has been written to this file yet
        if (currentOffsetInFile == 0) {
            writeHeaderToFile();
        }
        writeBlockletToFile(dataChunkBytes);
        if (listener != null) {
            listener.onBlockletEnd(blockletId++);
        }
        pageId = 0;
    } catch (IOException e) {
        LOGGER.error(e, "Problem while writing file");
        throw new CarbonDataWriterException("Problem while writing file", e);
    } finally {
        // always release the collected pages, even when the write failed
        blockletDataHolder.clear();
    }
}
Also used : EncodedTablePage(org.apache.carbondata.core.datastore.page.EncodedTablePage) IOException(java.io.IOException) CarbonDataWriterException(org.apache.carbondata.core.datastore.exception.CarbonDataWriterException)

Example 3 with EncodedTablePage

use of org.apache.carbondata.core.datastore.page.EncodedTablePage in project carbondata by apache.

From the class CarbonFactDataWriterImplV3, method writeBlockletToFile(byte[][]) (the overload that takes the data chunk bytes).

/**
 * Writes one blocklet to the file channel and records its metadata.
 * File format:
 * <Column1 Data ChunkV3><Column1<Page1><Page2><Page3><Page4>>
 * <Column2 Data ChunkV3><Column2<Page1><Page2><Page3><Page4>>
 * <Column3 Data ChunkV3><Column3<Page1><Page2><Page3><Page4>>
 * <Column4 Data ChunkV3><Column4<Page1><Page2><Page3><Page4>>
 *
 * <p>Dimension columns are written first, then measure columns. For every column the start
 * offset and the length of its data chunk are remembered. After writing, a blocklet index
 * and a {@code BlockletInfo3} entry are appended to {@code blockletIndex} and
 * {@code blockletMetadata}.
 *
 * @param dataChunkBytes serialized data chunk of every column: dimension chunks at indexes
 *                       {@code [0, numDimensions)}, measure chunks right after them
 * @throws IOException if writing to the file channel fails
 */
private void writeBlockletToFile(byte[][] dataChunkBytes) throws IOException {
    // running position of the next byte of this blocklet within the file
    long runningOffset = currentOffsetInFile;
    final List<Long> chunkOffsets = new ArrayList<>();
    final List<Integer> chunkLengths = new ArrayList<>();
    final List<EncodedTablePage> pages = blockletDataHolder.getEncodedTablePages();
    final EncodedTablePage firstPage = pages.get(0);
    final int dimensionCount = firstPage.getNumDimensions();
    final int measureCount = firstPage.getNumMeasures();

    // rows in the blocklet = sum of the rows of its pages
    int totalRows = 0;
    for (EncodedTablePage page : pages) {
        totalRows += page.getPageSize();
    }

    // dimension columns: data chunk first, then the column's data from every page
    for (int dim = 0; dim < dimensionCount; dim++) {
        final byte[] chunk = dataChunkBytes[dim];
        chunkOffsets.add(runningOffset);
        chunkLengths.add(chunk.length);
        currentOffsetInFile += fileChannel.write(ByteBuffer.wrap(chunk));
        runningOffset += chunk.length;
        for (EncodedTablePage page : pages) {
            final ByteBuffer pageData = page.getDimension(dim).getEncodedData();
            final int pageLength = pageData.limit();
            currentOffsetInFile += fileChannel.write(pageData);
            runningOffset += pageLength;
        }
    }
    final long dimensionEndOffset = runningOffset;

    // measure columns: their data chunks follow the dimension chunks in dataChunkBytes
    for (int msr = 0; msr < measureCount; msr++) {
        final byte[] chunk = dataChunkBytes[dimensionCount + msr];
        chunkOffsets.add(runningOffset);
        chunkLengths.add(chunk.length);
        currentOffsetInFile += fileChannel.write(ByteBuffer.wrap(chunk));
        runningOffset += chunk.length;
        for (EncodedTablePage page : pages) {
            final ByteBuffer pageData = page.getMeasure(msr).getEncodedData();
            final int pageLength = pageData.limit();
            currentOffsetInFile += fileChannel.write(pageData);
            runningOffset += pageLength;
        }
    }
    final long measureEndOffset = runningOffset;

    blockletIndex.add(CarbonMetadataUtil.getBlockletIndex(pages, model.getSegmentProperties().getMeasures()));
    final int pageCount = blockletDataHolder.getEncodedTablePages().size();
    blockletMetadata.add(new BlockletInfo3(totalRows, chunkOffsets, chunkLengths, dimensionEndOffset, measureEndOffset, pageCount));
}
Also used : EncodedColumnPage(org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage) BlockletInfo3(org.apache.carbondata.format.BlockletInfo3) ArrayList(java.util.ArrayList) EncodedTablePage(org.apache.carbondata.core.datastore.page.EncodedTablePage) ByteBuffer(java.nio.ByteBuffer)

Example 4 with EncodedTablePage

use of org.apache.carbondata.core.datastore.page.EncodedTablePage in project carbondata by apache.

the class BlockletDataHolder method addPage.

/**
 * Adds the encoded form of the given table page to this holder and grows the tracked
 * size by the page's encoded size.
 *
 * @param rawTablePage table page whose encoded table page is stored
 */
public void addPage(TablePage rawTablePage) {
    final EncodedTablePage encoded = rawTablePage.getEncodedTablePage();
    encodedTablePage.add(encoded);
    currentSize += encoded.getEncodedSize();
}
Also used : EncodedTablePage(org.apache.carbondata.core.datastore.page.EncodedTablePage)

Example 5 with EncodedTablePage

use of org.apache.carbondata.core.datastore.page.EncodedTablePage in project carbondata by apache.

the class CarbonMetadataUtilTest method testConvertFileFooter.

/**
 * Checks that the version 3 file footer produced by {@code convertFileFooterVersion3}
 * exposes the same blocklet index list that was passed in.
 *
 * <p>{@code EncodedTablePage.getMeasure} and the {@code TablePageKey} key serializers are
 * mocked so that the blocklet index can be built from a page without any columns.
 */
@Test
public void testConvertFileFooter() throws Exception {
    int[] cardinality = { 1, 2, 3, 4, 5 };
    org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema colSchema = new org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema();
    org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema colSchema1 = new org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema();
    List<org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema> columnSchemaList = new ArrayList<>();
    columnSchemaList.add(colSchema);
    columnSchemaList.add(colSchema1);
    SegmentProperties segmentProperties = new SegmentProperties(columnSchemaList, cardinality);
    final EncodedColumnPage measure = new EncodedColumnPage(new DataChunk2(), new byte[] { 0, 1 }, PrimitivePageStatsCollector.newInstance(org.apache.carbondata.core.metadata.datatype.DataTypes.BYTE));
    new MockUp<EncodedTablePage>() {

        @SuppressWarnings("unused")
        @Mock
        public EncodedColumnPage getMeasure(int measureIndex) {
            return measure;
        }
    };
    new MockUp<TablePageKey>() {

        @SuppressWarnings("unused")
        @Mock
        public byte[] serializeStartKey() {
            return new byte[] { 1, 2 };
        }

        @SuppressWarnings("unused")
        @Mock
        public byte[] serializeEndKey() {
            return new byte[] { 1, 2 };
        }
    };
    TablePageKey key = new TablePageKey(3, segmentProperties, false);
    // a single page without dimension or measure columns is enough to build an index
    EncodedTablePage encodedTablePage = EncodedTablePage.newInstance(3, new EncodedColumnPage[0], new EncodedColumnPage[0], key);
    List<EncodedTablePage> encodedTablePageList = new ArrayList<>();
    encodedTablePageList.add(encodedTablePage);
    BlockletInfo3 blockletInfoColumnar1 = new BlockletInfo3();
    List<BlockletInfo3> blockletInfoColumnarList = new ArrayList<>();
    blockletInfoColumnarList.add(blockletInfoColumnar1);
    BlockletIndex index = getBlockletIndex(encodedTablePageList, segmentProperties.getMeasures());
    List<BlockletIndex> indexList = new ArrayList<>();
    indexList.add(index);
    FileFooter3 footer = convertFileFooterVersion3(blockletInfoColumnarList, indexList, cardinality, 2);
    // expected value first, so a failure message labels the two lists correctly
    assertEquals(indexList, footer.getBlocklet_index_list());
}
Also used : BlockletIndex(org.apache.carbondata.format.BlockletIndex) CarbonMetadataUtil.getBlockletIndex(org.apache.carbondata.core.util.CarbonMetadataUtil.getBlockletIndex) ArrayList(java.util.ArrayList) EncodedTablePage(org.apache.carbondata.core.datastore.page.EncodedTablePage) ColumnSchema(org.apache.carbondata.format.ColumnSchema) MockUp(mockit.MockUp) TablePageKey(org.apache.carbondata.core.datastore.page.key.TablePageKey) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex) FileFooter3(org.apache.carbondata.format.FileFooter3) EncodedColumnPage(org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage) BlockletInfo3(org.apache.carbondata.format.BlockletInfo3) DataChunk2(org.apache.carbondata.format.DataChunk2) SegmentProperties(org.apache.carbondata.core.datastore.block.SegmentProperties) Test(org.junit.Test)

Aggregations

EncodedTablePage (org.apache.carbondata.core.datastore.page.EncodedTablePage): 5; ArrayList (java.util.ArrayList): 2; EncodedColumnPage (org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage): 2; BlockletIndex (org.apache.carbondata.format.BlockletIndex): 2; BlockletInfo3 (org.apache.carbondata.format.BlockletInfo3): 2; BlockletMinMaxIndex (org.apache.carbondata.format.BlockletMinMaxIndex): 2; IOException (java.io.IOException): 1; ByteBuffer (java.nio.ByteBuffer): 1; MockUp (mockit.MockUp): 1; SegmentProperties (org.apache.carbondata.core.datastore.block.SegmentProperties): 1; CarbonDataWriterException (org.apache.carbondata.core.datastore.exception.CarbonDataWriterException): 1; TablePageKey (org.apache.carbondata.core.datastore.page.key.TablePageKey): 1; TablePageStatistics (org.apache.carbondata.core.datastore.page.statistics.TablePageStatistics): 1; CarbonMetadataUtil.getBlockletIndex (org.apache.carbondata.core.util.CarbonMetadataUtil.getBlockletIndex): 1; BlockletBTreeIndex (org.apache.carbondata.format.BlockletBTreeIndex): 1; ColumnSchema (org.apache.carbondata.format.ColumnSchema): 1; DataChunk2 (org.apache.carbondata.format.DataChunk2): 1; FileFooter3 (org.apache.carbondata.format.FileFooter3): 1; Test (org.junit.Test): 1