Search in sources:

Example 1 with BlockletBTreeIndex

Use of org.apache.carbondata.format.BlockletBTreeIndex in the Apache CarbonData project.

In class CarbonMetadataUtil, method getBlockletIndex.

/**
 * Builds the thrift {@link BlockletIndex} for a blocklet from the min/max data and
 * start/end keys carried by the given node holders.
 *
 * <p>The max list and the min list of the resulting min/max index each contain the
 * dimension column values first, followed by the measure column values. The B-tree
 * start key comes from the first node holder and the end key from the last one.
 *
 * @param nodeHolderList node holders of the blocklet; {@code get(0)} is read
 *        unconditionally, so the list must not be empty
 * @param carbonMeasureList measures whose data type drives the measure comparison;
 *        indexed in step with the measure min/max arrays
 * @return the populated thrift blocklet index
 */
public static BlockletIndex getBlockletIndex(List<NodeHolder> nodeHolderList, List<CarbonMeasure> carbonMeasureList) {
    NodeHolder firstHolder = nodeHolderList.get(0);

    // Dimension columns: start from the first holder's values and widen the range
    // with every holder in the list.
    byte[][] dimensionMin = firstHolder.getColumnMinData().clone();
    byte[][] dimensionMax = firstHolder.getColumnMaxData().clone();
    for (int h = 0; h < nodeHolderList.size(); h++) {
        NodeHolder holder = nodeHolderList.get(h);
        byte[][] holderMax = holder.getColumnMaxData();
        byte[][] holderMin = holder.getColumnMinData();
        for (int col = 0; col < dimensionMax.length; col++) {
            if (ByteUtil.UnsafeComparer.INSTANCE.compareTo(holderMax[col], dimensionMax[col]) > 0) {
                dimensionMax[col] = holderMax[col];
            }
            if (ByteUtil.UnsafeComparer.INSTANCE.compareTo(holderMin[col], dimensionMin[col]) < 0) {
                dimensionMin[col] = holderMin[col];
            }
        }
    }

    BlockletMinMaxIndex minMaxIndex = new BlockletMinMaxIndex();
    for (int col = 0; col < dimensionMax.length; col++) {
        minMaxIndex.addToMax_values(ByteBuffer.wrap(dimensionMax[col]));
    }
    for (int col = 0; col < dimensionMin.length; col++) {
        minMaxIndex.addToMin_values(ByteBuffer.wrap(dimensionMin[col]));
    }

    // Measure columns: seeded from the first holder, then compared with the
    // remaining holders using the measure's data type.
    byte[][] measureMax = firstHolder.getMeasureColumnMaxData().clone();
    byte[][] measureMin = firstHolder.getMeasureColumnMinData().clone();
    for (int h = 1; h < nodeHolderList.size(); h++) {
        for (int m = 0; m < measureMin.length; m++) {
            byte[] candidateMin = nodeHolderList.get(h).getMeasureColumnMinData()[m];
            byte[] candidateMax = nodeHolderList.get(h).getMeasureColumnMaxData()[m];
            if (compareMeasureData(measureMax[m], candidateMax, carbonMeasureList.get(m).getDataType()) < 0) {
                measureMax[m] = candidateMax.clone();
            }
            if (compareMeasureData(measureMin[m], candidateMin, carbonMeasureList.get(m).getDataType()) > 0) {
                measureMin[m] = candidateMin.clone();
            }
        }
    }
    for (int m = 0; m < measureMax.length; m++) {
        minMaxIndex.addToMax_values(ByteBuffer.wrap(measureMax[m]));
    }
    for (int m = 0; m < measureMin.length; m++) {
        minMaxIndex.addToMin_values(ByteBuffer.wrap(measureMin[m]));
    }

    // Key range of the blocklet: first holder's start key to last holder's end key.
    NodeHolder lastHolder = nodeHolderList.get(nodeHolderList.size() - 1);
    BlockletBTreeIndex bTreeIndex = new BlockletBTreeIndex();
    bTreeIndex.setStart_key(firstHolder.getStartKey());
    bTreeIndex.setEnd_key(lastHolder.getEndKey());

    BlockletIndex result = new BlockletIndex();
    result.setMin_max_index(minMaxIndex);
    result.setB_tree_index(bTreeIndex);
    return result;
}
Also used : BlockletBTreeIndex(org.apache.carbondata.format.BlockletBTreeIndex) BlockletIndex(org.apache.carbondata.format.BlockletIndex) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex)

Example 2 with BlockletBTreeIndex

Use of org.apache.carbondata.format.BlockletBTreeIndex in the Apache CarbonData project.

In class DataFileFooterConverterTest, method testReadDataFileFooter.

/**
 * Checks that {@code DataFileFooterConverter.readDataFileFooter} returns a footer whose
 * row count is 3 when the footer reader is mocked to hand back a prepared thrift
 * {@code FileFooter}.
 *
 * <p>File access is mocked: {@code FileFactory} reports a local file type and returns a
 * {@code FileReaderImpl}, {@code FileReaderImpl.readLong} returns a fixed value, and
 * {@code CarbonFooterReader.readFooter} returns the footer built below.
 */
@Test
public void testReadDataFileFooter() throws Exception {
    DataFileFooterConverter dataFileFooterConverter = new DataFileFooterConverter();

    // Thrift segment info with three column cardinalities.
    List<Integer> columnCardinalities = new ArrayList<>(Arrays.asList(1, 2, 3));
    org.apache.carbondata.format.SegmentInfo segmentInfo1 = new org.apache.carbondata.format.SegmentInfo(3, columnCardinalities);

    // Kept as a mutable list so the thrift objects receive the same kind of list as before.
    List<Encoding> encoders = new ArrayList<>(Arrays.asList(
        Encoding.INVERTED_INDEX,
        Encoding.BIT_PACKED,
        Encoding.DELTA,
        Encoding.DICTIONARY,
        Encoding.DIRECT_DICTIONARY,
        Encoding.RLE));

    // One column schema per data type exercised by the converter.
    final List<ColumnSchema> columnSchemas = new ArrayList<>();
    columnSchemas.add(new ColumnSchema(DataType.INT, "column", "3", true, encoders, true));
    columnSchemas.add(new ColumnSchema(DataType.ARRAY, "column", "3", true, encoders, true));
    columnSchemas.add(new ColumnSchema(DataType.DECIMAL, "column", "3", true, encoders, true));
    columnSchemas.add(new ColumnSchema(DataType.DOUBLE, "column", "3", true, encoders, true));
    columnSchemas.add(new ColumnSchema(DataType.LONG, "column", "3", true, encoders, true));
    columnSchemas.add(new ColumnSchema(DataType.SHORT, "column", "3", true, encoders, true));
    columnSchemas.add(new ColumnSchema(DataType.STRUCT, "column", "3", true, encoders, true));
    columnSchemas.add(new ColumnSchema(DataType.STRING, "column", "3", true, encoders, true));

    // Blocklet index with a B-tree key range and a one-byte min/max entry.
    BlockletBTreeIndex blockletBTreeIndex = new BlockletBTreeIndex();
    // Explicit charset keeps the key bytes independent of the platform default.
    blockletBTreeIndex.setStart_key("1".getBytes(java.nio.charset.StandardCharsets.UTF_8));
    blockletBTreeIndex.setEnd_key("3".getBytes(java.nio.charset.StandardCharsets.UTF_8));
    BlockletMinMaxIndex blockletMinMaxIndex = new BlockletMinMaxIndex();
    // wrap() leaves the buffer at position 0 with one readable byte; allocate(1).put(..)
    // would leave position == limit, i.e. nothing remaining for a relative read.
    blockletMinMaxIndex.setMax_values(Arrays.asList(ByteBuffer.wrap(new byte[] { 2 })));
    blockletMinMaxIndex.setMin_values(Arrays.asList(ByteBuffer.wrap(new byte[] { 1 })));
    org.apache.carbondata.format.BlockletIndex blockletIndex1 = new org.apache.carbondata.format.BlockletIndex();
    blockletIndex1.setB_tree_index(blockletBTreeIndex);
    blockletIndex1.setMin_max_index(blockletMinMaxIndex);
    List<org.apache.carbondata.format.BlockletIndex> blockletIndexArrayList = new ArrayList<>();
    blockletIndexArrayList.add(blockletIndex1);

    List<org.apache.carbondata.format.BlockletInfo> blockletInfoArrayList = new ArrayList<>();
    blockletInfoArrayList.add(new org.apache.carbondata.format.BlockletInfo());

    // NOTE(review): the second constructor argument (3) is presumably the row count
    // asserted at the end of the test - confirm against the thrift definition.
    final FileFooter fileFooter = new FileFooter(1, 3, columnSchemas, segmentInfo1, blockletIndexArrayList);
    fileFooter.setBlocklet_info_list(blockletInfoArrayList);

    new MockUp<FileFactory>() {

        @SuppressWarnings("unused")
        @Mock
        public FileFactory.FileType getFileType(String path) {
            return FileFactory.FileType.LOCAL;
        }

        @SuppressWarnings("unused")
        @Mock
        public FileReader getFileHolder(FileFactory.FileType fileType) {
            return new FileReaderImpl();
        }
    };
    new MockUp<FileReaderImpl>() {

        @SuppressWarnings("unused")
        @Mock
        public long readLong(String filePath, long offset) {
            return 1;
        }
    };
    new MockUp<CarbonFooterReader>() {

        @SuppressWarnings("unused")
        @Mock
        public FileFooter readFooter() throws IOException {
            return fileFooter;
        }
    };

    TableBlockInfo info = new TableBlockInfo("/file.carbondata", 1, "0", new String[0], 1, ColumnarFormatVersion.V1, null);
    DataFileFooter result = dataFileFooterConverter.readDataFileFooter(info);

    // JUnit convention: expected value first, actual value second.
    assertEquals(3, result.getNumberOfRows());
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.format.ColumnSchema) MockUp(mockit.MockUp) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex) BlockletBTreeIndex(org.apache.carbondata.format.BlockletBTreeIndex) FileReaderImpl(org.apache.carbondata.core.datastore.impl.FileReaderImpl) FileFooter(org.apache.carbondata.format.FileFooter) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter) Encoding(org.apache.carbondata.format.Encoding) SegmentInfo(org.apache.carbondata.core.metadata.blocklet.SegmentInfo) Test(org.junit.Test)

Example 3 with BlockletBTreeIndex

Use of org.apache.carbondata.format.BlockletBTreeIndex in the Apache CarbonData project.

In class CarbonMetadataUtil, method getBlockletIndex.

/**
 * Converts a core-metadata blocklet index into its thrift {@link BlockletIndex} form.
 *
 * <p>Max and min values are copied pairwise by position, driven by the number of max
 * values; the B-tree start and end keys are copied as they are.
 *
 * @param info the core-metadata blocklet index to convert
 * @return the equivalent thrift blocklet index
 */
public static BlockletIndex getBlockletIndex(org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex info) {
    byte[][] maxValues = info.getMinMaxIndex().getMaxValues();
    byte[][] minValues = info.getMinMaxIndex().getMinValues();

    BlockletMinMaxIndex thriftMinMax = new BlockletMinMaxIndex();
    int position = 0;
    while (position < maxValues.length) {
        thriftMinMax.addToMax_values(ByteBuffer.wrap(maxValues[position]));
        thriftMinMax.addToMin_values(ByteBuffer.wrap(minValues[position]));
        position++;
    }

    BlockletBTreeIndex thriftBTree = new BlockletBTreeIndex();
    thriftBTree.setStart_key(info.getBtreeIndex().getStartKey());
    thriftBTree.setEnd_key(info.getBtreeIndex().getEndKey());

    BlockletIndex converted = new BlockletIndex();
    converted.setMin_max_index(thriftMinMax);
    converted.setB_tree_index(thriftBTree);
    return converted;
}
Also used : BlockletBTreeIndex(org.apache.carbondata.format.BlockletBTreeIndex) BlockletIndex(org.apache.carbondata.format.BlockletIndex) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex)

Example 4 with BlockletBTreeIndex

Use of org.apache.carbondata.format.BlockletBTreeIndex in the Apache CarbonData project.

In class CarbonMetadataUtil, method getBlockletIndex.

/**
 * Builds the thrift {@link BlockletIndex} for a blocklet made up of the given encoded pages.
 *
 * <p>The max list and the min list of the resulting min/max index each contain the
 * dimension column values first, followed by the measure column values. The B-tree
 * start key is serialized from the first page's key and the end key from the last page's.
 *
 * <p>Exactly one {@code TablePageStatistics} is created per page; dimension and measure
 * ranges are folded in the same pass.
 *
 * @param encodedTablePageList pages of the blocklet; {@code get(0)} is read
 *        unconditionally, so the list must not be empty
 * @param carbonMeasureList measures whose data type drives the measure comparison;
 *        indexed in step with the measure min/max arrays
 * @return the populated thrift blocklet index
 */
public static BlockletIndex getBlockletIndex(List<EncodedTablePage> encodedTablePageList, List<CarbonMeasure> carbonMeasureList) {
    // Seed all running min/max arrays from the first page.
    EncodedTablePage firstPage = encodedTablePageList.get(0);
    TablePageStatistics stats = new TablePageStatistics(firstPage.getDimensions(), firstPage.getMeasures());
    byte[][] minCol = stats.getDimensionMinValue().clone();
    byte[][] maxCol = stats.getDimensionMaxValue().clone();
    byte[][] measureMaxValue = stats.getMeasureMaxValue().clone();
    byte[][] measureMinValue = stats.getMeasureMinValue().clone();

    // Fold in the remaining pages. The first page is skipped because comparing it
    // with its own seed values can never change them.
    int pageCount = encodedTablePageList.size();
    for (int i = 1; i < pageCount; i++) {
        EncodedTablePage page = encodedTablePageList.get(i);
        // Statistics depend only on the page, so they are built once per page rather
        // than once per (page, measure) pair.
        stats = new TablePageStatistics(page.getDimensions(), page.getMeasures());

        byte[][] columnMaxData = stats.getDimensionMaxValue();
        byte[][] columnMinData = stats.getDimensionMinValue();
        for (int d = 0; d < maxCol.length; d++) {
            if (ByteUtil.UnsafeComparer.INSTANCE.compareTo(columnMaxData[d], maxCol[d]) > 0) {
                maxCol[d] = columnMaxData[d];
            }
            if (ByteUtil.UnsafeComparer.INSTANCE.compareTo(columnMinData[d], minCol[d]) < 0) {
                minCol[d] = columnMinData[d];
            }
        }

        byte[][] pageMeasureMax = stats.getMeasureMaxValue();
        byte[][] pageMeasureMin = stats.getMeasureMinValue();
        for (int j = 0; j < measureMinValue.length; j++) {
            byte[] minVal = pageMeasureMin[j];
            byte[] maxVal = pageMeasureMax[j];
            if (compareMeasureData(measureMaxValue[j], maxVal, carbonMeasureList.get(j).getDataType()) < 0) {
                measureMaxValue[j] = maxVal.clone();
            }
            if (compareMeasureData(measureMinValue[j], minVal, carbonMeasureList.get(j).getDataType()) > 0) {
                measureMinValue[j] = minVal.clone();
            }
        }
    }

    // Write min/max to the thrift structure: dimensions first, then measures.
    BlockletMinMaxIndex blockletMinMaxIndex = new BlockletMinMaxIndex();
    for (byte[] max : maxCol) {
        blockletMinMaxIndex.addToMax_values(ByteBuffer.wrap(max));
    }
    for (byte[] min : minCol) {
        blockletMinMaxIndex.addToMin_values(ByteBuffer.wrap(min));
    }
    for (byte[] max : measureMaxValue) {
        blockletMinMaxIndex.addToMax_values(ByteBuffer.wrap(max));
    }
    for (byte[] min : measureMinValue) {
        blockletMinMaxIndex.addToMin_values(ByteBuffer.wrap(min));
    }

    // Key range of the blocklet: first page's start key to last page's end key.
    BlockletBTreeIndex blockletBTreeIndex = new BlockletBTreeIndex();
    byte[] startKey = firstPage.getPageKey().serializeStartKey();
    blockletBTreeIndex.setStart_key(startKey);
    byte[] endKey = encodedTablePageList.get(pageCount - 1).getPageKey().serializeEndKey();
    blockletBTreeIndex.setEnd_key(endKey);

    BlockletIndex blockletIndex = new BlockletIndex();
    blockletIndex.setMin_max_index(blockletMinMaxIndex);
    blockletIndex.setB_tree_index(blockletBTreeIndex);
    return blockletIndex;
}
Also used : BlockletBTreeIndex(org.apache.carbondata.format.BlockletBTreeIndex) BlockletIndex(org.apache.carbondata.format.BlockletIndex) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex) TablePageStatistics(org.apache.carbondata.core.datastore.page.statistics.TablePageStatistics) EncodedTablePage(org.apache.carbondata.core.datastore.page.EncodedTablePage)

Example 5 with BlockletBTreeIndex

Use of org.apache.carbondata.format.BlockletBTreeIndex in the Apache CarbonData project.

In class CarbonMetadataUtil, method getBlockletIndex.

/**
 * Builds the thrift {@link BlockletIndex} for a single columnar blocklet info.
 *
 * <p>Every column max and min value is wrapped and appended in iteration order, and the
 * start and end keys are copied into the B-tree index.
 *
 * @param info blocklet info supplying column min/max data and the start/end keys
 * @return the populated thrift blocklet index
 */
private static BlockletIndex getBlockletIndex(BlockletInfoColumnar info) {
    BlockletMinMaxIndex minMaxIndex = new BlockletMinMaxIndex();
    for (byte[] columnMax : info.getColumnMaxData()) {
        ByteBuffer wrappedMax = ByteBuffer.wrap(columnMax);
        minMaxIndex.addToMax_values(wrappedMax);
    }
    for (byte[] columnMin : info.getColumnMinData()) {
        ByteBuffer wrappedMin = ByteBuffer.wrap(columnMin);
        minMaxIndex.addToMin_values(wrappedMin);
    }

    BlockletBTreeIndex bTreeIndex = new BlockletBTreeIndex();
    bTreeIndex.setStart_key(info.getStartKey());
    bTreeIndex.setEnd_key(info.getEndKey());

    BlockletIndex result = new BlockletIndex();
    result.setMin_max_index(minMaxIndex);
    result.setB_tree_index(bTreeIndex);
    return result;
}
Also used : BlockletBTreeIndex(org.apache.carbondata.format.BlockletBTreeIndex) BlockletIndex(org.apache.carbondata.format.BlockletIndex) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex)

Aggregations

BlockletBTreeIndex (org.apache.carbondata.format.BlockletBTreeIndex)7 BlockletMinMaxIndex (org.apache.carbondata.format.BlockletMinMaxIndex)7 BlockletIndex (org.apache.carbondata.format.BlockletIndex)5 ArrayList (java.util.ArrayList)2 MockUp (mockit.MockUp)2 TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo)2 DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter)2 SegmentInfo (org.apache.carbondata.core.metadata.blocklet.SegmentInfo)2 ColumnSchema (org.apache.carbondata.format.ColumnSchema)2 Encoding (org.apache.carbondata.format.Encoding)2 Test (org.junit.Test)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1 DataInputStream (java.io.DataInputStream)1 FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory)1 FileReaderImpl (org.apache.carbondata.core.datastore.impl.FileReaderImpl)1 EncodedTablePage (org.apache.carbondata.core.datastore.page.EncodedTablePage)1 TablePageStatistics (org.apache.carbondata.core.datastore.page.statistics.TablePageStatistics)1 ThriftReader (org.apache.carbondata.core.reader.ThriftReader)1 BlockIndex (org.apache.carbondata.format.BlockIndex)1 FileFooter (org.apache.carbondata.format.FileFooter)1