
Example 11 with BlockIndex

use of org.apache.carbondata.format.BlockIndex in project carbondata by apache.

the class SegmentIndexFileStore method readIndexAndFillBlockletInfo.

/**
 * This method will read the index information from the carbon index file
 *
 * @param indexFile carbon index file to read
 * @throws IOException if the index file cannot be read
 */
private void readIndexAndFillBlockletInfo(CarbonFile indexFile) throws IOException {
    // flag to decide whether the carbondata file footer needs to be read.
    // If the index file does not contain the file footer information, the carbondata
    // file footer has to be read; otherwise it is not required
    boolean isCarbonDataFileFooterReadRequired = true;
    List<BlockletInfo> blockletInfoList = null;
    List<BlockIndex> blockIndexThrift = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
    try {
        indexReader.openThriftReader(indexFile.getCanonicalPath());
        // get the index header
        org.apache.carbondata.format.IndexHeader indexHeader = indexReader.readIndexHeader();
        DataFileFooterConverter fileFooterConverter = new DataFileFooterConverter();
        String filePath = indexFile.getCanonicalPath();
        String parentPath = filePath.substring(0, filePath.lastIndexOf(CarbonCommonConstants.FILE_SEPARATOR));
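        // iterate over every BlockIndex entry recorded in the index file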
        while (indexReader.hasNext()) {
            BlockIndex blockIndex = indexReader.readBlockIndexInfo();
            if (blockIndex.isSetBlocklet_info()) {
                // this case occurs when the segment index compaction property is set to false in
                // the application and alter table segment index compaction is run manually. The
                // blocklet info is then already present in the index file even though the
                // carbondata file footer read flag is still true
                isCarbonDataFileFooterReadRequired = false;
                break;
            } else {
                TableBlockInfo blockInfo = fileFooterConverter.getTableBlockInfo(blockIndex, indexHeader, parentPath);
                blockletInfoList = getBlockletInfoFromIndexInfo(blockInfo);
            }
            // the same entry needs to be repeated once per blocklet, each with its own blocklet info
            for (int i = 0; i < blockletInfoList.size(); i++) {
                BlockIndex blockIndexReplica = blockIndex.deepCopy();
                BlockletInfo blockletInfo = blockletInfoList.get(i);
                blockIndexReplica.setBlock_index(CarbonMetadataUtil.getBlockletIndex(blockletInfo.getBlockletIndex()));
                blockIndexReplica.setBlocklet_info(CarbonMetadataUtil.getBlocletInfo3(blockletInfo));
                blockIndexThrift.add(blockIndexReplica);
            }
        }
        // if the blocklet info was already present, read the complete index file at once
        if (!isCarbonDataFileFooterReadRequired) {
            readIndexFile(indexFile);
        } else {
            int totalSize = 0;
            List<byte[]> blockIndexByteArrayList = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
            byte[] indexHeaderBytes = CarbonUtil.getByteArray(indexHeader);
            totalSize += indexHeaderBytes.length;
            blockIndexByteArrayList.add(indexHeaderBytes);
            for (BlockIndex blockIndex : blockIndexThrift) {
                byte[] indexInfoBytes = CarbonUtil.getByteArray(blockIndex);
                totalSize += indexInfoBytes.length;
                blockIndexByteArrayList.add(indexInfoBytes);
            }
            ByteBuffer byteBuffer = ByteBuffer.allocate(totalSize);
            for (byte[] blockIndexBytes : blockIndexByteArrayList) {
                byteBuffer.put(blockIndexBytes);
            }
            carbonIndexMap.put(indexFile.getName(), byteBuffer.array());
        }
    } finally {
        indexReader.closeThriftReader();
    }
}
Also used : DataFileFooterConverter (org.apache.carbondata.core.util.DataFileFooterConverter), TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo), CarbonIndexFileReader (org.apache.carbondata.core.reader.CarbonIndexFileReader), BlockletInfo (org.apache.carbondata.core.metadata.blocklet.BlockletInfo), ArrayList (java.util.ArrayList), BlockIndex (org.apache.carbondata.format.BlockIndex), MergedBlockIndex (org.apache.carbondata.format.MergedBlockIndex), ByteBuffer (java.nio.ByteBuffer)
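
When the blocklet info is not already in the index file, the else branch above rebuilds it and then concatenates the thrift-serialized index header and the per-blocklet BlockIndex entries into a single byte array. A minimal sketch of that concatenation pattern is shown below; it is plain Java, and the serialize(...) helper is a hypothetical stand-in for CarbonUtil.getByteArray(...), not a CarbonData API.

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;

public class IndexBytesConcatSketch {

    // hypothetical stand-in for CarbonUtil.getByteArray(thriftObject)
    static byte[] serialize(Object thriftObject) {
        return thriftObject.toString().getBytes();
    }

    // mirrors the else branch above: header bytes first, then each BlockIndex entry,
    // all copied into one ByteBuffer of exactly the total size
    static byte[] concat(Object indexHeader, List<?> blockIndexEntries) {
        List<byte[]> parts = new ArrayList<>();
        int totalSize = 0;
        byte[] headerBytes = serialize(indexHeader);
        totalSize += headerBytes.length;
        parts.add(headerBytes);
        for (Object entry : blockIndexEntries) {
            byte[] entryBytes = serialize(entry);
            totalSize += entryBytes.length;
            parts.add(entryBytes);
        }
        ByteBuffer buffer = ByteBuffer.allocate(totalSize);
        for (byte[] part : parts) {
            buffer.put(part);
        }
        return buffer.array();
    }
}

The resulting array corresponds to what the method above caches per index file name in carbonIndexMap.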

Example 12 with BlockIndex

use of org.apache.carbondata.format.BlockIndex in project carbondata by apache.

the class DataFileFooterConverterTest method testGetIndexInfo.

@Test
public void testGetIndexInfo() throws Exception {
    DataFileFooterConverter dataFileFooterConverter = new DataFileFooterConverter();
    final ThriftReader thriftReader = new ThriftReader("file");
    List<Encoding> encoders = new ArrayList<>();
    encoders.add(Encoding.INVERTED_INDEX);
    encoders.add(Encoding.BIT_PACKED);
    encoders.add(Encoding.DELTA);
    encoders.add(Encoding.DICTIONARY);
    encoders.add(Encoding.DIRECT_DICTIONARY);
    encoders.add(Encoding.RLE);
    ColumnSchema columnSchema = new ColumnSchema(DataType.INT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema1 = new ColumnSchema(DataType.ARRAY, "column", "3", true, encoders, true);
    ColumnSchema columnSchema2 = new ColumnSchema(DataType.DECIMAL, "column", "3", true, encoders, true);
    ColumnSchema columnSchema3 = new ColumnSchema(DataType.DOUBLE, "column", "3", true, encoders, true);
    ColumnSchema columnSchema4 = new ColumnSchema(DataType.LONG, "column", "3", true, encoders, true);
    ColumnSchema columnSchema5 = new ColumnSchema(DataType.SHORT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema6 = new ColumnSchema(DataType.STRUCT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema7 = new ColumnSchema(DataType.STRING, "column", "3", true, encoders, true);
    final List<ColumnSchema> columnSchemas = new ArrayList<>();
    columnSchemas.add(columnSchema);
    columnSchemas.add(columnSchema1);
    columnSchemas.add(columnSchema2);
    columnSchemas.add(columnSchema3);
    columnSchemas.add(columnSchema4);
    columnSchemas.add(columnSchema5);
    columnSchemas.add(columnSchema6);
    columnSchemas.add(columnSchema7);
    final BlockIndex blockIndex = new BlockIndex();
    blockIndex.setBlock_index(new org.apache.carbondata.format.BlockletIndex());
    org.apache.carbondata.format.BlockletIndex blockletIndex1 = new org.apache.carbondata.format.BlockletIndex();
    BlockletBTreeIndex blockletBTreeIndex = new BlockletBTreeIndex();
    blockletBTreeIndex.setStart_key("1".getBytes());
    blockletBTreeIndex.setEnd_key("3".getBytes());
    blockletIndex1.setB_tree_index(blockletBTreeIndex);
    BlockletMinMaxIndex blockletMinMaxIndex = new BlockletMinMaxIndex();
    blockletMinMaxIndex.setMax_values(Arrays.asList(ByteBuffer.allocate(1).put((byte) 2)));
    blockletMinMaxIndex.setMin_values(Arrays.asList(ByteBuffer.allocate(1).put((byte) 1)));
    blockletIndex1.setMin_max_index(blockletMinMaxIndex);
    blockIndex.setBlock_index(blockletIndex1);
    List<Integer> column_cardinalities = new ArrayList<>();
    column_cardinalities.add(new Integer("1"));
    final org.apache.carbondata.format.SegmentInfo segmentInfo1 = new org.apache.carbondata.format.SegmentInfo(3, column_cardinalities);
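    // JMockit MockUp: redefine CarbonIndexFileReader so the converter reads the in-memory
    // blockIndex and indexHeader built above instead of opening a real index file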
    new MockUp<CarbonIndexFileReader>() {

        boolean mockedHasNextStatus = true;

        @SuppressWarnings("unused")
        @Mock
        public boolean hasNext() throws IOException {
            boolean temp = mockedHasNextStatus;
            mockedHasNextStatus = false;
            return temp;
        }

        @SuppressWarnings("unused")
        @Mock
        public void openThriftReader(String filePath) throws IOException {
            thriftReader.open();
        }

        @SuppressWarnings("unused")
        @Mock
        public IndexHeader readIndexHeader() throws IOException {
            return new IndexHeader(1, columnSchemas, segmentInfo1);
        }

        @SuppressWarnings("unused")
        @Mock
        public BlockIndex readBlockIndexInfo() throws IOException {
            return blockIndex;
        }

        @SuppressWarnings("unused")
        @Mock
        public void closeThriftReader() {
            thriftReader.close();
        }
    };
    new MockUp<IndexHeader>() {

        @SuppressWarnings("unused")
        @Mock
        public List<ColumnSchema> getTable_columns() {
            return columnSchemas;
        }
    };
    ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream("1".getBytes());
    final DataInputStream dataInputStream = new DataInputStream(byteArrayInputStream);
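    // fake FileFactory so reads come from the in-memory stream instead of the file system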
    new MockUp<FileFactory>() {

        @SuppressWarnings("unused")
        @Mock
        public DataInputStream getDataInputStream(String path, FileFactory.FileType fileType, int bufferSize) {
            return dataInputStream;
        }
    };
    String[] arr = { "a", "b", "c" };
    String fileName = "/part-0-0_batchno0-0-1495074251740.carbondata";
    TableBlockInfo tableBlockInfo = new TableBlockInfo(fileName, 3, "id", arr, 3, ColumnarFormatVersion.V1, null);
    tableBlockInfo.getBlockletInfos().setNoOfBlockLets(3);
    List<TableBlockInfo> tableBlockInfoList = new ArrayList<>();
    tableBlockInfoList.add(tableBlockInfo);
    String idxFileName = "0_batchno0-0-1495074251740.carbonindex";
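    // convert the index file into DataFileFooter instances via the mocked reader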
    List<DataFileFooter> dataFileFooterList = dataFileFooterConverter.getIndexInfo(idxFileName, tableBlockInfoList);
    byte[] exp = dataFileFooterList.get(0).getBlockletIndex().getBtreeIndex().getStartKey();
    byte[] res = "1".getBytes();
    for (int i = 0; i < exp.length; i++) {
        assertEquals(exp[i], res[i]);
    }
}
Also used : IndexHeader (org.apache.carbondata.format.IndexHeader), TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo), ArrayList (java.util.ArrayList), ColumnSchema (org.apache.carbondata.format.ColumnSchema), MockUp (mockit.MockUp), ThriftReader (org.apache.carbondata.core.reader.ThriftReader), DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter), BlockletMinMaxIndex (org.apache.carbondata.format.BlockletMinMaxIndex), BlockletBTreeIndex (org.apache.carbondata.format.BlockletBTreeIndex), Encoding (org.apache.carbondata.format.Encoding), DataInputStream (java.io.DataInputStream), BlockIndex (org.apache.carbondata.format.BlockIndex), ByteArrayInputStream (java.io.ByteArrayInputStream), SegmentInfo (org.apache.carbondata.core.metadata.blocklet.SegmentInfo), Test (org.junit.Test)
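
The test above relies on JMockit's MockUp/@Mock pair to replace the behaviour of CarbonIndexFileReader, IndexHeader, and FileFactory for the duration of the test. A minimal, standalone illustration of that mocking pattern is sketched below; Reader is a hypothetical class used only for this example, assuming JMockit and JUnit 4 are on the classpath.

import static org.junit.Assert.assertEquals;

import mockit.Mock;
import mockit.MockUp;
import org.junit.Test;

public class MockUpPatternSketch {

    // hypothetical class standing in for CarbonIndexFileReader
    static class Reader {
        String read() {
            return "from disk";
        }
    }

    @Test
    public void readIsFaked() {
        // MockUp redefines Reader.read() for every instance until the test finishes
        new MockUp<Reader>() {
            @Mock
            String read() {
                return "from mock";
            }
        };
        assertEquals("from mock", new Reader().read());
    }
}

Note that JMockit normally has to be registered as a Java agent when the tests run.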

Aggregations

BlockIndex (org.apache.carbondata.format.BlockIndex) 12
ArrayList (java.util.ArrayList) 7
CarbonIndexFileReader (org.apache.carbondata.core.reader.CarbonIndexFileReader) 7
CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile) 5
FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory) 5
TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo) 4
DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter) 3
SegmentInfo (org.apache.carbondata.core.metadata.blocklet.SegmentInfo) 3
ByteBuffer (java.nio.ByteBuffer) 2
BlockInfo (org.apache.carbondata.core.datastore.block.BlockInfo) 2
BlockletInfo (org.apache.carbondata.core.metadata.blocklet.BlockletInfo) 2
BlockletIndex (org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex) 2
BlockIndexInfo (org.apache.carbondata.core.metadata.index.BlockIndexInfo) 2
ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) 2
CarbonIndexFileWriter (org.apache.carbondata.core.writer.CarbonIndexFileWriter) 2
BlockletIndex (org.apache.carbondata.format.BlockletIndex) 2
BlockletMinMaxIndex (org.apache.carbondata.format.BlockletMinMaxIndex) 2
IndexHeader (org.apache.carbondata.format.IndexHeader) 2
Test (org.junit.Test) 2
ByteArrayInputStream (java.io.ByteArrayInputStream) 1