Search in sources :

Example 6 with BlockIndex

use of org.apache.carbondata.format.BlockIndex in project carbondata by apache.

the class CarbonMetadataUtilTest method testGetBlockIndexInfo.

/**
 * Verifies that getBlockIndexInfo copies the file name of a BlockIndexInfo
 * into the thrift BlockIndex it produces.
 */
@Test
public void testGetBlockIndexInfo() throws Exception {
    // btree start/end keys for the single blocklet index under test
    byte[] startKey = { 1, 2, 3, 4, 5 };
    byte[] endKey = { 9, 3, 5, 5, 5 };
    // min / max values wrapped as single-element lists
    byte[] byteArr = { 1, 2, 3, 4, 5 };
    List<ByteBuffer> minList = new ArrayList<>();
    minList.add(ByteBuffer.wrap(byteArr));
    byte[] byteArr1 = { 9, 9, 8, 6, 7 };
    List<ByteBuffer> maxList = new ArrayList<>();
    maxList.add(ByteBuffer.wrap(byteArr1));
    // fully qualified names: the simple names are already imported from the thrift format package
    org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex blockletMinMaxIndex = new org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex(minList, maxList);
    org.apache.carbondata.core.metadata.blocklet.index.BlockletBTreeIndex blockletBTreeIndex = new org.apache.carbondata.core.metadata.blocklet.index.BlockletBTreeIndex(startKey, endKey);
    org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex blockletIndex = new org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex(blockletBTreeIndex, blockletMinMaxIndex);
    BlockIndexInfo blockIndexInfo = new BlockIndexInfo(1, "file", 1, blockletIndex);
    List<BlockIndexInfo> blockIndexInfoList = new ArrayList<>();
    blockIndexInfoList.add(blockIndexInfo);
    List<BlockIndex> result = getBlockIndexInfo(blockIndexInfoList);
    String expected = "file";
    // expected value goes first so a failure message labels the values correctly
    assertEquals(expected, result.get(0).file_name);
}
Also used : BlockletIndex(org.apache.carbondata.format.BlockletIndex) CarbonMetadataUtil.getBlockletIndex(org.apache.carbondata.core.util.CarbonMetadataUtil.getBlockletIndex) ArrayList(java.util.ArrayList) ByteBuffer(java.nio.ByteBuffer) BlockIndex(org.apache.carbondata.format.BlockIndex) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex) CarbonMetadataUtil.getBlockIndexInfo(org.apache.carbondata.core.util.CarbonMetadataUtil.getBlockIndexInfo) BlockIndexInfo(org.apache.carbondata.core.metadata.index.BlockIndexInfo) Test(org.junit.Test)

Example 7 with BlockIndex

use of org.apache.carbondata.format.BlockIndex in project carbondata by apache.

the class AbstractDataFileFooterConverter method getIndexInfo.

/**
 * Reads a carbon index file and builds one {@link DataFileFooter} for every
 * block index entry found in it.
 *
 * @param filePath file path of the index file; the part before the last "/" is
 *                 passed to getTableBlockInfo as the parent path
 * @param fileData when not null, the reader is opened on these bytes instead of
 *                 on {@code filePath}
 * @return list of index info, one footer per block index entry in read order
 * @throws IOException problem while reading the index file
 */
public List<DataFileFooter> getIndexInfo(String filePath, byte[] fileData) throws IOException {
    CarbonIndexFileReader reader = new CarbonIndexFileReader();
    List<DataFileFooter> footers = new ArrayList<DataFileFooter>();
    int lastSeparator = filePath.lastIndexOf("/");
    String parentPath = filePath.substring(0, lastSeparator);
    try {
        // prefer the in-memory content when the caller supplied it
        if (fileData == null) {
            reader.openThriftReader(filePath);
        } else {
            reader.openThriftReader(fileData);
        }
        // the header carries the table columns and the segment info shared by all blocks
        org.apache.carbondata.format.IndexHeader header = reader.readIndexHeader();
        List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
        List<org.apache.carbondata.format.ColumnSchema> thriftColumns = header.getTable_columns();
        for (org.apache.carbondata.format.ColumnSchema thriftColumn : thriftColumns) {
            columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(thriftColumn));
        }
        SegmentInfo segmentInfo = getSegmentInfo(header.getSegment_info());
        // one footer per block index entry
        while (reader.hasNext()) {
            BlockIndex thriftBlockIndex = reader.readBlockIndexInfo();
            BlockletIndex blockletIndex = getBlockletIndex(thriftBlockIndex.getBlock_index());
            DataFileFooter footer = new DataFileFooter();
            TableBlockInfo tableBlockInfo = getTableBlockInfo(thriftBlockIndex, header, parentPath);
            footer.setBlockletIndex(blockletIndex);
            footer.setColumnInTable(columnSchemaList);
            footer.setNumberOfRows(thriftBlockIndex.getNum_rows());
            footer.setBlockInfo(new BlockInfo(tableBlockInfo));
            footer.setSegmentInfo(segmentInfo);
            footer.setVersionId(tableBlockInfo.getVersion());
            // index files written with an old schema have no time stamp in the header
            if (header.isSetSchema_time_stamp()) {
                footer.setSchemaUpdatedTimeStamp(header.getSchema_time_stamp());
            }
            // blocklet info is optional in the entry; attach it only when present
            if (thriftBlockIndex.isSetBlocklet_info()) {
                List<BlockletInfo> blockletInfos = new ArrayList<BlockletInfo>();
                BlockletInfo convertedInfo = new DataFileFooterConverterV3().getBlockletInfo(thriftBlockIndex.getBlocklet_info(), CarbonUtil.getNumberOfDimensionColumns(columnSchemaList));
                convertedInfo.setBlockletIndex(blockletIndex);
                blockletInfos.add(convertedInfo);
                footer.setBlockletList(blockletInfos);
            }
            footers.add(footer);
        }
    } finally {
        reader.closeThriftReader();
    }
    return footers;
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) CarbonIndexFileReader(org.apache.carbondata.core.reader.CarbonIndexFileReader) BlockletIndex(org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex) ArrayList(java.util.ArrayList) BlockletInfo(org.apache.carbondata.core.metadata.blocklet.BlockletInfo) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) BlockIndex(org.apache.carbondata.format.BlockIndex) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter) BlockInfo(org.apache.carbondata.core.datastore.block.BlockInfo) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) SegmentInfo(org.apache.carbondata.core.metadata.blocklet.SegmentInfo)

Example 8 with BlockIndex

use of org.apache.carbondata.format.BlockIndex in project carbondata by apache.

the class CarbonMetadataUtil method getBlockIndexInfo.

/**
 * Converts each {@link BlockIndexInfo} of a segment into its thrift
 * {@link BlockIndex} counterpart.
 *
 * @param blockIndexInfoList block index info list
 * @return list of thrift block index objects, in the same order as the input
 */
public static List<BlockIndex> getBlockIndexInfo(List<BlockIndexInfo> blockIndexInfoList) {
    List<BlockIndex> converted = new ArrayList<BlockIndex>();
    for (BlockIndexInfo source : blockIndexInfoList) {
        BlockIndex target = new BlockIndex();
        target.setNum_rows(source.getNumberOfRows());
        target.setOffset(source.getOffset());
        target.setFile_name(source.getFileName());
        target.setBlock_index(getBlockletIndex(source.getBlockletIndex()));
        // blocklet info is copied only when the source carries one
        if (null != source.getBlockletInfo()) {
            target.setBlocklet_info(getBlocletInfo3(source.getBlockletInfo()));
        }
        converted.add(target);
    }
    return converted;
}
Also used : ArrayList(java.util.ArrayList) BlockIndexInfo(org.apache.carbondata.core.metadata.index.BlockIndexInfo) BlockIndex(org.apache.carbondata.format.BlockIndex)

Example 9 with BlockIndex

use of org.apache.carbondata.format.BlockIndex in project carbondata by apache.

the class AbstractFactDataWriter method writeIndexFile.

/**
 * Below method will be used to write the index file.
 *
 * The file is written either directly to its HDFS location or to a randomly
 * chosen temp location, depending on {@code enableDirectlyWriteData2Hdfs}.
 * After writing, it is either handed to a {@code CompleteHdfsBackendThread}
 * or copied to the carbon data directory.
 *
 * @throws IOException               throws io exception if any problem while writing
 * @throws CarbonDataWriterException data writing
 */
protected void writeIndexFile() throws IOException, CarbonDataWriterException {
    // get the header
    IndexHeader indexHeader = CarbonMetadataUtil.getIndexHeader(localCardinality, thriftColumnSchemaList, model.getBucketId(), model.getSchemaUpdatedTimeStamp());
    // get the block index info thrift
    List<BlockIndex> blockIndexThrift = CarbonMetadataUtil.getBlockIndexInfo(blockIndexInfoList);
    // the bare file name is the same for both destinations; only the directory differs
    String indexFileBaseName = CarbonTablePath.getCarbonIndexFileName(model.getCarbonDataFileAttributes().getTaskId(), model.getBucketId(), model.getTaskExtension(), "" + model.getCarbonDataFileAttributes().getFactTimeStamp());
    String indexFileName;
    if (enableDirectlyWriteData2Hdfs) {
        String rawFileName = model.getCarbonDataDirectoryPath() + File.separator + indexFileBaseName;
        indexFileName = FileFactory.getUpdatedFilePath(rawFileName, FileFactory.FileType.HDFS);
    } else {
        // randomly choose a temp location for index file
        String[] tempLocations = model.getStoreLocation();
        String chosenTempLocation = tempLocations[new Random().nextInt(tempLocations.length)];
        LOGGER.info("Randomly choose index file location: " + chosenTempLocation);
        indexFileName = chosenTempLocation + File.separator + indexFileBaseName;
    }
    CarbonIndexFileWriter writer = new CarbonIndexFileWriter();
    // open file
    writer.openThriftWriter(indexFileName);
    try {
        // write the header first
        writer.writeThrift(indexHeader);
        // write the indexes
        for (BlockIndex blockIndex : blockIndexThrift) {
            writer.writeThrift(blockIndex);
        }
    } finally {
        // close even when a write fails so the opened thrift writer is not leaked
        writer.close();
    }
    if (enableDirectlyWriteData2Hdfs) {
        executorServiceSubmitList.add(executorService.submit(new CompleteHdfsBackendThread(indexFileName, FileFactory.FileType.HDFS)));
    } else {
        CarbonUtil.copyCarbonDataFileToCarbonStorePath(indexFileName, model.getCarbonDataDirectoryPath(), fileSizeInBytes);
    }
}
Also used : IndexHeader(org.apache.carbondata.format.IndexHeader) CarbonIndexFileWriter(org.apache.carbondata.core.writer.CarbonIndexFileWriter) Random(java.util.Random) BlockIndex(org.apache.carbondata.format.BlockIndex)

Example 10 with BlockIndex

use of org.apache.carbondata.format.BlockIndex in project carbondata by apache.

the class AbstractDataFileFooterConverter method getIndexInfo.

/**
 * Reads a carbon index file and builds a {@link DataFileFooter} for each of the
 * given table blocks whose part number matches the position of an entry in
 * the index file.
 *
 * @param filePath           file path of the index file
 * @param tableBlockInfoList blocks to match against the index entries; every matched
 *                           element gets its block offset, version and blocklet count set
 * @return list of index info, one footer per matched block
 * @throws IOException problem while reading the index file
 */
public List<DataFileFooter> getIndexInfo(String filePath, List<TableBlockInfo> tableBlockInfoList) throws IOException {
    CarbonIndexFileReader reader = new CarbonIndexFileReader();
    List<DataFileFooter> footers = new ArrayList<DataFileFooter>();
    try {
        reader.openThriftReader(filePath);
        // the header carries the table columns and the segment info shared by all blocks
        org.apache.carbondata.format.IndexHeader header = reader.readIndexHeader();
        List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
        List<org.apache.carbondata.format.ColumnSchema> thriftColumns = header.getTable_columns();
        for (org.apache.carbondata.format.ColumnSchema thriftColumn : thriftColumns) {
            columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(thriftColumn));
        }
        SegmentInfo segmentInfo = getSegmentInfo(header.getSegment_info());
        // position of the entry currently read from the index file
        int entryPosition = 0;
        // position of the next block in tableBlockInfoList still waiting for a match
        int pendingBlock = 0;
        while (reader.hasNext()) {
            BlockIndex thriftBlockIndex = reader.readBlockIndexInfo();
            BlockletIndex blockletIndex = getBlockletIndex(thriftBlockIndex.getBlock_index());
            DataFileFooter footer = new DataFileFooter();
            TableBlockInfo tableBlockInfo = tableBlockInfoList.get(pendingBlock);
            int partNo = Integer.parseInt(CarbonTablePath.DataFileUtil.getPartNo(tableBlockInfo.getFilePath()));
            boolean matchesEntry = partNo == entryPosition;
            // the entry position advances for every entry read, matched or not
            entryPosition++;
            if (!matchesEntry) {
                continue;
            }
            tableBlockInfo.setBlockOffset(thriftBlockIndex.getOffset());
            tableBlockInfo.setVersion(ColumnarFormatVersion.valueOf((short) header.getVersion()));
            int blockletSize = getBlockletSize(thriftBlockIndex);
            tableBlockInfo.getBlockletInfos().setNoOfBlockLets(blockletSize);
            footer.setBlockletIndex(blockletIndex);
            footer.setColumnInTable(columnSchemaList);
            footer.setNumberOfRows(thriftBlockIndex.getNum_rows());
            footer.setBlockInfo(new BlockInfo(tableBlockInfo));
            footer.setSegmentInfo(segmentInfo);
            footers.add(footer);
            // stop reading as soon as every requested block has been matched
            pendingBlock++;
            if (pendingBlock == tableBlockInfoList.size()) {
                break;
            }
        }
    } finally {
        reader.closeThriftReader();
    }
    return footers;
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) CarbonIndexFileReader(org.apache.carbondata.core.reader.CarbonIndexFileReader) BlockletIndex(org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) BlockIndex(org.apache.carbondata.format.BlockIndex) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter) BlockInfo(org.apache.carbondata.core.datastore.block.BlockInfo) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) SegmentInfo(org.apache.carbondata.core.metadata.blocklet.SegmentInfo)

Aggregations

BlockIndex (org.apache.carbondata.format.BlockIndex)12 ArrayList (java.util.ArrayList)7 CarbonIndexFileReader (org.apache.carbondata.core.reader.CarbonIndexFileReader)7 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)5 FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory)5 TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo)4 DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter)3 SegmentInfo (org.apache.carbondata.core.metadata.blocklet.SegmentInfo)3 ByteBuffer (java.nio.ByteBuffer)2 BlockInfo (org.apache.carbondata.core.datastore.block.BlockInfo)2 BlockletInfo (org.apache.carbondata.core.metadata.blocklet.BlockletInfo)2 BlockletIndex (org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex)2 BlockIndexInfo (org.apache.carbondata.core.metadata.index.BlockIndexInfo)2 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)2 CarbonIndexFileWriter (org.apache.carbondata.core.writer.CarbonIndexFileWriter)2 BlockletIndex (org.apache.carbondata.format.BlockletIndex)2 BlockletMinMaxIndex (org.apache.carbondata.format.BlockletMinMaxIndex)2 IndexHeader (org.apache.carbondata.format.IndexHeader)2 Test (org.junit.Test)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1