Search in sources :

Example 11 with CarbonIndexFileReader

use of org.apache.carbondata.core.reader.CarbonIndexFileReader in project carbondata by apache.

the class StreamPruner method listAllStreamFiles.

// TODO optimize and move the code to StreamSegment , but it's in the streaming module.
/**
 * Builds a {@link StreamFile} for every block entry found in the stream index file of each
 * given segment, and stores the resulting count in {@code totalFileNums}.
 *
 * @param segments   segments whose stream index files are scanned; a segment without an
 *                   existing index file contributes nothing
 * @param withMinMax when true, the converted min/max index is attached to each stream file
 *                   whose block entry carries both a block index and a min/max index
 * @return the stream files of all segments, in the order they were read
 * @throws IOException declared for the index file existence check and the index reads
 */
private List<StreamFile> listAllStreamFiles(List<Segment> segments, boolean withMinMax) throws IOException {
    List<StreamFile> collected = new ArrayList<>();
    for (Segment segment : segments) {
        String tablePath = carbonTable.getAbsoluteTableIdentifier().getTablePath();
        String segmentDir = CarbonTablePath.getSegmentPath(tablePath, segment.getSegmentNo());
        String indexPath = CarbonTablePath.getCarbonStreamIndexFilePath(segmentDir);
        if (!FileFactory.isFileExist(indexPath)) {
            // no stream index file for this segment, so there is nothing to list
            continue;
        }
        CarbonIndexFileReader reader = new CarbonIndexFileReader();
        reader.openThriftReader(indexPath);
        try {
            while (reader.hasNext()) {
                BlockIndex entry = reader.readBlockIndexInfo();
                String dataFilePath = segmentDir + File.separator + entry.getFile_name();
                long dataFileSize = entry.getFile_size();
                StreamFile streamFile = new StreamFile(segment.getSegmentNo(), dataFilePath, dataFileSize);
                collected.add(streamFile);
                // min/max is optional in the index entry; only attach it when requested and present
                if (withMinMax && entry.getBlock_index() != null && entry.getBlock_index().getMin_max_index() != null) {
                    streamFile.setMinMaxIndex(CarbonMetadataUtil.convertExternalMinMaxIndex(entry.getBlock_index().getMin_max_index()));
                }
            }
        } finally {
            reader.closeThriftReader();
        }
    }
    totalFileNums = collected.size();
    return collected;
}
Also used : CarbonIndexFileReader(org.apache.carbondata.core.reader.CarbonIndexFileReader) ArrayList(java.util.ArrayList) BlockIndex(org.apache.carbondata.format.BlockIndex) Segment(org.apache.carbondata.core.index.Segment)

Example 12 with CarbonIndexFileReader

use of org.apache.carbondata.core.reader.CarbonIndexFileReader in project carbondata by apache.

the class AbstractDataFileFooterConverter method getIndexInfo.

/**
 * Reads a carbon index file and converts every block entry in it into a
 * {@link DataFileFooter}.
 *
 * @param filePath             path of the index file; its parent directory (after replacing
 *                             backslashes with forward slashes) is passed on when building
 *                             each block's {@link TableBlockInfo}
 * @param fileData             index file content held in memory; when non-null it is read
 *                             instead of opening {@code filePath}
 * @param isTransactionalTable when false, the column unique ids of the converted schema are
 *                             updated through
 *                             {@code QueryUtil.updateColumnUniqueIdForNonTransactionTable}
 * @return one footer per block entry, in file order
 * @throws IOException declared for opening and reading the index
 */
public List<DataFileFooter> getIndexInfo(String filePath, byte[] fileData, boolean isTransactionalTable) throws IOException {
    CarbonIndexFileReader indexReader = new CarbonIndexFileReader(configuration);
    List<DataFileFooter> footers = new ArrayList<DataFileFooter>();
    String normalizedPath = filePath.replace("\\", "/");
    String parentDir = normalizedPath.substring(0, normalizedPath.lastIndexOf("/"));
    try {
        // prefer the in-memory bytes when the caller supplied them
        if (fileData == null) {
            indexReader.openThriftReader(filePath);
        } else {
            indexReader.openThriftReader(fileData);
        }
        org.apache.carbondata.format.IndexHeader header = indexReader.readIndexHeader();
        List<ColumnSchema> columns = convertColumnSchemaList(header.getTable_columns());
        if (!isTransactionalTable) {
            QueryUtil.updateColumnUniqueIdForNonTransactionTable(columns);
        }
        // one footer per block entry in the index
        while (indexReader.hasNext()) {
            BlockIndex entry = indexReader.readBlockIndexInfo();
            BlockletIndex blockletIndex = getBlockletIndex(entry.getBlock_index());
            DataFileFooter footer = new DataFileFooter();
            TableBlockInfo blockInfo = getTableBlockInfo(entry, header, parentDir);
            footer.setBlockletIndex(blockletIndex);
            footer.setColumnInTable(columns);
            footer.setNumberOfRows(entry.getNum_rows());
            footer.setBlockInfo(blockInfo);
            footer.setVersionId(blockInfo.getVersion());
            // In case of old schema time stamp will not be found in the index header
            if (readHeaderHasTimestamp(header)) {
                footer.setSchemaUpdatedTimeStamp(header.getSchema_time_stamp());
            }
            if (entry.isSetBlocklet_info()) {
                BlockletInfo blockletInfo = new DataFileFooterConverterV3(configuration).getBlockletInfo(entry.getBlocklet_info(), CarbonUtil.getNumberOfDimensionColumns(columns));
                blockletInfo.setBlockletIndex(blockletIndex);
                List<BlockletInfo> blocklets = new ArrayList<BlockletInfo>();
                blocklets.add(blockletInfo);
                footer.setBlockletList(blocklets);
            }
            footers.add(footer);
        }
    } finally {
        indexReader.closeThriftReader();
    }
    return footers;
}

/** Tells whether the index header carries a schema time stamp. */
private static boolean readHeaderHasTimestamp(org.apache.carbondata.format.IndexHeader header) {
    return header.isSetSchema_time_stamp();
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) CarbonIndexFileReader(org.apache.carbondata.core.reader.CarbonIndexFileReader) BlockletIndex(org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex) ArrayList(java.util.ArrayList) BlockletInfo(org.apache.carbondata.core.metadata.blocklet.BlockletInfo) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) BlockIndex(org.apache.carbondata.format.BlockIndex) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter)

Example 13 with CarbonIndexFileReader

use of org.apache.carbondata.core.reader.CarbonIndexFileReader in project carbondata by apache.

the class AbstractDataFileFooterConverter method getIndexInfo.

/**
 * Reads the index file and builds a {@link DataFileFooter} for each entry of
 * {@code tableBlockInfoList} whose part number matches the position of a block entry in the
 * index file.
 *
 * <p>Block entries are read in file order with a running position counter. The current
 * element of {@code tableBlockInfoList} is matched when the part number parsed from its file
 * path equals that counter; only then is the list cursor advanced. Each matched
 * {@link TableBlockInfo} is updated in place with the block offset and format version read
 * from the index. Reading stops as soon as every list element has been matched, so an empty
 * list yields an empty result after the header has been read.
 *
 * @param filePath           file path of the index file
 * @param tableBlockInfoList table block infos to resolve against the index file
 * @return list of index info, one footer per matched table block info
 * @throws IOException problem while reading the index file
 */
public List<DataFileFooter> getIndexInfo(String filePath, List<TableBlockInfo> tableBlockInfoList) throws IOException {
    CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
    List<DataFileFooter> dataFileFooters = new ArrayList<DataFileFooter>();
    try {
        // open the reader
        indexReader.openThriftReader(filePath);
        // get the index header
        org.apache.carbondata.format.IndexHeader readIndexHeader = indexReader.readIndexHeader();
        List<ColumnSchema> columnSchemaList = convertColumnSchemaList(readIndexHeader.getTable_columns());
        // position of the block entry currently being read from the index file
        int counter = 0;
        // cursor into tableBlockInfoList; advances only when an entry is matched
        int index = 0;
        // Bounding the loop on the cursor replaces the former in-loop break and also prevents
        // get(index) from throwing IndexOutOfBoundsException when tableBlockInfoList is empty.
        while (index < tableBlockInfoList.size() && indexReader.hasNext()) {
            BlockIndex readBlockIndexInfo = indexReader.readBlockIndexInfo();
            BlockletIndex blockletIndex = getBlockletIndex(readBlockIndexInfo.getBlock_index());
            DataFileFooter dataFileFooter = new DataFileFooter();
            TableBlockInfo tableBlockInfo = tableBlockInfoList.get(index);
            // counter is incremented for every block entry, matched or not
            if (Integer.parseInt(CarbonTablePath.DataFileUtil.getPartNo(tableBlockInfo.getFilePath())) == counter++) {
                tableBlockInfo.setBlockOffset(readBlockIndexInfo.getOffset());
                tableBlockInfo.setVersion(ColumnarFormatVersion.valueOf((short) readIndexHeader.getVersion()));
                dataFileFooter.setBlockletIndex(blockletIndex);
                dataFileFooter.setColumnInTable(columnSchemaList);
                dataFileFooter.setNumberOfRows(readBlockIndexInfo.getNum_rows());
                dataFileFooter.setBlockInfo(tableBlockInfo);
                if (readIndexHeader.isSetIs_sort()) {
                    dataFileFooter.setSorted(readIndexHeader.isIs_sort());
                } else if (tableBlockInfo.getVersion() == ColumnarFormatVersion.V3) {
                    // header carries no sort flag: explicitly mark it as unknown for V3 files
                    dataFileFooter.setSorted(null);
                }
                dataFileFooters.add(dataFileFooter);
                index++;
            }
        }
    } finally {
        indexReader.closeThriftReader();
    }
    return dataFileFooters;
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) CarbonIndexFileReader(org.apache.carbondata.core.reader.CarbonIndexFileReader) BlockletIndex(org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) BlockIndex(org.apache.carbondata.format.BlockIndex) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter)

Example 14 with CarbonIndexFileReader

use of org.apache.carbondata.core.reader.CarbonIndexFileReader in project carbondata by apache.

the class SegmentIndexFileStore method readIndexAndFillBlockletInfo.

/**
 * Reads the given carbon index file and makes sure its content, including blocklet
 * information, ends up cached.
 *
 * <p>If a block entry in the file already carries blocklet info, the file is handed to
 * {@code readIndexFile} as is. Otherwise every block entry is replicated once per blocklet
 * returned by {@code getBlockletInfoFromIndexInfo}, each replica is filled with that
 * blocklet's index and info, and the serialized header plus replicas are stored in
 * {@code carbonIndexMap} under the index file name.
 *
 * @param indexFile carbon index file to read
 * @throws IOException declared for opening and reading the index file
 */
private void readIndexAndFillBlockletInfo(CarbonFile indexFile) throws IOException {
    // Stays true while no entry with embedded blocklet info has been seen, i.e. while the
    // blocklet details still have to be obtained for each block entry.
    boolean footerReadRequired = true;
    List<BlockIndex> expandedEntries = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
    try {
        indexReader.openThriftReader(indexFile.getCanonicalPath());
        org.apache.carbondata.format.IndexHeader header = indexReader.readIndexHeader();
        DataFileFooterConverter converter = new DataFileFooterConverter(FileFactory.getConfiguration());
        String updatedPath = FileFactory.getUpdatedFilePath(indexFile.getCanonicalPath());
        String parentDir = updatedPath.substring(0, updatedPath.lastIndexOf(CarbonCommonConstants.FILE_SEPARATOR));
        while (indexReader.hasNext()) {
            BlockIndex entry = indexReader.readBlockIndexInfo();
            if (entry.isSetBlocklet_info()) {
                // this case will come in case segment index compaction property is set to false
                // from the application and alter table segment index compaction is run manually.
                // In that case blocklet info will be present in the index but read carbon data
                // file footer property will be true
                footerReadRequired = false;
                break;
            }
            TableBlockInfo blockInfo = converter.getTableBlockInfo(entry, header, parentDir);
            List<BlockletInfo> blocklets = getBlockletInfoFromIndexInfo(blockInfo);
            // the same entry with different blocklet info need to be repeated
            for (BlockletInfo blocklet : blocklets) {
                BlockIndex replica = entry.deepCopy();
                replica.setBlock_index(CarbonMetadataUtil.getBlockletIndex(blocklet.getBlockletIndex()));
                replica.setBlocklet_info(CarbonMetadataUtil.getBlockletInfo3(blocklet));
                expandedEntries.add(replica);
            }
        }
        if (footerReadRequired) {
            // serialize header followed by every replica into one contiguous byte array
            List<byte[]> chunks = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
            byte[] headerBytes = CarbonUtil.getByteArray(header);
            chunks.add(headerBytes);
            int totalLength = headerBytes.length;
            for (BlockIndex replica : expandedEntries) {
                byte[] replicaBytes = CarbonUtil.getByteArray(replica);
                chunks.add(replicaBytes);
                totalLength += replicaBytes.length;
            }
            ByteBuffer merged = ByteBuffer.allocate(totalLength);
            for (byte[] chunk : chunks) {
                merged.put(chunk);
            }
            carbonIndexMap.put(indexFile.getName(), merged.array());
        } else {
            // read complete file at once
            readIndexFile(indexFile);
        }
    } finally {
        indexReader.closeThriftReader();
    }
}
Also used : IndexHeader(org.apache.carbondata.format.IndexHeader) DataFileFooterConverter(org.apache.carbondata.core.util.DataFileFooterConverter) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) CarbonIndexFileReader(org.apache.carbondata.core.reader.CarbonIndexFileReader) BlockletInfo(org.apache.carbondata.core.metadata.blocklet.BlockletInfo) ArrayList(java.util.ArrayList) BlockIndex(org.apache.carbondata.format.BlockIndex) MergedBlockIndex(org.apache.carbondata.format.MergedBlockIndex) ByteBuffer(java.nio.ByteBuffer)

Example 15 with CarbonIndexFileReader

use of org.apache.carbondata.core.reader.CarbonIndexFileReader in project carbondata by apache.

the class CarbonUtil method inferSchemaFromIndexFile.

/**
 * Infers a thrift table info from the column schemas stored in the header of an index file.
 *
 * @param indexFilePath path of the index file whose header is read
 * @param tableName     table name passed to {@code getDummyTableSchema}
 * @return a thrift {@code TableInfo} built from the converted table schema and an empty
 *         list of further table schemas
 * @throws IOException declared for opening and reading the index file
 */
public static org.apache.carbondata.format.TableInfo inferSchemaFromIndexFile(String indexFilePath, String tableName) throws IOException {
    CarbonIndexFileReader reader = new CarbonIndexFileReader();
    try {
        reader.openThriftReader(indexFilePath);
        org.apache.carbondata.format.IndexHeader header = reader.readIndexHeader();
        // convert every thrift column schema of the header into its wrapper form
        List<ColumnSchema> wrapperColumns = new ArrayList<ColumnSchema>();
        for (org.apache.carbondata.format.ColumnSchema thriftColumn : header.getTable_columns()) {
            wrapperColumns.add(thriftColumnSchemaToWrapperColumnSchema(thriftColumn));
        }
        // only columnSchema is the valid entry, reset all dummy entries.
        TableSchema dummySchema = getDummyTableSchema(tableName, wrapperColumns);
        org.apache.carbondata.format.TableSchema thriftFactTable = new ThriftWrapperSchemaConverterImpl().fromWrapperToExternalTableSchema(dummySchema);
        return new org.apache.carbondata.format.TableInfo(thriftFactTable, new ArrayList<org.apache.carbondata.format.TableSchema>());
    } finally {
        reader.closeThriftReader();
    }
}
Also used : IndexHeader(org.apache.carbondata.format.IndexHeader) TableSchema(org.apache.carbondata.core.metadata.schema.table.TableSchema) CarbonIndexFileReader(org.apache.carbondata.core.reader.CarbonIndexFileReader) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) ThriftWrapperSchemaConverterImpl(org.apache.carbondata.core.metadata.converter.ThriftWrapperSchemaConverterImpl) TableInfo(org.apache.carbondata.core.metadata.schema.table.TableInfo)

Aggregations

CarbonIndexFileReader (org.apache.carbondata.core.reader.CarbonIndexFileReader)15 BlockIndex (org.apache.carbondata.format.BlockIndex)11 ArrayList (java.util.ArrayList)8 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)5 TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo)4 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)4 IndexHeader (org.apache.carbondata.format.IndexHeader)4 BlockletInfo (org.apache.carbondata.core.metadata.blocklet.BlockletInfo)3 DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter)3 BlockletIndex (org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex)3 IOException (java.io.IOException)2 ByteBuffer (java.nio.ByteBuffer)2 Segment (org.apache.carbondata.core.index.Segment)2 File (java.io.File)1 FileFilter (java.io.FileFilter)1 HashMap (java.util.HashMap)1 MockUp (mockit.MockUp)1 Segment (org.apache.carbondata.core.datamap.Segment)1 BlockInfo (org.apache.carbondata.core.datastore.block.BlockInfo)1 FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory)1