Search in sources :

Example 1 with DataFileFooterConverter

use of org.apache.carbondata.core.util.DataFileFooterConverter in project carbondata by apache.

the class BlockletDataMap method init.

/**
 * Loads the index content described by the given model into this datamap's unsafe stores.
 *
 * <p>The index file bytes carried by the model are converted into a list of
 * {@link DataFileFooter}s. The first footer seen while {@code segmentProperties} is still
 * unset is used to build the segment properties and the row schemas. Every footer whose block
 * has an entry in the model's block meta info map is then written to the store, either as a
 * whole block (no blocklet list in the footer) or blocklet by blocklet.
 *
 * @param dataMapModel model to load from; it is cast to {@link BlockletDataMapModel}
 * @throws IOException declared by this method; raised by the index reading / loading helpers
 * @throws MemoryException declared by this method; raised by the unsafe store helpers
 */
@Override
public void init(DataMapModel dataMapModel) throws IOException, MemoryException {
    long startTime = System.currentTimeMillis();
    assert (dataMapModel instanceof BlockletDataMapModel);
    BlockletDataMapModel blockletDataMapInfo = (BlockletDataMapModel) dataMapModel;
    DataFileFooterConverter fileFooterConverter = new DataFileFooterConverter();
    List<DataFileFooter> indexInfo = fileFooterConverter.getIndexInfo(blockletDataMapInfo.getFilePath(), blockletDataMapInfo.getFileData());
    Path path = new Path(blockletDataMapInfo.getFilePath());
    // parent directory, file name and segment id are kept as bytes for the summary row
    byte[] filePath = path.getParent().toString().getBytes(CarbonCommonConstants.DEFAULT_CHARSET);
    byte[] fileName = path.getName().getBytes(CarbonCommonConstants.DEFAULT_CHARSET);
    byte[] segmentId = blockletDataMapInfo.getSegmentId().getBytes(CarbonCommonConstants.DEFAULT_CHARSET);
    DataMapRowImpl summaryRow = null;
    byte[] schemaBinary = null;
    // below 2 variables will be used for fetching the relative blocklet id. Relative blocklet ID
    // is id assigned to a blocklet within a part file
    String tempFilePath = null;
    int relativeBlockletId = 0;
    for (DataFileFooter fileFooter : indexInfo) {
        // segment properties and schemas are derived once, from the first footer processed
        if (segmentProperties == null) {
            List<ColumnSchema> columnInTable = fileFooter.getColumnInTable();
            schemaBinary = convertSchemaToBinary(columnInTable);
            columnCardinality = fileFooter.getSegmentInfo().getColumnCardinality();
            segmentProperties = new SegmentProperties(columnInTable, columnCardinality);
            createSchema(segmentProperties);
            createSummarySchema(segmentProperties, schemaBinary, filePath, fileName, segmentId);
        }
        TableBlockInfo blockInfo = fileFooter.getBlockInfo().getTableBlockInfo();
        BlockMetaInfo blockMetaInfo = blockletDataMapInfo.getBlockMetaInfoMap().get(blockInfo.getFilePath());
        // blocks without a meta info entry are skipped
        // NOTE(review): presumably the map only holds blocks whose files exist physically - confirm
        if (blockMetaInfo != null) {
            if (fileFooter.getBlockletList() == null) {
                // This is old store scenario, here blocklet information is not available in index file so
                // load only block info
                summaryRow = loadToUnsafeBlock(fileFooter, segmentProperties, blockInfo.getFilePath(), summaryRow, blockMetaInfo);
            } else {
                // blocklet ID will start from 0 again only when part file path is changed
                if (null == tempFilePath || !tempFilePath.equals(blockInfo.getFilePath())) {
                    tempFilePath = blockInfo.getFilePath();
                    relativeBlockletId = 0;
                }
                summaryRow = loadToUnsafe(fileFooter, segmentProperties, blockInfo.getFilePath(), summaryRow, blockMetaInfo, relativeBlockletId);
                // this is done because relative blocklet id need to be incremented based on the
                // total number of blocklets
                relativeBlockletId += fileFooter.getBlockletList().size();
            }
        }
    }
    if (unsafeMemoryDMStore != null) {
        unsafeMemoryDMStore.finishWriting();
    }
    if (null != unsafeMemorySummaryDMStore) {
        addTaskSummaryRowToUnsafeMemoryStore(summaryRow, schemaBinary, filePath, fileName, segmentId);
        unsafeMemorySummaryDMStore.finishWriting();
    }
    // a space before "is" keeps the file path and the elapsed time readable in the log line
    LOGGER.info("Time taken to load blocklet datamap from file : " + dataMapModel.getFilePath() + " is " + (System.currentTimeMillis() - startTime));
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) DataFileFooterConverter(org.apache.carbondata.core.util.DataFileFooterConverter) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter) SegmentProperties(org.apache.carbondata.core.datastore.block.SegmentProperties) DataMapRowImpl(org.apache.carbondata.core.indexstore.row.DataMapRowImpl) BlockMetaInfo(org.apache.carbondata.core.indexstore.BlockMetaInfo)

Example 2 with DataFileFooterConverter

use of org.apache.carbondata.core.util.DataFileFooterConverter in project carbondata by apache.

the class SegmentFileStore method getSchemaFiles.

/**
 * Collects the column schema recorded in each index file of a segment.
 *
 * @param segmentFile segment file whose index files are read; when {@code null} an empty map
 *                    is returned without reading anything
 * @param tablePath   table path handed to the index file store
 * @return map keyed by the index file key reported by the store, holding the column schema of
 *         the first footer of that index file; index files without any footer are left out
 * @throws IOException declared by this method; raised while reading or converting index files
 */
public static Map<String, List<ColumnSchema>> getSchemaFiles(SegmentFile segmentFile, String tablePath) throws IOException {
    Map<String, List<ColumnSchema>> schemasByIndexFile = new HashMap<>();
    if (segmentFile != null) {
        SegmentIndexFileStore store = new SegmentIndexFileStore();
        // index files are read regardless of status (ignoreStatus = true)
        store.readAllIIndexOfSegment(segmentFile, tablePath, SegmentStatus.SUCCESS, true);
        Map<String, byte[]> indexContents = store.getCarbonIndexMapWithFullPath();
        DataFileFooterConverter converter = new DataFileFooterConverter();
        for (Map.Entry<String, byte[]> indexFile : indexContents.entrySet()) {
            String indexKey = indexFile.getKey();
            List<DataFileFooter> footers = converter.getIndexInfo(indexKey, indexFile.getValue());
            if (!footers.isEmpty()) {
                // only the first footer's schema is recorded for an index file
                schemasByIndexFile.put(indexKey, footers.get(0).getColumnInTable());
            }
        }
    }
    return schemasByIndexFile;
}
Also used : DataFileFooterConverter(org.apache.carbondata.core.util.DataFileFooterConverter) SegmentIndexFileStore(org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore) HashMap(java.util.HashMap) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Example 3 with DataFileFooterConverter

use of org.apache.carbondata.core.util.DataFileFooterConverter in project carbondata by apache.

the class SegmentFileStore method readIndexFiles.

/**
 * Reads the index files of this segment and caches, for every index file, the list of block
 * file paths referenced by its footers in {@code indexFilesMap}.
 *
 * <p>The method is a no-op once {@code indexFilesMap} has been populated. The map is built
 * locally and published only after every index file has been read and converted, so a failed
 * read leaves {@code indexFilesMap} unset and a later call can retry instead of returning an
 * empty or partially filled map.
 *
 * @param status       segment status passed through to the index file store
 * @param ignoreStatus passed through to the index file store; per the original note, when true
 *                     all index files are read
 * @throws IOException declared by this method; raised while reading or converting index files
 */
private void readIndexFiles(SegmentStatus status, boolean ignoreStatus) throws IOException {
    if (indexFilesMap != null) {
        return;
    }
    SegmentIndexFileStore indexFileStore = new SegmentIndexFileStore();
    indexFileStore.readAllIIndexOfSegment(this.segmentFile, tablePath, status, ignoreStatus);
    Map<String, byte[]> carbonIndexMap = indexFileStore.getCarbonIndexMapWithFullPath();
    DataFileFooterConverter fileFooterConverter = new DataFileFooterConverter();
    // filled locally first: assigning the field up front would make the null guard above
    // treat a failed load as a completed one
    Map<String, List<String>> loadedIndexFiles = new HashMap<>();
    for (Map.Entry<String, byte[]> entry : carbonIndexMap.entrySet()) {
        List<DataFileFooter> indexInfo = fileFooterConverter.getIndexInfo(entry.getKey(), entry.getValue());
        List<String> blocks = new ArrayList<>(indexInfo.size());
        for (DataFileFooter footer : indexInfo) {
            blocks.add(footer.getBlockInfo().getTableBlockInfo().getFilePath());
        }
        loadedIndexFiles.put(entry.getKey(), blocks);
    }
    indexFilesMap = loadedIndexFiles;
}
Also used : DataFileFooterConverter(org.apache.carbondata.core.util.DataFileFooterConverter) SegmentIndexFileStore(org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter) ArrayList(java.util.ArrayList) HashMap(java.util.HashMap) Map(java.util.Map)

Example 4 with DataFileFooterConverter

use of org.apache.carbondata.core.util.DataFileFooterConverter in project carbondata by apache.

the class BlockletDataMapIndexStore method getBlockMetaInfoMap.

/**
 * Builds the block meta info for every block referenced by the identifier's index file.
 *
 * <p>If the identifier names a merge index file that exists and is not yet listed in
 * {@code filesRead}, it is read into the store and its path is added to {@code filesRead}.
 * If the store still has no data for the index file, the index file itself is read. Blocks
 * whose files do not exist are logged and skipped.
 *
 * @param identifier     identifies the index file (and optionally its merge index file)
 * @param indexFileStore store that reads and holds index file content
 * @param filesRead      paths of merge index files already read; updated by this method
 * @return map from block file path to its meta info, containing only blocks that exist
 * @throws IOException declared by this method; raised by the file and index reading helpers
 */
private Map<String, BlockMetaInfo> getBlockMetaInfoMap(TableBlockIndexUniqueIdentifier identifier, SegmentIndexFileStore indexFileStore, Set<String> filesRead) throws IOException {
    String mergeIndexFileName = identifier.getMergeIndexFileName();
    if (mergeIndexFileName != null) {
        CarbonFile mergeFile = FileFactory.getCarbonFile(identifier.getIndexFilePath() + CarbonCommonConstants.FILE_SEPARATOR + mergeIndexFileName);
        if (mergeFile.exists()) {
            String mergeFilePath = mergeFile.getPath();
            // a merge index file is read at most once per filesRead set
            if (!filesRead.contains(mergeFilePath)) {
                indexFileStore.readAllIIndexOfSegment(new CarbonFile[] { mergeFile });
                filesRead.add(mergeFilePath);
            }
        }
    }
    String indexFileName = identifier.getIndexFileName();
    String indexFileFullPath = identifier.getIndexFilePath() + CarbonCommonConstants.FILE_SEPARATOR + indexFileName;
    // fall back to the plain index file when the store holds no data for it yet
    if (indexFileStore.getFileData(indexFileName) == null) {
        indexFileStore.readAllIIndexOfSegment(new CarbonFile[] { FileFactory.getCarbonFile(indexFileFullPath) });
    }
    DataFileFooterConverter converter = new DataFileFooterConverter();
    Map<String, BlockMetaInfo> metaByBlockPath = new HashMap<>();
    List<DataFileFooter> footers = converter.getIndexInfo(indexFileFullPath, indexFileStore.getFileData(indexFileName));
    for (DataFileFooter footer : footers) {
        String blockPath = footer.getBlockInfo().getTableBlockInfo().getFilePath();
        if (!FileFactory.isFileExist(blockPath)) {
            LOGGER.warn("Skipping invalid block " + footer.getBlockInfo().getBlockUniqueName() + " The block does not exist. The block might be got deleted due to clean up post" + " update/delete operation over table.");
            continue;
        }
        metaByBlockPath.put(blockPath, createBlockMetaInfo(blockPath));
    }
    return metaByBlockPath;
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) AbstractDFSCarbonFile(org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile) DataFileFooterConverter(org.apache.carbondata.core.util.DataFileFooterConverter) HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter)

Example 5 with DataFileFooterConverter

use of org.apache.carbondata.core.util.DataFileFooterConverter in project carbondata by apache.

the class SegmentIndexFileStore method readIndexAndFillBlockletInfo.

/**
 * Reads a carbon index file and stores its serialized content in {@code carbonIndexMap}.
 *
 * <p>Index entries are read one by one. As soon as an entry already carrying blocklet info is
 * found, reading stops and the file is loaded as a whole through {@code readIndexFile}.
 * Otherwise every entry is expanded into one copy per blocklet (blocklet details are obtained
 * through {@code getBlockletInfoFromIndexInfo}), and the header followed by all expanded
 * entries is serialized into a single byte array registered under the index file name.
 *
 * @param indexFile index file to read
 * @throws IOException declared by this method; raised by the reader and conversion helpers
 */
private void readIndexAndFillBlockletInfo(CarbonFile indexFile) throws IOException {
    // remains true until an entry that already contains blocklet info is encountered; while
    // true, blocklet details have to be fetched separately for each entry
    boolean footerReadNeeded = true;
    List<BlockIndex> expandedEntries = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    CarbonIndexFileReader reader = new CarbonIndexFileReader();
    try {
        reader.openThriftReader(indexFile.getCanonicalPath());
        // get the index header
        org.apache.carbondata.format.IndexHeader header = reader.readIndexHeader();
        DataFileFooterConverter converter = new DataFileFooterConverter();
        String canonicalPath = indexFile.getCanonicalPath();
        int lastSeparator = canonicalPath.lastIndexOf(CarbonCommonConstants.FILE_SEPARATOR);
        String parentDir = canonicalPath.substring(0, lastSeparator);
        while (reader.hasNext()) {
            BlockIndex entry = reader.readBlockIndexInfo();
            if (entry.isSetBlocklet_info()) {
                // this case will come in case segment index compaction property is set to false from the
                // application and alter table segment index compaction is run manually. In that case
                // blocklet info will be present in the index but read carbon data file footer property
                // will be true
                footerReadNeeded = false;
                break;
            }
            TableBlockInfo blockInfo = converter.getTableBlockInfo(entry, header, parentDir);
            List<BlockletInfo> blockletInfos = getBlockletInfoFromIndexInfo(blockInfo);
            // the same entry with different blocklet info need to be repeated
            for (BlockletInfo blockletInfo : blockletInfos) {
                BlockIndex entryCopy = entry.deepCopy();
                entryCopy.setBlock_index(CarbonMetadataUtil.getBlockletIndex(blockletInfo.getBlockletIndex()));
                entryCopy.setBlocklet_info(CarbonMetadataUtil.getBlocletInfo3(blockletInfo));
                expandedEntries.add(entryCopy);
            }
        }
        if (footerReadNeeded) {
            // serialize header + expanded entries, tracking the total length for the buffer
            List<byte[]> serializedParts = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
            byte[] headerBytes = CarbonUtil.getByteArray(header);
            serializedParts.add(headerBytes);
            int totalSize = headerBytes.length;
            for (BlockIndex expanded : expandedEntries) {
                byte[] entryBytes = CarbonUtil.getByteArray(expanded);
                serializedParts.add(entryBytes);
                totalSize += entryBytes.length;
            }
            ByteBuffer combined = ByteBuffer.allocate(totalSize);
            for (byte[] part : serializedParts) {
                combined.put(part);
            }
            carbonIndexMap.put(indexFile.getName(), combined.array());
        } else {
            // read complete file at once
            readIndexFile(indexFile);
        }
    } finally {
        reader.closeThriftReader();
    }
}
Also used : DataFileFooterConverter(org.apache.carbondata.core.util.DataFileFooterConverter) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) CarbonIndexFileReader(org.apache.carbondata.core.reader.CarbonIndexFileReader) BlockletInfo(org.apache.carbondata.core.metadata.blocklet.BlockletInfo) ArrayList(java.util.ArrayList) BlockIndex(org.apache.carbondata.format.BlockIndex) MergedBlockIndex(org.apache.carbondata.format.MergedBlockIndex) ByteBuffer(java.nio.ByteBuffer)

Aggregations

DataFileFooterConverter (org.apache.carbondata.core.util.DataFileFooterConverter)5 DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter)4 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 Map (java.util.Map)2 TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo)2 SegmentIndexFileStore (org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore)2 ByteBuffer (java.nio.ByteBuffer)1 List (java.util.List)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 SegmentProperties (org.apache.carbondata.core.datastore.block.SegmentProperties)1 AbstractDFSCarbonFile (org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile)1 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)1 BlockMetaInfo (org.apache.carbondata.core.indexstore.BlockMetaInfo)1 DataMapRowImpl (org.apache.carbondata.core.indexstore.row.DataMapRowImpl)1 BlockletInfo (org.apache.carbondata.core.metadata.blocklet.BlockletInfo)1 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)1 CarbonIndexFileReader (org.apache.carbondata.core.reader.CarbonIndexFileReader)1 CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath)1 BlockIndex (org.apache.carbondata.format.BlockIndex)1