use of org.apache.carbondata.core.util.DataFileFooterConverter in project carbondata by apache.
the class BlockletDataMap method init.
@Override
public void init(DataMapModel dataMapModel) throws IOException, MemoryException {
  long startTime = System.currentTimeMillis();
  assert (dataMapModel instanceof BlockletDataMapModel);
  BlockletDataMapModel blockletDataMapInfo = (BlockletDataMapModel) dataMapModel;
  DataFileFooterConverter fileFooterConverter = new DataFileFooterConverter();
  List<DataFileFooter> indexInfo = fileFooterConverter.getIndexInfo(
      blockletDataMapInfo.getFilePath(), blockletDataMapInfo.getFileData());
  Path path = new Path(blockletDataMapInfo.getFilePath());
  byte[] filePath = path.getParent().toString().getBytes(CarbonCommonConstants.DEFAULT_CHARSET);
  byte[] fileName = path.getName().getBytes(CarbonCommonConstants.DEFAULT_CHARSET);
  byte[] segmentId = blockletDataMapInfo.getSegmentId().getBytes(CarbonCommonConstants.DEFAULT_CHARSET);
  DataMapRowImpl summaryRow = null;
  byte[] schemaBinary = null;
  // the below two variables are used for fetching the relative blocklet id; the relative
  // blocklet id is the id assigned to a blocklet within a part file
  String tempFilePath = null;
  int relativeBlockletId = 0;
  for (DataFileFooter fileFooter : indexInfo) {
    if (segmentProperties == null) {
      List<ColumnSchema> columnInTable = fileFooter.getColumnInTable();
      schemaBinary = convertSchemaToBinary(columnInTable);
      columnCardinality = fileFooter.getSegmentInfo().getColumnCardinality();
      segmentProperties = new SegmentProperties(columnInTable, columnCardinality);
      createSchema(segmentProperties);
      createSummarySchema(segmentProperties, schemaBinary, filePath, fileName, segmentId);
    }
    TableBlockInfo blockInfo = fileFooter.getBlockInfo().getTableBlockInfo();
    BlockMetaInfo blockMetaInfo = blockletDataMapInfo.getBlockMetaInfoMap().get(blockInfo.getFilePath());
    // check whether the file exists physically
    if (blockMetaInfo != null) {
      if (fileFooter.getBlockletList() == null) {
        // this is the old store scenario: blocklet information is not available in the
        // index file, so load only the block info
        summaryRow = loadToUnsafeBlock(fileFooter, segmentProperties, blockInfo.getFilePath(),
            summaryRow, blockMetaInfo);
      } else {
        // the blocklet id restarts from 0 only when the part file path changes
        if (null == tempFilePath || !tempFilePath.equals(blockInfo.getFilePath())) {
          tempFilePath = blockInfo.getFilePath();
          relativeBlockletId = 0;
        }
        summaryRow = loadToUnsafe(fileFooter, segmentProperties, blockInfo.getFilePath(),
            summaryRow, blockMetaInfo, relativeBlockletId);
        // increment the relative blocklet id by the total number of blocklets in this block
        relativeBlockletId += fileFooter.getBlockletList().size();
      }
    }
  }
  if (unsafeMemoryDMStore != null) {
    unsafeMemoryDMStore.finishWriting();
  }
  if (null != unsafeMemorySummaryDMStore) {
    addTaskSummaryRowToUnsafeMemoryStore(summaryRow, schemaBinary, filePath, fileName, segmentId);
    unsafeMemorySummaryDMStore.finishWriting();
  }
  LOGGER.info("Time taken to load blocklet datamap from file : " + dataMapModel.getFilePath()
      + " is " + (System.currentTimeMillis() - startTime));
}
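For context, a minimal standalone sketch (not part of the project source) of the relative blocklet id bookkeeping used above: the counter resets whenever the part file path changes and otherwise grows by the blocklet count of each footer. It assumes getIndexInfo falls back to reading the index file from disk when the byte-array argument is null, and that every footer carries a blocklet list (i.e. no old-store files).

import java.io.IOException;
import java.util.List;
import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
import org.apache.carbondata.core.util.DataFileFooterConverter;

public class RelativeBlockletIdSketch {
  public static void main(String[] args) throws IOException {
    String indexFilePath = args[0]; // path to a .carbonindex file
    // passing null for fileData assumes the converter reads the file from disk itself
    List<DataFileFooter> indexInfo =
        new DataFileFooterConverter().getIndexInfo(indexFilePath, null);
    String tempFilePath = null;
    int relativeBlockletId = 0;
    for (DataFileFooter footer : indexInfo) {
      String dataFilePath = footer.getBlockInfo().getTableBlockInfo().getFilePath();
      if (!dataFilePath.equals(tempFilePath)) {
        tempFilePath = dataFilePath;
        relativeBlockletId = 0; // ids restart from 0 for each new part file
      }
      System.out.println(dataFilePath + " : first blocklet id = " + relativeBlockletId);
      relativeBlockletId += footer.getBlockletList().size();
    }
  }
}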
use of org.apache.carbondata.core.util.DataFileFooterConverter in project carbondata by apache.
the class SegmentFileStore method getSchemaFiles.
/**
 * Reads all index files and gets the schema of each index file
 * @throws IOException
 */
public static Map<String, List<ColumnSchema>> getSchemaFiles(SegmentFile segmentFile, String tablePath)
    throws IOException {
  Map<String, List<ColumnSchema>> schemaMap = new HashMap<>();
  if (segmentFile == null) {
    return schemaMap;
  }
  SegmentIndexFileStore indexFileStore = new SegmentIndexFileStore();
  indexFileStore.readAllIIndexOfSegment(segmentFile, tablePath, SegmentStatus.SUCCESS, true);
  Map<String, byte[]> carbonIndexMap = indexFileStore.getCarbonIndexMapWithFullPath();
  DataFileFooterConverter fileFooterConverter = new DataFileFooterConverter();
  for (Map.Entry<String, byte[]> entry : carbonIndexMap.entrySet()) {
    List<DataFileFooter> indexInfo = fileFooterConverter.getIndexInfo(entry.getKey(), entry.getValue());
    if (indexInfo.size() > 0) {
      schemaMap.put(entry.getKey(), indexInfo.get(0).getColumnInTable());
    }
  }
  return schemaMap;
}
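A hedged usage sketch of the method above: the segment file path and table path are hypothetical, and readSegmentFile is assumed to be the static loader for SegmentFile metadata. The returned map keys are index file paths; each value is the column schema taken from the first footer of that index file.

// SegmentFileStore is org.apache.carbondata.core.metadata.SegmentFileStore;
// ColumnSchema is org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema
String tablePath = "/store/default/t1"; // hypothetical
SegmentFileStore.SegmentFile segmentFile =
    SegmentFileStore.readSegmentFile(tablePath + "/Metadata/segments/0_1.segment"); // assumed reader
Map<String, List<ColumnSchema>> schemaMap = SegmentFileStore.getSchemaFiles(segmentFile, tablePath);
for (Map.Entry<String, List<ColumnSchema>> entry : schemaMap.entrySet()) {
  System.out.println(entry.getKey() + " -> " + entry.getValue().size() + " columns");
}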
use of org.apache.carbondata.core.util.DataFileFooterConverter in project carbondata by apache.
the class SegmentFileStore method readIndexFiles.
/**
 * Reads all index files as per the status of the file. If @ignoreStatus is true, it simply
 * reads all index files regardless of status.
 * @param status
 * @param ignoreStatus
 * @throws IOException
 */
private void readIndexFiles(SegmentStatus status, boolean ignoreStatus) throws IOException {
  if (indexFilesMap != null) {
    return;
  }
  SegmentIndexFileStore indexFileStore = new SegmentIndexFileStore();
  indexFilesMap = new HashMap<>();
  indexFileStore.readAllIIndexOfSegment(this.segmentFile, tablePath, status, ignoreStatus);
  Map<String, byte[]> carbonIndexMap = indexFileStore.getCarbonIndexMapWithFullPath();
  DataFileFooterConverter fileFooterConverter = new DataFileFooterConverter();
  for (Map.Entry<String, byte[]> entry : carbonIndexMap.entrySet()) {
    List<DataFileFooter> indexInfo = fileFooterConverter.getIndexInfo(entry.getKey(), entry.getValue());
    List<String> blocks = new ArrayList<>();
    for (DataFileFooter footer : indexInfo) {
      blocks.add(footer.getBlockInfo().getTableBlockInfo().getFilePath());
    }
    indexFilesMap.put(entry.getKey(), blocks);
  }
}
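For illustration, the index-file-to-data-files mapping this method builds can be reproduced for a single standalone index file. The path below is hypothetical, and the sketch again assumes getIndexInfo reads from disk when fileData is null.

// uses java.util.* plus DataFileFooter / DataFileFooterConverter as above
String indexPath = "/store/default/t1/Fact/Part0/Segment_0/0_batchno0.carbonindex"; // hypothetical
DataFileFooterConverter converter = new DataFileFooterConverter();
List<String> blocks = new ArrayList<>();
for (DataFileFooter footer : converter.getIndexInfo(indexPath, null)) {
  blocks.add(footer.getBlockInfo().getTableBlockInfo().getFilePath());
}
Map<String, List<String>> indexFilesMap = new HashMap<>();
indexFilesMap.put(indexPath, blocks);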
use of org.apache.carbondata.core.util.DataFileFooterConverter in project carbondata by apache.
the class BlockletDataMapIndexStore method getBlockMetaInfoMap.
private Map<String, BlockMetaInfo> getBlockMetaInfoMap(TableBlockIndexUniqueIdentifier identifier,
    SegmentIndexFileStore indexFileStore, Set<String> filesRead) throws IOException {
  if (identifier.getMergeIndexFileName() != null) {
    CarbonFile indexMergeFile = FileFactory.getCarbonFile(
        identifier.getIndexFilePath() + CarbonCommonConstants.FILE_SEPARATOR
            + identifier.getMergeIndexFileName());
    if (indexMergeFile.exists() && !filesRead.contains(indexMergeFile.getPath())) {
      indexFileStore.readAllIIndexOfSegment(new CarbonFile[] { indexMergeFile });
      filesRead.add(indexMergeFile.getPath());
    }
  }
  if (indexFileStore.getFileData(identifier.getIndexFileName()) == null) {
    indexFileStore.readAllIIndexOfSegment(new CarbonFile[] { FileFactory.getCarbonFile(
        identifier.getIndexFilePath() + CarbonCommonConstants.FILE_SEPARATOR
            + identifier.getIndexFileName()) });
  }
  DataFileFooterConverter fileFooterConverter = new DataFileFooterConverter();
  Map<String, BlockMetaInfo> blockMetaInfoMap = new HashMap<>();
  List<DataFileFooter> indexInfo = fileFooterConverter.getIndexInfo(
      identifier.getIndexFilePath() + CarbonCommonConstants.FILE_SEPARATOR
          + identifier.getIndexFileName(), indexFileStore.getFileData(identifier.getIndexFileName()));
  for (DataFileFooter footer : indexInfo) {
    String blockPath = footer.getBlockInfo().getTableBlockInfo().getFilePath();
    if (FileFactory.isFileExist(blockPath)) {
      blockMetaInfoMap.put(blockPath, createBlockMetaInfo(blockPath));
    } else {
      LOGGER.warn("Skipping invalid block " + footer.getBlockInfo().getBlockUniqueName()
          + ". The block does not exist; it might have been deleted during clean up after an"
          + " update/delete operation on the table.");
    }
  }
  return blockMetaInfoMap;
}
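The merge-index handling above reduces to a small path-resolution rule: prefer the merge index file when the identifier records one, otherwise fall back to the plain index file. A sketch of that rule as a helper, using only the calls already shown in the method:

// sketch only; CarbonFile, FileFactory and CarbonCommonConstants are the
// carbondata-core classes used in the method above
static CarbonFile resolveIndexFile(TableBlockIndexUniqueIdentifier identifier) {
  if (identifier.getMergeIndexFileName() != null) {
    // a merge index bundles several .carbonindex files into one
    return FileFactory.getCarbonFile(identifier.getIndexFilePath()
        + CarbonCommonConstants.FILE_SEPARATOR + identifier.getMergeIndexFileName());
  }
  return FileFactory.getCarbonFile(identifier.getIndexFilePath()
      + CarbonCommonConstants.FILE_SEPARATOR + identifier.getIndexFileName());
}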
use of org.apache.carbondata.core.util.DataFileFooterConverter in project carbondata by apache.
the class SegmentIndexFileStore method readIndexAndFillBlockletInfo.
/**
 * This method will read the index information from the carbon index file
 *
 * @param indexFile
 * @throws IOException
 */
private void readIndexAndFillBlockletInfo(CarbonFile indexFile) throws IOException {
  // flag to decide whether the carbondata file footer needs to be read: if the index file
  // does not already contain the blocklet info, the footer read is required, otherwise not
  boolean isCarbonDataFileFooterReadRequired = true;
  List<BlockletInfo> blockletInfoList = null;
  List<BlockIndex> blockIndexThrift = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
  CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
  try {
    indexReader.openThriftReader(indexFile.getCanonicalPath());
    // get the index header
    org.apache.carbondata.format.IndexHeader indexHeader = indexReader.readIndexHeader();
    DataFileFooterConverter fileFooterConverter = new DataFileFooterConverter();
    String filePath = indexFile.getCanonicalPath();
    String parentPath = filePath.substring(0, filePath.lastIndexOf(CarbonCommonConstants.FILE_SEPARATOR));
    while (indexReader.hasNext()) {
      BlockIndex blockIndex = indexReader.readBlockIndexInfo();
      if (blockIndex.isSetBlocklet_info()) {
        // this case arises when the segment index compaction property is set to false in the
        // application and alter table segment index compaction is run manually; the blocklet
        // info is then already present in the index file, so reading the carbondata file
        // footer is not required
        isCarbonDataFileFooterReadRequired = false;
        break;
      } else {
        TableBlockInfo blockInfo = fileFooterConverter.getTableBlockInfo(blockIndex, indexHeader, parentPath);
        blockletInfoList = getBlockletInfoFromIndexInfo(blockInfo);
      }
      // the same entry needs to be repeated once per blocklet, each with its own blocklet info
      for (int i = 0; i < blockletInfoList.size(); i++) {
        BlockIndex blockIndexReplica = blockIndex.deepCopy();
        BlockletInfo blockletInfo = blockletInfoList.get(i);
        blockIndexReplica.setBlock_index(CarbonMetadataUtil.getBlockletIndex(blockletInfo.getBlockletIndex()));
        blockIndexReplica.setBlocklet_info(CarbonMetadataUtil.getBlocletInfo3(blockletInfo));
        blockIndexThrift.add(blockIndexReplica);
      }
    }
    if (!isCarbonDataFileFooterReadRequired) {
      // the index file already carries blocklet info, so read the complete file at once
      readIndexFile(indexFile);
    } else {
      int totalSize = 0;
      List<byte[]> blockIndexByteArrayList = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
      byte[] indexHeaderBytes = CarbonUtil.getByteArray(indexHeader);
      totalSize += indexHeaderBytes.length;
      blockIndexByteArrayList.add(indexHeaderBytes);
      for (BlockIndex blockIndex : blockIndexThrift) {
        byte[] indexInfoBytes = CarbonUtil.getByteArray(blockIndex);
        totalSize += indexInfoBytes.length;
        blockIndexByteArrayList.add(indexInfoBytes);
      }
      ByteBuffer byteBuffer = ByteBuffer.allocate(totalSize);
      for (byte[] blockIndexBytes : blockIndexByteArrayList) {
        byteBuffer.put(blockIndexBytes);
      }
      carbonIndexMap.put(indexFile.getName(), byteBuffer.array());
    }
  } finally {
    indexReader.closeThriftReader();
  }
}
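The serialization step at the end reduces to concatenating the thrift-encoded header and the per-blocklet index entries into one contiguous byte array. A minimal generic sketch of just that step:

// sketch: concatenate the serialized index header and block index entries
// (java.nio.ByteBuffer and java.util.List)
static byte[] concatIndexBytes(byte[] headerBytes, List<byte[]> blockIndexBytesList) {
  int totalSize = headerBytes.length;
  for (byte[] bytes : blockIndexBytesList) {
    totalSize += bytes.length;
  }
  ByteBuffer byteBuffer = ByteBuffer.allocate(totalSize);
  byteBuffer.put(headerBytes);
  for (byte[] bytes : blockIndexBytesList) {
    byteBuffer.put(bytes);
  }
  return byteBuffer.array();
}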