use of org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager in project carbondata by apache.
the class CarbonUpdateUtil method cleanUpDeltaFiles.
/**
* Handles the clean-up of old carbondata files, index files, delete delta files
* and update status files.
* @param table clean up will be handled on this table.
* @param forceDelete if true then max query execution timeout will not be considered.
*/
public static void cleanUpDeltaFiles(CarbonTable table, boolean forceDelete) {
SegmentStatusManager ssm = new SegmentStatusManager(table.getAbsoluteTableIdentifier());
CarbonTablePath carbonTablePath = CarbonStorePath.getCarbonTablePath(table.getAbsoluteTableIdentifier().getStorePath(), table.getAbsoluteTableIdentifier().getCarbonTableIdentifier());
LoadMetadataDetails[] details = ssm.readLoadMetadata(table.getMetaDataFilepath());
String validUpdateStatusFile = "";
for (LoadMetadataDetails segment : details) {
// take the update status file name from the 0th segment.
validUpdateStatusFile = ssm.getUpdateStatusFileName(details);
if (segment.getLoadStatus().equalsIgnoreCase(CarbonCommonConstants.STORE_LOADSTATUS_SUCCESS) || segment.getLoadStatus().equalsIgnoreCase(CarbonCommonConstants.STORE_LOADSTATUS_PARTIAL_SUCCESS)) {
// take the list of files from this segment.
String segmentPath = carbonTablePath.getCarbonDataDirectoryPath("0", segment.getLoadName());
CarbonFile segDir = FileFactory.getCarbonFile(segmentPath, FileFactory.getFileType(segmentPath));
CarbonFile[] allSegmentFiles = segDir.listFiles();
// scan through the segment and find the carbondata files and index files.
SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(table.getAbsoluteTableIdentifier());
// get Invalid update delta files.
CarbonFile[] invalidUpdateDeltaFiles = updateStatusManager.getUpdateDeltaFilesList(segment.getLoadName(), false, CarbonCommonConstants.UPDATE_DELTA_FILE_EXT, true, allSegmentFiles);
for (CarbonFile invalidFile : invalidUpdateDeltaFiles) {
compareTimestampsAndDelete(invalidFile, forceDelete, false);
}
// do the same for the index files.
CarbonFile[] invalidIndexFiles = updateStatusManager.getUpdateDeltaFilesList(segment.getLoadName(), false, CarbonCommonConstants.UPDATE_INDEX_FILE_EXT, true, allSegmentFiles);
for (CarbonFile invalidFile : invalidIndexFiles) {
compareTimestampsAndDelete(invalidFile, forceDelete, false);
}
// now handle all the delete delta files which need to be deleted.
// there are 2 cases here:
// 1. if the block is marked as compacted then the corresponding delta files
// can be deleted once the query execution timeout has elapsed.
// 2. if the block is in success state, a delete delta compaction may still
// have happened and the old files can be deleted.
SegmentUpdateDetails[] updateDetails = updateStatusManager.readLoadMetadata();
for (SegmentUpdateDetails block : updateDetails) {
CarbonFile[] completeListOfDeleteDeltaFiles;
CarbonFile[] invalidDeleteDeltaFiles;
if (!block.getSegmentName().equalsIgnoreCase(segment.getLoadName())) {
continue;
}
// case 1
if (CarbonUpdateUtil.isBlockInvalid(block.getStatus())) {
completeListOfDeleteDeltaFiles = updateStatusManager.getDeleteDeltaInvalidFilesList(segment.getLoadName(), block, true, allSegmentFiles);
for (CarbonFile invalidFile : completeListOfDeleteDeltaFiles) {
compareTimestampsAndDelete(invalidFile, forceDelete, false);
}
CarbonFile[] blockRelatedFiles = updateStatusManager.getAllBlockRelatedFiles(block.getBlockName(), allSegmentFiles, block.getActualBlockName());
for (CarbonFile invalidFile : blockRelatedFiles) {
compareTimestampsAndDelete(invalidFile, forceDelete, false);
}
} else {
invalidDeleteDeltaFiles = updateStatusManager.getDeleteDeltaInvalidFilesList(segment.getLoadName(), block, false, allSegmentFiles);
for (CarbonFile invalidFile : invalidDeleteDeltaFiles) {
compareTimestampsAndDelete(invalidFile, forceDelete, false);
}
}
}
}
}
// delete the old table update status files.
if (null != validUpdateStatusFile && !validUpdateStatusFile.isEmpty()) {
final String updateStatusTimestamp = validUpdateStatusFile.substring(validUpdateStatusFile.lastIndexOf(CarbonCommonConstants.HYPHEN) + 1);
CarbonFile metaFolder = FileFactory.getCarbonFile(carbonTablePath.getMetadataDirectoryPath(), FileFactory.getFileType(carbonTablePath.getMetadataDirectoryPath()));
CarbonFile[] invalidUpdateStatusFiles = metaFolder.listFiles(new CarbonFileFilter() {
@Override
public boolean accept(CarbonFile file) {
if (file.getName().startsWith(CarbonCommonConstants.TABLEUPDATESTATUS_FILENAME)) {
// we only send invalid ones to delete.
if (!file.getName().endsWith(updateStatusTimestamp)) {
return true;
}
}
return false;
}
});
for (CarbonFile invalidFile : invalidUpdateStatusFiles) {
compareTimestampsAndDelete(invalidFile, forceDelete, true);
}
}
}
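The actual deletion decision is delegated to compareTimestampsAndDelete, whose body is not shown in this snippet. The sketch below illustrates the kind of check it is expected to perform: an invalid file is removed only when forceDelete is set or when the file is older than the maximum query execution timeout. The class, method names and the timeout parameter are hypothetical stand-ins introduced here for illustration; they are not part of the CarbonData API.
import java.io.File;
import java.util.concurrent.TimeUnit;
// Hypothetical helper, for illustration only; it mirrors the decision that
// compareTimestampsAndDelete is expected to make, using plain java.io.File
// instead of CarbonFile and a caller-supplied timeout instead of the
// CarbonProperties configuration.
final class DeltaFileCleanupSketch {
  static boolean shouldDelete(File invalidFile, boolean forceDelete, long maxQueryTimeoutHours) {
    // age of the file since it was last written
    long ageMillis = System.currentTimeMillis() - invalidFile.lastModified();
    long timeoutMillis = TimeUnit.HOURS.toMillis(maxQueryTimeoutHours);
    // delete when forced, or when no in-flight query can still be reading the file
    return forceDelete || ageMillis > timeoutMillis;
  }
  static void cleanUp(File invalidFile, boolean forceDelete, long maxQueryTimeoutHours) {
    if (shouldDelete(invalidFile, forceDelete, maxQueryTimeoutHours)) {
      // a real implementation should log and handle a failed delete; ignored here for brevity
      invalidFile.delete();
    }
  }
}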
use of org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager in project carbondata by apache.
the class BlockletDeleteDeltaCacheLoader method loadDeleteDeltaFileDataToCache.
/**
* This method will load the delete delta cache based on the blocklet id of a particular block
* with the help of SegmentUpdateStatusManager.
*/
public void loadDeleteDeltaFileDataToCache() {
SegmentUpdateStatusManager segmentUpdateStatusManager = new SegmentUpdateStatusManager(absoluteIdentifier);
int[] deleteDeltaFileData = null;
BlockletLevelDeleteDeltaDataCache deleteDeltaDataCache = null;
if (null == blockletNode.getDeleteDeltaDataCache()) {
try {
deleteDeltaFileData = segmentUpdateStatusManager.getDeleteDeltaDataFromAllFiles(blockletID);
deleteDeltaDataCache = new BlockletLevelDeleteDeltaDataCache(deleteDeltaFileData, segmentUpdateStatusManager.getTimestampForRefreshCache(blockletID, null));
} catch (Exception e) {
LOGGER.debug("Unable to retrieve delete delta files");
}
} else {
deleteDeltaDataCache = blockletNode.getDeleteDeltaDataCache();
// if the cache is already present then validate it using the timestamp
String cacheTimeStamp = segmentUpdateStatusManager.getTimestampForRefreshCache(blockletID, deleteDeltaDataCache.getCacheTimeStamp());
if (null != cacheTimeStamp) {
try {
deleteDeltaFileData = segmentUpdateStatusManager.getDeleteDeltaDataFromAllFiles(blockletID);
deleteDeltaDataCache = new BlockletLevelDeleteDeltaDataCache(deleteDeltaFileData, segmentUpdateStatusManager.getTimestampForRefreshCache(blockletID, cacheTimeStamp));
} catch (Exception e) {
LOGGER.debug("Unable to retrieve delete delta files");
}
}
}
blockletNode.setDeleteDeltaDataCache(deleteDeltaDataCache);
}
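Stripped of the CarbonData types, the loader follows one rule: build the cache when none exists, and rebuild it only when the status manager reports a newer timestamp than the one stored in the cache. The sketch below restates that rule; CachedDeleteDelta and DeltaSource are hypothetical stand-ins for BlockletLevelDeleteDeltaDataCache and the SegmentUpdateStatusManager calls, not CarbonData classes.
// Illustration only -- not CarbonData code.
final class DeleteDeltaCacheSketch {

  static final class CachedDeleteDelta {
    final int[] deletedRows;
    final String cacheTimeStamp;
    CachedDeleteDelta(int[] deletedRows, String cacheTimeStamp) {
      this.deletedRows = deletedRows;
      this.cacheTimeStamp = cacheTimeStamp;
    }
  }

  interface DeltaSource {
    // reads the deleted-row ids from all delete delta files of the blocklet
    int[] loadFromFiles(String blockletId) throws Exception;
    // returns a newer timestamp if the files changed since cacheTimeStamp, else null
    String timestampIfStale(String blockletId, String cacheTimeStamp);
  }

  static CachedDeleteDelta refresh(CachedDeleteDelta current, String blockletId,
      DeltaSource source) throws Exception {
    if (current == null) {
      // no cache yet: load everything from the delete delta files
      return new CachedDeleteDelta(source.loadFromFiles(blockletId),
          source.timestampIfStale(blockletId, null));
    }
    String newer = source.timestampIfStale(blockletId, current.cacheTimeStamp);
    if (newer != null) {
      // files changed after the cache was built: reload and stamp with the new time
      return new CachedDeleteDelta(source.loadFromFiles(blockletId), newer);
    }
    // cache is still valid
    return current;
  }
}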
use of org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager in project carbondata by apache.
the class SegmentTaskIndexStore method loadAndGetTaskIdToSegmentsMap.
/**
* Below method will be used to load the segments.
* One segment may have multiple tasks, so the table segment will be loaded
* based on task id and the map of task id to table segment
* will be returned.
*
* @param segmentToTableBlocksInfos segment id to block info
* @param absoluteTableIdentifier absolute table identifier
* @return map of task id to segment mapping
* @throws IOException
*/
private SegmentTaskIndexWrapper loadAndGetTaskIdToSegmentsMap(Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos, AbsoluteTableIdentifier absoluteTableIdentifier, TableSegmentUniqueIdentifier tableSegmentUniqueIdentifier) throws IOException {
// task id to segment map
Iterator<Map.Entry<String, List<TableBlockInfo>>> iteratorOverSegmentBlocksInfos = segmentToTableBlocksInfos.entrySet().iterator();
Map<TaskBucketHolder, AbstractIndex> taskIdToSegmentIndexMap = null;
SegmentTaskIndexWrapper segmentTaskIndexWrapper = null;
SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(absoluteTableIdentifier);
String segmentId = null;
TaskBucketHolder taskBucketHolder = null;
try {
while (iteratorOverSegmentBlocksInfos.hasNext()) {
// segment id to table block mapping
Map.Entry<String, List<TableBlockInfo>> next = iteratorOverSegmentBlocksInfos.next();
// group task id to table block info mapping for the segment
Map<TaskBucketHolder, List<TableBlockInfo>> taskIdToTableBlockInfoMap = mappedAndGetTaskIdToTableBlockInfo(segmentToTableBlocksInfos);
segmentId = next.getKey();
// get the existing map of task id to table segment map
UpdateVO updateVO = updateStatusManager.getInvalidTimestampRange(segmentId);
// check if the segment is already loaded; if it is,
// there is no need to load the segment blocks again
String lruCacheKey = tableSegmentUniqueIdentifier.getUniqueTableSegmentIdentifier();
segmentTaskIndexWrapper = (SegmentTaskIndexWrapper) lruCache.get(lruCacheKey);
if (segmentTaskIndexWrapper == null || tableSegmentUniqueIdentifier.isSegmentUpdated()) {
// get the segment loader lock object; this is to avoid
// the same segment getting loaded multiple times
// in case of concurrent queries
Object segmentLoderLockObject = segmentLockMap.get(lruCacheKey);
if (null == segmentLoderLockObject) {
segmentLoderLockObject = addAndGetSegmentLock(lruCacheKey);
}
// acquire lock to load the segment
synchronized (segmentLoderLockObject) {
segmentTaskIndexWrapper = (SegmentTaskIndexWrapper) lruCache.get(lruCacheKey);
if (null == segmentTaskIndexWrapper || tableSegmentUniqueIdentifier.isSegmentUpdated()) {
// so that the same can be updated after loading the btree.
if (tableSegmentUniqueIdentifier.isSegmentUpdated() && null != segmentTaskIndexWrapper) {
taskIdToSegmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
} else {
// creating a map of task id to table segment
taskIdToSegmentIndexMap = new HashMap<TaskBucketHolder, AbstractIndex>();
segmentTaskIndexWrapper = new SegmentTaskIndexWrapper(taskIdToSegmentIndexMap);
segmentTaskIndexWrapper.incrementAccessCount();
}
Iterator<Map.Entry<TaskBucketHolder, List<TableBlockInfo>>> iterator = taskIdToTableBlockInfoMap.entrySet().iterator();
long requiredSize = calculateRequiredSize(taskIdToTableBlockInfoMap, absoluteTableIdentifier);
segmentTaskIndexWrapper.setMemorySize(requiredSize + segmentTaskIndexWrapper.getMemorySize());
boolean isAddedToLruCache = lruCache.put(lruCacheKey, segmentTaskIndexWrapper, requiredSize);
if (isAddedToLruCache) {
while (iterator.hasNext()) {
Map.Entry<TaskBucketHolder, List<TableBlockInfo>> taskToBlockInfoList = iterator.next();
taskBucketHolder = taskToBlockInfoList.getKey();
taskIdToSegmentIndexMap.put(taskBucketHolder, loadBlocks(taskBucketHolder, taskToBlockInfoList.getValue(), absoluteTableIdentifier));
}
} else {
throw new IndexBuilderException("Can not load the segment. No Enough space available.");
}
// set the latest timestamp.
segmentTaskIndexWrapper.setRefreshedTimeStamp(updateVO.getCreatedOrUpdatedTimeStamp());
// tableSegmentMapTemp.put(next.getKey(), taskIdToSegmentIndexMap);
// removing from the segment lock map: once the segment is loaded,
// any concurrent query for the same segment will wait on the lock,
// and by the time it proceeds the segment will already be loaded,
// so the lock is no longer required; that is why the lock object
// is removed, as it won't be useful any more
segmentLockMap.remove(lruCacheKey);
} else {
segmentTaskIndexWrapper.incrementAccessCount();
}
}
} else {
segmentTaskIndexWrapper.incrementAccessCount();
}
}
} catch (IndexBuilderException e) {
LOGGER.error("Problem while loading the segment");
throw e;
}
return segmentTaskIndexWrapper;
}
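At its core the method applies a per-segment "load once" pattern: check the LRU cache, and if the entry is missing, take a segment-specific lock, re-check inside the lock, then load and finally remove the lock object. The sketch below isolates that pattern with a plain ConcurrentHashMap standing in for the LRU cache and a Supplier standing in for loadBlocks; it is an illustration, not the SegmentTaskIndexStore implementation.
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Supplier;

// Illustration only: the per-segment "load once under a lock" pattern used above,
// reduced to its core.  The cache and loader are hypothetical stand-ins.
final class SegmentLoadOnceSketch<V> {

  private final Map<String, V> cache = new ConcurrentHashMap<>();
  private final Map<String, Object> locks = new ConcurrentHashMap<>();

  V getOrLoad(String segmentKey, Supplier<V> loader) {
    V loaded = cache.get(segmentKey);
    if (loaded != null) {
      return loaded;                       // fast path: segment index already cached
    }
    Object lock = locks.computeIfAbsent(segmentKey, k -> new Object());
    synchronized (lock) {
      // re-check inside the lock: a concurrent query may have loaded it meanwhile
      loaded = cache.get(segmentKey);
      if (loaded == null) {
        loaded = loader.get();
        cache.put(segmentKey, loaded);
      }
    }
    // once the segment is loaded the lock object is no longer useful
    locks.remove(segmentKey);
    return loaded;
  }
}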
use of org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager in project carbondata by apache.
the class CarbonInputFormat method getSplits.
/**
* {@inheritDoc}
* The configuration FileInputFormat.INPUT_DIR
* is used to get the table path to read.
*
* @param job
* @return List<InputSplit> list of CarbonInputSplit
* @throws IOException
*/
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());
CacheClient cacheClient = new CacheClient(identifier.getStorePath());
try {
List<String> invalidSegments = new ArrayList<>();
List<UpdateVO> invalidTimestampsList = new ArrayList<>();
// get all valid segments and set them into the configuration
if (getSegmentsToAccess(job).length == 0) {
SegmentStatusManager segmentStatusManager = new SegmentStatusManager(identifier);
SegmentStatusManager.ValidAndInvalidSegmentsInfo segments = segmentStatusManager.getValidAndInvalidSegments();
SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(identifier);
setSegmentsToAccess(job.getConfiguration(), segments.getValidSegments());
if (segments.getValidSegments().size() == 0) {
return new ArrayList<>(0);
}
// remove entries from the segment index if there are invalid segments
invalidSegments.addAll(segments.getInvalidSegments());
for (String invalidSegmentId : invalidSegments) {
invalidTimestampsList.add(updateStatusManager.getInvalidTimestampRange(invalidSegmentId));
}
if (invalidSegments.size() > 0) {
List<TableSegmentUniqueIdentifier> invalidSegmentsIds = new ArrayList<>(invalidSegments.size());
for (String segId : invalidSegments) {
invalidSegmentsIds.add(new TableSegmentUniqueIdentifier(identifier, segId));
}
cacheClient.getSegmentAccessClient().invalidateAll(invalidSegmentsIds);
}
}
// process and resolve the expression
Expression filter = getFilterPredicates(job.getConfiguration());
CarbonTable carbonTable = getCarbonTable(job.getConfiguration());
// this will be null in case of corrupt schema file.
if (null == carbonTable) {
throw new IOException("Missing/Corrupt schema file for table.");
}
CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
// prune partitions for filter query on partition table
BitSet matchedPartitions = null;
if (null != filter) {
PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getFactTableName());
if (null != partitionInfo) {
Partitioner partitioner = PartitionUtil.getPartitioner(partitionInfo);
matchedPartitions = new FilterExpressionProcessor().getFilteredPartitions(filter, partitionInfo, partitioner);
if (matchedPartitions.cardinality() == 0) {
// no partition is required
return new ArrayList<InputSplit>();
}
if (matchedPartitions.cardinality() == partitioner.numPartitions()) {
// all partitions are required, no need to prune partitions
matchedPartitions = null;
}
}
}
FilterResolverIntf filterInterface = CarbonInputFormatUtil.resolveFilter(filter, identifier);
// do block filtering and get split
List<InputSplit> splits = getSplits(job, filterInterface, matchedPartitions, cacheClient);
// pass the invalid segment to task side in order to remove index entry in task side
if (invalidSegments.size() > 0) {
for (InputSplit split : splits) {
((CarbonInputSplit) split).setInvalidSegments(invalidSegments);
((CarbonInputSplit) split).setInvalidTimestampRange(invalidTimestampsList);
}
}
return splits;
} finally {
// close the cache client to clear LRU cache memory
cacheClient.close();
}
}
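For context, getSplits is normally reached through a Hadoop job whose FileInputFormat.INPUT_DIR points at the table path. The driver-side sketch below shows that wiring; the table path is made up, and the assumptions that CarbonInputFormat (from the carbondata-hadoop module) has a no-arg constructor and can compute splits from the input path alone are illustrative only — a real job sets additional Carbon-specific properties on the configuration.
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
// assumed location in the carbondata-hadoop module
import org.apache.carbondata.hadoop.CarbonInputFormat;

// Driver-side sketch, for illustration only.  The store path and the no-arg
// constructor of CarbonInputFormat are assumptions made for this example.
public final class GetSplitsSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "carbon-splits-sketch");
    // FileInputFormat.INPUT_DIR: the table path the format will read
    FileInputFormat.setInputPaths(job, new Path("/tmp/carbon/store/default/my_table"));
    CarbonInputFormat<Object> format = new CarbonInputFormat<>();
    List<InputSplit> splits = format.getSplits(job);
    System.out.println("number of splits: " + splits.size());
  }
}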
use of org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager in project carbondata by apache.
the class CarbonDataMergerUtil method updateLoadMetadataIUDUpdateDeltaMergeStatus.
/**
* Update both the Segment Update Status and the Table Status for the case of IUD delete
* delta compaction.
*
* @param loadsToMerge
* @param metaDataFilepath
* @param carbonLoadModel
* @return
*/
public static boolean updateLoadMetadataIUDUpdateDeltaMergeStatus(List<LoadMetadataDetails> loadsToMerge, String metaDataFilepath, CarbonLoadModel carbonLoadModel) {
boolean status = false;
boolean updateLockStatus = false;
boolean tableLockStatus = false;
String timestamp = "" + carbonLoadModel.getFactTimeStamp();
List<String> updatedDeltaFilesList = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
// This routine is supposed to update two files, as it is only called during
// IUD_UPDDEL_DELTA_COMPACTION. Along with the Table Status Metadata file
// (for update block compaction) it has to update the Table Update Status
// Metadata file (for the corresponding delete delta files).
// As IUD_UPDDEL_DELTA_COMPACTION writes into the same segment:
// A) Table Update Status Metadata file (block level)
// * for each block which is being compacted, mark 'Compacted' as the status.
// B) Table Status Metadata file (segment level)
// * loadStatus won't be changed to 'Compacted'
// * UpdateDeltaStartTime and UpdateDeltaEndTime will both be set to the current
// timestamp (which is passed from the driver)
// The Table Update Status Metadata file should be updated first, as we need to get
// the updated blocks for the segment from the Table Status Metadata Update Delta Start
// and End Timestamp.
// Table Update Status Metadata update.
AbsoluteTableIdentifier absoluteTableIdentifier = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().getAbsoluteTableIdentifier();
CarbonTablePath carbonTablePath = CarbonStorePath.getCarbonTablePath(absoluteTableIdentifier.getStorePath(), absoluteTableIdentifier.getCarbonTableIdentifier());
SegmentUpdateStatusManager segmentUpdateStatusManager = new SegmentUpdateStatusManager(absoluteTableIdentifier);
SegmentStatusManager segmentStatusManager = new SegmentStatusManager(absoluteTableIdentifier);
ICarbonLock updateLock = segmentUpdateStatusManager.getTableUpdateStatusLock();
ICarbonLock statusLock = segmentStatusManager.getTableStatusLock();
// Update the Compacted Blocks with Compacted Status.
try {
updatedDeltaFilesList = segmentUpdateStatusManager.getUpdateDeltaFiles(loadsToMerge.get(0).getLoadName().toString());
} catch (Exception e) {
LOGGER.error("Error while getting the Update Delta Blocks.");
status = false;
return status;
}
if (updatedDeltaFilesList.size() > 0) {
try {
updateLockStatus = updateLock.lockWithRetries();
tableLockStatus = statusLock.lockWithRetries();
List<String> blockNames = new ArrayList<>(updatedDeltaFilesList.size());
for (String compactedBlocks : updatedDeltaFilesList) {
// extract the block name from the delta file path
int endIndex = compactedBlocks.lastIndexOf(File.separator);
String blkNoExt = compactedBlocks.substring(endIndex + 1, compactedBlocks.lastIndexOf("-"));
blockNames.add(blkNoExt);
}
if (updateLockStatus && tableLockStatus) {
SegmentUpdateDetails[] updateLists = segmentUpdateStatusManager.readLoadMetadata();
for (String compactedBlocks : blockNames) {
// check if the compacted block name matches an entry in the existing update details
for (int i = 0; i < updateLists.length; i++) {
if (updateLists[i].getBlockName().equalsIgnoreCase(compactedBlocks) && !CarbonCommonConstants.COMPACTED.equalsIgnoreCase(updateLists[i].getStatus()) && !CarbonCommonConstants.MARKED_FOR_DELETE.equalsIgnoreCase(updateLists[i].getStatus())) {
updateLists[i].setStatus(CarbonCommonConstants.COMPACTED);
}
}
}
LoadMetadataDetails[] loadDetails = segmentStatusManager.readLoadMetadata(metaDataFilepath);
for (LoadMetadataDetails loadDetail : loadDetails) {
if (loadsToMerge.contains(loadDetail)) {
loadDetail.setUpdateDeltaStartTimestamp(timestamp);
loadDetail.setUpdateDeltaEndTimestamp(timestamp);
if (loadDetail.getLoadName().equalsIgnoreCase("0")) {
loadDetail.setUpdateStatusFileName(CarbonUpdateUtil.getUpdateStatusFileName(timestamp));
}
}
}
try {
segmentUpdateStatusManager.writeLoadDetailsIntoFile(Arrays.asList(updateLists), timestamp);
segmentStatusManager.writeLoadDetailsIntoFile(carbonTablePath.getTableStatusFilePath(), loadDetails);
status = true;
} catch (IOException e) {
LOGGER.error("Error while writing metadata. The metadata file path is " + carbonTablePath.getMetadataDirectoryPath());
status = false;
}
} else {
LOGGER.error("Not able to acquire the lock.");
status = false;
}
} catch (Exception e) {
LOGGER.error("Error while updating metadata. The metadata file path is " + carbonTablePath.getMetadataDirectoryPath());
status = false;
} finally {
if (updateLockStatus) {
if (updateLock.unlock()) {
LOGGER.info("Unlock the segment update lock successfully.");
} else {
LOGGER.error("Not able to unlock the segment update lock.");
}
}
if (tableLockStatus) {
if (statusLock.unlock()) {
LOGGER.info("Unlock the table status lock successfully.");
} else {
LOGGER.error("Not able to unlock the table status lock.");
}
}
}
}
return status;
}
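The block-name extraction above (taking the substring between the last path separator and the last hyphen of each update delta file path) is easy to get wrong, so the standalone sketch below shows the same parsing in isolation. The sample path and class name are made up for illustration; real paths come from SegmentUpdateStatusManager.getUpdateDeltaFiles.
import java.io.File;

// Illustration only: the same substring logic used above to turn an update delta
// file path into a block name without its timestamp suffix and extension.
final class BlockNameFromDeltaPathSketch {

  static String blockName(String deltaFilePath) {
    int dirEnd = deltaFilePath.lastIndexOf(File.separator);
    // keep everything between the last separator and the last '-'
    return deltaFilePath.substring(dirEnd + 1, deltaFilePath.lastIndexOf("-"));
  }

  public static void main(String[] args) {
    // hypothetical path layout, for demonstration only
    String sample = String.join(File.separator,
        "", "store", "Fact", "Part0", "Segment_0", "part-0-0_batchno0-0-1502277424001.carbondata");
    System.out.println(blockName(sample));   // prints: part-0-0_batchno0-0
  }
}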