use of org.apache.carbondata.core.mutate.UpdateVO in project carbondata by apache.
the class SegmentUpdateStatusManager method getInvalidTimestampRange.
/**
 * Returns the invalid timestamp range of a segment.
 * @param segmentId the id of the segment to look up
 * @return the invalid timestamp range of the segment
 */
public UpdateVO getInvalidTimestampRange(String segmentId) {
  UpdateVO range = new UpdateVO();
  for (LoadMetadataDetails segment : segmentDetails) {
    if (segment.getLoadName().equalsIgnoreCase(segmentId)) {
      range.setSegmentId(segmentId);
      range.setFactTimestamp(segment.getLoadStartTime());
      if (!segment.getUpdateDeltaStartTimestamp().isEmpty()
          && !segment.getUpdateDeltaEndTimestamp().isEmpty()) {
        range.setUpdateDeltaStartTimestamp(
            CarbonUpdateUtil.getTimeStampAsLong(segment.getUpdateDeltaStartTimestamp()));
        range.setLatestUpdateTimestamp(
            CarbonUpdateUtil.getTimeStampAsLong(segment.getUpdateDeltaEndTimestamp()));
      }
    }
  }
  return range;
}
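A minimal usage sketch of this method (not from the project; the identifier and segment id are placeholders, and the getters mirror the setters used above):

// Hedged usage sketch; "identifier" and segment "0" are illustrative.
SegmentUpdateStatusManager statusManager = new SegmentUpdateStatusManager(identifier);
UpdateVO range = statusManager.getInvalidTimestampRange("0");
// the fact timestamp is set whenever the segment id matches; the
// update-delta fields are populated only if both delta timestamps exist
Long factTimestamp = range.getFactTimestamp();
Long updateDeltaStart = range.getUpdateDeltaStartTimestamp();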
use of org.apache.carbondata.core.mutate.UpdateVO in project carbondata by apache.
the class BlockIndexStore method removeTableBlocksIfHorizontalCompactionDone.
/**
 * Remove table blocks at executor level if horizontal compaction is done.
 * @param queryModel
 */
public void removeTableBlocksIfHorizontalCompactionDone(QueryModel queryModel) {
  // get the invalid segments blocks details
  Map<String, UpdateVO> invalidBlocksVO = queryModel.getInvalidBlockVOForSegmentId();
  if (!invalidBlocksVO.isEmpty()) {
    UpdateVO updateMetadata;
    Iterator<Map.Entry<String, UpdateVO>> itr = invalidBlocksVO.entrySet().iterator();
    String blockTimestamp = null;
    while (itr.hasNext()) {
      Map.Entry<String, UpdateVO> entry = itr.next();
      TableSegmentUniqueIdentifier tableSegmentUniqueIdentifier =
          new TableSegmentUniqueIdentifier(queryModel.getAbsoluteTableIdentifier(), entry.getKey());
      List<BlockInfo> blockInfos = segmentIdToBlockListMap
          .get(tableSegmentUniqueIdentifier.getUniqueTableSegmentIdentifier());
      if (null != blockInfos) {
        for (BlockInfo blockInfo : blockInfos) {
          // reading the updated block names from status manager instance
          blockTimestamp = blockInfo.getBlockUniqueName()
              .substring(blockInfo.getBlockUniqueName().lastIndexOf('-') + 1);
          updateMetadata = entry.getValue();
          if (CarbonUpdateUtil.isMaxQueryTimeoutExceeded(Long.parseLong(blockTimestamp))) {
            Long blockTimeStamp = Long.parseLong(blockTimestamp);
            if (blockTimeStamp > updateMetadata.getFactTimestamp()
                && (updateMetadata.getUpdateDeltaStartTimestamp() != null
                    && blockTimeStamp < updateMetadata.getUpdateDeltaStartTimestamp())) {
              String lruCacheKey =
                  getLruCacheKey(queryModel.getAbsoluteTableIdentifier(), blockInfo);
              lruCache.remove(lruCacheKey);
            }
          }
        }
      }
    }
  }
}
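The block timestamp parsing above depends on the block unique name carrying a trailing "-<timestamp>" suffix; that naming scheme is an assumption here. A self-contained sketch of just the extraction:

// illustrative block unique name; the trailing timestamp is the assumption
String blockUniqueName = "part-0-0-1497425110000";
long blockTimestamp = Long.parseLong(
    blockUniqueName.substring(blockUniqueName.lastIndexOf('-') + 1));
// blockTimestamp is then compared against the UpdateVO fact timestamp and
// update-delta start timestamp, exactly as in the method above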
use of org.apache.carbondata.core.mutate.UpdateVO in project carbondata by apache.
the class SegmentTaskIndexStore method loadAndGetTaskIdToSegmentsMap.
/**
 * Below method will be used to load the segments. One segment may have
 * multiple tasks, so the table segment will be loaded per task id and a
 * map of task id to table segment will be returned.
 *
 * @param segmentToTableBlocksInfos segment id to block info
 * @param absoluteTableIdentifier absolute table identifier
 * @return map of task id to segment mapping
 * @throws IOException
 */
private SegmentTaskIndexWrapper loadAndGetTaskIdToSegmentsMap(
    Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos,
    AbsoluteTableIdentifier absoluteTableIdentifier,
    TableSegmentUniqueIdentifier tableSegmentUniqueIdentifier) throws IOException {
  // task id to segment map
  Iterator<Map.Entry<String, List<TableBlockInfo>>> iteratorOverSegmentBlocksInfos =
      segmentToTableBlocksInfos.entrySet().iterator();
  Map<TaskBucketHolder, AbstractIndex> taskIdToSegmentIndexMap = null;
  SegmentTaskIndexWrapper segmentTaskIndexWrapper = null;
  SegmentUpdateStatusManager updateStatusManager =
      new SegmentUpdateStatusManager(absoluteTableIdentifier);
  String segmentId = null;
  TaskBucketHolder taskBucketHolder = null;
  try {
    while (iteratorOverSegmentBlocksInfos.hasNext()) {
      // segment id to table block mapping
      Map.Entry<String, List<TableBlockInfo>> next = iteratorOverSegmentBlocksInfos.next();
      // group task id to table block info mapping for the segment
      Map<TaskBucketHolder, List<TableBlockInfo>> taskIdToTableBlockInfoMap =
          mappedAndGetTaskIdToTableBlockInfo(segmentToTableBlocksInfos);
      segmentId = next.getKey();
      // get the existing map of task id to table segment map
      UpdateVO updateVO = updateStatusManager.getInvalidTimestampRange(segmentId);
      // check if the segment is already loaded; if so, there is
      // no need to load the segment block again
      String lruCacheKey = tableSegmentUniqueIdentifier.getUniqueTableSegmentIdentifier();
      segmentTaskIndexWrapper = (SegmentTaskIndexWrapper) lruCache.get(lruCacheKey);
      if (segmentTaskIndexWrapper == null || tableSegmentUniqueIdentifier.isSegmentUpdated()) {
        // get the segment loader lock object; this is to avoid
        // the same segment getting loaded multiple times
        // in case of concurrent queries
        Object segmentLoaderLockObject = segmentLockMap.get(lruCacheKey);
        if (null == segmentLoaderLockObject) {
          segmentLoaderLockObject = addAndGetSegmentLock(lruCacheKey);
        }
        // acquire lock to load the segment
        synchronized (segmentLoaderLockObject) {
          segmentTaskIndexWrapper = (SegmentTaskIndexWrapper) lruCache.get(lruCacheKey);
          if (null == segmentTaskIndexWrapper || tableSegmentUniqueIdentifier.isSegmentUpdated()) {
            // reuse the existing map so that it can be updated after loading the btree
            if (tableSegmentUniqueIdentifier.isSegmentUpdated() && null != segmentTaskIndexWrapper) {
              taskIdToSegmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
            } else {
              // creating a map of task id to table segment
              taskIdToSegmentIndexMap = new HashMap<TaskBucketHolder, AbstractIndex>();
              segmentTaskIndexWrapper = new SegmentTaskIndexWrapper(taskIdToSegmentIndexMap);
              segmentTaskIndexWrapper.incrementAccessCount();
            }
            Iterator<Map.Entry<TaskBucketHolder, List<TableBlockInfo>>> iterator =
                taskIdToTableBlockInfoMap.entrySet().iterator();
            long requiredSize =
                calculateRequiredSize(taskIdToTableBlockInfoMap, absoluteTableIdentifier);
            segmentTaskIndexWrapper
                .setMemorySize(requiredSize + segmentTaskIndexWrapper.getMemorySize());
            boolean isAddedToLruCache =
                lruCache.put(lruCacheKey, segmentTaskIndexWrapper, requiredSize);
            if (isAddedToLruCache) {
              while (iterator.hasNext()) {
                Map.Entry<TaskBucketHolder, List<TableBlockInfo>> taskToBlockInfoList =
                    iterator.next();
                taskBucketHolder = taskToBlockInfoList.getKey();
                taskIdToSegmentIndexMap.put(taskBucketHolder,
                    loadBlocks(taskBucketHolder, taskToBlockInfoList.getValue(),
                        absoluteTableIdentifier));
              }
            } else {
              throw new IndexBuilderException(
                  "Cannot load the segment. Not enough space available.");
            }
            // set the latest timestamp.
            segmentTaskIndexWrapper.setRefreshedTimeStamp(updateVO.getCreatedOrUpdatedTimeStamp());
            // removing from segment lock map: once the segment is loaded,
            // a concurrent query for the same segment will wait on the lock
            // and then find the segment already loaded, so the lock object
            // is no longer required
            segmentLockMap.remove(lruCacheKey);
          } else {
            segmentTaskIndexWrapper.incrementAccessCount();
          }
        }
      } else {
        segmentTaskIndexWrapper.incrementAccessCount();
      }
    }
  } catch (IndexBuilderException e) {
    LOGGER.error("Problem while loading the segment");
    throw e;
  }
  return segmentTaskIndexWrapper;
}
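The core of this method is a per-key, double-checked load: check the LRU cache, take a segment-specific lock, re-check under the lock, then load and drop the lock entry. A self-contained sketch of that pattern with generic names (the maps and load method are illustrative stand-ins for segmentLockMap, lruCache, and loadBlocks):

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

public final class PerKeyLoadSketch {
  private final Map<String, Object> locks = new ConcurrentHashMap<>();
  private final Map<String, Object> cache = new ConcurrentHashMap<>();

  public Object get(String key) {
    Object value = cache.get(key);
    if (value != null) {
      return value; // fast path: already loaded
    }
    Object lock = locks.computeIfAbsent(key, k -> new Object());
    synchronized (lock) {
      value = cache.get(key); // re-check under the lock
      if (value == null) {
        value = load(key);
        cache.put(key, value);
        // once loaded, waiters will find the cached value, so the
        // lock entry can be dropped (as segmentLockMap.remove above)
        locks.remove(key);
      }
    }
    return value;
  }

  private Object load(String key) {
    return new Object(); // stand-in for the expensive btree/index load
  }
}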
use of org.apache.carbondata.core.mutate.UpdateVO in project carbondata by apache.
the class CarbonInputFormat method getSplits.
/**
 * {@inheritDoc}
 * The configuration FileInputFormat.INPUT_DIR
 * is used to get the table path to read.
 *
 * @param job
 * @return List<InputSplit> list of CarbonInputSplit
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());
  CacheClient cacheClient = new CacheClient(identifier.getStorePath());
  try {
    List<String> invalidSegments = new ArrayList<>();
    List<UpdateVO> invalidTimestampsList = new ArrayList<>();
    // get all valid segments and set them into the configuration
    if (getSegmentsToAccess(job).length == 0) {
      SegmentStatusManager segmentStatusManager = new SegmentStatusManager(identifier);
      SegmentStatusManager.ValidAndInvalidSegmentsInfo segments =
          segmentStatusManager.getValidAndInvalidSegments();
      SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(identifier);
      setSegmentsToAccess(job.getConfiguration(), segments.getValidSegments());
      if (segments.getValidSegments().size() == 0) {
        return new ArrayList<>(0);
      }
      // remove entry in the segment index if there are invalid segments
      invalidSegments.addAll(segments.getInvalidSegments());
      for (String invalidSegmentId : invalidSegments) {
        invalidTimestampsList.add(updateStatusManager.getInvalidTimestampRange(invalidSegmentId));
      }
      if (invalidSegments.size() > 0) {
        List<TableSegmentUniqueIdentifier> invalidSegmentsIds =
            new ArrayList<>(invalidSegments.size());
        for (String segId : invalidSegments) {
          invalidSegmentsIds.add(new TableSegmentUniqueIdentifier(identifier, segId));
        }
        cacheClient.getSegmentAccessClient().invalidateAll(invalidSegmentsIds);
      }
    }
    // process and resolve the expression
    Expression filter = getFilterPredicates(job.getConfiguration());
    CarbonTable carbonTable = getCarbonTable(job.getConfiguration());
    // this will be null in case of a corrupt schema file.
    if (null == carbonTable) {
      throw new IOException("Missing/Corrupt schema file for table.");
    }
    CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
    // prune partitions for a filter query on a partition table
    BitSet matchedPartitions = null;
    if (null != filter) {
      PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getFactTableName());
      if (null != partitionInfo) {
        Partitioner partitioner = PartitionUtil.getPartitioner(partitionInfo);
        matchedPartitions = new FilterExpressionProcessor()
            .getFilteredPartitions(filter, partitionInfo, partitioner);
        if (matchedPartitions.cardinality() == 0) {
          // no partition is required
          return new ArrayList<InputSplit>();
        }
        if (matchedPartitions.cardinality() == partitioner.numPartitions()) {
          // all partitions are required; no need to prune partitions
          matchedPartitions = null;
        }
      }
    }
    FilterResolverIntf filterInterface = CarbonInputFormatUtil.resolveFilter(filter, identifier);
    // do block filtering and get splits
    List<InputSplit> splits = getSplits(job, filterInterface, matchedPartitions, cacheClient);
    // pass the invalid segments to the task side in order to remove the index entries there
    if (invalidSegments.size() > 0) {
      for (InputSplit split : splits) {
        ((CarbonInputSplit) split).setInvalidSegments(invalidSegments);
        ((CarbonInputSplit) split).setInvalidTimestampRange(invalidTimestampsList);
      }
    }
    return splits;
  } finally {
    // close the cache client to clear LRU cache memory
    cacheClient.close();
  }
}
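A hedged driver sketch showing how a client reaches getSplits through the standard mapreduce API; the store path is hypothetical and everything besides the CarbonInputFormat call follows plain Hadoop conventions:

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

// illustrative driver; "/store/default/t1" is a placeholder table path
Job job = Job.getInstance(new Configuration(), "carbon-splits-sketch");
FileInputFormat.addInputPath(job, new Path("/store/default/t1"));
CarbonInputFormat<Object[]> format = new CarbonInputFormat<>();
List<InputSplit> splits = format.getSplits(job);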
use of org.apache.carbondata.core.mutate.UpdateVO in project carbondata by apache.
the class CarbonInputFormat method getQueryModel.
public QueryModel getQueryModel(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
    throws IOException {
  Configuration configuration = taskAttemptContext.getConfiguration();
  CarbonTable carbonTable = getCarbonTable(configuration);
  // getting the absoluteTableIdentifier from the carbonTable
  // to avoid unnecessary deserialization
  AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
  // the query plan includes the projection columns
  String projection = getColumnProjection(configuration);
  CarbonQueryPlan queryPlan = CarbonInputFormatUtil.createQueryPlan(carbonTable, projection);
  QueryModel queryModel = QueryModel.createModel(identifier, queryPlan, carbonTable);
  // set the filter on the query model in order to filter blocklets before the scan
  Expression filter = getFilterPredicates(configuration);
  CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
  FilterResolverIntf filterIntf = CarbonInputFormatUtil.resolveFilter(filter, identifier);
  queryModel.setFilterExpressionResolverTree(filterIntf);
  // update the file-level index store if there are invalid segments
  if (inputSplit instanceof CarbonMultiBlockSplit) {
    CarbonMultiBlockSplit split = (CarbonMultiBlockSplit) inputSplit;
    List<String> invalidSegments = split.getAllSplits().get(0).getInvalidSegments();
    if (invalidSegments.size() > 0) {
      queryModel.setInvalidSegmentIds(invalidSegments);
    }
    List<UpdateVO> invalidTimestampRangeList =
        split.getAllSplits().get(0).getInvalidTimestampRange();
    if ((null != invalidTimestampRangeList) && (invalidTimestampRangeList.size() > 0)) {
      queryModel.setInvalidBlockForSegmentId(invalidTimestampRangeList);
    }
  }
  return queryModel;
}
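For context, a sketch of the typical call site for getQueryModel inside a record reader factory; the reader construction and the read-support helper are assumptions, not taken from the snippet above:

// hedged sketch of a createRecordReader that consumes the query model
@Override
public RecordReader<Void, T> createRecordReader(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException {
  QueryModel queryModel = getQueryModel(inputSplit, taskAttemptContext);
  // read-support construction is assumed; the helper name is illustrative
  CarbonReadSupport<T> readSupport = getReadSupportClass(taskAttemptContext.getConfiguration());
  return new CarbonRecordReader<T>(queryModel, readSupport);
}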