use of org.apache.carbondata.core.mutate.UpdateVO in project carbondata by apache.
the class CarbonInputFormat method getSegmentAbstractIndexs.
/**
 * It returns the index for each task file of the given segment.
 * @param job
 * @param absoluteTableIdentifier
 * @param segmentId
 * @param cacheClient
 * @param updateStatusManager
 * @return
 * @throws IOException
 */
private Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> getSegmentAbstractIndexs(
    JobContext job, AbsoluteTableIdentifier absoluteTableIdentifier, String segmentId,
    CacheClient cacheClient, SegmentUpdateStatusManager updateStatusManager) throws IOException {
  Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> segmentIndexMap = null;
  SegmentTaskIndexWrapper segmentTaskIndexWrapper = null;
  boolean isSegmentUpdated = false;
  Set<SegmentTaskIndexStore.TaskBucketHolder> taskKeys = null;
  TableSegmentUniqueIdentifier tableSegmentUniqueIdentifier =
      new TableSegmentUniqueIdentifier(absoluteTableIdentifier, segmentId);
  segmentTaskIndexWrapper =
      cacheClient.getSegmentAccessClient().getIfPresent(tableSegmentUniqueIdentifier);
  UpdateVO updateDetails = updateStatusManager.getInvalidTimestampRange(segmentId);
  if (null != segmentTaskIndexWrapper) {
    segmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
    if (isSegmentUpdate(segmentTaskIndexWrapper, updateDetails)) {
      taskKeys = segmentIndexMap.keySet();
      isSegmentUpdated = true;
    }
  }
  // if the segment tree is not loaded, load the segment tree
  if (segmentIndexMap == null || isSegmentUpdated) {
    // if the segment is updated, only the TableBlockInfo instances of the updated blocks have to
    // be retrieved; they are filtered against taskKeys, and a block whose task is already present
    // is not added again since its btree is already loaded.
    Set<SegmentTaskIndexStore.TaskBucketHolder> validTaskKeys =
        new HashSet<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    List<TableBlockInfo> tableBlockInfoList =
        getTableBlockInfo(job, tableSegmentUniqueIdentifier, taskKeys,
            updateStatusManager.getInvalidTimestampRange(segmentId), updateStatusManager,
            segmentId, validTaskKeys);
    if (!tableBlockInfoList.isEmpty()) {
      Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos = new HashMap<>();
      segmentToTableBlocksInfos.put(segmentId, tableBlockInfoList);
      // get Btree blocks for the given segment
      tableSegmentUniqueIdentifier.setSegmentToTableBlocksInfos(segmentToTableBlocksInfos);
      tableSegmentUniqueIdentifier.setIsSegmentUpdated(isSegmentUpdated);
      segmentTaskIndexWrapper =
          cacheClient.getSegmentAccessClient().get(tableSegmentUniqueIdentifier);
      segmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
    }
    if (null != taskKeys) {
      Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> finalMap =
          new HashMap<>(validTaskKeys.size());
      for (SegmentTaskIndexStore.TaskBucketHolder key : validTaskKeys) {
        finalMap.put(key, segmentIndexMap.get(key));
      }
      segmentIndexMap = finalMap;
    }
  }
  return segmentIndexMap;
}
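The method above is essentially a look-aside cache keyed by segment: the task-to-index map is returned straight from the cache when present, and rebuilt (fully on a miss, partially when the segment has been updated) otherwise. Below is a minimal, self-contained sketch of that pattern in plain Java; TaskIndexCache, TaskKey, and Index are hypothetical stand-ins for the CarbonData types, not the real API.

import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;

// Hypothetical stand-ins for SegmentTaskIndexStore.TaskBucketHolder and AbstractIndex.
final class TaskKey {
  final String taskId;
  TaskKey(String taskId) { this.taskId = taskId; }
}

final class Index {
  final long loadedAt;
  Index(long loadedAt) { this.loadedAt = loadedAt; }
}

final class TaskIndexCache {
  // cached task-to-index map per segment, plus the update timestamp it was built against
  private final Map<String, Map<TaskKey, Index>> cache = new HashMap<>();
  private final Map<String, Long> builtAgainstUpdateTime = new HashMap<>();

  Map<TaskKey, Index> getOrLoad(String segmentId, long latestUpdateTime,
      Function<String, Map<TaskKey, Index>> loader) {
    Map<TaskKey, Index> indexMap = cache.get(segmentId);            // getIfPresent
    Long cachedTime = builtAgainstUpdateTime.get(segmentId);
    boolean segmentUpdated = cachedTime != null && cachedTime < latestUpdateTime;
    if (indexMap == null || segmentUpdated) {                       // miss, or stale after update
      indexMap = loader.apply(segmentId);                           // rebuild the btree index map
      cache.put(segmentId, indexMap);
      builtAgainstUpdateTime.put(segmentId, latestUpdateTime);
    }
    return indexMap;
  }
}

Comparing the cached timestamp against the latest update time plays the same role as the isSegmentUpdate check driven by the UpdateVO invalid-timestamp range in the original method.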
use of org.apache.carbondata.core.mutate.UpdateVO in project carbondata by apache.
the class CarbonTableReader method getSegmentAbstractIndexs.
private Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> getSegmentAbstractIndexs(/*JobContext job,*/
    AbsoluteTableIdentifier absoluteTableIdentifier, CarbonTablePath tablePath, String segmentId,
    CacheClient cacheClient, SegmentUpdateStatusManager updateStatusManager) throws IOException {
  Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> segmentIndexMap = null;
  SegmentTaskIndexWrapper segmentTaskIndexWrapper = null;
  boolean isSegmentUpdated = false;
  Set<SegmentTaskIndexStore.TaskBucketHolder> taskKeys = null;
  TableSegmentUniqueIdentifier tableSegmentUniqueIdentifier =
      new TableSegmentUniqueIdentifier(absoluteTableIdentifier, segmentId);
  segmentTaskIndexWrapper =
      cacheClient.getSegmentAccessClient().getIfPresent(tableSegmentUniqueIdentifier);
  UpdateVO updateDetails = updateStatusManager.getInvalidTimestampRange(segmentId);
  if (null != segmentTaskIndexWrapper) {
    segmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
    if (isSegmentUpdate(segmentTaskIndexWrapper, updateDetails)) {
      taskKeys = segmentIndexMap.keySet();
      isSegmentUpdated = true;
    }
  }
  // if the segment tree is not loaded, load the segment tree
  if (segmentIndexMap == null || isSegmentUpdated) {
    List<FileStatus> fileStatusList = new LinkedList<FileStatus>();
    List<String> segs = new ArrayList<>();
    segs.add(segmentId);
    FileSystem fs =
        getFileStatusOfSegments(new String[] { segmentId }, tablePath, fileStatusList);
    List<InputSplit> splits = getSplit(fileStatusList, fs);
    List<FileSplit> carbonSplits = new ArrayList<>();
    for (InputSplit inputSplit : splits) {
      FileSplit fileSplit = (FileSplit) inputSplit;
      // how should the separator be added here??
      String segId = CarbonTablePath.DataPathUtil.getSegmentId(fileSplit.getPath().toString());
      if (segId.equals(CarbonCommonConstants.INVALID_SEGMENT_ID)) {
        continue;
      }
      carbonSplits.add(fileSplit);
    }
    List<TableBlockInfo> tableBlockInfoList = new ArrayList<>();
    for (FileSplit inputSplit : carbonSplits) {
      if (isValidBlockBasedOnUpdateDetails(taskKeys, inputSplit, updateDetails,
          updateStatusManager, segmentId)) {
        // at this level we do not need blocklet info!!!! Is this a trick?
        BlockletInfos blockletInfos = new BlockletInfos(0, 0, 0);
        tableBlockInfoList.add(
            new TableBlockInfo(inputSplit.getPath().toString(), inputSplit.getStart(), segmentId,
                inputSplit.getLocations(), inputSplit.getLength(), blockletInfos,
                ColumnarFormatVersion.valueOf(CarbonCommonConstants.CARBON_DATA_FILE_DEFAULT_VERSION),
                null));
      }
    }
    Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos = new HashMap<>();
    segmentToTableBlocksInfos.put(segmentId, tableBlockInfoList);
    // get Btree blocks for the given segment
    tableSegmentUniqueIdentifier.setSegmentToTableBlocksInfos(segmentToTableBlocksInfos);
    tableSegmentUniqueIdentifier.setIsSegmentUpdated(isSegmentUpdated);
    segmentTaskIndexWrapper =
        cacheClient.getSegmentAccessClient().get(tableSegmentUniqueIdentifier);
    segmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
  }
  return segmentIndexMap;
}
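This variant builds the TableBlockInfo list straight from the HDFS file splits, and ties each split back to a segment by parsing the segment id out of the file path (the translated comment about the separator refers to exactly that parsing). As a rough, self-contained illustration of that step, the sketch below extracts a segment id from a path, assuming a layout where the segment directory is named Segment_<id>; SegmentIdFromPath is a hypothetical helper, not the real CarbonTablePath.DataPathUtil.

import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Simplified stand-in for a segment-id-from-path lookup, assuming a "Segment_<id>" directory.
final class SegmentIdFromPath {
  private static final Pattern SEGMENT_DIR = Pattern.compile("/Segment_([^/]+)/");

  static Optional<String> segmentId(String filePath) {
    Matcher m = SEGMENT_DIR.matcher(filePath);
    return m.find() ? Optional.of(m.group(1)) : Optional.empty();
  }

  public static void main(String[] args) {
    // Example path (hypothetical); splits whose path yields no segment id would be skipped,
    // mirroring the INVALID_SEGMENT_ID check above.
    String path = "/store/db/table/Fact/Part0/Segment_2/part-0-0.carbondata";
    System.out.println(segmentId(path).orElse("INVALID"));   // prints "2"
  }
}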
use of org.apache.carbondata.core.mutate.UpdateVO in project carbondata by apache.
the class CarbonTableReader method getInputSplits2.
public List<CarbonLocalInputSplit> getInputSplits2(CarbonTableCacheModel tableCacheModel,
    Expression filters) throws Exception {
  // need to apply the filters to the segments
  FilterExpressionProcessor filterExpressionProcessor = new FilterExpressionProcessor();
  AbsoluteTableIdentifier absoluteTableIdentifier =
      tableCacheModel.carbonTable.getAbsoluteTableIdentifier();
  CacheClient cacheClient = new CacheClient(absoluteTableIdentifier.getStorePath());
  List<String> invalidSegments = new ArrayList<>();
  List<UpdateVO> invalidTimestampsList = new ArrayList<>();
  // get all valid segments and set them into the configuration
  SegmentUpdateStatusManager updateStatusManager =
      new SegmentUpdateStatusManager(absoluteTableIdentifier);
  SegmentStatusManager segmentStatusManager = new SegmentStatusManager(absoluteTableIdentifier);
  SegmentStatusManager.ValidAndInvalidSegmentsInfo segments =
      segmentStatusManager.getValidAndInvalidSegments();
  tableCacheModel.segments = segments.getValidSegments().toArray(new String[0]);
  if (segments.getValidSegments().size() == 0) {
    return new ArrayList<>(0);
  }
  // remove entries from the segment index cache if there are invalid segments
  invalidSegments.addAll(segments.getInvalidSegments());
  for (String invalidSegmentId : invalidSegments) {
    invalidTimestampsList.add(updateStatusManager.getInvalidTimestampRange(invalidSegmentId));
  }
  if (invalidSegments.size() > 0) {
    List<TableSegmentUniqueIdentifier> invalidSegmentsIds =
        new ArrayList<>(invalidSegments.size());
    for (String segId : invalidSegments) {
      invalidSegmentsIds.add(new TableSegmentUniqueIdentifier(absoluteTableIdentifier, segId));
    }
    cacheClient.getSegmentAccessClient().invalidateAll(invalidSegmentsIds);
  }
  // get the filter for the segments
  CarbonInputFormatUtil.processFilterExpression(filters, tableCacheModel.carbonTable);
  FilterResolverIntf filterInterface = CarbonInputFormatUtil.resolveFilter(filters,
      tableCacheModel.carbonTable.getAbsoluteTableIdentifier());
  List<CarbonLocalInputSplit> result = new ArrayList<>();
  // for each segment, fetch the blocks matching the filter from the driver BTree
  for (String segmentNo : tableCacheModel.segments) {
    try {
      List<DataRefNode> dataRefNodes =
          getDataBlocksOfSegment(filterExpressionProcessor, absoluteTableIdentifier,
              tableCacheModel.carbonTablePath, filterInterface, segmentNo, cacheClient,
              updateStatusManager);
      for (DataRefNode dataRefNode : dataRefNodes) {
        BlockBTreeLeafNode leafNode = (BlockBTreeLeafNode) dataRefNode;
        TableBlockInfo tableBlockInfo = leafNode.getTableBlockInfo();
        if (CarbonUtil.isInvalidTableBlock(tableBlockInfo,
            updateStatusManager.getInvalidTimestampRange(tableBlockInfo.getSegmentId()),
            updateStatusManager)) {
          continue;
        }
        result.add(new CarbonLocalInputSplit(segmentNo, tableBlockInfo.getFilePath(),
            tableBlockInfo.getBlockOffset(), tableBlockInfo.getBlockLength(),
            Arrays.asList(tableBlockInfo.getLocations()),
            tableBlockInfo.getBlockletInfos().getNoOfBlockLets(),
            tableBlockInfo.getVersion().number()));
      }
    } catch (Exception ex) {
      throw new RuntimeException(ex);
    }
  }
  cacheClient.close();
  return result;
}
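Taken together, getInputSplits2 is the driver-side planning loop: resolve valid and invalid segments, evict the cache entries of invalid segments, resolve the filter once, then for each valid segment prune blocks through the driver BTree and drop blocks invalidated by updates before wrapping the survivors as CarbonLocalInputSplit. A compact sketch of that loop's shape is shown below; Block, LocalSplit, and the pluggable pruning and validity checks are hypothetical stand-ins for the CarbonData calls, not the real API.

import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.function.BiPredicate;
import java.util.function.Function;

// Hypothetical stand-ins for TableBlockInfo and CarbonLocalInputSplit.
final class Block {
  final String filePath; final long offset; final long length;
  Block(String filePath, long offset, long length) {
    this.filePath = filePath; this.offset = offset; this.length = length;
  }
}

final class LocalSplit {
  final String segmentId; final String filePath; final long offset; final long length;
  LocalSplit(String segmentId, String filePath, long offset, long length) {
    this.segmentId = segmentId; this.filePath = filePath; this.offset = offset; this.length = length;
  }
}

final class DriverSidePruning {
  // Iterate the valid segments, ask a per-segment pruner (filter + driver BTree in the original)
  // for candidate blocks, skip blocks invalidated by updates/deletes, and emit local splits.
  static List<LocalSplit> planSplits(Set<String> validSegments,
      Function<String, List<Block>> prunedBlocksOfSegment,
      BiPredicate<String, Block> isInvalidBlock) {
    List<LocalSplit> result = new ArrayList<>();
    for (String segmentId : validSegments) {
      for (Block block : prunedBlocksOfSegment.apply(segmentId)) {
        if (isInvalidBlock.test(segmentId, block)) {
          continue;                                   // stale block from an update/delete
        }
        result.add(new LocalSplit(segmentId, block.filePath, block.offset, block.length));
      }
    }
    return result;
  }
}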